dbworkload 0.6.2__tar.gz → 0.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dbworkload
3
- Version: 0.6.2
3
+ Version: 0.6.3
4
4
  Summary: Workload framework
5
5
  Home-page: https://dbworkload.github.io/dbworkload/
6
6
  License: GPLv3+
@@ -297,5 +297,6 @@ def version_option(
297
297
  ) -> None:
298
298
  pass
299
299
 
300
+
300
301
  # this is only needed for mkdocs-click
301
302
  click_app = typer.main.get_command(app)
@@ -7,6 +7,7 @@ import pandas as pd
7
7
  import uuid
8
8
  import random
9
9
  import builtins
10
+ from .common import import_class_at_runtime
10
11
 
11
12
  logger = logging.getLogger("dbworkload")
12
13
 
@@ -584,6 +585,9 @@ class SimpleFaker:
584
585
  return [SimpleFaker.Bit(seed=s, **args) for s in seeds]
585
586
  elif obj_type == "bytes":
586
587
  return [SimpleFaker.Bytes(seed=s, **args) for s in seeds]
588
+ elif obj_type == "custom":
589
+ custom_gen = import_class_at_runtime(args.pop("path"))
590
+ return [custom_gen(seed=s, **args) for s in seeds]
587
591
  else:
588
592
  raise ValueError(
589
593
  f"SimpleFaker type not implemented or recognized: '{obj_type}'"
@@ -610,47 +614,57 @@ class SimpleFaker:
610
614
  separator (str): the field delimiter in the CSV file
611
615
  compression (str): the compression format (gzip, zip, None..)
612
616
  """
617
+
618
+ def gen_to_csv(iters: int):
619
+ # create individual Series and then concat them together
620
+ df = pd.concat(
621
+ [pd.Series([next(gen) for _ in range(iters)]) for gen in generators],
622
+ axis=1,
623
+ keys=col_names,
624
+ )
625
+
626
+ # get a list of the columns that are not to be sorted by
627
+ remaining = list(set(col_names) - set(sort_by))
628
+
629
+ # create a dataframe by concatenating:
630
+ # 1 - the df subset with the sort_by columns sorted by the sort_by columns
631
+ # 2 - the df subset with the remaining columns
632
+ # finally order the columns by the original col_names
633
+ # then save to csv
634
+ pd.concat(
635
+ [
636
+ df[sort_by].sort_values(sort_by).reset_index(drop=True),
637
+ df[remaining],
638
+ ],
639
+ axis=1,
640
+ )[col_names].to_csv(
641
+ basename + "_" + str(counter) + suffix,
642
+ quoting=csv.QUOTE_MINIMAL,
643
+ sep=separator,
644
+ header=False,
645
+ index=False,
646
+ compression=compression,
647
+ )
648
+
613
649
  logger.debug("SimpleFaker worker created")
614
650
  if iterations > self.csv_max_rows:
615
- count = int(iterations / self.csv_max_rows)
651
+ count = iterations // self.csv_max_rows
616
652
  rem = iterations % self.csv_max_rows
617
653
  iterations = self.csv_max_rows
618
654
  else:
619
655
  count = 1
620
656
  rem = 0
621
657
 
622
- if separator == "\t":
623
- suffix = ".tsv"
624
- else:
625
- suffix = ".csv"
626
-
627
- if compression == "gzip":
628
- suffix += ".gz"
629
- elif compression == "zip":
630
- suffix += ".zip"
631
- elif compression == "bz2":
632
- suffix += ".bz2"
633
- elif compression == "xz":
634
- suffix += ".xz"
635
-
636
- for x in range(count):
658
+ suffix = ".tsv" if separator == "\t" else ".csv"
659
+
660
+ if compression:
661
+ suffix += "." + {
662
+ "gzip": "gz",
663
+ }.get(compression, compression)
664
+
665
+ for counter in range(count):
637
666
  try:
638
- pd.DataFrame(
639
- [
640
- row
641
- for row in [
642
- [next(x) for x in generators] for _ in range(iterations)
643
- ]
644
- ],
645
- columns=col_names,
646
- ).sort_values(by=sort_by).to_csv(
647
- basename + "_" + str(x) + suffix,
648
- quoting=csv.QUOTE_MINIMAL,
649
- sep=separator,
650
- header=False,
651
- index=False,
652
- compression=compression,
653
- )
667
+ gen_to_csv(iterations)
654
668
  except csv.Error as e:
655
669
  logger.error(e)
656
670
  if e.args[0] == "need to escape, but no escapechar set":
@@ -658,20 +672,11 @@ class SimpleFaker:
658
672
  f"You cannot use the selected delimiter '{separator}'. Consider using another char or the the tab key."
659
673
  )
660
674
 
661
- logger.debug(f"Saved file '{basename + '_' + str(x) + suffix}'")
675
+ logger.debug(f"Saved file '{basename + '_' + str(counter) + suffix}'")
662
676
 
663
677
  # remaining rows, if any
664
678
  if rem > 0:
665
- pd.DataFrame(
666
- [row for row in [[next(x) for x in generators] for _ in range(rem)]],
667
- columns=col_names,
668
- ).sort_values(by=sort_by).to_csv(
669
- basename + "_" + str(count) + suffix,
670
- quoting=csv.QUOTE_MINIMAL,
671
- sep=separator,
672
- header=False,
673
- index=False,
674
- compression=compression,
675
- )
679
+ counter = count
680
+ gen_to_csv(rem)
676
681
 
677
- logger.debug(f"Saved file '{basename + '_' + str(x) + suffix}'")
682
+ logger.debug(f"Saved file '{basename + '_' + str(counter) + suffix}'")
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dbworkload"
3
- version = "0.6.2"
3
+ version = "0.6.3"
4
4
  description = "Workload framework"
5
5
  authors = ["Fabio Ghirardello"]
6
6
  license = "GPLv3+"
File without changes
File without changes