dbworkload 0.6.2__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dbworkload-0.6.2 → dbworkload-0.6.3}/PKG-INFO +1 -1
- {dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/cli/main.py +1 -0
- {dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/utils/simplefaker.py +50 -45
- {dbworkload-0.6.2 → dbworkload-0.6.3}/pyproject.toml +1 -1
- {dbworkload-0.6.2 → dbworkload-0.6.3}/LICENSE +0 -0
- {dbworkload-0.6.2 → dbworkload-0.6.3}/README.md +0 -0
- {dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/__init__.py +0 -0
- {dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/cli/dep.py +0 -0
- {dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/cli/util.py +0 -0
- {dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/models/run.py +0 -0
- {dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/models/util.py +0 -0
- {dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/templates/stub.j2 +0 -0
- {dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/utils/common.py +0 -0
|
@@ -7,6 +7,7 @@ import pandas as pd
|
|
|
7
7
|
import uuid
|
|
8
8
|
import random
|
|
9
9
|
import builtins
|
|
10
|
+
from .common import import_class_at_runtime
|
|
10
11
|
|
|
11
12
|
logger = logging.getLogger("dbworkload")
|
|
12
13
|
|
|
@@ -584,6 +585,9 @@ class SimpleFaker:
|
|
|
584
585
|
return [SimpleFaker.Bit(seed=s, **args) for s in seeds]
|
|
585
586
|
elif obj_type == "bytes":
|
|
586
587
|
return [SimpleFaker.Bytes(seed=s, **args) for s in seeds]
|
|
588
|
+
elif obj_type == "custom":
|
|
589
|
+
custom_gen = import_class_at_runtime(args.pop("path"))
|
|
590
|
+
return [custom_gen(seed=s, **args) for s in seeds]
|
|
587
591
|
else:
|
|
588
592
|
raise ValueError(
|
|
589
593
|
f"SimpleFaker type not implemented or recognized: '{obj_type}'"
|
|
@@ -610,47 +614,57 @@ class SimpleFaker:
|
|
|
610
614
|
separator (str): the field delimiter in the CSV file
|
|
611
615
|
compression (str): the compression format (gzip, zip, None..)
|
|
612
616
|
"""
|
|
617
|
+
|
|
618
|
+
def gen_to_csv(iters: int):
|
|
619
|
+
# create individual Series and then concat them together
|
|
620
|
+
df = pd.concat(
|
|
621
|
+
[pd.Series([next(gen) for _ in range(iters)]) for gen in generators],
|
|
622
|
+
axis=1,
|
|
623
|
+
keys=col_names,
|
|
624
|
+
)
|
|
625
|
+
|
|
626
|
+
# get a list of the colums that are not to be sorted by
|
|
627
|
+
remaining = list(set(col_names) - set(sort_by))
|
|
628
|
+
|
|
629
|
+
# create a dataframe by concatenating:
|
|
630
|
+
# 1 - the df subset with the sort_by columns sorted by the sort_by columns
|
|
631
|
+
# 2 - the df subset with the remaining columns
|
|
632
|
+
# finally order the columns by the original col_names
|
|
633
|
+
# then save to csv
|
|
634
|
+
pd.concat(
|
|
635
|
+
[
|
|
636
|
+
df[sort_by].sort_values(sort_by).reset_index(drop=True),
|
|
637
|
+
df[remaining],
|
|
638
|
+
],
|
|
639
|
+
axis=1,
|
|
640
|
+
)[col_names].to_csv(
|
|
641
|
+
basename + "_" + str(counter) + suffix,
|
|
642
|
+
quoting=csv.QUOTE_MINIMAL,
|
|
643
|
+
sep=separator,
|
|
644
|
+
header=False,
|
|
645
|
+
index=False,
|
|
646
|
+
compression=compression,
|
|
647
|
+
)
|
|
648
|
+
|
|
613
649
|
logger.debug("SimpleFaker worker created")
|
|
614
650
|
if iterations > self.csv_max_rows:
|
|
615
|
-
count =
|
|
651
|
+
count = iterations // self.csv_max_rows
|
|
616
652
|
rem = iterations % self.csv_max_rows
|
|
617
653
|
iterations = self.csv_max_rows
|
|
618
654
|
else:
|
|
619
655
|
count = 1
|
|
620
656
|
rem = 0
|
|
621
657
|
|
|
622
|
-
if separator == "\t"
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
suffix
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
suffix += ".zip"
|
|
631
|
-
elif compression == "bz2":
|
|
632
|
-
suffix += ".bz2"
|
|
633
|
-
elif compression == "xz":
|
|
634
|
-
suffix += ".xz"
|
|
635
|
-
|
|
636
|
-
for x in range(count):
|
|
658
|
+
suffix = ".tsv" if separator == "\t" else ".csv"
|
|
659
|
+
|
|
660
|
+
if compression:
|
|
661
|
+
suffix += "." + {
|
|
662
|
+
"gzip": "gz",
|
|
663
|
+
}.get(compression, compression)
|
|
664
|
+
|
|
665
|
+
for counter in range(count):
|
|
637
666
|
try:
|
|
638
|
-
|
|
639
|
-
[
|
|
640
|
-
row
|
|
641
|
-
for row in [
|
|
642
|
-
[next(x) for x in generators] for _ in range(iterations)
|
|
643
|
-
]
|
|
644
|
-
],
|
|
645
|
-
columns=col_names,
|
|
646
|
-
).sort_values(by=sort_by).to_csv(
|
|
647
|
-
basename + "_" + str(x) + suffix,
|
|
648
|
-
quoting=csv.QUOTE_MINIMAL,
|
|
649
|
-
sep=separator,
|
|
650
|
-
header=False,
|
|
651
|
-
index=False,
|
|
652
|
-
compression=compression,
|
|
653
|
-
)
|
|
667
|
+
gen_to_csv(iterations)
|
|
654
668
|
except csv.Error as e:
|
|
655
669
|
logger.error(e)
|
|
656
670
|
if e.args[0] == "need to escape, but no escapechar set":
|
|
@@ -658,20 +672,11 @@ class SimpleFaker:
|
|
|
658
672
|
f"You cannot use the selected delimiter '{separator}'. Consider using another char or the the tab key."
|
|
659
673
|
)
|
|
660
674
|
|
|
661
|
-
logger.debug(f"Saved file '{basename + '_' + str(
|
|
675
|
+
logger.debug(f"Saved file '{basename + '_' + str(counter) + suffix}'")
|
|
662
676
|
|
|
663
677
|
# remaining rows, if any
|
|
664
678
|
if rem > 0:
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
columns=col_names,
|
|
668
|
-
).sort_values(by=sort_by).to_csv(
|
|
669
|
-
basename + "_" + str(count) + suffix,
|
|
670
|
-
quoting=csv.QUOTE_MINIMAL,
|
|
671
|
-
sep=separator,
|
|
672
|
-
header=False,
|
|
673
|
-
index=False,
|
|
674
|
-
compression=compression,
|
|
675
|
-
)
|
|
679
|
+
counter = count
|
|
680
|
+
gen_to_csv(rem)
|
|
676
681
|
|
|
677
|
-
logger.debug(f"Saved file '{basename + '_' + str(
|
|
682
|
+
logger.debug(f"Saved file '{basename + '_' + str(counter) + suffix}'")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|