PyPI - dbworkload - Versions diffs - 0.6.2__tar.gz → 0.6.3__tar.gz - Mend

dbworkload 0.6.2 (tar.gz) → 0.6.3 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{dbworkload-0.6.2 → dbworkload-0.6.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dbworkload
-Version: 0.6.2
+Version: 0.6.3
 Summary: Workload framework
 Home-page: https://dbworkload.github.io/dbworkload/
 License: GPLv3+

{dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/cli/main.py RENAMED Viewed

@@ -297,5 +297,6 @@ def version_option(
 ) -> None:
     pass
 # this is only needed for mkdocs-click
 click_app = typer.main.get_command(app)

{dbworkload-0.6.2 → dbworkload-0.6.3}/dbworkload/utils/simplefaker.py RENAMED Viewed

@@ -7,6 +7,7 @@ import pandas as pd
 import uuid
 import random
 import builtins
+from .common import import_class_at_runtime
 logger = logging.getLogger("dbworkload")
@@ -584,6 +585,9 @@ class SimpleFaker:
             return [SimpleFaker.Bit(seed=s, **args) for s in seeds]
         elif obj_type == "bytes":
             return [SimpleFaker.Bytes(seed=s, **args) for s in seeds]
+        elif obj_type == "custom":
+            custom_gen = import_class_at_runtime(args.pop("path"))
+            return [custom_gen(seed=s, **args) for s in seeds]
         else:
             raise ValueError(
                 f"SimpleFaker type not implemented or recognized: '{obj_type}'"
@@ -610,47 +614,57 @@ class SimpleFaker:
             separator (str): the field delimiter in the CSV file
             compression (str): the compression format (gzip, zip, None..)
         """
+        def gen_to_csv(iters: int):
+            # create individual Series and then concat them together
+            df = pd.concat(
+                [pd.Series([next(gen) for _ in range(iters)]) for gen in generators],
+                axis=1,
+                keys=col_names,
+            )
+            # get a list of the colums that are not to be sorted by
+            remaining = list(set(col_names) - set(sort_by))
+            # create a dataframe by concatenating:
+            # 1 - the df subset with the sort_by columns sorted by the sort_by columns
+            # 2 - the df subset with the remaining columns
+            # finally order the columns by the original col_names
+            # then save to csv
+            pd.concat(
+                [
+                    df[sort_by].sort_values(sort_by).reset_index(drop=True),
+                    df[remaining],
+                ],
+                axis=1,
+            )[col_names].to_csv(
+                basename + "_" + str(counter) + suffix,
+                quoting=csv.QUOTE_MINIMAL,
+                sep=separator,
+                header=False,
+                index=False,
+                compression=compression,
+            )
         logger.debug("SimpleFaker worker created")
         if iterations > self.csv_max_rows:
-            count = int(iterations / self.csv_max_rows)
+            count = iterations // self.csv_max_rows
             rem = iterations % self.csv_max_rows
             iterations = self.csv_max_rows
         else:
             count = 1
             rem = 0
-        if separator == "\t":
-            suffix = ".tsv"
-        else:
-            suffix = ".csv"
-        if compression == "gzip":
-            suffix += ".gz"
-        elif compression == "zip":
-            suffix += ".zip"
-        elif compression == "bz2":
-            suffix += ".bz2"
-        elif compression == "xz":
-            suffix += ".xz"
-        for x in range(count):
+        suffix = ".tsv" if separator == "\t" else ".csv"
+        if compression:
+            suffix += "." + {
+                "gzip": "gz",
+            }.get(compression, compression)
+        for counter in range(count):
             try:
-                pd.DataFrame(
-                    [
-                        row
-                        for row in [
-                            [next(x) for x in generators] for _ in range(iterations)
-                        ]
-                    ],
-                    columns=col_names,
-                ).sort_values(by=sort_by).to_csv(
-                    basename + "_" + str(x) + suffix,
-                    quoting=csv.QUOTE_MINIMAL,
-                    sep=separator,
-                    header=False,
-                    index=False,
-                    compression=compression,
-                )
+                gen_to_csv(iterations)
             except csv.Error as e:
                 logger.error(e)
                 if e.args[0] == "need to escape, but no escapechar set":
@@ -658,20 +672,11 @@ class SimpleFaker:
                         f"You cannot use the selected delimiter '{separator}'. Consider using another char or the the tab key."
                     )
-            logger.debug(f"Saved file '{basename + '_' + str(x) + suffix}'")
+            logger.debug(f"Saved file '{basename + '_' + str(counter) + suffix}'")
         # remaining rows, if any
         if rem > 0:
-            pd.DataFrame(
-                [row for row in [[next(x) for x in generators] for _ in range(rem)]],
-                columns=col_names,
-            ).sort_values(by=sort_by).to_csv(
-                basename + "_" + str(count) + suffix,
-                quoting=csv.QUOTE_MINIMAL,
-                sep=separator,
-                header=False,
-                index=False,
-                compression=compression,
-            )
+            counter = count
+            gen_to_csv(rem)
-            logger.debug(f"Saved file '{basename + '_' + str(x) + suffix}'")
+            logger.debug(f"Saved file '{basename + '_' + str(counter) + suffix}'")

{dbworkload-0.6.2 → dbworkload-0.6.3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dbworkload"
-version = "0.6.2"
+version = "0.6.3"
 description = "Workload framework"
 authors = ["Fabio Ghirardello"]
 license = "GPLv3+"