PyPI - dbworkload - Versions diffs - 0.6.5__tar.gz → 0.7.0__tar.gz - Mend

dbworkload 0.6.5.tar.gz → 0.7.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{dbworkload-0.6.5 → dbworkload-0.7.0}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,7 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: dbworkload
-Version: 0.6.5
+Version: 0.7.0
 Summary: Workload framework
-Home-page: https://dbworkload.github.io/dbworkload/
 License: GPLv3+
 Author: Fabio Ghirardello
 Requires-Python: >=3.8,<4.0
@@ -45,6 +44,7 @@ Requires-Dist: pyyaml
 Requires-Dist: sqlparse
 Requires-Dist: tabulate
 Requires-Dist: typer[all]
+Project-URL: Homepage, https://dbworkload.github.io/dbworkload/
 Project-URL: Repository, https://github.com/dbworkload/dbworkload
 Description-Content-Type: text/markdown

{dbworkload-0.6.5 → dbworkload-0.7.0}/dbworkload/__init__.py RENAMED Viewed

@@ -1,5 +1,5 @@
 import logging
+import time
 from importlib import metadata
 try:
@@ -10,10 +10,15 @@ except:
 del metadata  # optional, avoids polluting the results of dir(__package__)
 logger = logging.getLogger("dbworkload")
-# logger.setLevel(logging.INFO)
 sh = logging.StreamHandler()
 formatter = logging.Formatter(
-    "%(asctime)s [%(levelname)s] (%(processName)s %(threadName)s) %(module)s:%(lineno)d: %(message)s"
+    "%(asctime)s [%(levelname)s] (%(processName)s %(threadName)s) %(module)s:%(lineno)d: %(message)s",
 )
+# set the formatter to use UTC and show microseconds
+formatter.converter = time.gmtime
+formatter.default_msec_format = "%s.%06d"
 sh.setFormatter(formatter)
 logger.addHandler(sh)

{dbworkload-0.6.5 → dbworkload-0.7.0}/dbworkload/cli/dep.py RENAMED Viewed

@@ -3,7 +3,7 @@
 from .. import __version__
 import typer
-EPILOG = "GitHub: <https://github.com/fabiog1901/dbworkload>"
+EPILOG = "Docs: <https://dbworkload.github.io/dbworkload/>"
 class ConnInfo:

{dbworkload-0.6.5 → dbworkload-0.7.0}/dbworkload/cli/main.py RENAMED Viewed

@@ -17,7 +17,7 @@ import platform
 import sys
 import typer
 import yaml
+import pandas as pd
 logger = logging.getLogger("dbworkload")
@@ -97,6 +97,12 @@ def run(
         help="Duration in seconds. Defaults to <ad infinitum>.",
         show_default=False,
     ),
+    max_rate: int = typer.Option(
+        None,
+        "--max-rate",
+        show_default=False,
+        help="Set the max-rate to have dbworkload manage concurrency. Defaults to None.",
+    ),
     conn_duration: int = typer.Option(
         None,
         "-k",
@@ -134,6 +140,11 @@ def run(
         show_default=False,
         help="Save stats to CSV files.",
     ),
+    schedule: str = typer.Option(
+        None,
+        "--schedule",
+        help="schedule JSON string or filepath to the schedule file.",
+    ),
     log_level: LogLevel = Param.LogLevel,
 ):
     logger.setLevel(log_level.upper())
@@ -220,6 +231,8 @@ def run(
     args = load_args(args)
+    schedule = load_schedule(schedule)
     dbworkload.models.run.run(
         concurrency,
         workload_path,
@@ -230,10 +243,12 @@ def run(
         conn_info,
         duration,
         conn_duration,
+        max_rate,
         args,
         driver,
         quiet,
         save,
+        schedule,
         log_level.upper(),
     )
@@ -278,6 +293,21 @@ def load_args(args: str):
     return {}
+def load_schedule(schedule_path: str):
+    if schedule_path:
+        if os.path.exists(schedule_path):
+            df = pd.read_csv(schedule_path, dtype="Int64", comment="#").fillna(0)
+            # trasform ramp and duration columns from minutes to seconds
+            df[["ramp", "duration"]] = df[["ramp", "duration"]] * 60
+            return df.values.tolist()
+        else:
+            try:
+                return json.loads(schedule_path)
+            except:
+                logger.error(f"couldn't decode {schedule_path} as JSON")
 def _version_callback(value: bool) -> None:
     if value:
         typer.echo(f"dbworkload : {__version__}")

{dbworkload-0.6.5 → dbworkload-0.7.0}/dbworkload/models/run.py RENAMED Viewed

@@ -13,7 +13,7 @@ import signal
 import sys
 import sys
 import tabulate
-import threading
+from threading import Thread
 import time
 import traceback
@@ -92,35 +92,72 @@ def signal_handler(sig, frame):
     """
     logger.info("KeyboardInterrupt signal detected. Stopping processes...")
-    # send the poison pill to each worker.
+    # send the poison pill to each proc.
     # if dbworkload cannot graceful shutdown due
     # to processes being still in the init phase
     # when the pill is sent, a subsequent Ctrl+C will cause
     # the pill to overflow the kill_q
     # and raise the queue.Full exception, forcing to quit.
-    for _ in range(concurrency):
+    for q in queues.values():
         try:
-            kill_q.put(None, timeout=0.1)
+            q.put("proc_end", timeout=0.1)
         except queue.Full:
             logger.error("Timed out")
             sys.exit(1)
-    logger.debug("Sent poison pill to all threads")
+    logger.debug("Sent poison pill to all procs")
-def ramp_up(
-    processes: list, interval: float, threads_per_proc: list, init_sleep: int = 0
+def cycle(iterable, backwards=False):
+    global current_proc
+    if not backwards:
+        current_proc += 1
+        return current_proc % iterable
+    else:
+        v = current_proc % iterable
+        current_proc -= 1
+        return v
+# Launch or kill worker threads based on cc_change value.
+# workers are added or removed evenly across all supervisors.
+# If a ramp time is specified, threads creation or destruction
+# will be paced accordingly.
+def launch_or_kill_workers(
+    queues: list,
+    ramp_time: int,
+    cc_change: int,
+    proc_len: list,
+    iterations_per_thread,
+    concurrency,
 ):
-    """Start each process in the list sequentially respecting the interval between each process"""
-    time.sleep(init_sleep)
-    for i, p in enumerate(processes):
-        logger.debug("Starting a new Process...")
-        p.start()
-        time.sleep(interval * threads_per_proc[i])
+    if cc_change == 0:
+        return
+    ramp_interval = ramp_time / abs(cc_change)
+    global thread_id
+    if cc_change > 0:
+        for _ in range(cc_change):
+            queues[cycle(proc_len)].put(
+                (
+                    thread_id,
+                    iterations_per_thread,
+                    concurrency,
+                )
+            )
+            thread_id += 1
+            time.sleep(ramp_interval)
+    if cc_change < 0:
+        for _ in range(abs(cc_change)):
+            queues[cycle(proc_len, backwards=True)].put("kill_one")
+            time.sleep(ramp_interval)
 def run(
-    conc: int,
+    concurrency: int,
     workload_path: str,
     prom_port: int,
     iterations: int,
@@ -129,13 +166,15 @@ def run(
     conn_info: dict,
     duration: int,
     conn_duration: int,
+    max_rate: int,
     args: dict,
     driver: str,
     quiet: bool,
     save: bool,
+    schedule: list,
     log_level: str,
 ):
-    def gracefully_shutdown():
+    def gracefully_shutdown(by_keyinterrupt: bool = False):
         """
         wait for final stat reports to come in,
         then print final stats and quit
@@ -144,10 +183,21 @@ def run(
         end_time = int(time.time())
         _s = stats_received
+        if not by_keyinterrupt:
+            for q in queues.values():
+                try:
+                    q.put("proc_end", timeout=0.1)
+                except queue.Full:
+                    logger.error("Timed out")
+                    sys.exit(1)
+            for x in supervisors.values():
+                if x.is_alive():
+                    x.join()
         while True:
             try:
-                msg = q.get(block=True, timeout=2.0)
+                msg = to_main_q.get(block=True, timeout=2.0)
                 if isinstance(msg, list):
                     _s += 1
                     stats.add_tds(msg)
@@ -249,16 +299,7 @@ def run(
     logger.setLevel(log_level)
-    global concurrency
-    concurrency = conc
-    global kill_q
-    global q
     start_time = int(time.time())
-    # the offset registers at what second we want all threads
-    # to send the stat report, so they all send it at the same time
-    offset = start_time % FREQUENCY
     workload = dbworkload.utils.common.import_class_at_runtime(workload_path)
     run_name = (
@@ -269,6 +310,10 @@ def run(
     logger.info(f"Starting workload {run_name}")
+    # the offset registers at what second we want all threads
+    # to send the stat report, so they all send it at the same time
+    offset = start_time % FREQUENCY
     # open a new csv file and just write the header columns
     if save:
         with open(run_name + ".csv", "w") as f:
@@ -281,6 +326,51 @@ def run(
     prom = dbworkload.utils.common.Prom(prom_port)
+    to_main_q = mp.Queue()
+    global queues
+    global supervisors
+    supervisors = {}
+    queues = {}
+    # launch supervisors in a dedicated OS process
+    for x in range(procs):
+        queues[x] = mp.Queue()
+        supervisors[x] = mp.Process(
+            target=supervisor,
+            args=(
+                to_main_q,
+                queues[x],
+                log_level,
+                conn_info,
+                driver,
+                workload,
+                args,
+                conn_duration,
+                offset,
+                x,
+            ),
+            daemon=True,
+        )
+        supervisors[x].start()
+    # report time happens 2 seconds after the stats are received.
+    # we add this buffer to make sure we get all the stats reports
+    # from each thread before we aggregate and display
+    report_time = start_time + FREQUENCY + 2
+    returned_procs = 0
+    active_connections = 0
+    stats_received = 0
+    global current_proc
+    global thread_id
+    current_proc = -1
+    current_cc = 0
+    thread_id = 0
+    pause_for_ramp_time = 0
     iterations_per_thread = None
     if iterations:
         # ensure we don't create more threads than the total number of iterations requested.
@@ -293,232 +383,299 @@ def run(
                 f"You have requested {iterations} iterations on {concurrency} threads. {iterations} modulo {concurrency} = {iterations%concurrency} iterations will not be executed."
             )
-    duration_endtime = None
-    if duration:
-        duration_endtime = time.time() + duration
+    # if no schedule was passed, create a schedule with just 1 line
+    if schedule is None:
+        schedule = [(concurrency, max_rate, ramp, duration)]
-    q = mp.Queue(maxsize=0)
-    kill_q = mp.Queue(maxsize=concurrency)
+    # loop through all lines in the schedule
+    for i, s in enumerate(schedule):
+        cc, max_rate, ramp_time, dur = s
-    # calculate the ramp up schedule, if any
-    threads_per_proc = dbworkload.utils.common.get_threads_per_proc(procs, concurrency)
-    ramp_interval = ramp / concurrency
+        # sanitize
+        if dur and ramp_time > dur:
+            ramp_time = dur
-    # each Process must generate an ID for each of its threads,
-    # starting from the id_base_counter and incrementing by 1.
-    # for each Process' MainThread, the id_base_counter is also its id.
-    id_base_counter = 0
+        logger.info(
+            f"Starting schedule {i+1}/{len(schedule)}: cc={cc}, max_rate={max_rate}, ramp={ramp_time}, dur={dur}"
+        )
-    processes: list[mp.Process] = []
-    for x in threads_per_proc:
-        processes.append(
-            mp.Process(
-                target=worker,
+        # always make sure that a duration is specified, even if none was passed
+        # in which case it defaults to infinite
+        end_schedule_time = time.time() + dur if dur else float("inf")
+        # if max_rate was set instead of concurrency
+        # and current_cc = 0,
+        # start the workload with 1 thread so that dbworkload
+        # has stats to measure on for adding/removing threads
+        # as part of the calculations for maintaining
+        # the desired max_rate
+        if current_cc == 0 and max_rate:
+            Thread(
+                target=launch_or_kill_workers,
+                daemon=True,
                 args=(
-                    x - 1,
-                    ramp_interval,
-                    q,
-                    kill_q,
-                    log_level,
-                    conn_info,
-                    workload,
-                    args,
+                    queues,
+                    ramp_time,
+                    1,
+                    procs,
                     iterations_per_thread,
-                    duration_endtime,
-                    conn_duration,
                     concurrency,
-                    offset,
-                    id_base_counter,
-                    id_base_counter,
-                    driver,
                 ),
-                daemon=True,
-            )
-        )
-        id_base_counter += x
+            ).start()
-    # starting the actual processes is done by the ramp_up method,
-    # executed asynchronously, in its own thread
-    threading.Thread(
-        target=ramp_up, daemon=True, args=(processes, ramp_interval, threads_per_proc)
-    ).start()
+            current_cc = 1
-    # report time happens 2 seconds after the stats are received.
-    # we add this buffer to make sure we get all the stats reports
-    # from each thread before we aggregate and display
-    report_time = start_time + FREQUENCY + 2
+        if not max_rate:
+            Thread(
+                target=launch_or_kill_workers,
+                daemon=True,
+                args=(
+                    queues,
+                    ramp_time,
+                    cc - current_cc,
+                    procs,
+                    iterations_per_thread,
+                    concurrency,
+                ),
+            ).start()
-    returned_threads = 0
-    active_connections = 0
-    stats_received = 0
+            current_cc = cc
-    while True:
-        try:
-            # read from the queue for stats or completion messages
-            msg = q.get(block=False)
-            # a stats report is a list obj
-            if isinstance(msg, list):
-                stats_received += 1
-                stats.add_tds(msg)
-            elif msg == "init":
-                active_connections += 1
-            else:
-                # the worker returned
-                # the mmsg is either a 'task_done' or 'poison_pill',
-                # depending on the reason why the thread returned
-                returned_threads += 1
-        except queue.Empty:
-            pass
+        returned_threads = 0
-        # once the sum of the completion messages matches
-        # the count of threads, identify what type of
-        # completion message it was
-        if returned_threads > 0 and returned_threads >= active_connections:
-            if msg == "task_done":
-                logger.info("Requested iteration/duration limit reached")
-                gracefully_shutdown()
-            elif msg == "poison_pill":
-                gracefully_shutdown()
-            elif isinstance(msg, Exception):
-                logger.error(f"error_type={msg.__class__.__name__}, msg={msg}")
-                sys.exit(1)
-            else:
-                logger.error(f"unrecognized message: {msg}")
-                sys.exit(1)
+        # loop for the entire duration of the schedule's current line
+        while time.time() < end_schedule_time:
+            try:
+                # read from the queue for stats or completion messages
+                msg = to_main_q.get(block=False)
+                # a stats report is a list obj
+                if isinstance(msg, list):
+                    stats_received += 1
+                    stats.add_tds(msg)
+                elif msg == "init":
+                    active_connections += 1
+                elif msg == "got_killed":
+                    active_connections -= 1
+                elif msg == "proc_returned":
+                    returned_procs += 1
+                elif msg == "task_done":
+                    returned_threads += 1
+            except queue.Empty:
+                pass
+            # check if all procs returned, then exit
+            if returned_procs >= procs or (
+                returned_threads > 0 and returned_threads >= active_connections
+            ):
+                if msg == "task_done":
+                    logger.info("Requested iteration/duration limit reached")
+                    gracefully_shutdown()
+                elif msg == "proc_returned":
+                    logger.debug("All procs returned")
+                    gracefully_shutdown(by_keyinterrupt=True)
+                elif isinstance(msg, Exception):
+                    logger.error(f"error_type={msg.__class__.__name__}, msg={msg}")
+                    sys.exit(1)
+                else:
+                    logger.error(f"unrecognized message: {msg}")
+                    sys.exit(1)
-        if time.time() >= report_time:
-            if stats_received != active_connections:
-                logger.warning("didn't receive all stats reports yet")
+            if time.time() >= report_time:
+                # if stats_received != active_connections:
+                #     logger.warning("didn't receive all stats reports yet")
+                # remove the 2 seconds added
+                endtime = int(time.time()) - 2
+                report = stats.calculate_stats(active_connections, endtime)
+                # if max_rate is specified, try to stick to it.
+                # to calculate how to get to the max rate, we need a non-empty report
+                if max_rate and report:
+                    current_rate = report[0][6]  # __cycle__ period_ops/s
+                    # approximate how many threads are needed to get
+                    # to the desired max_rate given the current QPS rate
+                    # and current threads count
+                    extrapolated_cc = int(max_rate / (current_rate / current_cc))
+                    # adjust the thread count if there is a difference
+                    # between the current thread count and the calculated
+                    # thread count, but not if there is one such operation already
+                    # running, that is, not if there's an operation that is slow due
+                    # to a long ramp_time.
+                    if (
+                        extrapolated_cc - current_cc
+                        and time.time() >= pause_for_ramp_time
+                    ):
+                        Thread(
+                            target=launch_or_kill_workers,
+                            daemon=True,
+                            args=(
+                                queues,
+                                ramp_time,
+                                extrapolated_cc - current_cc,
+                                procs,
+                                iterations_per_thread,
+                                concurrency,
+                            ),
+                        ).start()
+                        # make sure we will not add/remove threads while the newly
+                        # created thread is still working
+                        pause_for_ramp_time = time.time() + ramp_time + 2 * FREQUENCY
+                        logger.warning(
+                            f"Calculating max_rate: desired max_rate: {max_rate}, "
+                            f"current_rate: {report[0][6]}, current_cc = {current_cc}, "
+                            f"extrapolated_cc = {extrapolated_cc}, "
+                            f"difference: {extrapolated_cc-current_cc}"
+                        )
+                        current_cc = extrapolated_cc
-            # remove the 2 seconds added
-            endtime = int(time.time()) - 2
+                        # ramp_time is only considered for reaching the desired max_rate.
+                        # For adjustments over time, we want the changes to happen immediately
+                        # and not smoothed out over the initial ramp_time value
+                        ramp_time = 0
-            report = stats.calculate_stats(active_connections, endtime)
+                centroids = stats.get_centroids()
-            centroids = stats.get_centroids()
+                stats.new_window(endtime)
+                stats_received = 0
-            stats.new_window(endtime)
-            stats_received = 0
+                if save:
+                    with open(run_name + ".csv", "a") as f:
+                        for row in report:
+                            f.write(str(stats.endtime) + ",")
+                            for col in row:
+                                f.write(str(col) + ",")
+                            np.savetxt(f, next(centroids), newline=";")
+                            f.write("\n")
-            if save:
-                with open(run_name + ".csv", "a") as f:
-                    for row in report:
-                        f.write(str(stats.endtime) + ",")
-                        for col in row:
-                            f.write(str(col) + ",")
-                        np.savetxt(f, next(centroids), newline=";")
-                        f.write("\n")
+                if not quiet:
+                    print_stats(report)
-            if not quiet:
-                print_stats(report)
+                prom.publish(report)
-            prom.publish(report)
+                report_time += FREQUENCY
-            report_time += FREQUENCY
+            # pause briefly to prevent the loop from overheating the CPU
+            time.sleep(0.1)
-        # pause briefly to prevent the loop from overheating the CPU
-        time.sleep(0.1)
+    gracefully_shutdown()
-def worker(
-    thread_count: int,
-    interval: int,
-    q: mp.Queue,
-    kill_q: mp.Queue,
+# a supervisor runs in a separate process.
+# The idea is to create as many supervisors as vCPUs.
+# The sole role of the supervisor is to listen for instructions
+# from the MainProcess.
+# Instructions are:
+#   - Create a new worker.
+#   - Destroy a worker.
+#   - Destroy all workers and return.
+def supervisor(
+    to_main_q: mp.Queue,
+    from_main_q: mp.Queue,
     log_level: str,
     conn_info: ConnInfo,
+    driver: str,
     workload: object,
     args: dict,
-    iterations: int,
-    duration_endtime: float,
     conn_duration: int,
-    conc: int,
     offset: int,
-    id_base_counter: int = 0,
-    id: int = 0,
-    driver: str = None,
+    id: int,
 ):
-    """Process worker function to run the workload in a multiprocessing env
-    Args:
-        thread_count (int): The number of threads to create
-        q (mp.Queue): queue to report query metrics
-        kill_q (mp.Queue): queue to handle stopping the worker
-        log_level (str): log level to set the logger to
-        conn_info (ConnInfo): connection data
-        workload (object): workload class object
-        args (dict): args to init the workload class
-        iterations (int): count of workload iteration before returning
-        duration_endtime (float): timestamp at which to stop and return
-        conn_duration (int): seconds before restarting the database connection
-        conc: (int): the total number of threads
-        id_base_counter (int): the base counter to generate ID for each Process
-        id (int): the ID of the thread
-        driver (str): the friendly driver name
-    """
     def gracefully_return(msg):
-        # send notification to MainThread
-        q.put(msg)
-        # send final stats
-        q.put(ws.get_tdigest_ndarray(), block=False)
-        # wait for all Processes children threads to return before
+        # wait for Threads to return before
         # letting the Process MainThread return
+        # threading.enumerate()
+        for x in threads:
+            if x.is_alive():
+                from_proc_q.put("poison_pill")
         for x in threads:
             if x.is_alive():
                 x.join()
+        # send notification to MainThread
+        to_main_q.put(msg)
+        logger.debug(f"PROC-{id} terminated")
+        return
     logger.setLevel(log_level)
+    logger.debug(f"PROC-{id} started")
-    logger.debug(f"My ID is {id}")
-    threads: list[threading.Thread] = []
-    # execute only if the current thread is the main thread for each process
-    if thread_count is not None:
-        # capture KeyboardInterrupt and do nothing
-        signal.signal(signal.SIGINT, signal.SIG_IGN)
-        # only the MainThread of a child Process spawns Threads
-        for i in range(thread_count):
-            threads.append(
-                threading.Thread(
-                    target=worker,
-                    daemon=True,
-                    args=(
-                        None,
-                        0,
-                        q,
-                        kill_q,
-                        log_level,
-                        conn_info,
-                        workload,
-                        args,
-                        iterations,
-                        duration_endtime,
-                        conn_duration,
-                        conc,
-                        offset,
-                        None,
-                        id_base_counter + i + 1,
-                        driver,
-                    ),
-                )
+    threads: list[Thread] = []
+    from_proc_q = mp.Queue()
+    # capture KeyboardInterrupt and do nothing
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+    while True:
+        msg = from_main_q.get(block=True)
+        if msg == "proc_end":
+            logger.debug(f"PROC-{id} terminating...")
+            gracefully_return("proc_returned")
+            return
+        elif msg == "kill_one":
+            from_proc_q.put("poison_pill")
+        elif isinstance(msg, tuple):
+            t = Thread(
+                target=worker,
+                daemon=True,
+                args=(
+                    to_main_q,
+                    from_proc_q,
+                    log_level,
+                    conn_info,
+                    driver,
+                    workload,
+                    args,
+                    conn_duration,
+                    offset,
+                    *msg,
+                ),
             )
+            t.start()
+            threads.append(t)
-        # starting each Thread is done by the ramp_up in its own thread
-        threading.Thread(
-            target=ramp_up,
-            daemon=True,
-            args=(threads, interval, [1] * thread_count, interval),
-        ).start()
+def worker(
+    to_main_q: mp.Queue,
+    from_proc_q: mp.Queue,
+    log_level: str,
+    conn_info: ConnInfo,
+    driver: str,
+    workload: object,
+    args: dict,
+    conn_duration: int,
+    offset: int,
+    id: int = 0,
+    iterations: int = 0,
+    concurrency: int = 0,
+):
+    def gracefully_return(msg):
+        # send notification to MainThread
+        to_main_q.put(msg)
+        # send final stats
+        to_main_q.put(ws.get_tdigest_ndarray(), block=False)
+        logger.debug(f"Thread ID {id} terminated")
+        return
+    logger.setLevel(log_level)
+    logger.debug(f"Thread ID {id} started")
     # catch exception while instantiating the workload class
     try:
         w = workload(args)
     except Exception as e:
         stack_lines = traceback.format_exc()
-        q.put(Exception(stack_lines))
+        to_main_q.put(Exception(stack_lines))
         return
     c = 0
@@ -530,22 +687,13 @@ def worker(
     run_init = True
     # send notification that a new thread has started
-    q.put("init")
+    to_main_q.put("init")
     while True:
         if conn_duration:
             # reconnect every conn_duration +/- 20%
             conn_endtime = time.time() + int(conn_duration * random.uniform(0.8, 1.2))
-        # listen for termination messages (poison pill)
-        try:
-            kill_q.get(block=False)
-            logger.debug("Poison pill received")
-            gracefully_return("poison_pill")
-            return
-        except queue.Empty:
-            pass
         try:
             logger.debug(f"driver: {driver}, params: {conn_info.params}")
             # with Cluster().connect('bank') as conn:
@@ -560,7 +708,11 @@ def worker(
                         logger.debug("Executing setup() function")
                         run_transaction(
                             conn,
-                            lambda conn: w.setup(conn, id, conc),
+                            lambda conn: w.setup(
+                                conn,
+                                id,
+                                concurrency,
+                            ),
                             driver,
                             max_retries=MAX_RETRIES,
                         )
@@ -572,16 +724,14 @@ def worker(
                 while True:
                     # listen for termination messages (poison pill)
                     try:
-                        kill_q.get(block=False)
+                        from_proc_q.get(block=False)
                         logger.debug("Poison pill received")
-                        return gracefully_return("poison_pill")
+                        return gracefully_return("got_killed")
                     except queue.Empty:
                         pass
-                    # return if the limits of either iteration count and duration have been reached
-                    if (iterations and c >= iterations) or (
-                        duration_endtime and time.time() >= duration_endtime
-                    ):
+                    # return if the iteration count has been reached
+                    if iterations and c >= iterations:
                         logger.debug("Task completed!")
                         gracefully_return("task_done")
                         return
@@ -618,10 +768,10 @@ def worker(
                     ws.add_latency_measurement("__cycle__", time.time() - cycle_start)
-                    if q.full():
+                    if to_main_q.full():
                         logger.error("=========== Q FULL!!!! ======================")
                     if time.time() >= stat_time:
-                        q.put(ws.get_tdigest_ndarray(), block=False)
+                        to_main_q.put(ws.get_tdigest_ndarray(), block=False)
                         ws.new_window()
                         stat_time += FREQUENCY
@@ -630,7 +780,7 @@ def worker(
                 import psycopg
                 if isinstance(e, psycopg.errors.UndefinedTable):
-                    q.put(e)
+                    to_main_q.put(e)
                     return
                 log_and_sleep(e)
@@ -638,26 +788,26 @@ def worker(
                 import mysql.connector.errorcode
                 if e.errno == mysql.connector.errorcode.ER_NO_SUCH_TABLE:
-                    q.put(e)
+                    to_main_q.put(e)
                     return
                 log_and_sleep(e)
             elif driver == "maria":
                 if str(e).endswith(" doesn't exist"):
-                    q.put(e)
+                    to_main_q.put(e)
                     return
                 log_and_sleep(e)
             elif driver == "oracle":
                 if str(e).startswith("ORA-00942: table or view does not exist"):
-                    q.put(e)
+                    to_main_q.put(e)
                     return
                 log_and_sleep(e)
             else:
                 # for all other Exceptions, report and return
                 logger.error(type(e), stack_info=True)
-                q.put(e)
+                to_main_q.put(e)
                 return

{dbworkload-0.6.5 → dbworkload-0.7.0}/dbworkload/models/util.py RENAMED Viewed

@@ -29,8 +29,8 @@ logger.setLevel(logging.INFO)
 def util_csv(
-    input: str,
-    output: str,
+    input: PosixPath,
+    output: PosixPath,
     compression: str,
     procs: int,
     csv_max_rows: int,
@@ -54,13 +54,11 @@ def util_csv(
     if os.path.isdir(output_dir):
         os.rename(
             output_dir,
-            output_dir + "." + dt.datetime.utcnow().strftime("%Y%m%d-%H%M%S"),
+            str(output_dir)
+            + "."
+            + dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d-%H%M%S"),
         )
-    # if the output dir is
-    if os.path.exists(output_dir):
-        output_dir += "_dir"
     # create new directory
     os.mkdir(output_dir)
@@ -92,7 +90,7 @@ def util_csv(
         print()
-def util_yaml(input: str, output: str):
+def util_yaml(input: PosixPath, output: PosixPath):
     """Wrapper around util function ddl_to_yaml() for
     crafting a data gen definition YAML string from
     CREATE TABLE statements.
@@ -106,7 +104,12 @@ def util_yaml(input: str, output: str):
     # backup the current file as to not override
     if os.path.exists(output):
-        os.rename(output, output + "." + dt.datetime.utcnow().strftime("%Y%m%d-%H%M%S"))
+        os.rename(
+            output,
+            str(output)
+            + "."
+            + dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d-%H%M%S"),
+        )
     # create new file
     with open(output, "w") as f:
@@ -153,7 +156,7 @@ def util_merge_sort(input_dir: str, output_dir: str, csv_max_rows: int, compress
                     self.output_dir,
                     str(self.output_dir)
                     + "."
-                    + dt.datetime.utcnow().strftime("%Y%m%d-%H%M%S"),
+                    + dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d-%H%M%S"),
                 )
             # create new directory

{dbworkload-0.6.5 → dbworkload-0.7.0}/dbworkload/utils/common.py RENAMED Viewed

@@ -654,6 +654,23 @@ def ddl_to_yaml(ddl: str):
             elif within_brackets > 0 and i == ",":
                 col_def += ":"
+        # process the content within parenthesis in the
+        # CREATE TABLE stmt char by char to distinguish
+        # the comma for separating columns vs the comma
+        # included in single quote strings such as those in DEFAULT
+        # eg: mycol STRING NULL DEFAULT 'corporate, inc'
+        within_quote = False
+        col_def_str = col_def
+        col_def = ""
+        for i in col_def_str:
+            if i == "'":
+                within_quote = not within_quote
+                continue
+            if within_quote:
+                continue
+            else:
+                col_def += i
         col_def = [x.strip().lower() for x in col_def.split(",")]
         ll = []

{dbworkload-0.6.5 → dbworkload-0.7.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dbworkload"
-version = "0.6.5"
+version = "0.7.0"
 description = "Workload framework"
 authors = ["Fabio Ghirardello"]
 license = "GPLv3+"

{dbworkload-0.6.5 → dbworkload-0.7.0}/LICENSE RENAMED Viewed

File without changes

{dbworkload-0.6.5 → dbworkload-0.7.0}/README.md RENAMED Viewed

File without changes

{dbworkload-0.6.5 → dbworkload-0.7.0}/dbworkload/cli/util.py RENAMED Viewed

File without changes

{dbworkload-0.6.5 → dbworkload-0.7.0}/dbworkload/templates/stub.j2 RENAMED Viewed

File without changes

{dbworkload-0.6.5 → dbworkload-0.7.0}/dbworkload/utils/simplefaker.py RENAMED Viewed

File without changes

dbworkload 0.6.5__tar.gz → 0.7.0__tar.gz

dbworkload 0.6.5.tar.gz → 0.7.0.tar.gz