PyPI - dbworkload - Versions diffs - 0.9.1.tar.gz → 0.9.2.dev1.tar.gz - Mend

dbworkload 0.9.1.tar.gz → 0.9.2.dev1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/PKG-INFO RENAMED Viewed

@@ -1,17 +1,14 @@
 Metadata-Version: 2.3
 Name: dbworkload
-Version: 0.9.1
+Version: 0.9.2.dev1
 Summary: Workload framework
 License: GPLv3+
 Author: Fabio Ghirardello
-Requires-Python: >=3.8,<4.0
+Requires-Python: >=3.11,<4.0
 Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
 Classifier: License :: Other/Proprietary License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/dbworkload/models/run.py RENAMED Viewed

@@ -2,6 +2,7 @@
 import errno
 import logging
+import math
 import multiprocessing as mp
 import os
 import queue
@@ -19,6 +20,7 @@ from psutil import cpu_percent, virtual_memory
 import dbworkload.utils.common
 from dbworkload.cli.dep import ConnInfo
+from dbworkload.utils.common import Action
 # from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT, Session
 # from cassandra.policies import (
@@ -40,6 +42,9 @@ logger = logging.getLogger("dbworkload")
 sigterm_received = False
+supervisors: dict[int, mp.Process] = {}
+supervisor_queues: dict[int, mp.Queue] = {}
 HEADERS: list = [
     "elapsed",
     "id",
@@ -125,7 +130,7 @@ def cycle(iterable, backwards=False):
 # If a ramp time is specified, threads creation or destruction
 # will be paced accordingly.
 def launch_or_kill_workers(
-    queues: list,
+    supervisor_queues: list,
     ramp_time: int,
     cc_change: int,
     proc_len: list,
@@ -140,11 +145,14 @@ def launch_or_kill_workers(
     if cc_change > 0:
         for _ in range(cc_change):
-            queues[cycle(proc_len)].put(
+            supervisor_queues[cycle(proc_len)].put(
                 (
-                    thread_id,
-                    iterations_per_thread,
-                    concurrency,
+                    Action.NEW_WORKER,
+                    (
+                        thread_id,
+                        iterations_per_thread,
+                        concurrency,
+                    ),
                 )
             )
             thread_id += 1
@@ -152,7 +160,7 @@ def launch_or_kill_workers(
     if cc_change < 0:
         for _ in range(abs(cc_change)):
-            queues[cycle(proc_len, backwards=True)].put("kill_one")
+            supervisor_queues[cycle(proc_len, backwards=True)].put((Action.KILL_ONE,))
             time.sleep(ramp_interval)
@@ -183,8 +191,8 @@ def run(
         _stats_received = stats_received
         # notify all Supervisors to quit
-        for q in queues.values():
-            q.put("poison_pill")
+        for q in supervisor_queues.values():
+            q.put((Action.POISON_PILL,))
         # wait for supervisors to quit and drain
         # the to_main_q at the same time to avoid locking
@@ -339,10 +347,8 @@ def run(
     to_main_q = mp.Queue()
-    global queues
+    global supervisor_queues
     global supervisors
-    supervisors = {}
-    queues = {}
     # start a separate thread for messages coming in via the pipe
     # echo 5 > dbworkload.pipe # create 5 more connections
@@ -350,7 +356,7 @@ def run(
         target=listen_to_pipe,
         daemon=True,
         args=(
-            queues,
+            supervisor_queues,
             0,
             procs,
             None,
@@ -360,12 +366,12 @@ def run(
     # launch supervisors in a dedicated OS process
     for x in range(procs):
-        queues[x] = mp.Queue()
+        supervisor_queues[x] = mp.Queue()
         supervisors[x] = mp.Process(
             target=supervisor,
             args=(
                 to_main_q,
-                queues[x],
+                supervisor_queues[x],
                 log_level,
                 conn_info,
                 driver,
@@ -394,7 +400,6 @@ def run(
     current_proc = -1
     current_cc = 0
     thread_id = 0
-    pause_for_ramp_time = 0
     iterations_per_thread = None
     if iterations:
@@ -424,13 +429,17 @@ def run(
             f"Starting schedule {i+1}/{len(schedule)}: {cc=}, {max_rate=}, {ramp_time=}, {dur=}"
         )
+        pause_for_ramp_time = time.time() + 3 * FREQUENCY
+        exhaust_warning = 0
+        think_time = 0
         # always make sure that a duration is specified, even if none was passed
         # in which case it defaults to infinite
         end_schedule_time = time.time() + dur if dur else float("inf")
         # if max_rate was set instead of concurrency
         # and current_cc = 0,
-        # start the workload with 1 thread so that dbworkload
+        # start the workload with 10 threads so that dbworkload
         # has stats to measure on for adding/removing threads
         # as part of the calculations for maintaining
         # the desired max_rate
@@ -439,23 +448,23 @@ def run(
                 target=launch_or_kill_workers,
                 daemon=True,
                 args=(
-                    queues,
+                    supervisor_queues,
                     ramp_time,
-                    1,
+                    10,
                     procs,
                     iterations_per_thread,
                     concurrency,
                 ),
             ).start()
-            current_cc = 1
+            current_cc = 10
         if not max_rate:
             Thread(
                 target=launch_or_kill_workers,
                 daemon=True,
                 args=(
-                    queues,
+                    supervisor_queues,
                     ramp_time,
                     cc - current_cc,
                     procs,
@@ -518,49 +527,131 @@ def run(
                 if max_rate and report:
                     current_rate = report[0][6]  # __cycle__ period_ops/s
-                    # approximate how many threads are needed to get
-                    # to the desired max_rate given the current QPS rate
-                    # and current threads count
-                    extrapolated_cc = int(max_rate / (current_rate / current_cc))
-                    # adjust the thread count if there is a difference
-                    # between the current thread count and the calculated
-                    # thread count, but not if there is one such operation already
-                    # running, that is, not if there's an operation that is slow due
-                    # to a long ramp_time.
-                    if (
-                        extrapolated_cc - current_cc
-                        and time.time() >= pause_for_ramp_time
-                    ):
-                        Thread(
-                            target=launch_or_kill_workers,
-                            daemon=True,
-                            args=(
-                                queues,
-                                ramp_time,
-                                extrapolated_cc - current_cc,
-                                procs,
-                                iterations_per_thread,
-                                concurrency,
-                            ),
-                        ).start()
-                        # make sure we will not add/remove threads while the newly
-                        # created thread is still working
-                        pause_for_ramp_time = time.time() + ramp_time + 2 * FREQUENCY
-                        logger.warning(
-                            f"Calculating max_rate: desired max_rate: {max_rate}, "
-                            f"current_rate: {report[0][6]}, current_cc = {current_cc}, "
-                            f"extrapolated_cc = {extrapolated_cc}, "
-                            f"difference: {extrapolated_cc-current_cc}"
-                        )
-                        current_cc = extrapolated_cc
-                        # ramp_time is only considered for reaching the desired max_rate.
-                        # For adjustments over time, we want the changes to happen immediately
-                        # and not smoothed out over the initial ramp_time value
-                        ramp_time = 0
+                    if time.time() > pause_for_ramp_time:
+                        if current_rate < max_rate * 0.99:
+                            if think_time > 0:
+                                think_time = (
+                                    math.floor(
+                                        think_time * current_rate / max_rate * 1000
+                                    )
+                                    / 1000
+                                )
+                                pause_for_ramp_time = time.time() + 3 * FREQUENCY
+                                logger.info(
+                                    f"Calculating max_rate: {max_rate=} {current_rate=}, {current_cc=} {think_time=}"
+                                )
+                            else:
+                                if exhaust_warning > 3:
+                                    logger.warning("Pointless to add any more threads")
+                                else:
+                                    # approximate how many threads are needed to get
+                                    # to the desired max_rate given the current QPS rate
+                                    # and current thread count
+                                    # increase by 5%
+                                    extrapolated_cc = int(
+                                        max_rate / (current_rate / current_cc) * 1.05
+                                    )
+                                    exhaust_warning += 1
+                                    change = max(0, extrapolated_cc - current_cc)
+                                    Thread(
+                                        target=launch_or_kill_workers,
+                                        daemon=True,
+                                        args=(
+                                            supervisor_queues,
+                                            ramp_time,
+                                            change,
+                                            procs,
+                                            iterations_per_thread,
+                                            concurrency,
+                                        ),
+                                    ).start()
+                                    # give enough time for newly created threads to settle
+                                    # before new calculations are performed
+                                    pause_for_ramp_time = (
+                                        time.time() + ramp_time + 6 * FREQUENCY
+                                    )
+                                    logger.info(
+                                        f"Calculating max_rate: {max_rate=} {current_rate=}, {current_cc=} {change=}"
+                                    )
+                                    current_cc += change
+                                    # ramp_time is only considered for reaching the desired max_rate.
+                                    # For adjustments over time, we want the changes to happen immediately
+                                    # and not smoothed out over the initial ramp_time value
+                                    ramp_time = 0
+                            for q in supervisor_queues.values():
+                                q.put((Action.THINK_TIME, think_time))
+                        elif current_rate * 0.99 > max_rate:
+                            if think_time > 1:
+                                # pointless to add more time, remove threads instead
+                                # decrease count by 5%
+                                change = int(current_cc * -0.05)
+                                Thread(
+                                    target=launch_or_kill_workers,
+                                    daemon=True,
+                                    args=(
+                                        supervisor_queues,
+                                        ramp_time,
+                                        change,
+                                        procs,
+                                        iterations_per_thread,
+                                        concurrency,
+                                    ),
+                                ).start()
+                                # give enough time for newly created threads to settle
+                                # before new calculations are performed
+                                pause_for_ramp_time = (
+                                    time.time() + ramp_time + 6 * FREQUENCY
+                                )
+                                logger.info(
+                                    f"Calculating max_rate: {max_rate=} {current_rate=}, {current_cc=} {change=}"
+                                )
+                                current_cc += change
+                                # ramp_time is only considered for reaching the desired max_rate.
+                                # For adjustments over time, we want the changes to happen immediately
+                                # and not smoothed out over the initial ramp_time value
+                                ramp_time = 0
+                                think_time = 0
+                            else:
+                                # add think_time to slow it down a bit
+                                if think_time == 0:
+                                    think_time = round(
+                                        0.01 * current_rate / max_rate, 4
+                                    )
+                                else:
+                                    think_time = round(
+                                        think_time * current_rate / max_rate, 4
+                                    )
+                                pause_for_ramp_time = time.time() + 3 * FREQUENCY
+                                logger.info(
+                                    f"Calculating max_rate: {max_rate=} {current_rate=}, {current_cc=} {think_time=}"
+                                )
+                            for q in supervisor_queues.values():
+                                q.put((Action.THINK_TIME, think_time))
                 centroids = stats.get_centroids()
@@ -612,53 +703,60 @@ def supervisor(
     logger.setLevel(log_level)
     logger.debug(f"Supervisor-{id} started")
-    threads: list[Thread] = []
+    worker_threads: list[Thread] = []
     from_proc_q = mp.Queue()
     # capture KeyboardInterrupt and do nothing
     signal.signal(signal.SIGINT, signal.SIG_IGN)
+    global think_time
+    think_time = 0
     while True:
         msg = from_main_q.get(block=True)
-        if msg == "poison_pill":
-            logger.debug(f"Supervisor-{id} terminating...")
+        match msg[0]:
+            case Action.POISON_PILL:  # poison pill
+                logger.debug(f"Supervisor-{id} terminating...")
-            # wait for Threads to return before
-            # letting the Supervisor MainThread return
-            for x in threads:
-                if x.is_alive():
-                    from_proc_q.put("poison_pill")
+                # wait for Threads to return before
+                # letting the Supervisor MainThread return
+                for x in worker_threads:
+                    if x.is_alive():
+                        from_proc_q.put(Action.POISON_PILL)
-            for x in threads:
-                if x.is_alive():
-                    x.join()
+                for x in worker_threads:
+                    if x.is_alive():
+                        x.join()
-            logger.debug(f"Supervisor-{id} terminated")
-            return
+                logger.debug(f"Supervisor-{id} terminated")
+                return
-        elif msg == "kill_one":
-            from_proc_q.put("poison_pill")
+            case Action.KILL_ONE:  # kill_one
+                from_proc_q.put(Action.POISON_PILL)
-        elif isinstance(msg, tuple):
-            t = Thread(
-                target=worker,
-                daemon=True,
-                args=(
-                    to_main_q,
-                    from_proc_q,
-                    log_level,
-                    conn_info,
-                    driver,
-                    workload,
-                    args,
-                    conn_duration,
-                    offset,
-                    *msg,
-                ),
-            )
-            t.start()
-            threads.append(t)
+            case Action.THINK_TIME:  # set think_time
+                think_time = msg[1]
+            case Action.NEW_WORKER:  # add new worker
+                t = Thread(
+                    target=worker,
+                    daemon=True,
+                    args=(
+                        to_main_q,
+                        from_proc_q,
+                        log_level,
+                        conn_info,
+                        driver,
+                        workload,
+                        args,
+                        conn_duration,
+                        offset,
+                        *msg[1],
+                    ),
+                )
+                t.start()
+                worker_threads.append(t)
 def worker(
@@ -795,6 +893,10 @@ def worker(
                     ws.add_latency_measurement("__cycle__", time.time() - cycle_start)
+                    if think_time > 0:
+                        time.sleep(think_time)
+                        ws.add_latency_measurement("__think_time__", think_time)
                     if to_main_q.full():
                         logger.error("=========== Q FULL!!!! ======================")
                     if time.time() >= stat_time:
@@ -871,6 +973,7 @@ def listen_to_pipe(queues, ramp_time, procs, iterations_per_thread, concurrency)
 def log_and_sleep(e: Exception):
+    raise e
     logger.error(f"error_type={e.__class__.__name__}, msg={e}")
     logger.info("Sleeping for %s seconds" % (DEFAULT_SLEEP))
     time.sleep(DEFAULT_SLEEP)

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/dbworkload/utils/common.py RENAMED Viewed

@@ -48,10 +48,19 @@ NOT_NULL_MAX = 40
 logger = logging.getLogger("dbworkload")
+from enum import IntEnum
 from prometheus_client.core import REGISTRY, HistogramMetricFamily
 from prometheus_client.registry import Collector
+class Action(IntEnum):
+    KILL_ONE = 7
+    THINK_TIME = 2
+    POISON_PILL = 9
+    NEW_WORKER = 1
 class Stats:
     """Print workload stats
     and export the stats as Prometheus endpoints

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dbworkload"
-version = "0.9.1"
+version = "0.9.2.dev1"
 description = "Workload framework"
 authors = ["Fabio Ghirardello"]
 license = "GPLv3+"
@@ -17,7 +17,7 @@ classifiers = [
 dbworkload = 'dbworkload.cli.main:app'
 [tool.poetry.dependencies]
-python = "^3.8"
+python = "^3.11"
 pandas = "*"
 tabulate = "*"
 numpy = "*"
@@ -51,6 +51,11 @@ mongo = ["pymongo"]
 cassandra = ["cassandra-driver"]
 spanner = ["google-cloud-spanner"]
+[tool.poetry.group.dev.dependencies]
+mkdocs = "^1.6.1"
+mkdocs-material = "^9.6.14"
+mkdocs-click = "^0.9.0"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/LICENSE RENAMED Viewed

File without changes

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/README.md RENAMED Viewed

File without changes

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/dbworkload/__init__.py RENAMED Viewed

File without changes

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/dbworkload/cli/dep.py RENAMED Viewed

File without changes

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/dbworkload/cli/main.py RENAMED Viewed

File without changes

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/dbworkload/cli/util.py RENAMED Viewed

File without changes

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/dbworkload/models/util.py RENAMED Viewed

File without changes

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/dbworkload/templates/stub.j2 RENAMED Viewed

File without changes

{dbworkload-0.9.1 → dbworkload-0.9.2.dev1}/dbworkload/utils/simplefaker.py RENAMED Viewed

File without changes

dbworkload 0.9.1.tar.gz → 0.9.2.dev1.tar.gz

dbworkload 0.9.1.tar.gz → 0.9.2.dev1.tar.gz