PyPI - dbworkload - Versions diffs - 0.11.0__tar.gz → 0.11.1.dev1__tar.gz - Mend

dbworkload 0.11.0__tar.gz → 0.11.1.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/PKG-INFO RENAMED Viewed

@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: dbworkload
-Version: 0.11.0
+Version: 0.11.1.dev1
 Summary: Workload framework
 License: GPLv3+
 License-File: LICENSE
 Author: Fabio Ghirardello
-Requires-Python: >=3.11,<4.0
+Requires-Python: >=3.11,<4
 Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
 Classifier: License :: Other/Proprietary License
 Classifier: Operating System :: OS Independent
@@ -26,6 +26,7 @@ Provides-Extra: pinecone
 Provides-Extra: postgres
 Provides-Extra: spanner
 Requires-Dist: cassandra-driver ; extra == "all" or extra == "cassandra"
+Requires-Dist: fastdigest (>=0.12.0,<0.13.0)
 Requires-Dist: fastembed (>=0.7.3,<0.8.0) ; extra == "convert"
 Requires-Dist: google-cloud-spanner ; extra == "all" or extra == "spanner"
 Requires-Dist: jinja2
@@ -49,7 +50,6 @@ Requires-Dist: psycopg ; extra == "all" or extra == "postgres"
 Requires-Dist: psycopg-binary ; extra == "all" or extra == "postgres"
 Requires-Dist: pymongo ; extra == "all" or extra == "mongo"
 Requires-Dist: pyodbc ; extra == "all" or extra == "odbc"
-Requires-Dist: pytdigest
 Requires-Dist: pyyaml
 Requires-Dist: sentence-transformers ; extra == "pinecone"
 Requires-Dist: sqlparse

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/models/util.py RENAMED Viewed

@@ -19,11 +19,11 @@ import sqlparse
 import yaml
 from jinja2 import Environment, PackageLoader
 from plotly.subplots import make_subplots
-from pytdigest import TDigest
 import dbworkload
 import dbworkload.utils.common
 import dbworkload.utils.simplefaker
+import dbworkload.utils.tdigest as tdigest
 logger = logging.getLogger("dbworkload")
 logger.setLevel(logging.INFO)
@@ -506,12 +506,10 @@ def util_merge_csvs(input_dir: str):
         """
         combine centroids of multiple TDigests together,
         and return the new aggregated centroids.
-        Note: compression=1000
+        Note: max_centroids=1000
         """
-        return (
-            TDigest(compression=1000)
-            .combine([TDigest.of_centroids(y, compression=1000) for y in x])
-            .get_centroids()
+        return tdigest.centroids(
+            tdigest.combine(tdigest.from_centroids(y) for y in x)
         )
     # for each elapsed range bucket, merge the data for all `id` together
@@ -520,9 +518,9 @@ def util_merge_csvs(input_dir: str):
         {"ts": min, "threads": sum, "centroids": combine_centroids}
     )
-    # the weight of the TDigest represents the count of ops
+    # the mass of the TDigest represents the count of ops
     df["period_ops"] = df["centroids"].map(
-        lambda x: TDigest(compression=1000).of_centroids(x, compression=1000).weight
+        lambda x: tdigest.count(tdigest.from_centroids(x))
     )
     df["period_ops_s"] = df["period_ops"].apply(lambda x: x // 10)
@@ -536,17 +534,21 @@ def util_merge_csvs(input_dir: str):
     # calculate mean and quantiles and convert from seconds to millis
     df["mean_ms"] = df["centroids"].map(
-        lambda x: TDigest(compression=1000).of_centroids(x, compression=1000).mean
-        * 1000
+        lambda x: tdigest.from_centroids(x).mean() * 1000
     )
-    df[["p50_ms", "p90_ms", "p95_ms", "p99_ms", "max_ms"]] = [
-        x * 1000
-        for x in df["centroids"].map(
-            lambda x: TDigest(compression=1000)
-            .of_centroids(x, compression=1000)
-            .inverse_cdf([0.50, 0.90, 0.95, 0.99, 1.00])
+    df[["p50_ms", "p90_ms", "p95_ms", "p99_ms", "max_ms"]] = (
+        pd.DataFrame(
+            df["centroids"]
+            .map(
+                lambda x: tdigest.from_centroids(x).quantile_vec(
+                    [0.50, 0.90, 0.95, 0.99, 1.00]
+                )
+            )
+            .tolist(),
+            index=df.index,
         )
-    ]
+        * 1000
+    )
     # round all values to 2 decimals
     df[["mean_ms", "p50_ms", "p90_ms", "p95_ms", "p99_ms", "max_ms"]] = df[

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/utils/common.py RENAMED Viewed

@@ -13,7 +13,9 @@ import prometheus_client as prom
 import yaml
 from prometheus_client.core import REGISTRY, HistogramMetricFamily
 from prometheus_client.registry import Collector
-from pytdigest import TDigest
+import dbworkload.utils.tdigest as tdigest
+from fastdigest import TDigest
 RESERVED_WORDS = [
     "unique",
@@ -77,9 +79,7 @@ class Stats:
         for x in l:
             self.cumulative_counts.setdefault(x[0], TDigest())
             self.window_stats.setdefault(x[0], [])
-            self.window_stats[x[0]].append(
-                TDigest(compression=1000).of_centroids(x[1], compression=1000)
-            )
+            self.window_stats[x[0]].append(tdigest.from_centroids(x[1]))
     # calculate the current stats this instance has collected.
     def calculate_stats(self, active_connections: int, endtime: int) -> list:
@@ -97,23 +97,21 @@ class Stats:
         )
         def get_stats_row(id: str):
-            td = TDigest(compression=1000).combine(self.window_stats[id])
+            td = tdigest.combine(self.window_stats[id])
-            self.window_stats_centroids[id] = td.get_centroids()
+            self.window_stats_centroids[id] = tdigest.centroids(td)
-            self.cumulative_counts[id] = TDigest(compression=1000).combine(
-                self.cumulative_counts[id], td
-            )
+            self.cumulative_counts[id] = self.cumulative_counts[id].merge(td)
             return [
                 elapsed,
                 id,
                 active_connections,
-                int(self.cumulative_counts[id].weight),
-                int(self.cumulative_counts[id].weight // elapsed),
-                int(td.weight),
-                int(td.weight // window_elapsed),
-                round(td.mean * 1000, 2),
-            ] + [round(x * 1000, 2) for x in td.inverse_cdf(self.quantiles)]
+                tdigest.count(self.cumulative_counts[id]),
+                tdigest.count(self.cumulative_counts[id]) // elapsed,
+                tdigest.count(td),
+                tdigest.count(td) // window_elapsed,
+                round(td.mean() * 1000, 2),
+            ] + [round(x * 1000, 2) for x in td.quantile_vec(self.quantiles)]
         return [get_stats_row(id) for id in sorted(list(self.window_stats.keys()))]
@@ -129,12 +127,12 @@ class Stats:
                 elapsed,
                 id,
                 active_connections,
-                int(self.cumulative_counts[id].weight),
-                int(self.cumulative_counts[id].weight // elapsed),
-                round(self.cumulative_counts[id].mean * 1000, 2),
+                tdigest.count(self.cumulative_counts[id]),
+                tdigest.count(self.cumulative_counts[id]) // elapsed,
+                round(self.cumulative_counts[id].mean() * 1000, 2),
             ] + [
                 round(x * 1000, 2)
-                for x in self.cumulative_counts[id].inverse_cdf(self.quantiles)
+                for x in self.cumulative_counts[id].quantile_vec(self.quantiles)
             ]
         return [get_stats_row(id) for id in sorted(list(self.window_stats.keys()))]
@@ -165,7 +163,7 @@ class WorkerStats:
     def get_tdigest_ndarray(self):
         return [
-            (id, TDigest.compute(np.array(l), compression=1000).get_centroids())
+            (id, tdigest.centroids(tdigest.from_values(l)))
             for id, l in self.window_stats.items()
         ]
@@ -182,10 +180,13 @@ class CustomHistogram(Collector):
             return [["+Inf", 0]]
         # create buckets from 10 ... 180
-        td_hist = [[x, int(td.cdf((int(x) + 1) / 1000) * td.weight)] for x in self.bins]
-        td_hist.append(["+Inf", td.weight])
+        td_count = tdigest.count(td)
+        td_hist = [
+            [x, int(td.cdf((int(x) + 1) / 1000) * td_count)] for x in self.bins
+        ]
+        td_hist.append(["+Inf", td_count])
-        return td.mean * 1000 * td.weight, td_hist
+        return td.mean() * 1000 * td_count, td_hist
     def collect(self):
         sum_value, buckets = self.get_buckets(self.name)

dbworkload-0.11.1.dev1/dbworkload/utils/tdigest.py ADDED Viewed

@@ -0,0 +1,34 @@
+#!/usr/bin/python
+import numpy as np
+from fastdigest import TDigest, merge_all
+MAX_CENTROIDS = 1000
+def from_values(values) -> TDigest:
+    return TDigest.from_values(
+        np.asarray(values, dtype=float), max_centroids=MAX_CENTROIDS
+    )
+def from_centroids(centroids) -> TDigest:
+    arr = np.asarray(centroids, dtype=float)
+    if arr.size == 0:
+        return TDigest(MAX_CENTROIDS)
+    arr = np.atleast_2d(arr)
+    return TDigest.from_values(arr[:, 0], arr[:, 1], max_centroids=MAX_CENTROIDS)
+def combine(digests) -> TDigest:
+    return merge_all(list(digests))
+def centroids(td: TDigest) -> np.ndarray:
+    return np.asarray(td.centroids, dtype=float).reshape(-1, 2)
+def count(td: TDigest) -> int:
+    return int(td.mass())

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dbworkload"
-version = "0.11.0"
+version = "0.11.1.dev1"
 description = "Workload framework"
 authors = ["Fabio Ghirardello"]
 license = "GPLv3+"
@@ -17,7 +17,7 @@ classifiers = [
 dbworkload = 'dbworkload.cli.main:app'
 [tool.poetry.dependencies]
-python = "^3.11"
+python = ">=3.11,<4"
 pandas = "*"
 tabulate = "*"
 numpy = "*"
@@ -42,12 +42,12 @@ fastembed = {version = "^0.7.3", optional = true }
 pgvector = {version = "^0.4.1", optional = true }
 langgraph = { version = "^1.0.3", optional = true }
 openai = { version = "^2.8.0", optional = true }
-pytdigest = "*"
 plotext = "*"
 plotly = "*"
 jinja2 = "*"
 sqlparse = "*"
 psutil = "^7.0.0"
+fastdigest = "^0.12.0"
 [tool.poetry.extras]
 all =  ["psycopg", "psycopg-binary", "mysql-connector-python", "mariadb", "oracledb", "pyodbc", "pymongo", "cassandra-driver", "google-cloud-spanner", "pinecone", "convert"]

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/LICENSE RENAMED Viewed

File without changes

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/README.md RENAMED Viewed

File without changes

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/__init__.py RENAMED Viewed

File without changes

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/cli/dep.py RENAMED Viewed

File without changes

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/cli/main.py RENAMED Viewed

File without changes

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/cli/util.py RENAMED Viewed

File without changes

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/models/convert.py RENAMED Viewed

File without changes

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/models/prompts.py RENAMED Viewed

File without changes

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/models/run.py RENAMED Viewed

File without changes

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/templates/stub.j2 RENAMED Viewed

File without changes

{dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/utils/simplefaker.py RENAMED Viewed

File without changes

dbworkload 0.11.0__tar.gz → 0.11.1.dev1__tar.gz

dbworkload 0.11.0__tar.gz → 0.11.1.dev1__tar.gz