dbworkload 0.11.0__tar.gz → 0.11.1.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/PKG-INFO +3 -3
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/models/util.py +19 -17
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/utils/common.py +24 -23
- dbworkload-0.11.1.dev1/dbworkload/utils/tdigest.py +34 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/pyproject.toml +3 -3
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/LICENSE +0 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/README.md +0 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/__init__.py +0 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/cli/dep.py +0 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/cli/main.py +0 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/cli/util.py +0 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/models/convert.py +0 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/models/prompts.py +0 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/models/run.py +0 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/templates/stub.j2 +0 -0
- {dbworkload-0.11.0 → dbworkload-0.11.1.dev1}/dbworkload/utils/simplefaker.py +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dbworkload
|
|
3
|
-
Version: 0.11.0
|
|
3
|
+
Version: 0.11.1.dev1
|
|
4
4
|
Summary: Workload framework
|
|
5
5
|
License: GPLv3+
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Author: Fabio Ghirardello
|
|
8
|
-
Requires-Python: >=3.11,<4
|
|
8
|
+
Requires-Python: >=3.11,<4
|
|
9
9
|
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
10
10
|
Classifier: License :: Other/Proprietary License
|
|
11
11
|
Classifier: Operating System :: OS Independent
|
|
@@ -26,6 +26,7 @@ Provides-Extra: pinecone
|
|
|
26
26
|
Provides-Extra: postgres
|
|
27
27
|
Provides-Extra: spanner
|
|
28
28
|
Requires-Dist: cassandra-driver ; extra == "all" or extra == "cassandra"
|
|
29
|
+
Requires-Dist: fastdigest (>=0.12.0,<0.13.0)
|
|
29
30
|
Requires-Dist: fastembed (>=0.7.3,<0.8.0) ; extra == "convert"
|
|
30
31
|
Requires-Dist: google-cloud-spanner ; extra == "all" or extra == "spanner"
|
|
31
32
|
Requires-Dist: jinja2
|
|
@@ -49,7 +50,6 @@ Requires-Dist: psycopg ; extra == "all" or extra == "postgres"
|
|
|
49
50
|
Requires-Dist: psycopg-binary ; extra == "all" or extra == "postgres"
|
|
50
51
|
Requires-Dist: pymongo ; extra == "all" or extra == "mongo"
|
|
51
52
|
Requires-Dist: pyodbc ; extra == "all" or extra == "odbc"
|
|
52
|
-
Requires-Dist: pytdigest
|
|
53
53
|
Requires-Dist: pyyaml
|
|
54
54
|
Requires-Dist: sentence-transformers ; extra == "pinecone"
|
|
55
55
|
Requires-Dist: sqlparse
|
|
@@ -19,11 +19,11 @@ import sqlparse
|
|
|
19
19
|
import yaml
|
|
20
20
|
from jinja2 import Environment, PackageLoader
|
|
21
21
|
from plotly.subplots import make_subplots
|
|
22
|
-
from pytdigest import TDigest
|
|
23
22
|
|
|
24
23
|
import dbworkload
|
|
25
24
|
import dbworkload.utils.common
|
|
26
25
|
import dbworkload.utils.simplefaker
|
|
26
|
+
import dbworkload.utils.tdigest as tdigest
|
|
27
27
|
|
|
28
28
|
logger = logging.getLogger("dbworkload")
|
|
29
29
|
logger.setLevel(logging.INFO)
|
|
@@ -506,12 +506,10 @@ def util_merge_csvs(input_dir: str):
|
|
|
506
506
|
"""
|
|
507
507
|
combine centroids of multiple TDigests together,
|
|
508
508
|
and return the new aggregated centroids.
|
|
509
|
-
Note:
|
|
509
|
+
Note: max_centroids=1000
|
|
510
510
|
"""
|
|
511
|
-
return (
|
|
512
|
-
|
|
513
|
-
.combine([TDigest.of_centroids(y, compression=1000) for y in x])
|
|
514
|
-
.get_centroids()
|
|
511
|
+
return tdigest.centroids(
|
|
512
|
+
tdigest.combine(tdigest.from_centroids(y) for y in x)
|
|
515
513
|
)
|
|
516
514
|
|
|
517
515
|
# for each elapsed range bucket, merge the data for all `id` together
|
|
@@ -520,9 +518,9 @@ def util_merge_csvs(input_dir: str):
|
|
|
520
518
|
{"ts": min, "threads": sum, "centroids": combine_centroids}
|
|
521
519
|
)
|
|
522
520
|
|
|
523
|
-
# the
|
|
521
|
+
# the mass of the TDigest represents the count of ops
|
|
524
522
|
df["period_ops"] = df["centroids"].map(
|
|
525
|
-
lambda x:
|
|
523
|
+
lambda x: tdigest.count(tdigest.from_centroids(x))
|
|
526
524
|
)
|
|
527
525
|
|
|
528
526
|
df["period_ops_s"] = df["period_ops"].apply(lambda x: x // 10)
|
|
@@ -536,17 +534,21 @@ def util_merge_csvs(input_dir: str):
|
|
|
536
534
|
|
|
537
535
|
# calculate mean and quantiles and convert from seconds to millis
|
|
538
536
|
df["mean_ms"] = df["centroids"].map(
|
|
539
|
-
lambda x:
|
|
540
|
-
* 1000
|
|
537
|
+
lambda x: tdigest.from_centroids(x).mean() * 1000
|
|
541
538
|
)
|
|
542
|
-
df[["p50_ms", "p90_ms", "p95_ms", "p99_ms", "max_ms"]] =
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
539
|
+
df[["p50_ms", "p90_ms", "p95_ms", "p99_ms", "max_ms"]] = (
|
|
540
|
+
pd.DataFrame(
|
|
541
|
+
df["centroids"]
|
|
542
|
+
.map(
|
|
543
|
+
lambda x: tdigest.from_centroids(x).quantile_vec(
|
|
544
|
+
[0.50, 0.90, 0.95, 0.99, 1.00]
|
|
545
|
+
)
|
|
546
|
+
)
|
|
547
|
+
.tolist(),
|
|
548
|
+
index=df.index,
|
|
548
549
|
)
|
|
549
|
-
|
|
550
|
+
* 1000
|
|
551
|
+
)
|
|
550
552
|
|
|
551
553
|
# round all values to 2 decimals
|
|
552
554
|
df[["mean_ms", "p50_ms", "p90_ms", "p95_ms", "p99_ms", "max_ms"]] = df[
|
|
@@ -13,7 +13,9 @@ import prometheus_client as prom
|
|
|
13
13
|
import yaml
|
|
14
14
|
from prometheus_client.core import REGISTRY, HistogramMetricFamily
|
|
15
15
|
from prometheus_client.registry import Collector
|
|
16
|
-
|
|
16
|
+
|
|
17
|
+
import dbworkload.utils.tdigest as tdigest
|
|
18
|
+
from fastdigest import TDigest
|
|
17
19
|
|
|
18
20
|
RESERVED_WORDS = [
|
|
19
21
|
"unique",
|
|
@@ -77,9 +79,7 @@ class Stats:
|
|
|
77
79
|
for x in l:
|
|
78
80
|
self.cumulative_counts.setdefault(x[0], TDigest())
|
|
79
81
|
self.window_stats.setdefault(x[0], [])
|
|
80
|
-
self.window_stats[x[0]].append(
|
|
81
|
-
TDigest(compression=1000).of_centroids(x[1], compression=1000)
|
|
82
|
-
)
|
|
82
|
+
self.window_stats[x[0]].append(tdigest.from_centroids(x[1]))
|
|
83
83
|
|
|
84
84
|
# calculate the current stats this instance has collected.
|
|
85
85
|
def calculate_stats(self, active_connections: int, endtime: int) -> list:
|
|
@@ -97,23 +97,21 @@ class Stats:
|
|
|
97
97
|
)
|
|
98
98
|
|
|
99
99
|
def get_stats_row(id: str):
|
|
100
|
-
td =
|
|
100
|
+
td = tdigest.combine(self.window_stats[id])
|
|
101
101
|
|
|
102
|
-
self.window_stats_centroids[id] =
|
|
102
|
+
self.window_stats_centroids[id] = tdigest.centroids(td)
|
|
103
103
|
|
|
104
|
-
self.cumulative_counts[id] =
|
|
105
|
-
self.cumulative_counts[id], td
|
|
106
|
-
)
|
|
104
|
+
self.cumulative_counts[id] = self.cumulative_counts[id].merge(td)
|
|
107
105
|
return [
|
|
108
106
|
elapsed,
|
|
109
107
|
id,
|
|
110
108
|
active_connections,
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
round(td.mean * 1000, 2),
|
|
116
|
-
] + [round(x * 1000, 2) for x in td.
|
|
109
|
+
tdigest.count(self.cumulative_counts[id]),
|
|
110
|
+
tdigest.count(self.cumulative_counts[id]) // elapsed,
|
|
111
|
+
tdigest.count(td),
|
|
112
|
+
tdigest.count(td) // window_elapsed,
|
|
113
|
+
round(td.mean() * 1000, 2),
|
|
114
|
+
] + [round(x * 1000, 2) for x in td.quantile_vec(self.quantiles)]
|
|
117
115
|
|
|
118
116
|
return [get_stats_row(id) for id in sorted(list(self.window_stats.keys()))]
|
|
119
117
|
|
|
@@ -129,12 +127,12 @@ class Stats:
|
|
|
129
127
|
elapsed,
|
|
130
128
|
id,
|
|
131
129
|
active_connections,
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
round(self.cumulative_counts[id].mean * 1000, 2),
|
|
130
|
+
tdigest.count(self.cumulative_counts[id]),
|
|
131
|
+
tdigest.count(self.cumulative_counts[id]) // elapsed,
|
|
132
|
+
round(self.cumulative_counts[id].mean() * 1000, 2),
|
|
135
133
|
] + [
|
|
136
134
|
round(x * 1000, 2)
|
|
137
|
-
for x in self.cumulative_counts[id].
|
|
135
|
+
for x in self.cumulative_counts[id].quantile_vec(self.quantiles)
|
|
138
136
|
]
|
|
139
137
|
|
|
140
138
|
return [get_stats_row(id) for id in sorted(list(self.window_stats.keys()))]
|
|
@@ -165,7 +163,7 @@ class WorkerStats:
|
|
|
165
163
|
|
|
166
164
|
def get_tdigest_ndarray(self):
|
|
167
165
|
return [
|
|
168
|
-
(id,
|
|
166
|
+
(id, tdigest.centroids(tdigest.from_values(l)))
|
|
169
167
|
for id, l in self.window_stats.items()
|
|
170
168
|
]
|
|
171
169
|
|
|
@@ -182,10 +180,13 @@ class CustomHistogram(Collector):
|
|
|
182
180
|
return [["+Inf", 0]]
|
|
183
181
|
|
|
184
182
|
# create buckets from 10 ... 180
|
|
185
|
-
|
|
186
|
-
td_hist
|
|
183
|
+
td_count = tdigest.count(td)
|
|
184
|
+
td_hist = [
|
|
185
|
+
[x, int(td.cdf((int(x) + 1) / 1000) * td_count)] for x in self.bins
|
|
186
|
+
]
|
|
187
|
+
td_hist.append(["+Inf", td_count])
|
|
187
188
|
|
|
188
|
-
return td.mean * 1000 *
|
|
189
|
+
return td.mean() * 1000 * td_count, td_hist
|
|
189
190
|
|
|
190
191
|
def collect(self):
|
|
191
192
|
sum_value, buckets = self.get_buckets(self.name)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/python
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from fastdigest import TDigest, merge_all
|
|
5
|
+
|
|
6
|
+
MAX_CENTROIDS = 1000
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def from_values(values) -> TDigest:
|
|
10
|
+
return TDigest.from_values(
|
|
11
|
+
np.asarray(values, dtype=float), max_centroids=MAX_CENTROIDS
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def from_centroids(centroids) -> TDigest:
|
|
16
|
+
arr = np.asarray(centroids, dtype=float)
|
|
17
|
+
|
|
18
|
+
if arr.size == 0:
|
|
19
|
+
return TDigest(MAX_CENTROIDS)
|
|
20
|
+
|
|
21
|
+
arr = np.atleast_2d(arr)
|
|
22
|
+
return TDigest.from_values(arr[:, 0], arr[:, 1], max_centroids=MAX_CENTROIDS)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def combine(digests) -> TDigest:
|
|
26
|
+
return merge_all(list(digests))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def centroids(td: TDigest) -> np.ndarray:
|
|
30
|
+
return np.asarray(td.centroids, dtype=float).reshape(-1, 2)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def count(td: TDigest) -> int:
|
|
34
|
+
return int(td.mass())
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "dbworkload"
|
|
3
|
-
version = "0.11.0"
|
|
3
|
+
version = "0.11.1.dev1"
|
|
4
4
|
description = "Workload framework"
|
|
5
5
|
authors = ["Fabio Ghirardello"]
|
|
6
6
|
license = "GPLv3+"
|
|
@@ -17,7 +17,7 @@ classifiers = [
|
|
|
17
17
|
dbworkload = 'dbworkload.cli.main:app'
|
|
18
18
|
|
|
19
19
|
[tool.poetry.dependencies]
|
|
20
|
-
python = "
|
|
20
|
+
python = ">=3.11,<4"
|
|
21
21
|
pandas = "*"
|
|
22
22
|
tabulate = "*"
|
|
23
23
|
numpy = "*"
|
|
@@ -42,12 +42,12 @@ fastembed = {version = "^0.7.3", optional = true }
|
|
|
42
42
|
pgvector = {version = "^0.4.1", optional = true }
|
|
43
43
|
langgraph = { version = "^1.0.3", optional = true }
|
|
44
44
|
openai = { version = "^2.8.0", optional = true }
|
|
45
|
-
pytdigest = "*"
|
|
46
45
|
plotext = "*"
|
|
47
46
|
plotly = "*"
|
|
48
47
|
jinja2 = "*"
|
|
49
48
|
sqlparse = "*"
|
|
50
49
|
psutil = "^7.0.0"
|
|
50
|
+
fastdigest = "^0.12.0"
|
|
51
51
|
|
|
52
52
|
[tool.poetry.extras]
|
|
53
53
|
all = ["psycopg", "psycopg-binary", "mysql-connector-python", "mariadb", "oracledb", "pyodbc", "pymongo", "cassandra-driver", "google-cloud-spanner", "pinecone", "convert"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|