dbworkload 0.8.0__tar.gz → 0.8.2__tar.gz
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {dbworkload-0.8.0 → dbworkload-0.8.2}/PKG-INFO +1 -1
- {dbworkload-0.8.0 → dbworkload-0.8.2}/dbworkload/cli/dep.py +2 -1
- {dbworkload-0.8.0 → dbworkload-0.8.2}/dbworkload/cli/main.py +20 -10
- {dbworkload-0.8.0 → dbworkload-0.8.2}/dbworkload/cli/util.py +5 -3
- {dbworkload-0.8.0 → dbworkload-0.8.2}/dbworkload/models/run.py +11 -11
- {dbworkload-0.8.0 → dbworkload-0.8.2}/dbworkload/models/util.py +12 -11
- {dbworkload-0.8.0 → dbworkload-0.8.2}/dbworkload/utils/common.py +85 -70
- {dbworkload-0.8.0 → dbworkload-0.8.2}/dbworkload/utils/simplefaker.py +5 -3
- {dbworkload-0.8.0 → dbworkload-0.8.2}/pyproject.toml +1 -1
- {dbworkload-0.8.0 → dbworkload-0.8.2}/LICENSE +0 -0
- {dbworkload-0.8.0 → dbworkload-0.8.2}/README.md +0 -0
- {dbworkload-0.8.0 → dbworkload-0.8.2}/dbworkload/__init__.py +0 -0
- {dbworkload-0.8.0 → dbworkload-0.8.2}/dbworkload/templates/stub.j2 +0 -0
|
@@ -1,23 +1,26 @@
|
|
|
1
1
|
#!/usr/bin/python
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import platform
|
|
7
|
+
import sys
|
|
5
8
|
from enum import Enum
|
|
6
9
|
from pathlib import Path
|
|
7
10
|
from typing import Optional
|
|
8
11
|
from urllib.parse import urlparse
|
|
12
|
+
|
|
13
|
+
import pandas as pd
|
|
14
|
+
import typer
|
|
15
|
+
import yaml
|
|
16
|
+
|
|
9
17
|
import dbworkload.cli.util
|
|
10
18
|
import dbworkload.models.run
|
|
11
19
|
import dbworkload.models.util
|
|
12
20
|
import dbworkload.utils.common
|
|
13
|
-
import
|
|
14
|
-
|
|
15
|
-
import
|
|
16
|
-
import platform
|
|
17
|
-
import sys
|
|
18
|
-
import typer
|
|
19
|
-
import yaml
|
|
20
|
-
import pandas as pd
|
|
21
|
+
from dbworkload.cli.dep import EPILOG, ConnInfo, Param
|
|
22
|
+
|
|
23
|
+
from .. import __version__
|
|
21
24
|
|
|
22
25
|
logger = logging.getLogger("dbworkload")
|
|
23
26
|
|
|
@@ -145,6 +148,11 @@ def run(
|
|
|
145
148
|
"--schedule",
|
|
146
149
|
help="schedule JSON string or filepath to the schedule file.",
|
|
147
150
|
),
|
|
151
|
+
histogram_bins: str = typer.Option(
|
|
152
|
+
"5,10,25,50,75,100,125,250,500,750,1000",
|
|
153
|
+
"--bins",
|
|
154
|
+
help="comma separated list of ints defining the histogram bins.",
|
|
155
|
+
),
|
|
148
156
|
log_level: LogLevel = Param.LogLevel,
|
|
149
157
|
):
|
|
150
158
|
logger.setLevel(log_level.upper())
|
|
@@ -231,6 +239,7 @@ def run(
|
|
|
231
239
|
|
|
232
240
|
args = load_args(args)
|
|
233
241
|
|
|
242
|
+
histogram_bins = histogram_bins.split(",")
|
|
234
243
|
schedule = load_schedule(schedule)
|
|
235
244
|
|
|
236
245
|
dbworkload.models.run.run(
|
|
@@ -249,6 +258,7 @@ def run(
|
|
|
249
258
|
quiet,
|
|
250
259
|
save,
|
|
251
260
|
schedule,
|
|
261
|
+
histogram_bins,
|
|
252
262
|
log_level.upper(),
|
|
253
263
|
)
|
|
254
264
|
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
#!/usr/bin/python
|
|
2
2
|
|
|
3
|
-
from pathlib import Path
|
|
4
3
|
from enum import Enum
|
|
4
|
+
from pathlib import Path
|
|
5
5
|
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
6
9
|
import dbworkload.models.run
|
|
7
10
|
import dbworkload.models.util
|
|
8
11
|
import dbworkload.utils.common
|
|
9
|
-
from dbworkload.cli.dep import
|
|
10
|
-
import typer
|
|
12
|
+
from dbworkload.cli.dep import EPILOG, Param
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
class Compression(str, Enum):
|
|
@@ -1,24 +1,23 @@
|
|
|
1
1
|
#!/usr/bin/python
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
from dbworkload.cli.dep import ConnInfo
|
|
5
|
-
import dbworkload.utils.common
|
|
3
|
+
import errno
|
|
6
4
|
import logging
|
|
7
|
-
import logging.handlers
|
|
8
5
|
import multiprocessing as mp
|
|
9
|
-
import
|
|
6
|
+
import os
|
|
10
7
|
import queue
|
|
11
8
|
import random
|
|
12
9
|
import signal
|
|
13
10
|
import sys
|
|
14
|
-
import sys
|
|
15
|
-
import tabulate
|
|
16
|
-
from threading import Thread
|
|
17
11
|
import time
|
|
18
12
|
import traceback
|
|
19
|
-
import
|
|
20
|
-
import
|
|
13
|
+
from contextlib import contextmanager
|
|
14
|
+
from threading import Thread
|
|
21
15
|
|
|
16
|
+
import numpy as np
|
|
17
|
+
import tabulate
|
|
18
|
+
|
|
19
|
+
import dbworkload.utils.common
|
|
20
|
+
from dbworkload.cli.dep import ConnInfo
|
|
22
21
|
|
|
23
22
|
# from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT, Session
|
|
24
23
|
# from cassandra.policies import (
|
|
@@ -178,6 +177,7 @@ def run(
|
|
|
178
177
|
quiet: bool,
|
|
179
178
|
save: bool,
|
|
180
179
|
schedule: list,
|
|
180
|
+
histogram_bins: list,
|
|
181
181
|
log_level: str,
|
|
182
182
|
):
|
|
183
183
|
def gracefully_shutdown(by_keyinterrupt: bool = False):
|
|
@@ -330,7 +330,7 @@ def run(
|
|
|
330
330
|
|
|
331
331
|
stats = dbworkload.utils.common.Stats(start_time)
|
|
332
332
|
|
|
333
|
-
prom = dbworkload.utils.common.Prom(prom_port)
|
|
333
|
+
prom = dbworkload.utils.common.Prom(prom_port, stats, histogram_bins)
|
|
334
334
|
|
|
335
335
|
to_main_q = mp.Queue()
|
|
336
336
|
|
|
@@ -1,28 +1,29 @@
|
|
|
1
1
|
#!/usr/bin/python
|
|
2
2
|
|
|
3
|
-
from io import TextIOWrapper
|
|
4
|
-
from jinja2 import Environment, PackageLoader
|
|
5
|
-
from pathlib import PosixPath
|
|
6
|
-
from plotly.subplots import make_subplots
|
|
7
|
-
from pytdigest import TDigest
|
|
8
3
|
import datetime as dt
|
|
9
|
-
import dbworkload
|
|
10
|
-
import dbworkload.utils.common
|
|
11
|
-
import dbworkload.utils.simplefaker
|
|
12
4
|
import gzip
|
|
13
5
|
import itertools
|
|
14
6
|
import logging
|
|
15
|
-
import numpy as np
|
|
16
7
|
import os
|
|
8
|
+
import shutil
|
|
9
|
+
import sys
|
|
10
|
+
from io import TextIOWrapper
|
|
11
|
+
from pathlib import PosixPath
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
17
14
|
import pandas as pd
|
|
18
15
|
import plotext as plt
|
|
19
16
|
import plotly.graph_objects as go
|
|
20
17
|
import plotly.io as pio
|
|
21
|
-
import shutil
|
|
22
18
|
import sqlparse
|
|
23
|
-
import sys
|
|
24
19
|
import yaml
|
|
20
|
+
from jinja2 import Environment, PackageLoader
|
|
21
|
+
from plotly.subplots import make_subplots
|
|
22
|
+
from pytdigest import TDigest
|
|
25
23
|
|
|
24
|
+
import dbworkload
|
|
25
|
+
import dbworkload.utils.common
|
|
26
|
+
import dbworkload.utils.simplefaker
|
|
26
27
|
|
|
27
28
|
logger = logging.getLogger("dbworkload")
|
|
28
29
|
logger.setLevel(logging.INFO)
|
|
@@ -2,14 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
import importlib
|
|
4
4
|
import logging
|
|
5
|
-
import numpy as np
|
|
6
5
|
import os
|
|
7
6
|
import random
|
|
8
7
|
import sys
|
|
9
8
|
import time
|
|
10
9
|
import urllib.parse
|
|
11
|
-
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
12
|
import prometheus_client as prom
|
|
13
|
+
import yaml
|
|
14
|
+
from prometheus_client.core import REGISTRY, HistogramMetricFamily
|
|
15
|
+
from prometheus_client.registry import Collector
|
|
13
16
|
from pytdigest import TDigest
|
|
14
17
|
|
|
15
18
|
RESERVED_WORDS = [
|
|
@@ -45,57 +48,8 @@ NOT_NULL_MAX = 40
|
|
|
45
48
|
|
|
46
49
|
logger = logging.getLogger("dbworkload")
|
|
47
50
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def __init__(self, prom_port: int = 26260):
|
|
51
|
-
self.prom_latency: dict[str, list[prom.Gauge]] = {}
|
|
52
|
-
|
|
53
|
-
# don't stop just because prom server can't start
|
|
54
|
-
try:
|
|
55
|
-
prom.start_http_server(prom_port)
|
|
56
|
-
except OSError as e:
|
|
57
|
-
logger.warning(f"Cannot start prometheus server: {e}")
|
|
58
|
-
|
|
59
|
-
self.threads = prom.Gauge(
|
|
60
|
-
"threads", "count of connection threads to the database."
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
def publish(self, report: list):
|
|
64
|
-
for row in report:
|
|
65
|
-
id = row[1]
|
|
66
|
-
|
|
67
|
-
if id not in self.prom_latency:
|
|
68
|
-
self.prom_latency[id] = []
|
|
69
|
-
self.prom_latency[id].append(
|
|
70
|
-
prom.Gauge(f"{id}__tot_ops", "total count of ops")
|
|
71
|
-
)
|
|
72
|
-
self.prom_latency[id].append(
|
|
73
|
-
prom.Gauge(
|
|
74
|
-
f"{id}__tot_ops_s", "derived value from tot_ops / elapsed"
|
|
75
|
-
)
|
|
76
|
-
)
|
|
77
|
-
self.prom_latency[id].append(
|
|
78
|
-
prom.Gauge(f"{id}__period_ops", "ops count for the recent window")
|
|
79
|
-
)
|
|
80
|
-
self.prom_latency[id].append(
|
|
81
|
-
prom.Gauge(
|
|
82
|
-
f"{id}__period_ops_s",
|
|
83
|
-
"derived value from period_ops / window duration",
|
|
84
|
-
)
|
|
85
|
-
)
|
|
86
|
-
self.prom_latency[id].append(prom.Gauge(f"{id}__mean_ms", "mean_ms"))
|
|
87
|
-
self.prom_latency[id].append(prom.Gauge(f"{id}__p50_ms", "p50_ms"))
|
|
88
|
-
self.prom_latency[id].append(prom.Gauge(f"{id}__p90_ms", "p90_ms"))
|
|
89
|
-
self.prom_latency[id].append(prom.Gauge(f"{id}__p95_ms", "p95_ms"))
|
|
90
|
-
self.prom_latency[id].append(prom.Gauge(f"{id}__p99_ms", "p99_ms"))
|
|
91
|
-
self.prom_latency[id].append(prom.Gauge(f"{id}__max_ms", "max_ms"))
|
|
92
|
-
|
|
93
|
-
for idx, v in enumerate(row[3:]):
|
|
94
|
-
self.prom_latency[id][idx].set(v)
|
|
95
|
-
|
|
96
|
-
# threads value is the same for all rows
|
|
97
|
-
if report:
|
|
98
|
-
self.threads.set(report[0][2])
|
|
51
|
+
from prometheus_client.core import REGISTRY, HistogramMetricFamily
|
|
52
|
+
from prometheus_client.registry import Collector
|
|
99
53
|
|
|
100
54
|
|
|
101
55
|
class Stats:
|
|
@@ -216,6 +170,84 @@ class WorkerStats:
|
|
|
216
170
|
]
|
|
217
171
|
|
|
218
172
|
|
|
173
|
+
class CustomHistogram(Collector):
|
|
174
|
+
def __init__(self, name: str, stats: Stats, bins: list):
|
|
175
|
+
self.name = name
|
|
176
|
+
self.stats = stats
|
|
177
|
+
self.bins = bins
|
|
178
|
+
|
|
179
|
+
def get_buckets(self, name):
|
|
180
|
+
td = self.stats.cumulative_counts.get(name)
|
|
181
|
+
if td is None:
|
|
182
|
+
return [["+Inf", 0]]
|
|
183
|
+
|
|
184
|
+
# create buckets from 10 ... 180
|
|
185
|
+
td_hist = [[x, int(td.cdf((int(x) + 1) / 1000) * td.weight)] for x in self.bins]
|
|
186
|
+
td_hist.append(["+Inf", td.weight])
|
|
187
|
+
|
|
188
|
+
return td.mean * 1000 * td.weight, td_hist
|
|
189
|
+
|
|
190
|
+
def collect(self):
|
|
191
|
+
sum_value, buckets = self.get_buckets(self.name)
|
|
192
|
+
yield HistogramMetricFamily(
|
|
193
|
+
f"{self.name}_latency_ms",
|
|
194
|
+
f"Latency in ms for {self.name}",
|
|
195
|
+
buckets,
|
|
196
|
+
sum_value,
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class Prom:
|
|
201
|
+
def __init__(self, prom_port: int = 26260, stats: Stats = None, bins: list = []):
|
|
202
|
+
self.prom_latency: dict[str, list[prom.Gauge]] = {}
|
|
203
|
+
self.stats = stats
|
|
204
|
+
self.bins = bins
|
|
205
|
+
|
|
206
|
+
# don't stop just because prom server can't start
|
|
207
|
+
try:
|
|
208
|
+
prom.start_http_server(prom_port)
|
|
209
|
+
except OSError as e:
|
|
210
|
+
logger.warning(f"Cannot start prometheus server: {e}")
|
|
211
|
+
|
|
212
|
+
self.threads = prom.Gauge(
|
|
213
|
+
"threads", "count of connection threads to the database."
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
def publish(self, report: list, td: dict = {}):
|
|
217
|
+
for row in report:
|
|
218
|
+
id = row[1]
|
|
219
|
+
|
|
220
|
+
if id not in self.prom_latency:
|
|
221
|
+
self.prom_latency[id] = []
|
|
222
|
+
|
|
223
|
+
REGISTRY.register(CustomHistogram(id, self.stats, self.bins))
|
|
224
|
+
|
|
225
|
+
self.prom_latency[id].append(
|
|
226
|
+
prom.Gauge(f"{id}__tot_ops", "total count of ops")
|
|
227
|
+
)
|
|
228
|
+
self.prom_latency[id].append(
|
|
229
|
+
prom.Gauge(
|
|
230
|
+
f"{id}__tot_ops_s", "derived value from tot_ops / elapsed"
|
|
231
|
+
)
|
|
232
|
+
)
|
|
233
|
+
self.prom_latency[id].append(
|
|
234
|
+
prom.Gauge(f"{id}__period_ops", "ops count for the recent window")
|
|
235
|
+
)
|
|
236
|
+
self.prom_latency[id].append(
|
|
237
|
+
prom.Gauge(
|
|
238
|
+
f"{id}__period_ops_s",
|
|
239
|
+
"derived value from period_ops / window duration",
|
|
240
|
+
)
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
for idx, v in enumerate(row[3:6]):
|
|
244
|
+
self.prom_latency[id][idx].set(v)
|
|
245
|
+
|
|
246
|
+
# threads value is the same for all rows
|
|
247
|
+
if report:
|
|
248
|
+
self.threads.set(report[0][2])
|
|
249
|
+
|
|
250
|
+
|
|
219
251
|
def get_driver_from_scheme(scheme: str):
|
|
220
252
|
return {
|
|
221
253
|
"postgres": "postgres",
|
|
@@ -654,23 +686,6 @@ def ddl_to_yaml(ddl: str):
|
|
|
654
686
|
elif within_brackets > 0 and i == ",":
|
|
655
687
|
col_def += ":"
|
|
656
688
|
|
|
657
|
-
# process the content within parenthesis in the
|
|
658
|
-
# CREATE TABLE stmt char by char to distinguish
|
|
659
|
-
# the comma for separating columns vs the comma
|
|
660
|
-
# included in single quote strings such as those in DEFAULT
|
|
661
|
-
# eg: mycol STRING NULL DEFAULT 'corporate, inc'
|
|
662
|
-
within_quote = False
|
|
663
|
-
col_def_str = col_def
|
|
664
|
-
col_def = ""
|
|
665
|
-
for i in col_def_str:
|
|
666
|
-
if i == "'":
|
|
667
|
-
within_quote = not within_quote
|
|
668
|
-
continue
|
|
669
|
-
if within_quote:
|
|
670
|
-
continue
|
|
671
|
-
else:
|
|
672
|
-
col_def += i
|
|
673
|
-
|
|
674
689
|
col_def = [x.strip().lower() for x in col_def.split(",")]
|
|
675
690
|
|
|
676
691
|
ll = []
|
|
@@ -1,12 +1,14 @@
|
|
|
1
|
+
import builtins
|
|
1
2
|
import csv
|
|
2
3
|
import datetime as dt
|
|
3
4
|
import logging
|
|
4
5
|
import multiprocessing as mp
|
|
5
6
|
import os
|
|
6
|
-
import pandas as pd
|
|
7
|
-
import uuid
|
|
8
7
|
import random
|
|
9
|
-
import
|
|
8
|
+
import uuid
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
10
12
|
from .common import import_class_at_runtime
|
|
11
13
|
|
|
12
14
|
logger = logging.getLogger("dbworkload")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|