dbworkload 0.8.0__tar.gz → 0.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dbworkload
3
- Version: 0.8.0
3
+ Version: 0.8.2
4
4
  Summary: Workload framework
5
5
  License: GPLv3+
6
6
  Author: Fabio Ghirardello
@@ -1,8 +1,9 @@
1
1
  #!/usr/bin/python
2
2
 
3
- from .. import __version__
4
3
  import typer
5
4
 
5
+ from .. import __version__
6
+
6
7
  EPILOG = "Docs: <https://dbworkload.github.io/dbworkload/>"
7
8
 
8
9
 
@@ -1,23 +1,26 @@
1
1
  #!/usr/bin/python
2
2
 
3
- from .. import __version__
4
- from dbworkload.cli.dep import Param, EPILOG, ConnInfo
3
+ import json
4
+ import logging
5
+ import os
6
+ import platform
7
+ import sys
5
8
  from enum import Enum
6
9
  from pathlib import Path
7
10
  from typing import Optional
8
11
  from urllib.parse import urlparse
12
+
13
+ import pandas as pd
14
+ import typer
15
+ import yaml
16
+
9
17
  import dbworkload.cli.util
10
18
  import dbworkload.models.run
11
19
  import dbworkload.models.util
12
20
  import dbworkload.utils.common
13
- import json
14
- import logging
15
- import os
16
- import platform
17
- import sys
18
- import typer
19
- import yaml
20
- import pandas as pd
21
+ from dbworkload.cli.dep import EPILOG, ConnInfo, Param
22
+
23
+ from .. import __version__
21
24
 
22
25
  logger = logging.getLogger("dbworkload")
23
26
 
@@ -145,6 +148,11 @@ def run(
145
148
  "--schedule",
146
149
  help="schedule JSON string or filepath to the schedule file.",
147
150
  ),
151
+ histogram_bins: str = typer.Option(
152
+ "5,10,25,50,75,100,125,250,500,750,1000",
153
+ "--bins",
154
+ help="comma separated list of ints defining the histogram bins.",
155
+ ),
148
156
  log_level: LogLevel = Param.LogLevel,
149
157
  ):
150
158
  logger.setLevel(log_level.upper())
@@ -231,6 +239,7 @@ def run(
231
239
 
232
240
  args = load_args(args)
233
241
 
242
+ histogram_bins = histogram_bins.split(",")
234
243
  schedule = load_schedule(schedule)
235
244
 
236
245
  dbworkload.models.run.run(
@@ -249,6 +258,7 @@ def run(
249
258
  quiet,
250
259
  save,
251
260
  schedule,
261
+ histogram_bins,
252
262
  log_level.upper(),
253
263
  )
254
264
 
@@ -1,13 +1,15 @@
1
1
  #!/usr/bin/python
2
2
 
3
- from pathlib import Path
4
3
  from enum import Enum
4
+ from pathlib import Path
5
5
  from typing import Optional
6
+
7
+ import typer
8
+
6
9
  import dbworkload.models.run
7
10
  import dbworkload.models.util
8
11
  import dbworkload.utils.common
9
- from dbworkload.cli.dep import Param, EPILOG
10
- import typer
12
+ from dbworkload.cli.dep import EPILOG, Param
11
13
 
12
14
 
13
15
  class Compression(str, Enum):
@@ -1,24 +1,23 @@
1
1
  #!/usr/bin/python
2
2
 
3
- from contextlib import contextmanager
4
- from dbworkload.cli.dep import ConnInfo
5
- import dbworkload.utils.common
3
+ import errno
6
4
  import logging
7
- import logging.handlers
8
5
  import multiprocessing as mp
9
- import numpy as np
6
+ import os
10
7
  import queue
11
8
  import random
12
9
  import signal
13
10
  import sys
14
- import sys
15
- import tabulate
16
- from threading import Thread
17
11
  import time
18
12
  import traceback
19
- import os
20
- import errno
13
+ from contextlib import contextmanager
14
+ from threading import Thread
21
15
 
16
+ import numpy as np
17
+ import tabulate
18
+
19
+ import dbworkload.utils.common
20
+ from dbworkload.cli.dep import ConnInfo
22
21
 
23
22
  # from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT, Session
24
23
  # from cassandra.policies import (
@@ -178,6 +177,7 @@ def run(
178
177
  quiet: bool,
179
178
  save: bool,
180
179
  schedule: list,
180
+ histogram_bins: list,
181
181
  log_level: str,
182
182
  ):
183
183
  def gracefully_shutdown(by_keyinterrupt: bool = False):
@@ -330,7 +330,7 @@ def run(
330
330
 
331
331
  stats = dbworkload.utils.common.Stats(start_time)
332
332
 
333
- prom = dbworkload.utils.common.Prom(prom_port)
333
+ prom = dbworkload.utils.common.Prom(prom_port, stats, histogram_bins)
334
334
 
335
335
  to_main_q = mp.Queue()
336
336
 
@@ -1,28 +1,29 @@
1
1
  #!/usr/bin/python
2
2
 
3
- from io import TextIOWrapper
4
- from jinja2 import Environment, PackageLoader
5
- from pathlib import PosixPath
6
- from plotly.subplots import make_subplots
7
- from pytdigest import TDigest
8
3
  import datetime as dt
9
- import dbworkload
10
- import dbworkload.utils.common
11
- import dbworkload.utils.simplefaker
12
4
  import gzip
13
5
  import itertools
14
6
  import logging
15
- import numpy as np
16
7
  import os
8
+ import shutil
9
+ import sys
10
+ from io import TextIOWrapper
11
+ from pathlib import PosixPath
12
+
13
+ import numpy as np
17
14
  import pandas as pd
18
15
  import plotext as plt
19
16
  import plotly.graph_objects as go
20
17
  import plotly.io as pio
21
- import shutil
22
18
  import sqlparse
23
- import sys
24
19
  import yaml
20
+ from jinja2 import Environment, PackageLoader
21
+ from plotly.subplots import make_subplots
22
+ from pytdigest import TDigest
25
23
 
24
+ import dbworkload
25
+ import dbworkload.utils.common
26
+ import dbworkload.utils.simplefaker
26
27
 
27
28
  logger = logging.getLogger("dbworkload")
28
29
  logger.setLevel(logging.INFO)
@@ -2,14 +2,17 @@
2
2
 
3
3
  import importlib
4
4
  import logging
5
- import numpy as np
6
5
  import os
7
6
  import random
8
7
  import sys
9
8
  import time
10
9
  import urllib.parse
11
- import yaml
10
+
11
+ import numpy as np
12
12
  import prometheus_client as prom
13
+ import yaml
14
+ from prometheus_client.core import REGISTRY, HistogramMetricFamily
15
+ from prometheus_client.registry import Collector
13
16
  from pytdigest import TDigest
14
17
 
15
18
  RESERVED_WORDS = [
@@ -45,57 +48,8 @@ NOT_NULL_MAX = 40
45
48
 
46
49
  logger = logging.getLogger("dbworkload")
47
50
 
48
-
49
- class Prom:
50
- def __init__(self, prom_port: int = 26260):
51
- self.prom_latency: dict[str, list[prom.Gauge]] = {}
52
-
53
- # don't stop just because prom server can't start
54
- try:
55
- prom.start_http_server(prom_port)
56
- except OSError as e:
57
- logger.warning(f"Cannot start prometheus server: {e}")
58
-
59
- self.threads = prom.Gauge(
60
- "threads", "count of connection threads to the database."
61
- )
62
-
63
- def publish(self, report: list):
64
- for row in report:
65
- id = row[1]
66
-
67
- if id not in self.prom_latency:
68
- self.prom_latency[id] = []
69
- self.prom_latency[id].append(
70
- prom.Gauge(f"{id}__tot_ops", "total count of ops")
71
- )
72
- self.prom_latency[id].append(
73
- prom.Gauge(
74
- f"{id}__tot_ops_s", "derived value from tot_ops / elapsed"
75
- )
76
- )
77
- self.prom_latency[id].append(
78
- prom.Gauge(f"{id}__period_ops", "ops count for the recent window")
79
- )
80
- self.prom_latency[id].append(
81
- prom.Gauge(
82
- f"{id}__period_ops_s",
83
- "derived value from period_ops / window duration",
84
- )
85
- )
86
- self.prom_latency[id].append(prom.Gauge(f"{id}__mean_ms", "mean_ms"))
87
- self.prom_latency[id].append(prom.Gauge(f"{id}__p50_ms", "p50_ms"))
88
- self.prom_latency[id].append(prom.Gauge(f"{id}__p90_ms", "p90_ms"))
89
- self.prom_latency[id].append(prom.Gauge(f"{id}__p95_ms", "p95_ms"))
90
- self.prom_latency[id].append(prom.Gauge(f"{id}__p99_ms", "p99_ms"))
91
- self.prom_latency[id].append(prom.Gauge(f"{id}__max_ms", "max_ms"))
92
-
93
- for idx, v in enumerate(row[3:]):
94
- self.prom_latency[id][idx].set(v)
95
-
96
- # threads value is the same for all rows
97
- if report:
98
- self.threads.set(report[0][2])
51
+ from prometheus_client.core import REGISTRY, HistogramMetricFamily
52
+ from prometheus_client.registry import Collector
99
53
 
100
54
 
101
55
  class Stats:
@@ -216,6 +170,84 @@ class WorkerStats:
216
170
  ]
217
171
 
218
172
 
173
class CustomHistogram(Collector):
    """Prometheus collector exposing one entry's latency as a histogram.

    Nothing is pre-computed: every time the registry calls ``collect()`` the
    buckets are rebuilt from the digest currently stored under ``name`` in
    ``stats.cumulative_counts``.
    """

    def __init__(self, name: str, stats: Stats, bins: list):
        """Remember what is needed to build the histogram on demand.

        Args:
            name: key looked up in ``stats.cumulative_counts``; also used as
                the prefix of the exported metric name.
            stats: object whose ``cumulative_counts`` mapping holds the
                digests.
            bins: bucket upper bounds in milliseconds, given as ints or
                int-like strings.
        """
        self.name = name
        self.stats = stats
        self.bins = bins

    def get_buckets(self, name):
        """Return ``(sum_value, buckets)`` for the digest stored under *name*.

        ``buckets`` is a list of ``[upper_bound, cumulative_count]`` pairs in
        ascending order, always terminated by the ``"+Inf"`` bucket. When no
        digest exists yet for *name*, an empty histogram is returned in the
        same two-element shape so that callers can always unpack the result.

        Raises:
            ValueError: if a non-blank entry of ``self.bins`` is not int-like.
        """
        td = self.stats.cumulative_counts.get(name)
        if td is None:
            # Must have the same (sum, buckets) shape as the populated case:
            # collect() unpacks two values.
            return 0, [["+Inf", 0]]

        # Prometheus requires the buckets to be sorted; user supplied bins
        # may be unordered, repeated, or contain blank pieces ("5,,10").
        bounds = sorted({int(b) for b in self.bins if str(b).strip()})

        # Bounds are in ms and are divided by 1000 before querying the
        # digest, so the digest presumably stores seconds - TODO confirm.
        # NOTE(review): the CDF is evaluated at bound + 1 ms rather than at
        # the bound itself - confirm this offset is intended.
        td_hist = [
            [str(bound), int(td.cdf((bound + 1) / 1000) * td.weight)]
            for bound in bounds
        ]
        td_hist.append(["+Inf", td.weight])

        # mean * weight reconstructs the sum of all observations; * 1000
        # mirrors the / 1000 above to express it in ms.
        return td.mean * 1000 * td.weight, td_hist

    def collect(self):
        """Yield the ``<name>_latency_ms`` histogram metric family."""
        sum_value, buckets = self.get_buckets(self.name)
        yield HistogramMetricFamily(
            f"{self.name}_latency_ms",
            f"Latency in ms for {self.name}",
            buckets,
            sum_value,
        )
198
+
199
+
200
class Prom:
    """Expose workload statistics through a Prometheus HTTP endpoint.

    For every id seen in a report, a latency histogram collector and a set
    of gauges are created once and then refreshed on each ``publish()``.
    """

    # (metric name suffix, help text) of the gauges created for every id.
    # The order matters: gauge N receives the N-th value of ``row[3:]``.
    _GAUGE_SPECS = (
        ("tot_ops", "total count of ops"),
        ("tot_ops_s", "derived value from tot_ops / elapsed"),
        ("period_ops", "ops count for the recent window"),
        ("period_ops_s", "derived value from period_ops / window duration"),
    )

    def __init__(self, prom_port: int = 26260, stats: Stats = None, bins: list = None):
        """Start the metrics HTTP server and create the ``threads`` gauge.

        Args:
            prom_port: port passed to ``prometheus_client.start_http_server``.
            stats: object handed to each ``CustomHistogram``; when ``None``
                no histogram collectors are registered.
            bins: histogram bucket bounds handed to each ``CustomHistogram``;
                ``None`` means an empty list.
        """
        self.prom_latency: dict[str, list[prom.Gauge]] = {}
        self.stats = stats
        # None instead of a literal [] default, so instances never share
        # one list object.
        self.bins = [] if bins is None else bins

        # don't stop just because prom server can't start
        try:
            prom.start_http_server(prom_port)
        except OSError as e:
            logger.warning(f"Cannot start prometheus server: {e}")

        self.threads = prom.Gauge(
            "threads", "count of connection threads to the database."
        )

    def publish(self, report: list, td: dict = None):
        """Refresh the gauges from *report*.

        Args:
            report: list of rows; ``row[1]`` is the id, ``row[2]`` the thread
                count and ``row[3:]`` the values fed to the gauges, in the
                order of ``_GAUGE_SPECS``.
            td: currently unused; kept for interface compatibility.
        """
        for row in report:
            stmt_id = row[1]

            gauges = self.prom_latency.get(stmt_id)
            if gauges is None:
                # First time this id is seen: create its metrics once.
                if self.stats is not None:
                    # Without stats the collector could not compute any
                    # bucket and would fail when scraped.
                    REGISTRY.register(
                        CustomHistogram(stmt_id, self.stats, self.bins)
                    )

                gauges = [
                    prom.Gauge(f"{stmt_id}__{suffix}", doc)
                    for suffix, doc in self._GAUGE_SPECS
                ]
                self.prom_latency[stmt_id] = gauges

            # zip() pairs every gauge with its value and stops at the last
            # gauge; a fixed row[3:6] slice left the period_ops_s gauge
            # without a value.
            for gauge, value in zip(gauges, row[3:]):
                gauge.set(value)

        # threads value is the same for all rows
        if report:
            self.threads.set(report[0][2])
249
+
250
+
219
251
  def get_driver_from_scheme(scheme: str):
220
252
  return {
221
253
  "postgres": "postgres",
@@ -654,23 +686,6 @@ def ddl_to_yaml(ddl: str):
654
686
  elif within_brackets > 0 and i == ",":
655
687
  col_def += ":"
656
688
 
657
- # process the content within parenthesis in the
658
- # CREATE TABLE stmt char by char to distinguish
659
- # the comma for separating columns vs the comma
660
- # included in single quote strings such as those in DEFAULT
661
- # eg: mycol STRING NULL DEFAULT 'corporate, inc'
662
- within_quote = False
663
- col_def_str = col_def
664
- col_def = ""
665
- for i in col_def_str:
666
- if i == "'":
667
- within_quote = not within_quote
668
- continue
669
- if within_quote:
670
- continue
671
- else:
672
- col_def += i
673
-
674
689
  col_def = [x.strip().lower() for x in col_def.split(",")]
675
690
 
676
691
  ll = []
@@ -1,12 +1,14 @@
1
+ import builtins
1
2
  import csv
2
3
  import datetime as dt
3
4
  import logging
4
5
  import multiprocessing as mp
5
6
  import os
6
- import pandas as pd
7
- import uuid
8
7
  import random
9
- import builtins
8
+ import uuid
9
+
10
+ import pandas as pd
11
+
10
12
  from .common import import_class_at_runtime
11
13
 
12
14
  logger = logging.getLogger("dbworkload")
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dbworkload"
3
- version = "0.8.0"
3
+ version = "0.8.2"
4
4
  description = "Workload framework"
5
5
  authors = ["Fabio Ghirardello"]
6
6
  license = "GPLv3+"
File without changes
File without changes