dbworkload 0.6.3__tar.gz → 0.6.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dbworkload-0.6.3 → dbworkload-0.6.5}/PKG-INFO +1 -1
- {dbworkload-0.6.3 → dbworkload-0.6.5}/dbworkload/cli/util.py +10 -4
- {dbworkload-0.6.3 → dbworkload-0.6.5}/dbworkload/models/run.py +2 -0
- {dbworkload-0.6.3 → dbworkload-0.6.5}/dbworkload/models/util.py +61 -33
- {dbworkload-0.6.3 → dbworkload-0.6.5}/dbworkload/utils/common.py +1 -1
- {dbworkload-0.6.3 → dbworkload-0.6.5}/pyproject.toml +1 -1
- {dbworkload-0.6.3 → dbworkload-0.6.5}/LICENSE +0 -0
- {dbworkload-0.6.3 → dbworkload-0.6.5}/README.md +0 -0
- {dbworkload-0.6.3 → dbworkload-0.6.5}/dbworkload/__init__.py +0 -0
- {dbworkload-0.6.3 → dbworkload-0.6.5}/dbworkload/cli/dep.py +0 -0
- {dbworkload-0.6.3 → dbworkload-0.6.5}/dbworkload/cli/main.py +0 -0
- {dbworkload-0.6.3 → dbworkload-0.6.5}/dbworkload/templates/stub.j2 +0 -0
- {dbworkload-0.6.3 → dbworkload-0.6.5}/dbworkload/utils/simplefaker.py +0 -0
|
@@ -134,12 +134,12 @@ def util_yaml(
|
|
|
134
134
|
|
|
135
135
|
|
|
136
136
|
@app.command(
|
|
137
|
-
"
|
|
137
|
+
"merge_sort",
|
|
138
138
|
epilog=EPILOG,
|
|
139
139
|
no_args_is_help=True,
|
|
140
|
-
help="Merge multiple sorted CSV files into 1+ files.",
|
|
140
|
+
help="Merge-Sort multiple sorted CSV files into 1+ files.",
|
|
141
141
|
)
|
|
142
|
-
def
|
|
142
|
+
def util_sort_merge(
|
|
143
143
|
input: Optional[Path] = typer.Option(
|
|
144
144
|
...,
|
|
145
145
|
"--input",
|
|
@@ -166,8 +166,14 @@ def util_merge(
|
|
|
166
166
|
resolve_path=True,
|
|
167
167
|
),
|
|
168
168
|
csv_max_rows: int = Param.CSVMaxRows,
|
|
169
|
+
compress: bool = typer.Option(
|
|
170
|
+
True,
|
|
171
|
+
"--no-compress",
|
|
172
|
+
show_default=False,
|
|
173
|
+
help="Do not gzip output files.",
|
|
174
|
+
),
|
|
169
175
|
):
|
|
170
|
-
dbworkload.models.util.
|
|
176
|
+
dbworkload.models.util.util_merge_sort(input, output, csv_max_rows, compress)
|
|
171
177
|
|
|
172
178
|
|
|
173
179
|
@app.command(
|
|
@@ -5,23 +5,25 @@ from jinja2 import Environment, PackageLoader
|
|
|
5
5
|
from pathlib import PosixPath
|
|
6
6
|
from plotly.subplots import make_subplots
|
|
7
7
|
from pytdigest import TDigest
|
|
8
|
-
import dbworkload
|
|
9
8
|
import datetime as dt
|
|
9
|
+
import dbworkload
|
|
10
10
|
import dbworkload.utils.common
|
|
11
11
|
import dbworkload.utils.simplefaker
|
|
12
|
+
import gzip
|
|
12
13
|
import itertools
|
|
13
14
|
import logging
|
|
14
15
|
import numpy as np
|
|
15
16
|
import os
|
|
16
17
|
import pandas as pd
|
|
17
|
-
import pandas as pd
|
|
18
18
|
import plotext as plt
|
|
19
19
|
import plotly.graph_objects as go
|
|
20
20
|
import plotly.io as pio
|
|
21
|
+
import shutil
|
|
21
22
|
import sqlparse
|
|
22
23
|
import sys
|
|
23
24
|
import yaml
|
|
24
25
|
|
|
26
|
+
|
|
25
27
|
logger = logging.getLogger("dbworkload")
|
|
26
28
|
logger.setLevel(logging.INFO)
|
|
27
29
|
|
|
@@ -111,9 +113,13 @@ def util_yaml(input: str, output: str):
|
|
|
111
113
|
f.write(dbworkload.utils.common.ddl_to_yaml(ddl))
|
|
112
114
|
|
|
113
115
|
|
|
114
|
-
def
|
|
115
|
-
|
|
116
|
-
|
|
116
|
+
def util_merge_sort(input_dir: str, output_dir: str, csv_max_rows: int, compress: bool):
|
|
117
|
+
from operator import itemgetter
|
|
118
|
+
|
|
119
|
+
class MergeSort:
|
|
120
|
+
def __init__(
|
|
121
|
+
self, input_dir: str, output_dir: str, csv_max_rows: int, compress: bool
|
|
122
|
+
):
|
|
117
123
|
# input CSV files - it assumes files are already sorted
|
|
118
124
|
files = os.listdir(input_dir)
|
|
119
125
|
# Filtering only the files.
|
|
@@ -123,12 +129,18 @@ def util_merge(input_dir: str, output_dir: str, csv_max_rows: int):
|
|
|
123
129
|
if os.path.isfile(os.path.join(input_dir, f))
|
|
124
130
|
]
|
|
125
131
|
|
|
132
|
+
self.compress = ".gz" if compress else ""
|
|
133
|
+
self.file_extension = self.CSVs[0][-3:]
|
|
134
|
+
|
|
126
135
|
self.CSV_MAX_ROWS = csv_max_rows
|
|
127
136
|
self.COUNTER = 0
|
|
128
137
|
self.C = 0
|
|
129
138
|
|
|
139
|
+
# source holds the list of lines in each CSV file, marked by the idx number
|
|
140
|
+
# file_handlers holds the open file handler for each CSV file, marked by the idx number
|
|
130
141
|
self.source: dict[int, list] = {}
|
|
131
142
|
self.file_handlers: dict[int, TextIOWrapper] = {}
|
|
143
|
+
|
|
132
144
|
self.output: TextIOWrapper
|
|
133
145
|
if not output_dir:
|
|
134
146
|
self.output_dir = str(input_dir) + ".merged"
|
|
@@ -160,7 +172,7 @@ def util_merge(input_dir: str, output_dir: str, csv_max_rows: int):
|
|
|
160
172
|
self.source[idx].append(line)
|
|
161
173
|
else:
|
|
162
174
|
# reached end of file
|
|
163
|
-
logger.
|
|
175
|
+
logger.debug(
|
|
164
176
|
f"initial_fill: CSV file '{csv}' at source index {idx} reached EOF."
|
|
165
177
|
)
|
|
166
178
|
f.close()
|
|
@@ -179,23 +191,41 @@ def util_merge(input_dir: str, output_dir: str, csv_max_rows: int):
|
|
|
179
191
|
self.source[idx].append(line)
|
|
180
192
|
else:
|
|
181
193
|
# reached end of file
|
|
182
|
-
logger.
|
|
194
|
+
logger.debug(f"index {idx} reached EOF.")
|
|
183
195
|
f.close()
|
|
184
196
|
del self.file_handlers[idx]
|
|
185
197
|
except Exception as e:
|
|
186
198
|
logger.error("Excepton in replenish_queue: ", e)
|
|
187
199
|
|
|
200
|
+
def close_output(self):
|
|
201
|
+
self.output.close()
|
|
202
|
+
|
|
203
|
+
if self.compress:
|
|
204
|
+
with open(self.output.name, "rb") as f_in:
|
|
205
|
+
with gzip.open(f"{self.output.name}{self.compress}", "wb") as f_out:
|
|
206
|
+
shutil.copyfileobj(f_in, f_out)
|
|
207
|
+
os.remove(self.output.name)
|
|
208
|
+
|
|
209
|
+
logger.info(f"Saved {self.output_filename}{self.compress}")
|
|
210
|
+
|
|
211
|
+
def open_new_output(self):
|
|
212
|
+
self.output_filename = (
|
|
213
|
+
f"out_{str.zfill(str(self.COUNTER), 6)}.{self.file_extension}"
|
|
214
|
+
)
|
|
215
|
+
self.output = open(
|
|
216
|
+
os.path.join(self.output_dir, self.output_filename),
|
|
217
|
+
"+w",
|
|
218
|
+
)
|
|
219
|
+
|
|
188
220
|
def write_to_csv(self, v: str):
|
|
221
|
+
# create a new output file if the limit is reached
|
|
189
222
|
if self.C >= self.CSV_MAX_ROWS:
|
|
190
|
-
self.
|
|
223
|
+
self.close_output()
|
|
224
|
+
|
|
191
225
|
self.COUNTER += 1
|
|
192
226
|
self.C = 0
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
self.output_dir, f"out_{str.zfill(str(self.COUNTER), 3)}.csv"
|
|
196
|
-
),
|
|
197
|
-
"+w",
|
|
198
|
-
)
|
|
227
|
+
|
|
228
|
+
self.open_new_output()
|
|
199
229
|
|
|
200
230
|
self.output.write(v)
|
|
201
231
|
self.C += 1
|
|
@@ -209,56 +239,54 @@ def util_merge(input_dir: str, output_dir: str, csv_max_rows: int):
|
|
|
209
239
|
self.initial_fill(csv, idx)
|
|
210
240
|
|
|
211
241
|
# the source dict now has a key for every file and a list of the first values read
|
|
242
|
+
# the file_handler dict has a key for every file and a pointer to the open file handler
|
|
212
243
|
|
|
213
|
-
|
|
214
|
-
# pop the first value in each source to
|
|
215
|
-
# `
|
|
244
|
+
staging = []
|
|
245
|
+
# pop the first value in each source list to list `staging`
|
|
246
|
+
# `staging` will have the first values of all source CSV files
|
|
216
247
|
for k, v in self.source.items():
|
|
217
248
|
try:
|
|
218
|
-
|
|
249
|
+
staging.append((v.pop(0), k))
|
|
219
250
|
except IndexError as e:
|
|
220
251
|
pass
|
|
252
|
+
from pprint import pprint
|
|
221
253
|
|
|
222
254
|
first_k = None
|
|
223
255
|
first_v = None
|
|
224
|
-
self.
|
|
225
|
-
os.path.join(
|
|
226
|
-
self.output_dir, f"out_{str.zfill(str(self.COUNTER), 3)}.csv"
|
|
227
|
-
),
|
|
228
|
-
"+w",
|
|
229
|
-
)
|
|
256
|
+
self.open_new_output()
|
|
230
257
|
|
|
231
|
-
# sort list `
|
|
258
|
+
# sort list `staging`
|
|
232
259
|
# pop the first value (the smallest) in `first_v`
|
|
233
260
|
# make a note of the source of that value in `first_k`
|
|
234
261
|
# replenish the corresponding source
|
|
262
|
+
|
|
235
263
|
while True:
|
|
236
264
|
if first_k is not None:
|
|
237
265
|
try:
|
|
238
266
|
self.replenish_source_list(first_k)
|
|
239
|
-
|
|
267
|
+
staging.append((self.source[first_k].pop(0), first_k))
|
|
240
268
|
|
|
241
269
|
except IndexError as e:
|
|
242
270
|
# the source list is empty
|
|
243
|
-
logger.
|
|
271
|
+
logger.debug(f"source list {first_k} is now empty")
|
|
244
272
|
first_k = None
|
|
245
273
|
|
|
246
|
-
if
|
|
247
|
-
|
|
274
|
+
if staging:
|
|
275
|
+
staging.sort(key=itemgetter(0))
|
|
248
276
|
try:
|
|
249
|
-
first_v, first_k =
|
|
277
|
+
first_v, first_k = staging.pop(0)
|
|
250
278
|
self.write_to_csv(first_v)
|
|
251
279
|
except IndexError as e:
|
|
252
|
-
logger.
|
|
280
|
+
logger.warning("Exception in main: ", e)
|
|
253
281
|
self.output.close()
|
|
254
282
|
else:
|
|
255
283
|
break
|
|
256
284
|
|
|
257
|
-
self.
|
|
285
|
+
self.close_output()
|
|
258
286
|
|
|
259
287
|
logger.info("Completed")
|
|
260
288
|
|
|
261
|
-
|
|
289
|
+
MergeSort(input_dir, output_dir, csv_max_rows, compress).run()
|
|
262
290
|
|
|
263
291
|
|
|
264
292
|
def util_plot(input: PosixPath):
|
|
@@ -157,7 +157,7 @@ class Stats:
|
|
|
157
157
|
int(self.cumulative_counts[id].weight),
|
|
158
158
|
int(self.cumulative_counts[id].weight // elapsed),
|
|
159
159
|
int(td.weight),
|
|
160
|
-
int(td.weight //
|
|
160
|
+
int(td.weight // window_elapsed),
|
|
161
161
|
round(td.mean * 1000, 2),
|
|
162
162
|
] + [round(x * 1000, 2) for x in td.inverse_cdf(self.quantiles)]
|
|
163
163
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|