python-jack-knife 0.7.0__py3-none-any.whl → 0.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pjk/history.py +3 -0
- pjk/integrations/postgres_pipe.py +146 -96
- pjk/pipes/factory.py +1 -1
- pjk/pipes/query_pipe.py +2 -2
- pjk/pipes/select.py +2 -2
- pjk/sinks/graph_bar_line.py +17 -10
- pjk/sources/factory.py +13 -0
- pjk/sources/npy_source.py +3 -4
- pjk/sources/s3_select_source.py +373 -0
- pjk/sources/sql_source.py +13 -4
- pjk/version.py +1 -1
- {python_jack_knife-0.7.0.dist-info → python_jack_knife-0.7.4.dist-info}/METADATA +1 -1
- {python_jack_knife-0.7.0.dist-info → python_jack_knife-0.7.4.dist-info}/RECORD +17 -16
- {python_jack_knife-0.7.0.dist-info → python_jack_knife-0.7.4.dist-info}/WHEEL +0 -0
- {python_jack_knife-0.7.0.dist-info → python_jack_knife-0.7.4.dist-info}/entry_points.txt +0 -0
- {python_jack_knife-0.7.0.dist-info → python_jack_knife-0.7.4.dist-info}/licenses/LICENSE +0 -0
- {python_jack_knife-0.7.0.dist-info → python_jack_knife-0.7.4.dist-info}/top_level.txt +0 -0
pjk/history.py
CHANGED
pjk/integrations/postgres_pipe.py
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
import base64
|
|
7
7
|
import datetime as _dt
|
|
8
8
|
import uuid
|
|
9
|
+
import time
|
|
9
10
|
from decimal import Decimal
|
|
10
11
|
from typing import Any, Dict, Optional
|
|
11
12
|
|
|
@@ -13,33 +14,55 @@ from pjk.usage import ParsedToken, Usage
|
|
|
13
14
|
from pjk.common import Integration
|
|
14
15
|
from pjk.pipes.query_pipe import QueryPipe
|
|
15
16
|
|
|
17
|
+
MAX_RETRIES = 3
|
|
18
|
+
BASE_DELAY = 0.1 # seconds
|
|
16
19
|
|
|
17
20
|
class DBClient:
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
21
|
+
"""Per-instance pg8000 connection wrapper. No shared state."""
|
|
22
|
+
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
host: str,
|
|
26
|
+
username: str,
|
|
27
|
+
password: Optional[str],
|
|
28
|
+
db_name: str,
|
|
29
|
+
port: int = 5432,
|
|
30
|
+
ssl: bool = False,
|
|
31
|
+
):
|
|
32
|
+
import pg8000 # lazy import
|
|
33
|
+
|
|
34
|
+
kwargs = dict(
|
|
35
|
+
user=username,
|
|
36
|
+
password=password,
|
|
37
|
+
host=host,
|
|
38
|
+
database=db_name,
|
|
39
|
+
port=port,
|
|
40
|
+
)
|
|
41
|
+
if ssl:
|
|
42
|
+
import ssl as _ssl
|
|
43
|
+
|
|
44
|
+
kwargs["ssl_context"] = _ssl.create_default_context()
|
|
45
|
+
|
|
46
|
+
try:
|
|
47
|
+
self.conn = pg8000.connect(**kwargs)
|
|
48
|
+
self.conn.autocommit = True
|
|
49
|
+
except Exception as e:
|
|
50
|
+
print("Failed to connect to DB")
|
|
51
|
+
raise e
|
|
36
52
|
|
|
37
53
|
def close(self):
|
|
38
|
-
if self
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
54
|
+
if getattr(self, "conn", None) is None:
|
|
55
|
+
return
|
|
56
|
+
|
|
57
|
+
import pg8000 # lazy
|
|
58
|
+
|
|
59
|
+
try:
|
|
60
|
+
self.conn.close()
|
|
61
|
+
except pg8000.exceptions.InterfaceError:
|
|
62
|
+
# Already closed / broken; ignore.
|
|
63
|
+
pass
|
|
64
|
+
finally:
|
|
65
|
+
self.conn = None
|
|
43
66
|
|
|
44
67
|
|
|
45
68
|
def _iso_dt(x: _dt.datetime) -> str:
|
|
@@ -90,43 +113,67 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
|
|
|
90
113
|
return {col: normalize(val) for col, val in zip(cols, row)}
|
|
91
114
|
|
|
92
115
|
|
|
93
|
-
class PostgresPipe(QueryPipe,Integration):
|
|
94
|
-
name =
|
|
116
|
+
class PostgresPipe(QueryPipe, Integration):
|
|
117
|
+
name = "postgres"
|
|
95
118
|
desc = "Postgres query pipe; executes SQL over input record['query']."
|
|
96
|
-
arg0 = ("instance",
|
|
119
|
+
arg0 = ("instance", "instance of database.")
|
|
97
120
|
examples = [
|
|
98
|
-
[
|
|
99
|
-
["{'query': 'SELECT * from MY_TABLE;'}",
|
|
100
|
-
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}",
|
|
101
|
-
["{'query': 'SELECT
|
|
121
|
+
["myquery.sql", "postgres:mydb", "-"],
|
|
122
|
+
["{'query': 'SELECT * from MY_TABLE;'}", "postgres:mydb", "-"],
|
|
123
|
+
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", "postgres:mydb"],
|
|
124
|
+
["{'query': 'SELECT procedure_batch(%s, ...), batch_params:{...}"],
|
|
125
|
+
["{'query': 'SELECT procedure_jsonb(%s, ...), json_params:json_string"],
|
|
102
126
|
]
|
|
103
127
|
|
|
104
128
|
# name, type, default
|
|
105
129
|
config_tuples = [
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
130
|
+
("db_name", str, None),
|
|
131
|
+
("host", str, None),
|
|
132
|
+
("user", str, None),
|
|
133
|
+
("password", str, None),
|
|
134
|
+
("port", int, 5432),
|
|
135
|
+
("ssl", bool, False),
|
|
112
136
|
]
|
|
113
|
-
|
|
114
|
-
def __init__(self, ptok: ParsedToken, u: Usage):
|
|
115
|
-
super().__init__(ptok, u)
|
|
116
137
|
|
|
117
|
-
|
|
138
|
+
def __init__(self, ptok: ParsedToken, u: Usage, root=None):
|
|
139
|
+
super().__init__(ptok, u, root=root)
|
|
140
|
+
|
|
141
|
+
self.db_name = u.get_config("db_name")
|
|
118
142
|
self.db_host = u.get_config("host")
|
|
119
143
|
self.db_user = u.get_config("user")
|
|
120
144
|
self.db_pass = u.get_config("password")
|
|
121
145
|
self.db_port = u.get_config("port")
|
|
122
|
-
self.db_ssl
|
|
146
|
+
self.db_ssl = u.get_config("ssl")
|
|
123
147
|
|
|
124
|
-
|
|
148
|
+
# Standard params field: single-exec params (list/tuple/dict/single value)
|
|
149
|
+
self.params_field = "params"
|
|
150
|
+
|
|
151
|
+
# Legacy batch path: list[tuple|list|dict] → executemany
|
|
152
|
+
self.batch_field = "batch_params"
|
|
153
|
+
|
|
154
|
+
# Explicit JSON payload field (no query sniffing).
|
|
155
|
+
# If present, this value is passed to cur.execute(query, json_params).
|
|
156
|
+
self.json_params_field = "json_params"
|
|
157
|
+
|
|
158
|
+
# One DB client (and thus one connection) per PostgresPipe instance.
|
|
159
|
+
# Under your invariant (one thread per pipe), this is thread-safe.
|
|
160
|
+
self.client = DBClient(
|
|
161
|
+
host=self.db_host,
|
|
162
|
+
username=self.db_user,
|
|
163
|
+
password=self.db_pass,
|
|
164
|
+
db_name=self.db_name,
|
|
165
|
+
port=self.db_port,
|
|
166
|
+
ssl=self.db_ssl,
|
|
167
|
+
)
|
|
125
168
|
|
|
126
169
|
def reset(self):
|
|
127
170
|
# stateless across reset
|
|
128
171
|
pass
|
|
129
172
|
|
|
173
|
+
def close(self):
|
|
174
|
+
if self.client is not None:
|
|
175
|
+
self.client.close()
|
|
176
|
+
|
|
130
177
|
def _make_header(self, cur, query: str, params=None) -> Dict[str, Any]:
|
|
131
178
|
"""
|
|
132
179
|
Inspect the cursor and build a full header record.
|
|
@@ -136,7 +183,7 @@ class PostgresPipe(QueryPipe,Integration):
|
|
|
136
183
|
"db": self.db_name,
|
|
137
184
|
"dbhost": self.db_host,
|
|
138
185
|
}
|
|
139
|
-
if params:
|
|
186
|
+
if params is not None:
|
|
140
187
|
h["params"] = params
|
|
141
188
|
|
|
142
189
|
if cur.description:
|
|
@@ -155,64 +202,67 @@ class PostgresPipe(QueryPipe,Integration):
|
|
|
155
202
|
return h
|
|
156
203
|
|
|
157
204
|
def execute_query_returning_S_xO_iterable(self, record):
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
)
|
|
205
|
+
query = record.get(self.query_field)
|
|
206
|
+
if not query:
|
|
207
|
+
record["_error"] = "missing query"
|
|
208
|
+
yield record
|
|
209
|
+
return
|
|
210
|
+
|
|
211
|
+
# Priority: json_params > batch_params > params
|
|
212
|
+
json_params = record.get(self.json_params_field, None)
|
|
213
|
+
batch = record.get(self.batch_field, None)
|
|
214
|
+
params = record.get(self.params_field, None)
|
|
215
|
+
|
|
216
|
+
cur = self.client.conn.cursor()
|
|
166
217
|
try:
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
218
|
+
did_executemany = False
|
|
219
|
+
header_params = None
|
|
220
|
+
|
|
221
|
+
# ---------- execute ----------
|
|
222
|
+
if json_params is not None:
|
|
223
|
+
# Explicit JSON payload; caller controls shape.
|
|
224
|
+
# We don't inspect query or payload.
|
|
225
|
+
if isinstance(json_params, (list, tuple, dict)):
|
|
226
|
+
cur.execute(query, json_params)
|
|
227
|
+
else:
|
|
228
|
+
cur.execute(query, (json_params,))
|
|
229
|
+
header_params = {self.json_params_field: json_params}
|
|
230
|
+
|
|
231
|
+
elif batch is not None:
|
|
232
|
+
# Legacy executemany path; no magic.
|
|
233
|
+
if len(batch) == 0:
|
|
234
|
+
cur.execute("SELECT 1")
|
|
235
|
+
header_params = {"batch_size": 0}
|
|
236
|
+
elif len(batch) == 1:
|
|
237
|
+
cur.execute(query, batch[0])
|
|
238
|
+
header_params = {"batch_size": 1, "params": batch[0]}
|
|
239
|
+
else:
|
|
240
|
+
cur.executemany(query, batch)
|
|
241
|
+
did_executemany = True
|
|
242
|
+
header_params = {"batch_size": len(batch)}
|
|
243
|
+
|
|
244
|
+
else:
|
|
245
|
+
# Single-statement path.
|
|
246
|
+
if params is None:
|
|
247
|
+
cur.execute(query)
|
|
248
|
+
header_params = None
|
|
194
249
|
else:
|
|
195
|
-
if params
|
|
196
|
-
cur.execute(query)
|
|
250
|
+
if isinstance(params, (list, tuple, dict)):
|
|
251
|
+
cur.execute(query, params)
|
|
197
252
|
else:
|
|
198
|
-
|
|
199
|
-
cur.execute(query, params)
|
|
200
|
-
else:
|
|
201
|
-
cur.execute(query, (params,))
|
|
253
|
+
cur.execute(query, (params,))
|
|
202
254
|
header_params = params
|
|
203
255
|
|
|
204
|
-
|
|
205
|
-
|
|
256
|
+
# ---------- header ----------
|
|
257
|
+
yield self._make_header(cur, query, header_params)
|
|
206
258
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
yield _row_to_dict(cur, row)
|
|
259
|
+
# ---------- stream rows (only meaningful for single execute that returns rows) ----------
|
|
260
|
+
if not did_executemany and cur.description:
|
|
261
|
+
cols = [d[0] for d in cur.description]
|
|
262
|
+
if not (len(cols) == 1 and cols[0] == "ingest_event"):
|
|
263
|
+
for row in cur:
|
|
264
|
+
yield _row_to_dict(cur, row)
|
|
214
265
|
|
|
215
|
-
finally:
|
|
216
|
-
cur.close()
|
|
217
266
|
finally:
|
|
218
|
-
|
|
267
|
+
cur.close()
|
|
268
|
+
# connection stays open for this pipe; closed in .close()
|
pjk/pipes/factory.py
CHANGED
pjk/pipes/query_pipe.py
CHANGED
|
@@ -35,8 +35,8 @@ class QueryPipe(Pipe):
|
|
|
35
35
|
return u
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
39
|
-
super().__init__(ptok, usage)
|
|
38
|
+
def __init__(self, ptok: ParsedToken, usage: Usage, root = None):
|
|
39
|
+
super().__init__(ptok, usage, root=root)
|
|
40
40
|
self.output_shape = usage.get_param('shape')
|
|
41
41
|
self.count = usage.get_param('count')
|
|
42
42
|
self.query_field = 'query' # for all subclasses
|
pjk/pipes/select.py
CHANGED
|
@@ -10,12 +10,12 @@ class SelectFields(DeepCopyPipe):
|
|
|
10
10
|
@classmethod
|
|
11
11
|
def usage(cls):
|
|
12
12
|
usage = Usage(
|
|
13
|
-
name='
|
|
13
|
+
name='select',
|
|
14
14
|
desc='Select specific fields from each record.',
|
|
15
15
|
component_class=cls
|
|
16
16
|
)
|
|
17
17
|
usage.def_arg(name='fields', usage='Comma-separated list of fields to retain')
|
|
18
|
-
usage.def_example(expr_tokens=["{id:1, dir:'up', color:'blue'}", '
|
|
18
|
+
usage.def_example(expr_tokens=["{id:1, dir:'up', color:'blue'}", 'select:id,color'], expect="id: 1, color:'blue'")
|
|
19
19
|
return usage
|
|
20
20
|
|
|
21
21
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
pjk/sinks/graph_bar_line.py
CHANGED
|
@@ -20,10 +20,6 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence
|
|
|
20
20
|
from datetime import date, datetime
|
|
21
21
|
from collections import defaultdict
|
|
22
22
|
|
|
23
|
-
import numpy as np
|
|
24
|
-
import pandas as pd
|
|
25
|
-
|
|
26
|
-
|
|
27
23
|
# ----------------------------- Public Params -----------------------------
|
|
28
24
|
@dataclass
|
|
29
25
|
class GraphParams:
|
|
@@ -48,6 +44,8 @@ class TimeDetector:
|
|
|
48
44
|
|
|
49
45
|
@staticmethod
|
|
50
46
|
def is_time(xs: pd.Series) -> bool:
|
|
47
|
+
import numpy as np # lazy
|
|
48
|
+
import pandas as pd # lazy
|
|
51
49
|
# Already datetime dtype?
|
|
52
50
|
if pd.api.types.is_datetime64_any_dtype(xs):
|
|
53
51
|
return True
|
|
@@ -74,6 +72,7 @@ class TimeDetector:
|
|
|
74
72
|
|
|
75
73
|
@staticmethod
|
|
76
74
|
def parse_times(series: pd.Series) -> pd.Series:
|
|
75
|
+
import pandas as pd # lazy
|
|
77
76
|
numeric = pd.to_numeric(series, errors="coerce")
|
|
78
77
|
parsed = None
|
|
79
78
|
if numeric.notna().mean() >= 0.9:
|
|
@@ -92,6 +91,8 @@ class MultiYAdapter:
|
|
|
92
91
|
"""Builds wide dataframe: columns = ['x'] + y_fields; sums duplicates of x."""
|
|
93
92
|
@staticmethod
|
|
94
93
|
def to_df(records: Iterable[Dict[str, Any]], x_field: str, y_fields: Sequence[str]) -> pd.DataFrame:
|
|
94
|
+
import pandas as pd # lazy
|
|
95
|
+
import numpy as np # lazy
|
|
95
96
|
rows: List[Dict[str, Any]] = []
|
|
96
97
|
for r in records:
|
|
97
98
|
if x_field not in r:
|
|
@@ -120,7 +121,8 @@ class MultiYAdapter:
|
|
|
120
121
|
class SingleYWithSetsAdapter:
|
|
121
122
|
"""Legacy: single y_field + optional per-row set_name to create series."""
|
|
122
123
|
@staticmethod
|
|
123
|
-
def to_df(records: Iterable[Dict[str, Any]], x_field: str, y_field: str)
|
|
124
|
+
def to_df(records: Iterable[Dict[str, Any]], x_field: str, y_field: str):
|
|
125
|
+
import pandas as pd # lazy
|
|
124
126
|
triplets = [] # (x, y, set_name)
|
|
125
127
|
for r in records:
|
|
126
128
|
if x_field in r and y_field in r:
|
|
@@ -141,8 +143,10 @@ class GraphPlotter:
|
|
|
141
143
|
self.y_fields = list(dict.fromkeys(self.pms.y_fields)) # dedupe, preserve order
|
|
142
144
|
|
|
143
145
|
def plot(self, chart_type: str = "line"):
|
|
144
|
-
import matplotlib.pyplot as plt
|
|
145
|
-
import matplotlib.dates as mdates
|
|
146
|
+
import matplotlib.pyplot as plt # lazy
|
|
147
|
+
import matplotlib.dates as mdates # lazy
|
|
148
|
+
import pandas as pd # lazy
|
|
149
|
+
import numpy as np # lazy
|
|
146
150
|
|
|
147
151
|
fig = plt.figure()
|
|
148
152
|
ax = plt.gca()
|
|
@@ -258,7 +262,7 @@ class GraphPlotter:
|
|
|
258
262
|
# ---------- Formatting helpers ----------
|
|
259
263
|
@staticmethod
|
|
260
264
|
def _format_time_axis(ax, df: pd.DataFrame) -> None:
|
|
261
|
-
import matplotlib.dates as mdates
|
|
265
|
+
import matplotlib.dates as mdates # lazy
|
|
262
266
|
fig = ax.get_figure()
|
|
263
267
|
ts = df["ts"]
|
|
264
268
|
if ts.empty:
|
|
@@ -289,6 +293,7 @@ class GraphPlotter:
|
|
|
289
293
|
|
|
290
294
|
def _bars_time(self, ax, df: pd.DataFrame, y_cols: Sequence[str]) -> None:
|
|
291
295
|
# Grouped bars at each timestamp using index positions
|
|
296
|
+
import numpy as np # lazy
|
|
292
297
|
x_vals = df["ts"].to_numpy(); idx = np.arange(len(x_vals))
|
|
293
298
|
n = len(y_cols); width = 0.8 / max(n, 1)
|
|
294
299
|
for i, y in enumerate(y_cols):
|
|
@@ -298,6 +303,7 @@ class GraphPlotter:
|
|
|
298
303
|
ax.set_xticks(idx, [pd.to_datetime(t).strftime("%Y-%m-%d %H:%M") for t in x_vals], rotation=45)
|
|
299
304
|
|
|
300
305
|
def _bars_categorical(self, ax, df: pd.DataFrame, y_cols: Sequence[str]) -> None:
|
|
306
|
+
import numpy as np # lazy
|
|
301
307
|
seen = set(); ordered_x: List[Any] = []
|
|
302
308
|
for x in df["x"].tolist():
|
|
303
309
|
if x not in seen:
|
|
@@ -310,6 +316,7 @@ class GraphPlotter:
|
|
|
310
316
|
ax.set_xticks(idx, ordered_x, rotation=45)
|
|
311
317
|
|
|
312
318
|
def _lines_categorical(self, ax, df: pd.DataFrame, y_cols: Sequence[str]) -> None:
|
|
319
|
+
import numpy as np # lazy
|
|
313
320
|
seen = set(); ordered_x: List[Any] = []
|
|
314
321
|
for x in df["x"].tolist():
|
|
315
322
|
if x not in seen:
|
|
@@ -322,7 +329,7 @@ class GraphPlotter:
|
|
|
322
329
|
|
|
323
330
|
# ---------- Misc ----------
|
|
324
331
|
def _apply_args_dict(self) -> None:
|
|
325
|
-
import matplotlib.pyplot as plt
|
|
332
|
+
import matplotlib.pyplot as plt # lazy
|
|
326
333
|
for name, val in getattr(self.pms, "args_dict", {}).items():
|
|
327
334
|
fn = getattr(plt, name, None)
|
|
328
335
|
if callable(fn):
|
|
@@ -345,7 +352,7 @@ def graph_bar_line(obj, type):
|
|
|
345
352
|
Returns (fig, ax) for optional downstream tweaks (safe to ignore).
|
|
346
353
|
"""
|
|
347
354
|
# Lazy import (ensures MPL backend)
|
|
348
|
-
import matplotlib.pyplot as plt # noqa: F401
|
|
355
|
+
import matplotlib.pyplot as plt # noqa: F401 # lazy
|
|
349
356
|
|
|
350
357
|
# Normalize y_fields from string or list
|
|
351
358
|
raw_y = obj.y_field if isinstance(obj.y_field, str) else str(obj.y_field)
|
pjk/sources/factory.py
CHANGED
|
@@ -13,9 +13,11 @@ from pjk.sources.inline_source import InlineSource
|
|
|
13
13
|
from pjk.sources.user_source_factory import UserSourceFactory
|
|
14
14
|
from pjk.sources.parquet_source import ParquetSource
|
|
15
15
|
from pjk.sources.format_source import FormatSource
|
|
16
|
+
from pjk.sources.s3_select_source import S3SelectSource
|
|
16
17
|
|
|
17
18
|
COMPONENTS = {
|
|
18
19
|
'inline': InlineSource,
|
|
20
|
+
's3s': S3SelectSource,
|
|
19
21
|
'json': JsonSource,
|
|
20
22
|
'jsonl': JsonSource,
|
|
21
23
|
'csv': CSVSource,
|
|
@@ -35,6 +37,11 @@ class SourceFactory(ComponentFactory):
|
|
|
35
37
|
def create(self, token: str) -> Source:
|
|
36
38
|
token = token.strip()
|
|
37
39
|
|
|
40
|
+
# s3s is a pseudo source only in the above list to provide easy man page
|
|
41
|
+
# it's instantiated by the parser when <file>.s3s, so disallow standard search for it.
|
|
42
|
+
if token == 's3s':
|
|
43
|
+
return None
|
|
44
|
+
|
|
38
45
|
if InlineSource.is_inline(token):
|
|
39
46
|
return InlineSource(token)
|
|
40
47
|
|
|
@@ -44,6 +51,12 @@ class SourceFactory(ComponentFactory):
|
|
|
44
51
|
source = UserSourceFactory.create(ptok)
|
|
45
52
|
if source:
|
|
46
53
|
return source
|
|
54
|
+
|
|
55
|
+
# s3 select file
|
|
56
|
+
if ptok.pre_colon.endswith('.s3s'):
|
|
57
|
+
source = S3SelectSource(ptok, None)
|
|
58
|
+
if source:
|
|
59
|
+
return source
|
|
47
60
|
|
|
48
61
|
source_cls = self.get_component_class(ptok.pre_colon)
|
|
49
62
|
if source_cls and not issubclass(source_cls, FormatSource):
|
pjk/sources/npy_source.py
CHANGED
|
@@ -4,18 +4,15 @@
|
|
|
4
4
|
import json
|
|
5
5
|
from typing import Iterator, Dict, Any
|
|
6
6
|
|
|
7
|
-
import numpy as np
|
|
8
|
-
from pjk.usage import NoBindUsage
|
|
9
|
-
from pjk.components import Source
|
|
10
7
|
from pjk.sources.lazy_file import LazyFile
|
|
11
8
|
from pjk.sources.format_source import FormatSource
|
|
12
9
|
from pjk.log import logger
|
|
13
10
|
|
|
14
|
-
|
|
15
11
|
class NpySource(FormatSource):
|
|
16
12
|
extension = 'npy'
|
|
17
13
|
|
|
18
14
|
def __init__(self, lazy_file: LazyFile):
|
|
15
|
+
super().__init__(root=None)
|
|
19
16
|
self.lazy_file = lazy_file
|
|
20
17
|
self.num_vecs = 0
|
|
21
18
|
|
|
@@ -32,9 +29,11 @@ class NpySource(FormatSource):
|
|
|
32
29
|
|
|
33
30
|
try:
|
|
34
31
|
# Use mmap to avoid loading entire array in RAM at once.
|
|
32
|
+
import numpy as np #lazy import
|
|
35
33
|
arr = np.load(path, mmap_mode="r", allow_pickle=False)
|
|
36
34
|
except Exception as e:
|
|
37
35
|
logger.error(f"Failed to load .npy file at {path}: {e}")
|
|
36
|
+
raise Exception(f"Failed to load .npy file at {path}: {e}")
|
|
38
37
|
return
|
|
39
38
|
|
|
40
39
|
if arr.size == 0:
|
pjk/sources/s3_select_source.py
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from threading import Lock
|
|
7
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
from pjk.components import Source
|
|
12
|
+
from pjk.usage import ParsedToken, Usage
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# ============================================================
|
|
16
|
+
# Per-object S3 Select reader
|
|
17
|
+
# ============================================================
|
|
18
|
+
|
|
19
|
+
class S3SelectObjectSource(Source):
|
|
20
|
+
"""
|
|
21
|
+
Runs S3 Select on a single S3 key and streams JSON rows.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
s3_client,
|
|
27
|
+
bucket: str,
|
|
28
|
+
key: str,
|
|
29
|
+
query: str,
|
|
30
|
+
input_format: str,
|
|
31
|
+
is_gz: bool,
|
|
32
|
+
):
|
|
33
|
+
super().__init__(root=None)
|
|
34
|
+
self._s3 = s3_client
|
|
35
|
+
self._bucket = bucket
|
|
36
|
+
self._key = key
|
|
37
|
+
self._query = query
|
|
38
|
+
self._input_format = input_format
|
|
39
|
+
self._is_gz = is_gz
|
|
40
|
+
|
|
41
|
+
def _build_input_serialization(self) -> Dict[str, Any]:
|
|
42
|
+
fmt = self._input_format.lower()
|
|
43
|
+
|
|
44
|
+
if fmt == "json":
|
|
45
|
+
base = {"JSON": {"Type": "LINES"}}
|
|
46
|
+
elif fmt == "csv":
|
|
47
|
+
base = {"CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": ","}}
|
|
48
|
+
elif fmt == "tsv":
|
|
49
|
+
base = {"CSV": {"FileHeaderInfo": "USE", "FieldDelimiter": "\t"}}
|
|
50
|
+
elif fmt == "parquet":
|
|
51
|
+
base = {"Parquet": {}}
|
|
52
|
+
else:
|
|
53
|
+
raise ValueError(f"Unsupported input format for S3 Select: {self._input_format}")
|
|
54
|
+
|
|
55
|
+
if self._is_gz:
|
|
56
|
+
base["CompressionType"] = "GZIP"
|
|
57
|
+
|
|
58
|
+
return base
|
|
59
|
+
|
|
60
|
+
def __iter__(self) -> Iterator[Dict[str, Any]]:
|
|
61
|
+
input_ser = self._build_input_serialization()
|
|
62
|
+
output_ser = {"JSON": {}} # JSON objects per record
|
|
63
|
+
|
|
64
|
+
resp = self._s3.select_object_content(
|
|
65
|
+
Bucket=self._bucket,
|
|
66
|
+
Key=self._key,
|
|
67
|
+
ExpressionType="SQL",
|
|
68
|
+
Expression=self._query,
|
|
69
|
+
InputSerialization=input_ser,
|
|
70
|
+
OutputSerialization=output_ser,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
decoder = json.JSONDecoder()
|
|
74
|
+
buffer = ""
|
|
75
|
+
|
|
76
|
+
for event in resp["Payload"]:
|
|
77
|
+
if "Records" not in event:
|
|
78
|
+
continue
|
|
79
|
+
|
|
80
|
+
chunk = event["Records"]["Payload"].decode("utf-8")
|
|
81
|
+
buffer += chunk
|
|
82
|
+
|
|
83
|
+
# peel off as many complete JSON objects as we can
|
|
84
|
+
while True:
|
|
85
|
+
stripped = buffer.lstrip()
|
|
86
|
+
if not stripped:
|
|
87
|
+
buffer = ""
|
|
88
|
+
break
|
|
89
|
+
|
|
90
|
+
try:
|
|
91
|
+
obj, end = decoder.raw_decode(stripped)
|
|
92
|
+
except json.JSONDecodeError:
|
|
93
|
+
# incomplete JSON; wait for more data
|
|
94
|
+
break
|
|
95
|
+
|
|
96
|
+
yield obj
|
|
97
|
+
buffer = stripped[end:]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ============================================================
|
|
101
|
+
# Shared S3 Select state (prefix iteration + threading)
|
|
102
|
+
# ============================================================
|
|
103
|
+
|
|
104
|
+
class _SharedS3SelectState:
|
|
105
|
+
"""
|
|
106
|
+
Shared, thread-safe lazy iterator over S3 objects for S3 Select queries.
|
|
107
|
+
|
|
108
|
+
Driven by a .s3s YAML config that specifies:
|
|
109
|
+
- s3_bucket
|
|
110
|
+
- prefix
|
|
111
|
+
- optional sub_keys: [ "01", "02", ... ]
|
|
112
|
+
- optional key_regex
|
|
113
|
+
- format
|
|
114
|
+
- query
|
|
115
|
+
"""
|
|
116
|
+
|
|
117
|
+
def __init__(
|
|
118
|
+
self,
|
|
119
|
+
s3_client,
|
|
120
|
+
bucket: str,
|
|
121
|
+
prefixes: List[str],
|
|
122
|
+
format_override: str,
|
|
123
|
+
query: str,
|
|
124
|
+
key_regex: Optional[str] = None,
|
|
125
|
+
):
|
|
126
|
+
self.s3 = s3_client
|
|
127
|
+
self.bucket = bucket
|
|
128
|
+
self.prefixes = prefixes
|
|
129
|
+
self.format_override = format_override
|
|
130
|
+
self.query = query
|
|
131
|
+
|
|
132
|
+
self._key_regex = re.compile(key_regex) if key_regex else None
|
|
133
|
+
|
|
134
|
+
self._lock = Lock()
|
|
135
|
+
self._exhausted = False
|
|
136
|
+
self._prefix_index = 0
|
|
137
|
+
self._current_iter: Optional[Iterator[str]] = None
|
|
138
|
+
|
|
139
|
+
@staticmethod
|
|
140
|
+
def _get_format_gz(value: str) -> Tuple[str, bool]:
|
|
141
|
+
"""
|
|
142
|
+
Split 'json.gz' -> ('json', True), 'json' -> ('json', False), etc.
|
|
143
|
+
"""
|
|
144
|
+
is_gz = value.endswith(".gz")
|
|
145
|
+
fmt = value[:-3] if is_gz else value
|
|
146
|
+
return fmt, is_gz
|
|
147
|
+
|
|
148
|
+
def _next_key_iter(self) -> Optional[Iterator[str]]:
|
|
149
|
+
"""
|
|
150
|
+
Get an iterator over keys for the next prefix in self.prefixes.
|
|
151
|
+
"""
|
|
152
|
+
if self._prefix_index >= len(self.prefixes):
|
|
153
|
+
return None
|
|
154
|
+
|
|
155
|
+
prefix = self.prefixes[self._prefix_index]
|
|
156
|
+
self._prefix_index += 1
|
|
157
|
+
|
|
158
|
+
paginator = self.s3.get_paginator("list_objects_v2")
|
|
159
|
+
return (
|
|
160
|
+
obj.get("Key")
|
|
161
|
+
for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix)
|
|
162
|
+
for obj in page.get("Contents", [])
|
|
163
|
+
if isinstance(obj.get("Key"), str)
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
def _iter_all_keys(self) -> Iterator[str]:
|
|
167
|
+
"""
|
|
168
|
+
Iterate over all keys for all prefixes, applying key_regex if present.
|
|
169
|
+
"""
|
|
170
|
+
while True:
|
|
171
|
+
if self._current_iter is None:
|
|
172
|
+
self._current_iter = self._next_key_iter()
|
|
173
|
+
if self._current_iter is None:
|
|
174
|
+
return
|
|
175
|
+
|
|
176
|
+
try:
|
|
177
|
+
key = next(self._current_iter)
|
|
178
|
+
except StopIteration:
|
|
179
|
+
self._current_iter = None
|
|
180
|
+
continue
|
|
181
|
+
|
|
182
|
+
if self._key_regex and not self._key_regex.search(key):
|
|
183
|
+
continue
|
|
184
|
+
|
|
185
|
+
yield key
|
|
186
|
+
|
|
187
|
+
def _infer_format_and_compression(self) -> Tuple[str, bool]:
|
|
188
|
+
if not self.format_override:
|
|
189
|
+
raise ValueError("format is required in .s3s config")
|
|
190
|
+
fmt, is_gz = self._get_format_gz(self.format_override)
|
|
191
|
+
return fmt, is_gz
|
|
192
|
+
|
|
193
|
+
def _build_source_for_key(self, key: str) -> Source:
|
|
194
|
+
fmt, is_gz = self._infer_format_and_compression()
|
|
195
|
+
return S3SelectObjectSource(
|
|
196
|
+
s3_client=self.s3,
|
|
197
|
+
bucket=self.bucket,
|
|
198
|
+
key=key,
|
|
199
|
+
query=self.query,
|
|
200
|
+
input_format=fmt,
|
|
201
|
+
is_gz=is_gz,
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
def reserve_next_source(self) -> Optional[Source]:
|
|
205
|
+
"""
|
|
206
|
+
Atomically reserve and construct the next per-key S3SelectObjectSource.
|
|
207
|
+
Returns None when exhausted.
|
|
208
|
+
"""
|
|
209
|
+
if self._exhausted:
|
|
210
|
+
return None
|
|
211
|
+
|
|
212
|
+
with self._lock:
|
|
213
|
+
if self._exhausted:
|
|
214
|
+
return None
|
|
215
|
+
|
|
216
|
+
try:
|
|
217
|
+
key = next(self._iter_all_keys())
|
|
218
|
+
except StopIteration:
|
|
219
|
+
self._exhausted = True
|
|
220
|
+
return None
|
|
221
|
+
|
|
222
|
+
return self._build_source_for_key(key)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# ============================================================
|
|
226
|
+
# Main S3 Select Source (YAML-driven)
|
|
227
|
+
# ============================================================
|
|
228
|
+
|
|
229
|
+
class S3SelectSource(Source):
|
|
230
|
+
"""
|
|
231
|
+
S3 Select source using a .s3s YAML config file.
|
|
232
|
+
|
|
233
|
+
Example config.s3s:
|
|
234
|
+
|
|
235
|
+
s3_bucket: my-bucket
|
|
236
|
+
prefix: balancerevent/2025/11/
|
|
237
|
+
sub_keys:
|
|
238
|
+
- 01
|
|
239
|
+
- 02
|
|
240
|
+
format: json.gz
|
|
241
|
+
query: |
|
|
242
|
+
SELECT ...
|
|
243
|
+
FROM S3Object s
|
|
244
|
+
|
|
245
|
+
Parser passes the config file path as ptok.all_but_params.
|
|
246
|
+
"""
|
|
247
|
+
|
|
248
|
+
extension = "s3s"
|
|
249
|
+
|
|
250
|
+
# ---------- Usage ----------
|
|
251
|
+
|
|
252
|
+
@classmethod
|
|
253
|
+
def usage(cls):
|
|
254
|
+
usage = Usage(
|
|
255
|
+
name="s3s",
|
|
256
|
+
desc=(
|
|
257
|
+
"S3 select source using <file>.s3s YAML config file.\n"
|
|
258
|
+
"where <file>.s3s e.g:\n\n"
|
|
259
|
+
"s3_bucket: my-bucket\n"
|
|
260
|
+
"prefix: my-prefix\n"
|
|
261
|
+
"sub_keys: # optional\n"
|
|
262
|
+
"- 01\n"
|
|
263
|
+
"- 02\n"
|
|
264
|
+
"format: format.gz # csv, etc\n"
|
|
265
|
+
"query: |\n"
|
|
266
|
+
" SELECT s.FooCol FROM S3Object s\n"
|
|
267
|
+
" WHERE s.IntCol = 42"
|
|
268
|
+
),
|
|
269
|
+
component_class=cls,
|
|
270
|
+
)
|
|
271
|
+
usage.def_example(expr_tokens=["config.s3s", "-"], expect=None)
|
|
272
|
+
usage.def_syntax(None)
|
|
273
|
+
return usage
|
|
274
|
+
|
|
275
|
+
# ---------- Construction ----------
|
|
276
|
+
|
|
277
|
+
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
278
|
+
super().__init__(root=None)
|
|
279
|
+
|
|
280
|
+
config_path = ptok.all_but_params
|
|
281
|
+
cfg = self._load_config(config_path)
|
|
282
|
+
|
|
283
|
+
bucket = cfg.get("s3_bucket")
|
|
284
|
+
prefix = cfg.get("prefix")
|
|
285
|
+
fmt = cfg.get("format")
|
|
286
|
+
query = cfg.get("query")
|
|
287
|
+
|
|
288
|
+
if not bucket:
|
|
289
|
+
raise ValueError("s3s config must include 's3_bucket'")
|
|
290
|
+
if not prefix:
|
|
291
|
+
raise ValueError("s3s config must include 'prefix'")
|
|
292
|
+
if not fmt:
|
|
293
|
+
raise ValueError("s3s config must include 'format'")
|
|
294
|
+
if not query:
|
|
295
|
+
raise ValueError("s3s config must include 'query'")
|
|
296
|
+
|
|
297
|
+
key_regex = cfg.get("key_regex")
|
|
298
|
+
|
|
299
|
+
prefixes = self._build_prefixes_from_config(prefix, cfg.get("sub_keys"))
|
|
300
|
+
|
|
301
|
+
import boto3 # lazy
|
|
302
|
+
s3 = boto3.client("s3")
|
|
303
|
+
|
|
304
|
+
state = _SharedS3SelectState(
|
|
305
|
+
s3_client=s3,
|
|
306
|
+
bucket=bucket,
|
|
307
|
+
prefixes=prefixes,
|
|
308
|
+
format_override=fmt,
|
|
309
|
+
query=query,
|
|
310
|
+
key_regex=key_regex,
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
reserved = state.reserve_next_source()
|
|
314
|
+
|
|
315
|
+
self._state = state
|
|
316
|
+
self._current: Optional[Source] = reserved
|
|
317
|
+
|
|
318
|
+
# Alternate ctor used by deep_copy
|
|
319
|
+
@classmethod
|
|
320
|
+
def _from_state(cls, state: _SharedS3SelectState, reserved: Optional[Source]):
|
|
321
|
+
obj = cls.__new__(cls)
|
|
322
|
+
Source.__init__(obj, root=None)
|
|
323
|
+
obj._state = state
|
|
324
|
+
obj._current = reserved
|
|
325
|
+
return obj
|
|
326
|
+
|
|
327
|
+
# ---------- Iteration / deep_copy ----------
|
|
328
|
+
|
|
329
|
+
def __iter__(self):
|
|
330
|
+
while True:
|
|
331
|
+
if self._current is None:
|
|
332
|
+
self._current = self._state.reserve_next_source()
|
|
333
|
+
if self._current is None:
|
|
334
|
+
return # exhausted
|
|
335
|
+
|
|
336
|
+
for record in self._current:
|
|
337
|
+
yield record
|
|
338
|
+
|
|
339
|
+
self._current = None
|
|
340
|
+
|
|
341
|
+
def deep_copy(self):
|
|
342
|
+
reserved = self._state.reserve_next_source()
|
|
343
|
+
if reserved is None:
|
|
344
|
+
return None
|
|
345
|
+
return self._from_state(self._state, reserved)
|
|
346
|
+
|
|
347
|
+
# ---------- Config helpers ----------
|
|
348
|
+
|
|
349
|
+
@staticmethod
|
|
350
|
+
def _load_config(path: str) -> Dict[str, Any]:
|
|
351
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
352
|
+
cfg = yaml.safe_load(f)
|
|
353
|
+
if not isinstance(cfg, dict):
|
|
354
|
+
raise ValueError("s3s config must be a YAML mapping at top level")
|
|
355
|
+
return cfg
|
|
356
|
+
|
|
357
|
+
@staticmethod
|
|
358
|
+
def _build_prefixes_from_config(prefix: str, sub_keys: Optional[List[Any]]) -> List[str]:
|
|
359
|
+
"""
|
|
360
|
+
If sub_keys present (list of suffix strings), produce prefix+suffix
|
|
361
|
+
for each; otherwise just [prefix].
|
|
362
|
+
"""
|
|
363
|
+
if not sub_keys:
|
|
364
|
+
return [prefix]
|
|
365
|
+
|
|
366
|
+
result: List[str] = []
|
|
367
|
+
for s in sub_keys:
|
|
368
|
+
# YAML might give ints or strings; normalize to str and strip
|
|
369
|
+
suffix = str(s).strip()
|
|
370
|
+
if not suffix:
|
|
371
|
+
continue
|
|
372
|
+
result.append(f"{prefix}{suffix}")
|
|
373
|
+
return result
|
pjk/sources/sql_source.py
CHANGED
|
@@ -15,13 +15,22 @@ class SQLSource(FormatSource):
|
|
|
15
15
|
def __init__(self, lazy_file: LazyFile):
    """Initialise the base source with no root and remember the file handle provider."""
    super().__init__(root=None)
    # Only stored here; the file is opened later, when the source is iterated.
    self.lazy_file = lazy_file
|
|
18
|
-
self.num_recs = 0
|
|
19
18
|
|
|
20
19
|
def __iter__(self):
|
|
20
|
+
lines = []
|
|
21
21
|
with self.lazy_file.open() as f:
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
for line in f:
|
|
23
|
+
line = line.strip()
|
|
24
|
+
if len(line) == 0:
|
|
25
|
+
continue
|
|
26
|
+
|
|
27
|
+
if '#' in line:
|
|
28
|
+
line = line.split('#')[0]
|
|
29
|
+
if '--' in line:
|
|
30
|
+
line = line.split('--')[0]
|
|
31
|
+
lines.append(line)
|
|
32
|
+
|
|
33
|
+
sql_text = ' '.join(lines)
|
|
24
34
|
|
|
25
35
|
if sql_text:
|
|
26
|
-
self.num_recs += 1
|
|
27
36
|
yield {"query": sql_text}
|
pjk/version.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
pjk/__init__.py,sha256=6HGDVcFOFv6VPSNjxVnusm9wHqy01pELX3AyCWFzqWg,128
|
|
2
2
|
pjk/common.py,sha256=8f9oBKF2ydeAPKnP3bd5XG-JDSIb_UcUUMI-_h_MKmY,5125
|
|
3
3
|
pjk/components.py,sha256=0HGN0V-CyAMOciyoRjr6-Y_bLFqVvW3bLRK2ymFGvzk,3655
|
|
4
|
-
pjk/history.py,sha256=
|
|
4
|
+
pjk/history.py,sha256=UAzV40GqS3yzTTDDUiV2WAR_CHkMnjDOwKWtj6MuRrs,3076
|
|
5
5
|
pjk/log.py,sha256=LjBboWhUrr2Cz-ygpftCIK17ee2-zNHKZjeJyoW0PlA,2163
|
|
6
6
|
pjk/main.py,sha256=EqU6fWLFK5fD3l701XVv8p6FpvsH2iNcEWHtMYpz7j8,4463
|
|
7
7
|
pjk/man_page.py,sha256=U8s--CH6TYfR98lFr3oiII6uCOHcgxAabPAK_N5VLH8,6327
|
|
@@ -9,15 +9,15 @@ pjk/parser.py,sha256=3oFAssyK3BURULPWJegCQcmUI3yiAU_bdKF4LmKcOjM,13667
|
|
|
9
9
|
pjk/progress.py,sha256=G9igcBy2B9-QuwysKF65KR7inK2l1u8qts6ulsQMPuk,10290
|
|
10
10
|
pjk/registry.py,sha256=u2LJcjZcoD2OfpON6cgZT-ZI_oCwCK6mQL8OTIppo_o,8328
|
|
11
11
|
pjk/usage.py,sha256=Q3BbCld65h9Mn6opjCnlE5YIj8XYWMZk44p_rK-SiJA,11509
|
|
12
|
-
pjk/version.py,sha256=
|
|
12
|
+
pjk/version.py,sha256=hjsaIvpSnszaMTm4_LAcj0sFlUX7XHcK8bbB9SZYLxk,91
|
|
13
13
|
pjk/integrations/opensearch_client.py,sha256=2lpN1ZV7qMcpIq-V0Pih2Xs08IBgr0-PQBOZDNxfm5Y,3572
|
|
14
14
|
pjk/integrations/opensearch_index_sink.py,sha256=1heDtR85iHSzxn2j9j2TrzZPPVnxs2sdCx1EaD40iRQ,3537
|
|
15
15
|
pjk/integrations/opensearch_query_pipe.py,sha256=mD9U0Jz6t4I8qE-bzGbyLOiCG_8kwK_Lr6UrxopYCqg,4519
|
|
16
|
-
pjk/integrations/postgres_pipe.py,sha256=
|
|
16
|
+
pjk/integrations/postgres_pipe.py,sha256=IOROg-jW2C5Rl0vfaizOOjOPKQtvBti5WYTnBvHNnhk,8530
|
|
17
17
|
pjk/integrations/snowflake_pipe.py,sha256=TK7pkLyPREv-LEF2LznFtoe_UWg83n6s8KOjBWM-4C4,7638
|
|
18
18
|
pjk/pipes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
19
|
pjk/pipes/denorm.py,sha256=NBVWO88V-wal4RgtFrrksSU6yVj8qGTG7jcy7VuVFm0,2273
|
|
20
|
-
pjk/pipes/factory.py,sha256=
|
|
20
|
+
pjk/pipes/factory.py,sha256=JrnwdCTw-441hTBQXvYh0GfBvjCrHK3Gja5brGfSq3s,2261
|
|
21
21
|
pjk/pipes/filter.py,sha256=K-ScDrUCb7m4Q8bRMahixJ9zuJNfVduNoX3GEHetSR4,2091
|
|
22
22
|
pjk/pipes/head.py,sha256=Dy9EXza8TMi4G0ErO1OE5KzGYUgSF_V0ibB0lNBq2cE,930
|
|
23
23
|
pjk/pipes/join.py,sha256=11Q9VGAQ71L0ywtuy1eGbrxLX_YBJKaPZ9KnbJmbOQc,3822
|
|
@@ -25,10 +25,10 @@ pjk/pipes/let_reduce.py,sha256=33x-Zxi6IfD85396-uw3rDiFUro-ZNp9-OHz9W49T-s,7212
|
|
|
25
25
|
pjk/pipes/map.py,sha256=EDDZ9kV8HZwSpuiH7J8uTcIj3zZ_1AFbECkBISwErHI,5175
|
|
26
26
|
pjk/pipes/move_field.py,sha256=0uljmqurslrJ-rXQuj28QvhWhb-uuuvg_dyYXguNeJY,1169
|
|
27
27
|
pjk/pipes/progress_pipe.py,sha256=d8-QeD7Uo7MoSjZYbzAuy4c-cSIcDRVQnFII-1proto,1245
|
|
28
|
-
pjk/pipes/query_pipe.py,sha256=
|
|
28
|
+
pjk/pipes/query_pipe.py,sha256=52LABmTTgzlBECUkhxbhEzCrzyB18JCAhpGNEULtC0w,3601
|
|
29
29
|
pjk/pipes/remove_field.py,sha256=nB6QOT-VvNfvn536tpBTIxgG41VjHKY7sByzmH245pI,1219
|
|
30
30
|
pjk/pipes/sample.py,sha256=Bo0OJC-PFAIZfF0EnfdtoFhnJlK7d2AK8mvfjlPmxSg,2296
|
|
31
|
-
pjk/pipes/select.py,sha256=
|
|
31
|
+
pjk/pipes/select.py,sha256=85mlpFrhSlmhsSoGLn6_A7tTwbf6yc_xSZGM63MpIOI,1364
|
|
32
32
|
pjk/pipes/sort.py,sha256=CKs9uXMeHNrLpuSqCAwLED6y-Ajc-kcA_2n6yJzcy2s,2347
|
|
33
33
|
pjk/pipes/tail.py,sha256=_pAqN3cPz8ByCgfrRxbTg0TNNC2gyWIFt4hhXOxDVW0,1097
|
|
34
34
|
pjk/pipes/user_pipe_factory.py,sha256=aGyU1ig4HOD5I8SVnGIra7f_sCbIZa5SEVJqmzYRl9E,1599
|
|
@@ -42,7 +42,7 @@ pjk/sinks/expect.py,sha256=oTJ7MsHn1b_xc6V9KQO4ZkCIPJnYedAoNs6rC-zTMC0,3201
|
|
|
42
42
|
pjk/sinks/factory.py,sha256=uE6ETzliuRsoU1MV-Rc9f7TfVIX7hePp69QLkSqCEbU,1823
|
|
43
43
|
pjk/sinks/format_sink.py,sha256=HCyBJi2cdfKOOn-ebVXbGKYFBJHL7g_VM_gCz1e0aW0,4986
|
|
44
44
|
pjk/sinks/graph.py,sha256=rQGa-Us2LIaK-gPGX6y7hng3B6Bz5EG4MCB-PQ3b3hk,2132
|
|
45
|
-
pjk/sinks/graph_bar_line.py,sha256=
|
|
45
|
+
pjk/sinks/graph_bar_line.py,sha256=b37Yf9brzH_kCnhBdYd2budW_dMJDIKy3KHWFP1zqHc,16614
|
|
46
46
|
pjk/sinks/graph_cumulative.py,sha256=fFXI9MSLhxKk5Xwb4df4-QqrctHeiwyXqj4S_pUNHAw,1711
|
|
47
47
|
pjk/sinks/graph_hist.py,sha256=drkAeAMhSXRM-Qm_xfK7WJ1u_usMVlC_TDP1GF_xppI,1994
|
|
48
48
|
pjk/sinks/graph_scatter.py,sha256=3nnIdux9oy8Na2Nt80UzPm03abEglXZyrrHB6ciJabc,1027
|
|
@@ -56,7 +56,7 @@ pjk/sinks/user_sink_factory.py,sha256=HEJVNU7Raf6DeU7wvcfziafA4odeVafn5v5X9VV_xt
|
|
|
56
56
|
pjk/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
57
|
pjk/sources/csv_source.py,sha256=_99z8niT6K8jum2TiHhM0VVrf3VuQLpEHhEIKSRZP0s,765
|
|
58
58
|
pjk/sources/dir_source.py,sha256=gdHVxcDA8qnGEt_ChMbG013y-Pp4I6Asm673S6i54qA,5939
|
|
59
|
-
pjk/sources/factory.py,sha256=
|
|
59
|
+
pjk/sources/factory.py,sha256=LfNpsZ1NYyI4Q0jo1lAfxqGS-uU5EkhfrtyFsK7cz8g,2234
|
|
60
60
|
pjk/sources/favorite_source.py,sha256=ZgpK_kNeB_rWf0LvZ9zDzwdwCwVxbyeZ8xE5EPs40fQ,1237
|
|
61
61
|
pjk/sources/format_source.py,sha256=qRq6WoRyt-u9fZQ1sBEzQVvN4-0hQXuB88_eFP0T7MM,5064
|
|
62
62
|
pjk/sources/inline_source.py,sha256=t3J1FEr45hySSYLF5gEUPeNo_Fr0M7G3hXa3-J-56XI,1766
|
|
@@ -64,16 +64,17 @@ pjk/sources/json_source.py,sha256=3OQ61YHjhVAITWrCf0J9JNld1pFGY1P1Kd2H4jopDPM,13
|
|
|
64
64
|
pjk/sources/lazy_file.py,sha256=fQYaQz7bytG9vY4JNtIQJxfHWFowCn5il51H7vQrTNg,400
|
|
65
65
|
pjk/sources/lazy_file_local.py,sha256=giDruMzRJSfUmWtuuJcXb2mUF2Cz0og-l-HOk3tFv0I,588
|
|
66
66
|
pjk/sources/lazy_file_s3.py,sha256=a4PyBM_WoHfmKrbMucTlqxOPF79KGRyJGxECD-dVq5Q,877
|
|
67
|
-
pjk/sources/npy_source.py,sha256=
|
|
67
|
+
pjk/sources/npy_source.py,sha256=kEGCeuhtaxymQzWBs2PJaMdZX1_kvAECgP5NG5Zpddk,2930
|
|
68
68
|
pjk/sources/parquet_source.py,sha256=Dcou-p3xB1M1NUtDDEj1m0fnb8IaWdiSLr5qp7JGAfw,831
|
|
69
|
+
pjk/sources/s3_select_source.py,sha256=Rcg_pcoBXwy9TsEcXQNymJo5h_iTps3cT2K_EiyvtG8,10928
|
|
69
70
|
pjk/sources/s3_source.py,sha256=CSZYpjxqNoQcqgZCmWJXNG6Q-8XAThz0bF9ycjtuQ6Y,5019
|
|
70
71
|
pjk/sources/source_list.py,sha256=E70ewbGvc0MiD61_nIiRAZ_71ObH7iqs2JZfSR-6BaM,650
|
|
71
|
-
pjk/sources/sql_source.py,sha256=
|
|
72
|
+
pjk/sources/sql_source.py,sha256=p2hqMAqOH0qUUvoM0cvELFX24mign1K-xSXAwI5irEA,1007
|
|
72
73
|
pjk/sources/tsv_source.py,sha256=37nhEblCZ8XeTNHVo-WcdJ8HbIbiwKgsDES_yzz6EdU,306
|
|
73
74
|
pjk/sources/user_source_factory.py,sha256=FiUrdP_ecqYG1btGjDTOWMMYGFMhKuDZYW1BUMzwQEQ,1353
|
|
74
|
-
python_jack_knife-0.7.
|
|
75
|
-
python_jack_knife-0.7.
|
|
76
|
-
python_jack_knife-0.7.
|
|
77
|
-
python_jack_knife-0.7.
|
|
78
|
-
python_jack_knife-0.7.
|
|
79
|
-
python_jack_knife-0.7.
|
|
75
|
+
python_jack_knife-0.7.4.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
|
76
|
+
python_jack_knife-0.7.4.dist-info/METADATA,sha256=CDUdtNS7SK2wbwcnU_BrTGn-REtYKhgaKfqW5kPTrZ0,14641
|
|
77
|
+
python_jack_knife-0.7.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
78
|
+
python_jack_knife-0.7.4.dist-info/entry_points.txt,sha256=kzZ10zEisvEaG2xYqqw7xRpuV62rAO_dPEHnM6USelk,38
|
|
79
|
+
python_jack_knife-0.7.4.dist-info/top_level.txt,sha256=r-Ef_I9SbVDL9jD-W0WtshstLos_7guWbpItYxxSllQ,4
|
|
80
|
+
python_jack_knife-0.7.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|