python-jack-knife 0.7.1__tar.gz → 0.7.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_jack_knife-0.7.1/src/python_jack_knife.egg-info → python_jack_knife-0.7.4}/PKG-INFO +1 -1
- python_jack_knife-0.7.4/src/pjk/integrations/postgres_pipe.py +268 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/query_pipe.py +2 -2
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/graph_bar_line.py +7 -2
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/factory.py +13 -0
- python_jack_knife-0.7.4/src/pjk/sources/s3_select_source.py +373 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/sql_source.py +13 -4
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/version.py +1 -1
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4/src/python_jack_knife.egg-info}/PKG-INFO +1 -1
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/python_jack_knife.egg-info/SOURCES.txt +1 -0
- python_jack_knife-0.7.1/src/pjk/integrations/postgres_pipe.py +0 -227
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/LICENSE +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/README.md +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/pyproject.toml +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/setup.cfg +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/__init__.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/common.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/components.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/history.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/integrations/opensearch_client.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/integrations/opensearch_index_sink.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/integrations/opensearch_query_pipe.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/integrations/snowflake_pipe.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/log.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/main.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/man_page.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/parser.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/__init__.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/denorm.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/factory.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/filter.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/head.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/join.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/let_reduce.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/map.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/move_field.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/progress_pipe.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/remove_field.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/sample.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/select.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/sort.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/tail.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/user_pipe_factory.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/where.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/progress.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/registry.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/__init__.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/create_sink.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/csv_sink.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/devnull.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/dir_sink.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/expect.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/factory.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/format_sink.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/graph.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/graph_cumulative.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/graph_hist.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/graph_scatter.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/json_sink.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/s3_sink.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/s3_stream.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/sinks.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/stdout.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/tsv_sink.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/user_sink_factory.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/__init__.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/csv_source.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/dir_source.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/favorite_source.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/format_source.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/inline_source.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/json_source.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/lazy_file.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/lazy_file_local.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/lazy_file_s3.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/npy_source.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/parquet_source.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/s3_source.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/source_list.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/tsv_source.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/user_source_factory.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/usage.py +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/python_jack_knife.egg-info/requires.txt +0 -0
- {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/python_jack_knife.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
#
|
|
4
|
+
# pjk/integrations/postgres_pipe.py
|
|
5
|
+
|
|
6
|
+
import base64
|
|
7
|
+
import datetime as _dt
|
|
8
|
+
import uuid
|
|
9
|
+
import time
|
|
10
|
+
from decimal import Decimal
|
|
11
|
+
from typing import Any, Dict, Optional
|
|
12
|
+
|
|
13
|
+
from pjk.usage import ParsedToken, Usage
|
|
14
|
+
from pjk.common import Integration
|
|
15
|
+
from pjk.pipes.query_pipe import QueryPipe
|
|
16
|
+
|
|
17
|
+
MAX_RETRIES = 3
|
|
18
|
+
BASE_DELAY = 0.1 # seconds
|
|
19
|
+
|
|
20
|
+
class DBClient:
    """Owns a single pg8000 connection; instances share nothing."""

    def __init__(
        self,
        host: str,
        username: str,
        password: Optional[str],
        db_name: str,
        port: int = 5432,
        ssl: bool = False,
    ):
        """Connect to the database and enable autocommit on the connection.

        When *ssl* is truthy a default ``ssl`` context is handed to pg8000.
        Any connection failure is reported with a printed message and then
        re-raised to the caller.
        """
        import pg8000  # lazy import

        connect_args = {
            "user": username,
            "password": password,
            "host": host,
            "database": db_name,
            "port": port,
        }
        if ssl:
            import ssl as _ssl

            connect_args["ssl_context"] = _ssl.create_default_context()

        try:
            self.conn = pg8000.connect(**connect_args)
            self.conn.autocommit = True
        except Exception as e:
            print("Failed to connect to DB")
            raise e

    def close(self):
        """Close the connection if one is held; safe to call repeatedly."""
        # `conn` is absent when __init__ failed and None after a prior close().
        if getattr(self, "conn", None) is not None:
            import pg8000  # lazy

            try:
                self.conn.close()
            except pg8000.exceptions.InterfaceError:
                # Connection was already closed or is broken; nothing to do.
                pass
            finally:
                self.conn = None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _iso_dt(x: _dt.datetime) -> str:
    """Return *x* as an ISO 8601 string, writing a zero UTC offset as 'Z'.

    Naive datetimes and non-zero offsets are returned exactly as
    ``datetime.isoformat()`` renders them.
    """
    s = x.isoformat()
    # Rewrite only a trailing "+00:00". A blanket str.replace would also match
    # the prefix of a sub-minute offset such as "+00:00:30" and emit "Z:30".
    if s.endswith("+00:00"):
        return s[:-6] + "Z"
    return s
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def normalize(obj: Any) -> Any:
    """
    Convert a value into a JSON/YAML-friendly form without knowing its schema.

    Conversions applied:
    - Decimal              -> fixed-point string (never scientific notation)
    - datetime             -> ISO-8601 string via _iso_dt (UTC offset -> 'Z')
    - date / time          -> ISO-8601 string
    - UUID                 -> string
    - bytes-like           -> base64 text
    - dict                 -> dict with normalized values (keys untouched)
    - list / tuple / set   -> list of normalized items
    - anything else        -> returned unchanged
    """
    if obj is None:
        result = None
    elif isinstance(obj, Decimal):
        result = f"{obj:f}"
    elif isinstance(obj, _dt.datetime):
        # Must precede the date check: datetime is a subclass of date.
        result = _iso_dt(obj)
    elif isinstance(obj, (_dt.date, _dt.time)):
        result = obj.isoformat()
    elif isinstance(obj, uuid.UUID):
        result = str(obj)
    elif isinstance(obj, (bytes, bytearray, memoryview)):
        raw = bytes(obj)
        result = base64.b64encode(raw).decode("ascii")
    elif isinstance(obj, dict):
        result = {key: normalize(value) for key, value in obj.items()}
    elif isinstance(obj, (list, tuple, set)):
        result = list(map(normalize, obj))
    else:
        result = obj
    return result
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _row_to_dict(cursor, row) -> Dict[str, Any]:
    """Map one result row to ``{column_name: normalized_value}``.

    Column names are the first element of each ``cursor.description`` entry,
    paired positionally with the values in *row*.
    """
    names = [column[0] for column in cursor.description]
    record = {}
    for name, value in zip(names, row):
        record[name] = normalize(value)
    return record
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class PostgresPipe(QueryPipe, Integration):
    # Query pipe that runs the SQL found in each input record against one
    # Postgres connection and emits a header record followed by result rows.
    name = "postgres"
    desc = "Postgres query pipe; executes SQL over input record['query']."
    arg0 = ("instance", "instance of database.")
    examples = [
        ["myquery.sql", "postgres:mydb", "-"],
        ["{'query': 'SELECT * from MY_TABLE;'}", "postgres:mydb", "-"],
        ["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", "postgres:mydb"],
        ["{'query': 'SELECT procedure_batch(%s, ...), batch_params:{...}"],
        ["{'query': 'SELECT procedure_jsonb(%s, ...), json_params:json_string"],
    ]

    # name, type, default
    config_tuples = [
        ("db_name", str, None),
        ("host", str, None),
        ("user", str, None),
        ("password", str, None),
        ("port", int, 5432),
        ("ssl", bool, False),
    ]

    def __init__(self, ptok: ParsedToken, u: Usage, root=None):
        """Read connection settings from *u* and open the database connection."""
        super().__init__(ptok, u, root=root)

        self.db_name = u.get_config("db_name")
        self.db_host = u.get_config("host")
        self.db_user = u.get_config("user")
        self.db_pass = u.get_config("password")
        self.db_port = u.get_config("port")
        self.db_ssl = u.get_config("ssl")

        # Standard params field: single-exec params (list/tuple/dict/single value)
        self.params_field = "params"

        # Legacy batch path: list[tuple|list|dict] -> executemany
        self.batch_field = "batch_params"

        # Explicit JSON payload field (no query sniffing).
        # If present, this value is passed to cur.execute(query, json_params).
        self.json_params_field = "json_params"

        # One DB client (and thus one connection) per PostgresPipe instance.
        # Under your invariant (one thread per pipe), this is thread-safe.
        self.client = DBClient(
            host=self.db_host,
            username=self.db_user,
            password=self.db_pass,
            db_name=self.db_name,
            port=self.db_port,
            ssl=self.db_ssl,
        )

    def reset(self):
        """No-op: the pipe keeps no per-run state to clear."""
        # stateless across reset
        pass

    def close(self):
        """Close the underlying DB client, if one was created."""
        # getattr guards against __init__ having failed before `client` was
        # assigned, mirroring the guard used by DBClient.close().
        client = getattr(self, "client", None)
        if client is not None:
            client.close()

    @staticmethod
    def _as_args(value):
        """Pass list/tuple/dict through unchanged; wrap anything else in a 1-tuple."""
        if isinstance(value, (list, tuple, dict)):
            return value
        return (value,)

    @staticmethod
    def _is_ingest_event(cols) -> bool:
        """Return True when the result has exactly one column named 'ingest_event'."""
        return len(cols) == 1 and cols[0] == "ingest_event"

    def _make_header(self, cur, query: str, params=None) -> Dict[str, Any]:
        """
        Build the header record for an executed statement from cursor state.

        The header always carries ``db``, ``dbhost`` and ``result``; ``params``
        is included when given. A single-column ``ingest_event`` result has its
        one row consumed here and is reported via ``function``; every other
        statement reports ``rowcount`` instead. *query* is accepted but not
        used in the header.
        """
        h = {
            "db": self.db_name,
            "dbhost": self.db_host,
        }
        if params is not None:
            h["params"] = params

        h["result"] = "ok"
        if cur.description:
            cols = [d[0] for d in cur.description]
            if self._is_ingest_event(cols):
                _ = cur.fetchone()  # consume void row
                h["function"] = "ingest_event"
            else:
                # -1 is reported as None for row-returning statements.
                h["rowcount"] = cur.rowcount if cur.rowcount != -1 else None
        else:
            h["rowcount"] = cur.rowcount

        return h

    def execute_query_returning_S_xO_iterable(self, record):
        """
        Execute ``record[self.query_field]`` and yield a header, then the rows.

        Parameters are taken from the record with priority
        ``json_params`` > ``batch_params`` > ``params``. A record without a
        query is yielded back with ``_error`` set to ``"missing query"``.
        The cursor is always closed when the generator finishes or is
        abandoned; the connection itself stays open until ``close()``.
        """
        query = record.get(self.query_field)
        if not query:
            record["_error"] = "missing query"
            yield record
            return

        # Priority: json_params > batch_params > params
        json_params = record.get(self.json_params_field, None)
        batch = record.get(self.batch_field, None)
        params = record.get(self.params_field, None)

        cur = self.client.conn.cursor()
        try:
            did_executemany = False
            header_params = None

            # ---------- execute ----------
            if json_params is not None:
                # Explicit JSON payload; caller controls shape.
                # We don't inspect query or payload.
                cur.execute(query, self._as_args(json_params))
                header_params = {self.json_params_field: json_params}

            elif batch is not None:
                # Legacy executemany path; no magic.
                if len(batch) == 0:
                    # Placeholder statement so the cursor has state for the header.
                    # NOTE(review): the placeholder's single row is streamed
                    # below like any other result row - confirm this is intended.
                    cur.execute("SELECT 1")
                    header_params = {"batch_size": 0}
                elif len(batch) == 1:
                    cur.execute(query, batch[0])
                    header_params = {"batch_size": 1, "params": batch[0]}
                else:
                    cur.executemany(query, batch)
                    did_executemany = True
                    header_params = {"batch_size": len(batch)}

            elif params is None:
                # Single-statement path without parameters.
                cur.execute(query)

            else:
                # Single-statement path with parameters.
                cur.execute(query, self._as_args(params))
                header_params = params

            # ---------- header ----------
            yield self._make_header(cur, query, header_params)

            # ---------- stream rows (only meaningful for single execute that returns rows) ----------
            if did_executemany or not cur.description:
                return

            # Column names are fixed for the whole result set, so compute them
            # once instead of once per row.
            cols = [d[0] for d in cur.description]
            if self._is_ingest_event(cols):
                return

            for row in cur:
                yield {col: normalize(val) for col, val in zip(cols, row)}

        finally:
            cur.close()
            # connection stays open for this pipe; closed in .close()
|
|
@@ -35,8 +35,8 @@ class QueryPipe(Pipe):
|
|
|
35
35
|
return u
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
39
|
-
super().__init__(ptok, usage)
|
|
38
|
+
def __init__(self, ptok: ParsedToken, usage: Usage, root = None):
|
|
39
|
+
super().__init__(ptok, usage, root=root)
|
|
40
40
|
self.output_shape = usage.get_param('shape')
|
|
41
41
|
self.count = usage.get_param('count')
|
|
42
42
|
self.query_field = 'query' # for all subclasses
|
|
@@ -92,6 +92,7 @@ class MultiYAdapter:
|
|
|
92
92
|
@staticmethod
|
|
93
93
|
def to_df(records: Iterable[Dict[str, Any]], x_field: str, y_fields: Sequence[str]) -> pd.DataFrame:
|
|
94
94
|
import pandas as pd # lazy
|
|
95
|
+
import numpy as np # lazy
|
|
95
96
|
rows: List[Dict[str, Any]] = []
|
|
96
97
|
for r in records:
|
|
97
98
|
if x_field not in r:
|
|
@@ -120,7 +121,8 @@ class MultiYAdapter:
|
|
|
120
121
|
class SingleYWithSetsAdapter:
|
|
121
122
|
"""Legacy: single y_field + optional per-row set_name to create series."""
|
|
122
123
|
@staticmethod
|
|
123
|
-
def to_df(records: Iterable[Dict[str, Any]], x_field: str, y_field: str)
|
|
124
|
+
def to_df(records: Iterable[Dict[str, Any]], x_field: str, y_field: str):
|
|
125
|
+
import pandas as pd # lazy
|
|
124
126
|
triplets = [] # (x, y, set_name)
|
|
125
127
|
for r in records:
|
|
126
128
|
if x_field in r and y_field in r:
|
|
@@ -137,7 +139,6 @@ class SingleYWithSetsAdapter:
|
|
|
137
139
|
# ----------------------------- Plotter -----------------------------
|
|
138
140
|
class GraphPlotter:
|
|
139
141
|
def __init__(self, params: GraphParams):
|
|
140
|
-
import numpy as np
|
|
141
142
|
self.pms = params
|
|
142
143
|
self.y_fields = list(dict.fromkeys(self.pms.y_fields)) # dedupe, preserve order
|
|
143
144
|
|
|
@@ -145,6 +146,7 @@ class GraphPlotter:
|
|
|
145
146
|
import matplotlib.pyplot as plt # lazy
|
|
146
147
|
import matplotlib.dates as mdates # lazy
|
|
147
148
|
import pandas as pd # lazy
|
|
149
|
+
import numpy as np # lazy
|
|
148
150
|
|
|
149
151
|
fig = plt.figure()
|
|
150
152
|
ax = plt.gca()
|
|
@@ -291,6 +293,7 @@ class GraphPlotter:
|
|
|
291
293
|
|
|
292
294
|
def _bars_time(self, ax, df: pd.DataFrame, y_cols: Sequence[str]) -> None:
|
|
293
295
|
# Grouped bars at each timestamp using index positions
|
|
296
|
+
import numpy as np # lazy
|
|
294
297
|
x_vals = df["ts"].to_numpy(); idx = np.arange(len(x_vals))
|
|
295
298
|
n = len(y_cols); width = 0.8 / max(n, 1)
|
|
296
299
|
for i, y in enumerate(y_cols):
|
|
@@ -300,6 +303,7 @@ class GraphPlotter:
|
|
|
300
303
|
ax.set_xticks(idx, [pd.to_datetime(t).strftime("%Y-%m-%d %H:%M") for t in x_vals], rotation=45)
|
|
301
304
|
|
|
302
305
|
def _bars_categorical(self, ax, df: pd.DataFrame, y_cols: Sequence[str]) -> None:
|
|
306
|
+
import numpy as np # lazy
|
|
303
307
|
seen = set(); ordered_x: List[Any] = []
|
|
304
308
|
for x in df["x"].tolist():
|
|
305
309
|
if x not in seen:
|
|
@@ -312,6 +316,7 @@ class GraphPlotter:
|
|
|
312
316
|
ax.set_xticks(idx, ordered_x, rotation=45)
|
|
313
317
|
|
|
314
318
|
def _lines_categorical(self, ax, df: pd.DataFrame, y_cols: Sequence[str]) -> None:
|
|
319
|
+
import numpy as np # lazy
|
|
315
320
|
seen = set(); ordered_x: List[Any] = []
|
|
316
321
|
for x in df["x"].tolist():
|
|
317
322
|
if x not in seen:
|
|
@@ -13,9 +13,11 @@ from pjk.sources.inline_source import InlineSource
|
|
|
13
13
|
from pjk.sources.user_source_factory import UserSourceFactory
|
|
14
14
|
from pjk.sources.parquet_source import ParquetSource
|
|
15
15
|
from pjk.sources.format_source import FormatSource
|
|
16
|
+
from pjk.sources.s3_select_source import S3SelectSource
|
|
16
17
|
|
|
17
18
|
COMPONENTS = {
|
|
18
19
|
'inline': InlineSource,
|
|
20
|
+
's3s': S3SelectSource,
|
|
19
21
|
'json': JsonSource,
|
|
20
22
|
'jsonl': JsonSource,
|
|
21
23
|
'csv': CSVSource,
|
|
@@ -35,6 +37,11 @@ class SourceFactory(ComponentFactory):
|
|
|
35
37
|
def create(self, token: str) -> Source:
|
|
36
38
|
token = token.strip()
|
|
37
39
|
|
|
40
|
+
# s3s is a pseudo source only in the above list to provide easy man page
|
|
41
|
+
# it's instantiated by the parser when <file>.s3s, so disallow standard search for it.
|
|
42
|
+
if token == 's3s':
|
|
43
|
+
return None
|
|
44
|
+
|
|
38
45
|
if InlineSource.is_inline(token):
|
|
39
46
|
return InlineSource(token)
|
|
40
47
|
|
|
@@ -44,6 +51,12 @@ class SourceFactory(ComponentFactory):
|
|
|
44
51
|
source = UserSourceFactory.create(ptok)
|
|
45
52
|
if source:
|
|
46
53
|
return source
|
|
54
|
+
|
|
55
|
+
# s3 select file
|
|
56
|
+
if ptok.pre_colon.endswith('.s3s'):
|
|
57
|
+
source = S3SelectSource(ptok, None)
|
|
58
|
+
if source:
|
|
59
|
+
return source
|
|
47
60
|
|
|
48
61
|
source_cls = self.get_component_class(ptok.pre_colon)
|
|
49
62
|
if source_cls and not issubclass(source_cls, FormatSource):
|