python-jack-knife 0.7.1__tar.gz → 0.7.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86):
  1. {python_jack_knife-0.7.1/src/python_jack_knife.egg-info → python_jack_knife-0.7.4}/PKG-INFO +1 -1
  2. python_jack_knife-0.7.4/src/pjk/integrations/postgres_pipe.py +268 -0
  3. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/query_pipe.py +2 -2
  4. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/graph_bar_line.py +7 -2
  5. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/factory.py +13 -0
  6. python_jack_knife-0.7.4/src/pjk/sources/s3_select_source.py +373 -0
  7. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/sql_source.py +13 -4
  8. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/version.py +1 -1
  9. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4/src/python_jack_knife.egg-info}/PKG-INFO +1 -1
  10. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/python_jack_knife.egg-info/SOURCES.txt +1 -0
  11. python_jack_knife-0.7.1/src/pjk/integrations/postgres_pipe.py +0 -227
  12. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/LICENSE +0 -0
  13. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/README.md +0 -0
  14. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/pyproject.toml +0 -0
  15. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/setup.cfg +0 -0
  16. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/__init__.py +0 -0
  17. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/common.py +0 -0
  18. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/components.py +0 -0
  19. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/history.py +0 -0
  20. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/integrations/opensearch_client.py +0 -0
  21. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/integrations/opensearch_index_sink.py +0 -0
  22. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/integrations/opensearch_query_pipe.py +0 -0
  23. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/integrations/snowflake_pipe.py +0 -0
  24. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/log.py +0 -0
  25. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/main.py +0 -0
  26. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/man_page.py +0 -0
  27. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/parser.py +0 -0
  28. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/__init__.py +0 -0
  29. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/denorm.py +0 -0
  30. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/factory.py +0 -0
  31. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/filter.py +0 -0
  32. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/head.py +0 -0
  33. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/join.py +0 -0
  34. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/let_reduce.py +0 -0
  35. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/map.py +0 -0
  36. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/move_field.py +0 -0
  37. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/progress_pipe.py +0 -0
  38. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/remove_field.py +0 -0
  39. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/sample.py +0 -0
  40. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/select.py +0 -0
  41. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/sort.py +0 -0
  42. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/tail.py +0 -0
  43. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/user_pipe_factory.py +0 -0
  44. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/pipes/where.py +0 -0
  45. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/progress.py +0 -0
  46. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/registry.py +0 -0
  47. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/__init__.py +0 -0
  48. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/create_sink.py +0 -0
  49. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/csv_sink.py +0 -0
  50. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/devnull.py +0 -0
  51. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/dir_sink.py +0 -0
  52. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/expect.py +0 -0
  53. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/factory.py +0 -0
  54. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/format_sink.py +0 -0
  55. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/graph.py +0 -0
  56. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/graph_cumulative.py +0 -0
  57. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/graph_hist.py +0 -0
  58. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/graph_scatter.py +0 -0
  59. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/json_sink.py +0 -0
  60. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/s3_sink.py +0 -0
  61. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/s3_stream.py +0 -0
  62. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/sinks.py +0 -0
  63. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/stdout.py +0 -0
  64. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/tsv_sink.py +0 -0
  65. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sinks/user_sink_factory.py +0 -0
  66. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/__init__.py +0 -0
  67. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/csv_source.py +0 -0
  68. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/dir_source.py +0 -0
  69. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/favorite_source.py +0 -0
  70. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/format_source.py +0 -0
  71. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/inline_source.py +0 -0
  72. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/json_source.py +0 -0
  73. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/lazy_file.py +0 -0
  74. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/lazy_file_local.py +0 -0
  75. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/lazy_file_s3.py +0 -0
  76. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/npy_source.py +0 -0
  77. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/parquet_source.py +0 -0
  78. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/s3_source.py +0 -0
  79. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/source_list.py +0 -0
  80. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/tsv_source.py +0 -0
  81. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/sources/user_source_factory.py +0 -0
  82. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/pjk/usage.py +0 -0
  83. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  84. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  85. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/python_jack_knife.egg-info/requires.txt +0 -0
  86. {python_jack_knife-0.7.1 → python_jack_knife-0.7.4}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.7.1
3
+ Version: 0.7.4
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -0,0 +1,268 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+ #
4
+ # djk/pipes/postgres_pipe.py
5
+
6
+ import base64
7
+ import datetime as _dt
8
+ import uuid
9
+ import time
10
+ from decimal import Decimal
11
+ from typing import Any, Dict, Optional
12
+
13
+ from pjk.usage import ParsedToken, Usage
14
+ from pjk.common import Integration
15
+ from pjk.pipes.query_pipe import QueryPipe
16
+
17
+ MAX_RETRIES = 3
18
+ BASE_DELAY = 0.1 # seconds
19
+
20
+ class DBClient:
21
+ """Per-instance pg8000 connection wrapper. No shared state."""
22
+
23
+ def __init__(
24
+ self,
25
+ host: str,
26
+ username: str,
27
+ password: Optional[str],
28
+ db_name: str,
29
+ port: int = 5432,
30
+ ssl: bool = False,
31
+ ):
32
+ import pg8000 # lazy import
33
+
34
+ kwargs = dict(
35
+ user=username,
36
+ password=password,
37
+ host=host,
38
+ database=db_name,
39
+ port=port,
40
+ )
41
+ if ssl:
42
+ import ssl as _ssl
43
+
44
+ kwargs["ssl_context"] = _ssl.create_default_context()
45
+
46
+ try:
47
+ self.conn = pg8000.connect(**kwargs)
48
+ self.conn.autocommit = True
49
+ except Exception as e:
50
+ print("Failed to connect to DB")
51
+ raise e
52
+
53
+ def close(self):
54
+ if getattr(self, "conn", None) is None:
55
+ return
56
+
57
+ import pg8000 # lazy
58
+
59
+ try:
60
+ self.conn.close()
61
+ except pg8000.exceptions.InterfaceError:
62
+ # Already closed / broken; ignore.
63
+ pass
64
+ finally:
65
+ self.conn = None
66
+
67
+
68
+ def _iso_dt(x: _dt.datetime) -> str:
69
+ """ISO 8601; normalize UTC offset to 'Z'."""
70
+ s = x.isoformat()
71
+ return s.replace("+00:00", "Z")
72
+
73
+
74
+ def normalize(obj: Any) -> Any:
75
+ """
76
+ Make values JSON/YAML-safe and portable (schema-agnostic):
77
+ - Decimal -> exact string (no sci-notation)
78
+ - date/datetime/time -> ISO-8601 string (datetime keeps offset; UTC -> 'Z')
79
+ - UUID -> string
80
+ - bytes -> base64 string
81
+ - lists/tuples/sets, dicts -> normalized recursively
82
+ - leaves int/float/str/bool/None as-is
83
+ """
84
+ if obj is None:
85
+ return None
86
+
87
+ if isinstance(obj, Decimal):
88
+ return format(obj, "f") # exact value as string
89
+
90
+ if isinstance(obj, _dt.datetime):
91
+ return _iso_dt(obj)
92
+
93
+ if isinstance(obj, (_dt.date, _dt.time)):
94
+ return obj.isoformat()
95
+
96
+ if isinstance(obj, uuid.UUID):
97
+ return str(obj)
98
+
99
+ if isinstance(obj, (bytes, bytearray, memoryview)):
100
+ return base64.b64encode(bytes(obj)).decode("ascii")
101
+
102
+ if isinstance(obj, dict):
103
+ return {k: normalize(v) for k, v in obj.items()}
104
+
105
+ if isinstance(obj, (list, tuple, set)):
106
+ return [normalize(v) for v in obj]
107
+
108
+ return obj
109
+
110
+
111
+ def _row_to_dict(cursor, row) -> Dict[str, Any]:
112
+ cols = [d[0] for d in cursor.description]
113
+ return {col: normalize(val) for col, val in zip(cols, row)}
114
+
115
+
116
+ class PostgresPipe(QueryPipe, Integration):
117
+ name = "postgres"
118
+ desc = "Postgres query pipe; executes SQL over input record['query']."
119
+ arg0 = ("instance", "instance of database.")
120
+ examples = [
121
+ ["myquery.sql", "postgres:mydb", "-"],
122
+ ["{'query': 'SELECT * from MY_TABLE;'}", "postgres:mydb", "-"],
123
+ ["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", "postgres:mydb"],
124
+ ["{'query': 'SELECT procedure_batch(%s, ...), batch_params:{...}"],
125
+ ["{'query': 'SELECT procedure_jsonb(%s, ...), json_params:json_string"],
126
+ ]
127
+
128
+ # name, type, default
129
+ config_tuples = [
130
+ ("db_name", str, None),
131
+ ("host", str, None),
132
+ ("user", str, None),
133
+ ("password", str, None),
134
+ ("port", int, 5432),
135
+ ("ssl", bool, False),
136
+ ]
137
+
138
+ def __init__(self, ptok: ParsedToken, u: Usage, root=None):
139
+ super().__init__(ptok, u, root=root)
140
+
141
+ self.db_name = u.get_config("db_name")
142
+ self.db_host = u.get_config("host")
143
+ self.db_user = u.get_config("user")
144
+ self.db_pass = u.get_config("password")
145
+ self.db_port = u.get_config("port")
146
+ self.db_ssl = u.get_config("ssl")
147
+
148
+ # Standard params field: single-exec params (list/tuple/dict/single value)
149
+ self.params_field = "params"
150
+
151
+ # Legacy batch path: list[tuple|list|dict] → executemany
152
+ self.batch_field = "batch_params"
153
+
154
+ # Explicit JSON payload field (no query sniffing).
155
+ # If present, this value is passed to cur.execute(query, json_params).
156
+ self.json_params_field = "json_params"
157
+
158
+ # One DB client (and thus one connection) per PostgresPipe instance.
159
+ # Under your invariant (one thread per pipe), this is thread-safe.
160
+ self.client = DBClient(
161
+ host=self.db_host,
162
+ username=self.db_user,
163
+ password=self.db_pass,
164
+ db_name=self.db_name,
165
+ port=self.db_port,
166
+ ssl=self.db_ssl,
167
+ )
168
+
169
+ def reset(self):
170
+ # stateless across reset
171
+ pass
172
+
173
+ def close(self):
174
+ if self.client is not None:
175
+ self.client.close()
176
+
177
+ def _make_header(self, cur, query: str, params=None) -> Dict[str, Any]:
178
+ """
179
+ Inspect the cursor and build a full header record.
180
+ Figures out result, rowcount, function automatically.
181
+ """
182
+ h = {
183
+ "db": self.db_name,
184
+ "dbhost": self.db_host,
185
+ }
186
+ if params is not None:
187
+ h["params"] = params
188
+
189
+ if cur.description:
190
+ cols = [d[0] for d in cur.description]
191
+ if len(cols) == 1 and cols[0] == "ingest_event":
192
+ _ = cur.fetchone() # consume void row
193
+ h["result"] = "ok"
194
+ h["function"] = "ingest_event"
195
+ else:
196
+ h["result"] = "ok"
197
+ h["rowcount"] = cur.rowcount if cur.rowcount != -1 else None
198
+ else:
199
+ h["result"] = "ok"
200
+ h["rowcount"] = cur.rowcount
201
+
202
+ return h
203
+
204
+ def execute_query_returning_S_xO_iterable(self, record):
205
+ query = record.get(self.query_field)
206
+ if not query:
207
+ record["_error"] = "missing query"
208
+ yield record
209
+ return
210
+
211
+ # Priority: json_params > batch_params > params
212
+ json_params = record.get(self.json_params_field, None)
213
+ batch = record.get(self.batch_field, None)
214
+ params = record.get(self.params_field, None)
215
+
216
+ cur = self.client.conn.cursor()
217
+ try:
218
+ did_executemany = False
219
+ header_params = None
220
+
221
+ # ---------- execute ----------
222
+ if json_params is not None:
223
+ # Explicit JSON payload; caller controls shape.
224
+ # We don't inspect query or payload.
225
+ if isinstance(json_params, (list, tuple, dict)):
226
+ cur.execute(query, json_params)
227
+ else:
228
+ cur.execute(query, (json_params,))
229
+ header_params = {self.json_params_field: json_params}
230
+
231
+ elif batch is not None:
232
+ # Legacy executemany path; no magic.
233
+ if len(batch) == 0:
234
+ cur.execute("SELECT 1")
235
+ header_params = {"batch_size": 0}
236
+ elif len(batch) == 1:
237
+ cur.execute(query, batch[0])
238
+ header_params = {"batch_size": 1, "params": batch[0]}
239
+ else:
240
+ cur.executemany(query, batch)
241
+ did_executemany = True
242
+ header_params = {"batch_size": len(batch)}
243
+
244
+ else:
245
+ # Single-statement path.
246
+ if params is None:
247
+ cur.execute(query)
248
+ header_params = None
249
+ else:
250
+ if isinstance(params, (list, tuple, dict)):
251
+ cur.execute(query, params)
252
+ else:
253
+ cur.execute(query, (params,))
254
+ header_params = params
255
+
256
+ # ---------- header ----------
257
+ yield self._make_header(cur, query, header_params)
258
+
259
+ # ---------- stream rows (only meaningful for single execute that returns rows) ----------
260
+ if not did_executemany and cur.description:
261
+ cols = [d[0] for d in cur.description]
262
+ if not (len(cols) == 1 and cols[0] == "ingest_event"):
263
+ for row in cur:
264
+ yield _row_to_dict(cur, row)
265
+
266
+ finally:
267
+ cur.close()
268
+ # connection stays open for this pipe; closed in .close()
@@ -35,8 +35,8 @@ class QueryPipe(Pipe):
35
35
  return u
36
36
 
37
37
 
38
- def __init__(self, ptok: ParsedToken, usage: Usage):
39
- super().__init__(ptok, usage)
38
+ def __init__(self, ptok: ParsedToken, usage: Usage, root = None):
39
+ super().__init__(ptok, usage, root=root)
40
40
  self.output_shape = usage.get_param('shape')
41
41
  self.count = usage.get_param('count')
42
42
  self.query_field = 'query' # for all subclasses
@@ -92,6 +92,7 @@ class MultiYAdapter:
92
92
  @staticmethod
93
93
  def to_df(records: Iterable[Dict[str, Any]], x_field: str, y_fields: Sequence[str]) -> pd.DataFrame:
94
94
  import pandas as pd # lazy
95
+ import numpy as np # lazy
95
96
  rows: List[Dict[str, Any]] = []
96
97
  for r in records:
97
98
  if x_field not in r:
@@ -120,7 +121,8 @@ class MultiYAdapter:
120
121
  class SingleYWithSetsAdapter:
121
122
  """Legacy: single y_field + optional per-row set_name to create series."""
122
123
  @staticmethod
123
- def to_df(records: Iterable[Dict[str, Any]], x_field: str, y_field: str) -> pd.DataFrame:
124
+ def to_df(records: Iterable[Dict[str, Any]], x_field: str, y_field: str):
125
+ import pandas as pd # lazy
124
126
  triplets = [] # (x, y, set_name)
125
127
  for r in records:
126
128
  if x_field in r and y_field in r:
@@ -137,7 +139,6 @@ class SingleYWithSetsAdapter:
137
139
  # ----------------------------- Plotter -----------------------------
138
140
  class GraphPlotter:
139
141
  def __init__(self, params: GraphParams):
140
- import numpy as np
141
142
  self.pms = params
142
143
  self.y_fields = list(dict.fromkeys(self.pms.y_fields)) # dedupe, preserve order
143
144
 
@@ -145,6 +146,7 @@ class GraphPlotter:
145
146
  import matplotlib.pyplot as plt # lazy
146
147
  import matplotlib.dates as mdates # lazy
147
148
  import pandas as pd # lazy
149
+ import numpy as np # lazy
148
150
 
149
151
  fig = plt.figure()
150
152
  ax = plt.gca()
@@ -291,6 +293,7 @@ class GraphPlotter:
291
293
 
292
294
  def _bars_time(self, ax, df: pd.DataFrame, y_cols: Sequence[str]) -> None:
293
295
  # Grouped bars at each timestamp using index positions
296
+ import numpy as np # lazy
294
297
  x_vals = df["ts"].to_numpy(); idx = np.arange(len(x_vals))
295
298
  n = len(y_cols); width = 0.8 / max(n, 1)
296
299
  for i, y in enumerate(y_cols):
@@ -300,6 +303,7 @@ class GraphPlotter:
300
303
  ax.set_xticks(idx, [pd.to_datetime(t).strftime("%Y-%m-%d %H:%M") for t in x_vals], rotation=45)
301
304
 
302
305
  def _bars_categorical(self, ax, df: pd.DataFrame, y_cols: Sequence[str]) -> None:
306
+ import numpy as np # lazy
303
307
  seen = set(); ordered_x: List[Any] = []
304
308
  for x in df["x"].tolist():
305
309
  if x not in seen:
@@ -312,6 +316,7 @@ class GraphPlotter:
312
316
  ax.set_xticks(idx, ordered_x, rotation=45)
313
317
 
314
318
  def _lines_categorical(self, ax, df: pd.DataFrame, y_cols: Sequence[str]) -> None:
319
+ import numpy as np # lazy
315
320
  seen = set(); ordered_x: List[Any] = []
316
321
  for x in df["x"].tolist():
317
322
  if x not in seen:
@@ -13,9 +13,11 @@ from pjk.sources.inline_source import InlineSource
13
13
  from pjk.sources.user_source_factory import UserSourceFactory
14
14
  from pjk.sources.parquet_source import ParquetSource
15
15
  from pjk.sources.format_source import FormatSource
16
+ from pjk.sources.s3_select_source import S3SelectSource
16
17
 
17
18
  COMPONENTS = {
18
19
  'inline': InlineSource,
20
+ 's3s': S3SelectSource,
19
21
  'json': JsonSource,
20
22
  'jsonl': JsonSource,
21
23
  'csv': CSVSource,
@@ -35,6 +37,11 @@ class SourceFactory(ComponentFactory):
35
37
  def create(self, token: str) -> Source:
36
38
  token = token.strip()
37
39
 
40
+ # s3s is a pseudo source only in the above list to provide easy man page
41
+ # it's instantiated by the parser when <file>.s3s, so disallow standard search for it.
42
+ if token == 's3s':
43
+ return None
44
+
38
45
  if InlineSource.is_inline(token):
39
46
  return InlineSource(token)
40
47
 
@@ -44,6 +51,12 @@ class SourceFactory(ComponentFactory):
44
51
  source = UserSourceFactory.create(ptok)
45
52
  if source:
46
53
  return source
54
+
55
+ # s3 select file
56
+ if ptok.pre_colon.endswith('.s3s'):
57
+ source = S3SelectSource(ptok, None)
58
+ if source:
59
+ return source
47
60
 
48
61
  source_cls = self.get_component_class(ptok.pre_colon)
49
62
  if source_cls and not issubclass(source_cls, FormatSource):