python-jack-knife 0.5.0__tar.gz → 0.5.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/PKG-INFO +1 -1
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/base.py +46 -27
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/common.py +15 -8
- python_jack_knife-0.5.5/src/pjk/log.py +62 -0
- python_jack_knife-0.5.5/src/pjk/main.py +135 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/parser.py +13 -4
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/factory.py +6 -2
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/filter.py +3 -3
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/head.py +3 -5
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/join.py +4 -4
- python_jack_knife-0.5.5/src/pjk/pipes/map.py +130 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/move_field.py +2 -2
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/postgres_pipe.py +5 -9
- python_jack_knife-0.5.5/src/pjk/pipes/progress_pipe.py +41 -0
- python_jack_knife-0.5.5/src/pjk/pipes/sample.py +66 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/select.py +2 -4
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/tail.py +1 -1
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/where.py +12 -15
- python_jack_knife-0.5.5/src/pjk/progress.py +177 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/registry.py +34 -5
- python_jack_knife-0.5.5/src/pjk/sinks/csv_sink.py +22 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/devnull.py +13 -6
- python_jack_knife-0.5.5/src/pjk/sinks/dir_sink.py +71 -0
- python_jack_knife-0.5.5/src/pjk/sinks/expect.py +92 -0
- python_jack_knife-0.5.5/src/pjk/sinks/factory.py +50 -0
- python_jack_knife-0.5.5/src/pjk/sinks/format_sink.py +126 -0
- python_jack_knife-0.5.5/src/pjk/sinks/json_sink.py +14 -0
- python_jack_knife-0.5.5/src/pjk/sinks/s3_sink.py +90 -0
- python_jack_knife-0.5.5/src/pjk/sinks/s3_stream.py +134 -0
- python_jack_knife-0.5.5/src/pjk/sinks/tsv_sink.py +12 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/user_sink_factory.py +2 -1
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/csv_source.py +3 -6
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/dir_source.py +30 -17
- python_jack_knife-0.5.5/src/pjk/sources/factory.py +58 -0
- python_jack_knife-0.5.5/src/pjk/sources/format_source.py +119 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/json_source.py +3 -7
- python_jack_knife-0.5.5/src/pjk/sources/npy_source.py +76 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/parquet_source.py +3 -7
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/s3_source.py +42 -51
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/sql_source.py +4 -11
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/tsv_source.py +2 -6
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/version.py +1 -1
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/SOURCES.txt +7 -1
- python_jack_knife-0.5.0/src/pjk/log.py +0 -67
- python_jack_knife-0.5.0/src/pjk/main.py +0 -106
- python_jack_knife-0.5.0/src/pjk/pipes/map.py +0 -91
- python_jack_knife-0.5.0/src/pjk/sinks/csv_sink.py +0 -33
- python_jack_knife-0.5.0/src/pjk/sinks/dir_sink.py +0 -59
- python_jack_knife-0.5.0/src/pjk/sinks/expect.py +0 -53
- python_jack_knife-0.5.0/src/pjk/sinks/factory.py +0 -108
- python_jack_knife-0.5.0/src/pjk/sinks/json_sink.py +0 -23
- python_jack_knife-0.5.0/src/pjk/sinks/s3_sink.py +0 -100
- python_jack_knife-0.5.0/src/pjk/sinks/tsv_sink.py +0 -22
- python_jack_knife-0.5.0/src/pjk/sources/factory.py +0 -100
- python_jack_knife-0.5.0/src/pjk/sources/format_usage.py +0 -11
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/LICENSE +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/README.md +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/pyproject.toml +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/setup.cfg +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/__init__.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/man_page.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/__init__.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/denorm.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/let_reduce.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/remove_field.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/sort.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/user_pipe_factory.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/__init__.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/ddb.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/graph.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_bar_line.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_cumulative.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_hist.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_scatter.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/sinks.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/stdout.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/__init__.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/inline_source.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file_local.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file_s3.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/source_list.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/user_source_factory.py +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/requires.txt +0 -0
- {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/top_level.txt +0 -0
|
@@ -152,16 +152,17 @@ class Usage:
|
|
|
152
152
|
lines.append(self.desc)
|
|
153
153
|
|
|
154
154
|
syntax_str = self.get_token_syntax() # might be ''
|
|
155
|
-
if
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
155
|
+
if not syntax_str:
|
|
156
|
+
return '\n'.join(lines)
|
|
157
|
+
|
|
158
|
+
lines.append('')
|
|
159
|
+
lines.append(f'syntax:')
|
|
160
|
+
lines.append(f' {self.get_token_syntax()}')
|
|
160
161
|
lines.extend(f"{line}" for line in self.get_arg_param_desc())
|
|
161
162
|
return '\n'.join(lines)
|
|
162
163
|
|
|
163
164
|
def get_token_syntax(self):
|
|
164
|
-
if self.syntax
|
|
165
|
+
if self.syntax:
|
|
165
166
|
return self.syntax # else piece it together
|
|
166
167
|
|
|
167
168
|
token = f'{self.name}'
|
|
@@ -216,7 +217,10 @@ class Usage:
|
|
|
216
217
|
self.args[name] = self._get_val(val_str, is_num, valid_values)
|
|
217
218
|
except (ValueError, TypeError) as e:
|
|
218
219
|
raise TokenError.from_list([f"wrong value for '{name}' arg.", '', self.get_usage_text()])
|
|
219
|
-
|
|
220
|
+
|
|
221
|
+
self.bind_params(ptok)
|
|
222
|
+
|
|
223
|
+
def bind_params(self, ptok: ParsedToken):
|
|
220
224
|
for name, str_val in ptok.get_params().items():
|
|
221
225
|
usage = self.param_usages.get(name, None)
|
|
222
226
|
if not usage:
|
|
@@ -276,8 +280,6 @@ class KeyedSource(ABC):
|
|
|
276
280
|
return None
|
|
277
281
|
|
|
278
282
|
class Source(ABC):
|
|
279
|
-
is_format = False
|
|
280
|
-
|
|
281
283
|
@classmethod
|
|
282
284
|
def usage(cls):
|
|
283
285
|
return NoBindUsage(
|
|
@@ -296,17 +298,21 @@ class Source(ABC):
|
|
|
296
298
|
self._iter = iter(self)
|
|
297
299
|
return next(self._iter)
|
|
298
300
|
|
|
299
|
-
|
|
300
301
|
def deep_copy(self):
|
|
301
302
|
return None # Default: not copyable unless overridden
|
|
302
|
-
|
|
303
|
+
|
|
304
|
+
def close(self):
|
|
305
|
+
pass
|
|
306
|
+
|
|
307
|
+
def _get_sources(self, source_list: list):
|
|
308
|
+
pass
|
|
303
309
|
|
|
304
310
|
class Pipe(Source):
|
|
305
|
-
deep_copyable: bool = False # default to false
|
|
306
311
|
arity: int = 1
|
|
307
312
|
|
|
308
313
|
def __init__(self, ptok: ParsedToken, usage: Usage = None):
|
|
309
314
|
self.ptok = ptok
|
|
315
|
+
self.usage = usage
|
|
310
316
|
self.left = None # left source for convience
|
|
311
317
|
self.right = None # right source for convience
|
|
312
318
|
self.inputs: List[Source] = []
|
|
@@ -324,24 +330,29 @@ class Pipe(Source):
|
|
|
324
330
|
pass # optional hook
|
|
325
331
|
|
|
326
332
|
def deep_copy(self) -> Optional["Pipe"]:
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
+
return None
|
|
334
|
+
|
|
335
|
+
def _get_sources(self, source_list: list):
|
|
336
|
+
for ix in self.inputs:
|
|
337
|
+
source_list.append(ix)
|
|
338
|
+
ix._get_sources(source_list)
|
|
333
339
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
340
|
+
class DeepCopyPipe(Pipe):
    def deep_copy(self):
        """
        Generic deep_copy for single-input pipes.

        Clones the left source, builds a fresh instance of this pipe's
        concrete class from the same ptok/usage, and attaches the clone.
        Returns None when the left source reports it cannot be copied.
        """
        upstream_copy = self.left.deep_copy()
        if upstream_copy:
            # type(self) rather than DeepCopyPipe so subclasses clone as themselves
            duplicate = type(self)(self.ptok, self.usage)
            duplicate.add_source(upstream_copy)
            return duplicate
        return None
|
|
341
354
|
|
|
342
355
|
class Sink(ABC):
|
|
343
|
-
is_format = False
|
|
344
|
-
|
|
345
356
|
@classmethod
|
|
346
357
|
def usage(cls):
|
|
347
358
|
return NoBindUsage(
|
|
@@ -356,8 +367,16 @@ class Sink(ABC):
|
|
|
356
367
|
|
|
357
368
|
def drain(self):
    """
    Run this sink to completion, then release every resource in its chain.

    Calls ``process()`` followed by ``close()`` on the sink itself. After
    that, ``self.input`` and every upstream source gathered through
    ``_get_sources`` are closed. The upstream sources are closed even when
    ``process()`` or ``close()`` raises, so a failed run does not leak them;
    the original exception still propagates to the caller.
    """
    try:
        self.process()
        self.close()
    finally:
        # get all inputs in the execution chain for closing
        sources = [self.input]
        self.input._get_sources(sources)
        for source in sources:
            source.close()
|
|
359
377
|
|
|
360
|
-
|
|
378
|
+
# optional
|
|
379
|
+
def close(self):
|
|
361
380
|
pass
|
|
362
381
|
|
|
363
382
|
def add_source(self, source: Source) -> None:
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
import sys, shutil, subprocess, contextlib, signal
|
|
5
5
|
import os
|
|
6
6
|
import yaml
|
|
7
|
+
from pjk.base import TokenError
|
|
7
8
|
|
|
8
9
|
class SafeNamespace:
|
|
9
10
|
def __init__(self, obj):
|
|
@@ -73,11 +74,12 @@ def highlight(text: str, color: str = 'bold', value: str = None) -> str:
|
|
|
73
74
|
return text.replace(value, f"{style}{value}{RESET}")
|
|
74
75
|
|
|
75
76
|
class Lookups:
|
|
76
|
-
def __init__(self):
|
|
77
|
+
def __init__(self, component_class):
|
|
77
78
|
self.lookups_yaml = os.path.expanduser('~/.pjk/lookups.yaml')
|
|
79
|
+
self.class_name = type(component_class).__name__
|
|
78
80
|
self._data = {}
|
|
79
81
|
self._load()
|
|
80
|
-
|
|
82
|
+
|
|
81
83
|
def _load(self):
|
|
82
84
|
"""Load lookups from YAML file if it exists."""
|
|
83
85
|
if os.path.exists(self.lookups_yaml):
|
|
@@ -93,8 +95,13 @@ class Lookups:
|
|
|
93
95
|
yaml.safe_dump(self._data, f)
|
|
94
96
|
|
|
95
97
|
def get(self, key, default=None):
    """
    Return the lookup entry stored under ``<class_name>-<key>``.

    ``default`` is used when the key is absent. A missing or falsy result
    raises TokenError telling the user which entry to add to lookups.yaml.
    """
    lookup_key = f'{self.class_name}-{key}'
    found = self._data.get(lookup_key, default)
    if found:
        return found
    raise TokenError(
        f"~/.pjk/lookups.yaml must contain entry for '{lookup_key}' with host, user, password."
    )
|
|
98
105
|
|
|
99
106
|
def set(self, key, value):
|
|
100
107
|
"""Set a lookup value and persist it."""
|
|
@@ -129,14 +136,14 @@ class ComponentFactory:
|
|
|
129
136
|
print(header)
|
|
130
137
|
|
|
131
138
|
i = 0
|
|
132
|
-
|
|
139
|
+
# user and outside package components are also here, but printed from registry class
|
|
133
140
|
for name, comp_class in self.components.items():
|
|
134
141
|
usage = comp_class.usage()
|
|
135
142
|
lines = usage.desc.split('\n')
|
|
136
143
|
if i >= self.num_orig_comps:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
line =
|
|
144
|
+
break
|
|
145
|
+
|
|
146
|
+
line = f' {name:<12} {lines[0]}'
|
|
140
147
|
print(line)
|
|
141
148
|
i += 1
|
|
142
149
|
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
import logging, os, tempfile
|
|
5
|
+
from logging.handlers import RotatingFileHandler
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger("pjk")
|
|
10
|
+
|
|
11
|
+
def _truthy(v: Optional[str]) -> bool:
|
|
12
|
+
return str(v).lower() in ("1", "true", "yes", "on")
|
|
13
|
+
|
|
14
|
+
def init(force: bool = False, level: Optional[int] = None):
    """
    Initialize 'pjk' logging.

    - Rotates at DJK_LOG_MAX_MB (default 2 MB), keeps DJK_LOG_BACKUPS (default 3).
    - Files under ~/.pjk/logs by default; override with DJK_LOG_DIR / DJK_LOG_FILE.
    - Set DJK_DEBUG=1|true|yes|on for DEBUG, else INFO (or pass explicit level).
    - If the file handler cannot be set up (directory not writable, or a
      malformed DJK_LOG_MAX_MB / DJK_LOG_BACKUPS value), fall back to a
      console handler.
    - Set force=True to replace existing handlers; the replaced handlers are
      closed so their log files are released.

    Does nothing when the logger already has handlers and force is False.
    """
    if logger.handlers and not force:
        return

    # Close each handler as it is detached; clearing the list alone would
    # leave the previous log file open after a forced re-init.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    level = level or (logging.DEBUG if _truthy(os.getenv("DJK_DEBUG")) else logging.INFO)
    fmt = "[%(levelname)s] [%(threadName)s] %(message)s"
    formatter = logging.Formatter(fmt)

    try:
        # Preferred: rotating file handler under ~/.pjk/logs
        log_dir = Path(os.getenv("DJK_LOG_DIR", Path.home() / ".pjk" / "logs"))
        log_dir.mkdir(parents=True, exist_ok=True)

        log_file = log_dir / os.getenv("DJK_LOG_FILE", "pjk.log")
        max_bytes = int(float(os.getenv("DJK_LOG_MAX_MB", "2")) * 1024 * 1024)  # 2 MB
        backups = int(os.getenv("DJK_LOG_BACKUPS", "3"))

        fh = RotatingFileHandler(
            log_file,
            maxBytes=max_bytes,
            backupCount=backups,
            encoding="utf-8",
            delay=False,
        )
        fh.setLevel(level)
        fh.setFormatter(formatter)
        logger.addHandler(fh)
    except Exception:
        # Fallback: console handler. Deliberately broad: logging setup is
        # best-effort and must not prevent the tool from running.
        ch = logging.StreamHandler()
        ch.setLevel(level)
        ch.setFormatter(formatter)
        logger.addHandler(ch)
        logger.warning("Falling back to console logging (log file not writable)")

    logger.setLevel(level)
    # Do not propagate to root
    logger.propagate = False
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
#!/usr/bin/env python
|
|
5
|
+
import sys
|
|
6
|
+
import os
|
|
7
|
+
import signal
|
|
8
|
+
import shlex
|
|
9
|
+
from typing import List
|
|
10
|
+
from pjk.parser import ExpressionParser
|
|
11
|
+
from pjk.base import UsageError
|
|
12
|
+
from pjk.log import init as init_logging
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
import traceback
|
|
15
|
+
import concurrent.futures
|
|
16
|
+
from pjk.registry import ComponentRegistry
|
|
17
|
+
from pjk.sinks.stdout import StdoutSink
|
|
18
|
+
from pjk.man_page import do_man, do_examples
|
|
19
|
+
from pjk.sinks.expect import ExpectSink
|
|
20
|
+
from pjk.progress import ProgressDisplay
|
|
21
|
+
from pjk.version import __version__
|
|
22
|
+
|
|
23
|
+
def write_history(tokens):
    """
    Append the executed command to ``.pjk-history.txt`` in the current directory.

    Each line is ``<YYYY-MM-DD HH:MM>\\tpjk <command>``. Tokens are shell-quoted
    so that arguments containing spaces or metacharacters (for example inline
    JSON sources) can be pasted back into a shell unchanged.

    Does nothing when the environment variable PJK_NO_HISTORY is "1".
    Failures to write the file are ignored: history is best-effort.
    """
    if os.environ.get("PJK_NO_HISTORY") == "1":
        return

    log_path = ".pjk-history.txt"
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    # a plain " ".join would lose the original token boundaries
    command = shlex.join(tokens)

    try:
        with open(log_path, "a", encoding="utf-8") as f:
            f.write(f"{timestamp}\tpjk {command}\n")
    except OSError:
        # PermissionError is an OSError; an unwritable directory must not fail the run
        pass
|
|
36
|
+
|
|
37
|
+
def _stop_progress_quietly(stop_progress):
    """Invoke the optional progress stopper, ignoring any error it raises."""
    if not stop_progress:
        return
    try:
        stop_progress()
    except Exception:
        # UI teardown must never mask the failure that is being handled
        pass


def execute_threaded(sinks, stop_progress=None):
    """
    Drain every sink concurrently, one worker thread per sink (at most 32).

    Args:
        sinks: objects exposing a ``drain()`` method. An empty sequence is a no-op.
        stop_progress: optional zero-argument callable invoked before any
            error output or shutdown, so a progress display is halted first.

    Raises:
        Whatever exception a sink's ``drain()`` raised; it is printed to
        stderr with its traceback and then re-raised.

    On KeyboardInterrupt the pending work is cancelled and the process exits
    immediately with status 130 via ``os._exit``.
    """
    if not sinks:
        # ThreadPoolExecutor rejects max_workers=0
        return

    max_workers = min(32, len(sinks))
    # no 'with': the error paths below shut down with wait=False instead of
    # blocking on still-running workers
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers)
    futures = {executor.submit(s.drain): s for s in sinks}
    current_sink = None  # sink whose result is being collected, for error reporting
    try:
        for future in concurrent.futures.as_completed(futures):
            current_sink = futures[future]
            future.result()  # re-raises worker exception with traceback
    except KeyboardInterrupt:
        # stop UI first, then cancel and non-blocking shutdown
        _stop_progress_quietly(stop_progress)
        for f in futures:
            f.cancel()
        executor.shutdown(wait=False, cancel_futures=True)
        os._exit(130)

    except Exception as e:
        _stop_progress_quietly(stop_progress)
        sys.stderr.write(f"Sink {current_sink} raised an exception:\n")
        traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr)
        for f in futures:
            f.cancel()
        executor.shutdown(wait=False, cancel_futures=True)
        raise
    else:
        executor.shutdown(wait=True)
|
|
67
|
+
|
|
68
|
+
def execute(command: str):
    """Split a shell-style command string (``#`` comments allowed) and run it."""
    execute_tokens(shlex.split(command, comments=True, posix=True))
|
|
71
|
+
|
|
72
|
+
def execute_tokens(tokens: List[str]):
    """
    Run one pjk expression given as a list of already-split tokens.

    Handles the built-in forms first (``--version``, no tokens, ``man <name>``,
    ``examples`` / ``examples+``). Otherwise it parses the tokens into a sink
    and drains it. When the sink can be deep-copied, up to one copy per CPU
    is drained in parallel threads. A progress display is shown unless the
    sink is a StdoutSink or ExpectSink.

    A UsageError is printed to stderr and exits with status 2. A
    KeyboardInterrupt ends the run silently. ``--version`` exits with status 0.
    """
    init_logging()

    if '--version' in tokens:
        print(f"pjk version {__version__}")
        sys.exit(0)

    registry = ComponentRegistry()

    if len(tokens) < 1:
        registry.print_usage()
        return

    if len(tokens) == 2 and tokens[0] == 'man':
        do_man(tokens[1], registry)
        return

    if len(tokens) == 1 and tokens[0] in ['examples', 'examples+']:
        do_examples(tokens[0], registry)
        return

    parser = ExpressionParser(registry)

    display = None
    try:
        sink = parser.parse(tokens)
        # tuple form works on every Python 3 version; `A | B` here needs 3.10+
        if not isinstance(sink, (StdoutSink, ExpectSink)):
            display = ProgressDisplay(interval=3.0)
            display.start()

        sinks = [sink]
        # os.cpu_count() may return None when the count cannot be determined
        max_threads = os.cpu_count() or 1
        while len(sinks) < max_threads:
            clone = sink.deep_copy()
            if not clone:
                break
            sinks.append(clone)

        if len(sinks) > 1:
            # pass a stopper so we halt the UI before tracebacks / shutdown
            execute_threaded(sinks, stop_progress=(display.stop if display else None))
        else:
            sink.drain()

        # record what was actually executed; sys.argv is unrelated when this
        # function is reached through execute(command)
        write_history(tokens)

    except UsageError as e:
        print(e, file=sys.stderr)
        sys.exit(2)
    except KeyboardInterrupt:
        pass
    finally:
        if display:
            # short join so Ctrl-C is immediate
            try:
                display.stop(timeout=0.1)
            except Exception:
                pass
|
|
129
|
+
|
|
130
|
+
def main():
    """Command-line entry point: run the expression given in the process arguments."""
    execute_tokens(sys.argv[1:])
|
|
133
|
+
|
|
134
|
+
if __name__ == "__main__":
|
|
135
|
+
main()
|
|
@@ -8,6 +8,9 @@ from typing import Optional, Any, List
|
|
|
8
8
|
from pjk.base import Source, Pipe, Sink, TokenError, UsageError, ParsedToken, Usage
|
|
9
9
|
from pjk.pipes.user_pipe_factory import UserPipeFactory
|
|
10
10
|
from pjk.pipes.let_reduce import ReducePipe
|
|
11
|
+
from pjk.sinks.stdout import StdoutSink
|
|
12
|
+
from pjk.sinks.expect import ExpectSink
|
|
13
|
+
from pjk.pipes.progress_pipe import ProgressPipe
|
|
11
14
|
from pjk.registry import ComponentRegistry
|
|
12
15
|
|
|
13
16
|
def expand_macros(tokens: List[str]) -> List[str]:
|
|
@@ -58,8 +61,12 @@ class ExpressionParser:
|
|
|
58
61
|
if not sink:
|
|
59
62
|
raise TokenError.from_list(['expression must end in a sink.',
|
|
60
63
|
'pjk <source> [<pipe> ...] <sink>'])
|
|
64
|
+
|
|
65
|
+
# so each sink doesn't have to, maybe make a base class or mixin for sinks
|
|
66
|
+
progress_pipe = ProgressPipe(component_instance=sink)
|
|
67
|
+
progress_pipe.add_source(source)
|
|
61
68
|
|
|
62
|
-
sink.add_source(
|
|
69
|
+
sink.add_source(progress_pipe)
|
|
63
70
|
return sink
|
|
64
71
|
|
|
65
72
|
def parse(self, tokens: List[str]) -> Sink:
|
|
@@ -78,10 +85,12 @@ class ExpressionParser:
|
|
|
78
85
|
return self.get_sink(stack_helper, token)
|
|
79
86
|
|
|
80
87
|
source = self.registry.create_source(token)
|
|
81
|
-
if source:
|
|
88
|
+
if source:
|
|
82
89
|
stack_helper.add_operator(source, self.stack)
|
|
90
|
+
progress_pipe = ProgressPipe(component_instance=source, simple=True)
|
|
91
|
+
stack_helper.add_operator(progress_pipe, self.stack)
|
|
83
92
|
continue
|
|
84
|
-
|
|
93
|
+
|
|
85
94
|
subexp = SubExpression.create(token)
|
|
86
95
|
if subexp:
|
|
87
96
|
stack_helper.add_operator(subexp, self.stack)
|
|
@@ -94,7 +103,7 @@ class ExpressionParser:
|
|
|
94
103
|
|
|
95
104
|
else: # unrecognized token
|
|
96
105
|
# could be sink in WRONG position, let's see for better error message
|
|
97
|
-
sink = self.registry.create_sink(token
|
|
106
|
+
sink = self.registry.create_sink(token)
|
|
98
107
|
if sink:
|
|
99
108
|
raise TokenError.from_list(['sink may only occur in final position.',
|
|
100
109
|
'pjk <source> [<pipe> ...] <sink>'])
|
|
@@ -12,12 +12,14 @@ from pjk.pipes.head import HeadPipe
|
|
|
12
12
|
from pjk.pipes.tail import TailPipe
|
|
13
13
|
from pjk.pipes.sort import SortPipe
|
|
14
14
|
from pjk.pipes.where import WherePipe
|
|
15
|
-
from pjk.pipes.map import
|
|
15
|
+
from pjk.pipes.map import MapByPipe
|
|
16
|
+
from pjk.pipes.map import GroupByPipe
|
|
16
17
|
from pjk.pipes.join import JoinPipe
|
|
17
18
|
from pjk.pipes.filter import FilterPipe
|
|
18
19
|
from pjk.pipes.select import SelectFields
|
|
19
20
|
from pjk.pipes.denorm import DenormPipe
|
|
20
21
|
from pjk.pipes.postgres_pipe import PostgresPipe
|
|
22
|
+
from pjk.pipes.sample import SamplePipe
|
|
21
23
|
from pjk.pipes.user_pipe_factory import UserPipeFactory
|
|
22
24
|
|
|
23
25
|
COMPONENTS = {
|
|
@@ -25,7 +27,8 @@ COMPONENTS = {
|
|
|
25
27
|
'tail': TailPipe,
|
|
26
28
|
'join': JoinPipe,
|
|
27
29
|
'filter': FilterPipe,
|
|
28
|
-
'
|
|
30
|
+
'mapby': MapByPipe,
|
|
31
|
+
'groupby': GroupByPipe,
|
|
29
32
|
'as': MoveField,
|
|
30
33
|
'drop': RemoveField,
|
|
31
34
|
'let': LetPipe,
|
|
@@ -33,6 +36,7 @@ COMPONENTS = {
|
|
|
33
36
|
'sort': SortPipe,
|
|
34
37
|
'where': WherePipe,
|
|
35
38
|
'sel': SelectFields,
|
|
39
|
+
'sample': SamplePipe,
|
|
36
40
|
'explode': DenormPipe,
|
|
37
41
|
'pgres': PostgresPipe,
|
|
38
42
|
}
|
|
@@ -15,13 +15,13 @@ class FilterPipe(Pipe):
|
|
|
15
15
|
)
|
|
16
16
|
usage.def_arg("mode", "'+' to include matches, '-' to exclude matches",
|
|
17
17
|
valid_values={'+', '-'})
|
|
18
|
-
usage.def_syntax("pjk <left_source> <map_source>
|
|
18
|
+
usage.def_syntax("pjk <left_source> <map_source> [mapby:groupby]:<how>:<key> filter:<mode> <sink>")
|
|
19
19
|
|
|
20
20
|
usage.def_example(expr_tokens=
|
|
21
21
|
[
|
|
22
22
|
"[{id:1}, {id:2}, {id:3}, {id:4}, {id:5}]",
|
|
23
23
|
"[{id:1}, {id:3}, {id:5}]",
|
|
24
|
-
'
|
|
24
|
+
'mapby:id',
|
|
25
25
|
"filter:+"
|
|
26
26
|
],
|
|
27
27
|
expect="[{id:1}, {id:3}, {id:5}]")
|
|
@@ -30,7 +30,7 @@ class FilterPipe(Pipe):
|
|
|
30
30
|
[
|
|
31
31
|
"[{id:1}, {id:2}, {id:3}, {id:4}, {id:5}]",
|
|
32
32
|
"[{id:1}, {id:3}, {id:5}]",
|
|
33
|
-
'
|
|
33
|
+
'mapby:id',
|
|
34
34
|
"filter:-"
|
|
35
35
|
],
|
|
36
36
|
expect="[{id:2}, {id:4}]")
|
|
@@ -2,8 +2,6 @@
|
|
|
2
2
|
# Copyright 2024 Mike Schultz
|
|
3
3
|
|
|
4
4
|
# djk/pipes/head.py
|
|
5
|
-
|
|
6
|
-
from typing import Optional
|
|
7
5
|
from pjk.base import Pipe, ParsedToken, Usage
|
|
8
6
|
|
|
9
7
|
class HeadPipe(Pipe):
|
|
@@ -11,15 +9,15 @@ class HeadPipe(Pipe):
|
|
|
11
9
|
def usage(cls):
|
|
12
10
|
usage = Usage(
|
|
13
11
|
name='head',
|
|
14
|
-
desc='take first records of input (
|
|
12
|
+
desc='take first records of input (single-threaded)',
|
|
15
13
|
component_class=cls
|
|
16
14
|
)
|
|
17
|
-
usage.def_arg(name='limit', usage='number of records', is_num=True)
|
|
18
15
|
usage.def_example(expr_tokens=['[{id:1}, {id:2}]', 'head:1'], expect="{id:1}")
|
|
16
|
+
usage.def_arg(name='limit', usage='number of records', is_num=True)
|
|
19
17
|
return usage
|
|
20
18
|
|
|
21
19
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
22
|
-
super().__init__(ptok)
|
|
20
|
+
super().__init__(ptok, usage)
|
|
23
21
|
self.limit = usage.get_arg('limit')
|
|
24
22
|
self.count = 0
|
|
25
23
|
|
|
@@ -20,13 +20,13 @@ class JoinPipe(Pipe):
|
|
|
20
20
|
usage="'left', 'inner', or 'outer' join behavior",
|
|
21
21
|
valid_values={'left', 'inner', 'outer'}
|
|
22
22
|
)
|
|
23
|
-
usage.def_syntax("pjk <left_source> <map_source>
|
|
23
|
+
usage.def_syntax("pjk <left_source> <map_source> [mapby|groupby]:<key> join:<mode> <sink>")
|
|
24
24
|
|
|
25
25
|
usage.def_example(expr_tokens=
|
|
26
26
|
[
|
|
27
27
|
"[{color:'blue'},{color:'green'}]",
|
|
28
28
|
"[{color:'blue', price:50}, {color:'red', price:20}]",
|
|
29
|
-
'
|
|
29
|
+
'mapby:color',
|
|
30
30
|
"join:left"
|
|
31
31
|
],
|
|
32
32
|
expect="[{color:'blue', price:50}, {color:'green'}]")
|
|
@@ -34,7 +34,7 @@ class JoinPipe(Pipe):
|
|
|
34
34
|
[
|
|
35
35
|
"[{color:'blue'},{color:'green'}]",
|
|
36
36
|
"[{color:'blue', price:50}, {color:'red', price:20}]",
|
|
37
|
-
'
|
|
37
|
+
'mapby:color',
|
|
38
38
|
"join:inner"
|
|
39
39
|
],
|
|
40
40
|
expect="[{color:'blue', price:50}]")
|
|
@@ -43,7 +43,7 @@ class JoinPipe(Pipe):
|
|
|
43
43
|
[
|
|
44
44
|
"[{color:'blue'},{color:'green'}]",
|
|
45
45
|
"[{color:'blue', price:50}, {color:'red', price:20}]",
|
|
46
|
-
'
|
|
46
|
+
'mapby:color',
|
|
47
47
|
"join:outer"
|
|
48
48
|
],
|
|
49
49
|
expect="[{color:'blue', price:50}, {color:'green'}, {color:'red', price: 20}]")
|