python-jack-knife 0.5.1__tar.gz → 0.5.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/PKG-INFO +1 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/base.py +19 -20
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/common.py +15 -8
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/main.py +56 -31
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/parser.py +12 -3
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/factory.py +6 -2
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/filter.py +3 -3
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/head.py +4 -6
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/join.py +4 -4
- python_jack_knife-0.5.5/src/pjk/pipes/map.py +130 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/move_field.py +2 -2
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/postgres_pipe.py +5 -9
- python_jack_knife-0.5.5/src/pjk/pipes/progress_pipe.py +41 -0
- python_jack_knife-0.5.5/src/pjk/pipes/sample.py +66 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/select.py +2 -4
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/tail.py +1 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/where.py +10 -5
- python_jack_knife-0.5.5/src/pjk/progress.py +177 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/registry.py +25 -3
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/devnull.py +13 -6
- python_jack_knife-0.5.5/src/pjk/sinks/expect.py +92 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/factory.py +0 -5
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/user_sink_factory.py +2 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/dir_source.py +2 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/factory.py +3 -34
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/format_source.py +5 -0
- python_jack_knife-0.5.5/src/pjk/sources/npy_source.py +76 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/s3_source.py +2 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/version.py +1 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/SOURCES.txt +4 -0
- python_jack_knife-0.5.1/src/pjk/pipes/map.py +0 -91
- python_jack_knife-0.5.1/src/pjk/sinks/expect.py +0 -53
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/LICENSE +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/README.md +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/pyproject.toml +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/setup.cfg +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/__init__.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/log.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/man_page.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/__init__.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/denorm.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/let_reduce.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/remove_field.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/sort.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/user_pipe_factory.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/__init__.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/csv_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/ddb.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/dir_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/format_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/graph.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_bar_line.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_cumulative.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_hist.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_scatter.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/json_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/s3_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/s3_stream.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/sinks.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/stdout.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/tsv_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/__init__.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/csv_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/inline_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/json_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file_local.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file_s3.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/parquet_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/source_list.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/sql_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/tsv_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/user_source_factory.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/requires.txt +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/top_level.txt +0 -0
|
@@ -162,7 +162,7 @@ class Usage:
|
|
|
162
162
|
return '\n'.join(lines)
|
|
163
163
|
|
|
164
164
|
def get_token_syntax(self):
|
|
165
|
-
if
|
|
165
|
+
if self.syntax:
|
|
166
166
|
return self.syntax # else piece it together
|
|
167
167
|
|
|
168
168
|
token = f'{self.name}'
|
|
@@ -300,10 +300,14 @@ class Source(ABC):
|
|
|
300
300
|
|
|
301
301
|
def deep_copy(self):
|
|
302
302
|
return None # Default: not copyable unless overridden
|
|
303
|
-
|
|
303
|
+
|
|
304
|
+
def close(self):
|
|
305
|
+
pass
|
|
306
|
+
|
|
307
|
+
def _get_sources(self, source_list: list):
|
|
308
|
+
pass
|
|
304
309
|
|
|
305
310
|
class Pipe(Source):
|
|
306
|
-
deep_copyable: bool = False # default to false
|
|
307
311
|
arity: int = 1
|
|
308
312
|
|
|
309
313
|
def __init__(self, ptok: ParsedToken, usage: Usage = None):
|
|
@@ -326,20 +330,12 @@ class Pipe(Source):
|
|
|
326
330
|
pass # optional hook
|
|
327
331
|
|
|
328
332
|
def deep_copy(self) -> Optional["Pipe"]:
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
for input in self.inputs:
|
|
337
|
-
strand = input.deep_copy()
|
|
338
|
-
if strand is None:
|
|
339
|
-
return None
|
|
340
|
-
clone.add_source(strand)
|
|
341
|
-
|
|
342
|
-
return clone
|
|
333
|
+
return None
|
|
334
|
+
|
|
335
|
+
def _get_sources(self, source_list: list):
|
|
336
|
+
for ix in self.inputs:
|
|
337
|
+
source_list.append(ix)
|
|
338
|
+
ix._get_sources(source_list)
|
|
343
339
|
|
|
344
340
|
class DeepCopyPipe(Pipe):
|
|
345
341
|
def deep_copy(self):
|
|
@@ -373,13 +369,16 @@ class Sink(ABC):
|
|
|
373
369
|
self.process()
|
|
374
370
|
self.close()
|
|
375
371
|
|
|
372
|
+
# get all inputs in the execution chain for closing
|
|
373
|
+
inputs = [self.input]
|
|
374
|
+
self.input._get_sources(inputs)
|
|
375
|
+
for input in inputs:
|
|
376
|
+
input.close()
|
|
377
|
+
|
|
376
378
|
# optional
|
|
377
379
|
def close(self):
|
|
378
380
|
pass
|
|
379
381
|
|
|
380
|
-
def print_info(self):
|
|
381
|
-
pass
|
|
382
|
-
|
|
383
382
|
def add_source(self, source: Source) -> None:
|
|
384
383
|
self.input = source
|
|
385
384
|
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
import sys, shutil, subprocess, contextlib, signal
|
|
5
5
|
import os
|
|
6
6
|
import yaml
|
|
7
|
+
from pjk.base import TokenError
|
|
7
8
|
|
|
8
9
|
class SafeNamespace:
|
|
9
10
|
def __init__(self, obj):
|
|
@@ -73,11 +74,12 @@ def highlight(text: str, color: str = 'bold', value: str = None) -> str:
|
|
|
73
74
|
return text.replace(value, f"{style}{value}{RESET}")
|
|
74
75
|
|
|
75
76
|
class Lookups:
|
|
76
|
-
def __init__(self):
|
|
77
|
+
def __init__(self, component_class):
|
|
77
78
|
self.lookups_yaml = os.path.expanduser('~/.pjk/lookups.yaml')
|
|
79
|
+
self.class_name = type(component_class).__name__
|
|
78
80
|
self._data = {}
|
|
79
81
|
self._load()
|
|
80
|
-
|
|
82
|
+
|
|
81
83
|
def _load(self):
|
|
82
84
|
"""Load lookups from YAML file if it exists."""
|
|
83
85
|
if os.path.exists(self.lookups_yaml):
|
|
@@ -93,8 +95,13 @@ class Lookups:
|
|
|
93
95
|
yaml.safe_dump(self._data, f)
|
|
94
96
|
|
|
95
97
|
def get(self, key, default=None):
|
|
96
|
-
|
|
97
|
-
|
|
98
|
+
lookup_key = f'{self.class_name}-{key}'
|
|
99
|
+
entry = self._data.get(lookup_key, default)
|
|
100
|
+
if not entry:
|
|
101
|
+
raise TokenError(
|
|
102
|
+
f"~/.pjk/lookups.yaml must contain entry for '{lookup_key}' with host, user, password."
|
|
103
|
+
)
|
|
104
|
+
return entry
|
|
98
105
|
|
|
99
106
|
def set(self, key, value):
|
|
100
107
|
"""Set a lookup value and persist it."""
|
|
@@ -129,14 +136,14 @@ class ComponentFactory:
|
|
|
129
136
|
print(header)
|
|
130
137
|
|
|
131
138
|
i = 0
|
|
132
|
-
|
|
139
|
+
# user and outside package components are also here, but printed from registry class
|
|
133
140
|
for name, comp_class in self.components.items():
|
|
134
141
|
usage = comp_class.usage()
|
|
135
142
|
lines = usage.desc.split('\n')
|
|
136
143
|
if i >= self.num_orig_comps:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
line =
|
|
144
|
+
break
|
|
145
|
+
|
|
146
|
+
line = f' {name:<12} {lines[0]}'
|
|
140
147
|
print(line)
|
|
141
148
|
i += 1
|
|
142
149
|
|
|
@@ -10,17 +10,20 @@ from typing import List
|
|
|
10
10
|
from pjk.parser import ExpressionParser
|
|
11
11
|
from pjk.base import UsageError
|
|
12
12
|
from pjk.log import init as init_logging
|
|
13
|
-
from datetime import datetime
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
import traceback
|
|
14
15
|
import concurrent.futures
|
|
15
16
|
from pjk.registry import ComponentRegistry
|
|
16
|
-
from pjk.
|
|
17
|
-
from pjk.sources.factory import SourceFactory
|
|
18
|
-
from pjk.sinks.factory import SinkFactory
|
|
17
|
+
from pjk.sinks.stdout import StdoutSink
|
|
19
18
|
from pjk.man_page import do_man, do_examples
|
|
20
19
|
from pjk.sinks.expect import ExpectSink
|
|
20
|
+
from pjk.progress import ProgressDisplay
|
|
21
21
|
from pjk.version import __version__
|
|
22
22
|
|
|
23
23
|
def write_history(tokens):
|
|
24
|
+
if os.environ.get("PJK_NO_HISTORY") == "1":
|
|
25
|
+
return
|
|
26
|
+
|
|
24
27
|
log_path = ".pjk-history.txt"
|
|
25
28
|
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
|
|
26
29
|
command = " ".join(tokens)
|
|
@@ -31,56 +34,71 @@ def write_history(tokens):
|
|
|
31
34
|
except (PermissionError, OSError):
|
|
32
35
|
pass
|
|
33
36
|
|
|
34
|
-
def execute_threaded(sinks):
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
futures = {
|
|
40
|
-
executor.submit(s.drain): s for s in sinks
|
|
41
|
-
}
|
|
42
|
-
|
|
37
|
+
def execute_threaded(sinks, stop_progress=None):
|
|
38
|
+
max_workers = min(32, len(sinks))
|
|
39
|
+
executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) # no 'with'
|
|
40
|
+
futures = {executor.submit(s.drain): s for s in sinks}
|
|
41
|
+
try:
|
|
43
42
|
for future in concurrent.futures.as_completed(futures):
|
|
44
43
|
sink_obj = futures[future]
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
44
|
+
future.result() # re-raises worker exception with traceback
|
|
45
|
+
except KeyboardInterrupt:
|
|
46
|
+
# stop UI first, then cancel and non-blocking shutdown
|
|
47
|
+
if stop_progress:
|
|
48
|
+
try: stop_progress()
|
|
49
|
+
except Exception: pass
|
|
50
|
+
for f in futures:
|
|
51
|
+
f.cancel()
|
|
52
|
+
executor.shutdown(wait=False, cancel_futures=True)
|
|
53
|
+
os._exit(130)
|
|
54
|
+
|
|
55
|
+
except Exception as e:
|
|
56
|
+
if stop_progress:
|
|
57
|
+
try: stop_progress()
|
|
58
|
+
except Exception: pass
|
|
59
|
+
sys.stderr.write(f"Sink {futures[future]} raised an exception:\n")
|
|
60
|
+
traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr)
|
|
61
|
+
for f in futures:
|
|
62
|
+
f.cancel()
|
|
63
|
+
executor.shutdown(wait=False, cancel_futures=True)
|
|
64
|
+
raise
|
|
65
|
+
else:
|
|
66
|
+
executor.shutdown(wait=True)
|
|
50
67
|
|
|
51
68
|
def execute(command: str):
|
|
52
69
|
tokens = shlex.split(command, comments=True, posix=True)
|
|
53
70
|
execute_tokens(tokens)
|
|
54
71
|
|
|
55
|
-
def execute_tokens(tokens:List[str]):
|
|
72
|
+
def execute_tokens(tokens: List[str]):
|
|
56
73
|
init_logging()
|
|
57
|
-
|
|
74
|
+
# (remove the sys.exit SIGINT handler here)
|
|
58
75
|
|
|
59
76
|
if '--version' in tokens:
|
|
60
77
|
print(f"pjk version {__version__}")
|
|
61
78
|
sys.exit(0)
|
|
62
|
-
|
|
79
|
+
|
|
63
80
|
registry = ComponentRegistry()
|
|
64
|
-
|
|
81
|
+
|
|
65
82
|
if len(tokens) < 1:
|
|
66
83
|
registry.print_usage()
|
|
67
84
|
return
|
|
68
|
-
|
|
69
|
-
# pjk man --all | --all+ | <component>
|
|
85
|
+
|
|
70
86
|
if len(tokens) == 2 and tokens[0] == 'man':
|
|
71
87
|
do_man(tokens[1], registry)
|
|
72
88
|
return
|
|
73
|
-
|
|
74
|
-
# pjk examples | examples+
|
|
89
|
+
|
|
75
90
|
if len(tokens) == 1 and tokens[0] in ['examples', 'examples+']:
|
|
76
91
|
do_examples(tokens[0], registry)
|
|
77
92
|
return
|
|
78
93
|
|
|
79
94
|
parser = ExpressionParser(registry)
|
|
80
95
|
|
|
96
|
+
display = None
|
|
81
97
|
try:
|
|
82
|
-
# Build initial sink
|
|
83
98
|
sink = parser.parse(tokens)
|
|
99
|
+
if not isinstance(sink, (StdoutSink | ExpectSink)):
|
|
100
|
+
display = ProgressDisplay(interval=3.0)
|
|
101
|
+
display.start()
|
|
84
102
|
|
|
85
103
|
sinks = [sink]
|
|
86
104
|
max_threads = os.cpu_count()
|
|
@@ -91,16 +109,23 @@ def execute_tokens(tokens:List[str]):
|
|
|
91
109
|
sinks.append(clone)
|
|
92
110
|
|
|
93
111
|
if len(sinks) > 1:
|
|
94
|
-
|
|
112
|
+
# pass a stopper so we halt the UI before tracebacks / shutdown
|
|
113
|
+
execute_threaded(sinks, stop_progress=(display.stop if display else None))
|
|
95
114
|
else:
|
|
96
|
-
sink.drain()
|
|
97
|
-
sink.print_info() # rarely used, e.g. expect and devnull
|
|
115
|
+
sink.drain()
|
|
98
116
|
|
|
99
117
|
write_history(sys.argv[1:])
|
|
100
118
|
|
|
101
119
|
except UsageError as e:
|
|
102
120
|
print(e, file=sys.stderr)
|
|
103
|
-
sys.exit(2)
|
|
121
|
+
sys.exit(2)
|
|
122
|
+
except KeyboardInterrupt:
|
|
123
|
+
pass
|
|
124
|
+
finally:
|
|
125
|
+
if display:
|
|
126
|
+
# short join so Ctrl-C is immediate
|
|
127
|
+
try: display.stop(timeout=0.1)
|
|
128
|
+
except Exception: pass
|
|
104
129
|
|
|
105
130
|
def main():
|
|
106
131
|
tokens = sys.argv[1:]
|
|
@@ -8,6 +8,9 @@ from typing import Optional, Any, List
|
|
|
8
8
|
from pjk.base import Source, Pipe, Sink, TokenError, UsageError, ParsedToken, Usage
|
|
9
9
|
from pjk.pipes.user_pipe_factory import UserPipeFactory
|
|
10
10
|
from pjk.pipes.let_reduce import ReducePipe
|
|
11
|
+
from pjk.sinks.stdout import StdoutSink
|
|
12
|
+
from pjk.sinks.expect import ExpectSink
|
|
13
|
+
from pjk.pipes.progress_pipe import ProgressPipe
|
|
11
14
|
from pjk.registry import ComponentRegistry
|
|
12
15
|
|
|
13
16
|
def expand_macros(tokens: List[str]) -> List[str]:
|
|
@@ -58,8 +61,12 @@ class ExpressionParser:
|
|
|
58
61
|
if not sink:
|
|
59
62
|
raise TokenError.from_list(['expression must end in a sink.',
|
|
60
63
|
'pjk <source> [<pipe> ...] <sink>'])
|
|
64
|
+
|
|
65
|
+
# so each sink doesn't have to, maybe make a base class or mixin for sinks
|
|
66
|
+
progress_pipe = ProgressPipe(component_instance=sink)
|
|
67
|
+
progress_pipe.add_source(source)
|
|
61
68
|
|
|
62
|
-
sink.add_source(
|
|
69
|
+
sink.add_source(progress_pipe)
|
|
63
70
|
return sink
|
|
64
71
|
|
|
65
72
|
def parse(self, tokens: List[str]) -> Sink:
|
|
@@ -78,10 +85,12 @@ class ExpressionParser:
|
|
|
78
85
|
return self.get_sink(stack_helper, token)
|
|
79
86
|
|
|
80
87
|
source = self.registry.create_source(token)
|
|
81
|
-
if source:
|
|
88
|
+
if source:
|
|
82
89
|
stack_helper.add_operator(source, self.stack)
|
|
90
|
+
progress_pipe = ProgressPipe(component_instance=source, simple=True)
|
|
91
|
+
stack_helper.add_operator(progress_pipe, self.stack)
|
|
83
92
|
continue
|
|
84
|
-
|
|
93
|
+
|
|
85
94
|
subexp = SubExpression.create(token)
|
|
86
95
|
if subexp:
|
|
87
96
|
stack_helper.add_operator(subexp, self.stack)
|
|
@@ -12,12 +12,14 @@ from pjk.pipes.head import HeadPipe
|
|
|
12
12
|
from pjk.pipes.tail import TailPipe
|
|
13
13
|
from pjk.pipes.sort import SortPipe
|
|
14
14
|
from pjk.pipes.where import WherePipe
|
|
15
|
-
from pjk.pipes.map import
|
|
15
|
+
from pjk.pipes.map import MapByPipe
|
|
16
|
+
from pjk.pipes.map import GroupByPipe
|
|
16
17
|
from pjk.pipes.join import JoinPipe
|
|
17
18
|
from pjk.pipes.filter import FilterPipe
|
|
18
19
|
from pjk.pipes.select import SelectFields
|
|
19
20
|
from pjk.pipes.denorm import DenormPipe
|
|
20
21
|
from pjk.pipes.postgres_pipe import PostgresPipe
|
|
22
|
+
from pjk.pipes.sample import SamplePipe
|
|
21
23
|
from pjk.pipes.user_pipe_factory import UserPipeFactory
|
|
22
24
|
|
|
23
25
|
COMPONENTS = {
|
|
@@ -25,7 +27,8 @@ COMPONENTS = {
|
|
|
25
27
|
'tail': TailPipe,
|
|
26
28
|
'join': JoinPipe,
|
|
27
29
|
'filter': FilterPipe,
|
|
28
|
-
'
|
|
30
|
+
'mapby': MapByPipe,
|
|
31
|
+
'groupby': GroupByPipe,
|
|
29
32
|
'as': MoveField,
|
|
30
33
|
'drop': RemoveField,
|
|
31
34
|
'let': LetPipe,
|
|
@@ -33,6 +36,7 @@ COMPONENTS = {
|
|
|
33
36
|
'sort': SortPipe,
|
|
34
37
|
'where': WherePipe,
|
|
35
38
|
'sel': SelectFields,
|
|
39
|
+
'sample': SamplePipe,
|
|
36
40
|
'explode': DenormPipe,
|
|
37
41
|
'pgres': PostgresPipe,
|
|
38
42
|
}
|
|
@@ -15,13 +15,13 @@ class FilterPipe(Pipe):
|
|
|
15
15
|
)
|
|
16
16
|
usage.def_arg("mode", "'+' to include matches, '-' to exclude matches",
|
|
17
17
|
valid_values={'+', '-'})
|
|
18
|
-
usage.def_syntax("pjk <left_source> <map_source>
|
|
18
|
+
usage.def_syntax("pjk <left_source> <map_source> [mapby:groupby]:<how>:<key> filter:<mode> <sink>")
|
|
19
19
|
|
|
20
20
|
usage.def_example(expr_tokens=
|
|
21
21
|
[
|
|
22
22
|
"[{id:1}, {id:2}, {id:3}, {id:4}, {id:5}]",
|
|
23
23
|
"[{id:1}, {id:3}, {id:5}]",
|
|
24
|
-
'
|
|
24
|
+
'mapby:id',
|
|
25
25
|
"filter:+"
|
|
26
26
|
],
|
|
27
27
|
expect="[{id:1}, {id:3}, {id:5}]")
|
|
@@ -30,7 +30,7 @@ class FilterPipe(Pipe):
|
|
|
30
30
|
[
|
|
31
31
|
"[{id:1}, {id:2}, {id:3}, {id:4}, {id:5}]",
|
|
32
32
|
"[{id:1}, {id:3}, {id:5}]",
|
|
33
|
-
'
|
|
33
|
+
'mapby:id',
|
|
34
34
|
"filter:-"
|
|
35
35
|
],
|
|
36
36
|
expect="[{id:2}, {id:4}]")
|
|
@@ -2,20 +2,18 @@
|
|
|
2
2
|
# Copyright 2024 Mike Schultz
|
|
3
3
|
|
|
4
4
|
# djk/pipes/head.py
|
|
5
|
+
from pjk.base import Pipe, ParsedToken, Usage
|
|
5
6
|
|
|
6
|
-
|
|
7
|
-
from pjk.base import Pipe, ParsedToken, Usage, DeepCopyPipe
|
|
8
|
-
|
|
9
|
-
class HeadPipe(DeepCopyPipe):
|
|
7
|
+
class HeadPipe(Pipe):
|
|
10
8
|
@classmethod
|
|
11
9
|
def usage(cls):
|
|
12
10
|
usage = Usage(
|
|
13
11
|
name='head',
|
|
14
|
-
desc='take first records of input (
|
|
12
|
+
desc='take first records of input (single-threaded)',
|
|
15
13
|
component_class=cls
|
|
16
14
|
)
|
|
17
|
-
usage.def_arg(name='limit', usage='number of records', is_num=True)
|
|
18
15
|
usage.def_example(expr_tokens=['[{id:1}, {id:2}]', 'head:1'], expect="{id:1}")
|
|
16
|
+
usage.def_arg(name='limit', usage='number of records', is_num=True)
|
|
19
17
|
return usage
|
|
20
18
|
|
|
21
19
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
@@ -20,13 +20,13 @@ class JoinPipe(Pipe):
|
|
|
20
20
|
usage="'left', 'inner', or 'outer' join behavior",
|
|
21
21
|
valid_values={'left', 'inner', 'outer'}
|
|
22
22
|
)
|
|
23
|
-
usage.def_syntax("pjk <left_source> <map_source>
|
|
23
|
+
usage.def_syntax("pjk <left_source> <map_source> [mapby|groupby]:<key> join:<mode> <sink>")
|
|
24
24
|
|
|
25
25
|
usage.def_example(expr_tokens=
|
|
26
26
|
[
|
|
27
27
|
"[{color:'blue'},{color:'green'}]",
|
|
28
28
|
"[{color:'blue', price:50}, {color:'red', price:20}]",
|
|
29
|
-
'
|
|
29
|
+
'mapby:color',
|
|
30
30
|
"join:left"
|
|
31
31
|
],
|
|
32
32
|
expect="[{color:'blue', price:50}, {color:'green'}]")
|
|
@@ -34,7 +34,7 @@ class JoinPipe(Pipe):
|
|
|
34
34
|
[
|
|
35
35
|
"[{color:'blue'},{color:'green'}]",
|
|
36
36
|
"[{color:'blue', price:50}, {color:'red', price:20}]",
|
|
37
|
-
'
|
|
37
|
+
'mapby:color',
|
|
38
38
|
"join:inner"
|
|
39
39
|
],
|
|
40
40
|
expect="[{color:'blue', price:50}]")
|
|
@@ -43,7 +43,7 @@ class JoinPipe(Pipe):
|
|
|
43
43
|
[
|
|
44
44
|
"[{color:'blue'},{color:'green'}]",
|
|
45
45
|
"[{color:'blue', price:50}, {color:'red', price:20}]",
|
|
46
|
-
'
|
|
46
|
+
'mapby:color',
|
|
47
47
|
"join:outer"
|
|
48
48
|
],
|
|
49
49
|
expect="[{color:'blue', price:50}, {color:'green'}, {color:'red', price: 20}]")
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
# djk/pipes/group.py
|
|
5
|
+
|
|
6
|
+
from typing import Optional
|
|
7
|
+
from pjk.base import ParsedToken, Usage, Pipe, KeyedSource
|
|
8
|
+
|
|
9
|
+
class MapByPipe(Pipe, KeyedSource):
|
|
10
|
+
@classmethod
|
|
11
|
+
def usage(cls):
|
|
12
|
+
u = Usage(
|
|
13
|
+
name='mapby',
|
|
14
|
+
desc="Maps records to key, taking last instance of duplicates.\nFilters out records without all key fields.\nCreates Keyed Source for join or filter.",
|
|
15
|
+
component_class=cls
|
|
16
|
+
)
|
|
17
|
+
u.def_arg(name='key', usage='comma separated fields to map by')
|
|
18
|
+
u.def_param(name='count', usage='add count of the records with key', valid_values={'true', 'false'}, default='false')
|
|
19
|
+
u.def_example(expr_tokens=["[{id: 1, color:'blue'}, {id:1, color:'green'}, {id:2, color:'red'}]", 'mapby:id'],
|
|
20
|
+
expect="[{id:2, color:'red'}, {id:1, color:'green'}]")
|
|
21
|
+
u.def_example(expr_tokens=["[{id: 1, color:'blue', size:5}, {id:1, color:'green', size:10}]", 'mapby:id,color'],
|
|
22
|
+
expect="[{id:1, color:'green', size: 10}, {id:1, color:'blue', size:5}]")
|
|
23
|
+
u.def_example(expr_tokens=["[{id:'a'}, {id:'a'}, {id:'b'}, {j:3}]", "mapby:id@count=true"],
|
|
24
|
+
expect="[{id:'a', count:2}, {id:'b', 'count': 1}]")
|
|
25
|
+
|
|
26
|
+
return u
|
|
27
|
+
|
|
28
|
+
def __init__(self, ptok: ParsedToken, usage: Usage, is_group: bool = False):
|
|
29
|
+
super().__init__(ptok)
|
|
30
|
+
self.is_group = is_group
|
|
31
|
+
self.fields = usage.get_arg('key').split(',')
|
|
32
|
+
self.rec_map = {}
|
|
33
|
+
self.matched_map = {}
|
|
34
|
+
self.is_loaded = False
|
|
35
|
+
self.do_count = usage.get_param(name='count').lower() == 'true'
|
|
36
|
+
self.counts = {}
|
|
37
|
+
|
|
38
|
+
def reset(self):
|
|
39
|
+
self.rec_map.clear()
|
|
40
|
+
self.matched_map.clear()
|
|
41
|
+
self._rec_list = None
|
|
42
|
+
self.is_loaded = False
|
|
43
|
+
|
|
44
|
+
def get_key_rec(self, record):
|
|
45
|
+
key_rec = {}
|
|
46
|
+
for field in self.fields:
|
|
47
|
+
key_val = record.pop(field, None) if self.is_group else record.get(field)
|
|
48
|
+
if not key_val:
|
|
49
|
+
return None
|
|
50
|
+
|
|
51
|
+
key_rec[field] = key_val
|
|
52
|
+
return key_rec
|
|
53
|
+
|
|
54
|
+
def count(self, key):
|
|
55
|
+
if not self.do_count:
|
|
56
|
+
return
|
|
57
|
+
i = self.counts.get(key, 0)
|
|
58
|
+
self.counts[key] = i+1
|
|
59
|
+
|
|
60
|
+
def load(self):
|
|
61
|
+
if self.is_loaded:
|
|
62
|
+
return
|
|
63
|
+
self.is_loaded = True
|
|
64
|
+
|
|
65
|
+
for record in self.left:
|
|
66
|
+
key_rec = self.get_key_rec(record)
|
|
67
|
+
if not key_rec: # some fields missing, filter out rec
|
|
68
|
+
continue
|
|
69
|
+
|
|
70
|
+
key = tuple(key_rec.values())
|
|
71
|
+
self.count(key)
|
|
72
|
+
|
|
73
|
+
existing = self.rec_map.get(key)
|
|
74
|
+
if not existing:
|
|
75
|
+
if self.is_group:
|
|
76
|
+
key_rec['child'] = [record]
|
|
77
|
+
self.rec_map[key] = key_rec
|
|
78
|
+
else:
|
|
79
|
+
self.rec_map[key] = record
|
|
80
|
+
else:
|
|
81
|
+
if self.is_group:
|
|
82
|
+
existing['child'].append(record)
|
|
83
|
+
else:
|
|
84
|
+
self.rec_map[key] = record
|
|
85
|
+
|
|
86
|
+
if self.do_count:
|
|
87
|
+
for k, v in self.rec_map.items():
|
|
88
|
+
if self.do_count:
|
|
89
|
+
c = self.counts.get(k, 0)
|
|
90
|
+
v['count'] = c
|
|
91
|
+
|
|
92
|
+
def __iter__(self):
|
|
93
|
+
if not self.is_loaded:
|
|
94
|
+
self.load()
|
|
95
|
+
for v in self.rec_map.values():
|
|
96
|
+
yield v
|
|
97
|
+
|
|
98
|
+
def lookup(self, left_rec) -> Optional[dict]:
|
|
99
|
+
if not self.is_loaded:
|
|
100
|
+
self.load()
|
|
101
|
+
|
|
102
|
+
key = tuple(left_rec.get(f) for f in self.fields)
|
|
103
|
+
rec = self.rec_map.pop(key, None)
|
|
104
|
+
if rec is not None:
|
|
105
|
+
self.matched_map[key] = rec
|
|
106
|
+
return rec
|
|
107
|
+
return self.matched_map.get(key)
|
|
108
|
+
|
|
109
|
+
def get_unlookedup_records(self):
|
|
110
|
+
if not self.is_loaded:
|
|
111
|
+
self.load()
|
|
112
|
+
return list(self.rec_map.values())
|
|
113
|
+
|
|
114
|
+
class GroupByPipe(MapByPipe):
|
|
115
|
+
@classmethod
|
|
116
|
+
def usage(cls):
|
|
117
|
+
u = Usage(
|
|
118
|
+
name='groupby',
|
|
119
|
+
desc="groups records by key. Creates Keyed Source for join or filter.",
|
|
120
|
+
component_class=cls
|
|
121
|
+
)
|
|
122
|
+
u.def_arg(name='key', usage='comma separated fields to map by')
|
|
123
|
+
u.def_param(name='count', usage='add count of the records with key', valid_values={'true', 'false'}, default='false')
|
|
124
|
+
u.def_example(expr_tokens=["[{id: 1, color:'blue'}, {id:1, color:'green'}, {id:2, color:'red'}]", 'groupby:id'],
|
|
125
|
+
expect="[{id:2, child:[{color:'red'}]}, {id:1, child:[{color:'blue'},{color: 'green'}]}]")
|
|
126
|
+
|
|
127
|
+
return u
|
|
128
|
+
|
|
129
|
+
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
130
|
+
super().__init__(ptok, usage, True)
|
|
@@ -10,7 +10,7 @@ class MoveField(Pipe):
|
|
|
10
10
|
def usage(cls):
|
|
11
11
|
usage = Usage(
|
|
12
12
|
name='as',
|
|
13
|
-
desc='
|
|
13
|
+
desc='rename a field in the record',
|
|
14
14
|
component_class=cls
|
|
15
15
|
)
|
|
16
16
|
usage.def_arg(name='src', usage='Source field name')
|
|
@@ -20,7 +20,7 @@ class MoveField(Pipe):
|
|
|
20
20
|
return usage
|
|
21
21
|
|
|
22
22
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
23
|
-
super().__init__(ptok)
|
|
23
|
+
super().__init__(ptok, usage)
|
|
24
24
|
self.src = usage.get_arg('src')
|
|
25
25
|
self.dst = usage.get_arg('dst')
|
|
26
26
|
self.count = 0
|
|
@@ -99,30 +99,26 @@ class PostgresPipe(Pipe):
|
|
|
99
99
|
)
|
|
100
100
|
usage.def_arg(
|
|
101
101
|
"dbname",
|
|
102
|
-
"
|
|
102
|
+
f"~/.pjk/lookups.yaml must containing entry '{cls.__name__}-<dbname>' with host, user, password"
|
|
103
103
|
)
|
|
104
104
|
usage.def_param(
|
|
105
105
|
"header",
|
|
106
106
|
usage="emit header record before query results",
|
|
107
|
-
valid_values={"true", "false"}, default='
|
|
107
|
+
valid_values={"true", "false"}, default='false',
|
|
108
108
|
)
|
|
109
109
|
|
|
110
110
|
usage.def_example(expr_tokens=['myquery.sql', 'pgres:mydb'], expect=None)
|
|
111
111
|
usage.def_example(expr_tokens=["{'query': 'SELECT * from MY_TABLE;'}", 'pgres:mydb'], expect=None)
|
|
112
|
+
usage.def_example(expr_tokens=["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'pgres:mydb'], expect=None)
|
|
112
113
|
return usage
|
|
113
114
|
|
|
114
115
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
115
116
|
super().__init__(ptok, usage)
|
|
116
117
|
|
|
117
|
-
lookups = Lookups()
|
|
118
|
+
lookups = Lookups(self)
|
|
118
119
|
self.dbname = usage.get_arg("dbname")
|
|
119
|
-
db_params = lookups.get(self.dbname)
|
|
120
|
-
if not db_params:
|
|
121
|
-
# f-string so dbname prints correctly
|
|
122
|
-
raise TokenError(
|
|
123
|
-
f"~/.pjk/lookups.yaml must contain entry for '{self.dbname}' with host, user, password."
|
|
124
|
-
)
|
|
125
120
|
|
|
121
|
+
db_params = lookups.get(self.dbname)
|
|
126
122
|
self.db_host = db_params.get("host")
|
|
127
123
|
self.db_user = db_params.get("user")
|
|
128
124
|
self.db_pass = db_params.get("password")
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from typing import Iterator
|
|
2
|
+
from pjk.base import Pipe
|
|
3
|
+
from pjk.progress import papi
|
|
4
|
+
|
|
5
|
+
# monitors flow of records wherever inserted
|
|
6
|
+
|
|
7
|
+
class ProgressPipe(Pipe):
|
|
8
|
+
def __init__(self, component_instance = None, simple: bool = False):
|
|
9
|
+
super().__init__(None, None)
|
|
10
|
+
self.component_instance = component_instance
|
|
11
|
+
self.simple = simple
|
|
12
|
+
|
|
13
|
+
label = self.get_component_label(component_instance)
|
|
14
|
+
self.counter = papi.get_counter(label, var_label='recs')
|
|
15
|
+
#papi.add_rate(sink_name, self.counter, var_label='krecs/sec')
|
|
16
|
+
if not simple:
|
|
17
|
+
papi.get_counter(label, var_label='threads').increment()
|
|
18
|
+
papi.add_elapsed_time(label, var_label='elapsed')
|
|
19
|
+
|
|
20
|
+
def get_component_label(self, component_instance):
|
|
21
|
+
if hasattr(type(component_instance), 'extension'):
|
|
22
|
+
return type(component_instance).extension
|
|
23
|
+
elif hasattr(component_instance, 'usage'):
|
|
24
|
+
return type(component_instance).usage().name
|
|
25
|
+
return type(component_instance).__name__
|
|
26
|
+
|
|
27
|
+
def __iter__(self) -> Iterator:
|
|
28
|
+
# only counting here
|
|
29
|
+
for record in self.left:
|
|
30
|
+
self.counter.increment()
|
|
31
|
+
yield record
|
|
32
|
+
|
|
33
|
+
def deep_copy(self):
|
|
34
|
+
source_clone = self.left.deep_copy()
|
|
35
|
+
if not source_clone:
|
|
36
|
+
return None
|
|
37
|
+
|
|
38
|
+
pipe = ProgressPipe(self.component_instance, self.simple)
|
|
39
|
+
pipe.add_source(source_clone)
|
|
40
|
+
return pipe
|
|
41
|
+
|