python-jack-knife 0.6.11__tar.gz → 0.6.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/PKG-INFO +1 -1
- python_jack_knife-0.6.14/src/pjk/common.py +168 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/integrations/postgres_pipe.py +42 -20
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/main.py +6 -5
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/man_page.py +9 -11
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/let_reduce.py +1 -1
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/map.py +1 -1
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/query_pipe.py +9 -1
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/remove_field.py +2 -2
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/registry.py +85 -65
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/graph.py +2 -2
- python_jack_knife-0.6.14/src/pjk/sinks/graph_bar_line.py +383 -0
- python_jack_knife-0.6.14/src/pjk/sinks/stdout.py +78 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/factory.py +0 -2
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/version.py +1 -1
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
- python_jack_knife-0.6.11/src/pjk/common.py +0 -123
- python_jack_knife-0.6.11/src/pjk/sinks/graph_bar_line.py +0 -229
- python_jack_knife-0.6.11/src/pjk/sinks/stdout.py +0 -46
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/LICENSE +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/README.md +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/pyproject.toml +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/setup.cfg +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/__init__.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/components.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/integrations/opensearch_client.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/integrations/opensearch_index_sink.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/integrations/opensearch_query_pipe.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/integrations/snowflake_pipe.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/log.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/parser.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/__init__.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/denorm.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/factory.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/filter.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/head.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/join.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/move_field.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/progress_pipe.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/sample.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/select.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/sort.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/tail.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/user_pipe_factory.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/where.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/progress.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/__init__.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/create_sink.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/csv_sink.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/devnull.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/dir_sink.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/expect.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/factory.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/format_sink.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/graph_cumulative.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/graph_hist.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/graph_scatter.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/json_sink.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/s3_sink.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/s3_stream.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/sinks.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/tsv_sink.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/user_sink_factory.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/__init__.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/configs_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/csv_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/dir_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/favorite_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/format_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/inline_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/json_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/lazy_file.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/lazy_file_local.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/lazy_file_s3.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/macro_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/npy_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/parquet_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/s3_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/source_list.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/sql_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/tsv_source.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/user_source_factory.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/usage.py +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/SOURCES.txt +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/requires.txt +0 -0
- {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
import contextlib, io, os, subprocess, sys
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
from abc import ABC
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from pjk.sources.format_source import FormatSource
|
|
10
|
+
from pjk.sinks.format_sink import FormatSink
|
|
11
|
+
from typing import List, Type
|
|
12
|
+
|
|
13
|
+
class SafeNamespace:
    """Attribute-style, lookup-tolerant view over a record dict.

    Every key of ``obj`` becomes an instance attribute. Dict values, and
    dicts found directly inside list values, are wrapped recursively so that
    nested fields can be reached with dotted access. Reading a field that was
    never set yields ``None`` instead of raising.
    """

    def __init__(self, obj):
        for k, v in obj.items():
            if isinstance(v, dict):
                v = SafeNamespace(v)
            elif isinstance(v, list):
                v = [SafeNamespace(x) if isinstance(x, dict) else x for x in v]
            setattr(self, k, v)

    def __getattr__(self, key):
        """Return ``None`` for any missing field.

        Only invoked when normal attribute lookup fails. Dunder names are
        protocol probes (``__setstate__``, ``__deepcopy__``, ...) rather than
        record fields; answering ``None`` for them makes ``hasattr`` report a
        hook that is not callable, which breaks ``copy.deepcopy`` and pickling.

        Raises:
            AttributeError: for missing ``__dunder__`` names.
        """
        if key.startswith('__') and key.endswith('__'):
            raise AttributeError(key)
        return None  # gracefully handle missing keys
|
|
24
|
+
|
|
25
|
+
class ReducingNamespace:
    """Attribute-style view over a record where every field reads as a collection.

    Fields are looked up in the wrapped ``record`` on each access. Values
    that are already a ``list``, ``tuple`` or ``set`` are returned unchanged;
    any other value is wrapped in a one-element list.
    """

    def __init__(self, record):
        self._record = record

    def __getattr__(self, name):
        """Return the field ``name`` as a collection.

        Only invoked when normal attribute lookup fails.

        Raises:
            AttributeError: for ``_record`` itself and for ``__dunder__``
                names. ``_record`` is only missing on an instance that
                bypassed ``__init__`` (e.g. while being copied or unpickled);
                without this guard the lookup below would recurse forever.
            KeyError: when the record has no field called ``name``.
        """
        if name == '_record' or (name.startswith('__') and name.endswith('__')):
            raise AttributeError(name)
        value = self._record[name]
        if isinstance(value, (list, tuple, set)):
            return value
        return [value]  # promote scalars to singleton lists
|
|
34
|
+
|
|
35
|
+
# pjk/common.py
|
|
36
|
+
import contextlib, io, os, subprocess, sys
|
|
37
|
+
|
|
38
|
+
@contextlib.contextmanager
def pager_stdout(use_pager: bool = True):
    """
    Stream stdout into `less` via a pipe.

    - If stdout is not a TTY or use_pager is False → write directly to sys.stdout.
    - If `less` cannot be started → write directly to sys.stdout.
    - Otherwise spawn `less` and replace sys.stdout with less.stdin for the
      duration of the `with` body, restoring the original stream afterwards.

    A BrokenPipeError raised by the body (the pager went away before all
    output was written) is swallowed.
    """
    # If not a TTY, paging makes no sense
    if not use_pager or not sys.stdout.isatty():
        yield
        return

    env = os.environ.copy()
    # -R: pass ANSI colors through; -F: quit at once if the output fits on
    # one screen; -X: skip terminal init/deinit. A user-set LESS wins.
    env.setdefault("LESS", "-RFX")
    # Ensure UTF-8
    env.setdefault("LESSCHARSET", "utf-8")

    stdout_orig = sys.stdout

    # Start less with a *pipe* for stdin and inherit the real terminal for out/err
    try:
        pager = subprocess.Popen(
            ["less"],
            stdin=subprocess.PIPE,
            stdout=stdout_orig,  # keep interactivity
            stderr=sys.stderr,
            env=env,
            close_fds=True,
            bufsize=0,  # unbuffered pipe
        )
    except OSError:
        # `less` is missing or not executable: degrade to unpaged output
        # rather than failing the whole command.
        pager = None

    if pager is None:
        yield
        return

    # Wrap less.stdin as a text writer and swap sys.stdout
    assert pager.stdin is not None
    pager_bin = pager.stdin
    pager_txt = io.TextIOWrapper(pager_bin, encoding="utf-8", write_through=True)

    sys.stdout = pager_txt
    try:
        yield
    except BrokenPipeError:
        pass
    finally:
        # Teardown is deliberately best-effort: each step is attempted even
        # if the previous one failed, so the real stdout is always restored
        # and the pager is always reaped.
        with contextlib.suppress(Exception):
            sys.stdout.flush()
        # Restore first, then close pager stdin to send EOF
        sys.stdout = stdout_orig
        with contextlib.suppress(Exception):
            pager_txt.flush()
        with contextlib.suppress(Exception):
            pager_bin.close()  # EOF → lets less exit
        with contextlib.suppress(Exception):
            pager.wait()
|
|
99
|
+
|
|
100
|
+
# ANSI escape sequences keyed by the style names accepted by highlight().
COLOR_CODES = {
    'bold': '\033[1m',
    'underline': '\033[4m',
    'red': '\033[31m',
    'green': '\033[32m',
    'yellow': '\033[33m',
    'blue': '\033[34m',
    'magenta': '\033[35m',
    'cyan': '\033[36m',
    'gray': '\033[90m',
}

# Sequence that switches every style back off.
RESET = '\033[0m'


def highlight(text: str, color: str = 'bold', value: str = None) -> str:
    """Wrap occurrences of ``value`` inside ``text`` in an ANSI style.

    Args:
        text: The string to decorate.
        color: Case-insensitive key of ``COLOR_CODES``; unknown names fall
            back to ``'bold'``.
        value: Substring to style. When empty or ``None`` the whole of
            ``text`` is styled.

    Returns:
        ``text`` with every occurrence of the target substring surrounded by
        the style sequence and ``RESET``.
    """
    target = value or text
    try:
        ansi = COLOR_CODES[color.lower()]
    except KeyError:
        ansi = COLOR_CODES['bold']
    decorated = ansi + target + RESET
    return text.replace(target, decorated)
|
|
118
|
+
|
|
119
|
+
# mixin
|
|
120
|
+
class Integration(ABC):
    """Marker base with no behavior of its own.

    Component classes list it as an extra base; ``ComponentWrapper`` checks
    ``issubclass(comp_class, Integration)`` to flag a component as an
    integration.
    """
|
|
122
|
+
|
|
123
|
+
class ComponentOrigin(Enum):
    """Where a registered component came from."""

    # Core components defined in python-jack-knife itself.
    CORE = 0
    # Components registered by an installed extras package (through
    # ExternalRegistrar); displayed under either 'integrations' or 'apps'.
    EXTERNAL = 1
    # Components loaded via load_user_components; always displayed in the
    # user components section.
    USER = 2
|
|
127
|
+
|
|
128
|
+
class ComponentWrapper:
    """Registry entry pairing a component class with its name and origin.

    Attributes are plain and public:
      name           -- the name the component was registered under
      comp_class     -- the component class itself
      origin         -- the ComponentOrigin it was registered with
      is_integration -- True when comp_class derives from Integration
    """

    def __init__(self, name: str, comp_class, origin: ComponentOrigin):
        # Computed once at registration time so later filtering is a plain
        # attribute comparison.
        self.is_integration = issubclass(comp_class, Integration)
        self.origin = origin
        self.comp_class = comp_class
        self.name = name
|
|
134
|
+
|
|
135
|
+
class ComponentFactory:
    """Name-indexed collection of component classes.

    Entries are stored as ComponentWrapper objects in ``self.wrappers``,
    keyed by component name; registering an existing name replaces it.
    """

    def __init__(self, core_components: dict):
        """Register every ``name -> class`` pair of ``core_components`` as CORE."""
        self.wrappers = {}
        for comp_name, comp_class in core_components.items():
            self.register(comp_name, comp_class, origin=ComponentOrigin.CORE)

    def register(self, name, comp_class, origin: ComponentOrigin):
        """Add (or overwrite) the entry for ``name``."""
        entry = ComponentWrapper(name, comp_class=comp_class, origin=origin)
        self.wrappers[name] = entry

    # is_integration True|False|None=don't care
    def get_components(self, origin_list: List[ComponentOrigin], is_integration: bool) -> dict:
        """Return ``{name: class}`` for entries matching both filters.

        Args:
            origin_list: Origins to include.
            is_integration: ``True``/``False`` keeps only entries whose
                integration flag equals it; ``None`` disables this filter.
        """
        selected = {}
        for entry in self.wrappers.values():
            if is_integration is not None and entry.is_integration != is_integration:
                continue
            if entry.origin in origin_list:
                selected[entry.name] = entry.comp_class
        return selected

    def get_component_class(self, name: str):
        """Return the class registered under ``name``, or ``None`` if absent."""
        entry = self.wrappers.get(name)
        return entry.comp_class if entry else None

    def create(self, token: str):
        """Placeholder that does nothing and returns ``None``.

        NOTE(review): presumably overridden by the concrete factories —
        confirm against the source/pipe/sink factory classes.
        """
        return None
|
|
166
|
+
|
|
167
|
+
def is_valid_field_name(name: str):
    """Check that ``name`` is a letter or underscore followed by letters, digits or underscores.

    Returns:
        The ``re.Match`` object when the whole of ``name`` matches (truthy),
        otherwise ``None``.
    """
    identifier = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$')
    return identifier.fullmatch(name)
|
|
@@ -97,7 +97,8 @@ class PostgresPipe(QueryPipe,Integration):
|
|
|
97
97
|
examples = [
|
|
98
98
|
['myquery.sql', 'postgres:mydb', '-'],
|
|
99
99
|
["{'query': 'SELECT * from MY_TABLE;'}", 'postgres:mydb', '-'],
|
|
100
|
-
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'postgres:mydb']
|
|
100
|
+
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'postgres:mydb'],
|
|
101
|
+
["{'query': 'SELECT stored_procedure(%s, ...), batch_params:{...}"]
|
|
101
102
|
]
|
|
102
103
|
|
|
103
104
|
# name, type, default
|
|
@@ -165,14 +166,32 @@ class PostgresPipe(QueryPipe,Integration):
|
|
|
165
166
|
try:
|
|
166
167
|
query = record.get(self.query_field)
|
|
167
168
|
if not query:
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
169
|
+
record['_error'] = 'missing query'
|
|
170
|
+
yield record
|
|
171
|
+
return
|
|
172
|
+
|
|
173
|
+
params = record.get(self.params_field) # single-exec params
|
|
174
|
+
batch = record.get("batch_params", None) # list[tuple|dict] for batching
|
|
175
|
+
|
|
176
|
+
cur = client.conn.cursor()
|
|
177
|
+
try:
|
|
178
|
+
did_executemany = False
|
|
179
|
+
|
|
180
|
+
# ---------- execute ----------
|
|
181
|
+
if batch is not None:
|
|
182
|
+
# Handle batch sizes explicitly to preserve single-SELECT streaming semantics
|
|
183
|
+
if len(batch) == 0:
|
|
184
|
+
# No-op batch; execute a lightweight statement so we can still emit a header
|
|
185
|
+
cur.execute("SELECT 1")
|
|
186
|
+
header_params = {"batch_size": 0}
|
|
187
|
+
elif len(batch) == 1:
|
|
188
|
+
cur.execute(query, batch[0])
|
|
189
|
+
header_params = batch[0]
|
|
190
|
+
else:
|
|
191
|
+
cur.executemany(query, batch)
|
|
192
|
+
did_executemany = True
|
|
193
|
+
header_params = {"batch_size": len(batch)}
|
|
194
|
+
else:
|
|
176
195
|
if params is None:
|
|
177
196
|
cur.execute(query)
|
|
178
197
|
else:
|
|
@@ -180,17 +199,20 @@ class PostgresPipe(QueryPipe,Integration):
|
|
|
180
199
|
cur.execute(query, params)
|
|
181
200
|
else:
|
|
182
201
|
cur.execute(query, (params,))
|
|
202
|
+
header_params = params
|
|
183
203
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
204
|
+
# ---------- header ----------
|
|
205
|
+
yield self._make_header(cur, query, header_params)
|
|
206
|
+
|
|
207
|
+
# ---------- stream rows (only meaningful for single execute that returns rows) ----------
|
|
208
|
+
# Note: executemany() typically doesn't expose per-execution result sets.
|
|
209
|
+
if not did_executemany and cur.description:
|
|
210
|
+
cols = [d[0] for d in cur.description]
|
|
211
|
+
if not (len(cols) == 1 and cols[0] == "ingest_event"):
|
|
212
|
+
for row in cur:
|
|
213
|
+
yield _row_to_dict(cur, row)
|
|
214
|
+
|
|
215
|
+
finally:
|
|
216
|
+
cur.close()
|
|
195
217
|
finally:
|
|
196
218
|
client.close()
|
|
@@ -5,13 +5,11 @@
|
|
|
5
5
|
import sys
|
|
6
6
|
import os
|
|
7
7
|
import shlex
|
|
8
|
-
import shutil
|
|
9
8
|
from typing import List
|
|
10
9
|
from pjk.parser import ExpressionParser
|
|
11
10
|
from pjk.usage import UsageError
|
|
12
11
|
from pjk.log import init as init_logging
|
|
13
12
|
from datetime import datetime
|
|
14
|
-
from pathlib import Path
|
|
15
13
|
import traceback
|
|
16
14
|
import concurrent.futures
|
|
17
15
|
from pjk.registry import ComponentRegistry
|
|
@@ -27,6 +25,10 @@ def write_history(tokens):
|
|
|
27
25
|
|
|
28
26
|
log_path = ".pjk-history.txt"
|
|
29
27
|
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
|
|
28
|
+
|
|
29
|
+
if len(tokens) < 2:
|
|
30
|
+
return
|
|
31
|
+
|
|
30
32
|
command = " ".join(tokens)
|
|
31
33
|
|
|
32
34
|
try:
|
|
@@ -68,6 +70,7 @@ def execute_threaded(sinks, stop_progress=None):
|
|
|
68
70
|
|
|
69
71
|
def initialize():
|
|
70
72
|
init_logging()
|
|
73
|
+
write_history(sys.argv[1:])
|
|
71
74
|
|
|
72
75
|
#src = Path("src/pjk/resources/configs.tmpl")
|
|
73
76
|
#dst_dir = Path.home() / ".pjk"
|
|
@@ -80,7 +83,7 @@ def execute(command: str):
|
|
|
80
83
|
|
|
81
84
|
def execute_tokens(tokens: List[str]):
|
|
82
85
|
initialize()
|
|
83
|
-
|
|
86
|
+
|
|
84
87
|
if '--version' in tokens:
|
|
85
88
|
print(f"pjk version {__version__}")
|
|
86
89
|
sys.exit(0)
|
|
@@ -122,8 +125,6 @@ def execute_tokens(tokens: List[str]):
|
|
|
122
125
|
else:
|
|
123
126
|
sink.drain()
|
|
124
127
|
|
|
125
|
-
write_history(sys.argv[1:])
|
|
126
|
-
|
|
127
128
|
except UsageError as e:
|
|
128
129
|
print(e, file=sys.stderr)
|
|
129
130
|
sys.exit(2)
|
|
@@ -8,7 +8,7 @@ from pjk.parser import ExpressionParser
|
|
|
8
8
|
from pjk.components import Source, Pipe, Sink
|
|
9
9
|
from pjk.usage import Usage, ParsedToken
|
|
10
10
|
from pjk.registry import ComponentRegistry
|
|
11
|
-
from pjk.common import pager_stdout, highlight
|
|
11
|
+
from pjk.common import pager_stdout, highlight, ComponentOrigin
|
|
12
12
|
from contextlib import nullcontext
|
|
13
13
|
|
|
14
14
|
def get_base_class(usage: Usage, as_string: bool = False):
|
|
@@ -49,9 +49,9 @@ def do_man(name: str, registry: ComponentRegistry):
|
|
|
49
49
|
# source and sinks have common names so go through multiple times
|
|
50
50
|
printed = False
|
|
51
51
|
for factory in registry.get_factories():
|
|
52
|
-
|
|
53
|
-
if
|
|
54
|
-
print_man(registry, name, usage)
|
|
52
|
+
comp_class = factory.get_component_class(name)
|
|
53
|
+
if comp_class:
|
|
54
|
+
print_man(registry, name, comp_class.usage())
|
|
55
55
|
printed = True
|
|
56
56
|
|
|
57
57
|
if not printed:
|
|
@@ -61,11 +61,9 @@ def do_all_man(registry: ComponentRegistry, no_pager: bool = True):
|
|
|
61
61
|
cm = nullcontext() if no_pager else pager_stdout()
|
|
62
62
|
with cm:
|
|
63
63
|
for factory in registry.get_factories():
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
usage = factory.get_usage(name)
|
|
68
|
-
print_man(registry, name, usage)
|
|
64
|
+
component_dict = factory.get_components([ComponentOrigin.CORE, ComponentOrigin.EXTERNAL, ComponentOrigin.USER], is_integration=None)
|
|
65
|
+
for name, comp_class in component_dict.items():
|
|
66
|
+
print_man(registry, name, comp_class.usage())
|
|
69
67
|
print()
|
|
70
68
|
|
|
71
69
|
def print_man(registry: ComponentRegistry, name: str, usage: Usage):
|
|
@@ -94,8 +92,8 @@ def do_examples(token:str, registry: ComponentRegistry):
|
|
|
94
92
|
cm = nullcontext() if no_pager else pager_stdout()
|
|
95
93
|
with cm:
|
|
96
94
|
for factory in registry.get_factories():
|
|
97
|
-
|
|
98
|
-
for name, comp_class in
|
|
95
|
+
component_dict = factory.get_components([ComponentOrigin.CORE, ComponentOrigin.EXTERNAL, ComponentOrigin.USER], is_integration=None)
|
|
96
|
+
for name, comp_class in component_dict.items():
|
|
99
97
|
usage = comp_class.usage()
|
|
100
98
|
|
|
101
99
|
comp_type = get_base_class(usage, as_string=True)
|
|
@@ -24,7 +24,7 @@ def do_eval(expr, env):
|
|
|
24
24
|
safe_env['json'] = json
|
|
25
25
|
return eval(expr, {}, safe_env)
|
|
26
26
|
except Exception:
|
|
27
|
-
raise
|
|
27
|
+
raise Exception(f"Error in expression: {expr}")
|
|
28
28
|
|
|
29
29
|
def eval_regular(expr: str, record: dict):
|
|
30
30
|
env = {'f': SafeNamespace(record)}
|
|
@@ -51,7 +51,7 @@ class MapByPipe(Pipe, KeyedSource):
|
|
|
51
51
|
key_rec = {}
|
|
52
52
|
for field in self.fields:
|
|
53
53
|
key_val = record.pop(field, None) if self.is_group else record.get(field)
|
|
54
|
-
if not
|
|
54
|
+
if key_val is None: # not only false-ish but NONE
|
|
55
55
|
return None
|
|
56
56
|
|
|
57
57
|
key_rec[field] = key_val
|
|
@@ -2,7 +2,7 @@ from pjk.components import Pipe
|
|
|
2
2
|
from pjk.usage import ParsedToken, Usage, CONFIG_FILE
|
|
3
3
|
from typing import Any, Dict, Iterable, Optional
|
|
4
4
|
from abc import abstractmethod
|
|
5
|
-
|
|
5
|
+
from pjk.progress import papi
|
|
6
6
|
|
|
7
7
|
class QueryPipe(Pipe):
|
|
8
8
|
name: str = None
|
|
@@ -40,6 +40,8 @@ class QueryPipe(Pipe):
|
|
|
40
40
|
self.output_shape = usage.get_param('shape')
|
|
41
41
|
self.count = usage.get_param('count')
|
|
42
42
|
self.query_field = 'query' # for all subclasses
|
|
43
|
+
self.inrecs = papi.get_counter(self, var_label=None) # don't display progress
|
|
44
|
+
self.outrecs = papi.get_percentage_counter(self, var_label='recs_out', denom_counter=self.inrecs)
|
|
43
45
|
|
|
44
46
|
@abstractmethod
|
|
45
47
|
def execute_query_returning_S_xO_iterable(self, record) -> Iterable[Dict[str, Any]]:
|
|
@@ -53,6 +55,7 @@ class QueryPipe(Pipe):
|
|
|
53
55
|
|
|
54
56
|
def __iter__(self):
|
|
55
57
|
for in_rec in self.left:
|
|
58
|
+
self.inrecs.increment()
|
|
56
59
|
iter = self.execute_query_returning_S_xO_iterable(in_rec)
|
|
57
60
|
|
|
58
61
|
if self.output_shape == 'S_xO':
|
|
@@ -60,8 +63,11 @@ class QueryPipe(Pipe):
|
|
|
60
63
|
for out_rec in iter:
|
|
61
64
|
if not q_done:
|
|
62
65
|
q_done = True
|
|
66
|
+
self.outrecs.increment()
|
|
63
67
|
yield self._make_q_object(in_rec, out_rec)
|
|
64
68
|
continue
|
|
69
|
+
|
|
70
|
+
self.outrecs.increment()
|
|
65
71
|
yield out_rec
|
|
66
72
|
|
|
67
73
|
elif self.output_shape == 'xO':
|
|
@@ -70,6 +76,7 @@ class QueryPipe(Pipe):
|
|
|
70
76
|
if not q_done:
|
|
71
77
|
q_done = True
|
|
72
78
|
continue
|
|
79
|
+
self.outrecs.increment()
|
|
73
80
|
yield out_rec
|
|
74
81
|
|
|
75
82
|
elif self.output_shape == 'Sxo':
|
|
@@ -84,6 +91,7 @@ class QueryPipe(Pipe):
|
|
|
84
91
|
continue
|
|
85
92
|
r_list.append(out_rec)
|
|
86
93
|
q_out['child'] = r_list
|
|
94
|
+
self.outrecs.increment()
|
|
87
95
|
yield q_out
|
|
88
96
|
|
|
89
97
|
|
|
@@ -3,10 +3,10 @@
|
|
|
3
3
|
|
|
4
4
|
# djk/pipes/remove_field.py
|
|
5
5
|
|
|
6
|
-
from pjk.components import
|
|
6
|
+
from pjk.components import DeepCopyPipe
|
|
7
7
|
from pjk.usage import ParsedToken, Usage, UsageError
|
|
8
8
|
|
|
9
|
-
class RemoveField(
|
|
9
|
+
class RemoveField(DeepCopyPipe):
|
|
10
10
|
@classmethod
|
|
11
11
|
def usage(cls):
|
|
12
12
|
usage = Usage(
|
|
@@ -10,25 +10,32 @@ from pjk.sinks.format_sink import FormatSink
|
|
|
10
10
|
from pjk.sources.format_source import FormatSource
|
|
11
11
|
import importlib.util
|
|
12
12
|
import importlib
|
|
13
|
-
import importlib.metadata
|
|
14
13
|
from pjk.components import Pipe, Source, Sink
|
|
15
|
-
from pjk.common import ComponentFactory, highlight
|
|
16
|
-
from typing import List
|
|
14
|
+
from pjk.common import ComponentFactory, highlight, ComponentOrigin
|
|
15
|
+
from typing import List, Type
|
|
17
16
|
|
|
18
|
-
class
|
|
19
|
-
def __init__(self,
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
17
|
+
class ExternalRegistrar:
    """Facade handed to an extras package's ``register(registrar)`` function.

    Each method forwards to the matching factory and tags the component
    with ``ComponentOrigin.EXTERNAL``.
    """

    def __init__(self, sources: SourceFactory, pipes: PipeFactory, sinks: SinkFactory) -> None:
        self._sources: SourceFactory = sources
        self._pipes: PipeFactory = pipes
        self._sinks: SinkFactory = sinks

    def source(self, name: str, cls: Type[Source]) -> None:
        """Register ``cls`` as an external source called ``name``."""
        origin = ComponentOrigin.EXTERNAL
        self._sources.register(name, cls, origin=origin)

    def pipe(self, name: str, cls: Type[Pipe]) -> None:
        """Register ``cls`` as an external pipe called ``name``."""
        origin = ComponentOrigin.EXTERNAL
        self._pipes.register(name, cls, origin=origin)

    def sink(self, name: str, cls: Type[Sink]) -> None:
        """Register ``cls`` as an external sink called ``name``."""
        origin = ComponentOrigin.EXTERNAL
        self._sinks.register(name, cls, origin=origin)
|
|
23
31
|
|
|
24
32
|
class ComponentRegistry:
|
|
25
33
|
def __init__(self):
|
|
26
34
|
self.source_factory = SourceFactory()
|
|
27
35
|
self.pipe_factory = PipeFactory()
|
|
28
36
|
self.sink_factory = SinkFactory()
|
|
29
|
-
|
|
30
37
|
self.load_user_components()
|
|
31
|
-
|
|
38
|
+
self.load_namespace_extras()
|
|
32
39
|
|
|
33
40
|
def create_source(self, token: str):
|
|
34
41
|
return self.source_factory.create(token)
|
|
@@ -56,9 +63,31 @@ class ComponentRegistry:
|
|
|
56
63
|
print()
|
|
57
64
|
print_factory_core(self.sink_factory, header='sinks')
|
|
58
65
|
|
|
59
|
-
self.
|
|
60
|
-
self.
|
|
66
|
+
self.print_non_core([ComponentOrigin.CORE,ComponentOrigin.EXTERNAL], is_integration=True, header='integrations')
|
|
67
|
+
self.print_non_core([ComponentOrigin.EXTERNAL], is_integration=False, header='apps')
|
|
68
|
+
self.print_non_core([ComponentOrigin.USER], is_integration=None, header='user components (~/.pjk/plugins)')
|
|
69
|
+
|
|
70
|
+
# is_integration = True|False|None None=don't care
|
|
71
|
+
def print_non_core(self, origin_list: List[ComponentOrigin], is_integration: bool, header:str):
    """Print one listing section for components outside the core listing.

    Collects matching components from the source, pipe and sink factories
    (later factories overwrite earlier ones on a name clash). Prints nothing
    when no component matches; otherwise a blank line, the highlighted
    ``header`` and one row per component with its name, its highlighted
    component type and the first line of its usage description.
    """
    merged = {}
    for factory in (self.source_factory, self.pipe_factory, self.sink_factory):
        merged.update(factory.get_components(origin_list=origin_list, is_integration=is_integration))

    if len(merged) == 0:
        return

    print()
    print(highlight(header))

    for name, comp_class in merged.items():
        usage = comp_class.usage()
        type_label = highlight(get_component_type(comp_class))
        first_line = usage.desc.split('\n')[0]
        print(f' {name:<17} {type_label:<15} {first_line}')
|
|
90
|
+
|
|
62
91
|
def load_user_components(self, path=os.path.expanduser("~/.pjk/plugins")):
|
|
63
92
|
if not os.path.isdir(path):
|
|
64
93
|
return
|
|
@@ -87,36 +116,48 @@ class ComponentRegistry:
|
|
|
87
116
|
name = usage.name
|
|
88
117
|
|
|
89
118
|
if is_sink(obj, module):
|
|
90
|
-
self.sink_factory.register(name, obj,
|
|
119
|
+
self.sink_factory.register(name, obj, ComponentOrigin.USER)
|
|
91
120
|
elif is_pipe(obj, module):
|
|
92
|
-
self.pipe_factory.register(name, obj,
|
|
121
|
+
self.pipe_factory.register(name, obj, ComponentOrigin.USER)
|
|
93
122
|
elif is_source(obj, module):
|
|
94
|
-
self.source_factory.register(name, obj,
|
|
123
|
+
self.source_factory.register(name, obj, ComponentOrigin.USER)
|
|
95
124
|
|
|
96
|
-
def
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
component_tuples.extend(factory.get_component_name_class_tuples(origin))
|
|
125
|
+
def load_namespace_extras(self, package: str = "pjk_extras") -> None:
    """Discover installed ``pjk-*`` distributions and let them register components.

    For every installed distribution whose name starts with ``pjk-``, the
    module ``<package>.<rest of the name, dashes as underscores>`` is
    imported and its ``register(registrar)`` function is called with an
    ExternalRegistrar bound to this registry's factories.

    A failing extra never aborts loading: the problem is reported and the
    next distribution is tried. Reports go to stderr so they cannot be mixed
    into output written to stdout.

    Args:
        package: Namespace package under which the extras' modules live.
    """
    import importlib
    import importlib.metadata as im
    import sys

    registrar = ExternalRegistrar(self.source_factory, self.pipe_factory, self.sink_factory)

    for dist in im.distributions():
        name = (dist.metadata.get("Name") or "")
        if not name.startswith("pjk-"):
            continue

        modname = f"{package}.{name[4:].replace('-', '_')}"  # pjk-foo-bar -> pjk_extras.foo_bar

        # Import the extra; if it fails, continue to the next
        try:
            mod = importlib.import_module(modname)
        except Exception as e:
            print(f"[pjk] import failed for {modname}: {e}", file=sys.stderr)
            continue

        reg = getattr(mod, "register", None)
        if not callable(reg):
            print(f"[pjk] extra '{modname}' has no register(registrar)", file=sys.stderr)
            continue

        # Run its register; if it fails, continue to the next
        try:
            reg(registrar)  # registers class TYPES, same contract as before
        except Exception as e:
            print(f"[pjk] register() failed in {modname}: {e}", file=sys.stderr)
            continue
|
|
113
154
|
|
|
114
155
|
def print_core_formats(factories: List[ComponentFactory]):
|
|
115
156
|
print(highlight('formats'))
|
|
116
157
|
formats = set()
|
|
117
158
|
for factory in factories:
|
|
118
|
-
|
|
119
|
-
for name, comp_class in
|
|
159
|
+
component_dict = factory.get_components([ComponentOrigin.CORE], is_integration=False)
|
|
160
|
+
for name, comp_class in component_dict.items():
|
|
120
161
|
if issubclass(comp_class, FormatSink|FormatSource):
|
|
121
162
|
formats.add(name)
|
|
122
163
|
|
|
@@ -124,23 +165,21 @@ def print_core_formats(factories: List[ComponentFactory]):
|
|
|
124
165
|
lst = ', '.join(list(formats))
|
|
125
166
|
print(f'{space:<15}{lst}. (sources/sinks in local files, dirs and s3)')
|
|
126
167
|
|
|
127
|
-
def print_factory_core(factory: ComponentFactory, header: str
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
168
|
+
def print_factory_core(factory: ComponentFactory, header: str):
    """Print the highlighted ``header`` followed by the factory's core components.

    Only CORE, non-integration components are listed, and format
    sources/sinks are left out. Each row shows the component name and the
    first line of its usage description.
    """
    core_components = factory.get_components([ComponentOrigin.CORE], is_integration=False)
    print(highlight(header))

    format_bases = (FormatSink, FormatSource)

    # user and outside package components are also here, but printed from registry class
    for name, comp_class in core_components.items():
        if not issubclass(comp_class, format_bases):
            first_line = comp_class.usage().desc.split('\n')[0]
            print(f' {name:<12} {first_line}')
|
|
144
183
|
|
|
145
184
|
def get_component_type(component_class) -> str:
|
|
146
185
|
if issubclass(component_class, Sink):
|
|
@@ -178,22 +217,3 @@ def is_sink(obj, module):
|
|
|
178
217
|
and obj.__module__ == module.__name__
|
|
179
218
|
)
|
|
180
219
|
|
|
181
|
-
|
|
182
|
-
def iter_entry_points(group: str):
|
|
183
|
-
eps = importlib.metadata.entry_points()
|
|
184
|
-
if hasattr(eps, "select"):
|
|
185
|
-
# Python 3.10+ (importlib.metadata.EntryPoints)
|
|
186
|
-
return eps.select(group=group)
|
|
187
|
-
# Python 3.9 and older
|
|
188
|
-
return eps.get(group, [])
|
|
189
|
-
|
|
190
|
-
def load_package_extras():
|
|
191
|
-
"""
|
|
192
|
-
Discover and import all installed pjk extras (via entry points).
|
|
193
|
-
"""
|
|
194
|
-
for ep in iter_entry_points("pjk.package_extras"):
|
|
195
|
-
try:
|
|
196
|
-
importlib.import_module(ep.value)
|
|
197
|
-
print(f"[pjk] loaded package extra: {ep.name} -> {ep.value}")
|
|
198
|
-
except Exception as e:
|
|
199
|
-
print(f"[pjk] failed to load extra {ep.name}: {e}")
|
|
@@ -18,8 +18,8 @@ class GraphSink(Sink):
|
|
|
18
18
|
component_class=cls
|
|
19
19
|
)
|
|
20
20
|
usage.def_arg(name='kind', usage='hist|scatter|bar|line|cumulative')
|
|
21
|
-
usage.def_param(name='x', usage='
|
|
22
|
-
usage.def_param(name='y', usage='
|
|
21
|
+
usage.def_param(name='x', usage='x-axis field', default='x')
|
|
22
|
+
usage.def_param(name='y', usage='comma separated list of y-axis fields', default='y')
|
|
23
23
|
usage.def_param(name='pause', usage='Seconds to show graph', is_num=True, default='-1')
|
|
24
24
|
return usage
|
|
25
25
|
|