python-jack-knife 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pjk/__init__.py +5 -0
- pjk/base.py +377 -0
- pjk/common.py +150 -0
- pjk/log.py +67 -0
- pjk/main.py +106 -0
- pjk/man_page.py +125 -0
- pjk/parser.py +284 -0
- pjk/pipes/__init__.py +0 -0
- pjk/pipes/denorm.py +68 -0
- pjk/pipes/factory.py +62 -0
- pjk/pipes/filter.py +57 -0
- pjk/pipes/head.py +34 -0
- pjk/pipes/join.py +85 -0
- pjk/pipes/let_reduce.py +198 -0
- pjk/pipes/map.py +91 -0
- pjk/pipes/move_field.py +36 -0
- pjk/pipes/postgres_pipe.py +209 -0
- pjk/pipes/remove_field.py +36 -0
- pjk/pipes/select.py +42 -0
- pjk/pipes/sort.py +63 -0
- pjk/pipes/tail.py +39 -0
- pjk/pipes/user_pipe_factory.py +45 -0
- pjk/pipes/where.py +49 -0
- pjk/registry.py +143 -0
- pjk/sinks/__init__.py +0 -0
- pjk/sinks/csv_sink.py +33 -0
- pjk/sinks/ddb.py +54 -0
- pjk/sinks/devnull.py +31 -0
- pjk/sinks/dir_sink.py +59 -0
- pjk/sinks/expect.py +53 -0
- pjk/sinks/factory.py +108 -0
- pjk/sinks/graph.py +57 -0
- pjk/sinks/graph_bar_line.py +229 -0
- pjk/sinks/graph_cumulative.py +55 -0
- pjk/sinks/graph_hist.py +72 -0
- pjk/sinks/graph_scatter.py +29 -0
- pjk/sinks/json_sink.py +23 -0
- pjk/sinks/s3_sink.py +100 -0
- pjk/sinks/sinks.py +68 -0
- pjk/sinks/stdout.py +44 -0
- pjk/sinks/tsv_sink.py +22 -0
- pjk/sinks/user_sink_factory.py +43 -0
- pjk/sources/__init__.py +0 -0
- pjk/sources/csv_source.py +28 -0
- pjk/sources/dir_source.py +69 -0
- pjk/sources/factory.py +100 -0
- pjk/sources/format_usage.py +11 -0
- pjk/sources/inline_source.py +56 -0
- pjk/sources/json_source.py +35 -0
- pjk/sources/lazy_file.py +16 -0
- pjk/sources/lazy_file_local.py +22 -0
- pjk/sources/lazy_file_s3.py +28 -0
- pjk/sources/parquet_source.py +32 -0
- pjk/sources/s3_source.py +146 -0
- pjk/sources/source_list.py +23 -0
- pjk/sources/sql_source.py +32 -0
- pjk/sources/tsv_source.py +15 -0
- pjk/sources/user_source_factory.py +33 -0
- pjk/version.py +4 -0
- python_jack_knife-0.5.0.dist-info/METADATA +254 -0
- python_jack_knife-0.5.0.dist-info/RECORD +65 -0
- python_jack_knife-0.5.0.dist-info/WHEEL +5 -0
- python_jack_knife-0.5.0.dist-info/entry_points.txt +2 -0
- python_jack_knife-0.5.0.dist-info/licenses/LICENSE +202 -0
- python_jack_knife-0.5.0.dist-info/top_level.txt +1 -0
pjk/__init__.py
ADDED
pjk/base.py
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from typing import Any, Optional, List, Set
|
|
6
|
+
|
|
7
|
+
class TokenError(ValueError):
    """Error describing a problem with a single token.

    The explanatory text (possibly multi-line) is kept on ``self.text`` and is
    also the exception message.
    """

    @classmethod
    def from_list(cls, lines: List[str]):
        """Build an error whose text is ``lines`` joined with newlines.

        Instantiates ``cls`` so that subclasses receive an instance of their
        own type rather than a plain ``TokenError``.
        """
        return cls('\n'.join(lines))

    def __init__(self, text: str):
        super().__init__(text)
        self.text = text

    def get_text(self):
        """Return the text this error was created with."""
        return self.text
|
|
19
|
+
|
|
20
|
+
class UsageError(ValueError):
    """Usage error that can render the offending token in its command line.

    Args:
        message: Short description of the problem.
        tokens: All tokens of the command, or None when they are not known.
        token_no: Index into ``tokens`` of the token to underline.
        token_error: Optional object exposing ``get_text()`` (a TokenError);
            its text is appended as detail.
    """

    def __init__(self, message: str,
                 tokens: Optional[List[str]] = None,
                 token_no: int = 0,
                 token_error: Optional["TokenError"] = None):
        super().__init__(message)
        self.message = message
        self.tokens = tokens
        self.token_no = token_no
        self.token_error = token_error

    def __str__(self):
        """Render the command line, an underline, the message and the detail.

        The command/underline pair is skipped when no tokens were supplied and
        the detail is skipped when no token error was supplied, so an error
        built with the default arguments still prints its message.
        """
        lines = []
        if self.tokens:
            token_copies = [self._quote(t) for t in self.tokens]
            lines.append('pjk ' + ' '.join(token_copies))
            lines.append(self._get_underline(token_copies))
        lines.append(self.message)
        if self.token_error is not None:
            lines.append('')
            lines.append(self.token_error.get_text())
        return '\n'.join(lines)

    # quote json inline
    def _quote(self, token):
        """Wrap tokens starting with '[' or '{' in double quotes."""
        if token.startswith(('[', '{')):
            return '"' + token + '"'
        return token

    def _get_underline(self, tokens: List, marker='^') -> str:
        """Return a line of ``marker`` characters under token ``self.token_no``.

        Returns an empty string when ``self.token_no`` does not index
        ``tokens``.
        """
        if not -len(tokens) <= self.token_no < len(tokens):
            return ''
        # 4 for the leading 'pjk ', +1 per preceding token for its separator
        offset = 4 + sum(len(t) + 1 for t in tokens[:self.token_no])
        return ' ' * offset + marker * len(tokens[self.token_no])
|
|
52
|
+
|
|
53
|
+
class ParsedToken:
    """Split a token of the form ``name:arg1:arg2@param=value@flag``.

    Everything after the first '@' is a list of '@'-separated params; the part
    before it is split on ':' into the leading name and its args.
    """

    def __init__(self, token: str):
        self.token = token
        self._params = {}
        self._args = []

        head, at_sign, param_text = token.partition('@')  # separate params off
        if at_sign:
            for param in param_text.split('@'):
                # Split on the first '=' only so a value may itself contain '='.
                key, eq_sign, value = param.partition('=')
                self._params[key] = value if eq_sign else None

        self._all_but_params = head

        # args
        colon_parts = head.split(':')
        self._pre_colon = colon_parts[0]

        # treat a '' arg as missing and ignore all args after that
        for arg in colon_parts[1:]:
            if arg == '':
                break
            self._args.append(arg)

    @property
    def pre_colon(self):
        """The text before the first ':' of the non-param part."""
        return self._pre_colon

    @property
    def whole_token(self):
        """The token exactly as passed in."""
        return self.token

    @property  # avoid colon parsing
    def all_but_params(self):
        """The text before the first '@', not split on ':'."""
        return self._all_but_params

    def num_args(self):
        """Return how many args were parsed."""
        return len(self._args)

    # args are mandatory
    def get_arg(self, arg_no: int):
        """Return arg number ``arg_no``, or None when it is past the end."""
        return self._args[arg_no] if arg_no < len(self._args) else None

    # params are optional
    def get_params(self) -> dict:
        """Return the param dict; a param given without '=' maps to None."""
        return self._params
|
|
100
|
+
|
|
101
|
+
class Usage:
    """Declares the args, params and examples of a component and binds tokens.

    Args are positional and mandatory; params are named and optional. Values
    are strings unless declared numeric with ``is_num``.
    """

    def __init__(self, name: str, desc: str, component_class: type):
        self.name = name
        self.desc = desc
        self.comp_class = component_class
        self.args = {}    # arg name -> bound value, filled by bind()
        self.params = {}  # param name -> default or bound value
        self.syntax = None

        self.arg_defs = []      # (name, usage, is_num, valid_values), in order
        self.param_usages = {}  # name -> (usage, is_num, valid_values, default)
        self.examples = []      # (expr_tokens, expect)

    def get_component_class(self):
        return self.comp_class

    def get_base_class(self, as_string: bool = False):
        """Return Sink, Pipe or Source (or its lowercase name) for the component.

        Raises:
            TypeError: if the component class derives from none of them.
        """
        if issubclass(self.comp_class, Sink):
            return 'sink' if as_string else Sink
        elif issubclass(self.comp_class, Pipe):
            return 'pipe' if as_string else Pipe
        elif issubclass(self.comp_class, Source):
            return 'source' if as_string else Source
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # proper exception so the message is the intended one.
        raise TypeError('improper class')

    # args and param values default as str
    def def_arg(self, name: str, usage: str, is_num: bool = False, valid_values: Optional[Set[str]] = None):
        """Declare the next positional arg."""
        self.arg_defs.append((name, usage, is_num, valid_values))

    def def_param(self, name: str, usage: str, is_num: bool = False, valid_values: Optional[Set[str]] = None, default: str = None):
        """Declare an optional param; a truthy ``default`` is converted and stored."""
        self.param_usages[name] = (usage, is_num, valid_values, default)
        if default:
            self.params[name] = self._get_val(default, is_num, valid_values)

    def def_example(self, expr_tokens: list[str], expect: str):
        self.examples.append((expr_tokens, expect))

    def def_syntax(self, syntax: str):
        """Override the syntax string otherwise pieced together from the defs."""
        self.syntax = syntax

    def get_examples(self):
        return self.examples

    def get_arg(self, name: str):
        """Return the bound value of arg ``name``, or None."""
        return self.args.get(name, None)

    def get_param(self, name: str):
        """Return the bound or default value of param ``name``, or None."""
        return self.params.get(name)

    def get_usage_text(self):
        """Return the description, syntax and arg/param notes as one string."""
        lines = [self.desc]

        syntax_str = self.get_token_syntax()  # might be ''
        if len(syntax_str) > 0:
            lines.append('')
            lines.append('syntax:')
            lines.append(f' {syntax_str}')

        lines.extend(self.get_arg_param_desc())
        return '\n'.join(lines)

    def get_token_syntax(self):
        """Return the explicit syntax if set, else one built from the defs."""
        if self.syntax is not None:
            return self.syntax  # else piece it together

        token = f'{self.name}'
        for name, _usage, _is_num, _valid_values in self.arg_defs:
            token += f':<{name}>'

        for name, (_usage, _is_num, valid_values, _default) in self.param_usages.items():
            value_display = name
            if valid_values:
                value_display = '|'.join(list(valid_values))
            token += f'@{name}=<{value_display}>'
        return token

    def get_arg_param_desc(self):
        """Return the 'mandatory args' / 'optional params' note lines."""
        notes = []
        if self.arg_defs:
            notes.append('mandatory args:')
            for name, usage, _is_num, _valid_values in self.arg_defs:
                notes.append(f' {name} = {usage}')

        if self.param_usages:
            notes.append('optional params:')
            for name, (text, _is_num, _valid_values, default) in self.param_usages.items():
                notes.append(f' {name} = {text} (default={default})')
        return notes

    def bind(self, ptok: "ParsedToken"):
        """Validate the token's args and params and store their converted values.

        Raises:
            TokenError: on extra or missing args, an unknown param, a param
                without a value, or a value that fails conversion/validation.
        """
        if ptok.num_args() > len(self.arg_defs):
            extra = [ptok.get_arg(i) for i in range(len(self.arg_defs), ptok.num_args())]
            raise TokenError.from_list([f"extra arg{'s' if len(extra) > 1 else ''}: {','.join(extra)}.",
                                        '', self.get_usage_text()])

        if ptok.num_args() < len(self.arg_defs):
            missing = [adef[0] for adef in self.arg_defs[ptok.num_args():]]
            raise TokenError.from_list([f"missing arg{'s' if len(missing) > 1 else ''}: {','.join(missing)}.",
                                        '', self.get_usage_text()])

        for i, adef in enumerate(self.arg_defs):
            name, _usage, is_num, valid_values = adef

            try:
                val_str = ptok.get_arg(i)
                self.args[name] = self._get_val(val_str, is_num, valid_values)
            except (ValueError, TypeError) as e:
                raise TokenError.from_list([f"wrong value for '{name}' arg.", '', self.get_usage_text()]) from e

        for name, str_val in ptok.get_params().items():
            usage = self.param_usages.get(name, None)
            if not usage:
                raise TokenError.from_list([f"unknown param: '{name}'.", '', self.get_usage_text()])
            if not str_val:
                raise TokenError.from_list([f"missing value for '{name}' param.", '', self.get_usage_text()])

            _text, is_num, valid_values, _default = usage
            try:
                self.params[name] = self._get_val(str_val, is_num, valid_values)
            except (ValueError, TypeError) as e:
                raise TokenError.from_list([f"wrong value type for '{name}' param.", '', self.get_usage_text()]) from e

    def _get_val(self, val_str: str, is_num: bool, valid_values: Optional[Set[str]] = None):
        """Convert ``val_str`` to its declared type.

        Strings are checked against ``valid_values`` when given; numeric values
        become int, or float when int conversion fails.

        Raises:
            ValueError: if the value is empty, not allowed, or not a number.
        """
        if not val_str:
            raise ValueError('missing value')
        if not is_num:  # is string
            if valid_values is None:  # no constraints
                return val_str
            if val_str not in valid_values:
                raise ValueError(f'illegal value: {val_str}')
            return val_str

        # is_num
        try:
            return int(val_str)
        except ValueError:  # could be a float that errors, but is ok
            return float(val_str)
|
|
248
|
+
|
|
249
|
+
# Placeholder usage for components that have not declared args/params yet:
# binding is a no-op and such components keep reading their ParsedToken.
class NoBindUsage(Usage):
    """A Usage whose ``bind`` does nothing."""

    def __init__(self, name: str, desc: str, component_class: type):
        super().__init__(name, desc, component_class)

    def bind(self, ptok: ParsedToken):
        """Accept any token without validating or storing anything."""
        return None
|
|
256
|
+
|
|
257
|
+
# mixin
class KeyedSource(ABC):
    """Mixin for sources that can be looked up by a record (used for joins)."""

    @classmethod
    def usage(cls):
        """Return a default Usage named after the class.

        ``component_class`` is a required argument of ``Usage``; it is passed
        here so this default does not fail with a TypeError.
        """
        return Usage(
            name=cls.__name__,
            desc=f"{cls.__name__} component",
            component_class=cls
        )

    @abstractmethod
    def lookup(self, left_rec) -> Optional[dict]:
        """Return the record associated with the given key, or None."""
        pass

    def get_unlookedup_records(self) -> List[Any]:
        # for outer join; this default implementation returns None
        pass

    def deep_copy(self):
        """Return None: not copyable unless overridden."""
        return None
|
|
277
|
+
|
|
278
|
+
class Source(ABC):
    """Base class of record sources; subclasses supply ``__iter__``."""

    is_format = False

    @classmethod
    def usage(cls):
        """Return a non-binding usage named after the class."""
        label = cls.__name__
        return NoBindUsage(name=label, desc=f"{label} component", component_class=cls)

    @abstractmethod
    def __iter__(self):
        raise NotImplementedError("__iter__ must be implemented by subclasses")

    def __next__(self):
        """Return the next record from an iterator created on first use."""
        try:
            cached = self._iter
        except AttributeError:
            # first call to next(): build the internal iterator and keep it
            cached = self._iter = iter(self)
        return next(cached)

    def deep_copy(self):
        """Return None: sources are not copyable unless they override this."""
        return None
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class Pipe(Source):
    """A source that reads from one or more input sources."""

    deep_copyable: bool = False  # default to false
    arity: int = 1

    def __init__(self, ptok: ParsedToken, usage: Usage = None):
        self.ptok = ptok
        self.inputs: List[Source] = []
        self.left = None   # left source, for convenience
        self.right = None  # right source, for convenience

    def add_source(self, source: Source) -> None:
        """Record ``source`` as an input and maintain ``left`` / ``right``.

        The first source becomes ``left``. When a second arrives, the previous
        ``left`` moves to ``right`` and the new source becomes ``left``. Later
        sources are only appended to ``inputs``.
        """
        self.inputs.append(source)
        if self.left is None:
            self.left = source
            return
        if self.right is None:
            self.right, self.left = self.left, source

    def reset(self):
        pass  # optional hook

    def deep_copy(self) -> Optional["Pipe"]:
        """Return a copy of this pipe with copied inputs, or None.

        None is returned when the class is not ``deep_copyable`` or when any
        input's ``deep_copy()`` returns None.

        Raises:
            RuntimeError: if the pipe is copyable but has no inputs.
        """
        if not self.deep_copyable:
            return None
        if not self.inputs:
            raise RuntimeError(f"{self.__class__.__name__} has no inputs set")

        kind = self.__class__
        clone = kind(self.ptok, kind.usage())

        for upstream in self.inputs:
            strand = upstream.deep_copy()
            if strand is None:
                return None
            clone.add_source(strand)

        return clone
|
|
341
|
+
|
|
342
|
+
class Sink(ABC):
    """Base class of pipeline terminals; subclasses supply ``process``."""

    is_format = False

    @classmethod
    def usage(cls):
        """Return a non-binding usage named after the class."""
        label = cls.__name__
        return NoBindUsage(name=label, desc=f"{label} component", component_class=cls)

    def __init__(self, ptok: "ParsedToken", usage: "Usage" = None):
        # Note: the instance attribute ``usage`` hides the classmethod of the
        # same name when accessed through an instance.
        self.ptok, self.usage = ptok, usage

    def drain(self):
        """Run the sink by calling ``process``."""
        self.process()

    def print_info(self):
        """Hook that does nothing by default."""
        pass

    def add_source(self, source: "Source") -> None:
        """Set the single input of this sink."""
        self.input = source

    @abstractmethod
    def process(self) -> None:
        pass

    def deep_copy(self):
        """Return None: sinks are not copyable unless they override this."""
        return None
|
|
372
|
+
|
|
373
|
+
# identity source for sub-pipeline seeding
class IdentitySource(Source):
    """Placeholder source that must never be iterated.

    ``Source`` declares ``__iter__`` abstract, so it is implemented here to
    make the class instantiable; every way of pulling a record raises.
    """

    def __iter__(self):
        raise RuntimeError("IdentitySource should never be executed")

    def __next__(self):
        raise RuntimeError("IdentitySource should never be executed")

    def next(self):
        # kept for callers that invoke .next() explicitly
        raise RuntimeError("IdentitySource should never be executed")
|
|
377
|
+
|
pjk/common.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
import sys, shutil, subprocess, contextlib, signal
|
|
5
|
+
import os
|
|
6
|
+
import yaml
|
|
7
|
+
|
|
8
|
+
class SafeNamespace:
    """Attribute-style view of a dict in which missing attributes read as None.

    Nested dicts, and dicts found directly inside lists, are wrapped too.
    """

    def __init__(self, obj):
        def wrap(item):
            # only dicts are converted; every other value is kept as is
            return SafeNamespace(item) if isinstance(item, dict) else item

        for key, raw in obj.items():
            if isinstance(raw, list):
                converted = [wrap(element) for element in raw]
            else:
                converted = wrap(raw)
            setattr(self, key, converted)

    def __getattr__(self, key):
        # only reached when normal lookup fails: gracefully handle missing keys
        return None
|
|
19
|
+
|
|
20
|
+
class ReducingNamespace:
    """Attribute-style view of a record in which every field reads as a collection."""

    def __init__(self, record):
        self._record = record

    def __getattr__(self, name):
        """Return field ``name``; scalars are promoted to singleton lists.

        A missing field propagates the record's own lookup error.
        """
        field = self._record[name]
        already_collection = isinstance(field, (list, tuple, set))
        return field if already_collection else [field]
|
|
29
|
+
|
|
30
|
+
@contextlib.contextmanager
def pager_stdout(use_pager=True):
    """Context manager that routes ``sys.stdout`` through ``less`` while active.

    When ``use_pager`` is false or ``less`` is not on the PATH, the body runs
    with stdout untouched. Otherwise ``less -FRSX`` is started, ``sys.stdout``
    is pointed at its stdin for the duration of the body, and on exit stdout
    is restored, the pipe closed and the pager waited for.
    """
    if not (use_pager and shutil.which("less")):
        yield
        return

    # Avoid BrokenPipeError noise if user quits less early
    try:
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    except Exception:
        pass  # not available on Windows

    pager = subprocess.Popen(["less", "-FRSX"], stdin=subprocess.PIPE, text=True)
    old_stdout = sys.stdout
    try:
        sys.stdout = pager.stdin
        yield
    finally:
        try:
            sys.stdout.flush()
        except Exception:
            pass
        sys.stdout = old_stdout
        try:
            if pager.stdin:
                pager.stdin.close()
        except OSError:
            pass  # the pager's end of the pipe is already gone
        finally:
            # always reap the child, even if closing the pipe failed
            pager.wait()
|
|
55
|
+
|
|
56
|
+
# ANSI escape sequences keyed by style name
COLOR_CODES = dict(
    bold='\033[1m',
    underline='\033[4m',
    red='\033[31m',
    green='\033[32m',
    yellow='\033[33m',
    blue='\033[34m',
    magenta='\033[35m',
    cyan='\033[36m',
    gray='\033[90m',
)

RESET = '\033[0m'

def highlight(text: str, color: str = 'bold', value: str = None) -> str:
    """Return ``text`` with every occurrence of ``value`` wrapped in a style.

    The whole text is styled when ``value`` is empty or None. ``color`` is
    matched case-insensitively and unknown names fall back to bold.
    """
    target = value if value else text
    prefix = COLOR_CODES.get(color.lower(), COLOR_CODES['bold'])
    styled = f"{prefix}{target}{RESET}"
    return text.replace(target, styled)
|
|
74
|
+
|
|
75
|
+
class Lookups:
    """Key/value store persisted as YAML in ``~/.pjk/lookups.yaml``."""

    def __init__(self):
        self.lookups_yaml = os.path.expanduser('~/.pjk/lookups.yaml')
        self._data = {}
        self._load()

    def _load(self):
        """Read the YAML file into memory; a missing or empty file yields {}."""
        if not os.path.exists(self.lookups_yaml):
            self._data = {}
            return
        with open(self.lookups_yaml, 'r') as f:
            loaded = yaml.safe_load(f)
        self._data = loaded or {}

    def save(self):
        """Write the in-memory data to the YAML file, creating its directory."""
        parent = os.path.dirname(self.lookups_yaml)
        os.makedirs(parent, exist_ok=True)
        with open(self.lookups_yaml, 'w') as f:
            yaml.safe_dump(self._data, f)

    def get(self, key, default=None):
        """Return the value stored under ``key``, or ``default``."""
        return self._data.get(key, default)

    def set(self, key, value):
        """Store ``value`` under ``key`` and save."""
        self._data[key] = value
        self.save()

    def delete(self, key):
        """Drop ``key`` and save; do nothing when the key is absent."""
        if key not in self._data:
            return
        del self._data[key]
        self.save()

    def all(self):
        """Return a new dict built from the stored data."""
        return dict(self._data)
|
|
113
|
+
|
|
114
|
+
class ComponentFactory:
    """Registry of component classes of one kind, keyed by name."""

    def __init__(self, components: dict, comp_type_name: str):
        self.num_orig = 0
        self.components = components  # name -> component_class
        self.comp_type_name = comp_type_name
        # size at construction; entries listed past this count are marked as plugins
        self.num_orig_comps = len(components)

    def register(self, name, comp_class):
        """Add or replace the component class stored under ``name``."""
        self.components[name] = comp_class

    def get_comp_type_name(self):
        return self.comp_type_name

    def print_descriptions(self):
        """Print a header and one line per component with its first description line."""
        print(highlight(f'{self.comp_type_name}s'))

        for position, (name, comp_class) in enumerate(self.components.items()):
            first_line = comp_class.usage().desc.split('\n')[0]
            plugin = '(~/.pjk/plugin)' if position >= self.num_orig_comps else ''
            line = f' {name:<12} {first_line} {plugin}'
            if plugin:
                line = highlight(line, 'bold', plugin)
            print(line)

    def get_usage(self, name: str):
        """Return the usage of the component called ``name``, or None if unknown."""
        comp_class = self.components.get(name)
        return comp_class.usage() if comp_class else None

    def create(self, token: str):
        pass
|
pjk/log.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
import logging, os, atexit
|
|
5
|
+
from logging.handlers import RotatingFileHandler
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger("djk")
|
|
10
|
+
|
|
11
|
+
def _truthy(env_val: Optional[str]) -> bool:
|
|
12
|
+
return str(env_val).lower() in ("1", "true", "yes", "on")
|
|
13
|
+
|
|
14
|
+
def init(force: bool = False, level: Optional[int] = None, console: Optional[bool] = None):
    """
    Initialize 'djk' logging.

    - Rotates at DJK_LOG_MAX_MB (default 2 MB), keeps DJK_LOG_BACKUPS (default 3).
    - Files under ~/.pjk/logs by default; override with DJK_LOG_DIR / DJK_LOG_FILE.
    - Set DJK_DEBUG=1|true|yes for DEBUG, else INFO (or pass explicit level).
    - To enable console output explicitly, set console=True or DJK_LOG_CONSOLE=1.
    - Set force=True to replace existing handlers.
    """
    if logger.handlers and not force:
        return

    # Close the handlers being replaced so a forced re-init does not leave the
    # previous log file open.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    if level is None:
        level = logging.DEBUG if _truthy(os.getenv("DJK_DEBUG")) else logging.INFO

    fmt = "[%(levelname)s] [%(threadName)s] %(message)s"
    formatter = logging.Formatter(fmt)

    # Rotating file handler in ~/.pjk/logs
    log_dir = Path(os.getenv("DJK_LOG_DIR", os.path.expanduser("~/.pjk/logs")))
    log_dir.mkdir(parents=True, exist_ok=True)
    log_file = log_dir / os.getenv("DJK_LOG_FILE", "pjk.log")
    max_bytes = int(float(os.getenv("DJK_LOG_MAX_MB", "2")) * 1024 * 1024)  # 2 MB
    backups = int(os.getenv("DJK_LOG_BACKUPS", "3"))

    fh = RotatingFileHandler(
        log_file,
        maxBytes=max_bytes,
        backupCount=backups,
        encoding="utf-8",
        delay=False,  # open immediately so first emit writes bytes
    )
    fh.setLevel(level)
    fh.setFormatter(formatter)

    logger.setLevel(level)
    logger.addHandler(fh)

    # DO NOT propagate into root (prevents accidental console logs elsewhere)
    logger.propagate = False

    # Optional console (off by default)
    enable_console = console if console is not None else _truthy(os.getenv("DJK_LOG_CONSOLE"))
    if enable_console:
        sh = logging.StreamHandler()
        sh.setLevel(level)
        sh.setFormatter(formatter)
        logger.addHandler(sh)

    # Flush/close on exit for short-lived runs
    atexit.register(logging.shutdown)
|
pjk/main.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
#!/usr/bin/env python
|
|
5
|
+
import sys
|
|
6
|
+
import os
|
|
7
|
+
import signal
|
|
8
|
+
import shlex
|
|
9
|
+
from typing import List
|
|
10
|
+
from pjk.parser import ExpressionParser
|
|
11
|
+
from pjk.base import UsageError
|
|
12
|
+
from pjk.log import init as init_logging
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
import concurrent.futures
|
|
15
|
+
from pjk.registry import ComponentRegistry
|
|
16
|
+
from pjk.pipes.factory import PipeFactory
|
|
17
|
+
from pjk.sources.factory import SourceFactory
|
|
18
|
+
from pjk.sinks.factory import SinkFactory
|
|
19
|
+
from pjk.man_page import do_man, do_examples
|
|
20
|
+
from pjk.sinks.expect import ExpectSink
|
|
21
|
+
from pjk.version import __version__
|
|
22
|
+
|
|
23
|
+
def write_history(tokens):
    """Append a timestamped ``pjk <tokens>`` line to .pjk-history.txt in the current directory."""
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    entry = f"{stamp}\tpjk {' '.join(tokens)}\n"
    with open(".pjk-history.txt", "a") as history:
        history.write(entry)
|
|
29
|
+
|
|
30
|
+
def execute_threaded(sinks):
    """Drain every sink on its own worker thread and wait for all of them.

    An exception raised by a sink's ``drain()`` is reported on stderr and does
    not stop the other sinks. An empty ``sinks`` is a no-op.
    """
    if not sinks:
        # ThreadPoolExecutor rejects max_workers=0
        return

    # Choose a max thread limit (explicitly)
    max_workers = min(32, len(sinks))  # or set a fixed cap like 8

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(s.drain): s for s in sinks}

        for future in concurrent.futures.as_completed(futures):
            sink_obj = futures[future]
            try:
                future.result()  # This will re-raise any exception from s.drain()
            except Exception as e:
                # report on stderr so the message does not mix with sink output
                print(f"Sink {sink_obj} raised an exception:", file=sys.stderr)
                print(e, file=sys.stderr)
|
|
46
|
+
|
|
47
|
+
def execute(command: str):
    """Split ``command`` shell-style (POSIX rules, '#' comments dropped) and run it."""
    execute_tokens(shlex.split(command, comments=True, posix=True))
|
|
50
|
+
|
|
51
|
+
def execute_tokens(tokens: List[str]):
    """Run one pjk command given as a list of tokens.

    Handles ``--version``, the no-token usage listing, ``man <x>`` and
    ``examples`` / ``examples+``; any other token list is parsed into a sink
    and drained. A UsageError is printed to stderr and exits with status 2.
    """
    init_logging()
    signal.signal(signal.SIGINT, lambda s, f: sys.exit(0))

    if '--version' in tokens:
        print(f"pjk version {__version__}")
        sys.exit(0)

    registry = ComponentRegistry()

    if len(tokens) < 1:
        registry.print_usage()
        return

    # pjk man --all | --all+ | <component>
    if len(tokens) == 2 and tokens[0] == 'man':
        do_man(tokens[1], registry)
        return

    # pjk examples | examples+
    if len(tokens) == 1 and tokens[0] in ['examples', 'examples+']:
        do_examples(tokens[0], registry)
        return

    parser = ExpressionParser(registry)

    try:
        # Build initial sink
        sink = parser.parse(tokens)

        sinks = [sink]
        # os.cpu_count() may return None; fall back to a single thread
        max_threads = os.cpu_count() or 1
        while len(sinks) < max_threads:
            clone = sink.deep_copy()
            if not clone:
                break
            sinks.append(clone)

        if len(sinks) > 1:
            execute_threaded(sinks)
        else:
            sink.drain()  # run single in main thread
            # NOTE(review): indentation was lost in the reviewed text; this call
            # is assumed to belong to the single-sink branch - confirm.
            sink.print_info()  # rarely used, e.g. expect and devnull

        # Record the tokens that were executed; sys.argv is unrelated when this
        # function is reached through execute().
        write_history(tokens)

    except UsageError as e:
        print(e, file=sys.stderr)
        sys.exit(2)  # Exit with a non-zero code, but no traceback
|
|
100
|
+
|
|
101
|
+
def main():
    """Command-line entry point: run the arguments that follow the program name."""
    execute_tokens(sys.argv[1:])


if __name__ == "__main__":
    main()
|