python-jack-knife 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. pjk/__init__.py +5 -0
  2. pjk/base.py +377 -0
  3. pjk/common.py +150 -0
  4. pjk/log.py +67 -0
  5. pjk/main.py +106 -0
  6. pjk/man_page.py +125 -0
  7. pjk/parser.py +284 -0
  8. pjk/pipes/__init__.py +0 -0
  9. pjk/pipes/denorm.py +68 -0
  10. pjk/pipes/factory.py +62 -0
  11. pjk/pipes/filter.py +57 -0
  12. pjk/pipes/head.py +34 -0
  13. pjk/pipes/join.py +85 -0
  14. pjk/pipes/let_reduce.py +198 -0
  15. pjk/pipes/map.py +91 -0
  16. pjk/pipes/move_field.py +36 -0
  17. pjk/pipes/postgres_pipe.py +209 -0
  18. pjk/pipes/remove_field.py +36 -0
  19. pjk/pipes/select.py +42 -0
  20. pjk/pipes/sort.py +63 -0
  21. pjk/pipes/tail.py +39 -0
  22. pjk/pipes/user_pipe_factory.py +45 -0
  23. pjk/pipes/where.py +49 -0
  24. pjk/registry.py +143 -0
  25. pjk/sinks/__init__.py +0 -0
  26. pjk/sinks/csv_sink.py +33 -0
  27. pjk/sinks/ddb.py +54 -0
  28. pjk/sinks/devnull.py +31 -0
  29. pjk/sinks/dir_sink.py +59 -0
  30. pjk/sinks/expect.py +53 -0
  31. pjk/sinks/factory.py +108 -0
  32. pjk/sinks/graph.py +57 -0
  33. pjk/sinks/graph_bar_line.py +229 -0
  34. pjk/sinks/graph_cumulative.py +55 -0
  35. pjk/sinks/graph_hist.py +72 -0
  36. pjk/sinks/graph_scatter.py +29 -0
  37. pjk/sinks/json_sink.py +23 -0
  38. pjk/sinks/s3_sink.py +100 -0
  39. pjk/sinks/sinks.py +68 -0
  40. pjk/sinks/stdout.py +44 -0
  41. pjk/sinks/tsv_sink.py +22 -0
  42. pjk/sinks/user_sink_factory.py +43 -0
  43. pjk/sources/__init__.py +0 -0
  44. pjk/sources/csv_source.py +28 -0
  45. pjk/sources/dir_source.py +69 -0
  46. pjk/sources/factory.py +100 -0
  47. pjk/sources/format_usage.py +11 -0
  48. pjk/sources/inline_source.py +56 -0
  49. pjk/sources/json_source.py +35 -0
  50. pjk/sources/lazy_file.py +16 -0
  51. pjk/sources/lazy_file_local.py +22 -0
  52. pjk/sources/lazy_file_s3.py +28 -0
  53. pjk/sources/parquet_source.py +32 -0
  54. pjk/sources/s3_source.py +146 -0
  55. pjk/sources/source_list.py +23 -0
  56. pjk/sources/sql_source.py +32 -0
  57. pjk/sources/tsv_source.py +15 -0
  58. pjk/sources/user_source_factory.py +33 -0
  59. pjk/version.py +4 -0
  60. python_jack_knife-0.5.0.dist-info/METADATA +254 -0
  61. python_jack_knife-0.5.0.dist-info/RECORD +65 -0
  62. python_jack_knife-0.5.0.dist-info/WHEEL +5 -0
  63. python_jack_knife-0.5.0.dist-info/entry_points.txt +2 -0
  64. python_jack_knife-0.5.0.dist-info/licenses/LICENSE +202 -0
  65. python_jack_knife-0.5.0.dist-info/top_level.txt +1 -0
pjk/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+ from .version import __version__
4
+
5
+ __all__ = ["__version__"]
pjk/base.py ADDED
@@ -0,0 +1,377 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ from abc import ABC, abstractmethod
5
+ from typing import Any, Optional, List, Set
6
+
7
class TokenError(ValueError):
    """Error describing why a single expression token is invalid.

    The explanatory text is passed to ``ValueError`` and also kept on the
    instance as ``text`` so callers can retrieve it with ``get_text()``.
    """

    @classmethod
    def from_list(cls, lines: List[str]):
        """Build an error whose text is ``lines`` joined with newlines.

        Uses ``cls`` rather than a hard-coded class so that a subclass
        calling ``from_list`` receives an instance of its own type.
        """
        return cls('\n'.join(lines))

    def __init__(self, text: str):
        super().__init__(text)
        self.text = text

    def get_text(self):
        """Return the explanatory text given at construction."""
        return self.text
19
+
20
class UsageError(ValueError):
    """Error for a malformed pjk command line.

    ``str(error)`` renders the command with the offending token underlined,
    followed by the message and, when present, the text of the underlying
    token error.

    Args:
        message: short description of the problem.
        tokens: the command tokens (without the leading ``pjk``).
        token_no: index into ``tokens`` of the offending token.
        token_error: object exposing ``get_text()`` with further details.
    """

    def __init__(self, message: str,
                 tokens: Optional[List[str]] = None,
                 token_no: int = 0,
                 token_error: Optional["TokenError"] = None):
        super().__init__(message)
        self.message = message
        self.tokens = tokens
        self.token_no = token_no
        self.token_error = token_error

    def __str__(self):
        # tokens and token_error both default to None; tolerate that instead
        # of raising from inside __str__ while an error is being reported.
        token_copies = [self._quote(t) for t in (self.tokens or [])]
        lines = ['pjk ' + ' '.join(token_copies)]
        # Only underline when token_no actually points at a token.
        if 0 <= self.token_no < len(token_copies):
            lines.append(self._get_underline(token_copies))
        lines.append(self.message)
        if self.token_error is not None:
            lines.append('')
            lines.append(self.token_error.get_text())
        return '\n'.join(lines)

    # quote json inline
    def _quote(self, token):
        """Wrap tokens that start with '[' or '{' in double quotes."""
        if token.startswith(('[', '{')):
            return '"' + token + '"'
        return token

    def _get_underline(self, tokens: List, marker='^') -> str:
        """Return a line of ``marker`` characters positioned under token ``token_no``."""
        # +1 for the space after each preceding token, 4 for the 'pjk ' prefix
        offset = 4 + sum(len(t) + 1 for t in tokens[:self.token_no])
        return ' ' * offset + marker * len(tokens[self.token_no])
52
+
53
class ParsedToken:
    """Splits one expression token into a name, positional args and params.

    Layout handled: ``name:arg1:arg2@param1=value1@param2=value2``.
    Everything after the first '@' is the param section; the part before it
    is split on ':' into the name (``pre_colon``) and the args.
    """

    def __init__(self, token: str):
        self.token = token
        self._params = {}
        self._args = []
        at_parts = token.split('@', 1)  # Separate params off
        if len(at_parts) > 1:
            for param in at_parts[1].split('@'):
                # Split on the first '=' only, so that a value which itself
                # contains '=' is kept whole instead of being discarded.
                name, sep, value = param.partition('=')
                self._params[name] = value if sep else None

        self._all_but_params = at_parts[0]

        # args
        colon_parts = at_parts[0].split(':')
        self._pre_colon = colon_parts[0]

        # treat a '' arg as missing and ignore all args after that
        for arg in colon_parts[1:]:
            if arg == '':
                break
            self._args.append(arg)

    @property
    def pre_colon(self):
        """The text before the first ':' of the non-param part."""
        return self._pre_colon

    @property
    def whole_token(self):
        """The token exactly as given to the constructor."""
        return self.token

    @property  # avoid colon parsing
    def all_but_params(self):
        """The token with the '@...' param section removed."""
        return self._all_but_params

    def num_args(self):
        """Return the number of args that were parsed."""
        return len(self._args)

    # args are mandatory
    def get_arg(self, arg_no: int):
        """Return arg number ``arg_no``, or None when past the end."""
        return self._args[arg_no] if arg_no < len(self._args) else None

    # params are optional
    def get_params(self) -> dict:
        """Return the name -> value mapping; a param given without '=' maps to None."""
        return self._params
100
+
101
class Usage:
    """Declares a component's args/params and binds a parsed token to them.

    A component describes itself with ``def_arg``, ``def_param``,
    ``def_example`` and ``def_syntax``; ``bind`` then validates a
    ``ParsedToken`` against those definitions and stores the converted
    values, retrievable with ``get_arg`` and ``get_param``.
    """

    def __init__(self, name: str, desc: str, component_class: type):
        self.name = name
        self.desc = desc
        self.comp_class = component_class
        self.args = {}     # arg name -> bound value, filled by bind()
        self.params = {}   # param name -> value (defaults, then bound values)
        self.syntax = None  # explicit syntax string, see def_syntax()

        self.arg_defs = []      # list of (name, usage, is_num, valid_values)
        self.param_usages = {}  # name -> (usage, is_num, valid_values, default)
        self.examples = []      # list of (expr_tokens, expect)

    def get_component_class(self):
        """Return the component class given at construction."""
        return self.comp_class

    def get_base_class(self, as_string: bool = False):
        """Return which of Sink / Pipe / Source the component derives from.

        Returns the base class itself, or 'sink' / 'pipe' / 'source' when
        ``as_string`` is true. Pipe is tested before Source because Pipe is
        itself a Source subclass.

        Raises:
            TypeError: the component class derives from none of them.
        """
        if issubclass(self.comp_class, Sink):
            return 'sink' if as_string else Sink
        elif issubclass(self.comp_class, Pipe):
            return 'pipe' if as_string else Pipe
        elif issubclass(self.comp_class, Source):
            return 'source' if as_string else Source
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # real exception so the intended message is what the caller sees.
        raise TypeError('improper class')

    # args and param values default as str
    def def_arg(self, name: str, usage: str, is_num: bool = False, valid_values: Optional[Set[str]] = None):
        """Declare the next positional (mandatory) arg."""
        self.arg_defs.append((name, usage, is_num, valid_values))

    def def_param(self, name: str, usage: str, is_num: bool = False, valid_values: Optional[Set[str]] = None, default: str = None):
        """Declare an optional param; a truthy ``default`` is converted and stored right away."""
        self.param_usages[name] = (usage, is_num, valid_values, default)
        if default:
            self.params[name] = self._get_val(default, is_num, valid_values)

    def def_example(self, expr_tokens: list[str], expect: str):
        """Record an example as an (expr_tokens, expect) pair."""
        self.examples.append((expr_tokens, expect))

    def def_syntax(self, syntax: str):
        """Set an explicit syntax string, used instead of the generated one."""
        self.syntax = syntax

    def get_examples(self):
        """Return the recorded examples."""
        return self.examples

    def get_arg(self, name: str):
        """Return the bound value of arg ``name``, or None."""
        return self.args.get(name, None)

    def get_param(self, name: str):
        """Return the value of param ``name`` (bound or default), or None."""
        return self.params.get(name)

    def get_usage_text(self):
        """Return the description, syntax line and arg/param notes as one string."""
        lines = [self.desc]

        syntax_str = self.get_token_syntax()  # might be ''
        if syntax_str:
            lines.append('')
            lines.append('syntax:')
            lines.append(f' {syntax_str}')

        lines.extend(self.get_arg_param_desc())
        return '\n'.join(lines)

    def get_token_syntax(self):
        """Return the explicit syntax if one was set, else one built from the definitions."""
        if self.syntax is not None:
            return self.syntax  # else piece it together

        token = f'{self.name}'
        for name, _usage, _is_num, _valid_values in self.arg_defs:
            token += f':<{name}>'

        for name, (_usage, _is_num, valid_values, _default) in self.param_usages.items():
            value_display = name
            if valid_values:
                # Sort so the help text does not depend on set iteration order.
                value_display = '|'.join(sorted(valid_values))
            token += f'@{name}=<{value_display}>'
        return token

    def get_arg_param_desc(self):
        """Return help lines listing the mandatory args and optional params."""
        notes = []
        if self.arg_defs:
            notes.append('mandatory args:')
            for name, usage, _is_num, _valid_values in self.arg_defs:
                notes.append(f' {name} = {usage}')

        if self.param_usages:
            notes.append('optional params:')
            for name, (text, _is_num, _valid_values, default) in self.param_usages.items():
                notes.append(f' {name} = {text} (default={default})')
        return notes

    def bind(self, ptok: "ParsedToken"):
        """Validate ``ptok`` against the definitions and store the converted values.

        Raises:
            TokenError: extra or missing args, an unknown param, a param
                without a value, or a value that fails conversion/validation.
        """
        if ptok.num_args() > len(self.arg_defs):
            extra = [ptok.get_arg(i) for i in range(len(self.arg_defs), ptok.num_args())]
            raise TokenError.from_list([f"extra arg{'s' if len(extra) > 1 else ''}: {','.join(extra)}.",
                                        '', self.get_usage_text()])

        if ptok.num_args() < len(self.arg_defs):
            missing = [adef[0] for adef in self.arg_defs[ptok.num_args():]]
            raise TokenError.from_list([f"missing arg{'s' if len(missing) > 1 else ''}: {','.join(missing)}.",
                                        '', self.get_usage_text()])

        for i, (name, _usage, is_num, valid_values) in enumerate(self.arg_defs):
            try:
                self.args[name] = self._get_val(ptok.get_arg(i), is_num, valid_values)
            except (ValueError, TypeError) as e:
                raise TokenError.from_list([f"wrong value for '{name}' arg.", '', self.get_usage_text()]) from e

        for name, str_val in ptok.get_params().items():
            usage = self.param_usages.get(name, None)
            if not usage:
                raise TokenError.from_list([f"unknown param: '{name}'.", '', self.get_usage_text()])
            if not str_val:
                raise TokenError.from_list([f"missing value for '{name}' param.", '', self.get_usage_text()])

            _text, is_num, valid_values, _default = usage
            try:
                self.params[name] = self._get_val(str_val, is_num, valid_values)
            except (ValueError, TypeError) as e:
                raise TokenError.from_list([f"wrong value type for '{name}' param.", '', self.get_usage_text()]) from e

    def _get_val(self, val_str: str, is_num: bool, valid_values: Optional[Set[str]] = None):
        """Convert and validate one raw value.

        Strings are returned unchanged after the optional ``valid_values``
        membership check; numbers are parsed as int, falling back to float.

        Raises:
            ValueError: empty value, value not in ``valid_values``, or not a number.
        """
        if not val_str:
            raise ValueError('missing value')
        if not is_num:  # is string
            if valid_values is not None and val_str not in valid_values:
                raise ValueError(f'illegal value: {val_str}')
            return val_str

        # is_num
        try:
            return int(val_str)
        except ValueError:  # could be a float, which int() rejects but is ok
            return float(val_str)
248
+
249
+ # until all usages are implemented a default that doesn't bind
250
+ # they continue to use ParsedToken ptok
251
class NoBindUsage(Usage):
    """Usage variant whose ``bind`` does nothing.

    Serves as the default for components that still read their
    ``ParsedToken`` directly instead of going through bound args/params.
    """

    def __init__(self, name: str, desc: str, component_class: type):
        super().__init__(name, desc, component_class)

    def bind(self, ptok: ParsedToken):
        # Intentionally a no-op: nothing is validated or stored.
        return None
256
+
257
+ # mixin
258
class KeyedSource(ABC):
    """Mixin for sources that can be probed with a record via ``lookup``."""

    @classmethod
    def usage(cls):
        """Return a default Usage named after the class."""
        # Usage.__init__ requires component_class; omitting it made this
        # classmethod fail with a TypeError on every call.
        return Usage(
            name=cls.__name__,
            desc=f"{cls.__name__} component",
            component_class=cls
        )

    @abstractmethod
    def lookup(self, left_rec) -> Optional[dict]:
        """Return the record associated with the given key, or None."""
        pass

    def get_unlookedup_records(self) -> List[Any]:
        """Hook for outer join; this base implementation returns None."""
        # for outer join
        pass

    def deep_copy(self):
        """Return None: not copyable unless a subclass overrides this."""
        return None
277
+
278
class Source(ABC):
    """Abstract base for record producers.

    Subclasses supply ``__iter__``; ``__next__`` is derived from it so an
    instance can also be advanced with ``next()``.
    """

    is_format = False

    @classmethod
    def usage(cls):
        """Return a non-binding default usage named after the class."""
        component_name = cls.__name__
        return NoBindUsage(
            name=component_name,
            desc=f"{component_name} component",
            component_class=cls,
        )

    @abstractmethod
    def __iter__(self):
        raise NotImplementedError("__iter__ must be implemented by subclasses")

    def __next__(self):
        # The backing iterator is created on the first call and then reused.
        try:
            cursor = self._iter
        except AttributeError:
            cursor = self._iter = iter(self)
        return next(cursor)

    def deep_copy(self):
        """Return None; subclasses override this to become copyable."""
        return None
302
+
303
+
304
class Pipe(Source):
    """A source that reads from one or more upstream sources.

    Class attributes:
        deep_copyable: whether ``deep_copy`` may clone this pipe (default False).
        arity: declared as 1 here; not read anywhere inside this class.
    """

    deep_copyable: bool = False  # default to false
    arity: int = 1

    def __init__(self, ptok: ParsedToken, usage: Usage = None):
        self.ptok = ptok
        self.left = None   # left source for convenience
        self.right = None  # right source for convenience
        self.inputs: List[Source] = []

    def add_source(self, source: Source) -> None:
        """Append ``source`` to ``inputs`` and maintain ``left`` / ``right``.

        The first source added becomes ``left``. When a second one is added,
        the previous ``left`` moves to ``right`` and the new source becomes
        ``left``. Later sources only land in ``inputs``.
        """
        self.inputs.append(source)
        if self.left is None:
            self.left = source
            return
        if self.right is None:
            self.right, self.left = self.left, source

    def reset(self):
        """Optional hook; does nothing here."""
        pass

    def deep_copy(self) -> Optional["Pipe"]:
        """Return a clone wired to deep copies of every input, or None.

        None is returned when the pipe is not ``deep_copyable`` or when any
        input's ``deep_copy`` returns None.

        Raises:
            RuntimeError: the pipe is copyable but has no inputs.
        """
        if not self.deep_copyable:
            return None
        if not self.inputs:
            raise RuntimeError(f"{self.__class__.__name__} has no inputs set")

        pipe_class = self.__class__
        clone = pipe_class(self.ptok, pipe_class.usage())

        for upstream in self.inputs:
            strand = upstream.deep_copy()
            if strand is None:
                return None
            clone.add_source(strand)

        return clone
341
+
342
class Sink(ABC):
    """Abstract base for pipeline terminals; subclasses implement ``process``."""

    is_format = False

    @classmethod
    def usage(cls):
        """Return a non-binding default usage named after the class."""
        component_name = cls.__name__
        return NoBindUsage(
            name=component_name,
            desc=f"{component_name} component",
            component_class=cls,
        )

    def __init__(self, ptok: ParsedToken, usage: Usage = None):
        self.ptok = ptok
        # NOTE: this instance attribute has the same name as the ``usage``
        # classmethod, so ``instance.usage`` yields the stored object.
        self.usage = usage

    def drain(self):
        """Run the sink by delegating to ``process``."""
        self.process()

    def print_info(self):
        """Optional hook; does nothing here."""
        pass

    def add_source(self, source: Source) -> None:
        """Store ``source`` as ``self.input``."""
        self.input = source

    @abstractmethod
    def process(self) -> None:
        pass

    def deep_copy(self):
        """Return None; subclasses override this to become copyable."""
        return None
372
+
373
+ # identity source for sub-pipeline seeding
374
class IdentitySource(Source):
    """Placeholder source that must never actually be iterated.

    ``Source`` declares ``__iter__`` abstract, so it has to be overridden
    here for the class to be instantiable at all; defining only ``next``
    left the class abstract.
    """

    def __iter__(self):
        raise RuntimeError("IdentitySource should never be executed")

    def next(self):
        # Kept for any caller that invokes .next() directly.
        raise RuntimeError("IdentitySource should never be executed")
377
+
pjk/common.py ADDED
@@ -0,0 +1,150 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import sys, shutil, subprocess, contextlib, signal
5
+ import os
6
+ import yaml
7
+
8
class SafeNamespace:
    """Attribute-style view of a dict in which missing attributes read as None.

    Nested dicts, and dicts found directly inside lists, are wrapped as
    well; every other value is stored unchanged.
    """

    def __init__(self, obj):
        def wrap(item):
            return SafeNamespace(item) if isinstance(item, dict) else item

        for key, raw in obj.items():
            if isinstance(raw, list):
                converted = [wrap(element) for element in raw]
            else:
                converted = wrap(raw)
            setattr(self, key, converted)

    def __getattr__(self, key):
        # Reached only when normal lookup fails: missing keys yield None.
        return None
19
+
20
class ReducingNamespace:
    """Attribute-style view of a record in which every field reads as a collection."""

    def __init__(self, record):
        self._record = record

    def __getattr__(self, name):
        found = self._record[name]
        # Lists, tuples and sets pass through; scalars become one-element lists.
        return found if isinstance(found, (list, tuple, set)) else [found]
29
+
30
@contextlib.contextmanager
def pager_stdout(use_pager=True):
    """Context manager that routes ``sys.stdout`` through ``less`` while active.

    When ``use_pager`` is false, or ``less`` is not found on PATH, the body
    runs with stdout untouched. Otherwise stdout is pointed at the pager's
    stdin and, on exit, restored before the pager is closed and waited for.
    """
    if not (use_pager and shutil.which("less")):
        yield
        return

    # Avoid BrokenPipeError noise if user quits less early
    with contextlib.suppress(Exception):  # not available on Windows
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    pager = subprocess.Popen(["less", "-FRSX"], stdin=subprocess.PIPE, text=True)
    saved_stdout = sys.stdout
    try:
        sys.stdout = pager.stdin
        yield
    finally:
        with contextlib.suppress(Exception):
            sys.stdout.flush()
        sys.stdout = saved_stdout
        if pager.stdin:
            pager.stdin.close()
        pager.wait()
55
+
56
# ANSI escape sequences keyed by style name.
COLOR_CODES = {
    style_name: f'\033[{sgr_code}m'
    for style_name, sgr_code in (
        ('bold', 1),
        ('underline', 4),
        ('red', 31),
        ('green', 32),
        ('yellow', 33),
        ('blue', 34),
        ('magenta', 35),
        ('cyan', 36),
        ('gray', 90),
    )
}

RESET = '\033[0m'


def highlight(text: str, color: str = 'bold', value: str = None) -> str:
    """Return ``text`` with every occurrence of ``value`` wrapped in a style.

    ``value`` falls back to the whole ``text`` when empty or None. ``color``
    is matched case-insensitively against COLOR_CODES; unknown names fall
    back to bold.
    """
    target = value or text
    style = COLOR_CODES.get(color.lower(), COLOR_CODES['bold'])
    return text.replace(target, style + target + RESET)
74
+
75
class Lookups:
    """Key/value store persisted as YAML at ``~/.pjk/lookups.yaml``."""

    def __init__(self):
        self.lookups_yaml = os.path.expanduser('~/.pjk/lookups.yaml')
        self._data = {}
        self._load()

    def _load(self):
        """Read the YAML file into memory; a missing or empty file gives {}."""
        if not os.path.exists(self.lookups_yaml):
            self._data = {}
            return
        with open(self.lookups_yaml, 'r') as handle:
            self._data = yaml.safe_load(handle) or {}

    def save(self):
        """Write the in-memory data to the YAML file, creating its directory."""
        parent_dir = os.path.dirname(self.lookups_yaml)
        os.makedirs(parent_dir, exist_ok=True)
        with open(self.lookups_yaml, 'w') as handle:
            yaml.safe_dump(self._data, handle)

    def get(self, key, default=None):
        """Return the value stored under ``key``, or ``default``."""
        return self._data.get(key, default)

    def set(self, key, value):
        """Store ``value`` under ``key`` and save immediately."""
        self._data[key] = value
        self.save()

    def delete(self, key):
        """Remove ``key`` and save; do nothing when the key is absent."""
        if key not in self._data:
            return
        del self._data[key]
        self.save()

    def all(self):
        """Return a shallow copy of all stored lookups."""
        return dict(self._data)
113
+
114
class ComponentFactory:
    """Registry of component classes of one kind, keyed by name.

    ``num_orig_comps`` records how many components were present at
    construction; ``print_descriptions`` labels anything listed after that
    count as a plugin.
    """

    def __init__(self, components: dict, comp_type_name: str):
        self.num_orig = 0
        self.components = components  # name -> component_class
        self.comp_type_name = comp_type_name
        self.num_orig_comps = len(components)

    def register(self, name, comp_class):
        """Add or replace the component class stored under ``name``."""
        self.components[name] = comp_class

    def get_comp_type_name(self):
        """Return the component type name given at construction."""
        return self.comp_type_name

    def print_descriptions(self):
        """Print a header, then one line per component with the first line of its description."""
        print(highlight(f'{self.comp_type_name}s'))

        plugin = ''
        for position, (name, comp_class) in enumerate(self.components.items()):
            first_line = comp_class.usage().desc.split('\n')[0]
            if position >= self.num_orig_comps:
                plugin = '(~/.pjk/plugin)'
            line = f' {name:<12} {first_line} {plugin}'
            print(highlight(line, 'bold', plugin) if plugin else line)

    def get_usage(self, name: str):
        """Return the usage of the component named ``name``, or None if unknown."""
        comp_class = self.components.get(name)
        return comp_class.usage() if comp_class else None

    def create(self, token: str):
        """Placeholder; this base implementation returns None."""
        return None
pjk/log.py ADDED
@@ -0,0 +1,67 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import logging, os, atexit
5
+ from logging.handlers import RotatingFileHandler
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
# Shared package logger; handlers are attached by init().
logger = logging.getLogger("djk")


def _truthy(env_val: Optional[str]) -> bool:
    """Return True when ``env_val`` is 1/true/yes/on, compared case-insensitively."""
    normalized = str(env_val).lower()
    return normalized in {"1", "true", "yes", "on"}
13
+
14
def init(force: bool = False, level: Optional[int] = None, console: Optional[bool] = None):
    """
    Initialize 'djk' logging.

    - Rotates at DJK_LOG_MAX_MB (default 2 MB), keeps DJK_LOG_BACKUPS (default 3).
    - Files under ~/.pjk/logs by default; override with DJK_LOG_DIR / DJK_LOG_FILE.
    - Set DJK_DEBUG=1|true|yes for DEBUG, else INFO (or pass explicit level).
    - To enable console output explicitly, set console=True or DJK_LOG_CONSOLE=1.
    - Set force=True to replace existing handlers.
    """
    if logger.handlers and not force:
        return

    # Close handlers as they are removed; merely clearing the list would
    # leave the previous log file open when re-initializing with force=True.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    if level is None:
        level = logging.DEBUG if _truthy(os.getenv("DJK_DEBUG")) else logging.INFO

    fmt = "[%(levelname)s] [%(threadName)s] %(message)s"
    formatter = logging.Formatter(fmt)

    # Rotating file handler in ~/.pjk/logs
    log_dir = Path(os.getenv("DJK_LOG_DIR", os.path.expanduser("~/.pjk/logs")))
    log_dir.mkdir(parents=True, exist_ok=True)
    log_file = log_dir / os.getenv("DJK_LOG_FILE", "pjk.log")
    max_bytes = int(float(os.getenv("DJK_LOG_MAX_MB", "2")) * 1024 * 1024)  # 2 MB
    backups = int(os.getenv("DJK_LOG_BACKUPS", "3"))

    fh = RotatingFileHandler(
        log_file,
        maxBytes=max_bytes,
        backupCount=backups,
        encoding="utf-8",
        delay=False,  # open immediately so first emit writes bytes
    )
    fh.setLevel(level)
    fh.setFormatter(formatter)

    logger.setLevel(level)
    logger.addHandler(fh)

    # DO NOT propagate into root (prevents accidental console logs elsewhere)
    logger.propagate = False

    # Optional console (off by default)
    enable_console = console if console is not None else _truthy(os.getenv("DJK_LOG_CONSOLE"))
    if enable_console:
        sh = logging.StreamHandler()
        sh.setLevel(level)
        sh.setFormatter(formatter)
        logger.addHandler(sh)

    # Flush/close on exit for short-lived runs
    atexit.register(logging.shutdown)
pjk/main.py ADDED
@@ -0,0 +1,106 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ #!/usr/bin/env python
5
+ import sys
6
+ import os
7
+ import signal
8
+ import shlex
9
+ from typing import List
10
+ from pjk.parser import ExpressionParser
11
+ from pjk.base import UsageError
12
+ from pjk.log import init as init_logging
13
+ from datetime import datetime, timezone
14
+ import concurrent.futures
15
+ from pjk.registry import ComponentRegistry
16
+ from pjk.pipes.factory import PipeFactory
17
+ from pjk.sources.factory import SourceFactory
18
+ from pjk.sinks.factory import SinkFactory
19
+ from pjk.man_page import do_man, do_examples
20
+ from pjk.sinks.expect import ExpectSink
21
+ from pjk.version import __version__
22
+
23
def write_history(tokens):
    """Append a timestamped ``pjk <tokens>`` line to ``.pjk-history.txt``.

    The path is relative, so the file is resolved against the current
    working directory.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    entry = f"{stamp}\tpjk {' '.join(tokens)}\n"
    with open(".pjk-history.txt", "a") as history:
        history.write(entry)
29
+
30
def execute_threaded(sinks):
    """Drain every sink concurrently, one thread-pool task per sink.

    At most 32 worker threads are used. An exception raised by a sink's
    ``drain`` is printed and does not stop the remaining sinks.

    Args:
        sinks: objects exposing ``drain()``. An empty collection is a no-op.
    """
    if not sinks:
        # ThreadPoolExecutor raises ValueError for max_workers=0.
        return

    # Choose a max thread limit (explicitly)
    max_workers = min(32, len(sinks))  # or set a fixed cap like 8

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(s.drain): s for s in sinks}

        for future in concurrent.futures.as_completed(futures):
            sink_obj = futures[future]
            try:
                future.result()  # This will re-raise any exception from s.drain()
            except Exception as e:
                print(f"Sink {sink_obj} raised an exception:")
                print(e)
46
+
47
def execute(command: str):
    """Split ``command`` shell-style (POSIX rules, comments stripped) and run the tokens."""
    execute_tokens(shlex.split(command, comments=True, posix=True))
50
+
51
def execute_tokens(tokens: List[str]):
    """Run one pjk invocation described by ``tokens``.

    Handles ``--version``, the bare usage listing, ``man <component>`` and
    ``examples`` / ``examples+``; anything else is parsed as a pipeline
    expression and executed. The sink is cloned with ``deep_copy`` up to
    the CPU count and the clones run in threads; when no clone can be
    made, the single sink runs in the calling thread.

    Exits with status 2, printing the error to stderr, on a UsageError.
    """
    init_logging()
    signal.signal(signal.SIGINT, lambda s, f: sys.exit(0))

    if '--version' in tokens:
        print(f"pjk version {__version__}")
        sys.exit(0)

    registry = ComponentRegistry()

    if len(tokens) < 1:
        registry.print_usage()
        return

    # pjk man --all | --all+ | <component>
    if len(tokens) == 2 and tokens[0] == 'man':
        do_man(tokens[1], registry)
        return

    # pjk examples | examples+
    if len(tokens) == 1 and tokens[0] in ['examples', 'examples+']:
        do_examples(tokens[0], registry)
        return

    parser = ExpressionParser(registry)

    try:
        # Build initial sink
        sink = parser.parse(tokens)

        sinks = [sink]
        # os.cpu_count() returns None when the count cannot be determined;
        # fall back to 1 so the comparison below stays valid.
        max_threads = os.cpu_count() or 1
        while len(sinks) < max_threads:
            clone = sink.deep_copy()
            if not clone:
                break
            sinks.append(clone)

        if len(sinks) > 1:
            execute_threaded(sinks)
        else:
            sink.drain()  # run single in main thread
            sink.print_info()  # rarely used, e.g. expect and devnull

        # Record the tokens actually executed. sys.argv only matches them
        # when entered through main(), not through execute().
        write_history(tokens)

    except UsageError as e:
        print(e, file=sys.stderr)
        sys.exit(2)  # Exit with a non-zero code, but no traceback
100
+
101
def main():
    """Command-line entry point: run the arguments that follow the program name."""
    execute_tokens(sys.argv[1:])


if __name__ == "__main__":
    main()