python-jack-knife 0.5.0__tar.gz → 0.5.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/PKG-INFO +1 -1
  2. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/base.py +46 -27
  3. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/common.py +15 -8
  4. python_jack_knife-0.5.5/src/pjk/log.py +62 -0
  5. python_jack_knife-0.5.5/src/pjk/main.py +135 -0
  6. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/parser.py +13 -4
  7. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/factory.py +6 -2
  8. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/filter.py +3 -3
  9. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/head.py +3 -5
  10. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/join.py +4 -4
  11. python_jack_knife-0.5.5/src/pjk/pipes/map.py +130 -0
  12. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/move_field.py +2 -2
  13. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/postgres_pipe.py +5 -9
  14. python_jack_knife-0.5.5/src/pjk/pipes/progress_pipe.py +41 -0
  15. python_jack_knife-0.5.5/src/pjk/pipes/sample.py +66 -0
  16. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/select.py +2 -4
  17. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/tail.py +1 -1
  18. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/where.py +12 -15
  19. python_jack_knife-0.5.5/src/pjk/progress.py +177 -0
  20. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/registry.py +34 -5
  21. python_jack_knife-0.5.5/src/pjk/sinks/csv_sink.py +22 -0
  22. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/devnull.py +13 -6
  23. python_jack_knife-0.5.5/src/pjk/sinks/dir_sink.py +71 -0
  24. python_jack_knife-0.5.5/src/pjk/sinks/expect.py +92 -0
  25. python_jack_knife-0.5.5/src/pjk/sinks/factory.py +50 -0
  26. python_jack_knife-0.5.5/src/pjk/sinks/format_sink.py +126 -0
  27. python_jack_knife-0.5.5/src/pjk/sinks/json_sink.py +14 -0
  28. python_jack_knife-0.5.5/src/pjk/sinks/s3_sink.py +90 -0
  29. python_jack_knife-0.5.5/src/pjk/sinks/s3_stream.py +134 -0
  30. python_jack_knife-0.5.5/src/pjk/sinks/tsv_sink.py +12 -0
  31. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/user_sink_factory.py +2 -1
  32. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/csv_source.py +3 -6
  33. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/dir_source.py +30 -17
  34. python_jack_knife-0.5.5/src/pjk/sources/factory.py +58 -0
  35. python_jack_knife-0.5.5/src/pjk/sources/format_source.py +119 -0
  36. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/json_source.py +3 -7
  37. python_jack_knife-0.5.5/src/pjk/sources/npy_source.py +76 -0
  38. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/parquet_source.py +3 -7
  39. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/s3_source.py +42 -51
  40. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/sql_source.py +4 -11
  41. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/tsv_source.py +2 -6
  42. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/version.py +1 -1
  43. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
  44. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/SOURCES.txt +7 -1
  45. python_jack_knife-0.5.0/src/pjk/log.py +0 -67
  46. python_jack_knife-0.5.0/src/pjk/main.py +0 -106
  47. python_jack_knife-0.5.0/src/pjk/pipes/map.py +0 -91
  48. python_jack_knife-0.5.0/src/pjk/sinks/csv_sink.py +0 -33
  49. python_jack_knife-0.5.0/src/pjk/sinks/dir_sink.py +0 -59
  50. python_jack_knife-0.5.0/src/pjk/sinks/expect.py +0 -53
  51. python_jack_knife-0.5.0/src/pjk/sinks/factory.py +0 -108
  52. python_jack_knife-0.5.0/src/pjk/sinks/json_sink.py +0 -23
  53. python_jack_knife-0.5.0/src/pjk/sinks/s3_sink.py +0 -100
  54. python_jack_knife-0.5.0/src/pjk/sinks/tsv_sink.py +0 -22
  55. python_jack_knife-0.5.0/src/pjk/sources/factory.py +0 -100
  56. python_jack_knife-0.5.0/src/pjk/sources/format_usage.py +0 -11
  57. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/LICENSE +0 -0
  58. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/README.md +0 -0
  59. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/pyproject.toml +0 -0
  60. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/setup.cfg +0 -0
  61. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/__init__.py +0 -0
  62. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/man_page.py +0 -0
  63. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/__init__.py +0 -0
  64. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/denorm.py +0 -0
  65. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/let_reduce.py +0 -0
  66. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/remove_field.py +0 -0
  67. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/sort.py +0 -0
  68. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/pipes/user_pipe_factory.py +0 -0
  69. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/__init__.py +0 -0
  70. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/ddb.py +0 -0
  71. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/graph.py +0 -0
  72. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_bar_line.py +0 -0
  73. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_cumulative.py +0 -0
  74. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_hist.py +0 -0
  75. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_scatter.py +0 -0
  76. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/sinks.py +0 -0
  77. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sinks/stdout.py +0 -0
  78. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/__init__.py +0 -0
  79. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/inline_source.py +0 -0
  80. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file.py +0 -0
  81. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file_local.py +0 -0
  82. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file_s3.py +0 -0
  83. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/source_list.py +0 -0
  84. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/pjk/sources/user_source_factory.py +0 -0
  85. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  86. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  87. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/requires.txt +0 -0
  88. {python_jack_knife-0.5.0 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.5.0
3
+ Version: 0.5.5
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -152,16 +152,17 @@ class Usage:
152
152
  lines.append(self.desc)
153
153
 
154
154
  syntax_str = self.get_token_syntax() # might be ''
155
- if len(syntax_str) > 0:
156
- lines.append('')
157
- lines.append(f'syntax:')
158
- lines.append(f' {self.get_token_syntax()}')
159
-
155
+ if not syntax_str:
156
+ return '\n'.join(lines)
157
+
158
+ lines.append('')
159
+ lines.append(f'syntax:')
160
+ lines.append(f' {self.get_token_syntax()}')
160
161
  lines.extend(f"{line}" for line in self.get_arg_param_desc())
161
162
  return '\n'.join(lines)
162
163
 
163
164
  def get_token_syntax(self):
164
- if self.syntax != None:
165
+ if self.syntax:
165
166
  return self.syntax # else piece it together
166
167
 
167
168
  token = f'{self.name}'
@@ -216,7 +217,10 @@ class Usage:
216
217
  self.args[name] = self._get_val(val_str, is_num, valid_values)
217
218
  except (ValueError, TypeError) as e:
218
219
  raise TokenError.from_list([f"wrong value for '{name}' arg.", '', self.get_usage_text()])
219
-
220
+
221
+ self.bind_params(ptok)
222
+
223
+ def bind_params(self, ptok: ParsedToken):
220
224
  for name, str_val in ptok.get_params().items():
221
225
  usage = self.param_usages.get(name, None)
222
226
  if not usage:
@@ -276,8 +280,6 @@ class KeyedSource(ABC):
276
280
  return None
277
281
 
278
282
  class Source(ABC):
279
- is_format = False
280
-
281
283
  @classmethod
282
284
  def usage(cls):
283
285
  return NoBindUsage(
@@ -296,17 +298,21 @@ class Source(ABC):
296
298
  self._iter = iter(self)
297
299
  return next(self._iter)
298
300
 
299
-
300
301
  def deep_copy(self):
301
302
  return None # Default: not copyable unless overridden
302
-
303
+
304
+ def close(self):
305
+ pass
306
+
307
+ def _get_sources(self, source_list: list):
308
+ pass
303
309
 
304
310
  class Pipe(Source):
305
- deep_copyable: bool = False # default to false
306
311
  arity: int = 1
307
312
 
308
313
  def __init__(self, ptok: ParsedToken, usage: Usage = None):
309
314
  self.ptok = ptok
315
+ self.usage = usage
310
316
  self.left = None # left source for convience
311
317
  self.right = None # right source for convience
312
318
  self.inputs: List[Source] = []
@@ -324,24 +330,29 @@ class Pipe(Source):
324
330
  pass # optional hook
325
331
 
326
332
  def deep_copy(self) -> Optional["Pipe"]:
327
- if not self.deep_copyable:
328
- return None
329
- if not self.inputs:
330
- raise RuntimeError(f"{self.__class__.__name__} has no inputs set")
331
-
332
- clone = self.__class__(self.ptok, self.__class__.usage())
333
+ return None
334
+
335
+ def _get_sources(self, source_list: list):
336
+ for ix in self.inputs:
337
+ source_list.append(ix)
338
+ ix._get_sources(source_list)
333
339
 
334
- for input in self.inputs:
335
- strand = input.deep_copy()
336
- if strand is None:
337
- return None
338
- clone.add_source(strand)
340
+ class DeepCopyPipe(Pipe):
341
+ def deep_copy(self):
342
+ """
343
+ Generic deep_copy: clone left source, re-instantiate
344
+ this pipe class with the same ptok/usage, and attach.
345
+ """
346
+ source_clone = self.left.deep_copy()
347
+ if not source_clone:
348
+ return None
339
349
 
340
- return clone
350
+ # re-instantiate using the actual subclass
351
+ pipe = type(self)(self.ptok, self.usage)
352
+ pipe.add_source(source_clone)
353
+ return pipe
341
354
 
342
355
  class Sink(ABC):
343
- is_format = False
344
-
345
356
  @classmethod
346
357
  def usage(cls):
347
358
  return NoBindUsage(
@@ -356,8 +367,16 @@ class Sink(ABC):
356
367
 
357
368
  def drain(self):
358
369
  self.process()
370
+ self.close()
371
+
372
+ # get all inputs in the execution chain for closing
373
+ inputs = [self.input]
374
+ self.input._get_sources(inputs)
375
+ for input in inputs:
376
+ input.close()
359
377
 
360
- def print_info(self):
378
+ # optional
379
+ def close(self):
361
380
  pass
362
381
 
363
382
  def add_source(self, source: Source) -> None:
@@ -4,6 +4,7 @@
4
4
  import sys, shutil, subprocess, contextlib, signal
5
5
  import os
6
6
  import yaml
7
+ from pjk.base import TokenError
7
8
 
8
9
  class SafeNamespace:
9
10
  def __init__(self, obj):
@@ -73,11 +74,12 @@ def highlight(text: str, color: str = 'bold', value: str = None) -> str:
73
74
  return text.replace(value, f"{style}{value}{RESET}")
74
75
 
75
76
  class Lookups:
76
- def __init__(self):
77
+ def __init__(self, component_class):
77
78
  self.lookups_yaml = os.path.expanduser('~/.pjk/lookups.yaml')
79
+ self.class_name = type(component_class).__name__
78
80
  self._data = {}
79
81
  self._load()
80
-
82
+
81
83
  def _load(self):
82
84
  """Load lookups from YAML file if it exists."""
83
85
  if os.path.exists(self.lookups_yaml):
@@ -93,8 +95,13 @@ class Lookups:
93
95
  yaml.safe_dump(self._data, f)
94
96
 
95
97
  def get(self, key, default=None):
96
- """Retrieve a lookup value by key."""
97
- return self._data.get(key, default)
98
+ lookup_key = f'{self.class_name}-{key}'
99
+ entry = self._data.get(lookup_key, default)
100
+ if not entry:
101
+ raise TokenError(
102
+ f"~/.pjk/lookups.yaml must contain entry for '{lookup_key}' with host, user, password."
103
+ )
104
+ return entry
98
105
 
99
106
  def set(self, key, value):
100
107
  """Set a lookup value and persist it."""
@@ -129,14 +136,14 @@ class ComponentFactory:
129
136
  print(header)
130
137
 
131
138
  i = 0
132
- plugin = ''
139
+ # user and outside package components are also here, but printed from registry class
133
140
  for name, comp_class in self.components.items():
134
141
  usage = comp_class.usage()
135
142
  lines = usage.desc.split('\n')
136
143
  if i >= self.num_orig_comps:
137
- plugin = '(~/.pjk/plugin)'
138
- line = f' {name:<12} {lines[0]} {plugin}'
139
- line = highlight(line, 'bold', plugin) if plugin else line
144
+ break
145
+
146
+ line = f' {name:<12} {lines[0]}'
140
147
  print(line)
141
148
  i += 1
142
149
 
@@ -0,0 +1,62 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import logging, os, tempfile
5
+ from logging.handlers import RotatingFileHandler
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ logger = logging.getLogger("pjk")
10
+
11
+ def _truthy(v: Optional[str]) -> bool:
12
+ return str(v).lower() in ("1", "true", "yes", "on")
13
+
14
+ def init(force: bool = False, level: Optional[int] = None):
15
+ """
16
+ Initialize 'pjk' logging.
17
+
18
+ - Rotates at DJK_LOG_MAX_MB (default 2 MB), keeps DJK_LOG_BACKUPS (default 3).
19
+ - Files under ~/.pjk/logs by default; override with DJK_LOG_DIR / DJK_LOG_FILE.
20
+ - Set DJK_DEBUG=1|true|yes for DEBUG, else INFO (or pass explicit level).
21
+ - If the log directory is not writable, fall back to console logging
22
+ (stderr → CloudWatch in AWS).
23
+ - Set force=True to replace existing handlers.
24
+ """
25
+ if logger.handlers and not force:
26
+ return
27
+ logger.handlers.clear()
28
+
29
+ level = level or (logging.DEBUG if _truthy(os.getenv("DJK_DEBUG")) else logging.INFO)
30
+ fmt = "[%(levelname)s] [%(threadName)s] %(message)s"
31
+ formatter = logging.Formatter(fmt)
32
+
33
+ try:
34
+ # Preferred: rotating file handler under ~/.pjk/logs
35
+ log_dir = Path(os.getenv("DJK_LOG_DIR", Path.home() / ".pjk" / "logs"))
36
+ log_dir.mkdir(parents=True, exist_ok=True)
37
+
38
+ log_file = log_dir / os.getenv("DJK_LOG_FILE", "pjk.log")
39
+ max_bytes = int(float(os.getenv("DJK_LOG_MAX_MB", "2")) * 1024 * 1024) # 2 MB
40
+ backups = int(os.getenv("DJK_LOG_BACKUPS", "3"))
41
+
42
+ fh = RotatingFileHandler(
43
+ log_file,
44
+ maxBytes=max_bytes,
45
+ backupCount=backups,
46
+ encoding="utf-8",
47
+ delay=False,
48
+ )
49
+ fh.setLevel(level)
50
+ fh.setFormatter(formatter)
51
+ logger.addHandler(fh)
52
+ except Exception:
53
+ # Fallback: console handler
54
+ ch = logging.StreamHandler()
55
+ ch.setLevel(level)
56
+ ch.setFormatter(formatter)
57
+ logger.addHandler(ch)
58
+ logger.warning("Falling back to console logging (log file not writable)")
59
+
60
+ logger.setLevel(level)
61
+ # Do not propagate to root
62
+ logger.propagate = False
@@ -0,0 +1,135 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ #!/usr/bin/env python
5
+ import sys
6
+ import os
7
+ import signal
8
+ import shlex
9
+ from typing import List
10
+ from pjk.parser import ExpressionParser
11
+ from pjk.base import UsageError
12
+ from pjk.log import init as init_logging
13
+ from datetime import datetime
14
+ import traceback
15
+ import concurrent.futures
16
+ from pjk.registry import ComponentRegistry
17
+ from pjk.sinks.stdout import StdoutSink
18
+ from pjk.man_page import do_man, do_examples
19
+ from pjk.sinks.expect import ExpectSink
20
+ from pjk.progress import ProgressDisplay
21
+ from pjk.version import __version__
22
+
23
+ def write_history(tokens):
24
+ if os.environ.get("PJK_NO_HISTORY") == "1":
25
+ return
26
+
27
+ log_path = ".pjk-history.txt"
28
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
29
+ command = " ".join(tokens)
30
+
31
+ try:
32
+ with open(log_path, "a") as f:
33
+ f.write(f"{timestamp}\tpjk {command}\n")
34
+ except (PermissionError, OSError):
35
+ pass
36
+
37
+ def execute_threaded(sinks, stop_progress=None):
38
+ max_workers = min(32, len(sinks))
39
+ executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) # no 'with'
40
+ futures = {executor.submit(s.drain): s for s in sinks}
41
+ try:
42
+ for future in concurrent.futures.as_completed(futures):
43
+ sink_obj = futures[future]
44
+ future.result() # re-raises worker exception with traceback
45
+ except KeyboardInterrupt:
46
+ # stop UI first, then cancel and non-blocking shutdown
47
+ if stop_progress:
48
+ try: stop_progress()
49
+ except Exception: pass
50
+ for f in futures:
51
+ f.cancel()
52
+ executor.shutdown(wait=False, cancel_futures=True)
53
+ os._exit(130)
54
+
55
+ except Exception as e:
56
+ if stop_progress:
57
+ try: stop_progress()
58
+ except Exception: pass
59
+ sys.stderr.write(f"Sink {futures[future]} raised an exception:\n")
60
+ traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr)
61
+ for f in futures:
62
+ f.cancel()
63
+ executor.shutdown(wait=False, cancel_futures=True)
64
+ raise
65
+ else:
66
+ executor.shutdown(wait=True)
67
+
68
+ def execute(command: str):
69
+ tokens = shlex.split(command, comments=True, posix=True)
70
+ execute_tokens(tokens)
71
+
72
+ def execute_tokens(tokens: List[str]):
73
+ init_logging()
74
+ # (remove the sys.exit SIGINT handler here)
75
+
76
+ if '--version' in tokens:
77
+ print(f"pjk version {__version__}")
78
+ sys.exit(0)
79
+
80
+ registry = ComponentRegistry()
81
+
82
+ if len(tokens) < 1:
83
+ registry.print_usage()
84
+ return
85
+
86
+ if len(tokens) == 2 and tokens[0] == 'man':
87
+ do_man(tokens[1], registry)
88
+ return
89
+
90
+ if len(tokens) == 1 and tokens[0] in ['examples', 'examples+']:
91
+ do_examples(tokens[0], registry)
92
+ return
93
+
94
+ parser = ExpressionParser(registry)
95
+
96
+ display = None
97
+ try:
98
+ sink = parser.parse(tokens)
99
+ if not isinstance(sink, (StdoutSink | ExpectSink)):
100
+ display = ProgressDisplay(interval=3.0)
101
+ display.start()
102
+
103
+ sinks = [sink]
104
+ max_threads = os.cpu_count()
105
+ while len(sinks) < max_threads:
106
+ clone = sink.deep_copy()
107
+ if not clone:
108
+ break
109
+ sinks.append(clone)
110
+
111
+ if len(sinks) > 1:
112
+ # pass a stopper so we halt the UI before tracebacks / shutdown
113
+ execute_threaded(sinks, stop_progress=(display.stop if display else None))
114
+ else:
115
+ sink.drain()
116
+
117
+ write_history(sys.argv[1:])
118
+
119
+ except UsageError as e:
120
+ print(e, file=sys.stderr)
121
+ sys.exit(2)
122
+ except KeyboardInterrupt:
123
+ pass
124
+ finally:
125
+ if display:
126
+ # short join so Ctrl-C is immediate
127
+ try: display.stop(timeout=0.1)
128
+ except Exception: pass
129
+
130
+ def main():
131
+ tokens = sys.argv[1:]
132
+ execute_tokens(tokens)
133
+
134
+ if __name__ == "__main__":
135
+ main()
@@ -8,6 +8,9 @@ from typing import Optional, Any, List
8
8
  from pjk.base import Source, Pipe, Sink, TokenError, UsageError, ParsedToken, Usage
9
9
  from pjk.pipes.user_pipe_factory import UserPipeFactory
10
10
  from pjk.pipes.let_reduce import ReducePipe
11
+ from pjk.sinks.stdout import StdoutSink
12
+ from pjk.sinks.expect import ExpectSink
13
+ from pjk.pipes.progress_pipe import ProgressPipe
11
14
  from pjk.registry import ComponentRegistry
12
15
 
13
16
  def expand_macros(tokens: List[str]) -> List[str]:
@@ -58,8 +61,12 @@ class ExpressionParser:
58
61
  if not sink:
59
62
  raise TokenError.from_list(['expression must end in a sink.',
60
63
  'pjk <source> [<pipe> ...] <sink>'])
64
+
65
+ # so each sink doesn't have to, maybe make a base class or mixin for sinks
66
+ progress_pipe = ProgressPipe(component_instance=sink)
67
+ progress_pipe.add_source(source)
61
68
 
62
- sink.add_source(source)
69
+ sink.add_source(progress_pipe)
63
70
  return sink
64
71
 
65
72
  def parse(self, tokens: List[str]) -> Sink:
@@ -78,10 +85,12 @@ class ExpressionParser:
78
85
  return self.get_sink(stack_helper, token)
79
86
 
80
87
  source = self.registry.create_source(token)
81
- if source:
88
+ if source:
82
89
  stack_helper.add_operator(source, self.stack)
90
+ progress_pipe = ProgressPipe(component_instance=source, simple=True)
91
+ stack_helper.add_operator(progress_pipe, self.stack)
83
92
  continue
84
-
93
+
85
94
  subexp = SubExpression.create(token)
86
95
  if subexp:
87
96
  stack_helper.add_operator(subexp, self.stack)
@@ -94,7 +103,7 @@ class ExpressionParser:
94
103
 
95
104
  else: # unrecognized token
96
105
  # could be sink in WRONG position, let's see for better error message
97
- sink = self.registry.create_sink(token, None)
106
+ sink = self.registry.create_sink(token)
98
107
  if sink:
99
108
  raise TokenError.from_list(['sink may only occur in final position.',
100
109
  'pjk <source> [<pipe> ...] <sink>'])
@@ -12,12 +12,14 @@ from pjk.pipes.head import HeadPipe
12
12
  from pjk.pipes.tail import TailPipe
13
13
  from pjk.pipes.sort import SortPipe
14
14
  from pjk.pipes.where import WherePipe
15
- from pjk.pipes.map import MapPipe
15
+ from pjk.pipes.map import MapByPipe
16
+ from pjk.pipes.map import GroupByPipe
16
17
  from pjk.pipes.join import JoinPipe
17
18
  from pjk.pipes.filter import FilterPipe
18
19
  from pjk.pipes.select import SelectFields
19
20
  from pjk.pipes.denorm import DenormPipe
20
21
  from pjk.pipes.postgres_pipe import PostgresPipe
22
+ from pjk.pipes.sample import SamplePipe
21
23
  from pjk.pipes.user_pipe_factory import UserPipeFactory
22
24
 
23
25
  COMPONENTS = {
@@ -25,7 +27,8 @@ COMPONENTS = {
25
27
  'tail': TailPipe,
26
28
  'join': JoinPipe,
27
29
  'filter': FilterPipe,
28
- 'map': MapPipe,
30
+ 'mapby': MapByPipe,
31
+ 'groupby': GroupByPipe,
29
32
  'as': MoveField,
30
33
  'drop': RemoveField,
31
34
  'let': LetPipe,
@@ -33,6 +36,7 @@ COMPONENTS = {
33
36
  'sort': SortPipe,
34
37
  'where': WherePipe,
35
38
  'sel': SelectFields,
39
+ 'sample': SamplePipe,
36
40
  'explode': DenormPipe,
37
41
  'pgres': PostgresPipe,
38
42
  }
@@ -15,13 +15,13 @@ class FilterPipe(Pipe):
15
15
  )
16
16
  usage.def_arg("mode", "'+' to include matches, '-' to exclude matches",
17
17
  valid_values={'+', '-'})
18
- usage.def_syntax("pjk <left_source> <map_source> map:<how>:<key> filter:<mode> <sink>")
18
+ usage.def_syntax("pjk <left_source> <map_source> [mapby:groupby]:<how>:<key> filter:<mode> <sink>")
19
19
 
20
20
  usage.def_example(expr_tokens=
21
21
  [
22
22
  "[{id:1}, {id:2}, {id:3}, {id:4}, {id:5}]",
23
23
  "[{id:1}, {id:3}, {id:5}]",
24
- 'map:o:id',
24
+ 'mapby:id',
25
25
  "filter:+"
26
26
  ],
27
27
  expect="[{id:1}, {id:3}, {id:5}]")
@@ -30,7 +30,7 @@ class FilterPipe(Pipe):
30
30
  [
31
31
  "[{id:1}, {id:2}, {id:3}, {id:4}, {id:5}]",
32
32
  "[{id:1}, {id:3}, {id:5}]",
33
- 'map:o:id',
33
+ 'mapby:id',
34
34
  "filter:-"
35
35
  ],
36
36
  expect="[{id:2}, {id:4}]")
@@ -2,8 +2,6 @@
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
4
  # djk/pipes/head.py
5
-
6
- from typing import Optional
7
5
  from pjk.base import Pipe, ParsedToken, Usage
8
6
 
9
7
  class HeadPipe(Pipe):
@@ -11,15 +9,15 @@ class HeadPipe(Pipe):
11
9
  def usage(cls):
12
10
  usage = Usage(
13
11
  name='head',
14
- desc='take first records of input (when single-threaded)',
12
+ desc='take first records of input (single-threaded)',
15
13
  component_class=cls
16
14
  )
17
- usage.def_arg(name='limit', usage='number of records', is_num=True)
18
15
  usage.def_example(expr_tokens=['[{id:1}, {id:2}]', 'head:1'], expect="{id:1}")
16
+ usage.def_arg(name='limit', usage='number of records', is_num=True)
19
17
  return usage
20
18
 
21
19
  def __init__(self, ptok: ParsedToken, usage: Usage):
22
- super().__init__(ptok)
20
+ super().__init__(ptok, usage)
23
21
  self.limit = usage.get_arg('limit')
24
22
  self.count = 0
25
23
 
@@ -20,13 +20,13 @@ class JoinPipe(Pipe):
20
20
  usage="'left', 'inner', or 'outer' join behavior",
21
21
  valid_values={'left', 'inner', 'outer'}
22
22
  )
23
- usage.def_syntax("pjk <left_source> <map_source> map:<how>:<key> join:<mode> <sink>")
23
+ usage.def_syntax("pjk <left_source> <map_source> [mapby|groupby]:<key> join:<mode> <sink>")
24
24
 
25
25
  usage.def_example(expr_tokens=
26
26
  [
27
27
  "[{color:'blue'},{color:'green'}]",
28
28
  "[{color:'blue', price:50}, {color:'red', price:20}]",
29
- 'map:o:color',
29
+ 'mapby:color',
30
30
  "join:left"
31
31
  ],
32
32
  expect="[{color:'blue', price:50}, {color:'green'}]")
@@ -34,7 +34,7 @@ class JoinPipe(Pipe):
34
34
  [
35
35
  "[{color:'blue'},{color:'green'}]",
36
36
  "[{color:'blue', price:50}, {color:'red', price:20}]",
37
- 'map:o:color',
37
+ 'mapby:color',
38
38
  "join:inner"
39
39
  ],
40
40
  expect="[{color:'blue', price:50}]")
@@ -43,7 +43,7 @@ class JoinPipe(Pipe):
43
43
  [
44
44
  "[{color:'blue'},{color:'green'}]",
45
45
  "[{color:'blue', price:50}, {color:'red', price:20}]",
46
- 'map:o:color',
46
+ 'mapby:color',
47
47
  "join:outer"
48
48
  ],
49
49
  expect="[{color:'blue', price:50}, {color:'green'}, {color:'red', price: 20}]")