python-jack-knife 0.5.1__tar.gz → 0.5.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/PKG-INFO +1 -1
  2. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/base.py +19 -20
  3. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/common.py +15 -8
  4. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/main.py +56 -31
  5. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/parser.py +12 -3
  6. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/factory.py +6 -2
  7. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/filter.py +3 -3
  8. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/head.py +4 -6
  9. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/join.py +4 -4
  10. python_jack_knife-0.5.5/src/pjk/pipes/map.py +130 -0
  11. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/move_field.py +2 -2
  12. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/postgres_pipe.py +5 -9
  13. python_jack_knife-0.5.5/src/pjk/pipes/progress_pipe.py +41 -0
  14. python_jack_knife-0.5.5/src/pjk/pipes/sample.py +66 -0
  15. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/select.py +2 -4
  16. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/tail.py +1 -1
  17. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/where.py +10 -5
  18. python_jack_knife-0.5.5/src/pjk/progress.py +177 -0
  19. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/registry.py +25 -3
  20. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/devnull.py +13 -6
  21. python_jack_knife-0.5.5/src/pjk/sinks/expect.py +92 -0
  22. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/factory.py +0 -5
  23. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/user_sink_factory.py +2 -1
  24. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/dir_source.py +2 -0
  25. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/factory.py +3 -34
  26. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/format_source.py +5 -0
  27. python_jack_knife-0.5.5/src/pjk/sources/npy_source.py +76 -0
  28. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/s3_source.py +2 -1
  29. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/version.py +1 -1
  30. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
  31. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/SOURCES.txt +4 -0
  32. python_jack_knife-0.5.1/src/pjk/pipes/map.py +0 -91
  33. python_jack_knife-0.5.1/src/pjk/sinks/expect.py +0 -53
  34. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/LICENSE +0 -0
  35. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/README.md +0 -0
  36. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/pyproject.toml +0 -0
  37. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/setup.cfg +0 -0
  38. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/__init__.py +0 -0
  39. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/log.py +0 -0
  40. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/man_page.py +0 -0
  41. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/__init__.py +0 -0
  42. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/denorm.py +0 -0
  43. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/let_reduce.py +0 -0
  44. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/remove_field.py +0 -0
  45. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/sort.py +0 -0
  46. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/pipes/user_pipe_factory.py +0 -0
  47. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/__init__.py +0 -0
  48. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/csv_sink.py +0 -0
  49. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/ddb.py +0 -0
  50. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/dir_sink.py +0 -0
  51. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/format_sink.py +0 -0
  52. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/graph.py +0 -0
  53. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_bar_line.py +0 -0
  54. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_cumulative.py +0 -0
  55. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_hist.py +0 -0
  56. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/graph_scatter.py +0 -0
  57. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/json_sink.py +0 -0
  58. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/s3_sink.py +0 -0
  59. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/s3_stream.py +0 -0
  60. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/sinks.py +0 -0
  61. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/stdout.py +0 -0
  62. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sinks/tsv_sink.py +0 -0
  63. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/__init__.py +0 -0
  64. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/csv_source.py +0 -0
  65. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/inline_source.py +0 -0
  66. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/json_source.py +0 -0
  67. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file.py +0 -0
  68. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file_local.py +0 -0
  69. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/lazy_file_s3.py +0 -0
  70. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/parquet_source.py +0 -0
  71. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/source_list.py +0 -0
  72. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/sql_source.py +0 -0
  73. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/tsv_source.py +0 -0
  74. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/pjk/sources/user_source_factory.py +0 -0
  75. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  76. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  77. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/requires.txt +0 -0
  78. {python_jack_knife-0.5.1 → python_jack_knife-0.5.5}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.5.1
3
+ Version: 0.5.5
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -162,7 +162,7 @@ class Usage:
162
162
  return '\n'.join(lines)
163
163
 
164
164
  def get_token_syntax(self):
165
- if not self.syntax:
165
+ if self.syntax:
166
166
  return self.syntax # else piece it together
167
167
 
168
168
  token = f'{self.name}'
@@ -300,10 +300,14 @@ class Source(ABC):
300
300
 
301
301
  def deep_copy(self):
302
302
  return None # Default: not copyable unless overridden
303
-
303
+
304
+ def close(self):
305
+ pass
306
+
307
+ def _get_sources(self, source_list: list):
308
+ pass
304
309
 
305
310
  class Pipe(Source):
306
- deep_copyable: bool = False # default to false
307
311
  arity: int = 1
308
312
 
309
313
  def __init__(self, ptok: ParsedToken, usage: Usage = None):
@@ -326,20 +330,12 @@ class Pipe(Source):
326
330
  pass # optional hook
327
331
 
328
332
  def deep_copy(self) -> Optional["Pipe"]:
329
- if not self.deep_copyable:
330
- return None
331
- if not self.inputs:
332
- raise RuntimeError(f"{self.__class__.__name__} has no inputs set")
333
-
334
- clone = self.__class__(self.ptok, self.__class__.usage())
335
-
336
- for input in self.inputs:
337
- strand = input.deep_copy()
338
- if strand is None:
339
- return None
340
- clone.add_source(strand)
341
-
342
- return clone
333
+ return None
334
+
335
+ def _get_sources(self, source_list: list):
336
+ for ix in self.inputs:
337
+ source_list.append(ix)
338
+ ix._get_sources(source_list)
343
339
 
344
340
  class DeepCopyPipe(Pipe):
345
341
  def deep_copy(self):
@@ -373,13 +369,16 @@ class Sink(ABC):
373
369
  self.process()
374
370
  self.close()
375
371
 
372
+ # get all inputs in the execution chain for closing
373
+ inputs = [self.input]
374
+ self.input._get_sources(inputs)
375
+ for input in inputs:
376
+ input.close()
377
+
376
378
  # optional
377
379
  def close(self):
378
380
  pass
379
381
 
380
- def print_info(self):
381
- pass
382
-
383
382
  def add_source(self, source: Source) -> None:
384
383
  self.input = source
385
384
 
@@ -4,6 +4,7 @@
4
4
  import sys, shutil, subprocess, contextlib, signal
5
5
  import os
6
6
  import yaml
7
+ from pjk.base import TokenError
7
8
 
8
9
  class SafeNamespace:
9
10
  def __init__(self, obj):
@@ -73,11 +74,12 @@ def highlight(text: str, color: str = 'bold', value: str = None) -> str:
73
74
  return text.replace(value, f"{style}{value}{RESET}")
74
75
 
75
76
  class Lookups:
76
- def __init__(self):
77
+ def __init__(self, component_class):
77
78
  self.lookups_yaml = os.path.expanduser('~/.pjk/lookups.yaml')
79
+ self.class_name = type(component_class).__name__
78
80
  self._data = {}
79
81
  self._load()
80
-
82
+
81
83
  def _load(self):
82
84
  """Load lookups from YAML file if it exists."""
83
85
  if os.path.exists(self.lookups_yaml):
@@ -93,8 +95,13 @@ class Lookups:
93
95
  yaml.safe_dump(self._data, f)
94
96
 
95
97
  def get(self, key, default=None):
96
- """Retrieve a lookup value by key."""
97
- return self._data.get(key, default)
98
+ lookup_key = f'{self.class_name}-{key}'
99
+ entry = self._data.get(lookup_key, default)
100
+ if not entry:
101
+ raise TokenError(
102
+ f"~/.pjk/lookups.yaml must contain entry for '{lookup_key}' with host, user, password."
103
+ )
104
+ return entry
98
105
 
99
106
  def set(self, key, value):
100
107
  """Set a lookup value and persist it."""
@@ -129,14 +136,14 @@ class ComponentFactory:
129
136
  print(header)
130
137
 
131
138
  i = 0
132
- plugin = ''
139
+ # user and outside package components are also here, but printed from registry class
133
140
  for name, comp_class in self.components.items():
134
141
  usage = comp_class.usage()
135
142
  lines = usage.desc.split('\n')
136
143
  if i >= self.num_orig_comps:
137
- plugin = '(~/.pjk/plugin)'
138
- line = f' {name:<12} {lines[0]} {plugin}'
139
- line = highlight(line, 'bold', plugin) if plugin else line
144
+ break
145
+
146
+ line = f' {name:<12} {lines[0]}'
140
147
  print(line)
141
148
  i += 1
142
149
 
@@ -10,17 +10,20 @@ from typing import List
10
10
  from pjk.parser import ExpressionParser
11
11
  from pjk.base import UsageError
12
12
  from pjk.log import init as init_logging
13
- from datetime import datetime, timezone
13
+ from datetime import datetime
14
+ import traceback
14
15
  import concurrent.futures
15
16
  from pjk.registry import ComponentRegistry
16
- from pjk.pipes.factory import PipeFactory
17
- from pjk.sources.factory import SourceFactory
18
- from pjk.sinks.factory import SinkFactory
17
+ from pjk.sinks.stdout import StdoutSink
19
18
  from pjk.man_page import do_man, do_examples
20
19
  from pjk.sinks.expect import ExpectSink
20
+ from pjk.progress import ProgressDisplay
21
21
  from pjk.version import __version__
22
22
 
23
23
  def write_history(tokens):
24
+ if os.environ.get("PJK_NO_HISTORY") == "1":
25
+ return
26
+
24
27
  log_path = ".pjk-history.txt"
25
28
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
26
29
  command = " ".join(tokens)
@@ -31,56 +34,71 @@ def write_history(tokens):
31
34
  except (PermissionError, OSError):
32
35
  pass
33
36
 
34
- def execute_threaded(sinks):
35
- # Choose a max thread limit (explicitly)
36
- max_workers = min(32, len(sinks)) # or set a fixed cap like 8
37
-
38
- with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
39
- futures = {
40
- executor.submit(s.drain): s for s in sinks
41
- }
42
-
37
+ def execute_threaded(sinks, stop_progress=None):
38
+ max_workers = min(32, len(sinks))
39
+ executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) # no 'with'
40
+ futures = {executor.submit(s.drain): s for s in sinks}
41
+ try:
43
42
  for future in concurrent.futures.as_completed(futures):
44
43
  sink_obj = futures[future]
45
- try:
46
- future.result() # This will re-raise any exception from s.drain()
47
- except Exception as e:
48
- print(f"Sink {sink_obj} raised an exception:")
49
- print(e)
44
+ future.result() # re-raises worker exception with traceback
45
+ except KeyboardInterrupt:
46
+ # stop UI first, then cancel and non-blocking shutdown
47
+ if stop_progress:
48
+ try: stop_progress()
49
+ except Exception: pass
50
+ for f in futures:
51
+ f.cancel()
52
+ executor.shutdown(wait=False, cancel_futures=True)
53
+ os._exit(130)
54
+
55
+ except Exception as e:
56
+ if stop_progress:
57
+ try: stop_progress()
58
+ except Exception: pass
59
+ sys.stderr.write(f"Sink {futures[future]} raised an exception:\n")
60
+ traceback.print_exception(type(e), e, e.__traceback__, file=sys.stderr)
61
+ for f in futures:
62
+ f.cancel()
63
+ executor.shutdown(wait=False, cancel_futures=True)
64
+ raise
65
+ else:
66
+ executor.shutdown(wait=True)
50
67
 
51
68
  def execute(command: str):
52
69
  tokens = shlex.split(command, comments=True, posix=True)
53
70
  execute_tokens(tokens)
54
71
 
55
- def execute_tokens(tokens:List[str]):
72
+ def execute_tokens(tokens: List[str]):
56
73
  init_logging()
57
- signal.signal(signal.SIGINT, lambda s, f: sys.exit(0))
74
+ # (remove the sys.exit SIGINT handler here)
58
75
 
59
76
  if '--version' in tokens:
60
77
  print(f"pjk version {__version__}")
61
78
  sys.exit(0)
62
-
79
+
63
80
  registry = ComponentRegistry()
64
-
81
+
65
82
  if len(tokens) < 1:
66
83
  registry.print_usage()
67
84
  return
68
-
69
- # pjk man --all | --all+ | <component>
85
+
70
86
  if len(tokens) == 2 and tokens[0] == 'man':
71
87
  do_man(tokens[1], registry)
72
88
  return
73
-
74
- # pjk examples | examples+
89
+
75
90
  if len(tokens) == 1 and tokens[0] in ['examples', 'examples+']:
76
91
  do_examples(tokens[0], registry)
77
92
  return
78
93
 
79
94
  parser = ExpressionParser(registry)
80
95
 
96
+ display = None
81
97
  try:
82
- # Build initial sink
83
98
  sink = parser.parse(tokens)
99
+ if not isinstance(sink, (StdoutSink | ExpectSink)):
100
+ display = ProgressDisplay(interval=3.0)
101
+ display.start()
84
102
 
85
103
  sinks = [sink]
86
104
  max_threads = os.cpu_count()
@@ -91,16 +109,23 @@ def execute_tokens(tokens:List[str]):
91
109
  sinks.append(clone)
92
110
 
93
111
  if len(sinks) > 1:
94
- execute_threaded(sinks)
112
+ # pass a stopper so we halt the UI before tracebacks / shutdown
113
+ execute_threaded(sinks, stop_progress=(display.stop if display else None))
95
114
  else:
96
- sink.drain() # run single in main thread
97
- sink.print_info() # rarely used, e.g. expect and devnull
115
+ sink.drain()
98
116
 
99
117
  write_history(sys.argv[1:])
100
118
 
101
119
  except UsageError as e:
102
120
  print(e, file=sys.stderr)
103
- sys.exit(2) # Exit with a non-zero code, but no traceback
121
+ sys.exit(2)
122
+ except KeyboardInterrupt:
123
+ pass
124
+ finally:
125
+ if display:
126
+ # short join so Ctrl-C is immediate
127
+ try: display.stop(timeout=0.1)
128
+ except Exception: pass
104
129
 
105
130
  def main():
106
131
  tokens = sys.argv[1:]
@@ -8,6 +8,9 @@ from typing import Optional, Any, List
8
8
  from pjk.base import Source, Pipe, Sink, TokenError, UsageError, ParsedToken, Usage
9
9
  from pjk.pipes.user_pipe_factory import UserPipeFactory
10
10
  from pjk.pipes.let_reduce import ReducePipe
11
+ from pjk.sinks.stdout import StdoutSink
12
+ from pjk.sinks.expect import ExpectSink
13
+ from pjk.pipes.progress_pipe import ProgressPipe
11
14
  from pjk.registry import ComponentRegistry
12
15
 
13
16
  def expand_macros(tokens: List[str]) -> List[str]:
@@ -58,8 +61,12 @@ class ExpressionParser:
58
61
  if not sink:
59
62
  raise TokenError.from_list(['expression must end in a sink.',
60
63
  'pjk <source> [<pipe> ...] <sink>'])
64
+
65
+ # so each sink doesn't have to, maybe make a base class or mixin for sinks
66
+ progress_pipe = ProgressPipe(component_instance=sink)
67
+ progress_pipe.add_source(source)
61
68
 
62
- sink.add_source(source)
69
+ sink.add_source(progress_pipe)
63
70
  return sink
64
71
 
65
72
  def parse(self, tokens: List[str]) -> Sink:
@@ -78,10 +85,12 @@ class ExpressionParser:
78
85
  return self.get_sink(stack_helper, token)
79
86
 
80
87
  source = self.registry.create_source(token)
81
- if source:
88
+ if source:
82
89
  stack_helper.add_operator(source, self.stack)
90
+ progress_pipe = ProgressPipe(component_instance=source, simple=True)
91
+ stack_helper.add_operator(progress_pipe, self.stack)
83
92
  continue
84
-
93
+
85
94
  subexp = SubExpression.create(token)
86
95
  if subexp:
87
96
  stack_helper.add_operator(subexp, self.stack)
@@ -12,12 +12,14 @@ from pjk.pipes.head import HeadPipe
12
12
  from pjk.pipes.tail import TailPipe
13
13
  from pjk.pipes.sort import SortPipe
14
14
  from pjk.pipes.where import WherePipe
15
- from pjk.pipes.map import MapPipe
15
+ from pjk.pipes.map import MapByPipe
16
+ from pjk.pipes.map import GroupByPipe
16
17
  from pjk.pipes.join import JoinPipe
17
18
  from pjk.pipes.filter import FilterPipe
18
19
  from pjk.pipes.select import SelectFields
19
20
  from pjk.pipes.denorm import DenormPipe
20
21
  from pjk.pipes.postgres_pipe import PostgresPipe
22
+ from pjk.pipes.sample import SamplePipe
21
23
  from pjk.pipes.user_pipe_factory import UserPipeFactory
22
24
 
23
25
  COMPONENTS = {
@@ -25,7 +27,8 @@ COMPONENTS = {
25
27
  'tail': TailPipe,
26
28
  'join': JoinPipe,
27
29
  'filter': FilterPipe,
28
- 'map': MapPipe,
30
+ 'mapby': MapByPipe,
31
+ 'groupby': GroupByPipe,
29
32
  'as': MoveField,
30
33
  'drop': RemoveField,
31
34
  'let': LetPipe,
@@ -33,6 +36,7 @@ COMPONENTS = {
33
36
  'sort': SortPipe,
34
37
  'where': WherePipe,
35
38
  'sel': SelectFields,
39
+ 'sample': SamplePipe,
36
40
  'explode': DenormPipe,
37
41
  'pgres': PostgresPipe,
38
42
  }
@@ -15,13 +15,13 @@ class FilterPipe(Pipe):
15
15
  )
16
16
  usage.def_arg("mode", "'+' to include matches, '-' to exclude matches",
17
17
  valid_values={'+', '-'})
18
- usage.def_syntax("pjk <left_source> <map_source> map:<how>:<key> filter:<mode> <sink>")
18
+ usage.def_syntax("pjk <left_source> <map_source> [mapby:groupby]:<how>:<key> filter:<mode> <sink>")
19
19
 
20
20
  usage.def_example(expr_tokens=
21
21
  [
22
22
  "[{id:1}, {id:2}, {id:3}, {id:4}, {id:5}]",
23
23
  "[{id:1}, {id:3}, {id:5}]",
24
- 'map:o:id',
24
+ 'mapby:id',
25
25
  "filter:+"
26
26
  ],
27
27
  expect="[{id:1}, {id:3}, {id:5}]")
@@ -30,7 +30,7 @@ class FilterPipe(Pipe):
30
30
  [
31
31
  "[{id:1}, {id:2}, {id:3}, {id:4}, {id:5}]",
32
32
  "[{id:1}, {id:3}, {id:5}]",
33
- 'map:o:id',
33
+ 'mapby:id',
34
34
  "filter:-"
35
35
  ],
36
36
  expect="[{id:2}, {id:4}]")
@@ -2,20 +2,18 @@
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
4
  # djk/pipes/head.py
5
+ from pjk.base import Pipe, ParsedToken, Usage
5
6
 
6
- from typing import Optional
7
- from pjk.base import Pipe, ParsedToken, Usage, DeepCopyPipe
8
-
9
- class HeadPipe(DeepCopyPipe):
7
+ class HeadPipe(Pipe):
10
8
  @classmethod
11
9
  def usage(cls):
12
10
  usage = Usage(
13
11
  name='head',
14
- desc='take first records of input (when single-threaded)',
12
+ desc='take first records of input (single-threaded)',
15
13
  component_class=cls
16
14
  )
17
- usage.def_arg(name='limit', usage='number of records', is_num=True)
18
15
  usage.def_example(expr_tokens=['[{id:1}, {id:2}]', 'head:1'], expect="{id:1}")
16
+ usage.def_arg(name='limit', usage='number of records', is_num=True)
19
17
  return usage
20
18
 
21
19
  def __init__(self, ptok: ParsedToken, usage: Usage):
@@ -20,13 +20,13 @@ class JoinPipe(Pipe):
20
20
  usage="'left', 'inner', or 'outer' join behavior",
21
21
  valid_values={'left', 'inner', 'outer'}
22
22
  )
23
- usage.def_syntax("pjk <left_source> <map_source> map:<how>:<key> join:<mode> <sink>")
23
+ usage.def_syntax("pjk <left_source> <map_source> [mapby|groupby]:<key> join:<mode> <sink>")
24
24
 
25
25
  usage.def_example(expr_tokens=
26
26
  [
27
27
  "[{color:'blue'},{color:'green'}]",
28
28
  "[{color:'blue', price:50}, {color:'red', price:20}]",
29
- 'map:o:color',
29
+ 'mapby:color',
30
30
  "join:left"
31
31
  ],
32
32
  expect="[{color:'blue', price:50}, {color:'green'}]")
@@ -34,7 +34,7 @@ class JoinPipe(Pipe):
34
34
  [
35
35
  "[{color:'blue'},{color:'green'}]",
36
36
  "[{color:'blue', price:50}, {color:'red', price:20}]",
37
- 'map:o:color',
37
+ 'mapby:color',
38
38
  "join:inner"
39
39
  ],
40
40
  expect="[{color:'blue', price:50}]")
@@ -43,7 +43,7 @@ class JoinPipe(Pipe):
43
43
  [
44
44
  "[{color:'blue'},{color:'green'}]",
45
45
  "[{color:'blue', price:50}, {color:'red', price:20}]",
46
- 'map:o:color',
46
+ 'mapby:color',
47
47
  "join:outer"
48
48
  ],
49
49
  expect="[{color:'blue', price:50}, {color:'green'}, {color:'red', price: 20}]")
@@ -0,0 +1,130 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # djk/pipes/group.py
5
+
6
+ from typing import Optional
7
+ from pjk.base import ParsedToken, Usage, Pipe, KeyedSource
8
+
9
class MapByPipe(Pipe, KeyedSource):
    """Keyed source that indexes its upstream records by one or more fields.

    In map mode (``is_group=False``) the last record seen for a key replaces
    any earlier one.  In group mode (``is_group=True``) the key fields are
    removed from each record and the remainder is appended to a ``child``
    list stored under a record holding just the key fields.

    Records that lack any of the key fields are skipped.
    """

    @classmethod
    def usage(cls):
        """Return the Usage descriptor (argument, parameter, examples) for ``mapby``."""
        u = Usage(
            name='mapby',
            desc="Maps records to key, taking last instance of duplicates.\nFilters out records without all key fields.\nCreates Keyed Source for join or filter.",
            component_class=cls
        )
        u.def_arg(name='key', usage='comma separated fields to map by')
        u.def_param(name='count', usage='add count of the records with key', valid_values={'true', 'false'}, default='false')
        u.def_example(expr_tokens=["[{id: 1, color:'blue'}, {id:1, color:'green'}, {id:2, color:'red'}]", 'mapby:id'],
                      expect="[{id:2, color:'red'}, {id:1, color:'green'}]")
        u.def_example(expr_tokens=["[{id: 1, color:'blue', size:5}, {id:1, color:'green', size:10}]", 'mapby:id,color'],
                      expect="[{id:1, color:'green', size: 10}, {id:1, color:'blue', size:5}]")
        u.def_example(expr_tokens=["[{id:'a'}, {id:'a'}, {id:'b'}, {j:3}]", "mapby:id@count=true"],
                      expect="[{id:'a', count:2}, {id:'b', 'count': 1}]")

        return u

    def __init__(self, ptok: ParsedToken, usage: Usage, is_group: bool = False):
        """Read the key fields and the ``count`` parameter from ``usage``.

        Args:
            ptok: parsed token, forwarded to ``Pipe.__init__``.
            usage: bound usage object providing the ``key`` arg and ``count`` param.
            is_group: True to collect records per key instead of keeping the last one.
        """
        # NOTE(review): other pipes in this package call super().__init__(ptok, usage);
        # confirm whether usage should be forwarded here as well.
        super().__init__(ptok)
        self.is_group = is_group
        self.fields = usage.get_arg('key').split(',')
        self.rec_map = {}      # key tuple -> record not yet returned by lookup()
        self.matched_map = {}  # key tuple -> record already returned by lookup()
        self.is_loaded = False
        self.do_count = usage.get_param(name='count').lower() == 'true'
        self.counts = {}       # key tuple -> number of input records seen with that key

    def reset(self):
        """Drop all loaded state so the next access reloads from the input."""
        self.rec_map.clear()
        self.matched_map.clear()
        # Without this, counts from the previous load would be added to the next one.
        self.counts.clear()
        self._rec_list = None  # not read anywhere in this class; kept from the original
        self.is_loaded = False

    def get_key_rec(self, record):
        """Return ``{field: value}`` for the key fields, or None if any is absent.

        A field counts as absent only when it is missing or None, so falsy key
        values such as ``0`` or ``''`` remain usable keys.  In group mode the
        key fields are removed from ``record``, but only once all of them are
        known to be present, so a skipped record is never left half-stripped.
        """
        key_rec = {}
        for field in self.fields:
            key_val = record.get(field)
            if key_val is None:
                return None
            key_rec[field] = key_val

        if self.is_group:
            for field in self.fields:
                record.pop(field, None)
        return key_rec

    def count(self, key):
        """Increment the per-key counter when counting is enabled."""
        if not self.do_count:
            return
        self.counts[key] = self.counts.get(key, 0) + 1

    def load(self):
        """Consume the input once and build ``rec_map`` (no-op if already loaded)."""
        if self.is_loaded:
            return
        self.is_loaded = True

        for record in self.left:
            key_rec = self.get_key_rec(record)
            if not key_rec:  # some fields missing, filter out rec
                continue

            key = tuple(key_rec.values())
            self.count(key)

            if not self.is_group:
                # last instance wins; re-assigning keeps the key's original position
                self.rec_map[key] = record
                continue

            existing = self.rec_map.get(key)
            if existing is None:
                key_rec['child'] = [record]
                self.rec_map[key] = key_rec
            else:
                existing['child'].append(record)

        if self.do_count:
            for k, v in self.rec_map.items():
                v['count'] = self.counts.get(k, 0)

    def __iter__(self):
        """Yield the records still held in ``rec_map``, loading first if needed."""
        if not self.is_loaded:
            self.load()
        yield from self.rec_map.values()

    def lookup(self, left_rec) -> Optional[dict]:
        """Return the record whose key matches ``left_rec``, or None.

        The first hit moves the record from ``rec_map`` to ``matched_map``;
        later lookups of the same key are served from ``matched_map``.
        """
        if not self.is_loaded:
            self.load()

        key = tuple(left_rec.get(f) for f in self.fields)
        rec = self.rec_map.pop(key, None)
        if rec is not None:
            self.matched_map[key] = rec
            return rec
        return self.matched_map.get(key)

    def get_unlookedup_records(self):
        """Return a list of the records that no ``lookup`` call has matched yet."""
        if not self.is_loaded:
            self.load()
        return list(self.rec_map.values())
113
+
114
class GroupByPipe(MapByPipe):
    """``groupby`` component: a MapByPipe constructed with ``is_group=True``."""

    @classmethod
    def usage(cls):
        """Return the Usage descriptor (argument, parameter, example) for ``groupby``."""
        usage = Usage(
            name='groupby',
            desc="groups records by key. Creates Keyed Source for join or filter.",
            component_class=cls,
        )
        usage.def_arg(name='key', usage='comma separated fields to map by')
        usage.def_param(
            name='count',
            usage='add count of the records with key',
            valid_values={'true', 'false'},
            default='false',
        )
        usage.def_example(
            expr_tokens=[
                "[{id: 1, color:'blue'}, {id:1, color:'green'}, {id:2, color:'red'}]",
                'groupby:id',
            ],
            expect="[{id:2, child:[{color:'red'}]}, {id:1, child:[{color:'blue'},{color: 'green'}]}]",
        )
        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Initialise the parent pipe in group mode."""
        super().__init__(ptok, usage, is_group=True)
@@ -10,7 +10,7 @@ class MoveField(Pipe):
10
10
  def usage(cls):
11
11
  usage = Usage(
12
12
  name='as',
13
- desc='Move one field to another key in the record',
13
+ desc='rename a field in the record',
14
14
  component_class=cls
15
15
  )
16
16
  usage.def_arg(name='src', usage='Source field name')
@@ -20,7 +20,7 @@ class MoveField(Pipe):
20
20
  return usage
21
21
 
22
22
  def __init__(self, ptok: ParsedToken, usage: Usage):
23
- super().__init__(ptok)
23
+ super().__init__(ptok, usage)
24
24
  self.src = usage.get_arg('src')
25
25
  self.dst = usage.get_arg('dst')
26
26
  self.count = 0
@@ -99,30 +99,26 @@ class PostgresPipe(Pipe):
99
99
  )
100
100
  usage.def_arg(
101
101
  "dbname",
102
- "name of db. Entry in ~/.pjk/lookups.yaml containing host, user, password"
102
+ f"~/.pjk/lookups.yaml must containing entry '{cls.__name__}-<dbname>' with host, user, password"
103
103
  )
104
104
  usage.def_param(
105
105
  "header",
106
106
  usage="emit header record before query results",
107
- valid_values={"true", "false"}, default='true',
107
+ valid_values={"true", "false"}, default='false',
108
108
  )
109
109
 
110
110
  usage.def_example(expr_tokens=['myquery.sql', 'pgres:mydb'], expect=None)
111
111
  usage.def_example(expr_tokens=["{'query': 'SELECT * from MY_TABLE;'}", 'pgres:mydb'], expect=None)
112
+ usage.def_example(expr_tokens=["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'pgres:mydb'], expect=None)
112
113
  return usage
113
114
 
114
115
  def __init__(self, ptok: ParsedToken, usage: Usage):
115
116
  super().__init__(ptok, usage)
116
117
 
117
- lookups = Lookups()
118
+ lookups = Lookups(self)
118
119
  self.dbname = usage.get_arg("dbname")
119
- db_params = lookups.get(self.dbname)
120
- if not db_params:
121
- # f-string so dbname prints correctly
122
- raise TokenError(
123
- f"~/.pjk/lookups.yaml must contain entry for '{self.dbname}' with host, user, password."
124
- )
125
120
 
121
+ db_params = lookups.get(self.dbname)
126
122
  self.db_host = db_params.get("host")
127
123
  self.db_user = db_params.get("user")
128
124
  self.db_pass = db_params.get("password")
@@ -0,0 +1,41 @@
1
+ from typing import Iterator
2
+ from pjk.base import Pipe
3
+ from pjk.progress import papi
4
+
5
+ # monitors flow of records wherever inserted
6
+
7
class ProgressPipe(Pipe):
    """Pass-through pipe that counts every record flowing through it.

    The count is reported under a label derived from the component instance
    given at construction.  Unless ``simple`` is set, a ``threads`` counter is
    also incremented and an elapsed-time entry registered under that label.
    """

    def __init__(self, component_instance = None, simple: bool = False):
        """Register the progress counters for ``component_instance``.

        Args:
            component_instance: component whose label the counters are filed under.
            simple: when True, register only the record counter.
        """
        super().__init__(None, None)
        self.component_instance = component_instance
        self.simple = simple

        name = self.get_component_label(component_instance)
        self.counter = papi.get_counter(name, var_label='recs')
        if simple:
            return
        papi.get_counter(name, var_label='threads').increment()
        papi.add_elapsed_time(name, var_label='elapsed')

    def get_component_label(self, component_instance):
        """Pick a display label: class ``extension``, else usage name, else class name."""
        component_type = type(component_instance)
        if hasattr(component_type, 'extension'):
            return component_type.extension
        if hasattr(component_instance, 'usage'):
            return component_type.usage().name
        return component_type.__name__

    def __iter__(self) -> Iterator:
        """Yield upstream records unchanged, incrementing the counter for each."""
        for rec in self.left:
            self.counter.increment()
            yield rec

    def deep_copy(self):
        """Return a new ProgressPipe over a copy of the upstream, or None if it cannot be copied."""
        upstream_copy = self.left.deep_copy()
        if not upstream_copy:
            return None

        duplicate = ProgressPipe(self.component_instance, self.simple)
        duplicate.add_source(upstream_copy)
        return duplicate
41
+