python-jack-knife 0.6.11__tar.gz → 0.6.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/PKG-INFO +1 -1
  2. python_jack_knife-0.6.14/src/pjk/common.py +168 -0
  3. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/integrations/postgres_pipe.py +42 -20
  4. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/main.py +6 -5
  5. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/man_page.py +9 -11
  6. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/let_reduce.py +1 -1
  7. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/map.py +1 -1
  8. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/query_pipe.py +9 -1
  9. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/remove_field.py +2 -2
  10. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/registry.py +85 -65
  11. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/graph.py +2 -2
  12. python_jack_knife-0.6.14/src/pjk/sinks/graph_bar_line.py +383 -0
  13. python_jack_knife-0.6.14/src/pjk/sinks/stdout.py +78 -0
  14. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/factory.py +0 -2
  15. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/version.py +1 -1
  16. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
  17. python_jack_knife-0.6.11/src/pjk/common.py +0 -123
  18. python_jack_knife-0.6.11/src/pjk/sinks/graph_bar_line.py +0 -229
  19. python_jack_knife-0.6.11/src/pjk/sinks/stdout.py +0 -46
  20. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/LICENSE +0 -0
  21. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/README.md +0 -0
  22. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/pyproject.toml +0 -0
  23. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/setup.cfg +0 -0
  24. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/__init__.py +0 -0
  25. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/components.py +0 -0
  26. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/integrations/opensearch_client.py +0 -0
  27. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/integrations/opensearch_index_sink.py +0 -0
  28. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/integrations/opensearch_query_pipe.py +0 -0
  29. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/integrations/snowflake_pipe.py +0 -0
  30. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/log.py +0 -0
  31. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/parser.py +0 -0
  32. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/__init__.py +0 -0
  33. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/denorm.py +0 -0
  34. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/factory.py +0 -0
  35. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/filter.py +0 -0
  36. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/head.py +0 -0
  37. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/join.py +0 -0
  38. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/move_field.py +0 -0
  39. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/progress_pipe.py +0 -0
  40. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/sample.py +0 -0
  41. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/select.py +0 -0
  42. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/sort.py +0 -0
  43. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/tail.py +0 -0
  44. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/user_pipe_factory.py +0 -0
  45. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/pipes/where.py +0 -0
  46. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/progress.py +0 -0
  47. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/__init__.py +0 -0
  48. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/create_sink.py +0 -0
  49. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/csv_sink.py +0 -0
  50. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/devnull.py +0 -0
  51. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/dir_sink.py +0 -0
  52. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/expect.py +0 -0
  53. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/factory.py +0 -0
  54. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/format_sink.py +0 -0
  55. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/graph_cumulative.py +0 -0
  56. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/graph_hist.py +0 -0
  57. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/graph_scatter.py +0 -0
  58. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/json_sink.py +0 -0
  59. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/s3_sink.py +0 -0
  60. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/s3_stream.py +0 -0
  61. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/sinks.py +0 -0
  62. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/tsv_sink.py +0 -0
  63. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sinks/user_sink_factory.py +0 -0
  64. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/__init__.py +0 -0
  65. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/configs_source.py +0 -0
  66. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/csv_source.py +0 -0
  67. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/dir_source.py +0 -0
  68. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/favorite_source.py +0 -0
  69. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/format_source.py +0 -0
  70. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/inline_source.py +0 -0
  71. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/json_source.py +0 -0
  72. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/lazy_file.py +0 -0
  73. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/lazy_file_local.py +0 -0
  74. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/lazy_file_s3.py +0 -0
  75. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/macro_source.py +0 -0
  76. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/npy_source.py +0 -0
  77. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/parquet_source.py +0 -0
  78. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/s3_source.py +0 -0
  79. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/source_list.py +0 -0
  80. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/sql_source.py +0 -0
  81. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/tsv_source.py +0 -0
  82. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/sources/user_source_factory.py +0 -0
  83. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/pjk/usage.py +0 -0
  84. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/SOURCES.txt +0 -0
  85. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  86. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  87. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/requires.txt +0 -0
  88. {python_jack_knife-0.6.11 → python_jack_knife-0.6.14}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.6.11
3
+ Version: 0.6.14
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -0,0 +1,168 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import contextlib, io, os, subprocess, sys
5
+ import os
6
+ import re
7
+ from abc import ABC
8
+ from enum import Enum
9
+ from pjk.sources.format_source import FormatSource
10
+ from pjk.sinks.format_sink import FormatSink
11
+ from typing import List, Type
12
+
13
class SafeNamespace:
    """Attribute-style view over a record dict.

    Every key of ``obj`` becomes an attribute. Nested dicts are wrapped
    recursively, and dicts found directly inside a list are wrapped too, so
    ``ns.a.b`` and ``ns.items[0].c`` both work. Other values are stored as-is.

    Looking up a field that does not exist yields ``None`` instead of raising,
    so expressions can reference optional fields without guarding.
    """

    def __init__(self, obj):
        for key, value in obj.items():
            if isinstance(value, dict):
                value = SafeNamespace(value)
            elif isinstance(value, list):
                value = [SafeNamespace(item) if isinstance(item, dict) else item
                         for item in value]
            setattr(self, key, value)

    def __getattr__(self, key):
        # __getattr__ only runs after normal lookup has failed, i.e. the
        # field is missing. Dunder names are protocol probes made by Python
        # machinery (copy/pickle check hasattr(obj, '__setstate__'), etc.);
        # answering None for those makes the caller try to *call* None, so
        # report them as genuinely absent.
        if key.startswith('__') and key.endswith('__'):
            raise AttributeError(key)
        return None  # gracefully handle missing record fields
24
+
25
class ReducingNamespace:
    """Attribute-style view over a record where every field reads as a collection.

    ``ns.field`` returns the stored value unchanged when it is already a
    list, tuple or set; any other value is promoted to a one-element list.
    A field that is not in the record raises ``AttributeError``.
    """

    def __init__(self, record):
        self._record = record

    def __getattr__(self, name):
        # Fetch _record through __dict__ rather than self._record: on an
        # instance whose __init__ has not run (as created by copy/pickle),
        # self._record would re-enter __getattr__ and recurse forever.
        try:
            record = self.__dict__['_record']
        except KeyError:
            raise AttributeError(name) from None

        # The attribute protocol requires AttributeError (not KeyError) so
        # that hasattr() and getattr(obj, name, default) behave correctly.
        try:
            value = record[name]
        except KeyError:
            raise AttributeError(name) from None

        if isinstance(value, (list, tuple, set)):
            return value
        return [value]  # promote scalars to singleton lists
34
+
35
+ # pjk/common.py
36
+ import contextlib, io, os, subprocess, sys
37
+
38
@contextlib.contextmanager
def pager_stdout(use_pager: bool = True):
    """
    Context manager that streams everything written to stdout into `less`.

    - If ``use_pager`` is False or stdout is not a TTY, nothing is changed and
      output goes directly to sys.stdout.
    - If `less` cannot be started (not installed, or stdout has no usable
      file descriptor), output also goes directly to sys.stdout.
    - Otherwise `less` is spawned and sys.stdout is replaced by a UTF-8 text
      wrapper around its stdin for the duration of the ``with`` body.

    sys.stdout is always restored on exit. A BrokenPipeError raised by the
    body (the pager went away before all output was written) is swallowed.
    """
    # If not a TTY, paging makes no sense
    if not use_pager or not sys.stdout.isatty():
        yield
        return

    env = os.environ.copy()
    # -R: pass ANSI colors through; -F: quit at once if the output fits on
    # one screen; -X: skip terminal init/deinit so output stays visible.
    # setdefault: a LESS / LESSCHARSET already set by the user wins.
    env.setdefault("LESS", "-RFX")
    env.setdefault("LESSCHARSET", "utf-8")

    stdout_orig = sys.stdout
    stderr_orig = sys.stderr

    # Start less with a *pipe* for stdin and inherit the real terminal for out/err
    try:
        pager = subprocess.Popen(
            ["less"],
            stdin=subprocess.PIPE,
            stdout=stdout_orig,   # keep interactivity
            stderr=stderr_orig,
            env=env,
            close_fds=True,
            bufsize=0,            # unbuffered pipe
        )
    except OSError:
        # Covers FileNotFoundError/PermissionError (no usable `less`) and
        # io.UnsupportedOperation (stdout/stderr without a fileno).
        pager = None

    if pager is None:
        # Paging is a convenience only; degrade to plain output.
        yield
        return

    # Wrap less.stdin as a text writer and swap sys.stdout
    assert pager.stdin is not None
    pager_bin = pager.stdin
    pager_txt = io.TextIOWrapper(pager_bin, encoding="utf-8", write_through=True)

    sys.stdout = pager_txt
    try:
        yield
    except BrokenPipeError:
        pass
    finally:
        # Every cleanup step is best-effort: the pipe may already be broken
        # or closed, and one failing step must not prevent the next.
        with contextlib.suppress(Exception):
            sys.stdout.flush()
        # Restore first, then close pager stdin to send EOF
        sys.stdout = stdout_orig
        with contextlib.suppress(Exception):
            pager_txt.flush()
        with contextlib.suppress(Exception):
            pager_bin.close()  # EOF lets less exit
        with contextlib.suppress(Exception):
            pager.wait()
99
+
100
# ANSI SGR escape sequences, keyed by the style names highlight() accepts.
COLOR_CODES = {
    'bold': '\033[1m',
    'underline': '\033[4m',
    'red': '\033[31m',
    'green': '\033[32m',
    'yellow': '\033[33m',
    'blue': '\033[34m',
    'magenta': '\033[35m',
    'cyan': '\033[36m',
    'gray': '\033[90m',
}

# Resets all attributes set by any of the codes above.
RESET = '\033[0m'

def highlight(text: str, color: str = 'bold', value: str = None) -> str:
    """Return ``text`` with ANSI styling applied.

    Args:
        text: The string to decorate.
        color: A key of COLOR_CODES (case-insensitive). Unknown names fall
            back to 'bold'.
        value: Substring to style. Every occurrence in ``text`` is wrapped.
            When omitted or empty, the whole of ``text`` is styled.

    Returns:
        ``text`` with each occurrence of the target wrapped in the style code
        and RESET. An empty ``text`` is returned unchanged.
    """
    target = value if value else text
    if not target:
        # Nothing to wrap. str.replace('', x) would otherwise insert a bare
        # escape-code pair into the empty string.
        return text
    style = COLOR_CODES.get(color.lower(), COLOR_CODES['bold'])
    return text.replace(target, f"{style}{target}{RESET}")
118
+
119
# mixin
class Integration(ABC):
    """Marker base class: ComponentWrapper flags any subclass as an integration."""
    pass


class ComponentOrigin(Enum):
    """Where a registered component came from."""
    CORE = 0      # core components defined in python-jack-knife
    EXTERNAL = 1  # component loaded via load_package_extras (displayed in either 'integrations' or 'applications')
    USER = 2      # components loaded via load_user_components (always displayed in user_components)


class ComponentWrapper:
    """A registered component class together with its name and origin."""

    def __init__(self, name: str, comp_class, origin: ComponentOrigin):
        self.name = name
        self.comp_class = comp_class
        self.origin = origin
        # issubclass() raises TypeError when handed a non-class, so check
        # first; anything that is not a class cannot be an Integration.
        self.is_integration = isinstance(comp_class, type) and issubclass(comp_class, Integration)


class ComponentFactory:
    """Name -> component class registry with filtering by origin and integration flag."""

    def __init__(self, core_components: dict):
        """Register every ``name: class`` pair of ``core_components`` as CORE."""
        self.wrappers = {}
        for name, comp_class in core_components.items():
            self.register(name, comp_class, origin=ComponentOrigin.CORE)

    def register(self, name, comp_class, origin: ComponentOrigin):
        """Register ``comp_class`` under ``name``, replacing any earlier entry with that name."""
        self.wrappers[name] = ComponentWrapper(name, comp_class=comp_class, origin=origin)

    def get_components(self, origin_list: List[ComponentOrigin], is_integration: "bool | None" = None) -> dict:
        """Return ``{name: class}`` for the registered components matching the filters.

        Args:
            origin_list: Origins to include; a component matches when its
                origin equals any entry.
            is_integration: True keeps only integrations, False keeps only
                non-integrations, None (the default) does not filter on it.

        Returns:
            A new dict, in registration order.
        """
        selected = {}
        for wrapper in self.wrappers.values():
            if is_integration is not None and wrapper.is_integration != is_integration:
                continue
            if wrapper.origin in origin_list:
                selected[wrapper.name] = wrapper.comp_class
        return selected

    def get_component_class(self, name: str):
        """Return the class registered under ``name``, or None when there is none."""
        wrapper = self.wrappers.get(name, None)
        if not wrapper:
            return None
        return wrapper.comp_class

    def create(self, token: str):
        """Base implementation does nothing and returns None.

        NOTE(review): presumably concrete factories override this to build a
        component from ``token`` -- confirm against the subclasses.
        """
        pass
166
+
167
# Identifier shape: a letter or underscore, then letters, digits or underscores.
_FIELD_NAME_RE = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')

def is_valid_field_name(name: str) -> bool:
    """Return True when ``name`` is an ASCII identifier-style field name.

    A valid name starts with a letter or underscore and continues with
    letters, digits or underscores only. The empty string and non-string
    values are not valid.
    """
    if not isinstance(name, str):
        return False
    # fullmatch already anchors both ends, so no ^/$ is needed in the pattern.
    return _FIELD_NAME_RE.fullmatch(name) is not None
@@ -97,7 +97,8 @@ class PostgresPipe(QueryPipe,Integration):
97
97
  examples = [
98
98
  ['myquery.sql', 'postgres:mydb', '-'],
99
99
  ["{'query': 'SELECT * from MY_TABLE;'}", 'postgres:mydb', '-'],
100
- ["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'postgres:mydb']
100
+ ["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'postgres:mydb'],
101
+ ["{'query': 'SELECT stored_procedure(%s, ...), batch_params:{...}"]
101
102
  ]
102
103
 
103
104
  # name, type, default
@@ -165,14 +166,32 @@ class PostgresPipe(QueryPipe,Integration):
165
166
  try:
166
167
  query = record.get(self.query_field)
167
168
  if not query:
168
- record['_error'] = 'missing query'
169
- yield record
170
- else:
171
- params = record.get(self.params_field)
172
-
173
- cur = client.conn.cursor()
174
- try:
175
- # execute
169
+ record['_error'] = 'missing query'
170
+ yield record
171
+ return
172
+
173
+ params = record.get(self.params_field) # single-exec params
174
+ batch = record.get("batch_params", None) # list[tuple|dict] for batching
175
+
176
+ cur = client.conn.cursor()
177
+ try:
178
+ did_executemany = False
179
+
180
+ # ---------- execute ----------
181
+ if batch is not None:
182
+ # Handle batch sizes explicitly to preserve single-SELECT streaming semantics
183
+ if len(batch) == 0:
184
+ # No-op batch; execute a lightweight statement so we can still emit a header
185
+ cur.execute("SELECT 1")
186
+ header_params = {"batch_size": 0}
187
+ elif len(batch) == 1:
188
+ cur.execute(query, batch[0])
189
+ header_params = batch[0]
190
+ else:
191
+ cur.executemany(query, batch)
192
+ did_executemany = True
193
+ header_params = {"batch_size": len(batch)}
194
+ else:
176
195
  if params is None:
177
196
  cur.execute(query)
178
197
  else:
@@ -180,17 +199,20 @@ class PostgresPipe(QueryPipe,Integration):
180
199
  cur.execute(query, params)
181
200
  else:
182
201
  cur.execute(query, (params,))
202
+ header_params = params
183
203
 
184
- # yield header first
185
- yield self._make_header(cur, query, params)
186
-
187
- # then stream rows if it was a real SELECT with results
188
- if cur.description:
189
- cols = [d[0] for d in cur.description]
190
- if not (len(cols) == 1 and cols[0] == "ingest_event"):
191
- for row in cur:
192
- yield _row_to_dict(cur, row)
193
- finally:
194
- cur.close()
204
+ # ---------- header ----------
205
+ yield self._make_header(cur, query, header_params)
206
+
207
+ # ---------- stream rows (only meaningful for single execute that returns rows) ----------
208
+ # Note: executemany() typically doesn't expose per-execution result sets.
209
+ if not did_executemany and cur.description:
210
+ cols = [d[0] for d in cur.description]
211
+ if not (len(cols) == 1 and cols[0] == "ingest_event"):
212
+ for row in cur:
213
+ yield _row_to_dict(cur, row)
214
+
215
+ finally:
216
+ cur.close()
195
217
  finally:
196
218
  client.close()
@@ -5,13 +5,11 @@
5
5
  import sys
6
6
  import os
7
7
  import shlex
8
- import shutil
9
8
  from typing import List
10
9
  from pjk.parser import ExpressionParser
11
10
  from pjk.usage import UsageError
12
11
  from pjk.log import init as init_logging
13
12
  from datetime import datetime
14
- from pathlib import Path
15
13
  import traceback
16
14
  import concurrent.futures
17
15
  from pjk.registry import ComponentRegistry
@@ -27,6 +25,10 @@ def write_history(tokens):
27
25
 
28
26
  log_path = ".pjk-history.txt"
29
27
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
28
+
29
+ if len(tokens) < 2:
30
+ return
31
+
30
32
  command = " ".join(tokens)
31
33
 
32
34
  try:
@@ -68,6 +70,7 @@ def execute_threaded(sinks, stop_progress=None):
68
70
 
69
71
  def initialize():
70
72
  init_logging()
73
+ write_history(sys.argv[1:])
71
74
 
72
75
  #src = Path("src/pjk/resources/configs.tmpl")
73
76
  #dst_dir = Path.home() / ".pjk"
@@ -80,7 +83,7 @@ def execute(command: str):
80
83
 
81
84
  def execute_tokens(tokens: List[str]):
82
85
  initialize()
83
-
86
+
84
87
  if '--version' in tokens:
85
88
  print(f"pjk version {__version__}")
86
89
  sys.exit(0)
@@ -122,8 +125,6 @@ def execute_tokens(tokens: List[str]):
122
125
  else:
123
126
  sink.drain()
124
127
 
125
- write_history(sys.argv[1:])
126
-
127
128
  except UsageError as e:
128
129
  print(e, file=sys.stderr)
129
130
  sys.exit(2)
@@ -8,7 +8,7 @@ from pjk.parser import ExpressionParser
8
8
  from pjk.components import Source, Pipe, Sink
9
9
  from pjk.usage import Usage, ParsedToken
10
10
  from pjk.registry import ComponentRegistry
11
- from pjk.common import pager_stdout, highlight
11
+ from pjk.common import pager_stdout, highlight, ComponentOrigin
12
12
  from contextlib import nullcontext
13
13
 
14
14
  def get_base_class(usage: Usage, as_string: bool = False):
@@ -49,9 +49,9 @@ def do_man(name: str, registry: ComponentRegistry):
49
49
  # source and sinks have common names so go through multiple times
50
50
  printed = False
51
51
  for factory in registry.get_factories():
52
- usage = factory.get_usage(name)
53
- if usage:
54
- print_man(registry, name, usage)
52
+ comp_class = factory.get_component_class(name)
53
+ if comp_class:
54
+ print_man(registry, name, comp_class.usage())
55
55
  printed = True
56
56
 
57
57
  if not printed:
@@ -61,11 +61,9 @@ def do_all_man(registry: ComponentRegistry, no_pager: bool = True):
61
61
  cm = nullcontext() if no_pager else pager_stdout()
62
62
  with cm:
63
63
  for factory in registry.get_factories():
64
- #comp_type = factory.get_comp_type_name()
65
- component_tuples = factory.get_component_name_class_tuples() # all of them
66
- for name, comp_class in component_tuples:
67
- usage = factory.get_usage(name)
68
- print_man(registry, name, usage)
64
+ component_dict = factory.get_components([ComponentOrigin.CORE, ComponentOrigin.EXTERNAL, ComponentOrigin.USER], is_integration=None)
65
+ for name, comp_class in component_dict.items():
66
+ print_man(registry, name, comp_class.usage())
69
67
  print()
70
68
 
71
69
  def print_man(registry: ComponentRegistry, name: str, usage: Usage):
@@ -94,8 +92,8 @@ def do_examples(token:str, registry: ComponentRegistry):
94
92
  cm = nullcontext() if no_pager else pager_stdout()
95
93
  with cm:
96
94
  for factory in registry.get_factories():
97
- comp_type = factory.get_comp_type_name()
98
- for name, comp_class in factory.get_component_name_class_tuples():
95
+ component_dict = factory.get_components([ComponentOrigin.CORE, ComponentOrigin.EXTERNAL, ComponentOrigin.USER], is_integration=None)
96
+ for name, comp_class in component_dict.items():
99
97
  usage = comp_class.usage()
100
98
 
101
99
  comp_type = get_base_class(usage, as_string=True)
@@ -24,7 +24,7 @@ def do_eval(expr, env):
24
24
  safe_env['json'] = json
25
25
  return eval(expr, {}, safe_env)
26
26
  except Exception:
27
- raise UsageError(f"UsageError in expression: {expr}")
27
+ raise Exception(f"Error in expression: {expr}")
28
28
 
29
29
  def eval_regular(expr: str, record: dict):
30
30
  env = {'f': SafeNamespace(record)}
@@ -51,7 +51,7 @@ class MapByPipe(Pipe, KeyedSource):
51
51
  key_rec = {}
52
52
  for field in self.fields:
53
53
  key_val = record.pop(field, None) if self.is_group else record.get(field)
54
- if not key_val:
54
+ if key_val is None: # not only false-ish but NONE
55
55
  return None
56
56
 
57
57
  key_rec[field] = key_val
@@ -2,7 +2,7 @@ from pjk.components import Pipe
2
2
  from pjk.usage import ParsedToken, Usage, CONFIG_FILE
3
3
  from typing import Any, Dict, Iterable, Optional
4
4
  from abc import abstractmethod
5
-
5
+ from pjk.progress import papi
6
6
 
7
7
  class QueryPipe(Pipe):
8
8
  name: str = None
@@ -40,6 +40,8 @@ class QueryPipe(Pipe):
40
40
  self.output_shape = usage.get_param('shape')
41
41
  self.count = usage.get_param('count')
42
42
  self.query_field = 'query' # for all subclasses
43
+ self.inrecs = papi.get_counter(self, var_label=None) # don't display progress
44
+ self.outrecs = papi.get_percentage_counter(self, var_label='recs_out', denom_counter=self.inrecs)
43
45
 
44
46
  @abstractmethod
45
47
  def execute_query_returning_S_xO_iterable(self, record) -> Iterable[Dict[str, Any]]:
@@ -53,6 +55,7 @@ class QueryPipe(Pipe):
53
55
 
54
56
  def __iter__(self):
55
57
  for in_rec in self.left:
58
+ self.inrecs.increment()
56
59
  iter = self.execute_query_returning_S_xO_iterable(in_rec)
57
60
 
58
61
  if self.output_shape == 'S_xO':
@@ -60,8 +63,11 @@ class QueryPipe(Pipe):
60
63
  for out_rec in iter:
61
64
  if not q_done:
62
65
  q_done = True
66
+ self.outrecs.increment()
63
67
  yield self._make_q_object(in_rec, out_rec)
64
68
  continue
69
+
70
+ self.outrecs.increment()
65
71
  yield out_rec
66
72
 
67
73
  elif self.output_shape == 'xO':
@@ -70,6 +76,7 @@ class QueryPipe(Pipe):
70
76
  if not q_done:
71
77
  q_done = True
72
78
  continue
79
+ self.outrecs.increment()
73
80
  yield out_rec
74
81
 
75
82
  elif self.output_shape == 'Sxo':
@@ -84,6 +91,7 @@ class QueryPipe(Pipe):
84
91
  continue
85
92
  r_list.append(out_rec)
86
93
  q_out['child'] = r_list
94
+ self.outrecs.increment()
87
95
  yield q_out
88
96
 
89
97
 
@@ -3,10 +3,10 @@
3
3
 
4
4
  # djk/pipes/remove_field.py
5
5
 
6
- from pjk.components import Pipe
6
+ from pjk.components import DeepCopyPipe
7
7
  from pjk.usage import ParsedToken, Usage, UsageError
8
8
 
9
- class RemoveField(Pipe):
9
+ class RemoveField(DeepCopyPipe):
10
10
  @classmethod
11
11
  def usage(cls):
12
12
  usage = Usage(
@@ -10,25 +10,32 @@ from pjk.sinks.format_sink import FormatSink
10
10
  from pjk.sources.format_source import FormatSource
11
11
  import importlib.util
12
12
  import importlib
13
- import importlib.metadata
14
13
  from pjk.components import Pipe, Source, Sink
15
- from pjk.common import ComponentFactory, highlight
16
- from typing import List
14
+ from pjk.common import ComponentFactory, highlight, ComponentOrigin
15
+ from typing import List, Type
17
16
 
18
- class DisplayHolder:
19
- def __init__(self, factories: List[ComponentFactory]):
20
- for factory in factories:
21
- pass
22
-
17
+ class ExternalRegistrar:
18
+ def __init__(self, sources: SourceFactory, pipes: PipeFactory, sinks: SinkFactory) -> None:
19
+ self._sources: SourceFactory = sources
20
+ self._pipes: PipeFactory = pipes
21
+ self._sinks: SinkFactory = sinks
22
+
23
+ def source(self, name: str, cls: Type[Source]) -> None:
24
+ self._sources.register(name, cls, origin=ComponentOrigin.EXTERNAL)
25
+
26
+ def pipe(self, name: str, cls: Type[Pipe]) -> None:
27
+ self._pipes.register(name, cls, origin=ComponentOrigin.EXTERNAL)
28
+
29
+ def sink(self, name: str, cls: Type[Sink]) -> None:
30
+ self._sinks.register(name, cls, origin=ComponentOrigin.EXTERNAL)
23
31
 
24
32
  class ComponentRegistry:
25
33
  def __init__(self):
26
34
  self.source_factory = SourceFactory()
27
35
  self.pipe_factory = PipeFactory()
28
36
  self.sink_factory = SinkFactory()
29
-
30
37
  self.load_user_components()
31
- load_package_extras()
38
+ self.load_namespace_extras()
32
39
 
33
40
  def create_source(self, token: str):
34
41
  return self.source_factory.create(token)
@@ -56,9 +63,31 @@ class ComponentRegistry:
56
63
  print()
57
64
  print_factory_core(self.sink_factory, header='sinks')
58
65
 
59
- self.print_origin_components('integration', 'integrations')
60
- self.print_origin_components('user', 'user components (~/.pjk/plugins)')
66
+ self.print_non_core([ComponentOrigin.CORE,ComponentOrigin.EXTERNAL], is_integration=True, header='integrations')
67
+ self.print_non_core([ComponentOrigin.EXTERNAL], is_integration=False, header='apps')
68
+ self.print_non_core([ComponentOrigin.USER], is_integration=None, header='user components (~/.pjk/plugins)')
69
+
70
+ # is_integration = True|False|None None=don't care
71
+ def print_non_core(self, origin_list: List[ComponentOrigin], is_integration: bool, header:str):
72
+ all = {}
73
+ for factory in [self.source_factory, self.pipe_factory, self.sink_factory]:
74
+ component_dict = factory.get_components(origin_list=origin_list, is_integration=is_integration)
75
+ all.update(component_dict)
76
+
77
+ if not all:
78
+ return
61
79
 
80
+ print()
81
+ print(highlight(header))
82
+
83
+ for name, comp_class in all.items():
84
+ usage = comp_class.usage()
85
+ comp_class_type_str = get_component_type(comp_class)
86
+ lines = usage.desc.split('\n')
87
+ temp = highlight(comp_class_type_str)
88
+ line = f' {name:<17} {temp:<15} {lines[0]}'
89
+ print(line)
90
+
62
91
  def load_user_components(self, path=os.path.expanduser("~/.pjk/plugins")):
63
92
  if not os.path.isdir(path):
64
93
  return
@@ -87,36 +116,48 @@ class ComponentRegistry:
87
116
  name = usage.name
88
117
 
89
118
  if is_sink(obj, module):
90
- self.sink_factory.register(name, obj, 'user')
119
+ self.sink_factory.register(name, obj, ComponentOrigin.USER)
91
120
  elif is_pipe(obj, module):
92
- self.pipe_factory.register(name, obj, 'user')
121
+ self.pipe_factory.register(name, obj, ComponentOrigin.USER)
93
122
  elif is_source(obj, module):
94
- self.source_factory.register(name, obj, 'user')
123
+ self.source_factory.register(name, obj, ComponentOrigin.USER)
95
124
 
96
- def print_origin_components(self, origin: str, header:str):
97
- component_tuples = []
98
- for factory in [self.source_factory, self.pipe_factory, self.sink_factory]:
99
- component_tuples.extend(factory.get_component_name_class_tuples(origin))
125
+ def load_namespace_extras(self, package: str = "pjk_extras") -> None:
126
+ registrar = ExternalRegistrar(self.source_factory, self.pipe_factory, self.sink_factory)
127
+ import importlib, importlib.metadata as im
100
128
 
101
- if not component_tuples:
102
- return
103
- print()
104
- print(highlight(header))
129
+ for dist in im.distributions():
130
+ name = (dist.metadata.get("Name") or "")
131
+ if not name.startswith("pjk-"):
132
+ continue
105
133
 
106
- for name, comp_class in component_tuples:
107
- usage = comp_class.usage()
108
- comp_class_type_str = get_component_type(comp_class)
109
- lines = usage.desc.split('\n')
110
- temp = highlight(comp_class_type_str)
111
- line = f' {name:<17} {temp:<15} {lines[0]}'
112
- print(line)
134
+ modname = f"{package}.{name[4:].replace('-', '_')}" # pjk-foo-bar -> pjk_extras.foo_bar
135
+
136
+ # Import the extra; if it fails, continue to the next
137
+ try:
138
+ mod = importlib.import_module(modname)
139
+ except Exception as e:
140
+ print(f"[pjk] import failed for {modname}: {e}")
141
+ continue
142
+
143
+ reg = getattr(mod, "register", None)
144
+ if not callable(reg):
145
+ print(f"[pjk] extra '{modname}' has no register(registrar)")
146
+ continue
147
+
148
+ # Run its register; if it fails, continue to the next
149
+ try:
150
+ reg(registrar) # registers class TYPES, same contract as before
151
+ except Exception as e:
152
+ print(f"[pjk] register() failed in {modname}: {e}")
153
+ continue
113
154
 
114
155
  def print_core_formats(factories: List[ComponentFactory]):
115
156
  print(highlight('formats'))
116
157
  formats = set()
117
158
  for factory in factories:
118
- tuples = factory.get_component_name_class_tuples('core')
119
- for name, comp_class in tuples:
159
+ component_dict = factory.get_components([ComponentOrigin.CORE], is_integration=False)
160
+ for name, comp_class in component_dict.items():
120
161
  if issubclass(comp_class, FormatSink|FormatSource):
121
162
  formats.add(name)
122
163
 
@@ -124,23 +165,21 @@ def print_core_formats(factories: List[ComponentFactory]):
124
165
  lst = ', '.join(list(formats))
125
166
  print(f'{space:<15}{lst}. (sources/sinks in local files, dirs and s3)')
126
167
 
127
- def print_factory_core(factory: ComponentFactory, header: str, include_formats: bool=False, include_integrations=False):
128
- components:list = factory.get_component_name_class_tuples('core')
129
- header = highlight(header)
130
- print(header)
168
+ def print_factory_core(factory: ComponentFactory, header: str):
169
+ component_dict = factory.get_components([ComponentOrigin.CORE], is_integration=False)
170
+ header = highlight(header)
171
+ print(header)
131
172
 
132
- i = 0
133
- # user and outside package components are also here, but printed from registry class
134
- for name, comp_class in components:
135
- if issubclass(comp_class, FormatSink|FormatSource) and not include_formats:
136
- continue
173
+ # user and outside package components are also here, but printed from registry class
174
+ for name, comp_class in component_dict.items():
175
+ if issubclass(comp_class, FormatSink|FormatSource):
176
+ continue
137
177
 
138
- usage = comp_class.usage()
139
- lines = usage.desc.split('\n')
178
+ usage = comp_class.usage()
179
+ lines = usage.desc.split('\n')
140
180
 
141
- line = f' {name:<12} {lines[0]}'
142
- print(line)
143
- i += 1
181
+ line = f' {name:<12} {lines[0]}'
182
+ print(line)
144
183
 
145
184
  def get_component_type(component_class) -> str:
146
185
  if issubclass(component_class, Sink):
@@ -178,22 +217,3 @@ def is_sink(obj, module):
178
217
  and obj.__module__ == module.__name__
179
218
  )
180
219
 
181
-
182
- def iter_entry_points(group: str):
183
- eps = importlib.metadata.entry_points()
184
- if hasattr(eps, "select"):
185
- # Python 3.10+ (importlib.metadata.EntryPoints)
186
- return eps.select(group=group)
187
- # Python 3.9 and older
188
- return eps.get(group, [])
189
-
190
- def load_package_extras():
191
- """
192
- Discover and import all installed pjk extras (via entry points).
193
- """
194
- for ep in iter_entry_points("pjk.package_extras"):
195
- try:
196
- importlib.import_module(ep.value)
197
- print(f"[pjk] loaded package extra: {ep.name} -> {ep.value}")
198
- except Exception as e:
199
- print(f"[pjk] failed to load extra {ep.name}: {e}")
@@ -18,8 +18,8 @@ class GraphSink(Sink):
18
18
  component_class=cls
19
19
  )
20
20
  usage.def_arg(name='kind', usage='hist|scatter|bar|line|cumulative')
21
- usage.def_param(name='x', usage='Name of x-axis field', default='x')
22
- usage.def_param(name='y', usage='Name of y-axis field', default='y')
21
+ usage.def_param(name='x', usage='x-axis field', default='x')
22
+ usage.def_param(name='y', usage='comma separated list of y-axis fields', default='y')
23
23
  usage.def_param(name='pause', usage='Seconds to show graph', is_num=True, default='-1')
24
24
  return usage
25
25