python-jack-knife 0.6.17__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. {python_jack_knife-0.6.17/src/python_jack_knife.egg-info → python_jack_knife-0.7.1}/PKG-INFO +1 -1
  2. python_jack_knife-0.7.1/src/pjk/history.py +106 -0
  3. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/integrations/opensearch_query_pipe.py +1 -1
  4. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/integrations/postgres_pipe.py +12 -3
  5. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/main.py +38 -28
  6. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/man_page.py +49 -5
  7. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/parser.py +16 -1
  8. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/factory.py +1 -1
  9. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/select.py +2 -2
  10. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/registry.py +18 -17
  11. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/graph_bar_line.py +11 -9
  12. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/factory.py +0 -4
  13. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/npy_source.py +3 -4
  14. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/version.py +1 -1
  15. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1/src/python_jack_knife.egg-info}/PKG-INFO +1 -1
  16. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/python_jack_knife.egg-info/SOURCES.txt +1 -2
  17. python_jack_knife-0.6.17/src/pjk/sources/configs_source.py +0 -52
  18. python_jack_knife-0.6.17/src/pjk/sources/macro_source.py +0 -46
  19. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/LICENSE +0 -0
  20. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/README.md +0 -0
  21. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/pyproject.toml +0 -0
  22. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/setup.cfg +0 -0
  23. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/__init__.py +0 -0
  24. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/common.py +0 -0
  25. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/components.py +0 -0
  26. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/integrations/opensearch_client.py +0 -0
  27. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/integrations/opensearch_index_sink.py +0 -0
  28. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/integrations/snowflake_pipe.py +0 -0
  29. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/log.py +0 -0
  30. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/__init__.py +0 -0
  31. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/denorm.py +0 -0
  32. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/filter.py +0 -0
  33. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/head.py +0 -0
  34. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/join.py +0 -0
  35. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/let_reduce.py +0 -0
  36. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/map.py +0 -0
  37. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/move_field.py +0 -0
  38. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/progress_pipe.py +0 -0
  39. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/query_pipe.py +0 -0
  40. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/remove_field.py +0 -0
  41. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/sample.py +0 -0
  42. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/sort.py +0 -0
  43. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/tail.py +0 -0
  44. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/user_pipe_factory.py +0 -0
  45. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/pipes/where.py +0 -0
  46. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/progress.py +0 -0
  47. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/__init__.py +0 -0
  48. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/create_sink.py +0 -0
  49. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/csv_sink.py +0 -0
  50. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/devnull.py +0 -0
  51. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/dir_sink.py +0 -0
  52. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/expect.py +0 -0
  53. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/factory.py +0 -0
  54. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/format_sink.py +0 -0
  55. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/graph.py +0 -0
  56. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/graph_cumulative.py +0 -0
  57. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/graph_hist.py +0 -0
  58. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/graph_scatter.py +0 -0
  59. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/json_sink.py +0 -0
  60. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/s3_sink.py +0 -0
  61. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/s3_stream.py +0 -0
  62. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/sinks.py +0 -0
  63. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/stdout.py +0 -0
  64. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/tsv_sink.py +0 -0
  65. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sinks/user_sink_factory.py +0 -0
  66. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/__init__.py +0 -0
  67. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/csv_source.py +0 -0
  68. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/dir_source.py +0 -0
  69. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/favorite_source.py +0 -0
  70. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/format_source.py +0 -0
  71. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/inline_source.py +0 -0
  72. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/json_source.py +0 -0
  73. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/lazy_file.py +0 -0
  74. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/lazy_file_local.py +0 -0
  75. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/lazy_file_s3.py +0 -0
  76. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/parquet_source.py +0 -0
  77. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/s3_source.py +0 -0
  78. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/source_list.py +0 -0
  79. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/sql_source.py +0 -0
  80. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/tsv_source.py +0 -0
  81. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/sources/user_source_factory.py +0 -0
  82. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/pjk/usage.py +0 -0
  83. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  84. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  85. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/python_jack_knife.egg-info/requires.txt +0 -0
  86. {python_jack_knife-0.6.17 → python_jack_knife-0.7.1}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.6.17
3
+ Version: 0.7.1
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -0,0 +1,106 @@
1
+ import re
2
+ import os
3
+ import shlex
4
+ from typing import List, Set
5
+ from pjk.common import pager_stdout, highlight
6
+
7
+ LOG_FILE = '.pjk-history.txt'
8
+
9
+ def printable_command(tokens: list) -> str:
10
+ pattern = re.compile(r"[({]")
11
+
12
+ return ' '.join(
13
+ f'"{s}"' if pattern.search(s) else s
14
+ for s in tokens
15
+ )
16
+
17
+ def read_history(log_path: str) -> List[int]:
18
+ """
19
+ Reads the history file into an ordered dictionary (command -> ordinal)
20
+ """
21
+ # dict preserves insertion order in modern Python
22
+ clist: List[str] = []
23
+ cset:Set = set()
24
+
25
+ try:
26
+ with open(log_path, "r") as f:
27
+ ordinal = 1
28
+ for line in f:
29
+ line = line.strip()
30
+ if not line:
31
+ continue
32
+
33
+ if 'pjk ' in line: # legacy
34
+ line = line.split('pjk ', 1)[1]
35
+
36
+ # Expected format: <command_string>
37
+ line = line.strip()
38
+
39
+ # 2. Add to the map. Since dict keys must be unique,
40
+ # this ensures the map only contains one entry per command, preserving the first seen's order.
41
+ if line not in cset:
42
+ clist.append(line)
43
+ cset.add(line)
44
+
45
+ except FileNotFoundError:
46
+ pass
47
+ except (PermissionError, OSError) as e:
48
+ print(f"Warning: Could not read history file {log_path}: {e}")
49
+
50
+ return clist, cset
51
+
52
+ def write_history(tokens: list):
53
+ if os.environ.get("PJK_NO_HISTORY") == "1":
54
+ return
55
+
56
+ if len(tokens) < 2:
57
+ return
58
+
59
+ if tokens[0] == 'man':
60
+ return
61
+
62
+ new_command_string = printable_command(tokens)
63
+
64
+ # 1. Read the existing history and find the highest number
65
+ clist, cset = read_history(LOG_FILE)
66
+
67
+ # 2. Check for duplicates (Fast O(1) lookup using the dict key)
68
+ if new_command_string in cset:
69
+ # Command is a duplicate, nothing to do.
70
+ return
71
+
72
+ # 3. Append the new command line to the file
73
+ try:
74
+ # Use 'a' to append the new line only
75
+ with open(LOG_FILE, "a") as f:
76
+ f.write(f"{new_command_string}\n")
77
+
78
+ except (PermissionError, OSError) as e:
79
+ print(f"Warning: Could not write to history file {LOG_FILE}: {e}")
80
+
81
+ def display_history():
82
+ clist, cset = read_history(LOG_FILE)
83
+
84
+ with pager_stdout():
85
+ print(f"Local history in '{LOG_FILE}'")
86
+ print("Use 'pjk +<#>' to execute command.")
87
+ print()
88
+ o = highlight('#', 'bold', '#')
89
+ c = highlight('command', 'bold', 'command')
90
+ print(f'{o}\t{c}')
91
+
92
+ ordn = 1
93
+ for command in reversed(clist):
94
+ print(f'{ordn}\t{command}')
95
+ ordn += 1
96
+
97
+ def get_history_tokens(ord_str: str):
98
+ ord_in = int(ord_str)
99
+ clist, cset = read_history(LOG_FILE)
100
+ ordn = 0
101
+ for command in reversed(clist):
102
+ ordn += 1
103
+ if ord_in == ordn:
104
+ parts = shlex.split(command, comments=True, posix=True)
105
+ return parts
106
+ return None
@@ -23,7 +23,7 @@ def build_body_from_string(query_string: str) -> dict:
23
23
 
24
24
  class OpenSearchQueryPipe(QueryPipe, Integration):
25
25
  name = "os_query"
26
- desc = ("Opensearch query pipe. Uses record['query'] or record['os_query_object'] for os query\n"
26
+ desc = ("Opensearch query pipe. Uses record['query'] or record['os_query_object']\n"
27
27
  "An instance may define 'default_index' otherwise the query object must include an 'index' field.\n")
28
28
  arg0 = ("instance", "instance to query over.")
29
29
  examples = [
@@ -35,9 +35,12 @@ class DBClient:
35
35
  self.conn = DBClient._connection
36
36
 
37
37
  def close(self):
38
+ import pg8000 # lazy
38
39
  if self.conn is not None:
39
40
  try:
40
41
  self.conn.close()
42
+ except pg8000.exceptions.InterfaceError:
43
+ pass
41
44
  finally:
42
45
  DBClient._connection = None
43
46
 
@@ -123,10 +126,15 @@ class PostgresPipe(QueryPipe,Integration):
123
126
 
124
127
  self.params_field = "params" # optional: list/tuple (positional) or dict (named)
125
128
 
129
+ self.client = None
130
+
126
131
  def reset(self):
127
132
  # stateless across reset
128
133
  pass
129
134
 
135
+ def close(self):
136
+ self.client.close()
137
+
130
138
  def _make_header(self, cur, query: str, params=None) -> Dict[str, Any]:
131
139
  """
132
140
  Inspect the cursor and build a full header record.
@@ -155,7 +163,7 @@ class PostgresPipe(QueryPipe,Integration):
155
163
  return h
156
164
 
157
165
  def execute_query_returning_S_xO_iterable(self, record):
158
- client = DBClient(
166
+ self.client = DBClient(
159
167
  host=self.db_host,
160
168
  username=self.db_user,
161
169
  password=self.db_pass,
@@ -173,7 +181,7 @@ class PostgresPipe(QueryPipe,Integration):
173
181
  params = record.get(self.params_field) # single-exec params
174
182
  batch = record.get("batch_params", None) # list[tuple|dict] for batching
175
183
 
176
- cur = client.conn.cursor()
184
+ cur = self.client.conn.cursor()
177
185
  try:
178
186
  did_executemany = False
179
187
 
@@ -215,4 +223,5 @@ class PostgresPipe(QueryPipe,Integration):
215
223
  finally:
216
224
  cur.close()
217
225
  finally:
218
- client.close()
226
+ pass
227
+ # client.close()
@@ -3,40 +3,22 @@
3
3
 
4
4
  #!/usr/bin/env python
5
5
  import sys
6
- import os
6
+ import os, re
7
7
  import shlex
8
- from typing import List
8
+ from typing import List, Dict
9
9
  from pjk.parser import ExpressionParser
10
10
  from pjk.usage import UsageError
11
11
  from pjk.log import init as init_logging
12
- from datetime import datetime
13
12
  import traceback
14
13
  import concurrent.futures
15
14
  from pjk.registry import ComponentRegistry
16
15
  from pjk.sinks.stdout import StdoutSink
17
- from pjk.man_page import do_man, do_examples
16
+ from pjk.man_page import do_man, do_examples, display_configs, display_macros
17
+ from pjk.history import write_history, display_history, get_history_tokens
18
18
  from pjk.sinks.expect import ExpectSink
19
19
  from pjk.progress import ProgressDisplay
20
20
  from pjk.version import __version__
21
21
 
22
- def write_history(tokens):
23
- if os.environ.get("PJK_NO_HISTORY") == "1":
24
- return
25
-
26
- log_path = ".pjk-history.txt"
27
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
28
-
29
- if len(tokens) < 2:
30
- return
31
-
32
- command = " ".join(tokens)
33
-
34
- try:
35
- with open(log_path, "a") as f:
36
- f.write(f"{timestamp}\tpjk {command}\n")
37
- except (PermissionError, OSError):
38
- pass
39
-
40
22
  def execute_threaded(sinks, stop_progress=None):
41
23
  max_workers = min(32, len(sinks))
42
24
  executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) # no 'with'
@@ -77,6 +59,32 @@ def initialize():
77
59
  #dst_dir.mkdir(parents=True, exist_ok=True)
78
60
  #hutil.copy(src, dst_dir / src.name)
79
61
 
62
+ def execute_non_expression(tokens, registry):
63
+ command = tokens[0]
64
+
65
+ if len(tokens) == 2 and command == 'man':
66
+ do_man(tokens[1], registry)
67
+ return True
68
+
69
+ if len(tokens) != 1:
70
+ return False
71
+
72
+ match command:
73
+ case 'examples':
74
+ do_examples(command, registry)
75
+ case 'examples+':
76
+ do_examples(command, registry)
77
+ case 'configs':
78
+ display_configs()
79
+ case 'macros':
80
+ display_macros()
81
+ case '+':
82
+ display_history()
83
+ case _:
84
+ return False
85
+
86
+ return True
87
+
80
88
  def execute(command: str):
81
89
  tokens = shlex.split(command, comments=True, posix=True)
82
90
  execute_tokens(tokens)
@@ -93,14 +101,16 @@ def execute_tokens(tokens: List[str]):
93
101
  if len(tokens) < 1:
94
102
  registry.print_usage()
95
103
  return
96
-
97
- if len(tokens) == 2 and tokens[0] == 'man':
98
- do_man(tokens[1], registry)
104
+
105
+ if execute_non_expression(tokens, registry):
99
106
  return
100
107
 
101
- if len(tokens) == 1 and tokens[0] in ['examples', 'examples+']:
102
- do_examples(tokens[0], registry)
103
- return
108
+ # execute command from .pjk-history.txt
109
+ if len(tokens) == 1 and re.fullmatch(r'^\+\d+$', tokens[0]):
110
+ tokens = get_history_tokens(tokens[0])
111
+ if not tokens:
112
+ print('No such history')
113
+ return
104
114
 
105
115
  parser = ExpressionParser(registry)
106
116
 
@@ -1,15 +1,15 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
- from pjk.pipes.factory import PipeFactory
5
- from pjk.sources.factory import SourceFactory
6
- from pjk.sinks.factory import SinkFactory
7
- from pjk.parser import ExpressionParser
4
+ from pjk.parser import ExpressionParser, MACRO_PREFIX, MACROS_FILE, read_macros
8
5
  from pjk.components import Source, Pipe, Sink
9
- from pjk.usage import Usage, ParsedToken
6
+ from pjk.usage import Usage, CONFIG_FILE
10
7
  from pjk.registry import ComponentRegistry
11
8
  from pjk.common import pager_stdout, highlight, ComponentOrigin
12
9
  from contextlib import nullcontext
10
+ import yaml
11
+ import sys
12
+ from pathlib import Path
13
13
 
14
14
  def get_base_class(usage: Usage, as_string: bool = False):
15
15
  if issubclass(usage.comp_class, Sink):
@@ -87,6 +87,50 @@ def print_man(registry: ComponentRegistry, name: str, usage: Usage):
87
87
  for expr_tokens, expect in usage.get_examples(): # expect in InlineSource format
88
88
  print_example(registry, expr_tokens, expect, name)
89
89
 
90
+ def display_configs():
91
+ path = Path(CONFIG_FILE).expanduser()
92
+
93
+ with pager_stdout():
94
+ with path.open("r", encoding="utf-8") as f:
95
+ data = yaml.safe_load(f) or {}
96
+ if not isinstance(data, dict):
97
+ raise ValueError("Top-level YAML must be a mapping of records")
98
+
99
+ print(f'Component configs defined in {CONFIG_FILE}')
100
+ print()
101
+ for name, body_dict in data.items():
102
+ print('=========================================')
103
+ print(' ', highlight(name, 'bold', name))
104
+ print('=========================================')
105
+
106
+ if 'password' in body_dict:
107
+ body_dict['password'] = '*************'
108
+
109
+ try:
110
+ yaml.dump(
111
+ body_dict,
112
+ sys.stdout,
113
+ sort_keys=False,
114
+ explicit_start=False,
115
+ allow_unicode=True,
116
+ width=10**9,
117
+ )
118
+ except BrokenPipeError:
119
+ break
120
+ print()
121
+
122
+ def display_macros():
123
+ macros = read_macros()
124
+
125
+ with pager_stdout():
126
+ print(f"Macros defined in '{MACROS_FILE}'")
127
+
128
+ print(f"Usage: pjk [...] {MACRO_PREFIX}:<macro_name> [...]")
129
+ print()
130
+ for name, value in macros.items():
131
+ print(f'{name}: {value}')
132
+ print()
133
+
90
134
  def do_examples(token:str, registry: ComponentRegistry):
91
135
  no_pager = token.endswith('+')
92
136
  cm = nullcontext() if no_pager else pager_stdout()
@@ -7,12 +7,27 @@ from typing import Any, List
7
7
  from pjk.components import Source, Pipe, Sink
8
8
  from pjk.usage import TokenError, UsageError, ParsedToken, Usage
9
9
  from pjk.pipes.let_reduce import ReducePipe
10
- from pjk.sources.macro_source import MACROS_FILE, MACRO_PREFIX, read_macros
11
10
  from pjk.pipes.progress_pipe import ProgressPipe
12
11
  from pjk.registry import ComponentRegistry
13
12
  from pjk.progress import papi
13
+ from typing import Dict
14
+ from pathlib import Path
14
15
  from pjk.progress import ProgressIgnore
15
16
 
17
+ MACROS_FILE = '~/.pjk/macros.txt'
18
+ MACRO_PREFIX = 'm'
19
+ def read_macros(file_name: str = MACROS_FILE) -> Dict[str, str]:
20
+ out: Dict[str, str] = {}
21
+ path = Path(file_name).expanduser()
22
+ with path.open(encoding="utf-8") as f:
23
+ for raw in f:
24
+ line = raw.split("#", 1)[0].strip()
25
+ if not line or ":" not in line:
26
+ continue
27
+ key, val = line.split(":", 1)
28
+ out[key.strip()] = val.strip()
29
+ return out
30
+
16
31
  # macros are of the form MACRO_PREFIX:<instance>
17
32
  def handle_macros(token: str, expanded: List[str]):
18
33
  if not token.startswith(f'{MACRO_PREFIX}:'):
@@ -38,7 +38,7 @@ COMPONENTS = {
38
38
  'reduce': ReducePipe,
39
39
  'sort': SortPipe,
40
40
  'where': WherePipe,
41
- 'sel': SelectFields,
41
+ 'select': SelectFields,
42
42
  'sample': SamplePipe,
43
43
  'explode': DenormPipe,
44
44
  'postgres': PostgresPipe,
@@ -10,12 +10,12 @@ class SelectFields(DeepCopyPipe):
10
10
  @classmethod
11
11
  def usage(cls):
12
12
  usage = Usage(
13
- name='sel',
13
+ name='select',
14
14
  desc='Select specific fields from each record.',
15
15
  component_class=cls
16
16
  )
17
17
  usage.def_arg(name='fields', usage='Comma-separated list of fields to retain')
18
- usage.def_example(expr_tokens=["{id:1, dir:'up', color:'blue'}", 'sel:id,color'], expect="id: 1, color:'blue'")
18
+ usage.def_example(expr_tokens=["{id:1, dir:'up', color:'blue'}", 'select:id,color'], expect="id: 1, color:'blue'")
19
19
  return usage
20
20
 
21
21
  def __init__(self, ptok: ParsedToken, usage: Usage):
@@ -11,7 +11,7 @@ from pjk.sources.format_source import FormatSource
11
11
  import importlib.util
12
12
  import importlib
13
13
  from pjk.components import Pipe, Source, Sink
14
- from pjk.common import ComponentFactory, highlight, ComponentOrigin
14
+ from pjk.common import ComponentFactory, highlight, ComponentOrigin, pager_stdout
15
15
  from typing import List, Type
16
16
 
17
17
  class ExternalRegistrar:
@@ -50,22 +50,23 @@ class ComponentRegistry:
50
50
  return [self.source_factory, self.pipe_factory, self.sink_factory]
51
51
 
52
52
  def print_usage(self):
53
- print('Usage: pjk <source> [<pipe> ...] <sink>')
54
- print(' pjk man <component> | --all')
55
- print(' pjk examples')
56
- print()
57
-
58
- print_core_formats([self.source_factory, self.sink_factory])
59
- print()
60
- print_factory_core(self.source_factory, header='sources')
61
- print()
62
- print_factory_core(self.pipe_factory, header='pipes')
63
- print()
64
- print_factory_core(self.sink_factory, header='sinks')
65
-
66
- self.print_non_core([ComponentOrigin.CORE,ComponentOrigin.EXTERNAL], is_integration=True, header='integrations')
67
- self.print_non_core([ComponentOrigin.EXTERNAL], is_integration=False, header='apps')
68
- self.print_non_core([ComponentOrigin.USER], is_integration=None, header='user components (~/.pjk/plugins)')
53
+ with pager_stdout():
54
+ print('Usage: pjk <source> [<pipe> ...] <sink>')
55
+ print(' pjk man <component> | --all')
56
+ print(' pjk examples | configs | macros | + (for history)')
57
+ print()
58
+
59
+ print_core_formats([self.source_factory, self.sink_factory])
60
+ print()
61
+ print_factory_core(self.source_factory, header='sources')
62
+ print()
63
+ print_factory_core(self.pipe_factory, header='pipes')
64
+ print()
65
+ print_factory_core(self.sink_factory, header='sinks')
66
+
67
+ self.print_non_core([ComponentOrigin.CORE,ComponentOrigin.EXTERNAL], is_integration=True, header='integrations')
68
+ self.print_non_core([ComponentOrigin.EXTERNAL], is_integration=False, header='apps')
69
+ self.print_non_core([ComponentOrigin.USER], is_integration=None, header='user components (~/.pjk/plugins)')
69
70
 
70
71
  # is_integration = True|False|None None=don't care
71
72
  def print_non_core(self, origin_list: List[ComponentOrigin], is_integration: bool, header:str):
@@ -20,10 +20,6 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence
20
20
  from datetime import date, datetime
21
21
  from collections import defaultdict
22
22
 
23
- import numpy as np
24
- import pandas as pd
25
-
26
-
27
23
  # ----------------------------- Public Params -----------------------------
28
24
  @dataclass
29
25
  class GraphParams:
@@ -48,6 +44,8 @@ class TimeDetector:
48
44
 
49
45
  @staticmethod
50
46
  def is_time(xs: pd.Series) -> bool:
47
+ import numpy as np # lazy
48
+ import pandas as pd # lazy
51
49
  # Already datetime dtype?
52
50
  if pd.api.types.is_datetime64_any_dtype(xs):
53
51
  return True
@@ -74,6 +72,7 @@ class TimeDetector:
74
72
 
75
73
  @staticmethod
76
74
  def parse_times(series: pd.Series) -> pd.Series:
75
+ import pandas as pd # lazy
77
76
  numeric = pd.to_numeric(series, errors="coerce")
78
77
  parsed = None
79
78
  if numeric.notna().mean() >= 0.9:
@@ -92,6 +91,7 @@ class MultiYAdapter:
92
91
  """Builds wide dataframe: columns = ['x'] + y_fields; sums duplicates of x."""
93
92
  @staticmethod
94
93
  def to_df(records: Iterable[Dict[str, Any]], x_field: str, y_fields: Sequence[str]) -> pd.DataFrame:
94
+ import pandas as pd # lazy
95
95
  rows: List[Dict[str, Any]] = []
96
96
  for r in records:
97
97
  if x_field not in r:
@@ -137,12 +137,14 @@ class SingleYWithSetsAdapter:
137
137
  # ----------------------------- Plotter -----------------------------
138
138
  class GraphPlotter:
139
139
  def __init__(self, params: GraphParams):
140
+ import numpy as np
140
141
  self.pms = params
141
142
  self.y_fields = list(dict.fromkeys(self.pms.y_fields)) # dedupe, preserve order
142
143
 
143
144
  def plot(self, chart_type: str = "line"):
144
- import matplotlib.pyplot as plt
145
- import matplotlib.dates as mdates
145
+ import matplotlib.pyplot as plt # lazy
146
+ import matplotlib.dates as mdates # lazy
147
+ import pandas as pd # lazy
146
148
 
147
149
  fig = plt.figure()
148
150
  ax = plt.gca()
@@ -258,7 +260,7 @@ class GraphPlotter:
258
260
  # ---------- Formatting helpers ----------
259
261
  @staticmethod
260
262
  def _format_time_axis(ax, df: pd.DataFrame) -> None:
261
- import matplotlib.dates as mdates
263
+ import matplotlib.dates as mdates # lazy
262
264
  fig = ax.get_figure()
263
265
  ts = df["ts"]
264
266
  if ts.empty:
@@ -322,7 +324,7 @@ class GraphPlotter:
322
324
 
323
325
  # ---------- Misc ----------
324
326
  def _apply_args_dict(self) -> None:
325
- import matplotlib.pyplot as plt
327
+ import matplotlib.pyplot as plt # lazy
326
328
  for name, val in getattr(self.pms, "args_dict", {}).items():
327
329
  fn = getattr(plt, name, None)
328
330
  if callable(fn):
@@ -345,7 +347,7 @@ def graph_bar_line(obj, type):
345
347
  Returns (fig, ax) for optional downstream tweaks (safe to ignore).
346
348
  """
347
349
  # Lazy import (ensures MPL backend)
348
- import matplotlib.pyplot as plt # noqa: F401
350
+ import matplotlib.pyplot as plt # noqa: F401 # lazy
349
351
 
350
352
  # Normalize y_fields from string or list
351
353
  raw_y = obj.y_field if isinstance(obj.y_field, str) else str(obj.y_field)
@@ -13,8 +13,6 @@ from pjk.sources.inline_source import InlineSource
13
13
  from pjk.sources.user_source_factory import UserSourceFactory
14
14
  from pjk.sources.parquet_source import ParquetSource
15
15
  from pjk.sources.format_source import FormatSource
16
- from pjk.sources.configs_source import ConfigsSource
17
- from pjk.sources.macro_source import MacroSource
18
16
 
19
17
  COMPONENTS = {
20
18
  'inline': InlineSource,
@@ -25,8 +23,6 @@ COMPONENTS = {
25
23
  'sql': SQLSource,
26
24
  'npy': NpySource,
27
25
  'parquet': ParquetSource,
28
- 'configs': ConfigsSource,
29
- 'macros': MacroSource
30
26
  }
31
27
 
32
28
  class SourceFactory(ComponentFactory):
@@ -4,18 +4,15 @@
4
4
  import json
5
5
  from typing import Iterator, Dict, Any
6
6
 
7
- import numpy as np
8
- from pjk.usage import NoBindUsage
9
- from pjk.components import Source
10
7
  from pjk.sources.lazy_file import LazyFile
11
8
  from pjk.sources.format_source import FormatSource
12
9
  from pjk.log import logger
13
10
 
14
-
15
11
  class NpySource(FormatSource):
16
12
  extension = 'npy'
17
13
 
18
14
  def __init__(self, lazy_file: LazyFile):
15
+ super().__init__(root=None)
19
16
  self.lazy_file = lazy_file
20
17
  self.num_vecs = 0
21
18
 
@@ -32,9 +29,11 @@ class NpySource(FormatSource):
32
29
 
33
30
  try:
34
31
  # Use mmap to avoid loading entire array in RAM at once.
32
+ import numpy as np #lazy import
35
33
  arr = np.load(path, mmap_mode="r", allow_pickle=False)
36
34
  except Exception as e:
37
35
  logger.error(f"Failed to load .npy file at {path}: {e}")
36
+ raise Exception(f"Failed to load .npy file at {path}: {e}")
38
37
  return
39
38
 
40
39
  if arr.size == 0:
@@ -1,4 +1,4 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
- __version__ = "0.6.17"
4
+ __version__ = "0.7.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.6.17
3
+ Version: 0.7.1
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -4,6 +4,7 @@ pyproject.toml
4
4
  src/pjk/__init__.py
5
5
  src/pjk/common.py
6
6
  src/pjk/components.py
7
+ src/pjk/history.py
7
8
  src/pjk/log.py
8
9
  src/pjk/main.py
9
10
  src/pjk/man_page.py
@@ -56,7 +57,6 @@ src/pjk/sinks/stdout.py
56
57
  src/pjk/sinks/tsv_sink.py
57
58
  src/pjk/sinks/user_sink_factory.py
58
59
  src/pjk/sources/__init__.py
59
- src/pjk/sources/configs_source.py
60
60
  src/pjk/sources/csv_source.py
61
61
  src/pjk/sources/dir_source.py
62
62
  src/pjk/sources/factory.py
@@ -67,7 +67,6 @@ src/pjk/sources/json_source.py
67
67
  src/pjk/sources/lazy_file.py
68
68
  src/pjk/sources/lazy_file_local.py
69
69
  src/pjk/sources/lazy_file_s3.py
70
- src/pjk/sources/macro_source.py
71
70
  src/pjk/sources/npy_source.py
72
71
  src/pjk/sources/parquet_source.py
73
72
  src/pjk/sources/s3_source.py
@@ -1,52 +0,0 @@
1
- # template for Source used by create sink
2
-
3
- from pjk.components import Source
4
- from pjk.usage import Usage, ParsedToken, CONFIG_FILE
5
- import yaml
6
- from pathlib import Path
7
- import yaml
8
- from pathlib import Path
9
-
10
- class YamlRecords:
11
- def __init__(self, path):
12
- self.path = Path(path).expanduser()
13
-
14
- def __iter__(self):
15
- with self.path.open("r", encoding="utf-8") as f:
16
- data = yaml.safe_load(f) or {}
17
- if not isinstance(data, dict):
18
- raise ValueError("Top-level YAML must be a mapping of records")
19
-
20
- for name, body_dict in data.items():
21
- component, instance = name.split('-', 1)
22
- out = {
23
- 'entry': name,
24
- 'component': component,
25
- 'instance': instance,
26
- 'settings': body_dict
27
- }
28
- #out.update(body_dict)
29
- yield out
30
-
31
- class ConfigsSource(Source):
32
- @classmethod
33
- def usage(cls):
34
- usage = Usage(
35
- name='configs',
36
- desc=f'A source of pjk configuration in {CONFIG_FILE}',
37
- component_class=cls
38
- )
39
- usage.def_example(expr_tokens=['configs', '-'], expect=None)
40
- return usage
41
-
42
- def __init__(self, ptok: ParsedToken, usage: Usage):
43
- self.config_recs = YamlRecords(CONFIG_FILE)
44
-
45
- def __iter__(self):
46
- yield from self.config_recs
47
-
48
- def deep_copy(self):
49
- return None
50
-
51
- def close(self):
52
- pass
@@ -1,46 +0,0 @@
1
- from pjk.usage import Usage, ParsedToken
2
- from pjk.components import Source
3
- from pathlib import Path
4
- from typing import Dict
5
-
6
- MACROS_FILE = '~/.pjk/macros.txt'
7
- MACRO_PREFIX = 'm'
8
-
9
- def read_macros(file_name: str = MACROS_FILE) -> Dict[str, str]:
10
- out: Dict[str, str] = {}
11
- path = Path(file_name).expanduser()
12
- with path.open(encoding="utf-8") as f:
13
- for raw in f:
14
- line = raw.split("#", 1)[0].strip()
15
- if not line or ":" not in line:
16
- continue
17
- key, val = line.split(":", 1)
18
- out[key.strip()] = val.strip()
19
- return out
20
-
21
- class MacroSource(Source):
22
- @classmethod
23
- def usage(cls):
24
- u = Usage(
25
- name='macros',
26
- desc=f"A source of macros stored in {MACROS_FILE}\n"
27
- f"Specific macros are referenced as '{MACRO_PREFIX}:<instance>', e.g. 'pjk {MACRO_PREFIX}:hw -'",
28
- component_class=cls
29
- )
30
- u.def_example(["macros", "-"], None)
31
- return u
32
-
33
- def __init__(self, ptok: ParsedToken, usage: Usage):
34
- pass
35
-
36
- # only the instance=+ case comes here. See parser
37
- def __iter__(self):
38
- macros = read_macros()
39
- for k, v in macros.items():
40
- yield {k: v}
41
-
42
- def deep_copy(self):
43
- return None
44
-
45
- def close(self):
46
- pass