python-jack-knife 0.6.16__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/PKG-INFO +1 -1
  2. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/components.py +9 -5
  3. python_jack_knife-0.7.0/src/pjk/history.py +103 -0
  4. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/integrations/opensearch_query_pipe.py +1 -1
  5. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/main.py +38 -28
  6. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/man_page.py +49 -5
  7. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/parser.py +19 -3
  8. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/denorm.py +2 -2
  9. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/filter.py +2 -2
  10. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/join.py +2 -2
  11. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/let_reduce.py +5 -5
  12. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/map.py +2 -2
  13. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/query_pipe.py +1 -1
  14. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/remove_field.py +1 -1
  15. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/select.py +1 -1
  16. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/sort.py +1 -1
  17. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/tail.py +1 -1
  18. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/where.py +5 -3
  19. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/progress.py +61 -46
  20. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/registry.py +18 -17
  21. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/graph.py +2 -0
  22. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/graph_bar_line.py +14 -14
  23. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/s3_sink.py +1 -0
  24. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/csv_source.py +1 -0
  25. python_jack_knife-0.7.0/src/pjk/sources/dir_source.py +181 -0
  26. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/factory.py +0 -4
  27. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/format_source.py +3 -1
  28. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/inline_source.py +1 -0
  29. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/json_source.py +1 -0
  30. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/s3_source.py +1 -0
  31. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/sql_source.py +1 -0
  32. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/version.py +1 -1
  33. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
  34. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/python_jack_knife.egg-info/SOURCES.txt +1 -2
  35. python_jack_knife-0.6.16/src/pjk/sources/configs_source.py +0 -52
  36. python_jack_knife-0.6.16/src/pjk/sources/dir_source.py +0 -82
  37. python_jack_knife-0.6.16/src/pjk/sources/macro_source.py +0 -46
  38. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/LICENSE +0 -0
  39. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/README.md +0 -0
  40. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/pyproject.toml +0 -0
  41. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/setup.cfg +0 -0
  42. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/__init__.py +0 -0
  43. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/common.py +0 -0
  44. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/integrations/opensearch_client.py +0 -0
  45. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/integrations/opensearch_index_sink.py +0 -0
  46. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/integrations/postgres_pipe.py +0 -0
  47. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/integrations/snowflake_pipe.py +0 -0
  48. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/log.py +0 -0
  49. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/__init__.py +0 -0
  50. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/factory.py +0 -0
  51. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/head.py +0 -0
  52. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/move_field.py +0 -0
  53. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/progress_pipe.py +0 -0
  54. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/sample.py +0 -0
  55. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/pipes/user_pipe_factory.py +0 -0
  56. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/__init__.py +0 -0
  57. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/create_sink.py +0 -0
  58. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/csv_sink.py +0 -0
  59. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/devnull.py +0 -0
  60. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/dir_sink.py +0 -0
  61. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/expect.py +0 -0
  62. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/factory.py +0 -0
  63. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/format_sink.py +0 -0
  64. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/graph_cumulative.py +0 -0
  65. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/graph_hist.py +0 -0
  66. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/graph_scatter.py +0 -0
  67. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/json_sink.py +0 -0
  68. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/s3_stream.py +0 -0
  69. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/sinks.py +0 -0
  70. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/stdout.py +0 -0
  71. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/tsv_sink.py +0 -0
  72. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sinks/user_sink_factory.py +0 -0
  73. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/__init__.py +0 -0
  74. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/favorite_source.py +0 -0
  75. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/lazy_file.py +0 -0
  76. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/lazy_file_local.py +0 -0
  77. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/lazy_file_s3.py +0 -0
  78. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/npy_source.py +0 -0
  79. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/parquet_source.py +0 -0
  80. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/source_list.py +0 -0
  81. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/tsv_source.py +0 -0
  82. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/sources/user_source_factory.py +0 -0
  83. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/pjk/usage.py +0 -0
  84. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  85. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  86. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/python_jack_knife.egg-info/requires.txt +0 -0
  87. {python_jack_knife-0.6.16 → python_jack_knife-0.7.0}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.6.16
3
+ Version: 0.7.0
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -35,9 +35,12 @@ class Source(ABC):
35
35
  component_class=cls
36
36
  )
37
37
 
38
+ def __init__(self, root = None):
39
+ self.root = root
40
+
38
41
  @abstractmethod
39
42
  def __iter__(self):
40
- raise NotImplementedError("__iter__ must be implemented by subclasses")
43
+ pass
41
44
 
42
45
  def __next__(self):
43
46
  # lazily create an internal iterator the first time next() is called
@@ -57,7 +60,8 @@ class Source(ABC):
57
60
  class Pipe(Source):
58
61
  arity: int = 1
59
62
 
60
- def __init__(self, ptok: ParsedToken, usage: Usage = None):
63
+ def __init__(self, ptok: ParsedToken, usage: Usage, root = None):
64
+ self.root = root
61
65
  self.ptok = ptok
62
66
  self.usage = usage
63
67
  self.left = None # left source for convience
@@ -95,7 +99,7 @@ class DeepCopyPipe(Pipe):
95
99
  return None
96
100
 
97
101
  # re-instantiate using the actual subclass
98
- pipe = type(self)(self.ptok, self.usage)
102
+ pipe = type(self)(self.ptok, self.usage, self) # this self is the root
99
103
  pipe.add_source(source_clone)
100
104
  return pipe
101
105
 
@@ -108,7 +112,8 @@ class Sink(ABC):
108
112
  component_class=cls
109
113
  )
110
114
 
111
- def __init__(self, ptok: ParsedToken, usage: Usage = None):
115
+ def __init__(self, ptok: ParsedToken, usage: Usage, root = None):
116
+ self.root = root
112
117
  self.ptok = ptok
113
118
  self.usage = usage
114
119
 
@@ -135,4 +140,3 @@ class Sink(ABC):
135
140
 
136
141
  def deep_copy(self):
137
142
  return None
138
-
@@ -0,0 +1,103 @@
1
+ import re
2
+ import os
3
+ import shlex
4
+ from typing import List, Set
5
+ from pjk.common import pager_stdout, highlight
6
+
7
+ LOG_FILE = '.pjk-history.txt'
8
+
9
def printable_command(tokens: list) -> str:
    """Render argv-style tokens as one line that shlex can split back.

    The result is what gets stored in the history file and later re-parsed
    with ``shlex.split(..., comments=True, posix=True)``, so every token that
    is not made purely of shell-safe characters must be quoted; otherwise
    whitespace, ``#`` or quote characters inside a token would change the
    token list on replay.

    Args:
        tokens: the command tokens, one string per argument.

    Returns:
        The tokens joined by single spaces, quoted where necessary.
    """
    def _quote(token: str) -> str:
        # Tokens made only of shell-safe characters are stored verbatim.
        if token and shlex.quote(token) == token:
            return token
        # Prefer double quotes (the historical on-disk format, and readable for
        # expressions full of single quotes) when nothing inside needs escaping.
        if not any(ch in token for ch in '"\\$`'):
            return f'"{token}"'
        # Otherwise fall back to shlex's always-correct single-quote form.
        return shlex.quote(token)

    return ' '.join(_quote(token) for token in tokens)
16
+
17
def read_history(log_path: str) -> tuple[List[str], Set[str]]:
    """Load the distinct commands stored in a history file.

    Each non-blank line of the file is one command. Lines are stripped of
    surrounding whitespace, and a command that appears more than once is kept
    only at the position where it was first seen.

    Args:
        log_path: path of the history file to read.

    Returns:
        A ``(commands, seen)`` pair: ``commands`` is the list of distinct
        commands in file order, ``seen`` is the same commands as a set for
        constant-time membership tests. Both are empty when the file does
        not exist.
    """
    clist: List[str] = []
    cset: Set[str] = set()

    try:
        with open(log_path, "r") as f:
            for line in f:
                line = line.strip()
                # Skip blank lines and commands already recorded.
                if not line or line in cset:
                    continue
                clist.append(line)
                cset.add(line)

    except FileNotFoundError:
        # No history yet is not an error.
        pass
    except (PermissionError, OSError) as e:
        # Best effort: report and return whatever was read so far.
        print(f"Warning: Could not read history file {log_path}: {e}")

    return clist, cset
48
+
49
def write_history(tokens: list):
    """Record a command in the local history file if it is not already there.

    Nothing is written when the PJK_NO_HISTORY environment variable is "1",
    when fewer than two tokens are given, or when the first token is 'man'.
    Failures to append are reported as a warning and otherwise ignored.

    Args:
        tokens: the command tokens as passed on the command line.
    """
    history_disabled = os.environ.get("PJK_NO_HISTORY") == "1"
    if history_disabled or len(tokens) < 2 or tokens[0] == 'man':
        return

    entry = printable_command(tokens)

    # Only the membership set is needed to detect a repeated command.
    _, already_logged = read_history(LOG_FILE)
    if entry in already_logged:
        return

    try:
        # Append mode: existing entries are never rewritten.
        with open(LOG_FILE, "a") as log:
            log.write(f"{entry}\n")
    except (PermissionError, OSError) as err:
        print(f"Warning: Could not write to history file {LOG_FILE}: {err}")
77
+
78
def display_history():
    """Page through the local history, last-recorded command first.

    Rows are numbered from 1; the number is what 'pjk +<#>' expects.
    """
    commands, _ = read_history(LOG_FILE)

    with pager_stdout():
        print(f"Local history in '{LOG_FILE}'")
        print("Use 'pjk +<#>' to execute command.")
        print()

        ordinal_header = highlight('#', 'bold', '#')
        command_header = highlight('command', 'bold', 'command')
        print(f'{ordinal_header}\t{command_header}')

        # The file is in append order, so walk it backwards.
        for number, command in enumerate(reversed(commands), start=1):
            print(f'{number}\t{command}')
93
+
94
def get_history_tokens(ord_str: str):
    """Return the tokens of the history entry with the given ordinal.

    Ordinals count from 1 starting at the last line of the history file,
    matching the numbering printed by display_history.

    Args:
        ord_str: the ordinal as text; anything int() accepts (e.g. '+3').

    Returns:
        The command split into tokens with shlex, or None when the ordinal
        is outside the range of stored commands.
    """
    wanted = int(ord_str)
    commands, _ = read_history(LOG_FILE)

    if not 1 <= wanted <= len(commands):
        return None

    # Ordinal 1 is the final entry, so index from the end of the list.
    return shlex.split(commands[-wanted], comments=True, posix=True)
@@ -23,7 +23,7 @@ def build_body_from_string(query_string: str) -> dict:
23
23
 
24
24
  class OpenSearchQueryPipe(QueryPipe, Integration):
25
25
  name = "os_query"
26
- desc = ("Opensearch query pipe. Uses record['query'] or record['os_query_object'] for os query\n"
26
+ desc = ("Opensearch query pipe. Uses record['query'] or record['os_query_object']\n"
27
27
  "An instance may define 'default_index' otherwise the query object must include an 'index' field.\n")
28
28
  arg0 = ("instance", "instance to query over.")
29
29
  examples = [
@@ -3,40 +3,22 @@
3
3
 
4
4
  #!/usr/bin/env python
5
5
  import sys
6
- import os
6
+ import os, re
7
7
  import shlex
8
- from typing import List
8
+ from typing import List, Dict
9
9
  from pjk.parser import ExpressionParser
10
10
  from pjk.usage import UsageError
11
11
  from pjk.log import init as init_logging
12
- from datetime import datetime
13
12
  import traceback
14
13
  import concurrent.futures
15
14
  from pjk.registry import ComponentRegistry
16
15
  from pjk.sinks.stdout import StdoutSink
17
- from pjk.man_page import do_man, do_examples
16
+ from pjk.man_page import do_man, do_examples, display_configs, display_macros
17
+ from pjk.history import write_history, display_history, get_history_tokens
18
18
  from pjk.sinks.expect import ExpectSink
19
19
  from pjk.progress import ProgressDisplay
20
20
  from pjk.version import __version__
21
21
 
22
- def write_history(tokens):
23
- if os.environ.get("PJK_NO_HISTORY") == "1":
24
- return
25
-
26
- log_path = ".pjk-history.txt"
27
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
28
-
29
- if len(tokens) < 2:
30
- return
31
-
32
- command = " ".join(tokens)
33
-
34
- try:
35
- with open(log_path, "a") as f:
36
- f.write(f"{timestamp}\tpjk {command}\n")
37
- except (PermissionError, OSError):
38
- pass
39
-
40
22
  def execute_threaded(sinks, stop_progress=None):
41
23
  max_workers = min(32, len(sinks))
42
24
  executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) # no 'with'
@@ -77,6 +59,32 @@ def initialize():
77
59
  #dst_dir.mkdir(parents=True, exist_ok=True)
78
60
  #hutil.copy(src, dst_dir / src.name)
79
61
 
62
def execute_non_expression(tokens, registry):
    """Run a built-in command that is not a pipeline expression.

    Handles 'man <name>' plus the single-token commands 'examples',
    'examples+', 'configs', 'macros' and '+'.

    Args:
        tokens: the command-line tokens; must contain at least one element.
        registry: component registry handed to the man/examples helpers.

    Returns:
        True when the tokens were recognised and handled here, False when
        the caller should go on to treat them as an expression.
    """
    head = tokens[0]

    if head == 'man' and len(tokens) == 2:
        do_man(tokens[1], registry)
        return True

    # Every remaining built-in is exactly one token long.
    if len(tokens) != 1:
        return False

    if head in ('examples', 'examples+'):
        do_examples(head, registry)
    elif head == 'configs':
        display_configs()
    elif head == 'macros':
        display_macros()
    elif head == '+':
        display_history()
    else:
        return False

    return True
87
+
80
88
  def execute(command: str):
81
89
  tokens = shlex.split(command, comments=True, posix=True)
82
90
  execute_tokens(tokens)
@@ -93,14 +101,16 @@ def execute_tokens(tokens: List[str]):
93
101
  if len(tokens) < 1:
94
102
  registry.print_usage()
95
103
  return
96
-
97
- if len(tokens) == 2 and tokens[0] == 'man':
98
- do_man(tokens[1], registry)
104
+
105
+ if execute_non_expression(tokens, registry):
99
106
  return
100
107
 
101
- if len(tokens) == 1 and tokens[0] in ['examples', 'examples+']:
102
- do_examples(tokens[0], registry)
103
- return
108
+ # execute command from .pjk-history.txt
109
+ if len(tokens) == 1 and re.fullmatch(r'^\+\d+$', tokens[0]):
110
+ tokens = get_history_tokens(tokens[0])
111
+ if not tokens:
112
+ print('No such history')
113
+ return
104
114
 
105
115
  parser = ExpressionParser(registry)
106
116
 
@@ -1,15 +1,15 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
- from pjk.pipes.factory import PipeFactory
5
- from pjk.sources.factory import SourceFactory
6
- from pjk.sinks.factory import SinkFactory
7
- from pjk.parser import ExpressionParser
4
+ from pjk.parser import ExpressionParser, MACRO_PREFIX, MACROS_FILE, read_macros
8
5
  from pjk.components import Source, Pipe, Sink
9
- from pjk.usage import Usage, ParsedToken
6
+ from pjk.usage import Usage, CONFIG_FILE
10
7
  from pjk.registry import ComponentRegistry
11
8
  from pjk.common import pager_stdout, highlight, ComponentOrigin
12
9
  from contextlib import nullcontext
10
+ import yaml
11
+ import sys
12
+ from pathlib import Path
13
13
 
14
14
  def get_base_class(usage: Usage, as_string: bool = False):
15
15
  if issubclass(usage.comp_class, Sink):
@@ -87,6 +87,50 @@ def print_man(registry: ComponentRegistry, name: str, usage: Usage):
87
87
  for expr_tokens, expect in usage.get_examples(): # expect in InlineSource format
88
88
  print_example(registry, expr_tokens, expect, name)
89
89
 
90
def display_configs():
    """Page through the component configs stored in CONFIG_FILE.

    The file is parsed with yaml.safe_load and must hold a top-level mapping
    of config name -> config body. Each body is dumped as YAML under a
    highlighted heading, with the value of a 'password' key masked.

    Raises:
        ValueError: if the top-level YAML value is not a mapping.
    """
    path = Path(CONFIG_FILE).expanduser()

    # Load everything before starting the pager so the file is closed promptly
    # and a missing file yields a message instead of a traceback.
    try:
        with path.open("r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    except FileNotFoundError:
        print(f'No config file found at {CONFIG_FILE}')
        return

    if not isinstance(data, dict):
        raise ValueError("Top-level YAML must be a mapping of records")

    with pager_stdout():
        print(f'Component configs defined in {CONFIG_FILE}')
        print()
        for name, body in data.items():
            print('=========================================')
            print(' ', highlight(name, 'bold', name))
            print('=========================================')

            # A body may be empty or a scalar; only mappings can hold a password.
            # Mask on a copy so the loaded data is left untouched.
            if isinstance(body, dict) and 'password' in body:
                body = {**body, 'password': '*************'}

            try:
                yaml.dump(
                    body,
                    sys.stdout,
                    sort_keys=False,
                    explicit_start=False,
                    allow_unicode=True,
                    width=10**9,  # effectively disable line wrapping
                )
            except BrokenPipeError:
                # Output was closed early (e.g. the pager quit); stop quietly.
                break
            print()
121
+
122
def display_macros():
    """Page through the macros returned by read_macros().

    Prints a short usage line followed by one 'name: value' entry per macro.
    When the macros file does not exist, a one-line notice is printed
    instead of letting FileNotFoundError propagate.
    """
    try:
        macros = read_macros()
    except FileNotFoundError:
        print(f"No macros file found at '{MACROS_FILE}'")
        return

    with pager_stdout():
        print(f"Macros defined in '{MACROS_FILE}'")

        print(f"Usage: pjk [...] {MACRO_PREFIX}:<macro_name> [...]")
        print()
        for name, value in macros.items():
            print(f'{name}: {value}')
            print()
133
+
90
134
  def do_examples(token:str, registry: ComponentRegistry):
91
135
  no_pager = token.endswith('+')
92
136
  cm = nullcontext() if no_pager else pager_stdout()
@@ -7,12 +7,27 @@ from typing import Any, List
7
7
  from pjk.components import Source, Pipe, Sink
8
8
  from pjk.usage import TokenError, UsageError, ParsedToken, Usage
9
9
  from pjk.pipes.let_reduce import ReducePipe
10
- from pjk.sources.macro_source import MACROS_FILE, MACRO_PREFIX, read_macros
11
10
  from pjk.pipes.progress_pipe import ProgressPipe
12
11
  from pjk.registry import ComponentRegistry
13
12
  from pjk.progress import papi
13
+ from typing import Dict
14
+ from pathlib import Path
14
15
  from pjk.progress import ProgressIgnore
15
16
 
17
+ MACROS_FILE = '~/.pjk/macros.txt'
18
+ MACRO_PREFIX = 'm'
19
def read_macros(file_name: str = MACROS_FILE) -> Dict[str, str]:
    """Parse a macros file into a name -> value mapping.

    Each useful line has the form '<name>: <value>'. Everything from the
    first '#' on a line is discarded, and lines that are then empty or have
    no ':' are skipped. A later definition of a name replaces an earlier one.

    Args:
        file_name: path of the macros file; '~' is expanded.

    Returns:
        Dict of macro name to macro value, both stripped of whitespace.
    """
    macros: Dict[str, str] = {}
    with Path(file_name).expanduser().open(encoding="utf-8") as handle:
        for raw_line in handle:
            # Keep only the text before any '#'.
            content = raw_line.partition("#")[0].strip()
            name, colon, value = content.partition(":")
            if not colon:
                # Blank line, or no name/value separator.
                continue
            macros[name.strip()] = value.strip()
    return macros
30
+
16
31
  # macros are of the form MACRO_PREFIX:<instance>
17
32
  def handle_macros(token: str, expanded: List[str]):
18
33
  if not token.startswith(f'{MACRO_PREFIX}:'):
@@ -187,7 +202,7 @@ class ExpressionParser:
187
202
 
188
203
  class ReducerAggregatorPipe(Pipe):
189
204
  def __init__(self, top_level_reducers: List[Any]):
190
- super().__init__(None)
205
+ super().__init__(None, None)
191
206
  self.top_level_reducers = top_level_reducers
192
207
  self.reduction = {}
193
208
  self.done = False
@@ -275,6 +290,7 @@ class UpstreamSource(Source):
275
290
  return u
276
291
 
277
292
  def __init__(self):
293
+ super().__init__(root=None)
278
294
  self.data = []
279
295
  self.inner_source = None
280
296
  self.sub_recs_in = papi.get_counter(self, var_label='sub_recs_in')
@@ -338,7 +354,7 @@ class SubExpression(Pipe, ProgressIgnore):
338
354
  return None
339
355
 
340
356
  def __init__(self, ptok: ParsedToken, usage: Usage):
341
- super().__init__(ptok)
357
+ super().__init__(ptok, usage)
342
358
  self.subexp_ops = []
343
359
  self.stack_helper = StackLoader()
344
360
  self.subexp_stack = OperandStack()
@@ -52,10 +52,10 @@ class DenormPipe(Pipe):
52
52
  return usage
53
53
 
54
54
  def __init__(self, ptok: ParsedToken, usage: Usage):
55
- super().__init__(ptok)
55
+ super().__init__(ptok, usage)
56
56
 
57
57
  self.field = usage.get_arg('field')
58
- self.recs_in = papi.get_counter(self, None) # don't display
58
+ self.recs_in = papi.get_counter(self, 'recs_in', display=False)
59
59
  self.recs_out = papi.get_percentage_counter(self, 'recs_out', self.recs_in)
60
60
 
61
61
  self._pending_iter = None
@@ -39,11 +39,11 @@ class FilterPipe(Pipe):
39
39
  return usage
40
40
 
41
41
  def __init__(self, ptok: ParsedToken, usage: Usage):
42
- super().__init__(ptok)
42
+ super().__init__(ptok, usage)
43
43
  self.mode = usage.get_arg('mode')
44
44
  self.left = None
45
45
  self.right = None
46
- self.recs_in = papi.get_counter(self, None) # don't display
46
+ self.recs_in = papi.get_counter(self, 'recs_in', display=False)
47
47
  self.recs_out = papi.get_percentage_counter(self, 'recs_out', self.recs_in)
48
48
 
49
49
  def reset(self):
@@ -60,7 +60,7 @@ class JoinPipe(Pipe):
60
60
  return usage
61
61
 
62
62
  def __init__(self, ptok: ParsedToken, usage: Usage):
63
- super().__init__(ptok)
63
+ super().__init__(ptok, usage)
64
64
 
65
65
  self.mode = usage.get_arg('mode')
66
66
  self.left = None
@@ -68,7 +68,7 @@ class JoinPipe(Pipe):
68
68
  self._pending_right = None
69
69
  self._check_right = False
70
70
 
71
- self.recs_in = papi.get_counter(self, None) # don't display
71
+ self.recs_in = papi.get_counter(self, 'recs_in', display=False)
72
72
  self.matches = papi.get_percentage_counter(self, 'matches', self.recs_in)
73
73
  self.recs_out = papi.get_counter(self, 'recs_out')
74
74
 
@@ -3,7 +3,7 @@
3
3
 
4
4
  # djk/pipes/let_reduce.py
5
5
 
6
- from pjk.components import Pipe
6
+ from pjk.components import DeepCopyPipe
7
7
  from pjk.usage import ParsedToken, Usage, UsageError, TokenError, NoBindUsage
8
8
  from pjk.common import SafeNamespace, ReducingNamespace
9
9
  import re
@@ -78,7 +78,7 @@ def eval_accumulating(expr: str, record: dict, op: str, acc=None):
78
78
  return do_eval(expr, env)
79
79
 
80
80
  # --- LetPipe (simple field assignment) ---
81
- class LetPipe(Pipe):
81
+ class LetPipe(DeepCopyPipe):
82
82
  @classmethod
83
83
  def usage(cls):
84
84
  usage = NoBindUsage( # can't use bound usage because of complicated parsing
@@ -93,7 +93,7 @@ class LetPipe(Pipe):
93
93
  return usage
94
94
 
95
95
  def __init__(self, ptok: ParsedToken, usage: Usage):
96
- super().__init__(ptok)
96
+ super().__init__(ptok, usage)
97
97
  args = parse_args(ptok.whole_token.split(':', 1)[-1])
98
98
  self.field = args['field']
99
99
  self.op = args['op']
@@ -121,7 +121,7 @@ def is_comprehension(expr: str) -> bool:
121
121
  except SyntaxError:
122
122
  return False
123
123
 
124
- class ReducePipe(Pipe):
124
+ class ReducePipe(DeepCopyPipe):
125
125
  @classmethod
126
126
  def usage(cls):
127
127
  usage = NoBindUsage( # can't use bound usage because of complicated parsing
@@ -161,7 +161,7 @@ class ReducePipe(Pipe):
161
161
  return usage
162
162
 
163
163
  def __init__(self, ptok: ParsedToken, usage: Usage):
164
- super().__init__(ptok)
164
+ super().__init__(ptok, usage)
165
165
  args = parse_args(ptok.whole_token.split(':', 1)[-1])
166
166
  self.field = args['field']
167
167
  self.op = args['op']
@@ -28,7 +28,7 @@ class MapByPipe(Pipe, KeyedSource):
28
28
  return u
29
29
 
30
30
  def __init__(self, ptok: ParsedToken, usage: Usage):
31
- super().__init__(ptok)
31
+ super().__init__(ptok, usage)
32
32
  self.is_group = False
33
33
  self.fields = usage.get_arg('key').split(',')
34
34
  self.rec_map = {}
@@ -37,7 +37,7 @@ class MapByPipe(Pipe, KeyedSource):
37
37
  self.do_count = usage.get_param(name='count').lower() == 'true'
38
38
  self.counts = {}
39
39
  self.missing_keys = papi.get_counter(self, 'missing_keys')
40
- self.recs_in = papi.get_counter(self, None) # don't display
40
+ self.recs_in = papi.get_counter(self, 'recs_in', display=False)
41
41
  # recs_out = distinct_keys
42
42
  self.distinct_keys = papi.get_percentage_counter(self, 'recs_out', self.recs_in)
43
43
 
@@ -40,7 +40,7 @@ class QueryPipe(Pipe):
40
40
  self.output_shape = usage.get_param('shape')
41
41
  self.count = usage.get_param('count')
42
42
  self.query_field = 'query' # for all subclasses
43
- self.inrecs = papi.get_counter(self, var_label=None) # don't display progress
43
+ self.inrecs = papi.get_counter(self, var_label='recs_in')
44
44
  self.outrecs = papi.get_percentage_counter(self, var_label='recs_out', denom_counter=self.inrecs)
45
45
 
46
46
  @abstractmethod
@@ -19,7 +19,7 @@ class RemoveField(DeepCopyPipe):
19
19
  return usage
20
20
 
21
21
  def __init__(self, ptok: ParsedToken, usage: Usage):
22
- super().__init__(ptok)
22
+ super().__init__(ptok, usage)
23
23
  arg_string = usage.get_arg('fields')
24
24
  self.fields = [f.strip() for f in arg_string.split(',') if f.strip()]
25
25
  if not self.fields:
@@ -19,7 +19,7 @@ class SelectFields(DeepCopyPipe):
19
19
  return usage
20
20
 
21
21
  def __init__(self, ptok: ParsedToken, usage: Usage):
22
- super().__init__(ptok)
22
+ super().__init__(ptok, usage)
23
23
 
24
24
  arg_string = usage.get_arg('fields')
25
25
  if not arg_string:
@@ -21,7 +21,7 @@ class SortPipe(Pipe):
21
21
  return usage
22
22
 
23
23
  def __init__(self, ptok: ParsedToken, usage: Usage):
24
- super().__init__(ptok)
24
+ super().__init__(ptok, usage)
25
25
 
26
26
  arg_string = usage.get_arg('field')
27
27
  if arg_string.startswith("-"):
@@ -19,7 +19,7 @@ class TailPipe(Pipe):
19
19
  return usage
20
20
 
21
21
  def __init__(self, ptok: ParsedToken, usage: Usage):
22
- super().__init__(ptok)
22
+ super().__init__(ptok, usage)
23
23
  self.limit = usage.get_arg('limit')
24
24
 
25
25
  self.buffer = []
@@ -22,10 +22,11 @@ class WherePipe(DeepCopyPipe):
22
22
  u.def_example(expr_tokens=["[{color:'blue'}, {color:'red'}, {color:'black'}]", "where:f.color.startswith('bl')"], expect="[{color:'blue'}, {color:'black'}]")
23
23
  return u
24
24
 
25
- def __init__(self, ptok: ParsedToken, usage: Usage):
26
- super().__init__(ptok, usage)
25
+ def __init__(self, ptok: ParsedToken, usage: Usage, root = None):
26
+ super().__init__(ptok, usage, root)
27
27
  self.expr = ptok.whole_token.split(':', 1)[1]
28
- self.inrecs = papi.get_counter(self, var_label=None) # don't display progress
28
+
29
+ self.inrecs = papi.get_counter(self, var_label='recs_in', display=False)
29
30
  self.outrecs = papi.get_percentage_counter(self, var_label='recs_out', denom_counter=self.inrecs)
30
31
  try:
31
32
  self.code = compile(self.expr, '<where>', 'eval')
@@ -46,3 +47,4 @@ class WherePipe(DeepCopyPipe):
46
47
  except Exception:
47
48
  continue # ignore eval errors
48
49
 
50
+