python-jack-knife 0.7.4__tar.gz → 0.7.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. {python_jack_knife-0.7.4/src/python_jack_knife.egg-info → python_jack_knife-0.7.5}/PKG-INFO +2 -1
  2. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/pyproject.toml +1 -0
  3. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/__init__.py +2 -1
  4. python_jack_knife-0.7.5/src/pjk/engine.py +51 -0
  5. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/history.py +1 -1
  6. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/integrations/opensearch_query_pipe.py +1 -2
  7. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/integrations/postgres_pipe.py +19 -2
  8. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/main.py +4 -2
  9. python_jack_knife-0.7.5/src/pjk/parse_pjk_file.py +66 -0
  10. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/parser.py +108 -33
  11. python_jack_knife-0.7.5/src/pjk/pipes/ddiff.py +144 -0
  12. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/factory.py +44 -1
  13. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/let_reduce.py +137 -4
  14. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/query_pipe.py +29 -19
  15. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/registry.py +7 -0
  16. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/format_sink.py +2 -2
  17. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/graph.py +14 -0
  18. python_jack_knife-0.7.5/src/pjk/sinks/graph_axis.py +9 -0
  19. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/graph_bar_line.py +8 -0
  20. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/graph_cumulative.py +4 -0
  21. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/graph_hist.py +3 -0
  22. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/graph_scatter.py +4 -0
  23. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/csv_source.py +0 -2
  24. python_jack_knife-0.7.5/src/pjk/sources/dict_list_source.py +15 -0
  25. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/factory.py +5 -0
  26. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/format_source.py +17 -7
  27. python_jack_knife-0.7.5/src/pjk/sources/http_source.py +98 -0
  28. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/s3_select_source.py +24 -3
  29. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/usage.py +24 -2
  30. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/version.py +1 -1
  31. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5/src/python_jack_knife.egg-info}/PKG-INFO +2 -1
  32. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/python_jack_knife.egg-info/SOURCES.txt +6 -0
  33. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/python_jack_knife.egg-info/requires.txt +1 -0
  34. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/LICENSE +0 -0
  35. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/README.md +0 -0
  36. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/setup.cfg +0 -0
  37. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/common.py +0 -0
  38. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/components.py +0 -0
  39. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/integrations/opensearch_client.py +0 -0
  40. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/integrations/opensearch_index_sink.py +0 -0
  41. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/integrations/snowflake_pipe.py +0 -0
  42. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/log.py +0 -0
  43. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/man_page.py +0 -0
  44. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/__init__.py +0 -0
  45. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/denorm.py +0 -0
  46. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/filter.py +0 -0
  47. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/head.py +0 -0
  48. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/join.py +0 -0
  49. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/map.py +0 -0
  50. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/move_field.py +0 -0
  51. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/progress_pipe.py +0 -0
  52. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/remove_field.py +0 -0
  53. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/sample.py +0 -0
  54. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/select.py +0 -0
  55. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/sort.py +0 -0
  56. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/tail.py +0 -0
  57. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/user_pipe_factory.py +0 -0
  58. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/pipes/where.py +0 -0
  59. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/progress.py +0 -0
  60. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/__init__.py +0 -0
  61. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/create_sink.py +0 -0
  62. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/csv_sink.py +0 -0
  63. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/devnull.py +0 -0
  64. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/dir_sink.py +0 -0
  65. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/expect.py +0 -0
  66. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/factory.py +0 -0
  67. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/json_sink.py +0 -0
  68. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/s3_sink.py +0 -0
  69. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/s3_stream.py +0 -0
  70. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/sinks.py +0 -0
  71. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/stdout.py +0 -0
  72. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/tsv_sink.py +0 -0
  73. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sinks/user_sink_factory.py +0 -0
  74. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/__init__.py +0 -0
  75. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/dir_source.py +0 -0
  76. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/favorite_source.py +0 -0
  77. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/inline_source.py +0 -0
  78. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/json_source.py +0 -0
  79. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/lazy_file.py +0 -0
  80. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/lazy_file_local.py +0 -0
  81. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/lazy_file_s3.py +0 -0
  82. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/npy_source.py +0 -0
  83. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/parquet_source.py +0 -0
  84. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/s3_source.py +0 -0
  85. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/source_list.py +0 -0
  86. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/sql_source.py +0 -0
  87. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/tsv_source.py +0 -0
  88. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/pjk/sources/user_source_factory.py +0 -0
  89. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  90. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  91. {python_jack_knife-0.7.4 → python_jack_knife-0.7.5}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.7.4
3
+ Version: 0.7.5
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -212,6 +212,7 @@ License-File: LICENSE
212
212
  Requires-Dist: hjson>=3.1.0
213
213
  Requires-Dist: pyyaml>=6.0
214
214
  Requires-Dist: requests>=2.32.0
215
+ Requires-Dist: deepdiff<9,>=8.0.0
215
216
  Provides-Extra: aws
216
217
  Requires-Dist: boto3>=1.34; extra == "aws"
217
218
  Provides-Extra: postgres
@@ -17,6 +17,7 @@ dependencies = [
17
17
  "hjson>=3.1.0",
18
18
  "pyyaml>=6.0",
19
19
  "requests>=2.32.0",
20
+ "deepdiff>=8.0.0,<9",
20
21
  ]
21
22
 
22
23
  [project.optional-dependencies]
@@ -1,5 +1,6 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
  from .version import __version__
4
+ from .engine import PjkEngine
4
5
 
5
- __all__ = ["__version__"]
6
+ __all__ = ["__version__", "PjkEngine"]
@@ -0,0 +1,51 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ from typing import Iterator, List, Optional
5
+
6
+ from pjk.parser import ExpressionParser, expand_macros
7
+ from pjk.registry import ComponentRegistry
8
+ from pjk.sources.dict_list_source import DictListSource
9
+
10
+
11
+ class PjkEngine:
12
+ """
13
+ Run a pjk pipeline from a .pjk file, optionally with supplied input records.
14
+
15
+ - inrecs supplied: the source in the .pjk file is replaced with inrecs.
16
+ Expression may be full (source + pipes + sink) or pipes-only.
17
+ - inrecs=None: expression.pjk is fully self-contained (source, pipes, sink)
18
+ """
19
+
20
+ def __init__(self, inrecs: Optional[List[dict]] = None, pjk_file: str = ""):
21
+ self.inrecs = inrecs
22
+ self.pjk_file = pjk_file
23
+
24
+ def __iter__(self) -> Iterator[dict]:
25
+ registry = ComponentRegistry()
26
+ parser = ExpressionParser(registry)
27
+ expanded = expand_macros([self.pjk_file])
28
+
29
+ if self.inrecs is not None:
30
+ source_override = DictListSource(self.inrecs)
31
+ try:
32
+ first_is_source = registry.create_source(expanded[0]) is not None
33
+ except Exception:
34
+ first_is_source = False
35
+ if first_is_source:
36
+ expanded = ["{to_override: 'true'}"] + expanded[1:]
37
+ else:
38
+ expanded = ["{to_override: 'true'}"] + expanded
39
+ else:
40
+ source_override = None
41
+
42
+ sink = parser.parse(expanded, source_override=source_override)
43
+
44
+ inputs = [sink.input]
45
+ sink.input._get_sources(inputs)
46
+ try:
47
+ for record in sink.input:
48
+ yield record
49
+ finally:
50
+ for inp in inputs:
51
+ inp.close()
@@ -91,7 +91,7 @@ def display_history():
91
91
 
92
92
  ordn = 1
93
93
  for command in reversed(clist):
94
- print(f'{ordn}\t{command}')
94
+ print(f'{ordn}\tpjk {command}')
95
95
  ordn += 1
96
96
 
97
97
  def get_history_tokens(ord_str: str):
@@ -108,8 +108,7 @@ class OpenSearchQueryPipe(QueryPipe, Integration):
108
108
  yield {
109
109
  "took_ms": took,
110
110
  "total_hits": total_hits,
111
- "index": self.index,
112
- "os_query_object": req_body
111
+ "index": self.index
113
112
  }
114
113
 
115
114
  # Emit each hit
@@ -5,6 +5,8 @@
5
5
 
6
6
  import base64
7
7
  import datetime as _dt
8
+ import socket
9
+ import sys
8
10
  import uuid
9
11
  import time
10
12
  from decimal import Decimal
@@ -17,6 +19,21 @@ from pjk.pipes.query_pipe import QueryPipe
17
19
  MAX_RETRIES = 3
18
20
  BASE_DELAY = 0.1 # seconds
19
21
 
22
+
23
+ def _print_db_connect_failure(host: str, port: int, exc: BaseException) -> None:
24
+ print("Failed to connect to DB", file=sys.stderr)
25
+ cur: Optional[BaseException] = exc
26
+ while cur is not None:
27
+ if isinstance(cur, socket.gaierror):
28
+ print(
29
+ f" Could not resolve hostname {host!r} (port {port}). "
30
+ "Private or corporate DB hosts usually require VPN or split-DNS.",
31
+ file=sys.stderr,
32
+ )
33
+ return
34
+ cur = cur.__cause__
35
+
36
+
20
37
  class DBClient:
21
38
  """Per-instance pg8000 connection wrapper. No shared state."""
22
39
 
@@ -47,8 +64,8 @@ class DBClient:
47
64
  self.conn = pg8000.connect(**kwargs)
48
65
  self.conn.autocommit = True
49
66
  except Exception as e:
50
- print("Failed to connect to DB")
51
- raise e
67
+ _print_db_connect_failure(host, port, e)
68
+ raise
52
69
 
53
70
  def close(self):
54
71
  if getattr(self, "conn", None) is None:
@@ -14,7 +14,7 @@ import concurrent.futures
14
14
  from pjk.registry import ComponentRegistry
15
15
  from pjk.sinks.stdout import StdoutSink
16
16
  from pjk.man_page import do_man, do_examples, display_configs, display_macros
17
- from pjk.history import write_history, display_history, get_history_tokens
17
+ from pjk.history import write_history, display_history, get_history_tokens, printable_command
18
18
  from pjk.sinks.expect import ExpectSink
19
19
  from pjk.progress import ProgressDisplay
20
20
  from pjk.version import __version__
@@ -52,7 +52,6 @@ def execute_threaded(sinks, stop_progress=None):
52
52
 
53
53
  def initialize():
54
54
  init_logging()
55
- write_history(sys.argv[1:])
56
55
 
57
56
  #src = Path("src/pjk/resources/configs.tmpl")
58
57
  #dst_dir = Path.home() / ".pjk"
@@ -111,12 +110,15 @@ def execute_tokens(tokens: List[str]):
111
110
  if not tokens:
112
111
  print('No such history')
113
112
  return
113
+ cmd = printable_command(tokens)
114
+ print(f"pjk {cmd}")
114
115
 
115
116
  parser = ExpressionParser(registry)
116
117
 
117
118
  display = None
118
119
  try:
119
120
  sink = parser.parse(tokens)
121
+ write_history(sys.argv[1:]) # now that it's parsed sucessfully
120
122
  if not isinstance(sink, (StdoutSink | ExpectSink)):
121
123
  display = ProgressDisplay(interval=3.0)
122
124
  display.start()
@@ -0,0 +1,66 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import os
5
+ import re
6
+ import shlex
7
+ from typing import Dict, List
8
+ from pjk.usage import TokenError, UsageError
9
+
10
+ PJK_END_TOKEN = 'END'
11
+ PJK_SET_TOKEN = 'SET'
12
+
13
+ # ${VAR} or $VAR - match anywhere in token (${VAR} first to avoid partial match)
14
+ VAR_REF_PATTERN = re.compile(r'\$\{([a-zA-Z_][a-zA-Z0-9_]*)\}|\$([a-zA-Z_][a-zA-Z0-9_]*)')
15
+
16
+
17
+ def _expand_token(t: str, env: Dict[str, str]) -> str:
18
+ """Expand $VAR or ${VAR} anywhere in token; raise if undefined."""
19
+
20
+ def repl(m):
21
+ name = m.group(1) or m.group(2)
22
+ if name not in env:
23
+ raise TokenError(f"Undefined variable: ${name}")
24
+ return env[name]
25
+
26
+ return VAR_REF_PATTERN.sub(repl, t)
27
+
28
+
29
+ def handle_pjk_file(token: str, expanded: List[str]):
30
+ if not token.endswith(".pjk"):
31
+ return False
32
+
33
+ if not os.path.isfile(token):
34
+ raise TokenError(f"pjk file not found: {token}")
35
+
36
+ with open(token, "r") as f:
37
+ lines = f.readlines()
38
+
39
+ env: Dict[str, str] = {}
40
+
41
+ for line in lines:
42
+ try:
43
+ parts = shlex.split(line, comments=True, posix=True)
44
+ except ValueError as e:
45
+ raise UsageError(f"Error parsing {token}: {e}")
46
+
47
+ if not parts:
48
+ continue
49
+ if parts[0] == PJK_END_TOKEN:
50
+ break
51
+
52
+ if parts[0] == PJK_SET_TOKEN:
53
+ for p in parts[1:]:
54
+ if '=' in p:
55
+ k, v = p.split('=', 1)
56
+ env[k.strip()] = v.strip()
57
+ continue
58
+
59
+ for p in parts:
60
+ if p == PJK_END_TOKEN:
61
+ break
62
+ expanded.append(_expand_token(p, env))
63
+ else:
64
+ continue
65
+ break
66
+ return True
@@ -13,6 +13,8 @@ from pjk.progress import papi
13
13
  from typing import Dict
14
14
  from pathlib import Path
15
15
  from pjk.progress import ProgressIgnore
16
+ from pjk.parse_pjk_file import handle_pjk_file
17
+ from pjk.common import SafeNamespace
16
18
 
17
19
  MACROS_FILE = '~/.pjk/macros.txt'
18
20
  MACRO_PREFIX = 'm'
@@ -47,27 +49,6 @@ def handle_macros(token: str, expanded: List[str]):
47
49
 
48
50
  return True
49
51
 
50
- def handle_pjk_file(token: str, expanded: List[str]):
51
- if not token.endswith(".pjk"):
52
- return False
53
-
54
- if not os.path.isfile(token):
55
- raise TokenError(f"pjk file not found: {token}")
56
-
57
- with open(token, "r") as f:
58
- lines = f.readlines()
59
-
60
- # Remove comments outside quotes, then split
61
- stripped = []
62
- for line in lines:
63
- try:
64
- parts = shlex.split(line, comments=True, posix=True)
65
- stripped.extend(parts)
66
- except ValueError as e:
67
- raise UsageError(f"Error parsing {token}: {e}")
68
- expanded.extend(stripped)
69
- return True
70
-
71
52
  def expand_macros(tokens: List[str]) -> List[str]:
72
53
  expanded = []
73
54
  for token in tokens:
@@ -130,7 +111,7 @@ class ExpressionParser:
130
111
 
131
112
  source = self.stack.pop()
132
113
  if isinstance(source, SubExpression):
133
- raise TokenError("Poorly formed sub-expression. Begin token '[' without matching 'over' keyword." )
114
+ raise TokenError("Poorly formed sub-expression. Begin token '[' without matching 'over' or 'if' keyword." )
134
115
 
135
116
  if not self.stack.empty():
136
117
  raise TokenError.from_list(['A sink can only consume one source.',
@@ -145,7 +126,7 @@ class ExpressionParser:
145
126
  sink = self.registry.create_sink(token)
146
127
 
147
128
  if not sink:
148
- raise TokenError.from_list(['expression must end in a sink.',
129
+ raise TokenError.from_list(['non-sink in final position.',
149
130
  'pjk <source> [<pipe> ...] <sink>'])
150
131
 
151
132
  # so each sink doesn't have to, maybe make a base class or mixin for sinks
@@ -155,7 +136,7 @@ class ExpressionParser:
155
136
  sink.add_source(progress_pipe)
156
137
  return sink
157
138
 
158
- def parse(self, tokens: List[str]) -> Sink:
139
+ def parse(self, tokens: List[str], source_override=None) -> Sink:
159
140
  usage_error_message = "You've got a problem here."
160
141
  stack_helper = StackLoader()
161
142
  self.tokens = tokens
@@ -173,7 +154,9 @@ class ExpressionParser:
173
154
  return self.get_sink(stack_helper, token)
174
155
 
175
156
  source = self.registry.create_source(token)
176
- if source:
157
+ if source:
158
+ if pos == 0 and source_override is not None:
159
+ source = source_override
177
160
  stack_helper.add_operator(source, self.stack)
178
161
  progress_pipe = ProgressPipe(component=source, simple=True)
179
162
  stack_helper.add_operator(progress_pipe, self.stack)
@@ -184,6 +167,16 @@ class ExpressionParser:
184
167
  stack_helper.add_operator(subexp, self.stack)
185
168
  continue
186
169
 
170
+ if not self.stack.empty() and isinstance(self.stack.peek(), SubExpression):
171
+ if token == "else":
172
+ self.stack.peek().enter_else_branch()
173
+ continue
174
+ if token.startswith("if:"):
175
+ op = self.stack.peek().finish_conditional(token)
176
+ if op:
177
+ stack_helper.add_operator(op, self.stack)
178
+ continue
179
+
187
180
  pipe = self.registry.create_pipe(token)
188
181
  if pipe:
189
182
  stack_helper.add_operator(pipe, self.stack)
@@ -191,11 +184,10 @@ class ExpressionParser:
191
184
 
192
185
  else: # unrecognized token
193
186
  # could be sink in WRONG position, let's see for better error message
194
- sink = self.registry.create_sink(token)
195
- if sink:
196
- raise TokenError.from_list(['sink may only occur in final position.',
187
+ if self.registry.is_sink(token):
188
+ raise TokenError.from_list(['sink in non-final position.',
197
189
  'pjk <source> [<pipe> ...] <sink>'])
198
- raise TokenError.from_list([token, 'unrecognized token'])
190
+ raise TokenError.from_list([f"'{token}' unrecognized."])
199
191
 
200
192
  except TokenError as e:
201
193
  raise UsageError(usage_error_message, self.tokens, pos, e)
@@ -237,7 +229,7 @@ class StackLoader:
237
229
  if not stack.empty() and isinstance(stack.peek(), SubExpression):
238
230
  subexp = stack.peek()
239
231
 
240
- if isinstance(op, SubExpressionOver) and subexp.recursion_depth() == 0:
232
+ if isinstance(op, (SubExpressionOver, SubExpressionIf)) and subexp.recursion_depth() == 0:
241
233
  subexp = stack.pop()
242
234
  op.add_source(subexp)
243
235
  stack.push(op)
@@ -343,6 +335,64 @@ class SubExpressionOver(Pipe):
343
335
  self.left.subexp_process(record, self.over_arg)
344
336
  yield record
345
337
 
338
+
339
+ class IdentityPipe(Pipe):
340
+ """Pass-through pipe for empty else branch."""
341
+
342
+ def __iter__(self):
343
+ yield from self.left
344
+
345
+
346
+ class SubExpressionIf(Pipe):
347
+ """Closes a conditional block: [ then_ops else else_ops if:expr"""
348
+
349
+ @classmethod
350
+ def usage(cls) -> Usage:
351
+ u = Usage(name="if", desc="conditional sub-expression.", component_class=cls)
352
+ return u
353
+
354
+ def __init__(self, expr: str, then_chain, else_chain, upstream_source):
355
+ super().__init__(None, None)
356
+ self.expr = expr
357
+ self.then_chain = then_chain
358
+ self.else_chain = else_chain
359
+ self.upstream_source = upstream_source
360
+ self.inrecs = papi.get_counter(self, var_label='recs_in', display=False)
361
+ self.recs_true = papi.get_percentage_counter(self, var_label='recs_true', denom_counter=self.inrecs)
362
+ try:
363
+ self.code = compile(expr, '<if>', 'eval')
364
+ except Exception as e:
365
+ raise UsageError(f"Invalid if expression: {expr}") from e
366
+
367
+ def reset(self):
368
+ if self.then_chain:
369
+ self.then_chain.reset()
370
+ if self.else_chain:
371
+ self.else_chain.reset()
372
+
373
+ def __iter__(self):
374
+ for record in self.left:
375
+ self.inrecs.increment()
376
+ self.upstream_source.set_list([record])
377
+ f = SafeNamespace(record)
378
+ try:
379
+ cond_true = eval(self.code, {}, {'f': f})
380
+ except Exception:
381
+ cond_true = False
382
+ if cond_true:
383
+ self.recs_true.increment()
384
+ chain = self.then_chain
385
+ else:
386
+ chain = self.else_chain
387
+
388
+ if chain:
389
+ chain.reset()
390
+ for r in chain:
391
+ yield r
392
+ else:
393
+ yield record
394
+
395
+
346
396
  class SubExpression(Pipe, ProgressIgnore):
347
397
  @classmethod
348
398
  def create(cls, token: str) -> Pipe:
@@ -357,17 +407,42 @@ class SubExpression(Pipe, ProgressIgnore):
357
407
  super().__init__(ptok, usage)
358
408
  self.subexp_ops = []
359
409
  self.stack_helper = StackLoader()
360
- self.subexp_stack = OperandStack()
410
+ self.subexp_stack = OperandStack()
361
411
  self.upstream_source = UpstreamSource()
362
412
  self.subexp_stack.push(self.upstream_source)
363
- self.recursions = 0 # number of subexpression within
413
+ self.recursions = 0
364
414
  self.subexp_left = None
415
+ self.in_else_branch = False
416
+ self.conditional_then_chain = None
417
+
418
+ def enter_else_branch(self):
419
+ """Switch to collecting else branch; save then chain."""
420
+ self.conditional_then_chain = self.subexp_stack.pop()
421
+ self.subexp_stack.push(self.upstream_source)
422
+ self.in_else_branch = True
423
+
424
+ def finish_conditional(self, token: str):
425
+ """Build SubExpressionIf from collected then/else chains."""
426
+ expr = token.split(':', 1)[1]
427
+ if self.in_else_branch:
428
+ else_chain = self.subexp_stack.pop()
429
+ then_chain = self.conditional_then_chain
430
+ else:
431
+ then_chain = self.subexp_stack.pop()
432
+ else_chain = None
433
+ if else_chain is self.upstream_source:
434
+ else_chain = IdentityPipe(None, None)
435
+ else_chain.add_source(self.upstream_source)
436
+ self.in_else_branch = False
437
+ self.conditional_then_chain = None
438
+ self.subexp_stack.push(self.upstream_source)
439
+ return SubExpressionIf(expr, then_chain, else_chain, self.upstream_source)
365
440
 
366
441
  def add_subop(self, op):
367
442
  self.subexp_ops.append(op)
368
443
  if isinstance(op, SubExpression):
369
444
  self.recursions += 1
370
- elif isinstance(op, SubExpressionOver):
445
+ elif isinstance(op, (SubExpressionOver, SubExpressionIf)):
371
446
  self.recursions -= 1
372
447
  self.stack_helper.add_operator(op, self.subexp_stack)
373
448
 
@@ -0,0 +1,144 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # pjk/pipes/ddiff.py
5
+
6
+ import json
7
+ from itertools import zip_longest
8
+
9
+ from deepdiff import DeepDiff
10
+
11
+ from pjk.components import Pipe
12
+ from pjk.usage import Usage, ParsedToken
13
+ from pjk.progress import papi
14
+
15
+ _PAD = object()
16
+
17
+ _DDIFF_OLD_NEW_KEYS = (
18
+ ("old_value", "left_value"),
19
+ ("new_value", "right_value"),
20
+ ("old_type", "left_type"),
21
+ ("new_type", "right_type"),
22
+ )
23
+
24
+
25
+ def _diff_left_right_labels(obj):
26
+ """Map DeepDiff old/new keys to left/right (matches stream argument order)."""
27
+ if isinstance(obj, dict):
28
+ out = {}
29
+ for k, v in obj.items():
30
+ for old_k, new_k in _DDIFF_OLD_NEW_KEYS:
31
+ if k == old_k:
32
+ k = new_k
33
+ break
34
+ out[k] = _diff_left_right_labels(v)
35
+ return out
36
+ if isinstance(obj, list):
37
+ return [_diff_left_right_labels(x) for x in obj]
38
+ return obj
39
+
40
+
41
+ class DdiffPipe(Pipe):
42
+ arity = 2
43
+
44
+ @classmethod
45
+ def usage(cls):
46
+ u = Usage(
47
+ name="ddiff",
48
+ desc=(
49
+ "Lockstep deep diff of two record streams."
50
+ ),
51
+ component_class=cls,
52
+ )
53
+ u.def_syntax(
54
+ "pjk <left_source> <right_source> ddiff ..."
55
+ )
56
+ u.def_param(
57
+ name="ignore_order",
58
+ usage="DeepDiff ignore_order (lists and sets)",
59
+ valid_values={"true", "false"},
60
+ default="false",
61
+ )
62
+ u.def_param(
63
+ name="omit_equal",
64
+ usage="Suppress output when the two records are deeply equal",
65
+ valid_values={"true", "false"},
66
+ default="false",
67
+ )
68
+ u.def_param(
69
+ name="significant_digits",
70
+ usage="DeepDiff significant_digits for numeric comparisons",
71
+ is_num=True,
72
+ default=None,
73
+ )
74
+ ferry_ford = "[{ferry:'orca', cars:[{make: 'ford', size:9}]}]"
75
+ ferry_bmw = "[{ferry:'orca', cars:[{make: 'bmw', size:4}]}]"
76
+ ferry_ford_rec = {"ferry": "orca", "cars": [{"make": "ford", "size": 9}]}
77
+ ferry_bmw_rec = {"ferry": "orca", "cars": [{"make": "bmw", "size": 4}]}
78
+ u.def_example(
79
+ expr_tokens=[ferry_ford, ferry_ford, "ddiff"],
80
+ expect=json.dumps(
81
+ [{"left": ferry_ford_rec, "right": ferry_ford_rec, "diff": {}}],
82
+ separators=(",", ":"),
83
+ ),
84
+ )
85
+ u.def_example(
86
+ expr_tokens=[ferry_ford, ferry_bmw, "ddiff"],
87
+ expect=json.dumps(
88
+ [
89
+ {
90
+ "left": ferry_ford_rec,
91
+ "right": ferry_bmw_rec,
92
+ "diff": _diff_left_right_labels(
93
+ json.loads(
94
+ DeepDiff(
95
+ ferry_ford_rec, ferry_bmw_rec
96
+ ).to_json()
97
+ )
98
+ ),
99
+ }
100
+ ],
101
+ separators=(",", ":"),
102
+ ),
103
+ )
104
+ return u
105
+
106
+ def __init__(self, ptok: ParsedToken, usage: Usage):
107
+ super().__init__(ptok, usage)
108
+ self.recs_in = papi.get_counter(self, "recs_in", display=False)
109
+ self.recs_out = papi.get_counter(self, "recs_out")
110
+
111
+ def reset(self):
112
+ pass
113
+
114
+ @staticmethod
115
+ def _truthy(param) -> bool:
116
+ if param is None:
117
+ return False
118
+ return str(param).lower() == "true"
119
+
120
+ def __iter__(self):
121
+ ignore_order = self._truthy(self.usage.get_param("ignore_order"))
122
+ omit_equal = self._truthy(self.usage.get_param("omit_equal"))
123
+ sig = self.usage.get_param("significant_digits")
124
+
125
+ dd_kwargs = {}
126
+ if ignore_order:
127
+ dd_kwargs["ignore_order"] = True
128
+ if sig is not None:
129
+ dd_kwargs["significant_digits"] = sig
130
+
131
+ for left_rec, right_rec in zip_longest(self.left, self.right, fillvalue=_PAD):
132
+ self.recs_in.increment()
133
+ if left_rec is _PAD:
134
+ left_rec = {}
135
+ if right_rec is _PAD:
136
+ right_rec = {}
137
+
138
+ d = DeepDiff(left_rec, right_rec, **dd_kwargs)
139
+ # Normalize to JSON-native dict/list (to_dict() may use e.g. SetOrdered).
140
+ diff_map = _diff_left_right_labels(json.loads(d.to_json()))
141
+ if omit_equal and not diff_map:
142
+ continue
143
+ self.recs_out.increment()
144
+ yield {"left": left_rec, "right": right_rec, "diff": diff_map}