python-jack-knife 0.7.4__tar.gz → 0.7.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_jack_knife-0.7.4/src/python_jack_knife.egg-info → python_jack_knife-0.7.6}/PKG-INFO +5 -1
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/pyproject.toml +2 -1
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/__init__.py +2 -1
- python_jack_knife-0.7.6/src/pjk/engine.py +51 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/history.py +1 -1
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/integrations/opensearch_client.py +6 -6
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/integrations/opensearch_query_pipe.py +1 -2
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/integrations/postgres_pipe.py +19 -2
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/main.py +4 -2
- python_jack_knife-0.7.6/src/pjk/parse_pjk_file.py +66 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/parser.py +108 -33
- python_jack_knife-0.7.6/src/pjk/pipes/ddiff.py +144 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/factory.py +44 -1
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/let_reduce.py +137 -4
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/query_pipe.py +29 -19
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/registry.py +7 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/format_sink.py +2 -2
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/graph.py +14 -0
- python_jack_knife-0.7.6/src/pjk/sinks/graph_axis.py +9 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/graph_bar_line.py +8 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/graph_cumulative.py +4 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/graph_hist.py +3 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/graph_scatter.py +4 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/csv_source.py +0 -2
- python_jack_knife-0.7.6/src/pjk/sources/dict_list_source.py +15 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/factory.py +5 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/format_source.py +17 -7
- python_jack_knife-0.7.6/src/pjk/sources/http_source.py +98 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/s3_select_source.py +24 -3
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/usage.py +83 -12
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/version.py +1 -1
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6/src/python_jack_knife.egg-info}/PKG-INFO +5 -1
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/python_jack_knife.egg-info/SOURCES.txt +6 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/python_jack_knife.egg-info/requires.txt +4 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/LICENSE +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/README.md +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/setup.cfg +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/common.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/components.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/integrations/opensearch_index_sink.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/integrations/snowflake_pipe.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/log.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/man_page.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/__init__.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/denorm.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/filter.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/head.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/join.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/map.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/move_field.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/progress_pipe.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/remove_field.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/sample.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/select.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/sort.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/tail.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/user_pipe_factory.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/pipes/where.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/progress.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/__init__.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/create_sink.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/csv_sink.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/devnull.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/dir_sink.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/expect.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/factory.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/json_sink.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/s3_sink.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/s3_stream.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/sinks.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/stdout.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/tsv_sink.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sinks/user_sink_factory.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/__init__.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/dir_source.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/favorite_source.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/inline_source.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/json_source.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/lazy_file.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/lazy_file_local.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/lazy_file_s3.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/npy_source.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/parquet_source.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/s3_source.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/source_list.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/sql_source.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/tsv_source.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/sources/user_source_factory.py +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
- {python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/python_jack_knife.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-jack-knife
|
|
3
|
-
Version: 0.7.4
|
|
3
|
+
Version: 0.7.6
|
|
4
4
|
Summary: Python Jack Knife – a command line data processor
|
|
5
5
|
Author-email: Mike Schultz <mike.schultz@gmail.com>
|
|
6
6
|
License:
|
|
@@ -212,6 +212,7 @@ License-File: LICENSE
|
|
|
212
212
|
Requires-Dist: hjson>=3.1.0
|
|
213
213
|
Requires-Dist: pyyaml>=6.0
|
|
214
214
|
Requires-Dist: requests>=2.32.0
|
|
215
|
+
Requires-Dist: deepdiff<9,>=8.0.0
|
|
215
216
|
Provides-Extra: aws
|
|
216
217
|
Requires-Dist: boto3>=1.34; extra == "aws"
|
|
217
218
|
Provides-Extra: postgres
|
|
@@ -225,6 +226,9 @@ Provides-Extra: dev
|
|
|
225
226
|
Requires-Dist: pytest; extra == "dev"
|
|
226
227
|
Requires-Dist: black; extra == "dev"
|
|
227
228
|
Requires-Dist: ruff; extra == "dev"
|
|
229
|
+
Requires-Dist: build; extra == "dev"
|
|
230
|
+
Requires-Dist: twine; extra == "dev"
|
|
231
|
+
Requires-Dist: opensearch-py; extra == "dev"
|
|
228
232
|
Provides-Extra: all
|
|
229
233
|
Requires-Dist: boto3>=1.34; extra == "all"
|
|
230
234
|
Requires-Dist: pg8000>=1.30.0; extra == "all"
|
|
@@ -17,6 +17,7 @@ dependencies = [
|
|
|
17
17
|
"hjson>=3.1.0",
|
|
18
18
|
"pyyaml>=6.0",
|
|
19
19
|
"requests>=2.32.0",
|
|
20
|
+
"deepdiff>=8.0.0,<9",
|
|
20
21
|
]
|
|
21
22
|
|
|
22
23
|
[project.optional-dependencies]
|
|
@@ -24,7 +25,7 @@ aws = ["boto3>=1.34"]
|
|
|
24
25
|
postgres = ["pg8000>=1.30.0"]
|
|
25
26
|
parquet = ["pyarrow>=15.0.0"]
|
|
26
27
|
plot = ["matplotlib>=3.9.0", "pandas>=2.2.0"]
|
|
27
|
-
dev = ["pytest", "black", "ruff"]
|
|
28
|
+
dev = ["pytest", "black", "ruff", "build", "twine", "opensearch-py"]
|
|
28
29
|
all = [
|
|
29
30
|
"boto3>=1.34",
|
|
30
31
|
"pg8000>=1.30.0",
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
from typing import Iterator, List, Optional
|
|
5
|
+
|
|
6
|
+
from pjk.parser import ExpressionParser, expand_macros
|
|
7
|
+
from pjk.registry import ComponentRegistry
|
|
8
|
+
from pjk.sources.dict_list_source import DictListSource
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PjkEngine:
    """Iterate over the records produced by the pipeline in a .pjk file.

    Two modes, selected by ``inrecs``:

    - ``inrecs`` is ``None``: the .pjk file is used as written and must be
      fully self-contained (source, pipes, sink).
    - ``inrecs`` is a list of dicts: those records become the pipeline input.
      The file may be a full expression (its leading source is replaced) or
      pipes-only (an input is prepended).
    """

    def __init__(self, inrecs: Optional[List[dict]] = None, pjk_file: str = ""):
        self.inrecs = inrecs
        self.pjk_file = pjk_file

    def __iter__(self) -> Iterator[dict]:
        """Parse the .pjk file and yield the records reaching the sink's input.

        Every component collected by ``_get_sources`` is closed when
        iteration ends, whether normally or through an exception.
        """
        registry = ComponentRegistry()
        parser = ExpressionParser(registry)
        tokens = expand_macros([self.pjk_file])
        override = None

        if self.inrecs is not None:
            override = DictListSource(self.inrecs)
            # Probe the first token: any failure (including an empty token
            # list) simply means "the file does not start with a source".
            try:
                leading_source = registry.create_source(tokens[0]) is not None
            except Exception:
                leading_source = False
            remainder = tokens[1:] if leading_source else tokens
            # Placeholder source token at position 0; the parser swaps the
            # source created for position 0 with source_override.
            tokens = ["{to_override: 'true'}"] + remainder

        sink = parser.parse(tokens, source_override=override)

        to_close = [sink.input]
        sink.input._get_sources(to_close)
        try:
            for rec in sink.input:
                yield rec
        finally:
            for component in to_close:
                component.close()
|
{python_jack_knife-0.7.4 → python_jack_knife-0.7.6}/src/pjk/integrations/opensearch_client.py
RENAMED
|
@@ -2,20 +2,20 @@ from pjk.usage import Usage
|
|
|
2
2
|
|
|
3
3
|
# name, type, default
|
|
4
4
|
OS_CONFIG_TUPLES = [
|
|
5
|
-
("default_index", str, None),
|
|
5
|
+
("default_index", str, None, True),
|
|
6
6
|
("os_auth_use_aws", bool, "true"),
|
|
7
7
|
("os_scheme", str, "https"),
|
|
8
8
|
("os_verify_certs", bool, "true"),
|
|
9
|
-
("os_ca_certs", str, None),
|
|
10
|
-
("os_region", str, None),
|
|
9
|
+
("os_ca_certs", str, None, True),
|
|
10
|
+
("os_region", str, None, True),
|
|
11
11
|
("os_service", str, "es"),
|
|
12
|
-
("os_username", str, None),
|
|
13
|
-
("os_password", str, None),
|
|
12
|
+
("os_username", str, None, True),
|
|
13
|
+
("os_password", str, None, True),
|
|
14
14
|
("os_timeout", float, 30),
|
|
15
15
|
("os_ssl_assert_hostname", bool, "true"),
|
|
16
16
|
("os_ssl_show_warn", bool, "false"),
|
|
17
17
|
("os_host", str, None),
|
|
18
|
-
("os_port", int, None)
|
|
18
|
+
("os_port", int, None, True),
|
|
19
19
|
]
|
|
20
20
|
|
|
21
21
|
class OpenSearchClient:
|
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
|
|
6
6
|
import base64
|
|
7
7
|
import datetime as _dt
|
|
8
|
+
import socket
|
|
9
|
+
import sys
|
|
8
10
|
import uuid
|
|
9
11
|
import time
|
|
10
12
|
from decimal import Decimal
|
|
@@ -17,6 +19,21 @@ from pjk.pipes.query_pipe import QueryPipe
|
|
|
17
19
|
MAX_RETRIES = 3
|
|
18
20
|
BASE_DELAY = 0.1 # seconds
|
|
19
21
|
|
|
22
|
+
|
|
23
|
+
def _print_db_connect_failure(host: str, port: int, exc: BaseException) -> None:
    """Report a failed DB connection on stderr, with a DNS hint when relevant.

    A one-line failure notice is always printed. The exception chain that
    starts at ``exc`` is then searched for a ``socket.gaierror``; when one is
    found, a second line says the hostname could not be resolved.

    Args:
        host: Hostname of the attempted connection (echoed in the hint).
        port: Port of the attempted connection (echoed in the hint).
        exc: Exception raised by the connection attempt.
    """
    print("Failed to connect to DB", file=sys.stderr)
    visited = set()  # ids of exceptions already inspected; guards against cyclic chains
    cur = exc
    while cur is not None and id(cur) not in visited:
        visited.add(id(cur))
        if isinstance(cur, socket.gaierror):
            print(
                f" Could not resolve hostname {host!r} (port {port}). "
                "Private or corporate DB hosts usually require VPN or split-DNS.",
                file=sys.stderr,
            )
            return
        # Prefer the explicit cause ("raise ... from err"); otherwise fall back
        # to the implicit context set when an exception is raised while
        # another one is being handled.
        cur = cur.__cause__ if cur.__cause__ is not None else cur.__context__
|
|
35
|
+
|
|
36
|
+
|
|
20
37
|
class DBClient:
|
|
21
38
|
"""Per-instance pg8000 connection wrapper. No shared state."""
|
|
22
39
|
|
|
@@ -47,8 +64,8 @@ class DBClient:
|
|
|
47
64
|
self.conn = pg8000.connect(**kwargs)
|
|
48
65
|
self.conn.autocommit = True
|
|
49
66
|
except Exception as e:
|
|
50
|
-
|
|
51
|
-
raise
|
|
67
|
+
_print_db_connect_failure(host, port, e)
|
|
68
|
+
raise
|
|
52
69
|
|
|
53
70
|
def close(self):
|
|
54
71
|
if getattr(self, "conn", None) is None:
|
|
@@ -14,7 +14,7 @@ import concurrent.futures
|
|
|
14
14
|
from pjk.registry import ComponentRegistry
|
|
15
15
|
from pjk.sinks.stdout import StdoutSink
|
|
16
16
|
from pjk.man_page import do_man, do_examples, display_configs, display_macros
|
|
17
|
-
from pjk.history import write_history, display_history, get_history_tokens
|
|
17
|
+
from pjk.history import write_history, display_history, get_history_tokens, printable_command
|
|
18
18
|
from pjk.sinks.expect import ExpectSink
|
|
19
19
|
from pjk.progress import ProgressDisplay
|
|
20
20
|
from pjk.version import __version__
|
|
@@ -52,7 +52,6 @@ def execute_threaded(sinks, stop_progress=None):
|
|
|
52
52
|
|
|
53
53
|
def initialize():
|
|
54
54
|
init_logging()
|
|
55
|
-
write_history(sys.argv[1:])
|
|
56
55
|
|
|
57
56
|
#src = Path("src/pjk/resources/configs.tmpl")
|
|
58
57
|
#dst_dir = Path.home() / ".pjk"
|
|
@@ -111,12 +110,15 @@ def execute_tokens(tokens: List[str]):
|
|
|
111
110
|
if not tokens:
|
|
112
111
|
print('No such history')
|
|
113
112
|
return
|
|
113
|
+
cmd = printable_command(tokens)
|
|
114
|
+
print(f"pjk {cmd}")
|
|
114
115
|
|
|
115
116
|
parser = ExpressionParser(registry)
|
|
116
117
|
|
|
117
118
|
display = None
|
|
118
119
|
try:
|
|
119
120
|
sink = parser.parse(tokens)
|
|
121
|
+
write_history(sys.argv[1:]) # now that it's parsed successfully
|
|
120
122
|
if not isinstance(sink, (StdoutSink | ExpectSink)):
|
|
121
123
|
display = ProgressDisplay(interval=3.0)
|
|
122
124
|
display.start()
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
import shlex
|
|
7
|
+
from typing import Dict, List
|
|
8
|
+
from pjk.usage import TokenError, UsageError
|
|
9
|
+
|
|
10
|
+
PJK_END_TOKEN = 'END'
PJK_SET_TOKEN = 'SET'

# ${VAR} or $VAR - match anywhere in token (${VAR} first to avoid partial match)
VAR_REF_PATTERN = re.compile(r'\$\{([a-zA-Z_][a-zA-Z0-9_]*)\}|\$([a-zA-Z_][a-zA-Z0-9_]*)')


def _expand_token(t: str, env: Dict[str, str]) -> str:
    """Replace every ``$VAR`` / ``${VAR}`` reference in ``t`` with ``env[VAR]``.

    Args:
        t: A single token, possibly containing variable references.
        env: Variable name -> replacement text.

    Returns:
        The token with all references substituted; the replacement text is
        inserted literally.

    Raises:
        TokenError: If a referenced variable is not present in ``env``.
    """

    def repl(m):
        # Exactly one of the two groups matches: ${VAR} -> 1, $VAR -> 2.
        name = m.group(1) or m.group(2)
        if name not in env:
            raise TokenError(f"Undefined variable: ${name}")
        return env[name]

    return VAR_REF_PATTERN.sub(repl, t)


def handle_pjk_file(token: str, expanded: List[str]):
    """Expand a ``.pjk`` file into tokens appended to ``expanded``.

    Each line is split with shell-like rules (``#`` starts a comment).
    A line whose first word is ``SET`` defines variables from its
    ``name=value`` words (words without ``=`` are ignored). Every other word
    is variable-expanded and collected. The word ``END`` stops processing;
    nothing after it is read.

    ``expanded`` is extended only after the whole file has been processed, so
    a parse error or undefined variable leaves the caller's list untouched.

    Args:
        token: Candidate token; handled only if it ends with ``.pjk``.
        expanded: Output list that receives the file's tokens.

    Returns:
        ``False`` if ``token`` does not end with ``.pjk`` (nothing is done),
        ``True`` once the file has been expanded.

    Raises:
        TokenError: If the file does not exist or a variable is undefined.
        UsageError: If a line cannot be split (e.g. unbalanced quotes).
    """
    if not token.endswith(".pjk"):
        return False

    if not os.path.isfile(token):
        raise TokenError(f"pjk file not found: {token}")

    env: Dict[str, str] = {}
    collected: List[str] = []
    reached_end = False

    with open(token, "r") as f:
        for line in f:
            try:
                parts = shlex.split(line, comments=True, posix=True)
            except ValueError as e:
                raise UsageError(f"Error parsing {token}: {e}") from e

            if not parts:
                continue  # blank or comment-only line

            if parts[0] == PJK_SET_TOKEN:
                for assignment in parts[1:]:
                    if '=' in assignment:
                        key, value = assignment.split('=', 1)
                        env[key.strip()] = value.strip()
                continue

            for word in parts:
                if word == PJK_END_TOKEN:
                    reached_end = True
                    break
                collected.append(_expand_token(word, env))
            if reached_end:
                break

    expanded.extend(collected)
    return True
|
|
@@ -13,6 +13,8 @@ from pjk.progress import papi
|
|
|
13
13
|
from typing import Dict
|
|
14
14
|
from pathlib import Path
|
|
15
15
|
from pjk.progress import ProgressIgnore
|
|
16
|
+
from pjk.parse_pjk_file import handle_pjk_file
|
|
17
|
+
from pjk.common import SafeNamespace
|
|
16
18
|
|
|
17
19
|
MACROS_FILE = '~/.pjk/macros.txt'
|
|
18
20
|
MACRO_PREFIX = 'm'
|
|
@@ -47,27 +49,6 @@ def handle_macros(token: str, expanded: List[str]):
|
|
|
47
49
|
|
|
48
50
|
return True
|
|
49
51
|
|
|
50
|
-
def handle_pjk_file(token: str, expanded: List[str]):
|
|
51
|
-
if not token.endswith(".pjk"):
|
|
52
|
-
return False
|
|
53
|
-
|
|
54
|
-
if not os.path.isfile(token):
|
|
55
|
-
raise TokenError(f"pjk file not found: {token}")
|
|
56
|
-
|
|
57
|
-
with open(token, "r") as f:
|
|
58
|
-
lines = f.readlines()
|
|
59
|
-
|
|
60
|
-
# Remove comments outside quotes, then split
|
|
61
|
-
stripped = []
|
|
62
|
-
for line in lines:
|
|
63
|
-
try:
|
|
64
|
-
parts = shlex.split(line, comments=True, posix=True)
|
|
65
|
-
stripped.extend(parts)
|
|
66
|
-
except ValueError as e:
|
|
67
|
-
raise UsageError(f"Error parsing {token}: {e}")
|
|
68
|
-
expanded.extend(stripped)
|
|
69
|
-
return True
|
|
70
|
-
|
|
71
52
|
def expand_macros(tokens: List[str]) -> List[str]:
|
|
72
53
|
expanded = []
|
|
73
54
|
for token in tokens:
|
|
@@ -130,7 +111,7 @@ class ExpressionParser:
|
|
|
130
111
|
|
|
131
112
|
source = self.stack.pop()
|
|
132
113
|
if isinstance(source, SubExpression):
|
|
133
|
-
raise TokenError("Poorly formed sub-expression. Begin token '[' without matching 'over' keyword." )
|
|
114
|
+
raise TokenError("Poorly formed sub-expression. Begin token '[' without matching 'over' or 'if' keyword." )
|
|
134
115
|
|
|
135
116
|
if not self.stack.empty():
|
|
136
117
|
raise TokenError.from_list(['A sink can only consume one source.',
|
|
@@ -145,7 +126,7 @@ class ExpressionParser:
|
|
|
145
126
|
sink = self.registry.create_sink(token)
|
|
146
127
|
|
|
147
128
|
if not sink:
|
|
148
|
-
raise TokenError.from_list(['
|
|
129
|
+
raise TokenError.from_list(['non-sink in final position.',
|
|
149
130
|
'pjk <source> [<pipe> ...] <sink>'])
|
|
150
131
|
|
|
151
132
|
# so each sink doesn't have to, maybe make a base class or mixin for sinks
|
|
@@ -155,7 +136,7 @@ class ExpressionParser:
|
|
|
155
136
|
sink.add_source(progress_pipe)
|
|
156
137
|
return sink
|
|
157
138
|
|
|
158
|
-
def parse(self, tokens: List[str]) -> Sink:
|
|
139
|
+
def parse(self, tokens: List[str], source_override=None) -> Sink:
|
|
159
140
|
usage_error_message = "You've got a problem here."
|
|
160
141
|
stack_helper = StackLoader()
|
|
161
142
|
self.tokens = tokens
|
|
@@ -173,7 +154,9 @@ class ExpressionParser:
|
|
|
173
154
|
return self.get_sink(stack_helper, token)
|
|
174
155
|
|
|
175
156
|
source = self.registry.create_source(token)
|
|
176
|
-
if source:
|
|
157
|
+
if source:
|
|
158
|
+
if pos == 0 and source_override is not None:
|
|
159
|
+
source = source_override
|
|
177
160
|
stack_helper.add_operator(source, self.stack)
|
|
178
161
|
progress_pipe = ProgressPipe(component=source, simple=True)
|
|
179
162
|
stack_helper.add_operator(progress_pipe, self.stack)
|
|
@@ -184,6 +167,16 @@ class ExpressionParser:
|
|
|
184
167
|
stack_helper.add_operator(subexp, self.stack)
|
|
185
168
|
continue
|
|
186
169
|
|
|
170
|
+
if not self.stack.empty() and isinstance(self.stack.peek(), SubExpression):
|
|
171
|
+
if token == "else":
|
|
172
|
+
self.stack.peek().enter_else_branch()
|
|
173
|
+
continue
|
|
174
|
+
if token.startswith("if:"):
|
|
175
|
+
op = self.stack.peek().finish_conditional(token)
|
|
176
|
+
if op:
|
|
177
|
+
stack_helper.add_operator(op, self.stack)
|
|
178
|
+
continue
|
|
179
|
+
|
|
187
180
|
pipe = self.registry.create_pipe(token)
|
|
188
181
|
if pipe:
|
|
189
182
|
stack_helper.add_operator(pipe, self.stack)
|
|
@@ -191,11 +184,10 @@ class ExpressionParser:
|
|
|
191
184
|
|
|
192
185
|
else: # unrecognized token
|
|
193
186
|
# could be sink in WRONG position, let's see for better error message
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
raise TokenError.from_list(['sink may only occur in final position.',
|
|
187
|
+
if self.registry.is_sink(token):
|
|
188
|
+
raise TokenError.from_list(['sink in non-final position.',
|
|
197
189
|
'pjk <source> [<pipe> ...] <sink>'])
|
|
198
|
-
raise TokenError.from_list([token
|
|
190
|
+
raise TokenError.from_list([f"'{token}' unrecognized."])
|
|
199
191
|
|
|
200
192
|
except TokenError as e:
|
|
201
193
|
raise UsageError(usage_error_message, self.tokens, pos, e)
|
|
@@ -237,7 +229,7 @@ class StackLoader:
|
|
|
237
229
|
if not stack.empty() and isinstance(stack.peek(), SubExpression):
|
|
238
230
|
subexp = stack.peek()
|
|
239
231
|
|
|
240
|
-
if isinstance(op, SubExpressionOver) and subexp.recursion_depth() == 0:
|
|
232
|
+
if isinstance(op, (SubExpressionOver, SubExpressionIf)) and subexp.recursion_depth() == 0:
|
|
241
233
|
subexp = stack.pop()
|
|
242
234
|
op.add_source(subexp)
|
|
243
235
|
stack.push(op)
|
|
@@ -343,6 +335,64 @@ class SubExpressionOver(Pipe):
|
|
|
343
335
|
self.left.subexp_process(record, self.over_arg)
|
|
344
336
|
yield record
|
|
345
337
|
|
|
338
|
+
|
|
339
|
+
class IdentityPipe(Pipe):
    """Pipe that forwards its input unchanged.

    Stands in for an ``else`` branch that contains no operations.
    """

    def __iter__(self):
        upstream = self.left
        yield from upstream
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
class SubExpressionIf(Pipe):
    """Conditional routing pipe: ``[ then_ops else else_ops if:expr``.

    For every incoming record the compiled ``expr`` is evaluated with the
    record exposed as ``f``. A truthy result sends the record through
    ``then_chain``, anything else through ``else_chain``. When the selected
    chain is falsy (e.g. ``None``) the record is passed through unchanged.
    """

    @classmethod
    def usage(cls) -> Usage:
        return Usage(name="if", desc="conditional sub-expression.", component_class=cls)

    def __init__(self, expr: str, then_chain, else_chain, upstream_source):
        """Store both branches and compile ``expr``.

        Raises:
            UsageError: If ``expr`` cannot be compiled as an expression.
        """
        super().__init__(None, None)
        self.expr = expr
        self.then_chain = then_chain
        self.else_chain = else_chain
        self.upstream_source = upstream_source
        self.inrecs = papi.get_counter(self, var_label='recs_in', display=False)
        self.recs_true = papi.get_percentage_counter(self, var_label='recs_true', denom_counter=self.inrecs)
        try:
            self.code = compile(expr, '<if>', 'eval')
        except Exception as e:
            raise UsageError(f"Invalid if expression: {expr}") from e

    def reset(self):
        """Reset whichever of the two branch chains are present."""
        for branch in (self.then_chain, self.else_chain):
            if branch:
                branch.reset()

    def __iter__(self):
        for record in self.left:
            self.inrecs.increment()
            # Feed the branches exactly this one record.
            self.upstream_source.set_list([record])
            f = SafeNamespace(record)
            # NOTE(review): eval() runs the user-supplied expression with
            # empty globals; only safe if the expression source is trusted.
            try:
                matched = eval(self.code, {}, {'f': f})
            except Exception:
                # An expression that fails on this record counts as false.
                matched = False

            if matched:
                self.recs_true.increment()
            branch = self.then_chain if matched else self.else_chain

            if not branch:
                yield record
                continue

            branch.reset()
            for out in branch:
                yield out
|
|
394
|
+
|
|
395
|
+
|
|
346
396
|
class SubExpression(Pipe, ProgressIgnore):
|
|
347
397
|
@classmethod
|
|
348
398
|
def create(cls, token: str) -> Pipe:
|
|
@@ -357,17 +407,42 @@ class SubExpression(Pipe, ProgressIgnore):
|
|
|
357
407
|
super().__init__(ptok, usage)
|
|
358
408
|
self.subexp_ops = []
|
|
359
409
|
self.stack_helper = StackLoader()
|
|
360
|
-
self.subexp_stack = OperandStack()
|
|
410
|
+
self.subexp_stack = OperandStack()
|
|
361
411
|
self.upstream_source = UpstreamSource()
|
|
362
412
|
self.subexp_stack.push(self.upstream_source)
|
|
363
|
-
self.recursions = 0
|
|
413
|
+
self.recursions = 0
|
|
364
414
|
self.subexp_left = None
|
|
415
|
+
self.in_else_branch = False
|
|
416
|
+
self.conditional_then_chain = None
|
|
417
|
+
|
|
418
|
+
def enter_else_branch(self):
    """Start collecting the else branch.

    The chain built so far is taken off the sub-expression stack and kept as
    the 'then' chain; the stack is re-seeded with the upstream source so the
    else operations build a separate chain.
    """
    stack = self.subexp_stack
    self.conditional_then_chain = stack.pop()
    stack.push(self.upstream_source)
    self.in_else_branch = True
|
|
423
|
+
|
|
424
|
+
def finish_conditional(self, token: str):
    """Close the conditional and return the resulting SubExpressionIf.

    ``token`` has the form ``if:<expr>``; everything after the first colon is
    the condition. The chain on top of the sub-expression stack is the else
    chain when an ``else`` was seen, otherwise it is the then chain and there
    is no else chain. An else branch with no operations (the bare upstream
    source) is wrapped in an IdentityPipe. Conditional state is cleared and
    the stack re-seeded with the upstream source before returning.
    """
    expr = token.split(':', 1)[1]

    top = self.subexp_stack.pop()
    if self.in_else_branch:
        then_chain, else_chain = self.conditional_then_chain, top
    else:
        then_chain, else_chain = top, None

    if else_chain is self.upstream_source:
        passthrough = IdentityPipe(None, None)
        passthrough.add_source(self.upstream_source)
        else_chain = passthrough

    # Reset so the sub-expression can collect further operations.
    self.in_else_branch = False
    self.conditional_then_chain = None
    self.subexp_stack.push(self.upstream_source)
    return SubExpressionIf(expr, then_chain, else_chain, self.upstream_source)
|
|
365
440
|
|
|
366
441
|
def add_subop(self, op):
|
|
367
442
|
self.subexp_ops.append(op)
|
|
368
443
|
if isinstance(op, SubExpression):
|
|
369
444
|
self.recursions += 1
|
|
370
|
-
elif isinstance(op, SubExpressionOver):
|
|
445
|
+
elif isinstance(op, (SubExpressionOver, SubExpressionIf)):
|
|
371
446
|
self.recursions -= 1
|
|
372
447
|
self.stack_helper.add_operator(op, self.subexp_stack)
|
|
373
448
|
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
# pjk/pipes/ddiff.py
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
from itertools import zip_longest
|
|
8
|
+
|
|
9
|
+
from deepdiff import DeepDiff
|
|
10
|
+
|
|
11
|
+
from pjk.components import Pipe
|
|
12
|
+
from pjk.usage import Usage, ParsedToken
|
|
13
|
+
from pjk.progress import papi
|
|
14
|
+
|
|
15
|
+
_PAD = object()
|
|
16
|
+
|
|
17
|
+
_DDIFF_OLD_NEW_KEYS = (
    ("old_value", "left_value"),
    ("new_value", "right_value"),
    ("old_type", "left_type"),
    ("new_type", "right_type"),
)

# Same pairs as a mapping, built once so each key is renamed with one lookup.
_DDIFF_KEY_RENAMES = dict(_DDIFF_OLD_NEW_KEYS)


def _diff_left_right_labels(obj):
    """Map DeepDiff old/new keys to left/right (matches stream argument order).

    Walks dicts and lists recursively and returns a new structure in which
    every dict key listed in ``_DDIFF_OLD_NEW_KEYS`` is renamed (for example
    ``old_value`` -> ``left_value``). Other keys and all non-container values
    are returned as they are; the input is not modified.
    """
    if isinstance(obj, dict):
        return {
            _DDIFF_KEY_RENAMES.get(key, key): _diff_left_right_labels(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [_diff_left_right_labels(item) for item in obj]
    return obj
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class DdiffPipe(Pipe):
    """Two-input pipe that deep-diffs its left and right streams in lockstep.

    Records are paired by position. Each output record has the form
    ``{"left": ..., "right": ..., "diff": ...}`` where ``diff`` is the
    DeepDiff result with old/new keys relabelled to left/right.
    """

    arity = 2

    @classmethod
    def usage(cls):
        u = Usage(
            name="ddiff",
            desc="Lockstep deep diff of two record streams.",
            component_class=cls,
        )
        u.def_syntax("pjk <left_source> <right_source> ddiff ...")

        # The two boolean switches share the same shape.
        for flag, text in (
            ("ignore_order", "DeepDiff ignore_order (lists and sets)"),
            ("omit_equal", "Suppress output when the two records are deeply equal"),
        ):
            u.def_param(
                name=flag,
                usage=text,
                valid_values={"true", "false"},
                default="false",
            )
        u.def_param(
            name="significant_digits",
            usage="DeepDiff significant_digits for numeric comparisons",
            is_num=True,
            default=None,
        )

        ford_token = "[{ferry:'orca', cars:[{make: 'ford', size:9}]}]"
        bmw_token = "[{ferry:'orca', cars:[{make: 'bmw', size:4}]}]"
        ford = {"ferry": "orca", "cars": [{"make": "ford", "size": 9}]}
        bmw = {"ferry": "orca", "cars": [{"make": "bmw", "size": 4}]}
        compact = (",", ":")

        # Identical inputs: the diff is empty.
        identical = [{"left": ford, "right": ford, "diff": {}}]
        u.def_example(
            expr_tokens=[ford_token, ford_token, "ddiff"],
            expect=json.dumps(identical, separators=compact),
        )

        # Differing inputs: the expected diff is computed the same way
        # __iter__ computes it.
        relabelled = _diff_left_right_labels(json.loads(DeepDiff(ford, bmw).to_json()))
        differing = [{"left": ford, "right": bmw, "diff": relabelled}]
        u.def_example(
            expr_tokens=[ford_token, bmw_token, "ddiff"],
            expect=json.dumps(differing, separators=compact),
        )
        return u

    def __init__(self, ptok: ParsedToken, usage: Usage):
        super().__init__(ptok, usage)
        self.recs_in = papi.get_counter(self, "recs_in", display=False)
        self.recs_out = papi.get_counter(self, "recs_out")

    def reset(self):
        """Nothing to reset; the pipe keeps no per-run state of its own."""
        pass

    @staticmethod
    def _truthy(param) -> bool:
        """Return True only for a value whose string form is 'true' (any case)."""
        return param is not None and str(param).lower() == "true"

    def __iter__(self):
        ignore_order = self._truthy(self.usage.get_param("ignore_order"))
        omit_equal = self._truthy(self.usage.get_param("omit_equal"))
        sig = self.usage.get_param("significant_digits")

        # Only pass options that were actually requested.
        options = {}
        if ignore_order:
            options["ignore_order"] = True
        if sig is not None:
            options["significant_digits"] = sig

        for pair in zip_longest(self.left, self.right, fillvalue=_PAD):
            self.recs_in.increment()
            # When one stream ends first, its side is an empty record.
            left_rec, right_rec = ({} if rec is _PAD else rec for rec in pair)

            result = DeepDiff(left_rec, right_rec, **options)
            # Normalize to JSON-native dict/list (to_dict() may use e.g. SetOrdered).
            diff_map = _diff_left_right_labels(json.loads(result.to_json()))
            if omit_equal and not diff_map:
                continue
            self.recs_out.increment()
            yield {"left": left_rec, "right": right_rec, "diff": diff_map}
|