python-jack-knife 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. pjk/__init__.py +5 -0
  2. pjk/base.py +377 -0
  3. pjk/common.py +150 -0
  4. pjk/log.py +67 -0
  5. pjk/main.py +106 -0
  6. pjk/man_page.py +125 -0
  7. pjk/parser.py +284 -0
  8. pjk/pipes/__init__.py +0 -0
  9. pjk/pipes/denorm.py +68 -0
  10. pjk/pipes/factory.py +62 -0
  11. pjk/pipes/filter.py +57 -0
  12. pjk/pipes/head.py +34 -0
  13. pjk/pipes/join.py +85 -0
  14. pjk/pipes/let_reduce.py +198 -0
  15. pjk/pipes/map.py +91 -0
  16. pjk/pipes/move_field.py +36 -0
  17. pjk/pipes/postgres_pipe.py +209 -0
  18. pjk/pipes/remove_field.py +36 -0
  19. pjk/pipes/select.py +42 -0
  20. pjk/pipes/sort.py +63 -0
  21. pjk/pipes/tail.py +39 -0
  22. pjk/pipes/user_pipe_factory.py +45 -0
  23. pjk/pipes/where.py +49 -0
  24. pjk/registry.py +143 -0
  25. pjk/sinks/__init__.py +0 -0
  26. pjk/sinks/csv_sink.py +33 -0
  27. pjk/sinks/ddb.py +54 -0
  28. pjk/sinks/devnull.py +31 -0
  29. pjk/sinks/dir_sink.py +59 -0
  30. pjk/sinks/expect.py +53 -0
  31. pjk/sinks/factory.py +108 -0
  32. pjk/sinks/graph.py +57 -0
  33. pjk/sinks/graph_bar_line.py +229 -0
  34. pjk/sinks/graph_cumulative.py +55 -0
  35. pjk/sinks/graph_hist.py +72 -0
  36. pjk/sinks/graph_scatter.py +29 -0
  37. pjk/sinks/json_sink.py +23 -0
  38. pjk/sinks/s3_sink.py +100 -0
  39. pjk/sinks/sinks.py +68 -0
  40. pjk/sinks/stdout.py +44 -0
  41. pjk/sinks/tsv_sink.py +22 -0
  42. pjk/sinks/user_sink_factory.py +43 -0
  43. pjk/sources/__init__.py +0 -0
  44. pjk/sources/csv_source.py +28 -0
  45. pjk/sources/dir_source.py +69 -0
  46. pjk/sources/factory.py +100 -0
  47. pjk/sources/format_usage.py +11 -0
  48. pjk/sources/inline_source.py +56 -0
  49. pjk/sources/json_source.py +35 -0
  50. pjk/sources/lazy_file.py +16 -0
  51. pjk/sources/lazy_file_local.py +22 -0
  52. pjk/sources/lazy_file_s3.py +28 -0
  53. pjk/sources/parquet_source.py +32 -0
  54. pjk/sources/s3_source.py +146 -0
  55. pjk/sources/source_list.py +23 -0
  56. pjk/sources/sql_source.py +32 -0
  57. pjk/sources/tsv_source.py +15 -0
  58. pjk/sources/user_source_factory.py +33 -0
  59. pjk/version.py +4 -0
  60. python_jack_knife-0.5.0.dist-info/METADATA +254 -0
  61. python_jack_knife-0.5.0.dist-info/RECORD +65 -0
  62. python_jack_knife-0.5.0.dist-info/WHEEL +5 -0
  63. python_jack_knife-0.5.0.dist-info/entry_points.txt +2 -0
  64. python_jack_knife-0.5.0.dist-info/licenses/LICENSE +202 -0
  65. python_jack_knife-0.5.0.dist-info/top_level.txt +1 -0
pjk/pipes/select.py ADDED
@@ -0,0 +1,42 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # pjk/pipes/select.py
5
+
6
+ from pjk.base import Pipe, Usage, ParsedToken, UsageError
7
+
8
class SelectFields(Pipe):
    """Pipe that keeps only the fields named in its ``fields`` argument.

    Records are modified in place: every key that is not in the configured
    field set is removed before the record is passed downstream.
    """

    deep_copyable: bool = True

    @classmethod
    def usage(cls):
        """Return the Usage description for the ``sel`` component."""
        spec = Usage(
            name='sel',
            desc='Keep only the specified fields from each record',
            component_class=cls
        )
        spec.def_arg(name='fields', usage='Comma-separated list of fields to retain')
        spec.def_example(
            expr_tokens=["{id:1, dir:'up', color:'blue'}", 'sel:id,color'],
            expect="id: 1, color:'blue'"
        )
        return spec

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Parse the comma-separated field list from the bound usage.

        Raises:
            UsageError: if the argument is missing or contains no non-blank
                field name.
        """
        super().__init__(ptok)

        raw_fields = usage.get_arg('fields')
        if not raw_fields:
            raise UsageError("select:<f1,f2,...> requires at least one field")

        # Trim whitespace around each name and drop empty entries ("a,,b").
        trimmed = (part.strip() for part in raw_fields.split(','))
        self.keep_fields = {name for name in trimmed if name}
        if not self.keep_fields:
            raise UsageError("select must include at least one valid field name")

    def reset(self):
        """No per-run state to clear."""
        pass  # stateless

    def __iter__(self):
        """Yield each upstream record with all non-selected keys removed."""
        for record in self.left:
            # Snapshot the unwanted keys first so the record is never resized
            # while its keys are being iterated.
            unwanted = [key for key in record.keys() if key not in self.keep_fields]
            for key in unwanted:
                record.pop(key)
            yield record
pjk/pipes/sort.py ADDED
@@ -0,0 +1,63 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # pjk/pipes/sort.py
5
+
6
+ from pjk.base import Pipe, ParsedToken, Usage, UsageError
7
+
8
class SortPipe(Pipe):
    """Pipe that buffers all upstream records and yields them sorted by one field.

    Records whose sort field is absent (or ``None``) are always placed after
    the sorted records, regardless of direction.
    """

    @classmethod
    def usage(cls):
        """Return the Usage description for the ``sort`` component."""
        spec = Usage(
            name='sort',
            desc="Sort records by a single field (records with missing field sort last).",
            component_class=cls
        )
        spec.def_arg(name='field', usage="+name or -name for ascending or decending sort by field 'name'.")
        spec.def_example(
            expr_tokens=["[{id:17}, {id:10}, {id:1}]", 'sort:+id'],
            expect="[{id:1}, {id:10}, {id:17}]"
        )
        spec.def_example(
            expr_tokens=["[{id:1}, {color:'blue'}, {color:'green'}]", 'sort:-color'],
            expect="[{color:'green'}, {color:'blue'}, {id:1}]"
        )
        return spec

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Parse the ``[+-]<field>`` argument into a field name and direction.

        Raises:
            UsageError: if the argument is missing or does not start with
                '+' or '-'.
        """
        super().__init__(ptok)

        arg_string = usage.get_arg('field')
        if not arg_string:
            raise UsageError("sort:[+-]<field> requires direction and field name")

        direction, field_name = arg_string[:1], arg_string[1:]
        if direction not in ('-', '+'):
            raise UsageError("sort:[+-]<field> must start with '+' or '-'")

        self.field = field_name
        self.reverse = direction == '-'

        # Filled on first iteration; _index is the position of the next record
        # to yield from the sorted buffer.
        self._buffer = None
        self._index = 0

    def reset(self):
        """Drop the sorted buffer and rewind the read position."""
        self._buffer, self._index = None, 0

    def __iter__(self):
        """Sort the upstream records on first use, then yield them in order."""
        if self._buffer is None:
            self._buffer = list(self.left)

            # Split into records that carry the sort field and those that do not,
            # preserving the original relative order inside each group.
            present, missing = [], []
            for rec in self._buffer:
                group = missing if rec.get(self.field) is None else present
                group.append(rec)

            present.sort(key=lambda rec: rec.get(self.field), reverse=self.reverse)

            self._buffer = present + missing  # always push missing to the end

        # The position lives on the instance, so it persists across iterations
        # until reset() is called.
        while self._index < len(self._buffer):
            yield self._buffer[self._index]
            self._index += 1
62
+
63
+
pjk/pipes/tail.py ADDED
@@ -0,0 +1,39 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # pjk/pipes/tail.py
5
+
6
+ from pjk.base import Pipe, ParsedToken, Usage
7
+
8
class TailPipe(Pipe):
    """Pipe that yields only the last ``limit`` records of its input."""

    @classmethod
    def usage(cls):
        """Return the Usage description for the ``tail`` component."""
        usage = Usage(
            name='tail',
            desc='take last records of input (when single-threaded)',
            component_class=cls
        )
        usage.def_arg(name='limit', usage='number of records', is_num=True)
        usage.def_example(expr_tokens=['[{id:1}, {id:2}]', 'tail:1'], expect="{id:2}")
        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Read the record limit from the bound usage and start with an empty buffer."""
        super().__init__(ptok)
        self.limit = usage.get_arg('limit')

        self.buffer = []     # the retained tail, filled on first iteration
        self.ready = False   # True once the input has been fully consumed

    def reset(self):
        """Forget the retained records so the next iteration re-reads the input."""
        self.buffer.clear()
        self.ready = False

    def __iter__(self):
        """Consume the input once, then yield the retained tail records."""
        if not self.ready:
            # Local import keeps this block self-contained.
            from collections import deque

            # A bounded deque discards the oldest record in O(1); the previous
            # list.pop(0) shifted the whole buffer for every surplus record.
            # A negative limit keeps nothing, as the list-based version did.
            window = deque(maxlen=max(int(self.limit), 0))
            window.extend(self.left)
            self.buffer = list(window)
            self.ready = True

        yield from self.buffer
@@ -0,0 +1,45 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # pjk/pipes/user_pipe_factory.py
5
+
6
+ import importlib.util
7
+ from typing import Optional
8
+ from pjk.base import Pipe, Sink, ParsedToken, UsageError
9
+
10
class UserPipeFactory:
    """Creates a Pipe from a user-supplied Python script named by the token."""

    @staticmethod
    def create(ptok: ParsedToken) -> Optional[Pipe]:
        """Import the script at ``ptok.pre_colon`` and instantiate its Pipe class.

        Returns:
            An instance of the single Pipe subclass defined in the script, or
            None when the script defines no such class.

        Raises:
            UsageError: if the script cannot be loaded or imported, or if it
                defines more than one Pipe class.
        """
        script_path = ptok.pre_colon

        try:
            # Load module dynamically from script path
            spec = importlib.util.spec_from_file_location("user_pipe", script_path)
            if spec is None or spec.loader is None:
                raise UsageError(f"Could not load Python file: {script_path}")

            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
        except UsageError:
            # Already a user-facing error; do not wrap it a second time.
            raise
        except Exception as e:
            # Chain the cause so the original traceback stays available.
            raise UsageError(f"Failed to import {script_path}: {e}") from e

        # Look for exactly one top-level Pipe class that isn't a Sink or base Pipe
        pipe_cls = None
        for value in vars(module).values():
            if (
                isinstance(value, type)
                and issubclass(value, Pipe)
                and not issubclass(value, Sink)
                and value is not Pipe
                and value.__module__ == module.__name__  # skip classes the script merely imported
            ):
                if pipe_cls is not None:
                    raise UsageError(f"Multiple Pipe classes found in {script_path}. Only one is allowed.")
                pipe_cls = value

        if pipe_cls is None:
            return None

        usage = pipe_cls.usage()
        usage.bind(ptok)
        return pipe_cls(ptok, usage)
pjk/pipes/where.py ADDED
@@ -0,0 +1,49 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # pjk/pipes/where.py
5
+
6
+ from pjk.base import Pipe, ParsedToken, NoBindUsage, Usage, UsageError
7
+ from pjk.common import SafeNamespace
8
+
9
class WherePipe(Pipe):
    """Pipe that passes through only the records for which an expression is truthy."""

    @classmethod
    def usage(cls):
        """Return the Usage description for the ``where`` component."""
        usage = NoBindUsage(
            name='where',
            desc="Filter records using a Python expression over fields",
            component_class=cls
        )
        usage.def_arg(name='expr', usage='Python expression using \'f.<field>\' syntax')
        usage.def_example(expr_tokens=["[{size:1}, {size:5}, {size:10}]", "where:f.size >= 5"], expect="[{size:5}, {size:10}]")
        usage.def_example(expr_tokens=["[{color:'blue'}, {color:'red'}, {color:'black'}]", "where:f.color.startswith('bl')"], expect="[{color:'blue'}, {color:'black'}]")
        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Compile the expression that follows the first ':' of the token.

        The expression is taken from the raw token rather than from bound
        usage arguments, so any further ':' characters stay part of it.

        Raises:
            UsageError: if the token carries no ':<expr>' part or the
                expression does not compile.
        """
        super().__init__(ptok, usage)

        _, colon, expr = ptok.whole_token.partition(':')
        if not colon:
            # Previously surfaced as a bare IndexError from split(':', 1)[1].
            raise UsageError("where:<expr> requires a Python expression")

        self.expr = expr
        try:
            self.code = compile(self.expr, '<where>', 'eval')
        except Exception as e:
            raise UsageError(f"Invalid where expression: {self.expr}") from e

    def reset(self):
        """No per-run state to clear."""
        pass  # stateless

    def __iter__(self):
        """Yield the upstream records for which the expression evaluates truthy."""
        for record in self.left:
            f = SafeNamespace(record)
            try:
                # NOTE(review): eval() runs arbitrary Python with builtins
                # available; the expression must come from a trusted user.
                if eval(self.code, {}, {'f': f}):
                    yield record
            except Exception:
                # Deliberate best-effort: a record the expression cannot be
                # evaluated against is treated as not matching.
                continue  # ignore eval errors

    def deep_copy(self):
        """Return a WherePipe over a deep copy of the upstream source.

        Returns None when the upstream source cannot be deep-copied.
        """
        source_clone = self.left.deep_copy()
        if source_clone:
            pipe = WherePipe(self.ptok, self.usage)
            pipe.add_source(source_clone)
            return pipe
        else:
            return None
pjk/registry.py ADDED
@@ -0,0 +1,143 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import os
5
+ from pjk.sinks.factory import SinkFactory
6
+ from pjk.pipes.factory import PipeFactory
7
+ from pjk.sources.factory import SourceFactory
8
+ import importlib.util
9
+ import importlib
10
+ import importlib.metadata
11
+ from pjk.base import Pipe, Source, Sink
12
+
13
class ComponentRegistry:
    """Holds the source, pipe and sink factories and routes components to them."""

    def __init__(self):
        """Create the three factories, register user plugins, then load package extras."""
        self.source_factory = SourceFactory()
        self.pipe_factory = PipeFactory()
        self.sink_factory = SinkFactory()

        sources, pipes, sinks = load_user_components()
        for name, comp in sources.items():
            self.source_factory.register(name, comp)

        for name, comp in pipes.items():
            self.pipe_factory.register(name, comp)

        for name, comp in sinks.items():
            self.sink_factory.register(name, comp)

        load_package_extras()

    def register(self, name, comp):
        """Register a component class with the factory that matches its kind.

        Classification order is Sink, then Pipe, then Source, the same order
        load_user_components uses (the is_pipe/is_source predicates show that
        one class can satisfy several of the base-class checks). The base
        classes themselves and non-class objects are ignored.

        Args:
            name: Name to register under. For pipes that expose ``usage()``,
                the name declared by the usage takes precedence.
            comp: The component class.
        """
        if not isinstance(comp, type):
            return
        if comp is Source or comp is Pipe or comp is Sink:
            return  # the base classes are not registrable components

        # The module-level is_* predicates need the defining module and reject
        # classes from any other module, so the checks are done directly here.
        if issubclass(comp, Sink):
            self.sink_factory.register(name, comp)
        elif issubclass(comp, Pipe):
            if hasattr(comp, "usage"):
                name = comp.usage().name
            self.pipe_factory.register(name, comp)
        elif issubclass(comp, Source):
            self.source_factory.register(name, comp)

    def create_source(self, token: str):
        """Delegate source creation to the source factory."""
        return self.source_factory.create(token)

    def create_pipe(self, token: str):
        """Delegate pipe creation to the pipe factory."""
        return self.pipe_factory.create(token)

    def create_sink(self, token: str):
        """Delegate sink creation to the sink factory."""
        return self.sink_factory.create(token)

    def get_factories(self):
        """Return the factories in source, pipe, sink order."""
        return [self.source_factory, self.pipe_factory, self.sink_factory]

    def print_usage(self):
        """Print the command-line synopsis followed by each factory's descriptions."""
        print('Usage: pjk <source> [<pipe> ...] <sink>')
        print('       pjk man <component> | --all')
        print('       pjk examples')
        print()
        self.source_factory.print_descriptions()
        print()
        self.pipe_factory.print_descriptions()
        print()
        self.sink_factory.print_descriptions()
67
+
68
def is_source(obj, module):
    """Return True if ``obj`` is a pure Source subclass defined in ``module``.

    Pipe and Sink subclasses, the Source base class itself, non-class objects,
    and classes defined in another module are all rejected.
    """
    if not isinstance(obj, type):
        return False
    if not issubclass(obj, Source):
        return False
    if issubclass(obj, Pipe) or issubclass(obj, Sink):
        return False
    # Only classes defined in this module, not ones it imported.
    return obj is not Source and obj.__module__ == module.__name__
77
+
78
def is_pipe(obj, module):
    """Return True if ``obj`` is a Pipe subclass (not a Sink) defined in ``module``.

    The Pipe base class itself, non-class objects, and classes defined in
    another module are rejected.
    """
    if not isinstance(obj, type):
        return False
    if not issubclass(obj, Pipe):
        return False
    if issubclass(obj, Sink):
        return False
    return obj is not Pipe and obj.__module__ == module.__name__
86
+
87
def is_sink(obj, module):
    """Return True if ``obj`` is a Sink subclass defined in ``module``.

    The Sink base class itself, non-class objects, and classes defined in
    another module are rejected.
    """
    if not isinstance(obj, type):
        return False
    if not issubclass(obj, Sink):
        return False
    return obj is not Sink and obj.__module__ == module.__name__
94
+
95
def load_user_components(path=os.path.expanduser("~/.pjk/plugins")):
    """Import every ``*.py`` file in ``path`` and collect its component classes.

    Each file is executed as its own module. Classes defined in that module
    are classified as sink, pipe or source (checked in that order) and stored
    under the name declared by their ``usage()``.

    Args:
        path: Plugin directory. A missing directory yields three empty dicts.

    Returns:
        A ``(sources, pipes, sinks)`` tuple of dicts mapping component name to
        component class.
    """
    sources = {}
    pipes = {}
    sinks = {}

    if not os.path.isdir(path):
        return {}, {}, {}

    for fname in os.listdir(path):
        if not fname.endswith(".py"):
            continue
        fpath = os.path.join(path, fname)
        modname = f"user_component_{fname[:-3]}"
        spec = importlib.util.spec_from_file_location(modname, fpath)
        if not spec or not spec.loader:
            continue
        module = importlib.util.module_from_spec(spec)
        try:
            spec.loader.exec_module(module)
        except Exception as e:
            # Best-effort: one broken plugin must not prevent the others loading.
            print(f"[djk] Failed to load {fname}: {e}")
            continue

        for obj in vars(module).values():
            if not isinstance(obj, type):
                continue

            # Classify first, so usage() is only called on real components and
            # never on classes the plugin merely imported.
            if is_sink(obj, module):
                registry = sinks
            elif is_pipe(obj, module):
                registry = pipes
            elif is_source(obj, module):
                registry = sources
            else:
                continue

            # The registration name comes from usage(). Without it there is no
            # name to use; previously a stale name from an earlier class was
            # reused, or a NameError was raised.
            if not hasattr(obj, "usage"):
                print(f"[pjk] Skipping {obj.__name__} in {fname}: no usage() defined")
                continue

            registry[obj.usage().name] = obj

    return sources, pipes, sinks
133
+
134
+ def load_package_extras():
135
+ """
136
+ Discover and import all installed pjk extras (via entry points).
137
+ """
138
+ for ep in importlib.metadata.entry_points(group="pjk.package_extras"):
139
+ try:
140
+ importlib.import_module(ep.value)
141
+ print(f"[pjk] loaded package extra: {ep.name} -> {ep.value}")
142
+ except Exception as e:
143
+ print(f"[pjk] failed to load extra {ep.name}: {e}")
pjk/sinks/__init__.py ADDED
File without changes
pjk/sinks/csv_sink.py ADDED
@@ -0,0 +1,33 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import csv
5
+ from pjk.base import Sink, Source, ParsedToken, Usage
6
+
7
class CSVSink(Sink):
    """Sink that writes records to ``<path>.csv``, taking the header from the first record."""

    is_format = True

    @classmethod
    def usage(cls):
        """Return the Usage description for the ``csv`` sink."""
        spec = Usage(
            name='csv',
            desc='Write records to a CSV file with dynamic header from first record',
            component_class=cls
        )
        spec.def_arg('path', usage='Path prefix (no extension)')
        return spec

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Derive the output file name by appending ``.csv`` to the ``path`` argument."""
        super().__init__(ptok, usage)
        prefix = usage.get_arg('path')
        self.path = f"{prefix}.csv"

    def process(self) -> None:
        """Write every input record as a CSV row.

        The file is created even when there is no input. The columns are the
        keys of the first record.
        """
        end_of_input = object()

        with open(self.path, 'w', newline='') as out:
            records = iter(self.input)
            first = next(records, end_of_input)
            if first is end_of_input:
                return  # no records: leave the file empty, without a header

            writer = csv.DictWriter(out, fieldnames=first.keys())
            writer.writeheader()
            writer.writerow(first)
            writer.writerows(records)
pjk/sinks/ddb.py ADDED
@@ -0,0 +1,54 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ from pjk.base import Sink, Source, ParsedToken, Usage
5
+ from decimal import Decimal
6
+
7
class DDBSink(Sink):
    """Sink that writes records to a DynamoDB table in batches."""

    @classmethod
    def usage(cls):
        """Return the Usage description for the ``ddb`` sink."""
        usage = Usage(
            name='ddb',
            desc='Write records to a DynamoDB table via batch_writer()',
            component_class=cls
        )
        usage.def_arg('table', usage='DynamoDB table name')
        usage.def_param('batch_size', usage='How many records to write per batch (max 25)')
        return usage

    # NOTE(review): every other sink in this package is constructed as
    # (ptok, usage) and calls super().__init__(ptok, usage); this signature
    # looks stale. Left unchanged here. Confirm against the sink factory.
    def __init__(self, input_source: Source, ptok: ParsedToken, usage: Usage):
        """Read the table name and batch size and open the DynamoDB table resource."""
        super().__init__(input_source)
        import boto3  # lazy import

        self.table_name = usage.get_arg('table')
        self.batch_size = int(usage.get_param('batch_size', default='10'))
        self.num_recs = 0   # total records seen by process()
        self.batch = []     # records waiting to be flushed

        dynamodb = boto3.resource('dynamodb')
        self.table = dynamodb.Table(self.table_name)

    @staticmethod
    def _to_ddb_value(value):
        """Return ``value`` with every float, at any nesting depth, replaced by a Decimal.

        boto3 rejects Python floats wherever they appear in an item, so
        converting only the top-level values is not enough for records that
        contain lists, dicts or sets.
        """
        if isinstance(value, float):
            # Go through str() so the Decimal matches the float's short repr.
            return Decimal(str(value))
        if isinstance(value, dict):
            return {k: DDBSink._to_ddb_value(v) for k, v in value.items()}
        if isinstance(value, list):
            return [DDBSink._to_ddb_value(v) for v in value]
        if isinstance(value, (set, frozenset)):
            return {DDBSink._to_ddb_value(v) for v in value}
        return value

    def process_batch(self):
        """Write the pending records to the table and empty the pending list."""
        if not self.batch:
            return

        with self.table.batch_writer() as batch:
            for item in self.batch:
                batch.put_item(Item=self._to_ddb_value(item))

        self.batch = []

    def process(self):
        """Consume the input, flushing every ``batch_size`` records and once at the end."""
        for record in self.input:
            self.batch.append(record)
            self.num_recs += 1

            if len(self.batch) >= self.batch_size:
                self.process_batch()

        self.process_batch()  # flush the final partial batch
        print(f"DDBSink wrote {self.num_recs} records.")
pjk/sinks/devnull.py ADDED
@@ -0,0 +1,31 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # pjk/sinks/devnull.py
5
+
6
+ from pjk.base import Sink, Source, ParsedToken, Usage
7
+
8
class DevNullSink(Sink):
    """Sink that drains its input, keeping only a count of the records seen."""

    @classmethod
    def usage(cls):
        """Return the Usage description for the ``devnull`` sink."""
        spec = Usage(
            name='devnull',
            desc='Consume all input records and discard them (debug/testing)',
            component_class=cls
        )
        spec.def_example(expr_tokens=['{id:1}', 'devnull'], expect=None)
        return spec

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Start with a record count of zero."""
        super().__init__(ptok, usage)
        self.count = 0

    def process(self):
        """Pull every record from the input and discard it, counting as it goes."""
        for _ in self.input:
            self.count += 1

    def print_info(self):
        """Print the number of records consumed so far."""
        print(f'num_recs:{self.count}')

    def deep_copy(self):
        """Always return None: this sink is not cloned."""
        # until we implement cross-thread coordination
        return None
pjk/sinks/dir_sink.py ADDED
@@ -0,0 +1,59 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import os
5
+ from pjk.base import Source, Sink, ParsedToken, Usage
6
+ from pjk.log import logger
7
+
8
class DirSink(Sink):
    """Sink that writes its input to a numbered file inside an output directory.

    The actual writing is delegated to an instance of ``sink_class`` created
    for the file ``file-NNNN`` under the directory.
    """

    @classmethod
    def usage(cls):
        """Return the Usage description for a directory sink of some ``<format>``."""
        spec = Usage(
            name='<format>',
            desc='Write records to a local directory in the given <format> (e.g., csv)',
            component_class=cls
        )
        spec.def_arg(name='dir', usage='Path to output directory')
        return spec

    def __init__(self, ptok: ParsedToken, usage: Usage, sink_class: type, is_gz: bool, fileno: int = 0):
        """Record the configuration and make sure the output directory exists.

        Args:
            ptok: Token this sink was created from.
            usage: Bound usage; supplies the ``dir`` argument.
            sink_class: Sink class used to write the individual file.
            is_gz: Flag appended to the per-file token after a ':'.
            fileno: Number used in this instance's ``file-NNNN`` name.
        """
        super().__init__(ptok, usage)
        self.ptok = ptok
        self.usage = usage
        self.sink_class = sink_class
        self.is_gz = is_gz
        self.fileno = fileno
        self.num_files = 1  # file number handed to the next deep copy
        self.dir_path = usage.get_arg('dir')  # read from usage, not from ptok directly

        os.makedirs(self.dir_path, exist_ok=True)

    def process(self):
        """Create the per-file sink for this instance's file number and run it."""
        target = os.path.join(self.dir_path, f'file-{self.fileno:04d}')

        # Build a token and bound usage for the delegate exactly as if it had
        # been given on the command line.
        delegate_ptok = ParsedToken(f'{target}:{self.is_gz}')
        delegate_usage = self.sink_class.usage()
        delegate_usage.bind(delegate_ptok)

        delegate = self.sink_class(delegate_ptok, delegate_usage)
        delegate.add_source(self.input)

        logger.debug(f'in process sinking to: {target}')
        delegate.process()

    def deep_copy(self):
        """Return a DirSink over a copy of the input that writes to the next file number.

        Returns None when the input cannot be deep-copied.
        """
        source_clone = self.input.deep_copy()
        if not source_clone:
            return None

        settings = dict(
            ptok=self.ptok,
            usage=self.usage,
            sink_class=self.sink_class,
            is_gz=self.is_gz,
            fileno=self.num_files,
        )
        clone = DirSink(**settings)
        clone.add_source(source_clone)

        # Advance only after the clone is fully wired up.
        self.num_files += 1
        return clone
pjk/sinks/expect.py ADDED
@@ -0,0 +1,53 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ from pjk.base import Source, Sink, ParsedToken, Usage
5
+ from pjk.sources.inline_source import InlineSource
6
+ import sys
7
+
8
class ExpectSink(Sink):
    """Sink that compares its input, record by record, against an inline expectation."""

    # NOTE: ExpectSink intentionally does NOT use Usage due to raw JSON argument parsing
    # e.g., expect:'[{a:1},{a:2}]' must preserve the entire post-colon string unparsed

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Build the expected-record iterator from everything after the first ':'."""
        super().__init__(ptok, usage)
        self.inline = ptok.whole_token.split(':', 1)[-1]
        self.expect_source = InlineSource(self.inline)
        self._expect_iter = iter(self.expect_source)

    @staticmethod
    def _command() -> str:
        """Return the command line without the program name and the trailing expect token."""
        return ' '.join(sys.argv[1:-1])  # omit 'pjk' and 'expect'

    def _failure(self, expected, got) -> ValueError:
        """Build the ValueError that reports one expected/actual mismatch."""
        return ValueError(
            f"expect failure: {self._command()}\n"
            f"expected_record:{expected}\n"
            f"got_record:{got}\n"
            f"entire_expected:{self.inline}"
        )

    def print_info(self):
        """Print a success line for the command."""
        print(f'{self._command()} ==> OK!\n')  # only prints on success

    def process(self) -> None:
        """Check that the input matches the expected records exactly.

        Raises:
            ValueError: on the first differing record, on an input record with
                no expected counterpart, or on an expected record left over
                once the input is exhausted.
        """
        # A sentinel replaces try/except StopIteration, so a failure is raised
        # as a plain ValueError with no unrelated StopIteration context.
        exhausted = object()

        for test_rec in self.input:
            expect_rec = next(self._expect_iter, exhausted)
            if expect_rec is exhausted:
                raise self._failure(None, test_rec)  # more input than expected
            if test_rec != expect_rec:
                raise self._failure(expect_rec, test_rec)

        leftover = next(self._expect_iter, exhausted)
        if leftover is not exhausted:
            raise self._failure(leftover, None)  # fewer input records than expected