python-jack-knife 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. pjk/__init__.py +5 -0
  2. pjk/base.py +377 -0
  3. pjk/common.py +150 -0
  4. pjk/log.py +67 -0
  5. pjk/main.py +106 -0
  6. pjk/man_page.py +125 -0
  7. pjk/parser.py +284 -0
  8. pjk/pipes/__init__.py +0 -0
  9. pjk/pipes/denorm.py +68 -0
  10. pjk/pipes/factory.py +62 -0
  11. pjk/pipes/filter.py +57 -0
  12. pjk/pipes/head.py +34 -0
  13. pjk/pipes/join.py +85 -0
  14. pjk/pipes/let_reduce.py +198 -0
  15. pjk/pipes/map.py +91 -0
  16. pjk/pipes/move_field.py +36 -0
  17. pjk/pipes/postgres_pipe.py +209 -0
  18. pjk/pipes/remove_field.py +36 -0
  19. pjk/pipes/select.py +42 -0
  20. pjk/pipes/sort.py +63 -0
  21. pjk/pipes/tail.py +39 -0
  22. pjk/pipes/user_pipe_factory.py +45 -0
  23. pjk/pipes/where.py +49 -0
  24. pjk/registry.py +143 -0
  25. pjk/sinks/__init__.py +0 -0
  26. pjk/sinks/csv_sink.py +33 -0
  27. pjk/sinks/ddb.py +54 -0
  28. pjk/sinks/devnull.py +31 -0
  29. pjk/sinks/dir_sink.py +59 -0
  30. pjk/sinks/expect.py +53 -0
  31. pjk/sinks/factory.py +108 -0
  32. pjk/sinks/graph.py +57 -0
  33. pjk/sinks/graph_bar_line.py +229 -0
  34. pjk/sinks/graph_cumulative.py +55 -0
  35. pjk/sinks/graph_hist.py +72 -0
  36. pjk/sinks/graph_scatter.py +29 -0
  37. pjk/sinks/json_sink.py +23 -0
  38. pjk/sinks/s3_sink.py +100 -0
  39. pjk/sinks/sinks.py +68 -0
  40. pjk/sinks/stdout.py +44 -0
  41. pjk/sinks/tsv_sink.py +22 -0
  42. pjk/sinks/user_sink_factory.py +43 -0
  43. pjk/sources/__init__.py +0 -0
  44. pjk/sources/csv_source.py +28 -0
  45. pjk/sources/dir_source.py +69 -0
  46. pjk/sources/factory.py +100 -0
  47. pjk/sources/format_usage.py +11 -0
  48. pjk/sources/inline_source.py +56 -0
  49. pjk/sources/json_source.py +35 -0
  50. pjk/sources/lazy_file.py +16 -0
  51. pjk/sources/lazy_file_local.py +22 -0
  52. pjk/sources/lazy_file_s3.py +28 -0
  53. pjk/sources/parquet_source.py +32 -0
  54. pjk/sources/s3_source.py +146 -0
  55. pjk/sources/source_list.py +23 -0
  56. pjk/sources/sql_source.py +32 -0
  57. pjk/sources/tsv_source.py +15 -0
  58. pjk/sources/user_source_factory.py +33 -0
  59. pjk/version.py +4 -0
  60. python_jack_knife-0.5.0.dist-info/METADATA +254 -0
  61. python_jack_knife-0.5.0.dist-info/RECORD +65 -0
  62. python_jack_knife-0.5.0.dist-info/WHEEL +5 -0
  63. python_jack_knife-0.5.0.dist-info/entry_points.txt +2 -0
  64. python_jack_knife-0.5.0.dist-info/licenses/LICENSE +202 -0
  65. python_jack_knife-0.5.0.dist-info/top_level.txt +1 -0
pjk/pipes/join.py ADDED
@@ -0,0 +1,85 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # djk/pipes/join.py
5
+
6
+ from pjk.base import Pipe, Usage, UsageError, ParsedToken, KeyedSource
7
+
8
class JoinPipe(Pipe):
    """Join a left record stream against a right-hand KeyedSource.

    For every left record the right source is asked for a match via
    ``lookup``. Matched records are emitted as a merge in which the right
    side's fields win on conflict. What happens to unmatched records depends
    on the ``mode`` argument ('left', 'inner' or 'outer').
    """
    arity = 2 # left = record stream, right = KeyedSource

    @classmethod
    def usage(cls):
        """Describe the ``join`` component: its argument, syntax and examples."""
        usage = Usage(
            name='join',
            desc="Join records against a keyed source on shared fields",
            component_class=cls
        )
        usage.def_arg(
            name='mode',
            usage="'left', 'inner', or 'outer' join behavior",
            valid_values={'left', 'inner', 'outer'}
        )
        usage.def_syntax("pjk <left_source> <map_source> map:<how>:<key> join:<mode> <sink>")

        # All examples share the same two inputs and key map; only the join
        # token and the expected output differ.
        expected_by_token = (
            ("join:left", "[{color:'blue', price:50}, {color:'green'}]"),
            ("join:inner", "[{color:'blue', price:50}]"),
            ("join:outer", "[{color:'blue', price:50}, {color:'green'}, {color:'red', price: 20}]"),
        )
        for join_token, expected in expected_by_token:
            usage.def_example(
                expr_tokens=[
                    "[{color:'blue'},{color:'green'}]",
                    "[{color:'blue', price:50}, {color:'red', price:20}]",
                    'map:o:color',
                    join_token,
                ],
                expect=expected)
        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Read the join mode from ``usage`` and clear both inputs and flags."""
        super().__init__(ptok)

        self.mode = usage.get_arg('mode')
        self.left = None
        self.right = None
        self._pending_right = None
        self._check_right = False

    def reset(self):
        """Clear the per-run bookkeeping attributes."""
        self._pending_right = None
        self._check_right = False

    def __iter__(self):
        """Yield joined records.

        Raises:
            UsageError: if the right input is not a KeyedSource.
        """
        if not isinstance(self.right, KeyedSource):
            raise UsageError("right source must be a KeyedSource")

        for left_rec in self.left:
            match = self.right.lookup(left_rec)

            if match is None:
                # 'left' and 'outer' keep unmatched left records; any other
                # mode (notably 'inner') drops them.
                if self.mode in ("left", "outer"):
                    yield left_rec
                continue

            merged = dict(left_rec)
            merged.update(match)  # right-hand fields override left-hand ones
            yield merged

        # An outer join also emits the right records no left record looked up.
        if self.mode == "outer":
            yield from self.right.get_unlookedup_records()
@@ -0,0 +1,198 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # djk/pipes/let_reduce.py
5
+
6
+ from pjk.base import Pipe, ParsedToken, NoBindUsage, Usage, UsageError, TokenError
7
+ from pjk.common import SafeNamespace, ReducingNamespace
8
+ import re
9
+ import ast
10
+ import json
11
+
12
+ # --- Shared Utilities ---
13
+ def parse_args(token: str):
14
+ pattern = re.compile(r'(?P<field>\w+)(?P<op>[:=\+\-\*/]+)(?P<rest>.+)$')
15
+ match = pattern.fullmatch(token)
16
+ if not match:
17
+ raise ValueError(f"Invalid token syntax: {token!r}")
18
+ return match.groupdict()
19
+
20
def do_eval(expr, env):
    """Evaluate a Python expression against a copy of ``env`` plus ``json``.

    ``env`` is supplied as the *globals* of the evaluation. Names passed only
    as eval locals are invisible inside lambdas and nested scopes, so an
    expression such as ``(lambda: f.x)()`` would fail with a NameError.

    Args:
        expr: Python expression source.
        env: mapping of names made available to the expression; it is copied
            and never modified.

    Returns:
        The value of the expression.

    Raises:
        UsageError: if building the scope or evaluating the expression fails;
            the original exception is attached as ``__cause__``.
    """
    # NOTE(security): eval() runs arbitrary Python. That is only acceptable
    # while expressions come from the person invoking the tool.
    try:
        scope = dict(env)  # copy: eval adds '__builtins__' to its globals dict
        scope['json'] = json
        return eval(expr, scope)
    except Exception as exc:
        raise UsageError(f"UsageError in expression: {expr}") from exc
27
+
28
def eval_regular(expr: str, record: dict):
    """Evaluate a ``let`` right-hand side for one record.

    A right-hand side made only of letters, digits and underscores is
    returned unchanged as a string. Anything else is evaluated as Python,
    with the record exposed as ``f``.
    """
    namespace = SafeNamespace(record)
    is_bare_word = re.match(r'[a-zA-Z0-9_]+$', expr) is not None
    return expr if is_bare_word else do_eval(expr, {'f': namespace})
33
+
34
def _references_acc(expr: str) -> bool:
    """Return True when ``expr`` uses the bare name ``acc``.

    A substring test would also fire on names such as ``f.account``, so the
    expression is parsed and its Name nodes inspected. If the text does not
    parse on its own, the substring test is used as a fallback and the
    caller's own parse step reports any real syntax error.
    """
    try:
        tree = ast.parse(expr, mode='eval')
    except SyntaxError:
        return 'acc' in expr
    return any(isinstance(n, ast.Name) and n.id == 'acc' for n in ast.walk(tree))


def eval_accumulating(expr: str, record: dict, op: str, acc=None):
    """Fold one record into an accumulator and return the new accumulator.

    Args:
        expr: right-hand-side Python expression; the record is exposed as
            ``f`` and the current accumulator as ``acc``.
        record: the record being folded in.
        op: accumulating operator ('+=', '-=', '*=' or '/=').
        acc: current accumulator value.

    Returns:
        The updated accumulator.

    Raises:
        UsageError: if the expression is not valid Python, or if evaluation
            through ``do_eval`` fails.
    """
    # For -=, *= and /= an expression that does not mention acc is shorthand
    # for "acc <operator> (expr)".
    if op in ('-=', '*=', '/=') and not _references_acc(expr):
        expr = f'acc {op[0]} ({expr})'

    env = {
        'f': SafeNamespace(record),
        'acc': acc
    }

    try:
        node = ast.parse(expr, mode='eval').body
    except SyntaxError as exc:
        raise UsageError(f"Invalid expression: {expr}") from exc

    # Comprehensions see the record through ReducingNamespace instead.
    if isinstance(node, (ast.ListComp, ast.SetComp, ast.DictComp)):
        env['f'] = ReducingNamespace(record)

    if isinstance(node, ast.ListComp):
        values = eval(compile(ast.Expression(node), '<reduce:listcomp>', 'eval'), {}, env)
        return (acc or []) + list(values)

    if isinstance(node, ast.SetComp):
        values = eval(compile(ast.Expression(node), '<reduce:setcomp>', 'eval'), {}, env)
        return (acc or set()).union(values)

    if isinstance(node, ast.DictComp):
        values = eval(compile(ast.Expression(node), '<reduce:dictcomp>', 'eval'), {}, env)
        return {**(acc or {}), **values}

    if op == '+=':
        value = eval(expr, {}, env)
        # A falsy accumulator (the initial 0) is replaced by the neutral
        # element of the value's own type before adding.
        if isinstance(value, (int, float)):
            return (acc or 0) + value
        elif isinstance(value, str):
            return str(acc or '') + value
        elif isinstance(value, list):
            return (acc or []) + value
        else:
            return (acc or []) + [value]

    # -=, *=, /= (already rewritten above where needed) and anything else.
    return do_eval(expr, env)
78
+
79
+ # --- LetPipe (simple field assignment) ---
80
class LetPipe(Pipe):
    """Pipe that sets one field on every record from a right-hand side.

    With the ``:`` operator the right-hand side is stored as literal text;
    with any other operator it is passed to ``eval_regular``.
    """

    @classmethod
    def usage(cls):
        """Describe the ``let`` component and its examples."""
        # NoBindUsage: the token needs custom parsing, so arguments are not bound.
        usage = NoBindUsage(
            name='let',
            desc="set a new field equal to a rhs python expression",
            component_class=cls
        )
        usage.def_arg(name='rhs', usage="python rhs expression (use f.<field> syntax)")

        examples = (
            ('let:there=f.hello + 1', "{hello:0, there: 1}"),
            ('let:foo:bar', "{hello:0, foo: 'bar'}"),
            ('let:foo=int(1)', "{hello:0, foo: 1}"),
        )
        for let_token, expected in examples:
            usage.def_example(expr_tokens=['{hello:0}', let_token], expect=expected)
        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Parse ``<field><op><rhs>`` from the text after the first ':'.

        Raises:
            TokenError: if an accumulating operator is used.
        """
        super().__init__(ptok)
        parsed = parse_args(ptok.whole_token.split(':', 1)[-1])
        self.field, self.op, self.rest = parsed['field'], parsed['op'], parsed['rest']

        if self.op in ('+=', '-=', '*=', '/='):
            raise TokenError("Aggregation operator not allowed in let, use reduce:")

    def reset(self):
        """Nothing to reset; the pipe keeps no per-run state."""
        pass # stateless

    def __iter__(self):
        """Yield each left record after assigning the target field in place."""
        for record in self.left:
            is_literal = self.op == ':'
            record[self.field] = self.rest if is_literal else eval_regular(self.rest, record)
            yield record
114
+
115
+ # --- ReducePipe (stateful accumulator) ---
116
def is_comprehension(expr: str) -> bool:
    """Return True if ``expr`` parses as a list, set or dict comprehension.

    Text that is not a valid Python expression yields False.
    """
    try:
        parsed = ast.parse(expr, mode='eval')
    except SyntaxError:
        return False
    comprehension_nodes = (ast.ListComp, ast.SetComp, ast.DictComp)
    return isinstance(parsed.body, comprehension_nodes)
122
+
123
class ReducePipe(Pipe):
    """Pipe that folds every record into one accumulated value.

    Records pass through unchanged; the accumulated ``(field, value)`` pair
    is made available through ``get_subexp_result``.
    """

    @classmethod
    def usage(cls):
        """Describe the ``reduce`` component and its examples."""
        # NoBindUsage: the token needs custom parsing, so arguments are not bound.
        usage = NoBindUsage(
            name='reduce',
            desc=("set a new field equal to a reduction over records of a sub or main expression\n"
                  "rhs operators must be accumulating, e.g. +=, -=, *=, /=\n"
                  "or use list or dictionary comprehension"),
            component_class=cls
        )
        usage.def_arg(name='rhs', usage="accumulating python rhs expression (use f.<field> syntax)")

        examples = [
            (["{ferry:'orca', cars:[{make: 'ford', size:9}, {make:'bmw', size:4}]}",
              '[', 'reduce:total_size+=f.size', 'over:cars'],
             "{ferry:'orca', cars:[{make: 'ford', size:9}, {make:'bmw', size:4}], total_size: 13}"),
            (["[{make: 'honda'}, {make: 'ford'}, {make:'bmw'}]",
              'reduce:cars=[x for x in f.make]'],
             "{cars:['honda', 'ford', 'bmw']}"),
            (["[{i:[1,2]},{i:[3]}]",
              'reduce:flattened=[x for x in f.i]'],
             "{flattened:[1, 2, 3]}"),
            (["[{i:1},{i:3}, {i:7}]",
              'reduce:diff-=f.i'],
             "{diff:-11}"),
            (["[{i:1},{i:3}, {i:7}]",
              'reduce:product*=f.i'],
             "{product:21}"),
        ]
        for tokens, expected in examples:
            usage.def_example(expr_tokens=tokens, expect=expected)

        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Parse ``<field><op><rhs>`` from the text after the first ':'.

        A non-accumulating operator is accepted only when the right-hand
        side is a comprehension, in which case it is treated as ``+=``.

        Raises:
            TokenError: if the operator is not accumulating and the
                right-hand side is not a comprehension.
        """
        super().__init__(ptok)
        parsed = parse_args(ptok.whole_token.split(':', 1)[-1])
        self.field = parsed['field']
        self.op = parsed['op']
        self.rest = parsed['rest']

        is_accumulating = self.op in ('+=', '-=', '*=', '/=')
        if not is_accumulating:
            if not is_comprehension(self.rest):
                raise TokenError("Reduce pipe requires an accumulating operator (+=, -=, etc.), unless RHS is a comprehension")
            self.op = '+='

        self.accum_value = self.initial_acc_value()

    def initial_acc_value(self):
        """Return the starting accumulator for the current operator."""
        starting_values = {'+=': 0, '*=': 1, '-=': 0, '/=': 1.0}
        return starting_values.get(self.op)

    def reset(self):
        """Restart accumulation from the operator's starting value."""
        self.accum_value = self.initial_acc_value()

    def __iter__(self):
        """Fold each left record into the accumulator, then pass it on unchanged."""
        for record in self.left:
            self.accum_value = eval_accumulating(self.rest, record, self.op, self.accum_value)
            yield record

    def get_subexp_result(self):
        """Return ``(field, accumulated value)``."""
        return (self.field, self.accum_value)
pjk/pipes/map.py ADDED
@@ -0,0 +1,91 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # pjk/pipes/map.py
5
+
6
+ from typing import Optional
7
+ from pjk.base import ParsedToken, Usage, Pipe, KeyedSource
8
+
9
class MapPipe(Pipe, KeyedSource):
    """Index left records by one or more key fields.

    In override mode ('o') the last record seen for a key replaces earlier
    ones. In group mode ('g') the key fields are moved out of each record
    into a parent ``{<key fields>, child: [...]}`` record. The index can be
    iterated or queried through ``lookup``.
    """

    @classmethod
    def usage(cls):
        """Describe the ``map`` component, its arguments and examples."""
        usage = Usage(
            name='map',
            desc="maps records to key, either overriding or grouping duplicates. Creates Keyed Source for join or filter.",
            component_class=cls
        )
        usage.def_arg(name='how', usage="'o' for override, 'g' for group", valid_values={'o', 'g'})
        usage.def_arg(name='key', usage='comma separated fields to map by')

        three_colors = "[{id: 1, color:'blue'}, {id:1, color:'green'}, {id:2, color:'red'}]"
        usage.def_example(
            expr_tokens=[three_colors, 'map:o:id'],
            expect="[{id:2, color:'red'}, {id:1, color:'green'}]")
        usage.def_example(
            expr_tokens=[three_colors, 'map:g:id'],
            expect="[{id:2, child:[{color:'red'}]}, {id:1, child:[{color:'blue'},{color: 'green'}]}]")
        usage.def_example(
            expr_tokens=["[{id: 1, color:'blue', size:5}, {id:1, color:'green', size:10}]", 'map:o:id,color'],
            expect="[{id:1, color:'green', size: 10}, {id:1, color:'blue', size:5}]")

        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Read the mode and key fields and start with an empty, unloaded index."""
        super().__init__(ptok)
        self.is_group = usage.get_arg('how') == 'g'
        self.fields = usage.get_arg('key').split(',')
        self.rec_map = {}       # key tuple -> record not yet returned by lookup()
        self.matched_map = {}   # key tuple -> record already returned by lookup()
        self._rec_list = None   # records still to be yielded by __iter__
        self.is_loaded = False

    def reset(self):
        """Drop all indexed records so the next use reloads from the left input."""
        self.rec_map.clear()
        self.matched_map.clear()
        self._rec_list = None
        self.is_loaded = False

    def load(self):
        """Read the whole left input into ``rec_map``; later calls do nothing."""
        if self.is_loaded:
            return
        self.is_loaded = True

        for record in self.left:
            # Group mode moves the key fields out of the record; override
            # mode only reads them.
            if self.is_group:
                key_rec = {name: record.pop(name, None) for name in self.fields}
            else:
                key_rec = {name: record.get(name) for name in self.fields}
            key = tuple(key_rec.values())

            if not self.is_group:
                self.rec_map[key] = record  # last record for a key wins
                continue

            parent = self.rec_map.get(key)
            if parent:
                parent['child'].append(record)
            else:
                key_rec['child'] = [record]
                self.rec_map[key] = key_rec

    def __iter__(self):
        """Yield the indexed records, last-inserted key first, consuming the list."""
        if not self.is_loaded:
            self.load()
        if self._rec_list is None:
            self._rec_list = list(self.rec_map.values())

        while self._rec_list:
            yield self._rec_list.pop()

    def lookup(self, left_rec) -> Optional[dict]:
        """Return the record whose key matches ``left_rec``, or None.

        The first hit moves the record from ``rec_map`` to ``matched_map``,
        so repeated lookups still succeed while the record no longer counts
        as un-looked-up.
        """
        if not self.is_loaded:
            self.load()

        key = tuple(map(left_rec.get, self.fields))
        first_hit = self.rec_map.pop(key, None)
        if first_hit is None:
            return self.matched_map.get(key)
        self.matched_map[key] = first_hit
        return first_hit

    def get_unlookedup_records(self):
        """Return a list of the records no ``lookup`` call has matched so far."""
        if not self.is_loaded:
            self.load()
        return list(self.rec_map.values())
@@ -0,0 +1,36 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # djk/pipes/move_field.py
5
+
6
+ from pjk.base import Pipe, ParsedToken, Usage
7
+
8
class MoveField(Pipe):
    """Pipe that renames one field on every record (the ``as`` component)."""

    @classmethod
    def usage(cls):
        """Describe the ``as`` component, its arguments and example."""
        usage = Usage(
            name='as',
            desc='Move one field to another key in the record',
            component_class=cls
        )
        for arg_name, arg_help in (('src', 'Source field name'),
                                   ('dst', 'Destination field name')):
            usage.def_arg(name=arg_name, usage=arg_help)
        usage.def_example(expr_tokens=['{up:1}', 'as:up:down'], expect="{down:1}")

        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Read the source and destination field names; start the counter at zero."""
        super().__init__(ptok)
        self.src = usage.get_arg('src')
        self.dst = usage.get_arg('dst')
        self.count = 0

    def reset(self):
        """Zero the processed-record counter."""
        self.count = 0

    def __iter__(self):
        """Yield each left record with ``src`` moved to ``dst`` when present."""
        for record in self.left:
            self.count += 1
            try:
                value = record.pop(self.src)
            except KeyError:
                pass  # record has no source field; pass it through untouched
            else:
                record[self.dst] = value
            yield record
@@ -0,0 +1,209 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+ #
4
+ # djk/pipes/postgres_pipe.py
5
+
6
+ import base64
7
+ import datetime as _dt
8
+ import uuid
9
+ from decimal import Decimal
10
+ from typing import Any, Dict, Optional
11
+
12
+ from pjk.base import Pipe, ParsedToken, NoBindUsage, Usage, TokenError
13
+ from pjk.common import Lookups
14
+
15
+
16
class DBClient:
    """Simple shared-connection wrapper for pg8000.

    The first instance opens a connection and stores it on the class. Later
    instances reuse that connection whatever arguments they are given, until
    ``close`` clears it.
    """
    _connection = None

    def __init__(self, host: str, username: str, password: Optional[str],
                 dbname: str, port: int = 5432, ssl: bool = False):
        """Attach to the shared connection, opening it first if none exists.

        Raises:
            Exception: whatever pg8000 raises when the connection cannot be
                established; it is re-raised after a message is printed.
        """
        import pg8000 # lazy import
        if DBClient._connection is None:
            try:
                kwargs = dict(user=username, password=password, host=host, database=dbname, port=port)
                if ssl:
                    import ssl as _ssl
                    kwargs["ssl_context"] = _ssl.create_default_context()
                conn = pg8000.connect(**kwargs)
                conn.autocommit = True
                # Publish only a fully configured connection.
                DBClient._connection = conn
            except Exception:
                print("Failed to connect to DB")
                raise
        self.conn = DBClient._connection

    def close(self):
        """Close this client's connection; further calls are no-ops.

        The shared class-level slot is cleared only if it still refers to the
        connection being closed, so a newer connection opened in the meantime
        is not discarded.
        """
        conn, self.conn = self.conn, None
        if conn is None:
            return
        try:
            conn.close()
        finally:
            if DBClient._connection is conn:
                DBClient._connection = None
42
+
43
+
44
+ def _iso_dt(x: _dt.datetime) -> str:
45
+ """ISO 8601; normalize UTC offset to 'Z'."""
46
+ s = x.isoformat()
47
+ return s.replace("+00:00", "Z")
48
+
49
+
50
def normalize(obj: Any) -> Any:
    """
    Convert a value into plain, JSON/YAML-friendly Python data.

    Conversions performed:
      - dict              -> dict with every value normalized (keys untouched)
      - list / tuple / set -> list with every item normalized
      - bytes-like        -> base64 text
      - datetime          -> ISO-8601 text via _iso_dt
      - date / time       -> ISO-8601 text
      - Decimal           -> fixed-point text (no exponent)
      - UUID              -> text
    Anything else, including None, is returned as-is.
    """
    if isinstance(obj, dict):
        return {key: normalize(value) for key, value in obj.items()}

    if isinstance(obj, (list, tuple, set)):
        return list(map(normalize, obj))

    if isinstance(obj, (bytes, bytearray, memoryview)):
        return str(base64.b64encode(bytes(obj)), "ascii")

    # datetime is a subclass of date, so it must be tested first.
    if isinstance(obj, _dt.datetime):
        return _iso_dt(obj)

    if isinstance(obj, (_dt.date, _dt.time)):
        return obj.isoformat()

    if isinstance(obj, Decimal):
        return f"{obj:f}"

    if isinstance(obj, uuid.UUID):
        return str(obj)

    return obj
85
+
86
+
87
def _row_to_dict(cursor, row) -> Dict[str, Any]:
    """Pair a result row with the cursor's column names, normalizing each value.

    The column name is taken from the first item of every entry in
    ``cursor.description``.
    """
    column_names = [column[0] for column in cursor.description]
    record: Dict[str, Any] = {}
    for name, value in zip(column_names, row):
        record[name] = normalize(value)
    return record
90
+
91
+
92
class PostgresPipe(Pipe):
    """Pipe that runs the SQL found in each input record against Postgres.

    Every input record supplies the statement in ``record['query']`` and,
    optionally, bind values in ``record['params']``. For each statement an
    optional header record is emitted, followed by the result rows as dicts.
    """

    @classmethod
    def usage(cls):
        """Describe the ``pgres`` component, its argument, parameter and examples."""
        usage = Usage(
            name="pgres",
            desc="Postgres query pipe; executes SQL from input record['query'].",
            component_class=cls,
        )
        usage.def_arg(
            "dbname",
            "name of db. Entry in ~/.pjk/lookups.yaml containing host, user, password"
        )
        usage.def_param(
            "header",
            usage="emit header record before query results",
            valid_values={"true", "false"}, default='true',
        )

        for first_token in ('myquery.sql', "{'query': 'SELECT * from MY_TABLE;'}"):
            usage.def_example(expr_tokens=[first_token, 'pgres:mydb'], expect=None)
        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Resolve the connection settings for the requested database entry.

        Raises:
            TokenError: if the lookups have no entry for the database name.
        """
        super().__init__(ptok, usage)

        lookups = Lookups()
        self.dbname = usage.get_arg("dbname")
        settings = lookups.get(self.dbname)
        if not settings:
            raise TokenError(
                f"~/.pjk/lookups.yaml must contain entry for '{self.dbname}' with host, user, password."
            )

        self.db_host = settings.get("host")
        self.db_user = settings.get("user")
        self.db_pass = settings.get("password")
        self.db_port = int(settings.get("port", 5432))
        self.db_ssl = bool(settings.get("ssl", False))

        self.query_field = "query"    # input field holding the SQL string
        self.params_field = "params"  # optional: list/tuple (positional) or dict (named)
        self.do_header = usage.get_param("header") == "true"

    def reset(self):
        """Nothing to reset; no state is kept between runs."""
        pass

    def _make_header(self, cur, query: str, params=None) -> Dict[str, Any]:
        """
        Build the header record for a statement that has just been executed.

        The header always carries the query, the db name, the db host and
        ``result: "ok"``. ``params`` is added when truthy. A result whose
        only column is ``ingest_event`` has its single row consumed here and
        is reported as ``function``; otherwise ``rowcount`` is reported.
        """
        header = {
            "query": query,
            "db": self.dbname,
            "dbhost": self.db_host,
        }
        if params:
            header["params"] = params
        header["result"] = "ok"

        description = cur.description
        if not description:
            # Statement produced no result set.
            header["rowcount"] = cur.rowcount
        elif [col[0] for col in description] == ["ingest_event"]:
            _ = cur.fetchone()  # consume void row
            header["function"] = "ingest_event"
        else:
            count = cur.rowcount
            header["rowcount"] = None if count == -1 else count

        return {"header": header}

    def __iter__(self):
        """Execute each input record's query and yield header and row records.

        A record without a query yields ``{"_error": "missing query"}``.
        The cursor is closed after each statement and the client is closed
        when iteration ends.
        """
        client = DBClient(
            host=self.db_host,
            username=self.db_user,
            password=self.db_pass,
            dbname=self.dbname,
            port=self.db_port,
            ssl=self.db_ssl,
        )
        try:
            for input_record in self.left:
                query = input_record.get(self.query_field)
                if not query:
                    yield {"_error": "missing query"}
                    continue
                params = input_record.get(self.params_field)

                cur = client.conn.cursor()
                try:
                    # A lone scalar is wrapped so it binds as one positional value.
                    if params is None:
                        cur.execute(query)
                    elif isinstance(params, (list, tuple, dict)):
                        cur.execute(query, params)
                    else:
                        cur.execute(query, (params,))

                    # Header first; it also consumes the ingest_event row.
                    if self.do_header:
                        yield self._make_header(cur, query, params)

                    # Stream rows for any result set other than ingest_event.
                    description = cur.description
                    if description and [col[0] for col in description] != ["ingest_event"]:
                        for row in cur:
                            yield _row_to_dict(cur, row)
                finally:
                    cur.close()
        finally:
            client.close()
@@ -0,0 +1,36 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ # djk/pipes/remove_field.py
5
+
6
+ from pjk.base import Pipe, ParsedToken, Usage, UsageError
7
+
8
class RemoveField(Pipe):
    """Pipe that deletes the named fields from every record (the ``drop`` component)."""

    @classmethod
    def usage(cls):
        """Describe the ``drop`` component, its argument and example."""
        usage = Usage(
            name='drop',
            desc='Remove one or more fields from each record',
            component_class=cls
        )
        usage.def_arg(name='fields', usage='Comma-separated list of field names to drop')
        usage.def_example(expr_tokens=["{id:1, dir:'up', color:'blue'}", 'drop:id,color'], expect="dir: 'up'")
        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Parse the comma-separated field list, ignoring blank entries.

        Raises:
            UsageError: if no non-blank field name remains.
        """
        super().__init__(ptok)
        arg_string = usage.get_arg('fields')
        self.fields = [f.strip() for f in arg_string.split(',') if f.strip()]
        if not self.fields:
            # The message names the component as users invoke it ('drop').
            raise UsageError("drop must include at least one valid field name")
        self.count = 0

    def reset(self):
        """Zero the processed-record counter."""
        self.count = 0

    def __iter__(self):
        """Yield each left record after removing the configured fields in place."""
        for record in self.left:
            self.count += 1
            for field in self.fields:
                record.pop(field, None)  # a missing field is not an error
            yield record