python-jack-knife 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pjk/common.py CHANGED
@@ -3,6 +3,7 @@
3
3
 
4
4
  import sys, shutil, subprocess, contextlib, signal
5
5
  import os
6
+ import re
6
7
  import yaml
7
8
  from pjk.base import TokenError, Integration
8
9
 
@@ -156,3 +157,6 @@ class ComponentFactory:
156
157
 
157
158
  def create(self, token: str):
158
159
  pass
160
+
161
+ def is_valid_field_name(name: str):
162
+ return re.fullmatch(r'^[A-Za-z_][A-Za-z0-9_]*$', name)
pjk/parser.py CHANGED
@@ -55,8 +55,20 @@ class OperandStack:
55
55
  return None
56
56
  return self.stack[-1]
57
57
 
58
+ def clear(self):
59
+ self.stack.clear()
60
+
58
61
  def empty(self):
59
62
  return len(self.stack) == 0
63
+
64
+ def print(self, toadd):
65
+ print('---------')
66
+ if toadd:
67
+ print(f'{type(toadd).__name__}={id(toadd)}')
68
+ if len(self.stack) == 0:
69
+ print(f'Stack={id(self)} StackEmpty')
70
+ for op in self.stack:
71
+ print(f'Stack={id(self)} {type(op).__name__}={id(op)}')
60
72
 
61
73
  class ExpressionParser:
62
74
  def __init__(self, registry: ComponentRegistry):
@@ -69,6 +81,9 @@ class ExpressionParser:
69
81
  'pjk <source> [<pipe> ...] <sink>'])
70
82
 
71
83
  source = self.stack.pop()
84
+ if isinstance(source, SubExpression):
85
+ raise TokenError("Poorly formed sub-expression. Begin token '[' without matching 'over' keyword." )
86
+
72
87
  if not self.stack.empty():
73
88
  raise TokenError.from_list(['A sink can only consume one source.',
74
89
  'pjk <source> [<pipe> ...] <sink>'])
@@ -167,29 +182,38 @@ class StackLoader:
167
182
  return ReducerAggregatorPipe(top_level_reducers=self.top_level_reducers)
168
183
 
169
184
  def add_operator(self, op, stack: OperandStack):
185
+ #stack.print(op)
186
+
170
187
  if not stack.empty() and isinstance(stack.peek(), SubExpression):
171
- top = stack.peek()
188
+ subexp = stack.peek()
172
189
 
173
- if isinstance(op, SubExpressionOver):
174
- subexp_begin = stack.pop()
175
- subexp_begin.set_over_arg(op.get_over_arg())
176
- op.add_source(subexp_begin)
190
+ if isinstance(op, SubExpressionOver) and subexp.recursion_depth() == 0:
191
+ subexp = stack.pop()
192
+ op.add_source(subexp)
177
193
  stack.push(op)
178
194
 
179
195
  global stack_level
180
- # SEEMS LIKE A HACK! FIXME. The stack should handle this but its off by one
181
- stack_level-=1
196
+ stack_level -=1 # not sure why this can't be handled exclusively by the stack
182
197
  return
198
+
183
199
  else: # an operator within the subexpression
184
- top.add_subop(op)
200
+ subexp = stack.peek()
201
+ subexp.add_subop(op)
185
202
  return
186
203
 
204
+ if isinstance(op, SubExpressionOver):
205
+ if stack.empty or not isinstance(stack.peek(), SubExpression):
206
+ raise TokenError("Poorly formed sub-expression. 'over' keyword without matching begin token '['.")
207
+ op.add_source(stack.pop())
208
+ stack.push(op)
209
+
187
210
  # order matters, because sources are pipes
188
211
  if isinstance(op, Pipe):
189
212
  arity = op.arity # class level attribute
190
213
  for _ in range(arity):
191
214
  if stack.empty():
192
- raise UsageError(f"'{op}' requires {arity} input(s)")
215
+ name = type(op).usage().name
216
+ raise TokenError(f"'{name}' requires {arity} input(s)")
193
217
  op.add_source(stack.pop())
194
218
  stack.push(op)
195
219
 
@@ -243,6 +267,31 @@ class UpstreamSource(Source):
243
267
  self.sub_recs_in.increment()
244
268
  yield item
245
269
 
270
+ class SubExpressionOver(Pipe):
271
+ @classmethod
272
+ def usage(cls) -> Usage:
273
+ u = Usage(
274
+ name="over",
275
+ desc="sub-expression over.",
276
+ component_class=cls,
277
+ )
278
+ return u
279
+
280
+ def __init__(self, ptok: ParsedToken, usage: Usage):
281
+ super().__init__(ptok, usage)
282
+ self.over_arg = ptok.get_arg(0)
283
+
284
+ def reset(self):
285
+ pass # stateless
286
+
287
+ def __iter__(self):
288
+ if not isinstance(self.left, SubExpression):
289
+ raise Exception('this actually cannot happen, but did')
290
+
291
+ for record in self.left:
292
+ self.left.subexp_process(record, self.over_arg)
293
+ yield record
294
+
246
295
  class SubExpression(Pipe, ProgressIgnore):
247
296
  @classmethod
248
297
  def create(cls, token: str) -> Pipe:
@@ -255,28 +304,34 @@ class SubExpression(Pipe, ProgressIgnore):
255
304
 
256
305
  def __init__(self, ptok: ParsedToken, usage: Usage):
257
306
  super().__init__(ptok)
258
- self.over_arg = None
259
- self.over_field = None
260
307
  self.subexp_ops = []
261
- self.over_pipe = None
262
308
  self.stack_helper = StackLoader()
263
309
  self.subexp_stack = OperandStack()
264
310
  self.upstream_source = UpstreamSource()
265
311
  self.subexp_stack.push(self.upstream_source)
312
+ self.recursions = 0 # number of subexpression within
313
+ self.subexp_left = None
266
314
 
267
315
  def add_subop(self, op):
268
316
  self.subexp_ops.append(op)
317
+ if isinstance(op, SubExpression):
318
+ self.recursions += 1
319
+ elif isinstance(op, SubExpressionOver):
320
+ self.recursions -= 1
269
321
  self.stack_helper.add_operator(op, self.subexp_stack)
270
322
 
271
- def set_over_arg(self, over_arg): #FIXME, this should take QueryPipe
272
- self.over_arg = over_arg
273
- if over_arg.endswith('.py'):
274
- self.over_field = 'child'
275
- self.over_pipe = UserPipeFactory.create(over_arg)
276
- self.upstream_source.set_source(self.over_pipe)
277
- self.subexp_ops.append(self.over_pipe)
278
- else:
279
- self.over_field = over_arg
323
+ def recursion_depth(self):
324
+ return self.recursions
325
+
326
+ #def bind(self, subex_over: SubExpressionOver):
327
+ # self.over_arg = subex_over.get_over_arg()
328
+ # if self.over_arg.endswith('.py'):
329
+ # self.over_field = 'child'
330
+ # self.over_pipe = UserPipeFactory.create(self.over_arg)
331
+ # self.upstream_source.set_source(self.over_pipe)
332
+ # self.subexp_ops.append(self.over_pipe)
333
+ # else:
334
+ # self.over_field = self.over_arg
280
335
 
281
336
  def reset(self):
282
337
  for op in self.subexp_ops:
@@ -284,60 +339,40 @@ class SubExpression(Pipe, ProgressIgnore):
284
339
  op.reset()
285
340
 
286
341
  def __iter__(self):
287
- for record in self.left:
288
- if self.over_pipe:
289
- one = UpstreamSource()
290
- one.add_item(record)
291
- self.over_pipe.set_sources([one])
292
- else:
293
- field_data = record.pop(self.over_field, None)
294
- if not field_data:
295
- yield record
296
- continue
297
- if isinstance(field_data, list):
298
- self.upstream_source.set_list(field_data)
299
- else:
300
- self.upstream_source.set_list([field_data])
342
+ yield from self.left # pass thru to subexp_over which then calls process
301
343
 
302
- # Reset sub-pipe stack
303
- for op in self.subexp_ops:
304
- op.reset()
344
+ def subexp_process(self, record: dict, over_field: str):
345
+ #for record in self.left:
346
+ # if self.over_pipe:
347
+ # one = UpstreamSource()
348
+ # one.add_item(record)
349
+ # self.over_pipe.set_sources([one])
305
350
 
306
- out_recs = []
307
- top = self.subexp_stack.peek()
308
- for rec in top:
309
- out_recs.append(rec)
351
+ if not self.subexp_left:
352
+ self.subexp_left = self.subexp_stack.pop()
310
353
 
311
- record[self.over_field] = out_recs
354
+ field_data = record.pop(over_field, None)
355
+ if not field_data:
356
+ return
312
357
 
313
- for op in self.subexp_ops:
314
- get_subexp = getattr(op, "get_subexp_result", None)
315
- if get_subexp:
316
- name, value = get_subexp()
317
- if name:
318
- record[name] = value
358
+ if isinstance(field_data, list):
359
+ self.upstream_source.set_list(field_data)
360
+ else:
361
+ self.upstream_source.set_list([field_data])
319
362
 
320
- yield record
363
+ # Reset sub-pipe stack
364
+ for op in self.subexp_ops:
365
+ op.reset()
321
366
 
322
- class SubExpressionOver(Pipe):
323
- @classmethod
324
- def usage(cls) -> Usage:
325
- u = Usage(
326
- name="over",
327
- desc="sub-expression over.",
328
- component_class=cls,
329
- )
330
- return u
331
-
332
- def __init__(self, ptok: ParsedToken, usage: Usage):
333
- super().__init__(ptok, usage)
334
- self.over_arg = ptok.get_arg(0)
367
+ out_recs = []
335
368
 
336
- def get_over_arg(self):
337
- return self.over_arg
369
+ for rec in self.subexp_left:
370
+ out_recs.append(rec)
338
371
 
339
- def reset(self):
340
- pass # stateless
372
+ record[over_field] = out_recs
341
373
 
342
- def __iter__(self):
343
- yield from self.left
374
+ for op in self.subexp_ops:
375
+ if isinstance(op, ReducePipe):
376
+ name, value = op.get_subexp_result()
377
+ if name:
378
+ record[name] = value
pjk/pipes/denorm.py CHANGED
@@ -5,6 +5,7 @@
5
5
 
6
6
  from pjk.base import Pipe, ParsedToken, Usage, UsageError
7
7
  from typing import Iterator
8
+ from pjk.progress import papi
8
9
 
9
10
  class Denormer:
10
11
  def __init__(self, record, field):
@@ -23,7 +24,7 @@ class Denormer:
23
24
  elif isinstance(data, dict):
24
25
  self.subrec_list = [data]
25
26
  else:
26
- raise UsageError("can only denorm sub-records")
27
+ raise UsageError("can only explode sub-records")
27
28
 
28
29
  def __iter__(self) -> Iterator[dict]:
29
30
  for subrec in self.subrec_list:
@@ -53,8 +54,8 @@ class DenormPipe(Pipe):
53
54
  super().__init__(ptok)
54
55
 
55
56
  self.field = usage.get_arg('field')
56
- if not self.field:
57
- raise UsageError("denorm must include a field name")
57
+ self.recs_in = papi.get_counter(self, None) # don't display
58
+ self.recs_out = papi.get_percentage_counter(self, 'recs_out', self.recs_in)
58
59
 
59
60
  self._pending_iter = None
60
61
 
@@ -63,6 +64,8 @@ class DenormPipe(Pipe):
63
64
 
64
65
  def __iter__(self):
65
66
  for record in self.left:
67
+ self.recs_in.increment()
66
68
  denormer = Denormer(record, self.field)
67
69
  for out in denormer:
70
+ self.recs_out.increment()
68
71
  yield out
pjk/pipes/move_field.py CHANGED
@@ -3,34 +3,33 @@
3
3
 
4
4
  # djk/pipes/move_field.py
5
5
 
6
- from pjk.base import Pipe, ParsedToken, Usage
6
+ from pjk.base import Pipe, ParsedToken, Usage, TokenError
7
+ from pjk.common import is_valid_field_name
7
8
 
8
9
  class MoveField(Pipe):
9
10
  @classmethod
10
11
  def usage(cls):
11
- usage = Usage(
12
+ u = Usage(
12
13
  name='as',
13
14
  desc='rename a field in the record',
14
15
  component_class=cls
15
16
  )
16
- usage.def_arg(name='src', usage='Source field name')
17
- usage.def_arg(name='dst', usage='Destination field name')
18
- usage.def_example(expr_tokens=['{up:1}', 'as:up:down'], expect="{down:1}")
17
+ u.def_arg(name='src', usage='Source field name')
18
+ u.def_arg(name='dst', usage='Destination field name')
19
+ u.def_example(expr_tokens=['{up:1}', 'as:up:down'], expect="{down:1}")
19
20
 
20
- return usage
21
+ return u
21
22
 
22
23
  def __init__(self, ptok: ParsedToken, usage: Usage):
23
24
  super().__init__(ptok, usage)
24
25
  self.src = usage.get_arg('src')
25
26
  self.dst = usage.get_arg('dst')
26
- self.count = 0
27
27
 
28
- def reset(self):
29
- self.count = 0
28
+ if not is_valid_field_name(self.dst) or not is_valid_field_name(self.src):
29
+ raise TokenError('field names only allow letters, numbers (non-initially) and underbar')
30
30
 
31
31
  def __iter__(self):
32
32
  for record in self.left:
33
- self.count += 1
34
33
  if self.src in record:
35
34
  record[self.dst] = record.pop(self.src)
36
35
  yield record
pjk/pipes/query_pipe.py CHANGED
@@ -18,7 +18,7 @@ class QueryPipe(Pipe):
18
18
  desc=cls.desc,
19
19
  component_class=cls
20
20
  )
21
- u.def_arg(name=cls.arg0[0], usage=f'{cls.arg0[1]} ~/.pjk/lookups.yaml must containing entry {cls.__name__}-<{cls.arg0[0]}>\n containing necessary parameters.')
21
+ u.def_arg(name=cls.arg0[0], usage=f"{cls.arg0[1]} ~/.pjk/lookups.yaml must contain entry '{cls.__name__}-<{cls.arg0[0]}'>\n with necessary parameters.")
22
22
  u.def_param("query_field", usage="field of query.", default="query")
23
23
  u.def_param("count", usage="Number of search results, (databases may ignore)", is_num=True, default="10")
24
24
  u.def_param("shape", usage='the shape of ouput records', is_num=False,
pjk/version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
- __version__ = "0.6.0"
4
+ __version__ = "0.6.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -1,27 +1,27 @@
1
1
  pjk/__init__.py,sha256=6HGDVcFOFv6VPSNjxVnusm9wHqy01pELX3AyCWFzqWg,128
2
2
  pjk/base.py,sha256=ObBsoTkfWp5qigr-QdNGZBs6tNH41P18kTnSMSJnntA,12775
3
- pjk/common.py,sha256=0buZgs3dBgNaxUugrFkzBR3qEr_IFfC1-PhvTP9FnCs,4809
3
+ pjk/common.py,sha256=c2Y9TTS1Lz8KNGSaKqN3DMfb7fOGkndZon_d0V0iwm0,4914
4
4
  pjk/log.py,sha256=LjBboWhUrr2Cz-ygpftCIK17ee2-zNHKZjeJyoW0PlA,2163
5
5
  pjk/main.py,sha256=fM2xuofSv_lzSBk4rD7ZCWvR1beRMgHiezL4rIBD0dE,3924
6
6
  pjk/man_page.py,sha256=BNxRbzXfvR_5FoPkpCwxApyMsfSSrSdDu2PM-IVblA4,4447
7
- pjk/parser.py,sha256=C0rWqb_v-HvV7TTcacn6eU4w5o2fConPYVadSLwMha4,11404
7
+ pjk/parser.py,sha256=2aZiW0SY9KPKb2h4xPasOhxehcxn_l_BEx2swmN_7_E,12673
8
8
  pjk/progress.py,sha256=scacaLRbSMBJEEEDeOTYt8sKwXP-CYEGoANG7IXVoQ4,9597
9
9
  pjk/registry.py,sha256=0_exY3sivaOHyRtf-v88OSugl2LGAbbFjyFS4cPbGO8,6864
10
- pjk/version.py,sha256=YIm7tlVfTl0uD8veTyKIyKgY-0nK0BdMVCC3KdWugN4,91
10
+ pjk/version.py,sha256=pUQpi34dOjfho08lZGT_KTgebMk9IucMzzRKSwCd6Bc,91
11
11
  pjk/integrations/ddb_sink.py,sha256=qFuh0ZkZ1oGrx576-226eu75sauGqa8BF_u-FLs4yW4,1704
12
12
  pjk/integrations/postgres_pipe.py,sha256=MZut7dikqbXr0XIrL31yrXM_0ZrgExD6suVA9jd_xmA,6192
13
13
  pjk/integrations/snowflake_pipe.py,sha256=z22E4KBfZHnAi6Bb8e9PXJwvwVYPeJjF02FI5o0rRus,8549
14
14
  pjk/pipes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- pjk/pipes/denorm.py,sha256=3zY8E34e5ORiddP3zEXRg1_iLYV6ivsdEBilNBjjHTo,2065
15
+ pjk/pipes/denorm.py,sha256=tL8v3lTDCZPXDlffrx71PIojC_JWjN3PCJriNP6mw0o,2235
16
16
  pjk/pipes/factory.py,sha256=rwCWneXB1gosk9I24yr1Z0RDUYHrjHx8swFLrDR1k44,2119
17
17
  pjk/pipes/filter.py,sha256=Fo5-MJaNAfTabRZxw9fQ1n4amhgUUCncC_GoOWswdRw,2053
18
18
  pjk/pipes/head.py,sha256=5ZAacwzpVWuR_xWCnlHqyJUwbxfEcF8vIogdKIJBTRs,903
19
19
  pjk/pipes/join.py,sha256=zE9JrrGnOjDB7OuBT2_FxleptFfbBJJLdFak9FbsOjc,3149
20
20
  pjk/pipes/let_reduce.py,sha256=QfCs-omZq-a2hMFr5Nnt1hhQuiXol0IMA2diXwesVUA,7153
21
21
  pjk/pipes/map.py,sha256=mOBKjx-AHDmOa3mBMnFfb-PYqFbKnqGVjBj3hx2fcys,5107
22
- pjk/pipes/move_field.py,sha256=H494BdPsoYVbASGO-EUjEqxuClQF3a3R2OjRyeAQyYk,1018
22
+ pjk/pipes/move_field.py,sha256=ZgJZZSD7irWVnOdHyhNUq9Mw3oLuGVUr1WKjdjBbwgo,1142
23
23
  pjk/pipes/progress_pipe.py,sha256=GfnQFmeAOyeSHl69TXNWWz7sBd6MVpzDvYH2mZBp1Hg,1239
24
- pjk/pipes/query_pipe.py,sha256=3rO6pblw741QGfuTMlKv95sppdCstnDE3sT8VZe7knI,2978
24
+ pjk/pipes/query_pipe.py,sha256=nGcQh0qOcHHzCyVcwfrac5HmT9pPzyGlk--zJtmxVnc,2970
25
25
  pjk/pipes/remove_field.py,sha256=QjEO6-phRngM2emBJ6xv8UA2d_iA44tYN8Crx4lhqQ0,1169
26
26
  pjk/pipes/sample.py,sha256=_YezoC3hZbG0qpkg9C6AuHLjuWseeAqsYYOXXAMvbOQ,2269
27
27
  pjk/pipes/select.py,sha256=OuVAk8x7rCkb0neroBmUUuovxHc-z2q7VaUNNWrEdZY,1331
@@ -66,9 +66,9 @@ pjk/sources/source_list.py,sha256=5L2vFrtVSl9rKf2NjfpUFOOAb-iypVDKYCw1-3xgcEo,64
66
66
  pjk/sources/sql_source.py,sha256=2CpOuikd2BVmMW_UbhVGPfXUf3BJmOcK5yMtmbTMYvw,743
67
67
  pjk/sources/tsv_source.py,sha256=37nhEblCZ8XeTNHVo-WcdJ8HbIbiwKgsDES_yzz6EdU,306
68
68
  pjk/sources/user_source_factory.py,sha256=0XIz6NkiBhGAteGDP8cNy7MALWsUF3smtLj4Qnj41Q8,1326
69
- python_jack_knife-0.6.0.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
70
- python_jack_knife-0.6.0.dist-info/METADATA,sha256=lEQvJQufcy-g2Zq4MGV6FSk7XX3XXtyQojWrP9sXrmM,14641
71
- python_jack_knife-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
72
- python_jack_knife-0.6.0.dist-info/entry_points.txt,sha256=kzZ10zEisvEaG2xYqqw7xRpuV62rAO_dPEHnM6USelk,38
73
- python_jack_knife-0.6.0.dist-info/top_level.txt,sha256=r-Ef_I9SbVDL9jD-W0WtshstLos_7guWbpItYxxSllQ,4
74
- python_jack_knife-0.6.0.dist-info/RECORD,,
69
+ python_jack_knife-0.6.2.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
70
+ python_jack_knife-0.6.2.dist-info/METADATA,sha256=6GiGT2ccWGYUKbaGPIQXPMSKJsHfM7_K8xatC6eGn1w,14641
71
+ python_jack_knife-0.6.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
72
+ python_jack_knife-0.6.2.dist-info/entry_points.txt,sha256=kzZ10zEisvEaG2xYqqw7xRpuV62rAO_dPEHnM6USelk,38
73
+ python_jack_knife-0.6.2.dist-info/top_level.txt,sha256=r-Ef_I9SbVDL9jD-W0WtshstLos_7guWbpItYxxSllQ,4
74
+ python_jack_knife-0.6.2.dist-info/RECORD,,