python-jack-knife 0.5.1__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82):
  1. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/PKG-INFO +1 -1
  2. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/base.py +24 -20
  3. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/common.py +36 -28
  4. python_jack_knife-0.5.1/src/pjk/sinks/ddb.py → python_jack_knife-0.6.0/src/pjk/integrations/ddb_sink.py +2 -2
  5. {python_jack_knife-0.5.1/src/pjk/pipes → python_jack_knife-0.6.0/src/pjk/integrations}/postgres_pipe.py +29 -49
  6. python_jack_knife-0.6.0/src/pjk/integrations/snowflake_pipe.py +258 -0
  7. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/main.py +56 -31
  8. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/man_page.py +4 -3
  9. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/parser.py +95 -36
  10. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/factory.py +15 -6
  11. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/filter.py +8 -4
  12. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/head.py +4 -6
  13. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/join.py +15 -4
  14. python_jack_knife-0.6.0/src/pjk/pipes/map.py +138 -0
  15. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/move_field.py +2 -2
  16. python_jack_knife-0.6.0/src/pjk/pipes/progress_pipe.py +36 -0
  17. python_jack_knife-0.6.0/src/pjk/pipes/query_pipe.py +90 -0
  18. python_jack_knife-0.6.0/src/pjk/pipes/sample.py +68 -0
  19. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/select.py +2 -4
  20. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/sort.py +6 -4
  21. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/tail.py +1 -1
  22. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/where.py +10 -5
  23. python_jack_knife-0.6.0/src/pjk/progress.py +277 -0
  24. python_jack_knife-0.6.0/src/pjk/registry.py +199 -0
  25. python_jack_knife-0.6.0/src/pjk/sinks/create_sink.py +110 -0
  26. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/devnull.py +13 -6
  27. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/dir_sink.py +9 -5
  28. python_jack_knife-0.6.0/src/pjk/sinks/expect.py +92 -0
  29. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/factory.py +10 -8
  30. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/graph.py +1 -1
  31. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/user_sink_factory.py +2 -1
  32. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/dir_source.py +2 -0
  33. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/factory.py +7 -38
  34. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/format_source.py +5 -0
  35. python_jack_knife-0.6.0/src/pjk/sources/npy_source.py +76 -0
  36. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/s3_source.py +2 -1
  37. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/user_source_factory.py +5 -1
  38. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/version.py +1 -1
  39. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
  40. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/SOURCES.txt +9 -2
  41. python_jack_knife-0.5.1/src/pjk/pipes/map.py +0 -91
  42. python_jack_knife-0.5.1/src/pjk/registry.py +0 -150
  43. python_jack_knife-0.5.1/src/pjk/sinks/expect.py +0 -53
  44. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/LICENSE +0 -0
  45. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/README.md +0 -0
  46. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/pyproject.toml +0 -0
  47. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/setup.cfg +0 -0
  48. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/__init__.py +0 -0
  49. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/log.py +0 -0
  50. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/__init__.py +0 -0
  51. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/denorm.py +0 -0
  52. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/let_reduce.py +0 -0
  53. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/remove_field.py +0 -0
  54. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/user_pipe_factory.py +0 -0
  55. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/__init__.py +0 -0
  56. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/csv_sink.py +0 -0
  57. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/format_sink.py +0 -0
  58. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/graph_bar_line.py +0 -0
  59. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/graph_cumulative.py +0 -0
  60. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/graph_hist.py +0 -0
  61. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/graph_scatter.py +0 -0
  62. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/json_sink.py +0 -0
  63. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/s3_sink.py +0 -0
  64. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/s3_stream.py +0 -0
  65. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/sinks.py +0 -0
  66. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/stdout.py +0 -0
  67. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/tsv_sink.py +0 -0
  68. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/__init__.py +0 -0
  69. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/csv_source.py +0 -0
  70. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/inline_source.py +0 -0
  71. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/json_source.py +0 -0
  72. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/lazy_file.py +0 -0
  73. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/lazy_file_local.py +0 -0
  74. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/lazy_file_s3.py +0 -0
  75. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/parquet_source.py +0 -0
  76. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/source_list.py +0 -0
  77. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/sql_source.py +0 -0
  78. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/tsv_source.py +0 -0
  79. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  80. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  81. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/requires.txt +0 -0
  82. {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.5.1
3
+ Version: 0.6.0
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -162,7 +162,7 @@ class Usage:
162
162
  return '\n'.join(lines)
163
163
 
164
164
  def get_token_syntax(self):
165
- if not self.syntax:
165
+ if self.syntax:
166
166
  return self.syntax # else piece it together
167
167
 
168
168
  token = f'{self.name}'
@@ -279,6 +279,11 @@ class KeyedSource(ABC):
279
279
  def deep_copy(self):
280
280
  return None
281
281
 
282
+ # mixin
283
+ # just for distinguishing components for display
284
+ class Integration(ABC):
285
+ pass
286
+
282
287
  class Source(ABC):
283
288
  @classmethod
284
289
  def usage(cls):
@@ -300,10 +305,14 @@ class Source(ABC):
300
305
 
301
306
  def deep_copy(self):
302
307
  return None # Default: not copyable unless overridden
303
-
308
+
309
+ def close(self):
310
+ pass
311
+
312
+ def _get_sources(self, source_list: list):
313
+ pass
304
314
 
305
315
  class Pipe(Source):
306
- deep_copyable: bool = False # default to false
307
316
  arity: int = 1
308
317
 
309
318
  def __init__(self, ptok: ParsedToken, usage: Usage = None):
@@ -326,20 +335,12 @@ class Pipe(Source):
326
335
  pass # optional hook
327
336
 
328
337
  def deep_copy(self) -> Optional["Pipe"]:
329
- if not self.deep_copyable:
330
- return None
331
- if not self.inputs:
332
- raise RuntimeError(f"{self.__class__.__name__} has no inputs set")
333
-
334
- clone = self.__class__(self.ptok, self.__class__.usage())
335
-
336
- for input in self.inputs:
337
- strand = input.deep_copy()
338
- if strand is None:
339
- return None
340
- clone.add_source(strand)
341
-
342
- return clone
338
+ return None
339
+
340
+ def _get_sources(self, source_list: list):
341
+ for ix in self.inputs:
342
+ source_list.append(ix)
343
+ ix._get_sources(source_list)
343
344
 
344
345
  class DeepCopyPipe(Pipe):
345
346
  def deep_copy(self):
@@ -373,13 +374,16 @@ class Sink(ABC):
373
374
  self.process()
374
375
  self.close()
375
376
 
377
+ # get all inputs in the execution chain for closing
378
+ inputs = [self.input]
379
+ self.input._get_sources(inputs)
380
+ for input in inputs:
381
+ input.close()
382
+
376
383
  # optional
377
384
  def close(self):
378
385
  pass
379
386
 
380
- def print_info(self):
381
- pass
382
-
383
387
  def add_source(self, source: Source) -> None:
384
388
  self.input = source
385
389
 
@@ -4,6 +4,7 @@
4
4
  import sys, shutil, subprocess, contextlib, signal
5
5
  import os
6
6
  import yaml
7
+ from pjk.base import TokenError, Integration
7
8
 
8
9
  class SafeNamespace:
9
10
  def __init__(self, obj):
@@ -73,11 +74,12 @@ def highlight(text: str, color: str = 'bold', value: str = None) -> str:
73
74
  return text.replace(value, f"{style}{value}{RESET}")
74
75
 
75
76
  class Lookups:
76
- def __init__(self):
77
+ def __init__(self, component_class):
77
78
  self.lookups_yaml = os.path.expanduser('~/.pjk/lookups.yaml')
79
+ self.class_name = type(component_class).__name__
78
80
  self._data = {}
79
81
  self._load()
80
-
82
+
81
83
  def _load(self):
82
84
  """Load lookups from YAML file if it exists."""
83
85
  if os.path.exists(self.lookups_yaml):
@@ -93,8 +95,13 @@ class Lookups:
93
95
  yaml.safe_dump(self._data, f)
94
96
 
95
97
  def get(self, key, default=None):
96
- """Retrieve a lookup value by key."""
97
- return self._data.get(key, default)
98
+ lookup_key = f'{self.class_name}-{key}'
99
+ entry = self._data.get(lookup_key, default)
100
+ if not entry:
101
+ raise TokenError(
102
+ f"~/.pjk/lookups.yaml does not contain entry for '{lookup_key}' with required params."
103
+ )
104
+ return entry
98
105
 
99
106
  def set(self, key, value):
100
107
  """Set a lookup value and persist it."""
@@ -112,36 +119,37 @@ class Lookups:
112
119
  return dict(self._data)
113
120
 
114
121
  class ComponentFactory:
115
- def __init__(self, components: dict, comp_type_name: str):
122
+ def __init__(self, core_components: dict):
116
123
  self.num_orig = 0
117
- self.components = components # name -> component_class
118
- self.comp_type_name = comp_type_name
119
- self.num_orig_comps = len(components)
124
+ self._components = {}
125
+ for k, v in core_components.items():
126
+ if issubclass(v, Integration):
127
+ self.register(k, v, 'integration')
128
+ else:
129
+ self.register(k, v, 'core')
120
130
 
121
- def register(self, name, comp_class):
122
- self.components[name] = comp_class
131
+ def register(self, name, comp_class, origin: str):
132
+ self._components[name] = (comp_class, origin)
123
133
 
124
134
  def get_comp_type_name(self):
125
- return self.comp_type_name
126
-
127
- def print_descriptions(self):
128
- header = highlight(f'{self.comp_type_name}s')
129
- print(header)
130
-
131
- i = 0
132
- plugin = ''
133
- for name, comp_class in self.components.items():
134
- usage = comp_class.usage()
135
- lines = usage.desc.split('\n')
136
- if i >= self.num_orig_comps:
137
- plugin = '(~/.pjk/plugin)'
138
- line = f' {name:<12} {lines[0]} {plugin}'
139
- line = highlight(line, 'bold', plugin) if plugin else line
140
- print(line)
141
- i += 1
135
+ pass
136
+
137
+ def get_component_name_class_tuples(self, origin: str = None) -> list:
138
+ ret = []
139
+ for k, (v, org) in self._components.items():
140
+ if not origin or origin == org:
141
+ ret.append((k, v))
142
+ return ret
143
+
144
+ def get_component_class(self, name: str):
145
+ tuple = self._components.get(name)
146
+ if not tuple:
147
+ return None
148
+ component_class, origin = tuple
149
+ return component_class
142
150
 
143
151
  def get_usage(self, name: str):
144
- comp_class = self.components.get(name)
152
+ comp_class = self.get_component_class(name)
145
153
  if not comp_class:
146
154
  return None
147
155
  return comp_class.usage()
@@ -1,10 +1,10 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
- from pjk.base import Sink, Source, ParsedToken, Usage
4
+ from pjk.base import Sink, Integration, Source, ParsedToken, Usage
5
5
  from decimal import Decimal
6
6
 
7
- class DDBSink(Sink):
7
+ class DDBSink(Sink, Integration):
8
8
  @classmethod
9
9
  def usage(cls):
10
10
  usage = Usage(
@@ -9,8 +9,9 @@ import uuid
9
9
  from decimal import Decimal
10
10
  from typing import Any, Dict, Optional
11
11
 
12
- from pjk.base import Pipe, ParsedToken, NoBindUsage, Usage, TokenError
12
+ from pjk.base import Integration, ParsedToken, Usage
13
13
  from pjk.common import Lookups
14
+ from pjk.pipes.query_pipe import QueryPipe
14
15
 
15
16
 
16
17
  class DBClient:
@@ -89,49 +90,30 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
89
90
  return {col: normalize(val) for col, val in zip(cols, row)}
90
91
 
91
92
 
92
- class PostgresPipe(Pipe):
93
- @classmethod
94
- def usage(cls):
95
- usage = Usage(
96
- name="pgres",
97
- desc="Postgres query pipe; executes SQL from input record['query'].",
98
- component_class=cls,
99
- )
100
- usage.def_arg(
101
- "dbname",
102
- "name of db. Entry in ~/.pjk/lookups.yaml containing host, user, password"
103
- )
104
- usage.def_param(
105
- "header",
106
- usage="emit header record before query results",
107
- valid_values={"true", "false"}, default='true',
108
- )
109
-
110
- usage.def_example(expr_tokens=['myquery.sql', 'pgres:mydb'], expect=None)
111
- usage.def_example(expr_tokens=["{'query': 'SELECT * from MY_TABLE;'}", 'pgres:mydb'], expect=None)
112
- return usage
93
+ class PostgresPipe(QueryPipe,Integration):
94
+ name = 'pgres'
95
+ desc = "Postgres query pipe; executes SQL from input."
96
+ arg0 = ("dbname", 'database name.')
97
+ examples = [
98
+ ['myquery.sql', 'pgres:mydb', '-'],
99
+ ["{'query': 'SELECT * from MY_TABLE;'}", 'pgres:mydb', '-'],
100
+ ["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'pgres:mydb']
101
+ ]
113
102
 
114
103
  def __init__(self, ptok: ParsedToken, usage: Usage):
115
104
  super().__init__(ptok, usage)
116
105
 
117
- lookups = Lookups()
106
+ lookups = Lookups(self)
118
107
  self.dbname = usage.get_arg("dbname")
119
- db_params = lookups.get(self.dbname)
120
- if not db_params:
121
- # f-string so dbname prints correctly
122
- raise TokenError(
123
- f"~/.pjk/lookups.yaml must contain entry for '{self.dbname}' with host, user, password."
124
- )
125
-
126
- self.db_host = db_params.get("host")
127
- self.db_user = db_params.get("user")
128
- self.db_pass = db_params.get("password")
129
- self.db_port = int(db_params.get("port", 5432))
130
- self.db_ssl = bool(db_params.get("ssl", False))
131
-
132
- self.query_field = "query" # SQL string
108
+
109
+ self.db_host = self.lookup_params.get("host")
110
+ self.db_user = self.lookup_params.get("user")
111
+ self.db_pass = self.lookup_params.get("password")
112
+ self.db_port = int(self.lookup_params.get("port", 5432))
113
+ self.db_ssl = bool(self.lookup_params.get("ssl", False))
114
+
115
+ self.query_field = usage.get_param('query_field')
133
116
  self.params_field = "params" # optional: list/tuple (positional) or dict (named)
134
- self.do_header = usage.get_param("header") == "true"
135
117
 
136
118
  def reset(self):
137
119
  # stateless across reset
@@ -143,7 +125,6 @@ class PostgresPipe(Pipe):
143
125
  Figures out result, rowcount, function automatically.
144
126
  """
145
127
  h = {
146
- "query": query,
147
128
  "db": self.dbname,
148
129
  "dbhost": self.db_host,
149
130
  }
@@ -163,9 +144,9 @@ class PostgresPipe(Pipe):
163
144
  h["result"] = "ok"
164
145
  h["rowcount"] = cur.rowcount
165
146
 
166
- return {"header": h}
147
+ return h
167
148
 
168
- def __iter__(self):
149
+ def execute_query_returning_Q_xR_iterable(self, record):
169
150
  client = DBClient(
170
151
  host=self.db_host,
171
152
  username=self.db_user,
@@ -175,12 +156,12 @@ class PostgresPipe(Pipe):
175
156
  ssl=self.db_ssl,
176
157
  )
177
158
  try:
178
- for input_record in self.left:
179
- query = input_record.get(self.query_field)
180
- if not query:
181
- yield {"_error": "missing query"}
182
- continue
183
- params = input_record.get(self.params_field)
159
+ query = record.get(self.query_field)
160
+ if not query:
161
+ record['_error'] = 'missing query'
162
+ yield record
163
+ else:
164
+ params = record.get(self.params_field)
184
165
 
185
166
  cur = client.conn.cursor()
186
167
  try:
@@ -194,8 +175,7 @@ class PostgresPipe(Pipe):
194
175
  cur.execute(query, (params,))
195
176
 
196
177
  # yield header first
197
- if self.do_header:
198
- yield self._make_header(cur, query, params)
178
+ yield self._make_header(cur, query, params)
199
179
 
200
180
  # then stream rows if it was a real SELECT with results
201
181
  if cur.description:
@@ -0,0 +1,258 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+ #
4
+ # djk/pipes/snowflake_pipe.py
5
+
6
+ import base64
7
+ import datetime as _dt
8
+ import uuid
9
+ from decimal import Decimal
10
+ from typing import Any, Dict, Optional
11
+
12
+ from pjk.base import ParsedToken, Usage, TokenError, Integration
13
+ from pjk.pipes.query_pipe import QueryPipe
14
+
15
+
16
+ # ---------- utilities ----------
17
+
18
+ def _iso_dt(x: _dt.datetime) -> str:
19
+ """ISO 8601; normalize UTC offset to 'Z' for UTC."""
20
+ s = x.isoformat()
21
+ return s.replace("+00:00", "Z")
22
+
23
+
24
+ def normalize(obj: Any) -> Any:
25
+ """
26
+ Make values JSON/YAML-safe and portable (schema-agnostic):
27
+ - Decimal -> exact string (no sci-notation)
28
+ - date/datetime/time -> ISO-8601 string (datetime keeps offset; UTC -> 'Z')
29
+ - UUID -> string
30
+ - bytes -> base64 string
31
+ - lists/tuples/sets, dicts -> normalized recursively
32
+ - leaves int/float/str/bool/None as-is
33
+ """
34
+ if obj is None:
35
+ return None
36
+ if isinstance(obj, Decimal):
37
+ return format(obj, "f")
38
+ if isinstance(obj, _dt.datetime):
39
+ return _iso_dt(obj)
40
+ if isinstance(obj, (_dt.date, _dt.time)):
41
+ return obj.isoformat()
42
+ if isinstance(obj, uuid.UUID):
43
+ return str(obj)
44
+ if isinstance(obj, (bytes, bytearray, memoryview)):
45
+ return base64.b64encode(bytes(obj)).decode("ascii")
46
+ if isinstance(obj, dict):
47
+ return {k: normalize(v) for k, v in obj.items()}
48
+ if isinstance(obj, (list, tuple, set)):
49
+ return [normalize(v) for v in obj]
50
+ return obj
51
+
52
+
53
+ def _row_to_dict(cursor, row) -> Dict[str, Any]:
54
+ cols = [d[0] for d in cursor.description]
55
+ return {col: normalize(val) for col, val in zip(cols, row)}
56
+
57
+
58
+ def _get_any(params: Dict[str, Any], *keys: str) -> Optional[Any]:
59
+ """
60
+ Fetch a value from params using any of the provided keys,
61
+ trying case variants and optional SNOWFLAKE_ prefix.
62
+ """
63
+ variants = []
64
+ for k in keys:
65
+ variants.extend([
66
+ k, k.lower(), k.upper(),
67
+ f"snowflake_{k}".lower(), f"SNOWFLAKE_{k}".upper()
68
+ ])
69
+ for v in variants:
70
+ if v in params:
71
+ return params[v]
72
+ return None
73
+
74
+
75
+ # ---------- client ----------
76
+
77
+ class SnowflakeClient:
78
+ """
79
+ Simple connection wrapper for snowflake-connector-python.
80
+ One connection per client instance (safer than sharing across threads).
81
+ """
82
+ def __init__(
83
+ self,
84
+ *,
85
+ account: str,
86
+ user: str,
87
+ password: Optional[str] = None,
88
+ authenticator: Optional[str] = None, # e.g. 'externalbrowser', 'oauth', 'snowflake'
89
+ role: Optional[str] = None,
90
+ warehouse: Optional[str] = None,
91
+ database: Optional[str] = None,
92
+ schema: Optional[str] = None
93
+ ):
94
+ import snowflake.connector # lazy import
95
+
96
+ kwargs: Dict[str, Any] = {
97
+ "account": account,
98
+ "user": user,
99
+ }
100
+ if password:
101
+ kwargs["password"] = password
102
+ if authenticator:
103
+ kwargs["authenticator"] = authenticator
104
+ if role:
105
+ kwargs["role"] = role
106
+ if warehouse:
107
+ kwargs["warehouse"] = warehouse
108
+ if database:
109
+ kwargs["database"] = database
110
+ if schema:
111
+ kwargs["schema"] = schema
112
+
113
+ try:
114
+ self.conn = snowflake.connector.connect(**kwargs)
115
+ # autocommit is True by default; make explicit
116
+ self.conn.autocommit(True)
117
+ # Apply explicit USE statements as a safety net (only if provided)
118
+ with self.conn.cursor() as cur:
119
+ if role:
120
+ cur.execute(f'USE ROLE "{role}"')
121
+ if warehouse:
122
+ cur.execute(f'USE WAREHOUSE "{warehouse}"')
123
+ if database:
124
+ cur.execute(f'USE DATABASE "{database}"')
125
+ if schema:
126
+ cur.execute(f'USE SCHEMA "{schema}"')
127
+ except Exception as e:
128
+ print("Failed to connect to Snowflake")
129
+ raise e
130
+
131
+ def close(self):
132
+ if getattr(self, "conn", None) is not None:
133
+ try:
134
+ self.conn.close()
135
+ finally:
136
+ self.conn = None
137
+
138
+
139
+ # ---------- pipe ----------
140
+
141
+ class SnowflakePipe(QueryPipe, Integration):
142
+ """
143
+ Snowflake query pipe; executes SQL found in input record['query'] and streams rows.
144
+ Connection/session settings are pulled from ~/.pjk/lookups.yaml under the arg name.
145
+ """
146
+ name = 'snowflake'
147
+ desc = "Snowflake query pipe; executes an SQL query for each input record."
148
+ arg0 = ('dbname', 'database name.')
149
+ examples = [
150
+ ["{'query': 'SELECT CURRENT_ROLE();'}", "snow:EDLDB", "-"],
151
+ ["myquery.sql", "snow:EDLDB", "-"]
152
+ ]
153
+
154
+ def __init__(self, ptok: ParsedToken, usage: Usage):
155
+ super().__init__(ptok, usage)
156
+
157
+ self.dbname = usage.get_arg(type(self).arg0[0])
158
+
159
+ # Accept both bare keys and SNOWFLAKE_* variants in lookups.yaml
160
+ self.sf_account = _get_any(self.lookup_params, "account")
161
+ self.sf_user = _get_any(self.lookup_params, "user")
162
+ self.sf_auth = _get_any(self.lookup_params, "authenticator")
163
+ self.sf_role = _get_any(self.lookup_params, "role")
164
+ self.sf_wh = _get_any(self.lookup_params, "warehouse")
165
+ self.sf_db = self.dbname
166
+ self.sf_schema = _get_any(self.lookup_params, "schema")
167
+
168
+ # Basic validation
169
+ missing = [k for k, v in [
170
+ ("account", self.sf_account),
171
+ ("user", self.sf_user),
172
+ ("authenticator|password", self.sf_auth or self.sf_password),
173
+ ("role", self.sf_role),
174
+ ("warehouse", self.sf_wh),
175
+ ("schema", self.sf_schema),
176
+ ] if not v]
177
+ if missing:
178
+ raise TokenError(
179
+ f"lookups entry '{self.dbname}' missing: {', '.join(missing)}"
180
+ )
181
+
182
+ self.query_field = usage.get_param('query_field')
183
+ self.params_field = "params" # optional: list/tuple (positional) or dict (named)
184
+
185
+ def reset(self):
186
+ # stateless across reset
187
+ pass
188
+
189
+ def _make_header(self, cur, params=None) -> Dict[str, Any]:
190
+ """
191
+ Build a header record with query metadata and session context.
192
+ """
193
+ h: Dict[str, Any] = {
194
+ "db": self.dbname,
195
+ "account": self.sf_account,
196
+ "role": self.sf_role,
197
+ "warehouse": self.sf_wh,
198
+ }
199
+ if self.sf_db:
200
+ h["database"] = self.sf_db
201
+ if self.sf_schema:
202
+ h["schema"] = self.sf_schema
203
+ if params is not None:
204
+ h["params"] = params
205
+
206
+ # Snowflake's cursor.rowcount is often -1 for SELECT until fully fetched.
207
+ # We still include it if known (for DML it may be accurate).
208
+ try:
209
+ rc = getattr(cur, "rowcount", None)
210
+ if isinstance(rc, int) and rc >= 0:
211
+ h["rowcount"] = rc
212
+ except Exception:
213
+ pass
214
+
215
+ h["result"] = "ok"
216
+ return h
217
+
218
+ def execute_query_returning_Q_xR_iterable(self, record):
219
+ client = SnowflakeClient(
220
+ account=self.sf_account,
221
+ user=self.sf_user,
222
+ authenticator=self.sf_auth,
223
+ role=self.sf_role,
224
+ warehouse=self.sf_wh,
225
+ database=self.sf_db,
226
+ schema=self.sf_schema,
227
+ )
228
+ try:
229
+ query = record.get(self.query_field)
230
+ if not query:
231
+ record['_error'] = 'missing query'
232
+ yield record
233
+
234
+ else:
235
+ params = record.get(self.params_field)
236
+
237
+ cur = client.conn.cursor()
238
+ try:
239
+ # Execute (supports positional or named params per DB-API)
240
+ if params is None:
241
+ cur.execute(query)
242
+ else:
243
+ if isinstance(params, (list, tuple, dict)):
244
+ cur.execute(query, params)
245
+ else:
246
+ # single scalar -> positional 1-tuple
247
+ cur.execute(query, (params,))
248
+
249
+ yield self._make_header(cur, params)
250
+
251
+ # Stream result rows for queries that return a result set
252
+ if cur.description:
253
+ for row in cur:
254
+ yield _row_to_dict(cur, row)
255
+ finally:
256
+ cur.close()
257
+ finally:
258
+ client.close()