python-jack-knife 0.5.1__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/PKG-INFO +1 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/base.py +24 -20
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/common.py +36 -28
- python_jack_knife-0.5.1/src/pjk/sinks/ddb.py → python_jack_knife-0.6.0/src/pjk/integrations/ddb_sink.py +2 -2
- {python_jack_knife-0.5.1/src/pjk/pipes → python_jack_knife-0.6.0/src/pjk/integrations}/postgres_pipe.py +29 -49
- python_jack_knife-0.6.0/src/pjk/integrations/snowflake_pipe.py +258 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/main.py +56 -31
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/man_page.py +4 -3
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/parser.py +95 -36
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/factory.py +15 -6
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/filter.py +8 -4
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/head.py +4 -6
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/join.py +15 -4
- python_jack_knife-0.6.0/src/pjk/pipes/map.py +138 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/move_field.py +2 -2
- python_jack_knife-0.6.0/src/pjk/pipes/progress_pipe.py +36 -0
- python_jack_knife-0.6.0/src/pjk/pipes/query_pipe.py +90 -0
- python_jack_knife-0.6.0/src/pjk/pipes/sample.py +68 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/select.py +2 -4
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/sort.py +6 -4
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/tail.py +1 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/where.py +10 -5
- python_jack_knife-0.6.0/src/pjk/progress.py +277 -0
- python_jack_knife-0.6.0/src/pjk/registry.py +199 -0
- python_jack_knife-0.6.0/src/pjk/sinks/create_sink.py +110 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/devnull.py +13 -6
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/dir_sink.py +9 -5
- python_jack_knife-0.6.0/src/pjk/sinks/expect.py +92 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/factory.py +10 -8
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/graph.py +1 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/user_sink_factory.py +2 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/dir_source.py +2 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/factory.py +7 -38
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/format_source.py +5 -0
- python_jack_knife-0.6.0/src/pjk/sources/npy_source.py +76 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/s3_source.py +2 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/user_source_factory.py +5 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/version.py +1 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/SOURCES.txt +9 -2
- python_jack_knife-0.5.1/src/pjk/pipes/map.py +0 -91
- python_jack_knife-0.5.1/src/pjk/registry.py +0 -150
- python_jack_knife-0.5.1/src/pjk/sinks/expect.py +0 -53
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/LICENSE +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/README.md +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/pyproject.toml +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/setup.cfg +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/__init__.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/log.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/__init__.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/denorm.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/let_reduce.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/remove_field.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/pipes/user_pipe_factory.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/__init__.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/csv_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/format_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/graph_bar_line.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/graph_cumulative.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/graph_hist.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/graph_scatter.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/json_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/s3_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/s3_stream.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/sinks.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/stdout.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sinks/tsv_sink.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/__init__.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/csv_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/inline_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/json_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/lazy_file.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/lazy_file_local.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/lazy_file_s3.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/parquet_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/source_list.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/sql_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/pjk/sources/tsv_source.py +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/requires.txt +0 -0
- {python_jack_knife-0.5.1 → python_jack_knife-0.6.0}/src/python_jack_knife.egg-info/top_level.txt +0 -0
|
@@ -162,7 +162,7 @@ class Usage:
|
|
|
162
162
|
return '\n'.join(lines)
|
|
163
163
|
|
|
164
164
|
def get_token_syntax(self):
|
|
165
|
-
if
|
|
165
|
+
if self.syntax:
|
|
166
166
|
return self.syntax # else piece it together
|
|
167
167
|
|
|
168
168
|
token = f'{self.name}'
|
|
@@ -279,6 +279,11 @@ class KeyedSource(ABC):
|
|
|
279
279
|
def deep_copy(self):
|
|
280
280
|
return None
|
|
281
281
|
|
|
282
|
+
class Integration(ABC):
    """Marker mixin that tags a component as an integration.

    The class adds no behavior of its own; it exists only so that
    components can be told apart (e.g. via ``issubclass``) when they are
    grouped for display.
    """
|
|
286
|
+
|
|
282
287
|
class Source(ABC):
|
|
283
288
|
@classmethod
|
|
284
289
|
def usage(cls):
|
|
@@ -300,10 +305,14 @@ class Source(ABC):
|
|
|
300
305
|
|
|
301
306
|
def deep_copy(self):
|
|
302
307
|
return None # Default: not copyable unless overridden
|
|
303
|
-
|
|
308
|
+
|
|
309
|
+
def close(self):
|
|
310
|
+
pass
|
|
311
|
+
|
|
312
|
+
def _get_sources(self, source_list: list):
|
|
313
|
+
pass
|
|
304
314
|
|
|
305
315
|
class Pipe(Source):
|
|
306
|
-
deep_copyable: bool = False # default to false
|
|
307
316
|
arity: int = 1
|
|
308
317
|
|
|
309
318
|
def __init__(self, ptok: ParsedToken, usage: Usage = None):
|
|
@@ -326,20 +335,12 @@ class Pipe(Source):
|
|
|
326
335
|
pass # optional hook
|
|
327
336
|
|
|
328
337
|
def deep_copy(self) -> Optional["Pipe"]:
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
for input in self.inputs:
|
|
337
|
-
strand = input.deep_copy()
|
|
338
|
-
if strand is None:
|
|
339
|
-
return None
|
|
340
|
-
clone.add_source(strand)
|
|
341
|
-
|
|
342
|
-
return clone
|
|
338
|
+
return None
|
|
339
|
+
|
|
340
|
+
def _get_sources(self, source_list: list):
|
|
341
|
+
for ix in self.inputs:
|
|
342
|
+
source_list.append(ix)
|
|
343
|
+
ix._get_sources(source_list)
|
|
343
344
|
|
|
344
345
|
class DeepCopyPipe(Pipe):
|
|
345
346
|
def deep_copy(self):
|
|
@@ -373,13 +374,16 @@ class Sink(ABC):
|
|
|
373
374
|
self.process()
|
|
374
375
|
self.close()
|
|
375
376
|
|
|
377
|
+
# get all inputs in the execution chain for closing
|
|
378
|
+
inputs = [self.input]
|
|
379
|
+
self.input._get_sources(inputs)
|
|
380
|
+
for input in inputs:
|
|
381
|
+
input.close()
|
|
382
|
+
|
|
376
383
|
# optional
|
|
377
384
|
def close(self):
|
|
378
385
|
pass
|
|
379
386
|
|
|
380
|
-
def print_info(self):
|
|
381
|
-
pass
|
|
382
|
-
|
|
383
387
|
def add_source(self, source: Source) -> None:
|
|
384
388
|
self.input = source
|
|
385
389
|
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
import sys, shutil, subprocess, contextlib, signal
|
|
5
5
|
import os
|
|
6
6
|
import yaml
|
|
7
|
+
from pjk.base import TokenError, Integration
|
|
7
8
|
|
|
8
9
|
class SafeNamespace:
|
|
9
10
|
def __init__(self, obj):
|
|
@@ -73,11 +74,12 @@ def highlight(text: str, color: str = 'bold', value: str = None) -> str:
|
|
|
73
74
|
return text.replace(value, f"{style}{value}{RESET}")
|
|
74
75
|
|
|
75
76
|
class Lookups:
|
|
76
|
-
def __init__(self):
|
|
77
|
+
def __init__(self, component_class):
|
|
77
78
|
self.lookups_yaml = os.path.expanduser('~/.pjk/lookups.yaml')
|
|
79
|
+
self.class_name = type(component_class).__name__
|
|
78
80
|
self._data = {}
|
|
79
81
|
self._load()
|
|
80
|
-
|
|
82
|
+
|
|
81
83
|
def _load(self):
|
|
82
84
|
"""Load lookups from YAML file if it exists."""
|
|
83
85
|
if os.path.exists(self.lookups_yaml):
|
|
@@ -93,8 +95,13 @@ class Lookups:
|
|
|
93
95
|
yaml.safe_dump(self._data, f)
|
|
94
96
|
|
|
95
97
|
def get(self, key, default=None):
|
|
96
|
-
|
|
97
|
-
|
|
98
|
+
lookup_key = f'{self.class_name}-{key}'
|
|
99
|
+
entry = self._data.get(lookup_key, default)
|
|
100
|
+
if not entry:
|
|
101
|
+
raise TokenError(
|
|
102
|
+
f"~/.pjk/lookups.yaml does not contain entry for '{lookup_key}' with required params."
|
|
103
|
+
)
|
|
104
|
+
return entry
|
|
98
105
|
|
|
99
106
|
def set(self, key, value):
|
|
100
107
|
"""Set a lookup value and persist it."""
|
|
@@ -112,36 +119,37 @@ class Lookups:
|
|
|
112
119
|
return dict(self._data)
|
|
113
120
|
|
|
114
121
|
class ComponentFactory:
|
|
115
|
-
def __init__(self,
|
|
122
|
+
def __init__(self, core_components: dict):
|
|
116
123
|
self.num_orig = 0
|
|
117
|
-
self.
|
|
118
|
-
|
|
119
|
-
|
|
124
|
+
self._components = {}
|
|
125
|
+
for k, v in core_components.items():
|
|
126
|
+
if issubclass(v, Integration):
|
|
127
|
+
self.register(k, v, 'integration')
|
|
128
|
+
else:
|
|
129
|
+
self.register(k, v, 'core')
|
|
120
130
|
|
|
121
|
-
def register(self, name, comp_class):
|
|
122
|
-
self.
|
|
131
|
+
def register(self, name, comp_class, origin: str):
|
|
132
|
+
self._components[name] = (comp_class, origin)
|
|
123
133
|
|
|
124
134
|
def get_comp_type_name(self):
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
def
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
print(line)
|
|
141
|
-
i += 1
|
|
135
|
+
pass
|
|
136
|
+
|
|
137
|
+
def get_component_name_class_tuples(self, origin: str = None) -> list:
    """Return ``(name, component_class)`` pairs for registered components.

    Args:
        origin: when given (and non-empty), only components registered
            under exactly this origin are returned; otherwise every
            registered component is returned.

    Returns:
        A new list of ``(name, class)`` tuples in registration order.
    """
    return [
        (comp_name, comp_class)
        for comp_name, (comp_class, comp_origin) in self._components.items()
        if not origin or comp_origin == origin
    ]
|
|
143
|
+
|
|
144
|
+
def get_component_class(self, name: str):
    """Return the class registered under ``name``, or ``None`` if unknown.

    Registry entries are stored as ``(component_class, origin)`` pairs;
    only the class is returned here.
    """
    # Avoid naming the local ``tuple``: that would shadow the builtin for
    # the rest of the method.
    entry = self._components.get(name)
    if entry is None:
        return None
    component_class, _origin = entry
    return component_class
|
|
142
150
|
|
|
143
151
|
def get_usage(self, name: str):
|
|
144
|
-
comp_class = self.
|
|
152
|
+
comp_class = self.get_component_class(name)
|
|
145
153
|
if not comp_class:
|
|
146
154
|
return None
|
|
147
155
|
return comp_class.usage()
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# SPDX-License-Identifier: Apache-2.0
|
|
2
2
|
# Copyright 2024 Mike Schultz
|
|
3
3
|
|
|
4
|
-
from pjk.base import Sink, Source, ParsedToken, Usage
|
|
4
|
+
from pjk.base import Sink, Integration, Source, ParsedToken, Usage
|
|
5
5
|
from decimal import Decimal
|
|
6
6
|
|
|
7
|
-
class DDBSink(Sink):
|
|
7
|
+
class DDBSink(Sink, Integration):
|
|
8
8
|
@classmethod
|
|
9
9
|
def usage(cls):
|
|
10
10
|
usage = Usage(
|
|
@@ -9,8 +9,9 @@ import uuid
|
|
|
9
9
|
from decimal import Decimal
|
|
10
10
|
from typing import Any, Dict, Optional
|
|
11
11
|
|
|
12
|
-
from pjk.base import
|
|
12
|
+
from pjk.base import Integration, ParsedToken, Usage
|
|
13
13
|
from pjk.common import Lookups
|
|
14
|
+
from pjk.pipes.query_pipe import QueryPipe
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
class DBClient:
|
|
@@ -89,49 +90,30 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
|
|
|
89
90
|
return {col: normalize(val) for col, val in zip(cols, row)}
|
|
90
91
|
|
|
91
92
|
|
|
92
|
-
class PostgresPipe(
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
"dbname",
|
|
102
|
-
"name of db. Entry in ~/.pjk/lookups.yaml containing host, user, password"
|
|
103
|
-
)
|
|
104
|
-
usage.def_param(
|
|
105
|
-
"header",
|
|
106
|
-
usage="emit header record before query results",
|
|
107
|
-
valid_values={"true", "false"}, default='true',
|
|
108
|
-
)
|
|
109
|
-
|
|
110
|
-
usage.def_example(expr_tokens=['myquery.sql', 'pgres:mydb'], expect=None)
|
|
111
|
-
usage.def_example(expr_tokens=["{'query': 'SELECT * from MY_TABLE;'}", 'pgres:mydb'], expect=None)
|
|
112
|
-
return usage
|
|
93
|
+
class PostgresPipe(QueryPipe,Integration):
|
|
94
|
+
name = 'pgres'
|
|
95
|
+
desc = "Postgres query pipe; executes SQL from input."
|
|
96
|
+
arg0 = ("dbname", 'database name.')
|
|
97
|
+
examples = [
|
|
98
|
+
['myquery.sql', 'pgres:mydb', '-'],
|
|
99
|
+
["{'query': 'SELECT * from MY_TABLE;'}", 'pgres:mydb', '-'],
|
|
100
|
+
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'pgres:mydb']
|
|
101
|
+
]
|
|
113
102
|
|
|
114
103
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
115
104
|
super().__init__(ptok, usage)
|
|
116
105
|
|
|
117
|
-
lookups = Lookups()
|
|
106
|
+
lookups = Lookups(self)
|
|
118
107
|
self.dbname = usage.get_arg("dbname")
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
self.
|
|
127
|
-
self.db_user = db_params.get("user")
|
|
128
|
-
self.db_pass = db_params.get("password")
|
|
129
|
-
self.db_port = int(db_params.get("port", 5432))
|
|
130
|
-
self.db_ssl = bool(db_params.get("ssl", False))
|
|
131
|
-
|
|
132
|
-
self.query_field = "query" # SQL string
|
|
108
|
+
|
|
109
|
+
self.db_host = self.lookup_params.get("host")
|
|
110
|
+
self.db_user = self.lookup_params.get("user")
|
|
111
|
+
self.db_pass = self.lookup_params.get("password")
|
|
112
|
+
self.db_port = int(self.lookup_params.get("port", 5432))
|
|
113
|
+
self.db_ssl = bool(self.lookup_params.get("ssl", False))
|
|
114
|
+
|
|
115
|
+
self.query_field = usage.get_param('query_field')
|
|
133
116
|
self.params_field = "params" # optional: list/tuple (positional) or dict (named)
|
|
134
|
-
self.do_header = usage.get_param("header") == "true"
|
|
135
117
|
|
|
136
118
|
def reset(self):
|
|
137
119
|
# stateless across reset
|
|
@@ -143,7 +125,6 @@ class PostgresPipe(Pipe):
|
|
|
143
125
|
Figures out result, rowcount, function automatically.
|
|
144
126
|
"""
|
|
145
127
|
h = {
|
|
146
|
-
"query": query,
|
|
147
128
|
"db": self.dbname,
|
|
148
129
|
"dbhost": self.db_host,
|
|
149
130
|
}
|
|
@@ -163,9 +144,9 @@ class PostgresPipe(Pipe):
|
|
|
163
144
|
h["result"] = "ok"
|
|
164
145
|
h["rowcount"] = cur.rowcount
|
|
165
146
|
|
|
166
|
-
return
|
|
147
|
+
return h
|
|
167
148
|
|
|
168
|
-
def
|
|
149
|
+
def execute_query_returning_Q_xR_iterable(self, record):
|
|
169
150
|
client = DBClient(
|
|
170
151
|
host=self.db_host,
|
|
171
152
|
username=self.db_user,
|
|
@@ -175,12 +156,12 @@ class PostgresPipe(Pipe):
|
|
|
175
156
|
ssl=self.db_ssl,
|
|
176
157
|
)
|
|
177
158
|
try:
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
yield
|
|
182
|
-
|
|
183
|
-
params =
|
|
159
|
+
query = record.get(self.query_field)
|
|
160
|
+
if not query:
|
|
161
|
+
record['_error'] = 'missing query'
|
|
162
|
+
yield record
|
|
163
|
+
else:
|
|
164
|
+
params = record.get(self.params_field)
|
|
184
165
|
|
|
185
166
|
cur = client.conn.cursor()
|
|
186
167
|
try:
|
|
@@ -194,8 +175,7 @@ class PostgresPipe(Pipe):
|
|
|
194
175
|
cur.execute(query, (params,))
|
|
195
176
|
|
|
196
177
|
# yield header first
|
|
197
|
-
|
|
198
|
-
yield self._make_header(cur, query, params)
|
|
178
|
+
yield self._make_header(cur, query, params)
|
|
199
179
|
|
|
200
180
|
# then stream rows if it was a real SELECT with results
|
|
201
181
|
if cur.description:
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
#
|
|
4
|
+
# pjk/integrations/snowflake_pipe.py
|
|
5
|
+
|
|
6
|
+
import base64
|
|
7
|
+
import datetime as _dt
|
|
8
|
+
import uuid
|
|
9
|
+
from decimal import Decimal
|
|
10
|
+
from typing import Any, Dict, Optional
|
|
11
|
+
|
|
12
|
+
from pjk.base import ParsedToken, Usage, TokenError, Integration
|
|
13
|
+
from pjk.pipes.query_pipe import QueryPipe
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# ---------- utilities ----------
|
|
17
|
+
|
|
18
|
+
def _iso_dt(x: _dt.datetime) -> str:
    """Format ``x`` as ISO 8601, spelling a ``+00:00`` offset as ``Z``."""
    iso_text = x.isoformat()
    # Splitting on the UTC offset and re-joining with "Z" rewrites every
    # occurrence, exactly like a plain substring replacement.
    return "Z".join(iso_text.split("+00:00"))
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def normalize(obj: Any) -> Any:
    """
    Convert a value into a JSON/YAML-friendly form, recursing into containers.

    Conversions applied:
      - Decimal                      -> fixed-point string (never scientific notation)
      - datetime                     -> ISO-8601 string via _iso_dt
      - date / time                  -> ISO-8601 string
      - UUID                         -> string
      - bytes/bytearray/memoryview   -> base64 text (ASCII)
      - dict                         -> dict with the same keys and normalized values
      - list / tuple / set           -> list of normalized items
    Anything else (including None, int, float, str, bool) is returned unchanged.
    """
    if obj is None:
        result = None
    elif isinstance(obj, Decimal):
        result = format(obj, "f")
    elif isinstance(obj, _dt.datetime):
        # Must be tested before date: datetime is a subclass of date.
        result = _iso_dt(obj)
    elif isinstance(obj, (_dt.date, _dt.time)):
        result = obj.isoformat()
    elif isinstance(obj, uuid.UUID):
        result = str(obj)
    elif isinstance(obj, (bytes, bytearray, memoryview)):
        raw = bytes(obj)
        result = base64.b64encode(raw).decode("ascii")
    elif isinstance(obj, dict):
        result = {}
        for key, value in obj.items():
            result[key] = normalize(value)
    elif isinstance(obj, (list, tuple, set)):
        result = list(map(normalize, obj))
    else:
        result = obj
    return result
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _row_to_dict(cursor, row) -> Dict[str, Any]:
    """Pair each value of ``row`` with its column name and normalize it.

    Column names are the first element of every entry in
    ``cursor.description``; values are passed through ``normalize``.
    """
    column_names = tuple(meta[0] for meta in cursor.description)
    record: Dict[str, Any] = {}
    for column, value in zip(column_names, row):
        record[column] = normalize(value)
    return record
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _get_any(params: Dict[str, Any], *keys: str) -> Optional[Any]:
    """
    Look up the first spelling of any of ``keys`` that is present in ``params``.

    For each key, in the order given, these spellings are tried: the key as
    written, lower-cased, upper-cased, then ``snowflake_<key>`` lower-cased
    and ``SNOWFLAKE_<KEY>`` upper-cased. Returns the stored value of the
    first spelling found (which may itself be None), or None if none match.
    """
    candidates = [
        spelling
        for key in keys
        for spelling in (
            key,
            key.lower(),
            key.upper(),
            f"snowflake_{key}".lower(),
            f"SNOWFLAKE_{key}".upper(),
        )
    ]
    present = [name for name in candidates if name in params]
    return params[present[0]] if present else None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# ---------- client ----------
|
|
76
|
+
|
|
77
|
+
class SnowflakeClient:
    """
    Simple connection wrapper for snowflake-connector-python.
    One connection per client instance (safer than sharing across threads).

    Attributes:
        conn: the open connector connection, or None before a successful
            connect and after close().
    """

    def __init__(
        self,
        *,
        account: str,
        user: str,
        password: Optional[str] = None,
        authenticator: Optional[str] = None,  # e.g. 'externalbrowser', 'oauth', 'snowflake'
        role: Optional[str] = None,
        warehouse: Optional[str] = None,
        database: Optional[str] = None,
        schema: Optional[str] = None
    ):
        """Open a connection and select role/warehouse/database/schema.

        Only settings with a truthy value are forwarded to the connector.

        Raises:
            Exception: whatever the connector raises while connecting or
                while running the USE statements; the connection is closed
                before the error propagates.
        """
        import snowflake.connector  # lazy import

        # Define the attribute up front so close() is safe even if connect fails.
        self.conn = None

        kwargs: Dict[str, Any] = {
            "account": account,
            "user": user,
        }
        optional_settings = {
            "password": password,
            "authenticator": authenticator,
            "role": role,
            "warehouse": warehouse,
            "database": database,
            "schema": schema,
        }
        kwargs.update({k: v for k, v in optional_settings.items() if v})

        try:
            self.conn = snowflake.connector.connect(**kwargs)
            # autocommit is True by default; make explicit
            self.conn.autocommit(True)
            # Apply explicit USE statements as a safety net (only if provided)
            with self.conn.cursor() as cur:
                for keyword, identifier in (
                    ("ROLE", role),
                    ("WAREHOUSE", warehouse),
                    ("DATABASE", database),
                    ("SCHEMA", schema),
                ):
                    if identifier:
                        cur.execute(f"USE {keyword} {self._quote_identifier(identifier)}")
        except Exception:
            print("Failed to connect to Snowflake")
            # Do not leak a session that connected but failed during USE.
            try:
                self.close()
            except Exception:
                # Best effort: the original failure is the one worth reporting.
                pass
            raise

    @staticmethod
    def _quote_identifier(identifier: str) -> str:
        """Wrap ``identifier`` in double quotes, doubling any embedded quote."""
        escaped = str(identifier).replace('"', '""')
        return f'"{escaped}"'

    def close(self):
        """Close the connection if one is open; safe to call repeatedly."""
        if getattr(self, "conn", None) is not None:
            try:
                self.conn.close()
            finally:
                self.conn = None
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# ---------- pipe ----------
|
|
140
|
+
|
|
141
|
+
class SnowflakePipe(QueryPipe, Integration):
    """
    Snowflake query pipe: runs the SQL found in each input record and streams
    a header record followed by the result rows.

    The SQL is read from the record field named by the 'query_field' param;
    optional bind parameters are read from record['params'].
    Connection/session settings are pulled from ~/.pjk/lookups.yaml under the arg name.
    """
    name = 'snowflake'
    desc = "Snowflake query pipe; executes an SQL query for each input record."
    arg0 = ('dbname', 'database name.')
    # NOTE(review): the examples use the token 'snow' while `name` is
    # 'snowflake' -- confirm which spelling the component registry expects.
    examples = [
        ["{'query': 'SELECT CURRENT_ROLE();'}", "snow:EDLDB", "-"],
        ["myquery.sql", "snow:EDLDB", "-"]
    ]

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Read connection settings from the lookups entry and validate them.

        Raises:
            TokenError: if account, user, role, warehouse or schema is
                missing, or if neither an authenticator nor a password is
                configured.
        """
        super().__init__(ptok, usage)

        self.dbname = usage.get_arg(type(self).arg0[0])

        # self.lookup_params is not assigned in this class; presumably the
        # QueryPipe base populates it from lookups.yaml -- TODO confirm.
        # Accept both bare keys and SNOWFLAKE_* variants in lookups.yaml
        self.sf_account = _get_any(self.lookup_params, "account")
        self.sf_user = _get_any(self.lookup_params, "user")
        self.sf_auth = _get_any(self.lookup_params, "authenticator")
        # The password must be read before validation: it is consulted
        # whenever no authenticator is configured.
        self.sf_password = _get_any(self.lookup_params, "password")
        self.sf_role = _get_any(self.lookup_params, "role")
        self.sf_wh = _get_any(self.lookup_params, "warehouse")
        self.sf_db = self.dbname
        self.sf_schema = _get_any(self.lookup_params, "schema")

        # Basic validation
        required = [
            ("account", self.sf_account),
            ("user", self.sf_user),
            ("authenticator|password", self.sf_auth or self.sf_password),
            ("role", self.sf_role),
            ("warehouse", self.sf_wh),
            ("schema", self.sf_schema),
        ]
        missing = [label for label, value in required if not value]
        if missing:
            raise TokenError(
                f"lookups entry '{self.dbname}' missing: {', '.join(missing)}"
            )

        self.query_field = usage.get_param('query_field')
        self.params_field = "params"  # optional: list/tuple (positional) or dict (named)

    def reset(self):
        # stateless across reset
        pass

    def _make_header(self, cur, params=None) -> Dict[str, Any]:
        """
        Build a header record with query metadata and session context.

        Args:
            cur: the cursor the statement was executed on (read for rowcount).
            params: bind parameters used for the statement, if any.
        """
        h: Dict[str, Any] = {
            "db": self.dbname,
            "account": self.sf_account,
            "role": self.sf_role,
            "warehouse": self.sf_wh,
        }
        if self.sf_db:
            h["database"] = self.sf_db
        if self.sf_schema:
            h["schema"] = self.sf_schema
        if params is not None:
            h["params"] = params

        # Snowflake's cursor.rowcount is often -1 for SELECT until fully fetched.
        # We still include it if known (for DML it may be accurate).
        try:
            rc = getattr(cur, "rowcount", None)
            if isinstance(rc, int) and rc >= 0:
                h["rowcount"] = rc
        except Exception:
            # Deliberate best effort: rowcount is optional metadata and must
            # never prevent the header from being emitted.
            pass

        h["result"] = "ok"
        return h

    def execute_query_returning_Q_xR_iterable(self, record):
        """Run the record's query and yield a header followed by result rows.

        A record without a query is yielded back with
        ``record['_error'] = 'missing query'`` and no connection is opened.
        Otherwise a fresh connection is opened for this record and closed
        when the generator finishes or is discarded.
        """
        query = record.get(self.query_field)
        if not query:
            # Nothing to run: report the problem without opening a session.
            record['_error'] = 'missing query'
            yield record
            return

        params = record.get(self.params_field)

        client = SnowflakeClient(
            account=self.sf_account,
            user=self.sf_user,
            password=self.sf_password,
            authenticator=self.sf_auth,
            role=self.sf_role,
            warehouse=self.sf_wh,
            database=self.sf_db,
            schema=self.sf_schema,
        )
        try:
            cur = client.conn.cursor()
            try:
                # Execute (supports positional or named params per DB-API)
                if params is None:
                    cur.execute(query)
                elif isinstance(params, (list, tuple, dict)):
                    cur.execute(query, params)
                else:
                    # single scalar -> positional 1-tuple
                    cur.execute(query, (params,))

                yield self._make_header(cur, params)

                # Stream result rows for queries that return a result set
                if cur.description:
                    for row in cur:
                        yield _row_to_dict(cur, row)
            finally:
                cur.close()
        finally:
            client.close()
|