python-jack-knife 0.5.5__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/PKG-INFO +1 -1
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/base.py +5 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/common.py +29 -24
- python_jack_knife-0.5.5/src/pjk/sinks/ddb.py → python_jack_knife-0.6.2/src/pjk/integrations/ddb_sink.py +2 -2
- {python_jack_knife-0.5.5/src/pjk/pipes → python_jack_knife-0.6.2/src/pjk/integrations}/postgres_pipe.py +26 -42
- python_jack_knife-0.6.2/src/pjk/integrations/snowflake_pipe.py +258 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/man_page.py +4 -3
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/parser.py +169 -84
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/denorm.py +6 -3
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/factory.py +9 -4
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/filter.py +5 -1
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/head.py +1 -1
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/join.py +11 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/map.py +8 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/move_field.py +9 -10
- python_jack_knife-0.6.2/src/pjk/pipes/progress_pipe.py +36 -0
- python_jack_knife-0.6.2/src/pjk/pipes/query_pipe.py +90 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/sample.py +5 -3
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/sort.py +6 -4
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/tail.py +1 -1
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/where.py +2 -2
- python_jack_knife-0.6.2/src/pjk/progress.py +277 -0
- python_jack_knife-0.6.2/src/pjk/registry.py +199 -0
- python_jack_knife-0.6.2/src/pjk/sinks/create_sink.py +110 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/dir_sink.py +9 -5
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/factory.py +10 -3
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/graph.py +1 -1
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/factory.py +5 -5
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/user_source_factory.py +5 -1
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/version.py +1 -1
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/SOURCES.txt +5 -2
- python_jack_knife-0.5.5/src/pjk/pipes/progress_pipe.py +0 -41
- python_jack_knife-0.5.5/src/pjk/progress.py +0 -177
- python_jack_knife-0.5.5/src/pjk/registry.py +0 -172
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/LICENSE +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/README.md +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/pyproject.toml +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/setup.cfg +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/__init__.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/log.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/main.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/__init__.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/let_reduce.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/remove_field.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/select.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/user_pipe_factory.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/__init__.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/csv_sink.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/devnull.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/expect.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/format_sink.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/graph_bar_line.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/graph_cumulative.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/graph_hist.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/graph_scatter.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/json_sink.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/s3_sink.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/s3_stream.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/sinks.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/stdout.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/tsv_sink.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/user_sink_factory.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/__init__.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/csv_source.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/dir_source.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/format_source.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/inline_source.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/json_source.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/lazy_file.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/lazy_file_local.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/lazy_file_s3.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/npy_source.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/parquet_source.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/s3_source.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/source_list.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/sql_source.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/tsv_source.py +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/requires.txt +0 -0
- {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/top_level.txt +0 -0
|
@@ -3,8 +3,9 @@
|
|
|
3
3
|
|
|
4
4
|
import sys, shutil, subprocess, contextlib, signal
|
|
5
5
|
import os
|
|
6
|
+
import re
|
|
6
7
|
import yaml
|
|
7
|
-
from pjk.base import TokenError
|
|
8
|
+
from pjk.base import TokenError, Integration
|
|
8
9
|
|
|
9
10
|
class SafeNamespace:
|
|
10
11
|
def __init__(self, obj):
|
|
@@ -99,7 +100,7 @@ class Lookups:
|
|
|
99
100
|
entry = self._data.get(lookup_key, default)
|
|
100
101
|
if not entry:
|
|
101
102
|
raise TokenError(
|
|
102
|
-
f"~/.pjk/lookups.yaml
|
|
103
|
+
f"~/.pjk/lookups.yaml does not contain entry for '{lookup_key}' with required params."
|
|
103
104
|
)
|
|
104
105
|
return entry
|
|
105
106
|
|
|
@@ -119,39 +120,43 @@ class Lookups:
|
|
|
119
120
|
return dict(self._data)
|
|
120
121
|
|
|
121
122
|
class ComponentFactory:
|
|
122
|
-
def __init__(self,
|
|
123
|
+
def __init__(self, core_components: dict):
|
|
123
124
|
self.num_orig = 0
|
|
124
|
-
self.
|
|
125
|
-
|
|
126
|
-
|
|
125
|
+
self._components = {}
|
|
126
|
+
for k, v in core_components.items():
|
|
127
|
+
if issubclass(v, Integration):
|
|
128
|
+
self.register(k, v, 'integration')
|
|
129
|
+
else:
|
|
130
|
+
self.register(k, v, 'core')
|
|
127
131
|
|
|
128
|
-
def register(self, name, comp_class):
|
|
129
|
-
self.
|
|
132
|
+
def register(self, name, comp_class, origin: str):
|
|
133
|
+
self._components[name] = (comp_class, origin)
|
|
130
134
|
|
|
131
135
|
def get_comp_type_name(self):
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def print_descriptions(self):
|
|
135
|
-
header = highlight(f'{self.comp_type_name}s')
|
|
136
|
-
print(header)
|
|
136
|
+
pass
|
|
137
137
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
for
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
break
|
|
138
|
+
def get_component_name_class_tuples(self, origin: str = None) -> list:
|
|
139
|
+
ret = []
|
|
140
|
+
for k, (v, org) in self._components.items():
|
|
141
|
+
if not origin or origin == org:
|
|
142
|
+
ret.append((k, v))
|
|
143
|
+
return ret
|
|
145
144
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
145
|
+
def get_component_class(self, name: str):
|
|
146
|
+
tuple = self._components.get(name)
|
|
147
|
+
if not tuple:
|
|
148
|
+
return None
|
|
149
|
+
component_class, origin = tuple
|
|
150
|
+
return component_class
|
|
149
151
|
|
|
150
152
|
def get_usage(self, name: str):
|
|
151
|
-
comp_class = self.
|
|
153
|
+
comp_class = self.get_component_class(name)
|
|
152
154
|
if not comp_class:
|
|
153
155
|
return None
|
|
154
156
|
return comp_class.usage()
|
|
155
157
|
|
|
156
158
|
def create(self, token: str):
|
|
157
159
|
pass
|
|
160
|
+
|
|
161
|
+
def is_valid_field_name(name: str) -> bool:
    """Return True when *name* is a valid field name.

    A valid name starts with an ASCII letter or underscore and continues with
    ASCII letters, digits or underscores only. The empty string is not valid.

    A real bool is returned (rather than the underlying match object) so the
    result can be compared, serialized or logged without surprises; truthiness
    for existing callers is unchanged.
    """
    return re.fullmatch(r'^[A-Za-z_][A-Za-z0-9_]*$', name) is not None
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# SPDX-License-Identifier: Apache-2.0
|
|
2
2
|
# Copyright 2024 Mike Schultz
|
|
3
3
|
|
|
4
|
-
from pjk.base import Sink, Source, ParsedToken, Usage
|
|
4
|
+
from pjk.base import Sink, Integration, Source, ParsedToken, Usage
|
|
5
5
|
from decimal import Decimal
|
|
6
6
|
|
|
7
|
-
class DDBSink(Sink):
|
|
7
|
+
class DDBSink(Sink, Integration):
|
|
8
8
|
@classmethod
|
|
9
9
|
def usage(cls):
|
|
10
10
|
usage = Usage(
|
|
@@ -9,8 +9,9 @@ import uuid
|
|
|
9
9
|
from decimal import Decimal
|
|
10
10
|
from typing import Any, Dict, Optional
|
|
11
11
|
|
|
12
|
-
from pjk.base import
|
|
12
|
+
from pjk.base import Integration, ParsedToken, Usage
|
|
13
13
|
from pjk.common import Lookups
|
|
14
|
+
from pjk.pipes.query_pipe import QueryPipe
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
class DBClient:
|
|
@@ -89,28 +90,15 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
|
|
|
89
90
|
return {col: normalize(val) for col, val in zip(cols, row)}
|
|
90
91
|
|
|
91
92
|
|
|
92
|
-
class PostgresPipe(
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
"dbname",
|
|
102
|
-
f"~/.pjk/lookups.yaml must containing entry '{cls.__name__}-<dbname>' with host, user, password"
|
|
103
|
-
)
|
|
104
|
-
usage.def_param(
|
|
105
|
-
"header",
|
|
106
|
-
usage="emit header record before query results",
|
|
107
|
-
valid_values={"true", "false"}, default='false',
|
|
108
|
-
)
|
|
109
|
-
|
|
110
|
-
usage.def_example(expr_tokens=['myquery.sql', 'pgres:mydb'], expect=None)
|
|
111
|
-
usage.def_example(expr_tokens=["{'query': 'SELECT * from MY_TABLE;'}", 'pgres:mydb'], expect=None)
|
|
112
|
-
usage.def_example(expr_tokens=["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'pgres:mydb'], expect=None)
|
|
113
|
-
return usage
|
|
93
|
+
class PostgresPipe(QueryPipe,Integration):
|
|
94
|
+
name = 'pgres'
|
|
95
|
+
desc = "Postgres query pipe; executes SQL from input."
|
|
96
|
+
arg0 = ("dbname", 'database name.')
|
|
97
|
+
examples = [
|
|
98
|
+
['myquery.sql', 'pgres:mydb', '-'],
|
|
99
|
+
["{'query': 'SELECT * from MY_TABLE;'}", 'pgres:mydb', '-'],
|
|
100
|
+
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'pgres:mydb']
|
|
101
|
+
]
|
|
114
102
|
|
|
115
103
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
116
104
|
super().__init__(ptok, usage)
|
|
@@ -118,16 +106,14 @@ class PostgresPipe(Pipe):
|
|
|
118
106
|
lookups = Lookups(self)
|
|
119
107
|
self.dbname = usage.get_arg("dbname")
|
|
120
108
|
|
|
121
|
-
|
|
122
|
-
self.
|
|
123
|
-
self.
|
|
124
|
-
self.
|
|
125
|
-
self.
|
|
126
|
-
self.db_ssl = bool(db_params.get("ssl", False))
|
|
109
|
+
self.db_host = self.lookup_params.get("host")
|
|
110
|
+
self.db_user = self.lookup_params.get("user")
|
|
111
|
+
self.db_pass = self.lookup_params.get("password")
|
|
112
|
+
self.db_port = int(self.lookup_params.get("port", 5432))
|
|
113
|
+
self.db_ssl = bool(self.lookup_params.get("ssl", False))
|
|
127
114
|
|
|
128
|
-
self.query_field =
|
|
115
|
+
self.query_field = usage.get_param('query_field')
|
|
129
116
|
self.params_field = "params" # optional: list/tuple (positional) or dict (named)
|
|
130
|
-
self.do_header = usage.get_param("header") == "true"
|
|
131
117
|
|
|
132
118
|
def reset(self):
|
|
133
119
|
# stateless across reset
|
|
@@ -139,7 +125,6 @@ class PostgresPipe(Pipe):
|
|
|
139
125
|
Figures out result, rowcount, function automatically.
|
|
140
126
|
"""
|
|
141
127
|
h = {
|
|
142
|
-
"query": query,
|
|
143
128
|
"db": self.dbname,
|
|
144
129
|
"dbhost": self.db_host,
|
|
145
130
|
}
|
|
@@ -159,9 +144,9 @@ class PostgresPipe(Pipe):
|
|
|
159
144
|
h["result"] = "ok"
|
|
160
145
|
h["rowcount"] = cur.rowcount
|
|
161
146
|
|
|
162
|
-
return
|
|
147
|
+
return h
|
|
163
148
|
|
|
164
|
-
def
|
|
149
|
+
def execute_query_returning_Q_xR_iterable(self, record):
|
|
165
150
|
client = DBClient(
|
|
166
151
|
host=self.db_host,
|
|
167
152
|
username=self.db_user,
|
|
@@ -171,12 +156,12 @@ class PostgresPipe(Pipe):
|
|
|
171
156
|
ssl=self.db_ssl,
|
|
172
157
|
)
|
|
173
158
|
try:
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
yield
|
|
178
|
-
|
|
179
|
-
params =
|
|
159
|
+
query = record.get(self.query_field)
|
|
160
|
+
if not query:
|
|
161
|
+
record['_error'] = 'missing query'
|
|
162
|
+
yield record
|
|
163
|
+
else:
|
|
164
|
+
params = record.get(self.params_field)
|
|
180
165
|
|
|
181
166
|
cur = client.conn.cursor()
|
|
182
167
|
try:
|
|
@@ -190,8 +175,7 @@ class PostgresPipe(Pipe):
|
|
|
190
175
|
cur.execute(query, (params,))
|
|
191
176
|
|
|
192
177
|
# yield header first
|
|
193
|
-
|
|
194
|
-
yield self._make_header(cur, query, params)
|
|
178
|
+
yield self._make_header(cur, query, params)
|
|
195
179
|
|
|
196
180
|
# then stream rows if it was a real SELECT with results
|
|
197
181
|
if cur.description:
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
#
|
|
4
|
+
# pjk/integrations/snowflake_pipe.py
|
|
5
|
+
|
|
6
|
+
import base64
|
|
7
|
+
import datetime as _dt
|
|
8
|
+
import uuid
|
|
9
|
+
from decimal import Decimal
|
|
10
|
+
from typing import Any, Dict, Optional
|
|
11
|
+
|
|
12
|
+
from pjk.base import ParsedToken, Usage, TokenError, Integration
|
|
13
|
+
from pjk.pipes.query_pipe import QueryPipe
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# ---------- utilities ----------
|
|
17
|
+
|
|
18
|
+
def _iso_dt(x: _dt.datetime) -> str:
    """Return the ISO 8601 form of *x*, spelling a zero UTC offset as 'Z'.

    Only a trailing '+00:00' is rewritten. Naive datetimes and non-zero
    offsets are returned exactly as ``isoformat()`` produces them.
    """
    s = x.isoformat()
    # Match the suffix only: isoformat() can emit sub-minute offsets such as
    # '+00:00:30', and a blanket replace would corrupt those into 'Z:30'.
    if s.endswith("+00:00"):
        return s[:-len("+00:00")] + "Z"
    return s
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def normalize(obj: Any) -> Any:
    """
    Make values JSON/YAML-safe and portable (schema-agnostic):
    - Decimal -> exact string (no sci-notation)
    - date/datetime/time -> ISO-8601 string (datetime goes through _iso_dt)
    - UUID -> string
    - bytes/bytearray/memoryview -> base64 string
    - dicts -> values normalized recursively (keys are left untouched)
    - lists/tuples/sets/frozensets -> list with items normalized recursively
    - anything else (int/float/str/bool/...) is returned as-is
    """
    if obj is None:
        return None
    if isinstance(obj, Decimal):
        # 'f' forces fixed-point output, e.g. Decimal('1E+3') -> '1000'.
        return format(obj, "f")
    # datetime must be tested before date: datetime is a subclass of date.
    if isinstance(obj, _dt.datetime):
        return _iso_dt(obj)
    if isinstance(obj, (_dt.date, _dt.time)):
        return obj.isoformat()
    if isinstance(obj, uuid.UUID):
        return str(obj)
    if isinstance(obj, (bytes, bytearray, memoryview)):
        return base64.b64encode(bytes(obj)).decode("ascii")
    if isinstance(obj, dict):
        return {k: normalize(v) for k, v in obj.items()}
    # frozenset is not a subclass of set, so it has to be listed explicitly;
    # otherwise it would be returned unchanged and stay non-serializable.
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [normalize(v) for v in obj]
    return obj
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _row_to_dict(cursor, row) -> Dict[str, Any]:
    """Pair each value of *row* with its column name and normalize the value.

    Column names are the first item of every entry in ``cursor.description``
    (presumably DB-API column descriptors -- confirm against the connector).
    Values are passed through ``normalize``.
    """
    names = [column[0] for column in cursor.description]
    record: Dict[str, Any] = {}
    for name, value in zip(names, row):
        record[name] = normalize(value)
    return record
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _get_any(params: Dict[str, Any], *keys: str) -> Optional[Any]:
    """
    Fetch a value from params using any of the provided keys.

    For every key, in the order given, these spellings are tried in turn:
    the key as written, lower-case, upper-case, 'snowflake_<key>' (lower-case)
    and 'SNOWFLAKE_<KEY>' (upper-case). The value of the first spelling present
    in params is returned, even if that value is falsy.

    Returns None when nothing matches, or when params is None or empty.
    """
    # A missing lookup entry should be reported as "key not found" by the
    # caller, not crash here with a TypeError on the membership test.
    if not params:
        return None
    for k in keys:
        candidates = (
            k,
            k.lower(),
            k.upper(),
            f"snowflake_{k}".lower(),
            f"SNOWFLAKE_{k}".upper(),
        )
        for candidate in candidates:
            if candidate in params:
                return params[candidate]
    return None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# ---------- client ----------
|
|
76
|
+
|
|
77
|
+
class SnowflakeClient:
    """
    Simple connection wrapper for snowflake-connector-python.
    One connection per client instance (safer than sharing across threads).

    The connection is opened in the constructor and exposed as ``self.conn``;
    call ``close()`` when done.
    """

    def __init__(
        self,
        *,
        account: str,
        user: str,
        password: Optional[str] = None,
        authenticator: Optional[str] = None,  # e.g. 'externalbrowser', 'oauth', 'snowflake'
        role: Optional[str] = None,
        warehouse: Optional[str] = None,
        database: Optional[str] = None,
        schema: Optional[str] = None
    ):
        """Connect to Snowflake and select the requested session context.

        account and user are always sent; every other setting is sent only
        when it is truthy. After connecting, a USE statement is issued for
        each of role, warehouse, database and schema that was provided.

        Raises whatever the connector raises. On any failure the partially
        opened connection is closed before the exception propagates.
        """
        import snowflake.connector  # lazy import

        # Assigned up front so close() is safe even if connect() itself fails.
        self.conn = None

        kwargs: Dict[str, Any] = {
            "account": account,
            "user": user,
        }
        optional = {
            "password": password,
            "authenticator": authenticator,
            "role": role,
            "warehouse": warehouse,
            "database": database,
            "schema": schema,
        }
        kwargs.update({key: value for key, value in optional.items() if value})

        try:
            self.conn = snowflake.connector.connect(**kwargs)
            # autocommit is True by default; make explicit
            self.conn.autocommit(True)
            # Apply explicit USE statements as a safety net (only if provided)
            with self.conn.cursor() as cur:
                for kind, ident in (
                    ("ROLE", role),
                    ("WAREHOUSE", warehouse),
                    ("DATABASE", database),
                    ("SCHEMA", schema),
                ):
                    if ident:
                        cur.execute(f"USE {kind} {self._quote_ident(ident)}")
        except Exception:
            print("Failed to connect to Snowflake")
            # Do not leak a connection that opened but failed during setup.
            try:
                self.close()
            except Exception:
                # The original failure is the one worth reporting.
                pass
            raise

    @staticmethod
    def _quote_ident(ident: Any) -> str:
        """Wrap *ident* in double quotes, doubling any embedded double quote."""
        return '"' + str(ident).replace('"', '""') + '"'

    def close(self):
        """Close the connection if one is open; safe to call repeatedly."""
        if getattr(self, "conn", None) is not None:
            try:
                self.conn.close()
            finally:
                self.conn = None
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# ---------- pipe ----------
|
|
140
|
+
|
|
141
|
+
class SnowflakePipe(QueryPipe, Integration):
    """
    Snowflake query pipe: runs the SQL carried by each input record and
    streams the result rows.

    The SQL text is read from the record field named by the 'query_field'
    usage parameter; optional bind parameters are read from record['params'].
    Connection settings (account, user, authenticator or password, role,
    warehouse, schema) are read from self.lookup_params; the database is the
    pipe's 'dbname' argument.

    NOTE(review): lookup_params is not assigned in this class; presumably
    QueryPipe.__init__ fills it from ~/.pjk/lookups.yaml -- confirm.
    """
    name = 'snowflake'
    desc = "Snowflake query pipe; executes an SQL query for each input record."
    arg0 = ('dbname', 'database name.')
    # NOTE(review): these examples use the token prefix 'snow:' while name is
    # 'snowflake' -- confirm which prefix the registry actually expects.
    examples = [
        ["{'query': 'SELECT CURRENT_ROLE();'}", "snow:EDLDB", "-"],
        ["myquery.sql", "snow:EDLDB", "-"]
    ]

    def __init__(self, ptok: ParsedToken, usage: Usage):
        """Read the connection settings and validate that none is missing.

        Raises:
            TokenError: if account, user, role, warehouse or schema is
                missing, or if neither authenticator nor password is set.
        """
        super().__init__(ptok, usage)

        self.dbname = usage.get_arg(type(self).arg0[0])

        # Accept both bare keys and SNOWFLAKE_* variants in lookups.yaml
        self.sf_account = _get_any(self.lookup_params, "account")
        self.sf_user = _get_any(self.lookup_params, "user")
        self.sf_auth = _get_any(self.lookup_params, "authenticator")
        # Must be assigned before validation: the check below reads it, and
        # without it a config lacking 'authenticator' hit an AttributeError.
        self.sf_password = _get_any(self.lookup_params, "password")
        self.sf_role = _get_any(self.lookup_params, "role")
        self.sf_wh = _get_any(self.lookup_params, "warehouse")
        self.sf_db = self.dbname
        self.sf_schema = _get_any(self.lookup_params, "schema")

        # Basic validation
        required = [
            ("account", self.sf_account),
            ("user", self.sf_user),
            ("authenticator|password", self.sf_auth or self.sf_password),
            ("role", self.sf_role),
            ("warehouse", self.sf_wh),
            ("schema", self.sf_schema),
        ]
        missing = [label for label, value in required if not value]
        if missing:
            raise TokenError(
                f"lookups entry '{self.dbname}' missing: {', '.join(missing)}"
            )

        self.query_field = usage.get_param('query_field')
        self.params_field = "params"  # optional: list/tuple (positional) or dict (named)

    def reset(self):
        # stateless across reset
        pass

    def _make_header(self, cur, params=None) -> Dict[str, Any]:
        """
        Build a header record with query metadata and session context.

        Always contains db, account, role, warehouse and result='ok'; adds
        database, schema, params and rowcount only when they are available.
        """
        h: Dict[str, Any] = {
            "db": self.dbname,
            "account": self.sf_account,
            "role": self.sf_role,
            "warehouse": self.sf_wh,
        }
        if self.sf_db:
            h["database"] = self.sf_db
        if self.sf_schema:
            h["schema"] = self.sf_schema
        if params is not None:
            h["params"] = params

        # Snowflake's cursor.rowcount is often -1 for SELECT until fully fetched.
        # We still include it if known (for DML it may be accurate).
        try:
            rc = getattr(cur, "rowcount", None)
            if isinstance(rc, int) and rc >= 0:
                h["rowcount"] = rc
        except Exception:
            # Deliberate best-effort: the header is still useful without a
            # rowcount, so a misbehaving cursor attribute must not abort it.
            pass

        h["result"] = "ok"
        return h

    def execute_query_returning_Q_xR_iterable(self, record):
        """Run the query carried by *record* and yield the resulting records.

        Yields the input record with '_error' set to 'missing query' when the
        query field is empty or absent. Otherwise yields one header record
        (see _make_header) followed by one dict per result row, if the
        statement produced a result set.

        A new connection is opened per call and closed when the generator
        finishes or is discarded.
        """
        client = SnowflakeClient(
            account=self.sf_account,
            user=self.sf_user,
            password=self.sf_password,
            authenticator=self.sf_auth,
            role=self.sf_role,
            warehouse=self.sf_wh,
            database=self.sf_db,
            schema=self.sf_schema,
        )
        try:
            query = record.get(self.query_field)
            if not query:
                record['_error'] = 'missing query'
                yield record

            else:
                params = record.get(self.params_field)

                cur = client.conn.cursor()
                try:
                    # Execute (supports positional or named params per DB-API)
                    if params is None:
                        cur.execute(query)
                    elif isinstance(params, (list, tuple, dict)):
                        cur.execute(query, params)
                    else:
                        # single scalar -> positional 1-tuple
                        cur.execute(query, (params,))

                    yield self._make_header(cur, params)

                    # Stream result rows for queries that return a result set
                    if cur.description:
                        for row in cur:
                            yield _row_to_dict(cur, row)
                finally:
                    cur.close()
        finally:
            client.close()
|
|
@@ -51,8 +51,9 @@ def do_all_man(registry: ComponentRegistry, no_pager: bool = True):
|
|
|
51
51
|
cm = nullcontext() if no_pager else pager_stdout()
|
|
52
52
|
with cm:
|
|
53
53
|
for factory in registry.get_factories():
|
|
54
|
-
comp_type = factory.get_comp_type_name()
|
|
55
|
-
|
|
54
|
+
#comp_type = factory.get_comp_type_name()
|
|
55
|
+
component_tuples = factory.get_component_name_class_tuples() # all of them
|
|
56
|
+
for name, comp_class in component_tuples:
|
|
56
57
|
usage = factory.get_usage(name)
|
|
57
58
|
print_man(registry, name, usage)
|
|
58
59
|
print()
|
|
@@ -84,7 +85,7 @@ def do_examples(token:str, registry: ComponentRegistry):
|
|
|
84
85
|
with cm:
|
|
85
86
|
for factory in registry.get_factories():
|
|
86
87
|
comp_type = factory.get_comp_type_name()
|
|
87
|
-
for name, comp_class in factory.
|
|
88
|
+
for name, comp_class in factory.get_component_name_class_tuples():
|
|
88
89
|
usage = comp_class.usage()
|
|
89
90
|
|
|
90
91
|
comp_type = usage.get_base_class(as_string=True)
|