python-jack-knife 0.5.5__tar.gz → 0.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/PKG-INFO +1 -1
  2. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/base.py +5 -0
  3. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/common.py +29 -24
  4. python_jack_knife-0.5.5/src/pjk/sinks/ddb.py → python_jack_knife-0.6.2/src/pjk/integrations/ddb_sink.py +2 -2
  5. {python_jack_knife-0.5.5/src/pjk/pipes → python_jack_knife-0.6.2/src/pjk/integrations}/postgres_pipe.py +26 -42
  6. python_jack_knife-0.6.2/src/pjk/integrations/snowflake_pipe.py +258 -0
  7. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/man_page.py +4 -3
  8. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/parser.py +169 -84
  9. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/denorm.py +6 -3
  10. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/factory.py +9 -4
  11. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/filter.py +5 -1
  12. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/head.py +1 -1
  13. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/join.py +11 -0
  14. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/map.py +8 -0
  15. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/move_field.py +9 -10
  16. python_jack_knife-0.6.2/src/pjk/pipes/progress_pipe.py +36 -0
  17. python_jack_knife-0.6.2/src/pjk/pipes/query_pipe.py +90 -0
  18. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/sample.py +5 -3
  19. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/sort.py +6 -4
  20. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/tail.py +1 -1
  21. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/where.py +2 -2
  22. python_jack_knife-0.6.2/src/pjk/progress.py +277 -0
  23. python_jack_knife-0.6.2/src/pjk/registry.py +199 -0
  24. python_jack_knife-0.6.2/src/pjk/sinks/create_sink.py +110 -0
  25. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/dir_sink.py +9 -5
  26. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/factory.py +10 -3
  27. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/graph.py +1 -1
  28. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/factory.py +5 -5
  29. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/user_source_factory.py +5 -1
  30. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/version.py +1 -1
  31. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
  32. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/SOURCES.txt +5 -2
  33. python_jack_knife-0.5.5/src/pjk/pipes/progress_pipe.py +0 -41
  34. python_jack_knife-0.5.5/src/pjk/progress.py +0 -177
  35. python_jack_knife-0.5.5/src/pjk/registry.py +0 -172
  36. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/LICENSE +0 -0
  37. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/README.md +0 -0
  38. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/pyproject.toml +0 -0
  39. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/setup.cfg +0 -0
  40. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/__init__.py +0 -0
  41. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/log.py +0 -0
  42. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/main.py +0 -0
  43. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/__init__.py +0 -0
  44. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/let_reduce.py +0 -0
  45. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/remove_field.py +0 -0
  46. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/select.py +0 -0
  47. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/pipes/user_pipe_factory.py +0 -0
  48. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/__init__.py +0 -0
  49. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/csv_sink.py +0 -0
  50. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/devnull.py +0 -0
  51. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/expect.py +0 -0
  52. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/format_sink.py +0 -0
  53. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/graph_bar_line.py +0 -0
  54. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/graph_cumulative.py +0 -0
  55. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/graph_hist.py +0 -0
  56. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/graph_scatter.py +0 -0
  57. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/json_sink.py +0 -0
  58. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/s3_sink.py +0 -0
  59. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/s3_stream.py +0 -0
  60. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/sinks.py +0 -0
  61. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/stdout.py +0 -0
  62. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/tsv_sink.py +0 -0
  63. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sinks/user_sink_factory.py +0 -0
  64. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/__init__.py +0 -0
  65. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/csv_source.py +0 -0
  66. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/dir_source.py +0 -0
  67. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/format_source.py +0 -0
  68. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/inline_source.py +0 -0
  69. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/json_source.py +0 -0
  70. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/lazy_file.py +0 -0
  71. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/lazy_file_local.py +0 -0
  72. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/lazy_file_s3.py +0 -0
  73. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/npy_source.py +0 -0
  74. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/parquet_source.py +0 -0
  75. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/s3_source.py +0 -0
  76. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/source_list.py +0 -0
  77. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/sql_source.py +0 -0
  78. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/pjk/sources/tsv_source.py +0 -0
  79. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  80. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  81. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/requires.txt +0 -0
  82. {python_jack_knife-0.5.5 → python_jack_knife-0.6.2}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.5.5
3
+ Version: 0.6.2
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -279,6 +279,11 @@ class KeyedSource(ABC):
279
279
  def deep_copy(self):
280
280
  return None
281
281
 
282
+ # mixin
283
+ # just for distinguishing components for display
284
+ class Integration(ABC):
285
+ pass
286
+
282
287
  class Source(ABC):
283
288
  @classmethod
284
289
  def usage(cls):
@@ -3,8 +3,9 @@
3
3
 
4
4
  import sys, shutil, subprocess, contextlib, signal
5
5
  import os
6
+ import re
6
7
  import yaml
7
- from pjk.base import TokenError
8
+ from pjk.base import TokenError, Integration
8
9
 
9
10
  class SafeNamespace:
10
11
  def __init__(self, obj):
@@ -99,7 +100,7 @@ class Lookups:
99
100
  entry = self._data.get(lookup_key, default)
100
101
  if not entry:
101
102
  raise TokenError(
102
- f"~/.pjk/lookups.yaml must contain entry for '{lookup_key}' with host, user, password."
103
+ f"~/.pjk/lookups.yaml does not contain entry for '{lookup_key}' with required params."
103
104
  )
104
105
  return entry
105
106
 
@@ -119,39 +120,43 @@ class Lookups:
119
120
  return dict(self._data)
120
121
 
121
122
  class ComponentFactory:
122
- def __init__(self, components: dict, comp_type_name: str):
123
+ def __init__(self, core_components: dict):
123
124
  self.num_orig = 0
124
- self.components = components # name -> component_class
125
- self.comp_type_name = comp_type_name
126
- self.num_orig_comps = len(components)
125
+ self._components = {}
126
+ for k, v in core_components.items():
127
+ if issubclass(v, Integration):
128
+ self.register(k, v, 'integration')
129
+ else:
130
+ self.register(k, v, 'core')
127
131
 
128
- def register(self, name, comp_class):
129
- self.components[name] = comp_class
132
+ def register(self, name, comp_class, origin: str):
133
+ self._components[name] = (comp_class, origin)
130
134
 
131
135
  def get_comp_type_name(self):
132
- return self.comp_type_name
133
-
134
- def print_descriptions(self):
135
- header = highlight(f'{self.comp_type_name}s')
136
- print(header)
136
+ pass
137
137
 
138
- i = 0
139
- # user and outside package components are also here, but printed from registry class
140
- for name, comp_class in self.components.items():
141
- usage = comp_class.usage()
142
- lines = usage.desc.split('\n')
143
- if i >= self.num_orig_comps:
144
- break
138
+ def get_component_name_class_tuples(self, origin: str = None) -> list:
139
+ ret = []
140
+ for k, (v, org) in self._components.items():
141
+ if not origin or origin == org:
142
+ ret.append((k, v))
143
+ return ret
145
144
 
146
- line = f' {name:<12} {lines[0]}'
147
- print(line)
148
- i += 1
145
+ def get_component_class(self, name: str):
146
+ tuple = self._components.get(name)
147
+ if not tuple:
148
+ return None
149
+ component_class, origin = tuple
150
+ return component_class
149
151
 
150
152
  def get_usage(self, name: str):
151
- comp_class = self.components.get(name)
153
+ comp_class = self.get_component_class(name)
152
154
  if not comp_class:
153
155
  return None
154
156
  return comp_class.usage()
155
157
 
156
158
  def create(self, token: str):
157
159
  pass
160
+
161
+ def is_valid_field_name(name: str):
162
+ return re.fullmatch(r'^[A-Za-z_][A-Za-z0-9_]*$', name)
@@ -1,10 +1,10 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
- from pjk.base import Sink, Source, ParsedToken, Usage
4
+ from pjk.base import Sink, Integration, Source, ParsedToken, Usage
5
5
  from decimal import Decimal
6
6
 
7
- class DDBSink(Sink):
7
+ class DDBSink(Sink, Integration):
8
8
  @classmethod
9
9
  def usage(cls):
10
10
  usage = Usage(
@@ -9,8 +9,9 @@ import uuid
9
9
  from decimal import Decimal
10
10
  from typing import Any, Dict, Optional
11
11
 
12
- from pjk.base import Pipe, ParsedToken, NoBindUsage, Usage, TokenError
12
+ from pjk.base import Integration, ParsedToken, Usage
13
13
  from pjk.common import Lookups
14
+ from pjk.pipes.query_pipe import QueryPipe
14
15
 
15
16
 
16
17
  class DBClient:
@@ -89,28 +90,15 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
89
90
  return {col: normalize(val) for col, val in zip(cols, row)}
90
91
 
91
92
 
92
- class PostgresPipe(Pipe):
93
- @classmethod
94
- def usage(cls):
95
- usage = Usage(
96
- name="pgres",
97
- desc="Postgres query pipe; executes SQL from input record['query'].",
98
- component_class=cls,
99
- )
100
- usage.def_arg(
101
- "dbname",
102
- f"~/.pjk/lookups.yaml must containing entry '{cls.__name__}-<dbname>' with host, user, password"
103
- )
104
- usage.def_param(
105
- "header",
106
- usage="emit header record before query results",
107
- valid_values={"true", "false"}, default='false',
108
- )
109
-
110
- usage.def_example(expr_tokens=['myquery.sql', 'pgres:mydb'], expect=None)
111
- usage.def_example(expr_tokens=["{'query': 'SELECT * from MY_TABLE;'}", 'pgres:mydb'], expect=None)
112
- usage.def_example(expr_tokens=["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'pgres:mydb'], expect=None)
113
- return usage
93
+ class PostgresPipe(QueryPipe,Integration):
94
+ name = 'pgres'
95
+ desc = "Postgres query pipe; executes SQL from input."
96
+ arg0 = ("dbname", 'database name.')
97
+ examples = [
98
+ ['myquery.sql', 'pgres:mydb', '-'],
99
+ ["{'query': 'SELECT * from MY_TABLE;'}", 'pgres:mydb', '-'],
100
+ ["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'pgres:mydb']
101
+ ]
114
102
 
115
103
  def __init__(self, ptok: ParsedToken, usage: Usage):
116
104
  super().__init__(ptok, usage)
@@ -118,16 +106,14 @@ class PostgresPipe(Pipe):
118
106
  lookups = Lookups(self)
119
107
  self.dbname = usage.get_arg("dbname")
120
108
 
121
- db_params = lookups.get(self.dbname)
122
- self.db_host = db_params.get("host")
123
- self.db_user = db_params.get("user")
124
- self.db_pass = db_params.get("password")
125
- self.db_port = int(db_params.get("port", 5432))
126
- self.db_ssl = bool(db_params.get("ssl", False))
109
+ self.db_host = self.lookup_params.get("host")
110
+ self.db_user = self.lookup_params.get("user")
111
+ self.db_pass = self.lookup_params.get("password")
112
+ self.db_port = int(self.lookup_params.get("port", 5432))
113
+ self.db_ssl = bool(self.lookup_params.get("ssl", False))
127
114
 
128
- self.query_field = "query" # SQL string
115
+ self.query_field = usage.get_param('query_field')
129
116
  self.params_field = "params" # optional: list/tuple (positional) or dict (named)
130
- self.do_header = usage.get_param("header") == "true"
131
117
 
132
118
  def reset(self):
133
119
  # stateless across reset
@@ -139,7 +125,6 @@ class PostgresPipe(Pipe):
139
125
  Figures out result, rowcount, function automatically.
140
126
  """
141
127
  h = {
142
- "query": query,
143
128
  "db": self.dbname,
144
129
  "dbhost": self.db_host,
145
130
  }
@@ -159,9 +144,9 @@ class PostgresPipe(Pipe):
159
144
  h["result"] = "ok"
160
145
  h["rowcount"] = cur.rowcount
161
146
 
162
- return {"header": h}
147
+ return h
163
148
 
164
- def __iter__(self):
149
+ def execute_query_returning_Q_xR_iterable(self, record):
165
150
  client = DBClient(
166
151
  host=self.db_host,
167
152
  username=self.db_user,
@@ -171,12 +156,12 @@ class PostgresPipe(Pipe):
171
156
  ssl=self.db_ssl,
172
157
  )
173
158
  try:
174
- for input_record in self.left:
175
- query = input_record.get(self.query_field)
176
- if not query:
177
- yield {"_error": "missing query"}
178
- continue
179
- params = input_record.get(self.params_field)
159
+ query = record.get(self.query_field)
160
+ if not query:
161
+ record['_error'] = 'missing query'
162
+ yield record
163
+ else:
164
+ params = record.get(self.params_field)
180
165
 
181
166
  cur = client.conn.cursor()
182
167
  try:
@@ -190,8 +175,7 @@ class PostgresPipe(Pipe):
190
175
  cur.execute(query, (params,))
191
176
 
192
177
  # yield header first
193
- if self.do_header:
194
- yield self._make_header(cur, query, params)
178
+ yield self._make_header(cur, query, params)
195
179
 
196
180
  # then stream rows if it was a real SELECT with results
197
181
  if cur.description:
@@ -0,0 +1,258 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+ #
4
+ # djk/pipes/snowflake_pipe.py
5
+
6
+ import base64
7
+ import datetime as _dt
8
+ import uuid
9
+ from decimal import Decimal
10
+ from typing import Any, Dict, Optional
11
+
12
+ from pjk.base import ParsedToken, Usage, TokenError, Integration
13
+ from pjk.pipes.query_pipe import QueryPipe
14
+
15
+
16
+ # ---------- utilities ----------
17
+
18
+ def _iso_dt(x: _dt.datetime) -> str:
19
+ """ISO 8601; normalize UTC offset to 'Z' for UTC."""
20
+ s = x.isoformat()
21
+ return s.replace("+00:00", "Z")
22
+
23
+
24
+ def normalize(obj: Any) -> Any:
25
+ """
26
+ Make values JSON/YAML-safe and portable (schema-agnostic):
27
+ - Decimal -> exact string (no sci-notation)
28
+ - date/datetime/time -> ISO-8601 string (datetime keeps offset; UTC -> 'Z')
29
+ - UUID -> string
30
+ - bytes -> base64 string
31
+ - lists/tuples/sets, dicts -> normalized recursively
32
+ - leaves int/float/str/bool/None as-is
33
+ """
34
+ if obj is None:
35
+ return None
36
+ if isinstance(obj, Decimal):
37
+ return format(obj, "f")
38
+ if isinstance(obj, _dt.datetime):
39
+ return _iso_dt(obj)
40
+ if isinstance(obj, (_dt.date, _dt.time)):
41
+ return obj.isoformat()
42
+ if isinstance(obj, uuid.UUID):
43
+ return str(obj)
44
+ if isinstance(obj, (bytes, bytearray, memoryview)):
45
+ return base64.b64encode(bytes(obj)).decode("ascii")
46
+ if isinstance(obj, dict):
47
+ return {k: normalize(v) for k, v in obj.items()}
48
+ if isinstance(obj, (list, tuple, set)):
49
+ return [normalize(v) for v in obj]
50
+ return obj
51
+
52
+
53
+ def _row_to_dict(cursor, row) -> Dict[str, Any]:
54
+ cols = [d[0] for d in cursor.description]
55
+ return {col: normalize(val) for col, val in zip(cols, row)}
56
+
57
+
58
+ def _get_any(params: Dict[str, Any], *keys: str) -> Optional[Any]:
59
+ """
60
+ Fetch a value from params using any of the provided keys,
61
+ trying case variants and optional SNOWFLAKE_ prefix.
62
+ """
63
+ variants = []
64
+ for k in keys:
65
+ variants.extend([
66
+ k, k.lower(), k.upper(),
67
+ f"snowflake_{k}".lower(), f"SNOWFLAKE_{k}".upper()
68
+ ])
69
+ for v in variants:
70
+ if v in params:
71
+ return params[v]
72
+ return None
73
+
74
+
75
+ # ---------- client ----------
76
+
77
+ class SnowflakeClient:
78
+ """
79
+ Simple connection wrapper for snowflake-connector-python.
80
+ One connection per client instance (safer than sharing across threads).
81
+ """
82
+ def __init__(
83
+ self,
84
+ *,
85
+ account: str,
86
+ user: str,
87
+ password: Optional[str] = None,
88
+ authenticator: Optional[str] = None, # e.g. 'externalbrowser', 'oauth', 'snowflake'
89
+ role: Optional[str] = None,
90
+ warehouse: Optional[str] = None,
91
+ database: Optional[str] = None,
92
+ schema: Optional[str] = None
93
+ ):
94
+ import snowflake.connector # lazy import
95
+
96
+ kwargs: Dict[str, Any] = {
97
+ "account": account,
98
+ "user": user,
99
+ }
100
+ if password:
101
+ kwargs["password"] = password
102
+ if authenticator:
103
+ kwargs["authenticator"] = authenticator
104
+ if role:
105
+ kwargs["role"] = role
106
+ if warehouse:
107
+ kwargs["warehouse"] = warehouse
108
+ if database:
109
+ kwargs["database"] = database
110
+ if schema:
111
+ kwargs["schema"] = schema
112
+
113
+ try:
114
+ self.conn = snowflake.connector.connect(**kwargs)
115
+ # autocommit is True by default; make explicit
116
+ self.conn.autocommit(True)
117
+ # Apply explicit USE statements as a safety net (only if provided)
118
+ with self.conn.cursor() as cur:
119
+ if role:
120
+ cur.execute(f'USE ROLE "{role}"')
121
+ if warehouse:
122
+ cur.execute(f'USE WAREHOUSE "{warehouse}"')
123
+ if database:
124
+ cur.execute(f'USE DATABASE "{database}"')
125
+ if schema:
126
+ cur.execute(f'USE SCHEMA "{schema}"')
127
+ except Exception as e:
128
+ print("Failed to connect to Snowflake")
129
+ raise e
130
+
131
+ def close(self):
132
+ if getattr(self, "conn", None) is not None:
133
+ try:
134
+ self.conn.close()
135
+ finally:
136
+ self.conn = None
137
+
138
+
139
+ # ---------- pipe ----------
140
+
141
+ class SnowflakePipe(QueryPipe, Integration):
142
+ """
143
+ Snowflake query pipe; executes SQL found in input record['query'] and streams rows.
144
+ Connection/session settings are pulled from ~/.pjk/lookups.yaml under the arg name.
145
+ """
146
+ name = 'snowflake'
147
+ desc = "Snowflake query pipe; executes an SQL query for each input record."
148
+ arg0 = ('dbname', 'database name.')
149
+ examples = [
150
+ ["{'query': 'SELECT CURRENT_ROLE();'}", "snow:EDLDB", "-"],
151
+ ["myquery.sql", "snow:EDLDB", "-"]
152
+ ]
153
+
154
+ def __init__(self, ptok: ParsedToken, usage: Usage):
155
+ super().__init__(ptok, usage)
156
+
157
+ self.dbname = usage.get_arg(type(self).arg0[0])
158
+
159
+ # Accept both bare keys and SNOWFLAKE_* variants in lookups.yaml
160
+ self.sf_account = _get_any(self.lookup_params, "account")
161
+ self.sf_user = _get_any(self.lookup_params, "user")
162
+ self.sf_auth = _get_any(self.lookup_params, "authenticator")
163
+ self.sf_role = _get_any(self.lookup_params, "role")
164
+ self.sf_wh = _get_any(self.lookup_params, "warehouse")
165
+ self.sf_db = self.dbname
166
+ self.sf_schema = _get_any(self.lookup_params, "schema")
167
+
168
+ # Basic validation
169
+ missing = [k for k, v in [
170
+ ("account", self.sf_account),
171
+ ("user", self.sf_user),
172
+ ("authenticator|password", self.sf_auth or self.sf_password),
173
+ ("role", self.sf_role),
174
+ ("warehouse", self.sf_wh),
175
+ ("schema", self.sf_schema),
176
+ ] if not v]
177
+ if missing:
178
+ raise TokenError(
179
+ f"lookups entry '{self.dbname}' missing: {', '.join(missing)}"
180
+ )
181
+
182
+ self.query_field = usage.get_param('query_field')
183
+ self.params_field = "params" # optional: list/tuple (positional) or dict (named)
184
+
185
+ def reset(self):
186
+ # stateless across reset
187
+ pass
188
+
189
+ def _make_header(self, cur, params=None) -> Dict[str, Any]:
190
+ """
191
+ Build a header record with query metadata and session context.
192
+ """
193
+ h: Dict[str, Any] = {
194
+ "db": self.dbname,
195
+ "account": self.sf_account,
196
+ "role": self.sf_role,
197
+ "warehouse": self.sf_wh,
198
+ }
199
+ if self.sf_db:
200
+ h["database"] = self.sf_db
201
+ if self.sf_schema:
202
+ h["schema"] = self.sf_schema
203
+ if params is not None:
204
+ h["params"] = params
205
+
206
+ # Snowflake's cursor.rowcount is often -1 for SELECT until fully fetched.
207
+ # We still include it if known (for DML it may be accurate).
208
+ try:
209
+ rc = getattr(cur, "rowcount", None)
210
+ if isinstance(rc, int) and rc >= 0:
211
+ h["rowcount"] = rc
212
+ except Exception:
213
+ pass
214
+
215
+ h["result"] = "ok"
216
+ return h
217
+
218
+ def execute_query_returning_Q_xR_iterable(self, record):
219
+ client = SnowflakeClient(
220
+ account=self.sf_account,
221
+ user=self.sf_user,
222
+ authenticator=self.sf_auth,
223
+ role=self.sf_role,
224
+ warehouse=self.sf_wh,
225
+ database=self.sf_db,
226
+ schema=self.sf_schema,
227
+ )
228
+ try:
229
+ query = record.get(self.query_field)
230
+ if not query:
231
+ record['_error'] = 'missing query'
232
+ yield record
233
+
234
+ else:
235
+ params = record.get(self.params_field)
236
+
237
+ cur = client.conn.cursor()
238
+ try:
239
+ # Execute (supports positional or named params per DB-API)
240
+ if params is None:
241
+ cur.execute(query)
242
+ else:
243
+ if isinstance(params, (list, tuple, dict)):
244
+ cur.execute(query, params)
245
+ else:
246
+ # single scalar -> positional 1-tuple
247
+ cur.execute(query, (params,))
248
+
249
+ yield self._make_header(cur, params)
250
+
251
+ # Stream result rows for queries that return a result set
252
+ if cur.description:
253
+ for row in cur:
254
+ yield _row_to_dict(cur, row)
255
+ finally:
256
+ cur.close()
257
+ finally:
258
+ client.close()
@@ -51,8 +51,9 @@ def do_all_man(registry: ComponentRegistry, no_pager: bool = True):
51
51
  cm = nullcontext() if no_pager else pager_stdout()
52
52
  with cm:
53
53
  for factory in registry.get_factories():
54
- comp_type = factory.get_comp_type_name()
55
- for name in factory.components.keys():
54
+ #comp_type = factory.get_comp_type_name()
55
+ component_tuples = factory.get_component_name_class_tuples() # all of them
56
+ for name, comp_class in component_tuples:
56
57
  usage = factory.get_usage(name)
57
58
  print_man(registry, name, usage)
58
59
  print()
@@ -84,7 +85,7 @@ def do_examples(token:str, registry: ComponentRegistry):
84
85
  with cm:
85
86
  for factory in registry.get_factories():
86
87
  comp_type = factory.get_comp_type_name()
87
- for name, comp_class in factory.components.items():
88
+ for name, comp_class in factory.get_component_name_class_tuples():
88
89
  usage = comp_class.usage()
89
90
 
90
91
  comp_type = usage.get_base_class(as_string=True)