python-jack-knife 0.6.0__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/PKG-INFO +1 -1
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/common.py +19 -33
- python_jack_knife-0.6.3/src/pjk/integrations/opensearch_client.py +77 -0
- python_jack_knife-0.6.3/src/pjk/integrations/opensearch_query_pipe.py +104 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/integrations/postgres_pipe.py +11 -12
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/integrations/snowflake_pipe.py +14 -31
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/main.py +13 -2
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/parser.py +104 -69
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/denorm.py +6 -3
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/factory.py +3 -1
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/move_field.py +9 -10
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/query_pipe.py +4 -13
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/version.py +1 -1
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/python_jack_knife.egg-info/SOURCES.txt +2 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/LICENSE +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/README.md +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/pyproject.toml +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/setup.cfg +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/__init__.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/base.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/integrations/ddb_sink.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/log.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/man_page.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/__init__.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/filter.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/head.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/join.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/let_reduce.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/map.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/progress_pipe.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/remove_field.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/sample.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/select.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/sort.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/tail.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/user_pipe_factory.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/pipes/where.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/progress.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/registry.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/__init__.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/create_sink.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/csv_sink.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/devnull.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/dir_sink.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/expect.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/factory.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/format_sink.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/graph.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/graph_bar_line.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/graph_cumulative.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/graph_hist.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/graph_scatter.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/json_sink.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/s3_sink.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/s3_stream.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/sinks.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/stdout.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/tsv_sink.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sinks/user_sink_factory.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/__init__.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/csv_source.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/dir_source.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/factory.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/format_source.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/inline_source.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/json_source.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/lazy_file.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/lazy_file_local.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/lazy_file_s3.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/npy_source.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/parquet_source.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/s3_source.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/source_list.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/sql_source.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/tsv_source.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/pjk/sources/user_source_factory.py +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/python_jack_knife.egg-info/requires.txt +0 -0
- {python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/python_jack_knife.egg-info/top_level.txt +0 -0
|
@@ -3,8 +3,9 @@
|
|
|
3
3
|
|
|
4
4
|
import sys, shutil, subprocess, contextlib, signal
|
|
5
5
|
import os
|
|
6
|
+
import re
|
|
6
7
|
import yaml
|
|
7
|
-
from pjk.base import TokenError, Integration
|
|
8
|
+
from pjk.base import TokenError, Integration, Source, Pipe
|
|
8
9
|
|
|
9
10
|
class SafeNamespace:
|
|
10
11
|
def __init__(self, obj):
|
|
@@ -73,50 +74,32 @@ def highlight(text: str, color: str = 'bold', value: str = None) -> str:
|
|
|
73
74
|
style = COLOR_CODES.get(color.lower(), COLOR_CODES['bold'])
|
|
74
75
|
return text.replace(value, f"{style}{value}{RESET}")
|
|
75
76
|
|
|
76
|
-
class
|
|
77
|
-
def __init__(self, component_class):
|
|
78
|
-
|
|
77
|
+
class Config:
|
|
78
|
+
def __init__(self, instance_type: str, component_class: Source|Pipe, instance: str):
|
|
79
|
+
# instance = name of the instance, e.g. 'myindexcollection', instance_type = 'index'
|
|
80
|
+
# instance_type only used by automatic config template maker MUST BE STRING LITERAL!
|
|
81
|
+
self.configs_yaml = os.path.expanduser('~/.pjk/component_configs.yaml')
|
|
79
82
|
self.class_name = type(component_class).__name__
|
|
83
|
+
self.instance = instance
|
|
80
84
|
self._data = {}
|
|
81
85
|
self._load()
|
|
82
86
|
|
|
83
87
|
def _load(self):
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
with open(self.lookups_yaml, 'r') as f:
|
|
88
|
+
if os.path.exists(self.configs_yaml):
|
|
89
|
+
with open(self.configs_yaml, 'r') as f:
|
|
87
90
|
self._data = yaml.safe_load(f) or {}
|
|
88
91
|
else:
|
|
89
92
|
self._data = {}
|
|
90
93
|
|
|
91
|
-
def
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
with open(self.lookups_yaml, 'w') as f:
|
|
95
|
-
yaml.safe_dump(self._data, f)
|
|
96
|
-
|
|
97
|
-
def get(self, key, default=None):
|
|
98
|
-
lookup_key = f'{self.class_name}-{key}'
|
|
99
|
-
entry = self._data.get(lookup_key, default)
|
|
94
|
+
def lookup(self, param: str, default=None):
|
|
95
|
+
instance_key = f'{self.class_name}-{self.instance}'
|
|
96
|
+
entry = self._data.get(instance_key, None)
|
|
100
97
|
if not entry:
|
|
101
98
|
raise TokenError(
|
|
102
|
-
f"~/.pjk/
|
|
99
|
+
f"~/.pjk/component_configs.yaml does not contain entry for '{instance_key}' with required params."
|
|
103
100
|
)
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def set(self, key, value):
|
|
107
|
-
"""Set a lookup value and persist it."""
|
|
108
|
-
self._data[key] = value
|
|
109
|
-
self.save()
|
|
110
|
-
|
|
111
|
-
def delete(self, key):
|
|
112
|
-
"""Remove a key if it exists and save."""
|
|
113
|
-
if key in self._data:
|
|
114
|
-
del self._data[key]
|
|
115
|
-
self.save()
|
|
116
|
-
|
|
117
|
-
def all(self):
|
|
118
|
-
"""Return the full lookup dictionary."""
|
|
119
|
-
return dict(self._data)
|
|
101
|
+
|
|
102
|
+
return entry.get(param, default)
|
|
120
103
|
|
|
121
104
|
class ComponentFactory:
|
|
122
105
|
def __init__(self, core_components: dict):
|
|
@@ -156,3 +139,6 @@ class ComponentFactory:
|
|
|
156
139
|
|
|
157
140
|
def create(self, token: str):
|
|
158
141
|
pass
|
|
142
|
+
|
|
143
|
+
def is_valid_field_name(name: str):
|
|
144
|
+
return re.fullmatch(r'^[A-Za-z_][A-Za-z0-9_]*$', name)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from pjk.common import Config
|
|
2
|
+
|
|
3
|
+
class OpenSearchClient:
|
|
4
|
+
|
|
5
|
+
@classmethod
|
|
6
|
+
def get_client(cls, config: Config):
|
|
7
|
+
aws_auth = config.lookup("os_auth_use_aws", "true") != "false"
|
|
8
|
+
scheme = config.lookup("os_scheme", "https")
|
|
9
|
+
verify_certs = config.lookup("os_verify_certs", "true") != "false"
|
|
10
|
+
ca_certs = config.lookup("os_ca_certs", None)
|
|
11
|
+
region = config.lookup("os_region", None)
|
|
12
|
+
service = config.lookup("os_service", "es")
|
|
13
|
+
username = config.lookup("os_username", None)
|
|
14
|
+
password = config.lookup("os_password", None)
|
|
15
|
+
timeout = float(config.lookup("os_timeout", 30))
|
|
16
|
+
ssl_assert_hostname = config.lookup("os_ssl_assert_hostname", "true") != "false"
|
|
17
|
+
ssl_show_warn = config.lookup("os_ssl_show_warn", "false") == "true"
|
|
18
|
+
host = config.lookup("os_host", None)
|
|
19
|
+
port = config.lookup("os_port", None)
|
|
20
|
+
|
|
21
|
+
# Reasonable port defaults
|
|
22
|
+
if port is None:
|
|
23
|
+
port = 443 if scheme == "https" else 9200
|
|
24
|
+
else:
|
|
25
|
+
port = int(port)
|
|
26
|
+
|
|
27
|
+
if host is None:
|
|
28
|
+
raise ValueError("Config os_host is required (set os_host + os_port/os_scheme, or a connection profile).")
|
|
29
|
+
|
|
30
|
+
# Lazy import so this module can still be imported if deps aren't installed.
|
|
31
|
+
try:
|
|
32
|
+
from opensearchpy import OpenSearch, RequestsHttpConnection, Urllib3HttpConnection
|
|
33
|
+
except Exception as e:
|
|
34
|
+
raise RuntimeError("opensearch-py must be installed to use OpenSearchQueryPipe") from e
|
|
35
|
+
|
|
36
|
+
http_auth = None
|
|
37
|
+
connection_class = Urllib3HttpConnection # default
|
|
38
|
+
use_ssl = (scheme == "https")
|
|
39
|
+
|
|
40
|
+
if aws_auth:
|
|
41
|
+
# AWS SigV4 (works for OpenSearch Service / legacy ES domains)
|
|
42
|
+
try:
|
|
43
|
+
import boto3
|
|
44
|
+
from requests_aws4auth import AWS4Auth
|
|
45
|
+
except Exception as e:
|
|
46
|
+
raise RuntimeError("boto3 and requests-aws4auth are required for os_auth_method='aws'") from e
|
|
47
|
+
|
|
48
|
+
if not region:
|
|
49
|
+
raise ValueError("Config os_region is required for os_auth_method='aws'.")
|
|
50
|
+
|
|
51
|
+
session = boto3.Session()
|
|
52
|
+
credentials = session.get_credentials()
|
|
53
|
+
if credentials is None:
|
|
54
|
+
raise RuntimeError("No AWS credentials found (boto3 session.get_credentials() returned None).")
|
|
55
|
+
|
|
56
|
+
creds = credentials.get_frozen_credentials()
|
|
57
|
+
http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, service, session_token=creds.token)
|
|
58
|
+
connection_class = RequestsHttpConnection # SigV4 signing via requests path
|
|
59
|
+
|
|
60
|
+
else:
|
|
61
|
+
if not (username and password):
|
|
62
|
+
raise ValueError("os_username and os_password are required for os_auth_method='basic'.")
|
|
63
|
+
http_auth = (username, password)
|
|
64
|
+
|
|
65
|
+
# Build client
|
|
66
|
+
client = OpenSearch(
|
|
67
|
+
hosts=[{"host": host, "port": port}],
|
|
68
|
+
http_auth=http_auth,
|
|
69
|
+
use_ssl=use_ssl,
|
|
70
|
+
verify_certs=verify_certs,
|
|
71
|
+
ssl_assert_hostname=ssl_assert_hostname,
|
|
72
|
+
ssl_show_warn=ssl_show_warn,
|
|
73
|
+
ca_certs=ca_certs,
|
|
74
|
+
timeout=timeout,
|
|
75
|
+
connection_class=connection_class,
|
|
76
|
+
)
|
|
77
|
+
return client
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import traceback
|
|
4
|
+
from copy import deepcopy
|
|
5
|
+
from typing import Optional, Iterator, Dict, Any, Iterable
|
|
6
|
+
|
|
7
|
+
from pjk.base import Pipe, ParsedToken, Usage, Integration
|
|
8
|
+
from pjk.pipes.query_pipe import QueryPipe
|
|
9
|
+
from pjk.common import Config
|
|
10
|
+
from pjk.integrations.opensearch_client import OpenSearchClient
|
|
11
|
+
|
|
12
|
+
def build_body_from_string(query_string: str) -> dict:
|
|
13
|
+
if query_string == "*":
|
|
14
|
+
return {"query": {"match_all": {}}}
|
|
15
|
+
else:
|
|
16
|
+
return {
|
|
17
|
+
"query": {
|
|
18
|
+
"simple_query_string": {
|
|
19
|
+
"query": query_string
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
class OpenSearchQueryPipe(QueryPipe, Integration):
|
|
25
|
+
name = "os_query"
|
|
26
|
+
desc = "Opensearch query pipe. Uses record['query_string'] or record['query_object'] for os query"
|
|
27
|
+
arg0 = ("index", "index to query over")
|
|
28
|
+
examples = [
|
|
29
|
+
["{'query_string': '*'}", 'os_query:myindex', '-'],
|
|
30
|
+
["{'query_string': 'dog'}", 'os_query:myindex', '-'],
|
|
31
|
+
["{'query_string': 'dog AND cat'}", 'os_query:myindex', '-'],
|
|
32
|
+
["{'query_object': {query: {...}}", 'os_query:myindex', '-'],
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
36
|
+
super().__init__(ptok, usage)
|
|
37
|
+
|
|
38
|
+
# index from arg0 or config
|
|
39
|
+
self.index = ptok.get_arg(0)
|
|
40
|
+
|
|
41
|
+
# Build the OpenSearch client (handles AWS/basic/none)
|
|
42
|
+
config = Config('index', self, self.index)
|
|
43
|
+
self.client = OpenSearchClient.get_client(config)
|
|
44
|
+
|
|
45
|
+
# Iteration state
|
|
46
|
+
self.cur_record: Optional[Dict[str, Any]] = None
|
|
47
|
+
self.hits_iter: Optional[Iterator[Dict[str, Any]]] = None
|
|
48
|
+
|
|
49
|
+
def reset(self):
|
|
50
|
+
# keep the index open between drains
|
|
51
|
+
pass
|
|
52
|
+
|
|
53
|
+
def close(self):
|
|
54
|
+
pass
|
|
55
|
+
|
|
56
|
+
def execute_query_returning_Q_xR_iterable(self, query_record: dict) -> Iterator[Dict[str, Any]]:
|
|
57
|
+
query_string = query_record.get('query_string', None)
|
|
58
|
+
query_body = None
|
|
59
|
+
|
|
60
|
+
if query_string:
|
|
61
|
+
query_body = build_body_from_string(query_string)
|
|
62
|
+
else:
|
|
63
|
+
query_body = query_record.get('query_object')
|
|
64
|
+
|
|
65
|
+
try:
|
|
66
|
+
# Build final request body
|
|
67
|
+
req_body = deepcopy(query_body)
|
|
68
|
+
req_body["size"] = self.count
|
|
69
|
+
|
|
70
|
+
res = self.client.search(index=self.index, body=req_body)
|
|
71
|
+
|
|
72
|
+
total_hits = 0
|
|
73
|
+
took = res.get("took")
|
|
74
|
+
hits = res.get("hits", {}).get("hits", [])
|
|
75
|
+
total_obj = res.get("hits", {}).get("total", {})
|
|
76
|
+
if isinstance(total_obj, dict):
|
|
77
|
+
total_hits = total_obj.get("value", 0)
|
|
78
|
+
elif isinstance(total_obj, int):
|
|
79
|
+
total_hits = total_obj
|
|
80
|
+
|
|
81
|
+
# Emit a metadata record first
|
|
82
|
+
yield {
|
|
83
|
+
"took_ms": took,
|
|
84
|
+
"total_hits": total_hits,
|
|
85
|
+
"index": self.index,
|
|
86
|
+
"os_query_body": req_body
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
# Emit each hit
|
|
90
|
+
for hit in hits:
|
|
91
|
+
if "_source" in hit and isinstance(hit["_source"], dict):
|
|
92
|
+
yield hit["_source"]
|
|
93
|
+
else:
|
|
94
|
+
# Some queries (e.g., stored fields only) might not include _source
|
|
95
|
+
yield {"_type": "os_query_hit", "_hit": hit}
|
|
96
|
+
|
|
97
|
+
except Exception as e:
|
|
98
|
+
print("OpenSearch query error:", e, file=sys.stderr)
|
|
99
|
+
traceback.print_exc()
|
|
100
|
+
yield {
|
|
101
|
+
"_type": "os_query_error",
|
|
102
|
+
"error": str(e),
|
|
103
|
+
"query_record": query_record,
|
|
104
|
+
}
|
|
@@ -10,7 +10,7 @@ from decimal import Decimal
|
|
|
10
10
|
from typing import Any, Dict, Optional
|
|
11
11
|
|
|
12
12
|
from pjk.base import Integration, ParsedToken, Usage
|
|
13
|
-
from pjk.common import
|
|
13
|
+
from pjk.common import Config
|
|
14
14
|
from pjk.pipes.query_pipe import QueryPipe
|
|
15
15
|
|
|
16
16
|
|
|
@@ -91,26 +91,25 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
|
|
|
91
91
|
|
|
92
92
|
|
|
93
93
|
class PostgresPipe(QueryPipe,Integration):
|
|
94
|
-
name = '
|
|
94
|
+
name = 'postgres'
|
|
95
95
|
desc = "Postgres query pipe; executes SQL from input."
|
|
96
96
|
arg0 = ("dbname", 'database name.')
|
|
97
97
|
examples = [
|
|
98
|
-
['myquery.sql', '
|
|
99
|
-
["{'query': 'SELECT * from MY_TABLE;'}", '
|
|
100
|
-
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", '
|
|
98
|
+
['myquery.sql', 'postgres:mydb', '-'],
|
|
99
|
+
["{'query': 'SELECT * from MY_TABLE;'}", 'postgres:mydb', '-'],
|
|
100
|
+
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'postgres:mydb']
|
|
101
101
|
]
|
|
102
102
|
|
|
103
103
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
104
104
|
super().__init__(ptok, usage)
|
|
105
105
|
|
|
106
|
-
lookups = Lookups(self)
|
|
107
106
|
self.dbname = usage.get_arg("dbname")
|
|
108
|
-
|
|
109
|
-
self.db_host =
|
|
110
|
-
self.db_user =
|
|
111
|
-
self.db_pass =
|
|
112
|
-
self.db_port = int(
|
|
113
|
-
self.db_ssl = bool(
|
|
107
|
+
config = Config('dbname', self, self.dbname)
|
|
108
|
+
self.db_host = config.lookup("host")
|
|
109
|
+
self.db_user = config.lookup("user")
|
|
110
|
+
self.db_pass = config.lookup("password")
|
|
111
|
+
self.db_port = int(config.lookup("port", 5432))
|
|
112
|
+
self.db_ssl = bool(config.lookup("ssl", False))
|
|
114
113
|
|
|
115
114
|
self.query_field = usage.get_param('query_field')
|
|
116
115
|
self.params_field = "params" # optional: list/tuple (positional) or dict (named)
|
|
@@ -9,8 +9,9 @@ import uuid
|
|
|
9
9
|
from decimal import Decimal
|
|
10
10
|
from typing import Any, Dict, Optional
|
|
11
11
|
|
|
12
|
-
from pjk.base import ParsedToken,
|
|
12
|
+
from pjk.base import ParsedToken, TokenError, Usage, Integration
|
|
13
13
|
from pjk.pipes.query_pipe import QueryPipe
|
|
14
|
+
from pjk.common import Config
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
# ---------- utilities ----------
|
|
@@ -55,23 +56,6 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
|
|
|
55
56
|
return {col: normalize(val) for col, val in zip(cols, row)}
|
|
56
57
|
|
|
57
58
|
|
|
58
|
-
def _get_any(params: Dict[str, Any], *keys: str) -> Optional[Any]:
|
|
59
|
-
"""
|
|
60
|
-
Fetch a value from params using any of the provided keys,
|
|
61
|
-
trying case variants and optional SNOWFLAKE_ prefix.
|
|
62
|
-
"""
|
|
63
|
-
variants = []
|
|
64
|
-
for k in keys:
|
|
65
|
-
variants.extend([
|
|
66
|
-
k, k.lower(), k.upper(),
|
|
67
|
-
f"snowflake_{k}".lower(), f"SNOWFLAKE_{k}".upper()
|
|
68
|
-
])
|
|
69
|
-
for v in variants:
|
|
70
|
-
if v in params:
|
|
71
|
-
return params[v]
|
|
72
|
-
return None
|
|
73
|
-
|
|
74
|
-
|
|
75
59
|
# ---------- client ----------
|
|
76
60
|
|
|
77
61
|
class SnowflakeClient:
|
|
@@ -141,29 +125,28 @@ class SnowflakeClient:
|
|
|
141
125
|
class SnowflakePipe(QueryPipe, Integration):
|
|
142
126
|
"""
|
|
143
127
|
Snowflake query pipe; executes SQL found in input record['query'] and streams rows.
|
|
144
|
-
Connection/session settings are pulled from ~/.pjk/
|
|
128
|
+
Connection/session settings are pulled from ~/.pjk/component_configs.yaml under the arg name.
|
|
145
129
|
"""
|
|
146
130
|
name = 'snowflake'
|
|
147
131
|
desc = "Snowflake query pipe; executes an SQL query for each input record."
|
|
148
132
|
arg0 = ('dbname', 'database name.')
|
|
149
133
|
examples = [
|
|
150
|
-
["{'query': 'SELECT CURRENT_ROLE();'}", "
|
|
151
|
-
["myquery.sql", "
|
|
134
|
+
["{'query': 'SELECT CURRENT_ROLE();'}", "snowflake:EDLDB", "-"],
|
|
135
|
+
["myquery.sql", "snowflake:EDLDB", "-"]
|
|
152
136
|
]
|
|
153
137
|
|
|
154
138
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
155
139
|
super().__init__(ptok, usage)
|
|
156
140
|
|
|
157
|
-
self.dbname = usage.get_arg(
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
self.
|
|
161
|
-
self.
|
|
162
|
-
self.
|
|
163
|
-
self.
|
|
164
|
-
self.
|
|
141
|
+
self.dbname = usage.get_arg('dbname')
|
|
142
|
+
config = Config('dbname', self, self.dbname)
|
|
143
|
+
self.sf_account = config.lookup("account")
|
|
144
|
+
self.sf_user = config.lookup("user")
|
|
145
|
+
self.sf_auth = config.lookup("authenticator")
|
|
146
|
+
self.sf_role = config.lookup("role")
|
|
147
|
+
self.sf_wh = config.lookup("warehouse")
|
|
148
|
+
self.sf_schema = config.lookup("schema")
|
|
165
149
|
self.sf_db = self.dbname
|
|
166
|
-
self.sf_schema = _get_any(self.lookup_params, "schema")
|
|
167
150
|
|
|
168
151
|
# Basic validation
|
|
169
152
|
missing = [k for k, v in [
|
|
@@ -176,7 +159,7 @@ class SnowflakePipe(QueryPipe, Integration):
|
|
|
176
159
|
] if not v]
|
|
177
160
|
if missing:
|
|
178
161
|
raise TokenError(
|
|
179
|
-
f"
|
|
162
|
+
f"config entry '{self.dbname}' missing: {', '.join(missing)}"
|
|
180
163
|
)
|
|
181
164
|
|
|
182
165
|
self.query_field = usage.get_param('query_field')
|
|
@@ -11,6 +11,8 @@ from pjk.parser import ExpressionParser
|
|
|
11
11
|
from pjk.base import UsageError
|
|
12
12
|
from pjk.log import init as init_logging
|
|
13
13
|
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
import shutil
|
|
14
16
|
import traceback
|
|
15
17
|
import concurrent.futures
|
|
16
18
|
from pjk.registry import ComponentRegistry
|
|
@@ -65,13 +67,22 @@ def execute_threaded(sinks, stop_progress=None):
|
|
|
65
67
|
else:
|
|
66
68
|
executor.shutdown(wait=True)
|
|
67
69
|
|
|
70
|
+
def initialize():
|
|
71
|
+
init_logging()
|
|
72
|
+
|
|
73
|
+
'''
|
|
74
|
+
src = Path("src/pjk/resources/component_configs.tmpl")
|
|
75
|
+
dst_dir = Path.home() / ".pjk"
|
|
76
|
+
dst_dir.mkdir(parents=True, exist_ok=True)
|
|
77
|
+
shutil.copy(src, dst_dir / src.name)
|
|
78
|
+
'''
|
|
79
|
+
|
|
68
80
|
def execute(command: str):
|
|
69
81
|
tokens = shlex.split(command, comments=True, posix=True)
|
|
70
82
|
execute_tokens(tokens)
|
|
71
83
|
|
|
72
84
|
def execute_tokens(tokens: List[str]):
|
|
73
|
-
|
|
74
|
-
# (remove the sys.exit SIGINT handler here)
|
|
85
|
+
initialize()
|
|
75
86
|
|
|
76
87
|
if '--version' in tokens:
|
|
77
88
|
print(f"pjk version {__version__}")
|
|
@@ -55,8 +55,20 @@ class OperandStack:
|
|
|
55
55
|
return None
|
|
56
56
|
return self.stack[-1]
|
|
57
57
|
|
|
58
|
+
def clear(self):
|
|
59
|
+
self.stack.clear()
|
|
60
|
+
|
|
58
61
|
def empty(self):
|
|
59
62
|
return len(self.stack) == 0
|
|
63
|
+
|
|
64
|
+
def print(self, toadd):
|
|
65
|
+
print('---------')
|
|
66
|
+
if toadd:
|
|
67
|
+
print(f'{type(toadd).__name__}={id(toadd)}')
|
|
68
|
+
if len(self.stack) == 0:
|
|
69
|
+
print(f'Stack={id(self)} StackEmpty')
|
|
70
|
+
for op in self.stack:
|
|
71
|
+
print(f'Stack={id(self)} {type(op).__name__}={id(op)}')
|
|
60
72
|
|
|
61
73
|
class ExpressionParser:
|
|
62
74
|
def __init__(self, registry: ComponentRegistry):
|
|
@@ -69,6 +81,9 @@ class ExpressionParser:
|
|
|
69
81
|
'pjk <source> [<pipe> ...] <sink>'])
|
|
70
82
|
|
|
71
83
|
source = self.stack.pop()
|
|
84
|
+
if isinstance(source, SubExpression):
|
|
85
|
+
raise TokenError("Poorly formed sub-expression. Begin token '[' without matching 'over' keyword." )
|
|
86
|
+
|
|
72
87
|
if not self.stack.empty():
|
|
73
88
|
raise TokenError.from_list(['A sink can only consume one source.',
|
|
74
89
|
'pjk <source> [<pipe> ...] <sink>'])
|
|
@@ -167,29 +182,38 @@ class StackLoader:
|
|
|
167
182
|
return ReducerAggregatorPipe(top_level_reducers=self.top_level_reducers)
|
|
168
183
|
|
|
169
184
|
def add_operator(self, op, stack: OperandStack):
|
|
185
|
+
#stack.print(op)
|
|
186
|
+
|
|
170
187
|
if not stack.empty() and isinstance(stack.peek(), SubExpression):
|
|
171
|
-
|
|
188
|
+
subexp = stack.peek()
|
|
172
189
|
|
|
173
|
-
if isinstance(op, SubExpressionOver):
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
op.add_source(subexp_begin)
|
|
190
|
+
if isinstance(op, SubExpressionOver) and subexp.recursion_depth() == 0:
|
|
191
|
+
subexp = stack.pop()
|
|
192
|
+
op.add_source(subexp)
|
|
177
193
|
stack.push(op)
|
|
178
194
|
|
|
179
195
|
global stack_level
|
|
180
|
-
|
|
181
|
-
stack_level-=1
|
|
196
|
+
stack_level -=1 # not sure why this can't be handled exclusively by the stack
|
|
182
197
|
return
|
|
198
|
+
|
|
183
199
|
else: # an operator within the subexpression
|
|
184
|
-
|
|
200
|
+
subexp = stack.peek()
|
|
201
|
+
subexp.add_subop(op)
|
|
185
202
|
return
|
|
186
203
|
|
|
204
|
+
if isinstance(op, SubExpressionOver):
|
|
205
|
+
if stack.empty or not isinstance(stack.peek(), SubExpression):
|
|
206
|
+
raise TokenError("Poorly formed sub-expression. 'over' keyword without matching begin token '['.")
|
|
207
|
+
op.add_source(stack.pop())
|
|
208
|
+
stack.push(op)
|
|
209
|
+
|
|
187
210
|
# order matters, because sources are pipes
|
|
188
211
|
if isinstance(op, Pipe):
|
|
189
212
|
arity = op.arity # class level attribute
|
|
190
213
|
for _ in range(arity):
|
|
191
214
|
if stack.empty():
|
|
192
|
-
|
|
215
|
+
name = type(op).usage().name
|
|
216
|
+
raise TokenError(f"'{name}' requires {arity} input(s)")
|
|
193
217
|
op.add_source(stack.pop())
|
|
194
218
|
stack.push(op)
|
|
195
219
|
|
|
@@ -243,6 +267,31 @@ class UpstreamSource(Source):
|
|
|
243
267
|
self.sub_recs_in.increment()
|
|
244
268
|
yield item
|
|
245
269
|
|
|
270
|
+
class SubExpressionOver(Pipe):
|
|
271
|
+
@classmethod
|
|
272
|
+
def usage(cls) -> Usage:
|
|
273
|
+
u = Usage(
|
|
274
|
+
name="over",
|
|
275
|
+
desc="sub-expression over.",
|
|
276
|
+
component_class=cls,
|
|
277
|
+
)
|
|
278
|
+
return u
|
|
279
|
+
|
|
280
|
+
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
281
|
+
super().__init__(ptok, usage)
|
|
282
|
+
self.over_arg = ptok.get_arg(0)
|
|
283
|
+
|
|
284
|
+
def reset(self):
|
|
285
|
+
pass # stateless
|
|
286
|
+
|
|
287
|
+
def __iter__(self):
|
|
288
|
+
if not isinstance(self.left, SubExpression):
|
|
289
|
+
raise Exception('this actually cannot happen, but did')
|
|
290
|
+
|
|
291
|
+
for record in self.left:
|
|
292
|
+
self.left.subexp_process(record, self.over_arg)
|
|
293
|
+
yield record
|
|
294
|
+
|
|
246
295
|
class SubExpression(Pipe, ProgressIgnore):
|
|
247
296
|
@classmethod
|
|
248
297
|
def create(cls, token: str) -> Pipe:
|
|
@@ -255,28 +304,34 @@ class SubExpression(Pipe, ProgressIgnore):
|
|
|
255
304
|
|
|
256
305
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
257
306
|
super().__init__(ptok)
|
|
258
|
-
self.over_arg = None
|
|
259
|
-
self.over_field = None
|
|
260
307
|
self.subexp_ops = []
|
|
261
|
-
self.over_pipe = None
|
|
262
308
|
self.stack_helper = StackLoader()
|
|
263
309
|
self.subexp_stack = OperandStack()
|
|
264
310
|
self.upstream_source = UpstreamSource()
|
|
265
311
|
self.subexp_stack.push(self.upstream_source)
|
|
312
|
+
self.recursions = 0 # number of subexpression within
|
|
313
|
+
self.subexp_left = None
|
|
266
314
|
|
|
267
315
|
def add_subop(self, op):
|
|
268
316
|
self.subexp_ops.append(op)
|
|
317
|
+
if isinstance(op, SubExpression):
|
|
318
|
+
self.recursions += 1
|
|
319
|
+
elif isinstance(op, SubExpressionOver):
|
|
320
|
+
self.recursions -= 1
|
|
269
321
|
self.stack_helper.add_operator(op, self.subexp_stack)
|
|
270
322
|
|
|
271
|
-
def
|
|
272
|
-
self.
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
323
|
+
def recursion_depth(self):
|
|
324
|
+
return self.recursions
|
|
325
|
+
|
|
326
|
+
#def bind(self, subex_over: SubExpressionOver):
|
|
327
|
+
# self.over_arg = subex_over.get_over_arg()
|
|
328
|
+
# if self.over_arg.endswith('.py'):
|
|
329
|
+
# self.over_field = 'child'
|
|
330
|
+
# self.over_pipe = UserPipeFactory.create(self.over_arg)
|
|
331
|
+
# self.upstream_source.set_source(self.over_pipe)
|
|
332
|
+
# self.subexp_ops.append(self.over_pipe)
|
|
333
|
+
# else:
|
|
334
|
+
# self.over_field = self.over_arg
|
|
280
335
|
|
|
281
336
|
def reset(self):
|
|
282
337
|
for op in self.subexp_ops:
|
|
@@ -284,60 +339,40 @@ class SubExpression(Pipe, ProgressIgnore):
|
|
|
284
339
|
op.reset()
|
|
285
340
|
|
|
286
341
|
def __iter__(self):
|
|
287
|
-
|
|
288
|
-
if self.over_pipe:
|
|
289
|
-
one = UpstreamSource()
|
|
290
|
-
one.add_item(record)
|
|
291
|
-
self.over_pipe.set_sources([one])
|
|
292
|
-
else:
|
|
293
|
-
field_data = record.pop(self.over_field, None)
|
|
294
|
-
if not field_data:
|
|
295
|
-
yield record
|
|
296
|
-
continue
|
|
297
|
-
if isinstance(field_data, list):
|
|
298
|
-
self.upstream_source.set_list(field_data)
|
|
299
|
-
else:
|
|
300
|
-
self.upstream_source.set_list([field_data])
|
|
342
|
+
yield from self.left # pass thru to subexp_over which then calls process
|
|
301
343
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
344
|
+
def subexp_process(self, record: dict, over_field: str):
|
|
345
|
+
#for record in self.left:
|
|
346
|
+
# if self.over_pipe:
|
|
347
|
+
# one = UpstreamSource()
|
|
348
|
+
# one.add_item(record)
|
|
349
|
+
# self.over_pipe.set_sources([one])
|
|
305
350
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
for rec in top:
|
|
309
|
-
out_recs.append(rec)
|
|
351
|
+
if not self.subexp_left:
|
|
352
|
+
self.subexp_left = self.subexp_stack.pop()
|
|
310
353
|
|
|
311
|
-
|
|
354
|
+
field_data = record.pop(over_field, None)
|
|
355
|
+
if not field_data:
|
|
356
|
+
return
|
|
312
357
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
if name:
|
|
318
|
-
record[name] = value
|
|
358
|
+
if isinstance(field_data, list):
|
|
359
|
+
self.upstream_source.set_list(field_data)
|
|
360
|
+
else:
|
|
361
|
+
self.upstream_source.set_list([field_data])
|
|
319
362
|
|
|
320
|
-
|
|
363
|
+
# Reset sub-pipe stack
|
|
364
|
+
for op in self.subexp_ops:
|
|
365
|
+
op.reset()
|
|
321
366
|
|
|
322
|
-
|
|
323
|
-
@classmethod
|
|
324
|
-
def usage(cls) -> Usage:
|
|
325
|
-
u = Usage(
|
|
326
|
-
name="over",
|
|
327
|
-
desc="sub-expression over.",
|
|
328
|
-
component_class=cls,
|
|
329
|
-
)
|
|
330
|
-
return u
|
|
331
|
-
|
|
332
|
-
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
333
|
-
super().__init__(ptok, usage)
|
|
334
|
-
self.over_arg = ptok.get_arg(0)
|
|
367
|
+
out_recs = []
|
|
335
368
|
|
|
336
|
-
|
|
337
|
-
|
|
369
|
+
for rec in self.subexp_left:
|
|
370
|
+
out_recs.append(rec)
|
|
338
371
|
|
|
339
|
-
|
|
340
|
-
pass # stateless
|
|
372
|
+
record[over_field] = out_recs
|
|
341
373
|
|
|
342
|
-
|
|
343
|
-
|
|
374
|
+
for op in self.subexp_ops:
|
|
375
|
+
if isinstance(op, ReducePipe):
|
|
376
|
+
name, value = op.get_subexp_result()
|
|
377
|
+
if name:
|
|
378
|
+
record[name] = value
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
from pjk.base import Pipe, ParsedToken, Usage, UsageError
|
|
7
7
|
from typing import Iterator
|
|
8
|
+
from pjk.progress import papi
|
|
8
9
|
|
|
9
10
|
class Denormer:
|
|
10
11
|
def __init__(self, record, field):
|
|
@@ -23,7 +24,7 @@ class Denormer:
|
|
|
23
24
|
elif isinstance(data, dict):
|
|
24
25
|
self.subrec_list = [data]
|
|
25
26
|
else:
|
|
26
|
-
raise UsageError("can only
|
|
27
|
+
raise UsageError("can only explode sub-records")
|
|
27
28
|
|
|
28
29
|
def __iter__(self) -> Iterator[dict]:
|
|
29
30
|
for subrec in self.subrec_list:
|
|
@@ -53,8 +54,8 @@ class DenormPipe(Pipe):
|
|
|
53
54
|
super().__init__(ptok)
|
|
54
55
|
|
|
55
56
|
self.field = usage.get_arg('field')
|
|
56
|
-
|
|
57
|
-
|
|
57
|
+
self.recs_in = papi.get_counter(self, None) # don't display
|
|
58
|
+
self.recs_out = papi.get_percentage_counter(self, 'recs_out', self.recs_in)
|
|
58
59
|
|
|
59
60
|
self._pending_iter = None
|
|
60
61
|
|
|
@@ -63,6 +64,8 @@ class DenormPipe(Pipe):
|
|
|
63
64
|
|
|
64
65
|
def __iter__(self):
|
|
65
66
|
for record in self.left:
|
|
67
|
+
self.recs_in.increment()
|
|
66
68
|
denormer = Denormer(record, self.field)
|
|
67
69
|
for out in denormer:
|
|
70
|
+
self.recs_out.increment()
|
|
68
71
|
yield out
|
|
@@ -20,6 +20,7 @@ from pjk.pipes.select import SelectFields
|
|
|
20
20
|
from pjk.pipes.denorm import DenormPipe
|
|
21
21
|
from pjk.integrations.postgres_pipe import PostgresPipe
|
|
22
22
|
from pjk.integrations.snowflake_pipe import SnowflakePipe
|
|
23
|
+
from pjk.integrations.opensearch_query_pipe import OpenSearchQueryPipe
|
|
23
24
|
from pjk.pipes.sample import SamplePipe
|
|
24
25
|
from pjk.pipes.user_pipe_factory import UserPipeFactory
|
|
25
26
|
|
|
@@ -40,7 +41,8 @@ COMPONENTS = {
|
|
|
40
41
|
'sample': SamplePipe,
|
|
41
42
|
'explode': DenormPipe,
|
|
42
43
|
'postgres': PostgresPipe,
|
|
43
|
-
'snowflake': SnowflakePipe
|
|
44
|
+
'snowflake': SnowflakePipe,
|
|
45
|
+
'os_query': OpenSearchQueryPipe
|
|
44
46
|
}
|
|
45
47
|
|
|
46
48
|
class PipeFactory(ComponentFactory):
|
|
@@ -3,34 +3,33 @@
|
|
|
3
3
|
|
|
4
4
|
# djk/pipes/move_field.py
|
|
5
5
|
|
|
6
|
-
from pjk.base import Pipe, ParsedToken, Usage
|
|
6
|
+
from pjk.base import Pipe, ParsedToken, Usage, TokenError
|
|
7
|
+
from pjk.common import is_valid_field_name
|
|
7
8
|
|
|
8
9
|
class MoveField(Pipe):
|
|
9
10
|
@classmethod
|
|
10
11
|
def usage(cls):
|
|
11
|
-
|
|
12
|
+
u = Usage(
|
|
12
13
|
name='as',
|
|
13
14
|
desc='rename a field in the record',
|
|
14
15
|
component_class=cls
|
|
15
16
|
)
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
u.def_arg(name='src', usage='Source field name')
|
|
18
|
+
u.def_arg(name='dst', usage='Destination field name')
|
|
19
|
+
u.def_example(expr_tokens=['{up:1}', 'as:up:down'], expect="{down:1}")
|
|
19
20
|
|
|
20
|
-
return
|
|
21
|
+
return u
|
|
21
22
|
|
|
22
23
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
23
24
|
super().__init__(ptok, usage)
|
|
24
25
|
self.src = usage.get_arg('src')
|
|
25
26
|
self.dst = usage.get_arg('dst')
|
|
26
|
-
self.count = 0
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
if not is_valid_field_name(self.dst) or not is_valid_field_name(self.src):
|
|
29
|
+
raise TokenError('field names only allow letters, numbers (non-initially) and underbar')
|
|
30
30
|
|
|
31
31
|
def __iter__(self):
|
|
32
32
|
for record in self.left:
|
|
33
|
-
self.count += 1
|
|
34
33
|
if self.src in record:
|
|
35
34
|
record[self.dst] = record.pop(self.src)
|
|
36
35
|
yield record
|
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
from pjk.base import Pipe, ParsedToken, Usage
|
|
2
|
-
from pjk.common import Lookups
|
|
1
|
+
from pjk.base import Pipe, ParsedToken, Usage
|
|
3
2
|
from typing import Any, Dict, Iterable, Optional
|
|
4
3
|
from abc import abstractmethod
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
class QueryPipe(Pipe):
|
|
8
|
-
requires_lookups = True
|
|
9
7
|
name: str = None
|
|
10
8
|
desc: str = None
|
|
11
9
|
arg0: tuple[Optional[str], Optional[str]] = (None, None)
|
|
@@ -18,8 +16,7 @@ class QueryPipe(Pipe):
|
|
|
18
16
|
desc=cls.desc,
|
|
19
17
|
component_class=cls
|
|
20
18
|
)
|
|
21
|
-
u.def_arg(name=cls.arg0[0], usage=f
|
|
22
|
-
u.def_param("query_field", usage="field of query.", default="query")
|
|
19
|
+
u.def_arg(name=cls.arg0[0], usage=f"{cls.arg0[1]} ~/.pjk/component_configs.yaml must contain entry '{cls.__name__}-<{cls.arg0[0]}'>\n with necessary parameters.")
|
|
23
20
|
u.def_param("count", usage="Number of search results, (databases may ignore)", is_num=True, default="10")
|
|
24
21
|
u.def_param("shape", usage='the shape of ouput records', is_num=False,
|
|
25
22
|
valid_values={'xR', 'Q_xR', 'Qxr'}, default='xR')
|
|
@@ -32,14 +29,8 @@ class QueryPipe(Pipe):
|
|
|
32
29
|
|
|
33
30
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
34
31
|
super().__init__(ptok, usage)
|
|
35
|
-
|
|
36
|
-
self.lookup_params = None
|
|
37
|
-
if type(self).requires_lookups:
|
|
38
|
-
lookups = Lookups(self)
|
|
39
|
-
lookup_name = usage.get_arg(type(self).arg0[0])
|
|
40
|
-
self.lookup_params = lookups.get(lookup_name)
|
|
41
|
-
|
|
42
32
|
self.output_shape = usage.get_param('shape')
|
|
33
|
+
self.count = usage.get_param('count')
|
|
43
34
|
|
|
44
35
|
@abstractmethod
|
|
45
36
|
def execute_query_returning_Q_xR_iterable(self, record) -> Iterable[Dict[str, Any]]:
|
|
@@ -83,7 +74,7 @@ class QueryPipe(Pipe):
|
|
|
83
74
|
q_out = self._make_q_object(in_rec, out_rec)
|
|
84
75
|
continue
|
|
85
76
|
r_list.append(out_rec)
|
|
86
|
-
q_out['
|
|
77
|
+
q_out['child'] = r_list
|
|
87
78
|
yield q_out
|
|
88
79
|
|
|
89
80
|
|
{python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/python_jack_knife.egg-info/SOURCES.txt
RENAMED
|
@@ -12,6 +12,8 @@ src/pjk/progress.py
|
|
|
12
12
|
src/pjk/registry.py
|
|
13
13
|
src/pjk/version.py
|
|
14
14
|
src/pjk/integrations/ddb_sink.py
|
|
15
|
+
src/pjk/integrations/opensearch_client.py
|
|
16
|
+
src/pjk/integrations/opensearch_query_pipe.py
|
|
15
17
|
src/pjk/integrations/postgres_pipe.py
|
|
16
18
|
src/pjk/integrations/snowflake_pipe.py
|
|
17
19
|
src/pjk/pipes/__init__.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/python_jack_knife.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/python_jack_knife.egg-info/requires.txt
RENAMED
|
File without changes
|
{python_jack_knife-0.6.0 → python_jack_knife-0.6.3}/src/python_jack_knife.egg-info/top_level.txt
RENAMED
|
File without changes
|