python-jack-knife 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pjk/common.py +15 -33
- pjk/integrations/opensearch_client.py +78 -0
- pjk/integrations/opensearch_query_pipe.py +104 -0
- pjk/integrations/postgres_pipe.py +11 -12
- pjk/integrations/snowflake_pipe.py +14 -31
- pjk/main.py +13 -2
- pjk/pipes/factory.py +3 -1
- pjk/pipes/query_pipe.py +4 -13
- pjk/sources/format_source.py +3 -3
- pjk/version.py +1 -1
- {python_jack_knife-0.6.2.dist-info → python_jack_knife-0.6.4.dist-info}/METADATA +1 -1
- {python_jack_knife-0.6.2.dist-info → python_jack_knife-0.6.4.dist-info}/RECORD +16 -14
- {python_jack_knife-0.6.2.dist-info → python_jack_knife-0.6.4.dist-info}/WHEEL +0 -0
- {python_jack_knife-0.6.2.dist-info → python_jack_knife-0.6.4.dist-info}/entry_points.txt +0 -0
- {python_jack_knife-0.6.2.dist-info → python_jack_knife-0.6.4.dist-info}/licenses/LICENSE +0 -0
- {python_jack_knife-0.6.2.dist-info → python_jack_knife-0.6.4.dist-info}/top_level.txt +0 -0
pjk/common.py
CHANGED
|
@@ -5,7 +5,7 @@ import sys, shutil, subprocess, contextlib, signal
|
|
|
5
5
|
import os
|
|
6
6
|
import re
|
|
7
7
|
import yaml
|
|
8
|
-
from pjk.base import TokenError, Integration
|
|
8
|
+
from pjk.base import TokenError, Integration, Source, Pipe
|
|
9
9
|
|
|
10
10
|
class SafeNamespace:
|
|
11
11
|
def __init__(self, obj):
|
|
@@ -74,50 +74,32 @@ def highlight(text: str, color: str = 'bold', value: str = None) -> str:
|
|
|
74
74
|
style = COLOR_CODES.get(color.lower(), COLOR_CODES['bold'])
|
|
75
75
|
return text.replace(value, f"{style}{value}{RESET}")
|
|
76
76
|
|
|
77
|
-
class
|
|
78
|
-
def __init__(self, component_class):
|
|
79
|
-
|
|
77
|
+
class Config:
|
|
78
|
+
def __init__(self, instance_type: str, component_class: Source|Pipe, instance: str):
|
|
79
|
+
# instance = name of the instance, e.g. 'myindexcollection', instance_type = 'index'
|
|
80
|
+
# instance_type only used by automatic config template maker MUST BE STRING LITERAL!
|
|
81
|
+
self.configs_yaml = os.path.expanduser('~/.pjk/component_configs.yaml')
|
|
80
82
|
self.class_name = type(component_class).__name__
|
|
83
|
+
self.instance = instance
|
|
81
84
|
self._data = {}
|
|
82
85
|
self._load()
|
|
83
86
|
|
|
84
87
|
def _load(self):
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
with open(self.lookups_yaml, 'r') as f:
|
|
88
|
+
if os.path.exists(self.configs_yaml):
|
|
89
|
+
with open(self.configs_yaml, 'r') as f:
|
|
88
90
|
self._data = yaml.safe_load(f) or {}
|
|
89
91
|
else:
|
|
90
92
|
self._data = {}
|
|
91
93
|
|
|
92
|
-
def
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
with open(self.lookups_yaml, 'w') as f:
|
|
96
|
-
yaml.safe_dump(self._data, f)
|
|
97
|
-
|
|
98
|
-
def get(self, key, default=None):
|
|
99
|
-
lookup_key = f'{self.class_name}-{key}'
|
|
100
|
-
entry = self._data.get(lookup_key, default)
|
|
94
|
+
def lookup(self, param: str, default=None):
|
|
95
|
+
instance_key = f'{self.class_name}-{self.instance}'
|
|
96
|
+
entry = self._data.get(instance_key, None)
|
|
101
97
|
if not entry:
|
|
102
98
|
raise TokenError(
|
|
103
|
-
f"~/.pjk/
|
|
99
|
+
f"~/.pjk/component_configs.yaml does not contain entry for '{instance_key}' with required params."
|
|
104
100
|
)
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
def set(self, key, value):
|
|
108
|
-
"""Set a lookup value and persist it."""
|
|
109
|
-
self._data[key] = value
|
|
110
|
-
self.save()
|
|
111
|
-
|
|
112
|
-
def delete(self, key):
|
|
113
|
-
"""Remove a key if it exists and save."""
|
|
114
|
-
if key in self._data:
|
|
115
|
-
del self._data[key]
|
|
116
|
-
self.save()
|
|
117
|
-
|
|
118
|
-
def all(self):
|
|
119
|
-
"""Return the full lookup dictionary."""
|
|
120
|
-
return dict(self._data)
|
|
101
|
+
|
|
102
|
+
return entry.get(param, default)
|
|
121
103
|
|
|
122
104
|
class ComponentFactory:
|
|
123
105
|
def __init__(self, core_components: dict):
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from pjk.common import Config
|
|
2
|
+
|
|
3
|
+
class OpenSearchClient:
|
|
4
|
+
|
|
5
|
+
@classmethod
|
|
6
|
+
def get_client(cls, config: Config):
|
|
7
|
+
aws_auth = config.lookup("os_auth_use_aws", "true") == 'true'
|
|
8
|
+
scheme = config.lookup("os_scheme", "https")
|
|
9
|
+
verify_certs = config.lookup("os_verify_certs", "true") == 'true'
|
|
10
|
+
ca_certs = config.lookup("os_ca_certs", None)
|
|
11
|
+
region = config.lookup("os_region", None)
|
|
12
|
+
service = config.lookup("os_service", "es")
|
|
13
|
+
username = config.lookup("os_username", None)
|
|
14
|
+
password = config.lookup("os_password", None)
|
|
15
|
+
timeout = float(config.lookup("os_timeout", 30))
|
|
16
|
+
ssl_assert_hostname = config.lookup("os_ssl_assert_hostname", "true") == 'true'
|
|
17
|
+
ssl_show_warn = config.lookup("os_ssl_show_warn", "false") == 'true'
|
|
18
|
+
host = config.lookup("os_host", None)
|
|
19
|
+
port = config.lookup("os_port", None)
|
|
20
|
+
|
|
21
|
+
# Reasonable port defaults
|
|
22
|
+
if port is None:
|
|
23
|
+
port = 443 if scheme == "https" else 9200
|
|
24
|
+
else:
|
|
25
|
+
port = int(port)
|
|
26
|
+
|
|
27
|
+
if host is None:
|
|
28
|
+
raise ValueError("Config os_host is required (set os_host + os_port/os_scheme, or a connection profile).")
|
|
29
|
+
|
|
30
|
+
# Lazy import so this module can still be imported if deps aren't installed.
|
|
31
|
+
try:
|
|
32
|
+
from opensearchpy import OpenSearch, RequestsHttpConnection, Urllib3HttpConnection
|
|
33
|
+
except Exception as e:
|
|
34
|
+
raise RuntimeError("opensearch-py must be installed to use OpenSearchQueryPipe") from e
|
|
35
|
+
|
|
36
|
+
http_auth = None
|
|
37
|
+
connection_class = Urllib3HttpConnection # default
|
|
38
|
+
use_ssl = (scheme == "https")
|
|
39
|
+
|
|
40
|
+
if aws_auth:
|
|
41
|
+
# AWS SigV4 (works for OpenSearch Service / legacy ES domains)
|
|
42
|
+
try:
|
|
43
|
+
import boto3
|
|
44
|
+
from requests_aws4auth import AWS4Auth
|
|
45
|
+
except Exception as e:
|
|
46
|
+
raise RuntimeError("boto3 and requests-aws4auth are required for os_auth_method='aws'") from e
|
|
47
|
+
|
|
48
|
+
if not region:
|
|
49
|
+
raise ValueError("Config os_region is required for os_auth_method='aws'.")
|
|
50
|
+
|
|
51
|
+
session = boto3.Session()
|
|
52
|
+
credentials = session.get_credentials()
|
|
53
|
+
if credentials is None:
|
|
54
|
+
raise RuntimeError("No AWS credentials found (boto3 session.get_credentials() returned None).")
|
|
55
|
+
|
|
56
|
+
creds = credentials.get_frozen_credentials()
|
|
57
|
+
http_auth = AWS4Auth(creds.access_key, creds.secret_key, region, service, session_token=creds.token)
|
|
58
|
+
connection_class = RequestsHttpConnection # SigV4 signing via requests path
|
|
59
|
+
|
|
60
|
+
else:
|
|
61
|
+
if not (username and password):
|
|
62
|
+
raise ValueError("os_username and os_password are required for os_auth_method='basic'.")
|
|
63
|
+
http_auth = (username, password)
|
|
64
|
+
|
|
65
|
+
# Build client
|
|
66
|
+
client = OpenSearch(
|
|
67
|
+
hosts=[{"host": host, "port": port}],
|
|
68
|
+
http_auth=http_auth,
|
|
69
|
+
use_ssl=use_ssl,
|
|
70
|
+
verify_certs=verify_certs,
|
|
71
|
+
ssl_assert_hostname=ssl_assert_hostname,
|
|
72
|
+
ssl_show_warn=ssl_show_warn,
|
|
73
|
+
ca_certs=ca_certs,
|
|
74
|
+
timeout=timeout,
|
|
75
|
+
connection_class=connection_class,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
return client
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import traceback
|
|
4
|
+
from copy import deepcopy
|
|
5
|
+
from typing import Optional, Iterator, Dict, Any, Iterable
|
|
6
|
+
|
|
7
|
+
from pjk.base import Pipe, ParsedToken, Usage, Integration
|
|
8
|
+
from pjk.pipes.query_pipe import QueryPipe
|
|
9
|
+
from pjk.common import Config
|
|
10
|
+
from pjk.integrations.opensearch_client import OpenSearchClient
|
|
11
|
+
|
|
12
|
+
def build_body_from_string(query_string: str) -> dict:
|
|
13
|
+
if query_string == "*":
|
|
14
|
+
return {"query": {"match_all": {}}}
|
|
15
|
+
else:
|
|
16
|
+
return {
|
|
17
|
+
"query": {
|
|
18
|
+
"simple_query_string": {
|
|
19
|
+
"query": query_string
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
class OpenSearchQueryPipe(QueryPipe, Integration):
|
|
25
|
+
name = "os_query"
|
|
26
|
+
desc = "Opensearch query pipe. Uses record['query_string'] or record['query_object'] for os query"
|
|
27
|
+
arg0 = ("index", "index to query over")
|
|
28
|
+
examples = [
|
|
29
|
+
["{'query_string': '*'}", 'os_query:myindex', '-'],
|
|
30
|
+
["{'query_string': 'dog'}", 'os_query:myindex', '-'],
|
|
31
|
+
["{'query_string': 'dog AND cat'}", 'os_query:myindex', '-'],
|
|
32
|
+
["{'query_object': {query: {...}}", 'os_query:myindex', '-'],
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
36
|
+
super().__init__(ptok, usage)
|
|
37
|
+
|
|
38
|
+
# index from arg0 or config
|
|
39
|
+
self.index = ptok.get_arg(0)
|
|
40
|
+
|
|
41
|
+
# Build the OpenSearch client (handles AWS/basic/none)
|
|
42
|
+
config = Config('index', self, self.index)
|
|
43
|
+
self.client = OpenSearchClient.get_client(config)
|
|
44
|
+
|
|
45
|
+
# Iteration state
|
|
46
|
+
self.cur_record: Optional[Dict[str, Any]] = None
|
|
47
|
+
self.hits_iter: Optional[Iterator[Dict[str, Any]]] = None
|
|
48
|
+
|
|
49
|
+
def reset(self):
|
|
50
|
+
# keep the index open between drains
|
|
51
|
+
pass
|
|
52
|
+
|
|
53
|
+
def close(self):
|
|
54
|
+
pass
|
|
55
|
+
|
|
56
|
+
def execute_query_returning_Q_xR_iterable(self, query_record: dict) -> Iterator[Dict[str, Any]]:
|
|
57
|
+
query_string = query_record.get('query_string', None)
|
|
58
|
+
query_body = None
|
|
59
|
+
|
|
60
|
+
if query_string:
|
|
61
|
+
query_body = build_body_from_string(query_string)
|
|
62
|
+
else:
|
|
63
|
+
query_body = query_record.get('query_object')
|
|
64
|
+
|
|
65
|
+
try:
|
|
66
|
+
# Build final request body
|
|
67
|
+
req_body = deepcopy(query_body)
|
|
68
|
+
req_body["size"] = self.count
|
|
69
|
+
|
|
70
|
+
res = self.client.search(index=self.index, body=req_body)
|
|
71
|
+
|
|
72
|
+
total_hits = 0
|
|
73
|
+
took = res.get("took")
|
|
74
|
+
hits = res.get("hits", {}).get("hits", [])
|
|
75
|
+
total_obj = res.get("hits", {}).get("total", {})
|
|
76
|
+
if isinstance(total_obj, dict):
|
|
77
|
+
total_hits = total_obj.get("value", 0)
|
|
78
|
+
elif isinstance(total_obj, int):
|
|
79
|
+
total_hits = total_obj
|
|
80
|
+
|
|
81
|
+
# Emit a metadata record first
|
|
82
|
+
yield {
|
|
83
|
+
"took_ms": took,
|
|
84
|
+
"total_hits": total_hits,
|
|
85
|
+
"index": self.index,
|
|
86
|
+
"os_query_body": req_body
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
# Emit each hit
|
|
90
|
+
for hit in hits:
|
|
91
|
+
if "_source" in hit and isinstance(hit["_source"], dict):
|
|
92
|
+
yield hit["_source"]
|
|
93
|
+
else:
|
|
94
|
+
# Some queries (e.g., stored fields only) might not include _source
|
|
95
|
+
yield {"_type": "os_query_hit", "_hit": hit}
|
|
96
|
+
|
|
97
|
+
except Exception as e:
|
|
98
|
+
print("OpenSearch query error:", e, file=sys.stderr)
|
|
99
|
+
traceback.print_exc()
|
|
100
|
+
yield {
|
|
101
|
+
"_type": "os_query_error",
|
|
102
|
+
"error": str(e),
|
|
103
|
+
"query_record": query_record,
|
|
104
|
+
}
|
|
@@ -10,7 +10,7 @@ from decimal import Decimal
|
|
|
10
10
|
from typing import Any, Dict, Optional
|
|
11
11
|
|
|
12
12
|
from pjk.base import Integration, ParsedToken, Usage
|
|
13
|
-
from pjk.common import
|
|
13
|
+
from pjk.common import Config
|
|
14
14
|
from pjk.pipes.query_pipe import QueryPipe
|
|
15
15
|
|
|
16
16
|
|
|
@@ -91,26 +91,25 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
|
|
|
91
91
|
|
|
92
92
|
|
|
93
93
|
class PostgresPipe(QueryPipe,Integration):
|
|
94
|
-
name = '
|
|
94
|
+
name = 'postgres'
|
|
95
95
|
desc = "Postgres query pipe; executes SQL from input."
|
|
96
96
|
arg0 = ("dbname", 'database name.')
|
|
97
97
|
examples = [
|
|
98
|
-
['myquery.sql', '
|
|
99
|
-
["{'query': 'SELECT * from MY_TABLE;'}", '
|
|
100
|
-
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", '
|
|
98
|
+
['myquery.sql', 'postgres:mydb', '-'],
|
|
99
|
+
["{'query': 'SELECT * from MY_TABLE;'}", 'postgres:mydb', '-'],
|
|
100
|
+
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'postgres:mydb']
|
|
101
101
|
]
|
|
102
102
|
|
|
103
103
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
104
104
|
super().__init__(ptok, usage)
|
|
105
105
|
|
|
106
|
-
lookups = Lookups(self)
|
|
107
106
|
self.dbname = usage.get_arg("dbname")
|
|
108
|
-
|
|
109
|
-
self.db_host =
|
|
110
|
-
self.db_user =
|
|
111
|
-
self.db_pass =
|
|
112
|
-
self.db_port = int(
|
|
113
|
-
self.db_ssl = bool(
|
|
107
|
+
config = Config('dbname', self, self.dbname)
|
|
108
|
+
self.db_host = config.lookup("host")
|
|
109
|
+
self.db_user = config.lookup("user")
|
|
110
|
+
self.db_pass = config.lookup("password")
|
|
111
|
+
self.db_port = int(config.lookup("port", 5432))
|
|
112
|
+
self.db_ssl = bool(config.lookup("ssl", False))
|
|
114
113
|
|
|
115
114
|
self.query_field = usage.get_param('query_field')
|
|
116
115
|
self.params_field = "params" # optional: list/tuple (positional) or dict (named)
|
|
@@ -9,8 +9,9 @@ import uuid
|
|
|
9
9
|
from decimal import Decimal
|
|
10
10
|
from typing import Any, Dict, Optional
|
|
11
11
|
|
|
12
|
-
from pjk.base import ParsedToken,
|
|
12
|
+
from pjk.base import ParsedToken, TokenError, Usage, Integration
|
|
13
13
|
from pjk.pipes.query_pipe import QueryPipe
|
|
14
|
+
from pjk.common import Config
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
# ---------- utilities ----------
|
|
@@ -55,23 +56,6 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
|
|
|
55
56
|
return {col: normalize(val) for col, val in zip(cols, row)}
|
|
56
57
|
|
|
57
58
|
|
|
58
|
-
def _get_any(params: Dict[str, Any], *keys: str) -> Optional[Any]:
|
|
59
|
-
"""
|
|
60
|
-
Fetch a value from params using any of the provided keys,
|
|
61
|
-
trying case variants and optional SNOWFLAKE_ prefix.
|
|
62
|
-
"""
|
|
63
|
-
variants = []
|
|
64
|
-
for k in keys:
|
|
65
|
-
variants.extend([
|
|
66
|
-
k, k.lower(), k.upper(),
|
|
67
|
-
f"snowflake_{k}".lower(), f"SNOWFLAKE_{k}".upper()
|
|
68
|
-
])
|
|
69
|
-
for v in variants:
|
|
70
|
-
if v in params:
|
|
71
|
-
return params[v]
|
|
72
|
-
return None
|
|
73
|
-
|
|
74
|
-
|
|
75
59
|
# ---------- client ----------
|
|
76
60
|
|
|
77
61
|
class SnowflakeClient:
|
|
@@ -141,29 +125,28 @@ class SnowflakeClient:
|
|
|
141
125
|
class SnowflakePipe(QueryPipe, Integration):
|
|
142
126
|
"""
|
|
143
127
|
Snowflake query pipe; executes SQL found in input record['query'] and streams rows.
|
|
144
|
-
Connection/session settings are pulled from ~/.pjk/
|
|
128
|
+
Connection/session settings are pulled from ~/.pjk/component_configs.yaml under the arg name.
|
|
145
129
|
"""
|
|
146
130
|
name = 'snowflake'
|
|
147
131
|
desc = "Snowflake query pipe; executes an SQL query for each input record."
|
|
148
132
|
arg0 = ('dbname', 'database name.')
|
|
149
133
|
examples = [
|
|
150
|
-
["{'query': 'SELECT CURRENT_ROLE();'}", "
|
|
151
|
-
["myquery.sql", "
|
|
134
|
+
["{'query': 'SELECT CURRENT_ROLE();'}", "snowflake:EDLDB", "-"],
|
|
135
|
+
["myquery.sql", "snowflake:EDLDB", "-"]
|
|
152
136
|
]
|
|
153
137
|
|
|
154
138
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
155
139
|
super().__init__(ptok, usage)
|
|
156
140
|
|
|
157
|
-
self.dbname = usage.get_arg(
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
self.
|
|
161
|
-
self.
|
|
162
|
-
self.
|
|
163
|
-
self.
|
|
164
|
-
self.
|
|
141
|
+
self.dbname = usage.get_arg('dbname')
|
|
142
|
+
config = Config('dbname', self, self.dbname)
|
|
143
|
+
self.sf_account = config.lookup("account")
|
|
144
|
+
self.sf_user = config.lookup("user")
|
|
145
|
+
self.sf_auth = config.lookup("authenticator")
|
|
146
|
+
self.sf_role = config.lookup("role")
|
|
147
|
+
self.sf_wh = config.lookup("warehouse")
|
|
148
|
+
self.sf_schema = config.lookup("schema")
|
|
165
149
|
self.sf_db = self.dbname
|
|
166
|
-
self.sf_schema = _get_any(self.lookup_params, "schema")
|
|
167
150
|
|
|
168
151
|
# Basic validation
|
|
169
152
|
missing = [k for k, v in [
|
|
@@ -176,7 +159,7 @@ class SnowflakePipe(QueryPipe, Integration):
|
|
|
176
159
|
] if not v]
|
|
177
160
|
if missing:
|
|
178
161
|
raise TokenError(
|
|
179
|
-
f"
|
|
162
|
+
f"config entry '{self.dbname}' missing: {', '.join(missing)}"
|
|
180
163
|
)
|
|
181
164
|
|
|
182
165
|
self.query_field = usage.get_param('query_field')
|
pjk/main.py
CHANGED
|
@@ -11,6 +11,8 @@ from pjk.parser import ExpressionParser
|
|
|
11
11
|
from pjk.base import UsageError
|
|
12
12
|
from pjk.log import init as init_logging
|
|
13
13
|
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
import shutil
|
|
14
16
|
import traceback
|
|
15
17
|
import concurrent.futures
|
|
16
18
|
from pjk.registry import ComponentRegistry
|
|
@@ -65,13 +67,22 @@ def execute_threaded(sinks, stop_progress=None):
|
|
|
65
67
|
else:
|
|
66
68
|
executor.shutdown(wait=True)
|
|
67
69
|
|
|
70
|
+
def initialize():
|
|
71
|
+
init_logging()
|
|
72
|
+
|
|
73
|
+
'''
|
|
74
|
+
src = Path("src/pjk/resources/component_configs.tmpl")
|
|
75
|
+
dst_dir = Path.home() / ".pjk"
|
|
76
|
+
dst_dir.mkdir(parents=True, exist_ok=True)
|
|
77
|
+
shutil.copy(src, dst_dir / src.name)
|
|
78
|
+
'''
|
|
79
|
+
|
|
68
80
|
def execute(command: str):
|
|
69
81
|
tokens = shlex.split(command, comments=True, posix=True)
|
|
70
82
|
execute_tokens(tokens)
|
|
71
83
|
|
|
72
84
|
def execute_tokens(tokens: List[str]):
|
|
73
|
-
|
|
74
|
-
# (remove the sys.exit SIGINT handler here)
|
|
85
|
+
initialize()
|
|
75
86
|
|
|
76
87
|
if '--version' in tokens:
|
|
77
88
|
print(f"pjk version {__version__}")
|
pjk/pipes/factory.py
CHANGED
|
@@ -20,6 +20,7 @@ from pjk.pipes.select import SelectFields
|
|
|
20
20
|
from pjk.pipes.denorm import DenormPipe
|
|
21
21
|
from pjk.integrations.postgres_pipe import PostgresPipe
|
|
22
22
|
from pjk.integrations.snowflake_pipe import SnowflakePipe
|
|
23
|
+
from pjk.integrations.opensearch_query_pipe import OpenSearchQueryPipe
|
|
23
24
|
from pjk.pipes.sample import SamplePipe
|
|
24
25
|
from pjk.pipes.user_pipe_factory import UserPipeFactory
|
|
25
26
|
|
|
@@ -40,7 +41,8 @@ COMPONENTS = {
|
|
|
40
41
|
'sample': SamplePipe,
|
|
41
42
|
'explode': DenormPipe,
|
|
42
43
|
'postgres': PostgresPipe,
|
|
43
|
-
'snowflake': SnowflakePipe
|
|
44
|
+
'snowflake': SnowflakePipe,
|
|
45
|
+
'os_query': OpenSearchQueryPipe
|
|
44
46
|
}
|
|
45
47
|
|
|
46
48
|
class PipeFactory(ComponentFactory):
|
pjk/pipes/query_pipe.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
from pjk.base import Pipe, ParsedToken, Usage
|
|
2
|
-
from pjk.common import Lookups
|
|
1
|
+
from pjk.base import Pipe, ParsedToken, Usage
|
|
3
2
|
from typing import Any, Dict, Iterable, Optional
|
|
4
3
|
from abc import abstractmethod
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
class QueryPipe(Pipe):
|
|
8
|
-
requires_lookups = True
|
|
9
7
|
name: str = None
|
|
10
8
|
desc: str = None
|
|
11
9
|
arg0: tuple[Optional[str], Optional[str]] = (None, None)
|
|
@@ -18,8 +16,7 @@ class QueryPipe(Pipe):
|
|
|
18
16
|
desc=cls.desc,
|
|
19
17
|
component_class=cls
|
|
20
18
|
)
|
|
21
|
-
u.def_arg(name=cls.arg0[0], usage=f"{cls.arg0[1]} ~/.pjk/
|
|
22
|
-
u.def_param("query_field", usage="field of query.", default="query")
|
|
19
|
+
u.def_arg(name=cls.arg0[0], usage=f"{cls.arg0[1]} ~/.pjk/component_configs.yaml must contain entry '{cls.__name__}-<{cls.arg0[0]}'>\n with necessary parameters.")
|
|
23
20
|
u.def_param("count", usage="Number of search results, (databases may ignore)", is_num=True, default="10")
|
|
24
21
|
u.def_param("shape", usage='the shape of ouput records', is_num=False,
|
|
25
22
|
valid_values={'xR', 'Q_xR', 'Qxr'}, default='xR')
|
|
@@ -32,14 +29,8 @@ class QueryPipe(Pipe):
|
|
|
32
29
|
|
|
33
30
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
34
31
|
super().__init__(ptok, usage)
|
|
35
|
-
|
|
36
|
-
self.lookup_params = None
|
|
37
|
-
if type(self).requires_lookups:
|
|
38
|
-
lookups = Lookups(self)
|
|
39
|
-
lookup_name = usage.get_arg(type(self).arg0[0])
|
|
40
|
-
self.lookup_params = lookups.get(lookup_name)
|
|
41
|
-
|
|
42
32
|
self.output_shape = usage.get_param('shape')
|
|
33
|
+
self.count = usage.get_param('count')
|
|
43
34
|
|
|
44
35
|
@abstractmethod
|
|
45
36
|
def execute_query_returning_Q_xR_iterable(self, record) -> Iterable[Dict[str, Any]]:
|
|
@@ -83,7 +74,7 @@ class QueryPipe(Pipe):
|
|
|
83
74
|
q_out = self._make_q_object(in_rec, out_rec)
|
|
84
75
|
continue
|
|
85
76
|
r_list.append(out_rec)
|
|
86
|
-
q_out['
|
|
77
|
+
q_out['child'] = r_list
|
|
87
78
|
yield q_out
|
|
88
79
|
|
|
89
80
|
|
pjk/sources/format_source.py
CHANGED
|
@@ -60,9 +60,9 @@ class FormatSource(Source):
|
|
|
60
60
|
"""
|
|
61
61
|
|
|
62
62
|
pattern = re.compile(
|
|
63
|
-
r'^(?:(?P<pre_colon>[^:]+):)?'
|
|
64
|
-
r'(?P<path
|
|
65
|
-
r'(?:\.(?P<ext
|
|
63
|
+
r'^(?:(?P<pre_colon>[^:]+):)?' # take everything up to the first colon (if any)
|
|
64
|
+
r'(?P<path>.+?)' # then the rest of the path, allowing colons
|
|
65
|
+
r'(?:\.(?P<ext>[A-Za-z0-9]+(?:\.gz)?))?$' # optional .json / .csv / .json.gz etc., at the very end
|
|
66
66
|
)
|
|
67
67
|
|
|
68
68
|
# we don't use framework token parsing (except for params) cuz too complicated
|
pjk/version.py
CHANGED
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
pjk/__init__.py,sha256=6HGDVcFOFv6VPSNjxVnusm9wHqy01pELX3AyCWFzqWg,128
|
|
2
2
|
pjk/base.py,sha256=ObBsoTkfWp5qigr-QdNGZBs6tNH41P18kTnSMSJnntA,12775
|
|
3
|
-
pjk/common.py,sha256=
|
|
3
|
+
pjk/common.py,sha256=aYM9y-ANM1-_8ICa18etkP3wYNw0oSilZZwd5s7krME,4576
|
|
4
4
|
pjk/log.py,sha256=LjBboWhUrr2Cz-ygpftCIK17ee2-zNHKZjeJyoW0PlA,2163
|
|
5
|
-
pjk/main.py,sha256=
|
|
5
|
+
pjk/main.py,sha256=xm33gbTxapj_hTu30BygMOOYhLwz_frXKaZyKKaCEl4,4150
|
|
6
6
|
pjk/man_page.py,sha256=BNxRbzXfvR_5FoPkpCwxApyMsfSSrSdDu2PM-IVblA4,4447
|
|
7
7
|
pjk/parser.py,sha256=2aZiW0SY9KPKb2h4xPasOhxehcxn_l_BEx2swmN_7_E,12673
|
|
8
8
|
pjk/progress.py,sha256=scacaLRbSMBJEEEDeOTYt8sKwXP-CYEGoANG7IXVoQ4,9597
|
|
9
9
|
pjk/registry.py,sha256=0_exY3sivaOHyRtf-v88OSugl2LGAbbFjyFS4cPbGO8,6864
|
|
10
|
-
pjk/version.py,sha256=
|
|
10
|
+
pjk/version.py,sha256=smFKtPKnTVCimiTTlUrQYXoyxFS9F6JlapeUCyMHJpY,91
|
|
11
11
|
pjk/integrations/ddb_sink.py,sha256=qFuh0ZkZ1oGrx576-226eu75sauGqa8BF_u-FLs4yW4,1704
|
|
12
|
-
pjk/integrations/
|
|
13
|
-
pjk/integrations/
|
|
12
|
+
pjk/integrations/opensearch_client.py,sha256=kpDISpLzGRA5maOf3JfdbOTVxpvSSe2gfzUrE1SVU4I,3250
|
|
13
|
+
pjk/integrations/opensearch_query_pipe.py,sha256=JCb1s0amk7u-MeNXr1di8wY2UTKioHSJKDrthRQYzu0,3526
|
|
14
|
+
pjk/integrations/postgres_pipe.py,sha256=z9xbxvwPg1T4bSln_5r57wWMildwe-gHdpf5Y_G6MmA,6178
|
|
15
|
+
pjk/integrations/snowflake_pipe.py,sha256=JzcjxY4CbXHfVKsURD1ryZqgZfkDDfWJN_AjRcOtFms,8007
|
|
14
16
|
pjk/pipes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
17
|
pjk/pipes/denorm.py,sha256=tL8v3lTDCZPXDlffrx71PIojC_JWjN3PCJriNP6mw0o,2235
|
|
16
|
-
pjk/pipes/factory.py,sha256=
|
|
18
|
+
pjk/pipes/factory.py,sha256=pfdca16CKGfaPE-pqxW651KGmJRinyYqBeOI29y5ptQ,2231
|
|
17
19
|
pjk/pipes/filter.py,sha256=Fo5-MJaNAfTabRZxw9fQ1n4amhgUUCncC_GoOWswdRw,2053
|
|
18
20
|
pjk/pipes/head.py,sha256=5ZAacwzpVWuR_xWCnlHqyJUwbxfEcF8vIogdKIJBTRs,903
|
|
19
21
|
pjk/pipes/join.py,sha256=zE9JrrGnOjDB7OuBT2_FxleptFfbBJJLdFak9FbsOjc,3149
|
|
@@ -21,7 +23,7 @@ pjk/pipes/let_reduce.py,sha256=QfCs-omZq-a2hMFr5Nnt1hhQuiXol0IMA2diXwesVUA,7153
|
|
|
21
23
|
pjk/pipes/map.py,sha256=mOBKjx-AHDmOa3mBMnFfb-PYqFbKnqGVjBj3hx2fcys,5107
|
|
22
24
|
pjk/pipes/move_field.py,sha256=ZgJZZSD7irWVnOdHyhNUq9Mw3oLuGVUr1WKjdjBbwgo,1142
|
|
23
25
|
pjk/pipes/progress_pipe.py,sha256=GfnQFmeAOyeSHl69TXNWWz7sBd6MVpzDvYH2mZBp1Hg,1239
|
|
24
|
-
pjk/pipes/query_pipe.py,sha256=
|
|
26
|
+
pjk/pipes/query_pipe.py,sha256=X6hSK-HSPfeUvVa-dNxjQSm0Jfz0aqCQmD4kRtv5mKw,2647
|
|
25
27
|
pjk/pipes/remove_field.py,sha256=QjEO6-phRngM2emBJ6xv8UA2d_iA44tYN8Crx4lhqQ0,1169
|
|
26
28
|
pjk/pipes/sample.py,sha256=_YezoC3hZbG0qpkg9C6AuHLjuWseeAqsYYOXXAMvbOQ,2269
|
|
27
29
|
pjk/pipes/select.py,sha256=OuVAk8x7rCkb0neroBmUUuovxHc-z2q7VaUNNWrEdZY,1331
|
|
@@ -53,7 +55,7 @@ pjk/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
53
55
|
pjk/sources/csv_source.py,sha256=Qlk3wB99jQPNIR4yRWmvrpCvGBsc8qcVxjVmtMYPlc0,702
|
|
54
56
|
pjk/sources/dir_source.py,sha256=V8kVrzwSXyAk3bfwCAQA5dzgEFE3HKIuXAdiP_eP7Ls,2474
|
|
55
57
|
pjk/sources/factory.py,sha256=J_cltOaKSE1uAJOyXWzNeoWNqBH2rsLQ2iGApccSUn0,1732
|
|
56
|
-
pjk/sources/format_source.py,sha256=
|
|
58
|
+
pjk/sources/format_source.py,sha256=Yae2G4sSsKdmFPbxRz68yIPJwxlHBMKojQYUxjradC4,4802
|
|
57
59
|
pjk/sources/inline_source.py,sha256=iskdhxoJ6uzzBZpy-1N2fH0UyJuSATMw7soE_ZRz1Yg,1703
|
|
58
60
|
pjk/sources/json_source.py,sha256=83YG4dsnBErqbp72I3D-_7acJMo6h-H-vQd_PviJxmc,1061
|
|
59
61
|
pjk/sources/lazy_file.py,sha256=fQYaQz7bytG9vY4JNtIQJxfHWFowCn5il51H7vQrTNg,400
|
|
@@ -66,9 +68,9 @@ pjk/sources/source_list.py,sha256=5L2vFrtVSl9rKf2NjfpUFOOAb-iypVDKYCw1-3xgcEo,64
|
|
|
66
68
|
pjk/sources/sql_source.py,sha256=2CpOuikd2BVmMW_UbhVGPfXUf3BJmOcK5yMtmbTMYvw,743
|
|
67
69
|
pjk/sources/tsv_source.py,sha256=37nhEblCZ8XeTNHVo-WcdJ8HbIbiwKgsDES_yzz6EdU,306
|
|
68
70
|
pjk/sources/user_source_factory.py,sha256=0XIz6NkiBhGAteGDP8cNy7MALWsUF3smtLj4Qnj41Q8,1326
|
|
69
|
-
python_jack_knife-0.6.
|
|
70
|
-
python_jack_knife-0.6.
|
|
71
|
-
python_jack_knife-0.6.
|
|
72
|
-
python_jack_knife-0.6.
|
|
73
|
-
python_jack_knife-0.6.
|
|
74
|
-
python_jack_knife-0.6.
|
|
71
|
+
python_jack_knife-0.6.4.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
|
72
|
+
python_jack_knife-0.6.4.dist-info/METADATA,sha256=x9zWQRClA3ZHnR6ycsh440aCmRn-igSh0XGZG4Xti8I,14641
|
|
73
|
+
python_jack_knife-0.6.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
74
|
+
python_jack_knife-0.6.4.dist-info/entry_points.txt,sha256=kzZ10zEisvEaG2xYqqw7xRpuV62rAO_dPEHnM6USelk,38
|
|
75
|
+
python_jack_knife-0.6.4.dist-info/top_level.txt,sha256=r-Ef_I9SbVDL9jD-W0WtshstLos_7guWbpItYxxSllQ,4
|
|
76
|
+
python_jack_knife-0.6.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|