python-jack-knife 0.6.4__tar.gz → 0.6.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_jack_knife-0.6.4/src/python_jack_knife.egg-info → python_jack_knife-0.6.6}/PKG-INFO +1 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/common.py +7 -28
- python_jack_knife-0.6.6/src/pjk/components.py +138 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/integrations/opensearch_client.py +15 -17
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/integrations/opensearch_query_pipe.py +54 -18
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/integrations/postgres_pipe.py +27 -19
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/integrations/snowflake_pipe.py +27 -35
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/main.py +6 -9
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/man_page.py +15 -4
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/parser.py +72 -42
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/denorm.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/factory.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/filter.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/head.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/join.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/let_reduce.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/map.py +6 -4
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/move_field.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/progress_pipe.py +1 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/query_pipe.py +18 -9
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/remove_field.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/sample.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/select.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/sort.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/tail.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/user_pipe_factory.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/where.py +3 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/progress.py +1 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/registry.py +1 -1
- python_jack_knife-0.6.6/src/pjk/sinks/create_sink.py +107 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/devnull.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/dir_sink.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/expect.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/factory.py +2 -3
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/format_sink.py +5 -4
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/graph.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/s3_sink.py +1 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/sinks.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/stdout.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/tsv_sink.py +1 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/user_sink_factory.py +2 -1
- python_jack_knife-0.6.6/src/pjk/sources/configs_source.py +52 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/csv_source.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/dir_source.py +1 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/factory.py +6 -1
- python_jack_knife-0.6.6/src/pjk/sources/favorite_source.py +44 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/format_source.py +3 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/inline_source.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/json_source.py +2 -1
- python_jack_knife-0.6.6/src/pjk/sources/macro_source.py +46 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/npy_source.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/s3_source.py +1 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/source_list.py +1 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/sql_source.py +2 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/user_source_factory.py +2 -1
- python_jack_knife-0.6.4/src/pjk/base.py → python_jack_knife-0.6.6/src/pjk/usage.py +122 -191
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/version.py +1 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6/src/python_jack_knife.egg-info}/PKG-INFO +1 -1
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/python_jack_knife.egg-info/SOURCES.txt +5 -2
- python_jack_knife-0.6.4/src/pjk/integrations/ddb_sink.py +0 -54
- python_jack_knife-0.6.4/src/pjk/sinks/create_sink.py +0 -110
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/LICENSE +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/README.md +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/pyproject.toml +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/setup.cfg +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/__init__.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/log.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/pipes/__init__.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/__init__.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/csv_sink.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/graph_bar_line.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/graph_cumulative.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/graph_hist.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/graph_scatter.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/json_sink.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sinks/s3_stream.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/__init__.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/lazy_file.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/lazy_file_local.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/lazy_file_s3.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/parquet_source.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/sources/tsv_source.py +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/python_jack_knife.egg-info/requires.txt +0 -0
- {python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/python_jack_knife.egg-info/top_level.txt +0 -0
|
@@ -5,7 +5,13 @@ import sys, shutil, subprocess, contextlib, signal
|
|
|
5
5
|
import os
|
|
6
6
|
import re
|
|
7
7
|
import yaml
|
|
8
|
-
from pjk.
|
|
8
|
+
from pjk.usage import Usage, TokenError
|
|
9
|
+
from abc import ABC
|
|
10
|
+
|
|
11
|
+
# mixin
|
|
12
|
+
# just for distinguishing components for display
|
|
13
|
+
class Integration(ABC):
|
|
14
|
+
pass
|
|
9
15
|
|
|
10
16
|
class SafeNamespace:
|
|
11
17
|
def __init__(self, obj):
|
|
@@ -74,33 +80,6 @@ def highlight(text: str, color: str = 'bold', value: str = None) -> str:
|
|
|
74
80
|
style = COLOR_CODES.get(color.lower(), COLOR_CODES['bold'])
|
|
75
81
|
return text.replace(value, f"{style}{value}{RESET}")
|
|
76
82
|
|
|
77
|
-
class Config:
|
|
78
|
-
def __init__(self, instance_type: str, component_class: Source|Pipe, instance: str):
|
|
79
|
-
# instance = name of the instance, e.g. 'myindexcollection', instance_type = 'index'
|
|
80
|
-
# instance_type only used by automatic config template maker MUST BE STRING LITERAL!
|
|
81
|
-
self.configs_yaml = os.path.expanduser('~/.pjk/component_configs.yaml')
|
|
82
|
-
self.class_name = type(component_class).__name__
|
|
83
|
-
self.instance = instance
|
|
84
|
-
self._data = {}
|
|
85
|
-
self._load()
|
|
86
|
-
|
|
87
|
-
def _load(self):
|
|
88
|
-
if os.path.exists(self.configs_yaml):
|
|
89
|
-
with open(self.configs_yaml, 'r') as f:
|
|
90
|
-
self._data = yaml.safe_load(f) or {}
|
|
91
|
-
else:
|
|
92
|
-
self._data = {}
|
|
93
|
-
|
|
94
|
-
def lookup(self, param: str, default=None):
|
|
95
|
-
instance_key = f'{self.class_name}-{self.instance}'
|
|
96
|
-
entry = self._data.get(instance_key, None)
|
|
97
|
-
if not entry:
|
|
98
|
-
raise TokenError(
|
|
99
|
-
f"~/.pjk/component_configs.yaml does not contain entry for '{instance_key}' with required params."
|
|
100
|
-
)
|
|
101
|
-
|
|
102
|
-
return entry.get(param, default)
|
|
103
|
-
|
|
104
83
|
class ComponentFactory:
|
|
105
84
|
def __init__(self, core_components: dict):
|
|
106
85
|
self.num_orig = 0
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright 2024 Mike Schultz
|
|
3
|
+
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from typing import Any, Optional, List
|
|
6
|
+
from pjk.usage import Usage, NoBindUsage, ParsedToken
|
|
7
|
+
|
|
8
|
+
# mixin
|
|
9
|
+
class KeyedSource(ABC):
|
|
10
|
+
@classmethod
|
|
11
|
+
def usage(cls):
|
|
12
|
+
return Usage(
|
|
13
|
+
name=cls.__name__,
|
|
14
|
+
desc=f"{cls.__name__} component"
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
@abstractmethod
|
|
18
|
+
def lookup(self, left_rec) -> Optional[dict]:
|
|
19
|
+
"""Return the record associated with the given key, or None."""
|
|
20
|
+
pass
|
|
21
|
+
|
|
22
|
+
def get_unlookedup_records(self) -> List[Any]:
|
|
23
|
+
# for outer join
|
|
24
|
+
pass
|
|
25
|
+
|
|
26
|
+
def deep_copy(self):
|
|
27
|
+
return None
|
|
28
|
+
|
|
29
|
+
class Source(ABC):
|
|
30
|
+
@classmethod
|
|
31
|
+
def usage(cls):
|
|
32
|
+
return NoBindUsage(
|
|
33
|
+
name=cls.__name__,
|
|
34
|
+
desc=f"{cls.__name__} component",
|
|
35
|
+
component_class=cls
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
@abstractmethod
|
|
39
|
+
def __iter__(self):
|
|
40
|
+
raise NotImplementedError("__iter__ must be implemented by subclasses")
|
|
41
|
+
|
|
42
|
+
def __next__(self):
|
|
43
|
+
# lazily create an internal iterator the first time next() is called
|
|
44
|
+
if not hasattr(self, "_iter"):
|
|
45
|
+
self._iter = iter(self)
|
|
46
|
+
return next(self._iter)
|
|
47
|
+
|
|
48
|
+
def deep_copy(self):
|
|
49
|
+
return None # Default: not copyable unless overridden
|
|
50
|
+
|
|
51
|
+
def close(self):
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
def _get_sources(self, source_list: list):
|
|
55
|
+
pass
|
|
56
|
+
|
|
57
|
+
class Pipe(Source):
|
|
58
|
+
arity: int = 1
|
|
59
|
+
|
|
60
|
+
def __init__(self, ptok: ParsedToken, usage: Usage = None):
|
|
61
|
+
self.ptok = ptok
|
|
62
|
+
self.usage = usage
|
|
63
|
+
self.left = None # left source for convience
|
|
64
|
+
self.right = None # right source for convience
|
|
65
|
+
self.inputs: List[Source] = []
|
|
66
|
+
|
|
67
|
+
def add_source(self, source: Source) -> None:
|
|
68
|
+
self.inputs.append(source)
|
|
69
|
+
# first two are assigned left, right
|
|
70
|
+
if self.left is None:
|
|
71
|
+
self.left = source
|
|
72
|
+
elif self.right is None:
|
|
73
|
+
self.right = self.left
|
|
74
|
+
self.left = source
|
|
75
|
+
|
|
76
|
+
def reset(self):
|
|
77
|
+
pass # optional hook
|
|
78
|
+
|
|
79
|
+
def deep_copy(self) -> Optional["Pipe"]:
|
|
80
|
+
return None
|
|
81
|
+
|
|
82
|
+
def _get_sources(self, source_list: list):
|
|
83
|
+
for ix in self.inputs:
|
|
84
|
+
source_list.append(ix)
|
|
85
|
+
ix._get_sources(source_list)
|
|
86
|
+
|
|
87
|
+
class DeepCopyPipe(Pipe):
|
|
88
|
+
def deep_copy(self):
|
|
89
|
+
"""
|
|
90
|
+
Generic deep_copy: clone left source, re-instantiate
|
|
91
|
+
this pipe class with the same ptok/usage, and attach.
|
|
92
|
+
"""
|
|
93
|
+
source_clone = self.left.deep_copy()
|
|
94
|
+
if not source_clone:
|
|
95
|
+
return None
|
|
96
|
+
|
|
97
|
+
# re-instantiate using the actual subclass
|
|
98
|
+
pipe = type(self)(self.ptok, self.usage)
|
|
99
|
+
pipe.add_source(source_clone)
|
|
100
|
+
return pipe
|
|
101
|
+
|
|
102
|
+
class Sink(ABC):
|
|
103
|
+
@classmethod
|
|
104
|
+
def usage(cls):
|
|
105
|
+
return NoBindUsage(
|
|
106
|
+
name=cls.__name__,
|
|
107
|
+
desc=f"{cls.__name__} component",
|
|
108
|
+
component_class=cls
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
def __init__(self, ptok: ParsedToken, usage: Usage = None):
|
|
112
|
+
self.ptok = ptok
|
|
113
|
+
self.usage = usage
|
|
114
|
+
|
|
115
|
+
def drain(self):
|
|
116
|
+
self.process()
|
|
117
|
+
self.close()
|
|
118
|
+
|
|
119
|
+
# get all inputs in the execution chain for closing
|
|
120
|
+
inputs = [self.input]
|
|
121
|
+
self.input._get_sources(inputs)
|
|
122
|
+
for input in inputs:
|
|
123
|
+
input.close()
|
|
124
|
+
|
|
125
|
+
# optional
|
|
126
|
+
def close(self):
|
|
127
|
+
pass
|
|
128
|
+
|
|
129
|
+
def add_source(self, source: Source) -> None:
|
|
130
|
+
self.input = source
|
|
131
|
+
|
|
132
|
+
@abstractmethod
|
|
133
|
+
def process(self) -> None:
|
|
134
|
+
pass
|
|
135
|
+
|
|
136
|
+
def deep_copy(self):
|
|
137
|
+
return None
|
|
138
|
+
|
{python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/integrations/opensearch_client.py
RENAMED
|
@@ -1,28 +1,26 @@
|
|
|
1
|
-
from pjk.
|
|
1
|
+
from pjk.usage import Usage
|
|
2
2
|
|
|
3
3
|
class OpenSearchClient:
|
|
4
4
|
|
|
5
5
|
@classmethod
|
|
6
|
-
def get_client(cls,
|
|
7
|
-
aws_auth =
|
|
8
|
-
scheme =
|
|
9
|
-
verify_certs =
|
|
10
|
-
ca_certs =
|
|
11
|
-
region =
|
|
12
|
-
service =
|
|
13
|
-
username =
|
|
14
|
-
password =
|
|
15
|
-
timeout =
|
|
16
|
-
ssl_assert_hostname =
|
|
17
|
-
ssl_show_warn =
|
|
18
|
-
host =
|
|
19
|
-
port =
|
|
6
|
+
def get_client(cls, u: Usage):
|
|
7
|
+
aws_auth = u.get_config("os_auth_use_aws")
|
|
8
|
+
scheme = u.get_config("os_scheme")
|
|
9
|
+
verify_certs = u.get_config("os_verify_certs")
|
|
10
|
+
ca_certs = u.get_config("os_ca_certs")
|
|
11
|
+
region = u.get_config("os_region")
|
|
12
|
+
service = u.get_config("os_service")
|
|
13
|
+
username = u.get_config("os_username")
|
|
14
|
+
password = u.get_config("os_password")
|
|
15
|
+
timeout = u.get_config("os_timeout")
|
|
16
|
+
ssl_assert_hostname = u.get_config("os_ssl_assert_hostname")
|
|
17
|
+
ssl_show_warn = u.get_config("os_ssl_show_warn")
|
|
18
|
+
host = u.get_config("os_host")
|
|
19
|
+
port = u.get_config("os_port")
|
|
20
20
|
|
|
21
21
|
# Reasonable port defaults
|
|
22
22
|
if port is None:
|
|
23
23
|
port = 443 if scheme == "https" else 9200
|
|
24
|
-
else:
|
|
25
|
-
port = int(port)
|
|
26
24
|
|
|
27
25
|
if host is None:
|
|
28
26
|
raise ValueError("Config os_host is required (set os_host + os_port/os_scheme, or a connection profile).")
|
{python_jack_knife-0.6.4 → python_jack_knife-0.6.6}/src/pjk/integrations/opensearch_query_pipe.py
RENAMED
|
@@ -4,9 +4,9 @@ import traceback
|
|
|
4
4
|
from copy import deepcopy
|
|
5
5
|
from typing import Optional, Iterator, Dict, Any, Iterable
|
|
6
6
|
|
|
7
|
-
from pjk.
|
|
7
|
+
from pjk.usage import ParsedToken, Usage
|
|
8
8
|
from pjk.pipes.query_pipe import QueryPipe
|
|
9
|
-
from pjk.common import
|
|
9
|
+
from pjk.common import Integration
|
|
10
10
|
from pjk.integrations.opensearch_client import OpenSearchClient
|
|
11
11
|
|
|
12
12
|
def build_body_from_string(query_string: str) -> dict:
|
|
@@ -23,24 +23,39 @@ def build_body_from_string(query_string: str) -> dict:
|
|
|
23
23
|
|
|
24
24
|
class OpenSearchQueryPipe(QueryPipe, Integration):
|
|
25
25
|
name = "os_query"
|
|
26
|
-
desc = "Opensearch query pipe. Uses record['
|
|
27
|
-
arg0 = ("
|
|
26
|
+
desc = "Opensearch query pipe. Uses record['query'] or record['os_query_object'] for os query"
|
|
27
|
+
arg0 = ("instance", "instance to query over.")
|
|
28
28
|
examples = [
|
|
29
|
-
["{'
|
|
30
|
-
["{'
|
|
31
|
-
["{'
|
|
32
|
-
["{'
|
|
29
|
+
["{'query': '_ping'}", 'os_query:myindex', '-'],
|
|
30
|
+
["{'query': '*'}", 'os_query:myindex', '-'],
|
|
31
|
+
["{'query': 'dog'}", 'os_query:myindex', '-'],
|
|
32
|
+
["{'query': 'dog AND cat'}", 'os_query:myindex', '-'],
|
|
33
|
+
["{'os_query_object': {query: {...}}", 'os_query:myindex', '-'],
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
# name, type, default
|
|
37
|
+
config_tuples = [
|
|
38
|
+
("index_name", str, None),
|
|
39
|
+
("os_auth_use_aws", bool, "true"),
|
|
40
|
+
("os_scheme", str, "https"),
|
|
41
|
+
("os_verify_certs", bool, "true"),
|
|
42
|
+
("os_ca_certs", str, None),
|
|
43
|
+
("os_region", str, None),
|
|
44
|
+
("os_service", str, "es"),
|
|
45
|
+
("os_username", str, None),
|
|
46
|
+
("os_password", str, None),
|
|
47
|
+
("os_timeout", float, 30),
|
|
48
|
+
("os_ssl_assert_hostname", bool, "true"),
|
|
49
|
+
("os_ssl_show_warn", bool, "false"),
|
|
50
|
+
("os_host", str, None),
|
|
51
|
+
("os_port", int, None)
|
|
33
52
|
]
|
|
34
53
|
|
|
35
54
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
36
55
|
super().__init__(ptok, usage)
|
|
37
56
|
|
|
38
|
-
|
|
39
|
-
self.
|
|
40
|
-
|
|
41
|
-
# Build the OpenSearch client (handles AWS/basic/none)
|
|
42
|
-
config = Config('index', self, self.index)
|
|
43
|
-
self.client = OpenSearchClient.get_client(config)
|
|
57
|
+
self.index = usage.get_config("index_name")
|
|
58
|
+
self.client = OpenSearchClient.get_client(usage)
|
|
44
59
|
|
|
45
60
|
# Iteration state
|
|
46
61
|
self.cur_record: Optional[Dict[str, Any]] = None
|
|
@@ -53,14 +68,35 @@ class OpenSearchQueryPipe(QueryPipe, Integration):
|
|
|
53
68
|
def close(self):
|
|
54
69
|
pass
|
|
55
70
|
|
|
56
|
-
def
|
|
57
|
-
|
|
71
|
+
def ping(self):
|
|
72
|
+
indexes = self.client.indices.get_alias(index="*")
|
|
73
|
+
index_list = []
|
|
74
|
+
|
|
75
|
+
yield {'num_indexes': len(indexes.keys())}
|
|
76
|
+
for index_name in sorted(indexes.keys()):
|
|
77
|
+
try:
|
|
78
|
+
count = self.client.count(index=index_name)["count"]
|
|
79
|
+
yield {'index': index_name, 'count': count}
|
|
80
|
+
|
|
81
|
+
except Exception as e:
|
|
82
|
+
print(f"{index_name}: failed to count ({e})")
|
|
83
|
+
|
|
84
|
+
def execute_query_returning_S_xO_iterable(self, query_record: dict) -> Iterator[Dict[str, Any]]:
|
|
85
|
+
query_string = query_record.get('query', None)
|
|
58
86
|
query_body = None
|
|
59
87
|
|
|
60
88
|
if query_string:
|
|
89
|
+
if query_string == '_ping':
|
|
90
|
+
yield from self.ping()
|
|
91
|
+
return
|
|
92
|
+
|
|
61
93
|
query_body = build_body_from_string(query_string)
|
|
62
94
|
else:
|
|
63
|
-
query_body = query_record.get('
|
|
95
|
+
query_body = query_record.get('os_query_object')
|
|
96
|
+
|
|
97
|
+
if not query_body:
|
|
98
|
+
yield {'_error': "query_record missing 'query' or 'os_query_object' field"}
|
|
99
|
+
return
|
|
64
100
|
|
|
65
101
|
try:
|
|
66
102
|
# Build final request body
|
|
@@ -83,7 +119,7 @@ class OpenSearchQueryPipe(QueryPipe, Integration):
|
|
|
83
119
|
"took_ms": took,
|
|
84
120
|
"total_hits": total_hits,
|
|
85
121
|
"index": self.index,
|
|
86
|
-
"
|
|
122
|
+
"os_query_object": req_body
|
|
87
123
|
}
|
|
88
124
|
|
|
89
125
|
# Emit each hit
|
|
@@ -9,8 +9,8 @@ import uuid
|
|
|
9
9
|
from decimal import Decimal
|
|
10
10
|
from typing import Any, Dict, Optional
|
|
11
11
|
|
|
12
|
-
from pjk.
|
|
13
|
-
from pjk.common import
|
|
12
|
+
from pjk.usage import ParsedToken, Usage
|
|
13
|
+
from pjk.common import Integration
|
|
14
14
|
from pjk.pipes.query_pipe import QueryPipe
|
|
15
15
|
|
|
16
16
|
|
|
@@ -19,11 +19,11 @@ class DBClient:
|
|
|
19
19
|
_connection = None
|
|
20
20
|
|
|
21
21
|
def __init__(self, host: str, username: str, password: Optional[str],
|
|
22
|
-
|
|
22
|
+
db_name: str, port: int = 5432, ssl: bool = False):
|
|
23
23
|
import pg8000 # lazy import
|
|
24
24
|
if DBClient._connection is None:
|
|
25
25
|
try:
|
|
26
|
-
kwargs = dict(user=username, password=password, host=host, database=
|
|
26
|
+
kwargs = dict(user=username, password=password, host=host, database=db_name, port=port)
|
|
27
27
|
if ssl:
|
|
28
28
|
import ssl as _ssl
|
|
29
29
|
kwargs["ssl_context"] = _ssl.create_default_context()
|
|
@@ -92,26 +92,34 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
|
|
|
92
92
|
|
|
93
93
|
class PostgresPipe(QueryPipe,Integration):
|
|
94
94
|
name = 'postgres'
|
|
95
|
-
desc = "Postgres query pipe; executes SQL
|
|
96
|
-
arg0 = ("
|
|
95
|
+
desc = "Postgres query pipe; executes SQL over input record['query']."
|
|
96
|
+
arg0 = ("instance", 'instance of database.')
|
|
97
97
|
examples = [
|
|
98
98
|
['myquery.sql', 'postgres:mydb', '-'],
|
|
99
99
|
["{'query': 'SELECT * from MY_TABLE;'}", 'postgres:mydb', '-'],
|
|
100
100
|
["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'postgres:mydb']
|
|
101
101
|
]
|
|
102
102
|
|
|
103
|
-
|
|
104
|
-
|
|
103
|
+
# name, type, default
|
|
104
|
+
config_tuples = [
|
|
105
|
+
('db_name', str, None),
|
|
106
|
+
('host', str, None),
|
|
107
|
+
('user', str, None),
|
|
108
|
+
('password', str, None),
|
|
109
|
+
('port', int, 5432),
|
|
110
|
+
('ssl', bool, False)
|
|
111
|
+
]
|
|
112
|
+
|
|
113
|
+
def __init__(self, ptok: ParsedToken, u: Usage):
|
|
114
|
+
super().__init__(ptok, u)
|
|
105
115
|
|
|
106
|
-
self.
|
|
107
|
-
|
|
108
|
-
self.
|
|
109
|
-
self.
|
|
110
|
-
self.
|
|
111
|
-
self.
|
|
112
|
-
self.db_ssl = bool(config.lookup("ssl", False))
|
|
116
|
+
self.db_name = u.get_config('db_name')
|
|
117
|
+
self.db_host = u.get_config("host")
|
|
118
|
+
self.db_user = u.get_config("user")
|
|
119
|
+
self.db_pass = u.get_config("password")
|
|
120
|
+
self.db_port = u.get_config("port")
|
|
121
|
+
self.db_ssl = u.get_config("ssl")
|
|
113
122
|
|
|
114
|
-
self.query_field = usage.get_param('query_field')
|
|
115
123
|
self.params_field = "params" # optional: list/tuple (positional) or dict (named)
|
|
116
124
|
|
|
117
125
|
def reset(self):
|
|
@@ -124,7 +132,7 @@ class PostgresPipe(QueryPipe,Integration):
|
|
|
124
132
|
Figures out result, rowcount, function automatically.
|
|
125
133
|
"""
|
|
126
134
|
h = {
|
|
127
|
-
"db": self.
|
|
135
|
+
"db": self.db_name,
|
|
128
136
|
"dbhost": self.db_host,
|
|
129
137
|
}
|
|
130
138
|
if params:
|
|
@@ -145,12 +153,12 @@ class PostgresPipe(QueryPipe,Integration):
|
|
|
145
153
|
|
|
146
154
|
return h
|
|
147
155
|
|
|
148
|
-
def
|
|
156
|
+
def execute_query_returning_S_xO_iterable(self, record):
|
|
149
157
|
client = DBClient(
|
|
150
158
|
host=self.db_host,
|
|
151
159
|
username=self.db_user,
|
|
152
160
|
password=self.db_pass,
|
|
153
|
-
|
|
161
|
+
db_name=self.db_name,
|
|
154
162
|
port=self.db_port,
|
|
155
163
|
ssl=self.db_ssl,
|
|
156
164
|
)
|
|
@@ -9,10 +9,9 @@ import uuid
|
|
|
9
9
|
from decimal import Decimal
|
|
10
10
|
from typing import Any, Dict, Optional
|
|
11
11
|
|
|
12
|
-
from pjk.
|
|
12
|
+
from pjk.usage import ParsedToken, TokenError, Usage
|
|
13
13
|
from pjk.pipes.query_pipe import QueryPipe
|
|
14
|
-
from pjk.common import
|
|
15
|
-
|
|
14
|
+
from pjk.common import Integration
|
|
16
15
|
|
|
17
16
|
# ---------- utilities ----------
|
|
18
17
|
|
|
@@ -128,41 +127,34 @@ class SnowflakePipe(QueryPipe, Integration):
|
|
|
128
127
|
Connection/session settings are pulled from ~/.pjk/component_configs.yaml under the arg name.
|
|
129
128
|
"""
|
|
130
129
|
name = 'snowflake'
|
|
131
|
-
desc = "Snowflake query pipe; executes
|
|
132
|
-
arg0 = ('
|
|
130
|
+
desc = "Snowflake query pipe; executes SQL over input record['query']."
|
|
131
|
+
arg0 = ('instance', 'instance of the database.')
|
|
133
132
|
examples = [
|
|
134
133
|
["{'query': 'SELECT CURRENT_ROLE();'}", "snowflake:EDLDB", "-"],
|
|
135
134
|
["myquery.sql", "snowflake:EDLDB", "-"]
|
|
136
135
|
]
|
|
137
136
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
] if not v]
|
|
160
|
-
if missing:
|
|
161
|
-
raise TokenError(
|
|
162
|
-
f"config entry '{self.dbname}' missing: {', '.join(missing)}"
|
|
163
|
-
)
|
|
164
|
-
|
|
165
|
-
self.query_field = usage.get_param('query_field')
|
|
137
|
+
# name, type, default
|
|
138
|
+
config_tuples = [
|
|
139
|
+
("account", str, None),
|
|
140
|
+
("user", str, None),
|
|
141
|
+
("authenticator", str, None),
|
|
142
|
+
("role", str, None),
|
|
143
|
+
("warehouse", str, None),
|
|
144
|
+
("schema", str, None),
|
|
145
|
+
('db_name', str, None)
|
|
146
|
+
]
|
|
147
|
+
|
|
148
|
+
def __init__(self, ptok: ParsedToken, u: Usage):
|
|
149
|
+
super().__init__(ptok, u)
|
|
150
|
+
self.sf_account = u.get_config("account")
|
|
151
|
+
self.sf_user = u.get_config("user")
|
|
152
|
+
self.sf_auth = u.get_config("authenticator")
|
|
153
|
+
self.sf_role = u.get_config("role")
|
|
154
|
+
self.sf_wh = u.get_config("warehouse")
|
|
155
|
+
self.sf_schema = u.get_config("schema")
|
|
156
|
+
self.sf_db = u.get_config('db_name')
|
|
157
|
+
|
|
166
158
|
self.params_field = "params" # optional: list/tuple (positional) or dict (named)
|
|
167
159
|
|
|
168
160
|
def reset(self):
|
|
@@ -174,7 +166,7 @@ class SnowflakePipe(QueryPipe, Integration):
|
|
|
174
166
|
Build a header record with query metadata and session context.
|
|
175
167
|
"""
|
|
176
168
|
h: Dict[str, Any] = {
|
|
177
|
-
"
|
|
169
|
+
"db_name": self.sf_db,
|
|
178
170
|
"account": self.sf_account,
|
|
179
171
|
"role": self.sf_role,
|
|
180
172
|
"warehouse": self.sf_wh,
|
|
@@ -198,7 +190,7 @@ class SnowflakePipe(QueryPipe, Integration):
|
|
|
198
190
|
h["result"] = "ok"
|
|
199
191
|
return h
|
|
200
192
|
|
|
201
|
-
def
|
|
193
|
+
def execute_query_returning_S_xO_iterable(self, record):
|
|
202
194
|
client = SnowflakeClient(
|
|
203
195
|
account=self.sf_account,
|
|
204
196
|
user=self.sf_user,
|
|
@@ -4,15 +4,14 @@
|
|
|
4
4
|
#!/usr/bin/env python
|
|
5
5
|
import sys
|
|
6
6
|
import os
|
|
7
|
-
import signal
|
|
8
7
|
import shlex
|
|
8
|
+
import shutil
|
|
9
9
|
from typing import List
|
|
10
10
|
from pjk.parser import ExpressionParser
|
|
11
|
-
from pjk.
|
|
11
|
+
from pjk.usage import UsageError
|
|
12
12
|
from pjk.log import init as init_logging
|
|
13
13
|
from datetime import datetime
|
|
14
14
|
from pathlib import Path
|
|
15
|
-
import shutil
|
|
16
15
|
import traceback
|
|
17
16
|
import concurrent.futures
|
|
18
17
|
from pjk.registry import ComponentRegistry
|
|
@@ -70,12 +69,10 @@ def execute_threaded(sinks, stop_progress=None):
|
|
|
70
69
|
def initialize():
|
|
71
70
|
init_logging()
|
|
72
71
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
dst_dir =
|
|
76
|
-
|
|
77
|
-
shutil.copy(src, dst_dir / src.name)
|
|
78
|
-
'''
|
|
72
|
+
#src = Path("src/pjk/resources/configs.tmpl")
|
|
73
|
+
#dst_dir = Path.home() / ".pjk"
|
|
74
|
+
#dst_dir.mkdir(parents=True, exist_ok=True)
|
|
75
|
+
#hutil.copy(src, dst_dir / src.name)
|
|
79
76
|
|
|
80
77
|
def execute(command: str):
|
|
81
78
|
tokens = shlex.split(command, comments=True, posix=True)
|
|
@@ -5,11 +5,21 @@ from pjk.pipes.factory import PipeFactory
|
|
|
5
5
|
from pjk.sources.factory import SourceFactory
|
|
6
6
|
from pjk.sinks.factory import SinkFactory
|
|
7
7
|
from pjk.parser import ExpressionParser
|
|
8
|
-
from pjk.
|
|
8
|
+
from pjk.components import Source, Pipe, Sink
|
|
9
|
+
from pjk.usage import Usage, ParsedToken
|
|
9
10
|
from pjk.registry import ComponentRegistry
|
|
10
11
|
from pjk.common import pager_stdout, highlight
|
|
11
12
|
from contextlib import nullcontext
|
|
12
13
|
|
|
14
|
+
def get_base_class(usage: Usage, as_string: bool = False):
|
|
15
|
+
if issubclass(usage.comp_class, Sink):
|
|
16
|
+
return 'sink' if as_string else Sink
|
|
17
|
+
elif issubclass(usage.comp_class, Pipe):
|
|
18
|
+
return 'pipe' if as_string else Pipe
|
|
19
|
+
elif issubclass(usage.comp_class, Source):
|
|
20
|
+
return 'source' if as_string else Source
|
|
21
|
+
raise 'improper class'
|
|
22
|
+
|
|
13
23
|
def smart_print(expr_tokens: list[str], name: str):
|
|
14
24
|
import re
|
|
15
25
|
SAFE_UNQUOTED_RE = re.compile(r"^[a-zA-Z0-9._/:=+-]+$")
|
|
@@ -59,13 +69,13 @@ def do_all_man(registry: ComponentRegistry, no_pager: bool = True):
|
|
|
59
69
|
print()
|
|
60
70
|
|
|
61
71
|
def print_man(registry: ComponentRegistry, name: str, usage: Usage):
|
|
62
|
-
comp_type =
|
|
72
|
+
comp_type = get_base_class(usage, as_string=True)
|
|
63
73
|
header = f'{name} is a {comp_type}'
|
|
74
|
+
|
|
64
75
|
print('===================================')
|
|
65
76
|
print(' ', highlight(header, 'bold', name))
|
|
66
77
|
print('===================================')
|
|
67
78
|
|
|
68
|
-
print()
|
|
69
79
|
print(usage.get_usage_text())
|
|
70
80
|
|
|
71
81
|
examples = usage.get_examples()
|
|
@@ -88,8 +98,9 @@ def do_examples(token:str, registry: ComponentRegistry):
|
|
|
88
98
|
for name, comp_class in factory.get_component_name_class_tuples():
|
|
89
99
|
usage = comp_class.usage()
|
|
90
100
|
|
|
91
|
-
comp_type =
|
|
101
|
+
comp_type = get_base_class(usage, as_string=True)
|
|
92
102
|
header = f'{name} is a {comp_type}'
|
|
103
|
+
|
|
93
104
|
print('===================================')
|
|
94
105
|
print(' ', highlight(header, 'bold', name))
|
|
95
106
|
print('===================================')
|