python-jack-knife 0.6.6__tar.gz → 0.6.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/PKG-INFO +1 -1
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/integrations/opensearch_client.py +18 -1
- python_jack_knife-0.6.7/src/pjk/integrations/opensearch_index_sink.py +94 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/integrations/opensearch_query_pipe.py +16 -26
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/select.py +1 -1
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/factory.py +2 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/json_source.py +19 -8
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/usage.py +8 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/version.py +1 -1
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/python_jack_knife.egg-info/SOURCES.txt +1 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/LICENSE +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/README.md +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/pyproject.toml +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/setup.cfg +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/__init__.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/common.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/components.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/integrations/postgres_pipe.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/integrations/snowflake_pipe.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/log.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/main.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/man_page.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/parser.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/__init__.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/denorm.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/factory.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/filter.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/head.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/join.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/let_reduce.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/map.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/move_field.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/progress_pipe.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/query_pipe.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/remove_field.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/sample.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/sort.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/tail.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/user_pipe_factory.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/pipes/where.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/progress.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/registry.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/__init__.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/create_sink.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/csv_sink.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/devnull.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/dir_sink.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/expect.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/format_sink.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/graph.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/graph_bar_line.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/graph_cumulative.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/graph_hist.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/graph_scatter.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/json_sink.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/s3_sink.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/s3_stream.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/sinks.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/stdout.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/tsv_sink.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sinks/user_sink_factory.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/__init__.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/configs_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/csv_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/dir_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/factory.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/favorite_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/format_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/inline_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/lazy_file.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/lazy_file_local.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/lazy_file_s3.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/macro_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/npy_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/parquet_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/s3_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/source_list.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/sql_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/tsv_source.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/sources/user_source_factory.py +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/python_jack_knife.egg-info/requires.txt +0 -0
- {python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/python_jack_knife.egg-info/top_level.txt +0 -0
{python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/integrations/opensearch_client.py
RENAMED
|
@@ -1,7 +1,24 @@
|
|
|
1
1
|
from pjk.usage import Usage
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
# name, type, default
|
|
4
|
+
OS_CONFIG_TUPLES = [
|
|
5
|
+
("default_index", str, None),
|
|
6
|
+
("os_auth_use_aws", bool, "true"),
|
|
7
|
+
("os_scheme", str, "https"),
|
|
8
|
+
("os_verify_certs", bool, "true"),
|
|
9
|
+
("os_ca_certs", str, None),
|
|
10
|
+
("os_region", str, None),
|
|
11
|
+
("os_service", str, "es"),
|
|
12
|
+
("os_username", str, None),
|
|
13
|
+
("os_password", str, None),
|
|
14
|
+
("os_timeout", float, 30),
|
|
15
|
+
("os_ssl_assert_hostname", bool, "true"),
|
|
16
|
+
("os_ssl_show_warn", bool, "false"),
|
|
17
|
+
("os_host", str, None),
|
|
18
|
+
("os_port", int, None)
|
|
19
|
+
]
|
|
4
20
|
|
|
21
|
+
class OpenSearchClient:
|
|
5
22
|
@classmethod
|
|
6
23
|
def get_client(cls, u: Usage):
|
|
7
24
|
aws_auth = u.get_config("os_auth_use_aws")
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# pjk/integrations/opensearch_index_sink.py
|
|
2
|
+
|
|
3
|
+
from pjk.components import Sink
|
|
4
|
+
from pjk.common import Integration
|
|
5
|
+
from pjk.usage import ParsedToken, Usage, CONFIG_FILE
|
|
6
|
+
from pjk.integrations.opensearch_client import OpenSearchClient, OS_CONFIG_TUPLES
|
|
7
|
+
from opensearchpy import helpers
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
|
|
10
|
+
examples = [
|
|
11
|
+
["index_docs.json", 'let:index:myidx', 'os_index:myinst'],
|
|
12
|
+
["{'_command': 'create', 'config': {settings: {...}, mappings: {...}}, 'index': 'myidx'}", 'os_index:myinst'],
|
|
13
|
+
["{'_command': 'delete_index', 'index': 'myidx'}", "os_index:myinst"]
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
class OpenSearchIndexSink(Sink, Integration):
    """Sink that bulk-indexes records into OpenSearch.

    Every input record must carry an 'index' field naming the target index.
    A record that also carries a '_command' field is treated as an
    administrative command ('create' or 'delete_index') rather than as a
    document; commands are only honoured when the 'DDHHMM_key' param equals
    the current day/hour/minute.
    """

    @classmethod
    def usage(cls):
        """Build the Usage descriptor (arg, params, config tuples, examples)."""
        usage = Usage(
            name="os_index",
            component_class=cls,
            desc="Opensearch indexer sink. All input records must contain 'index' field.\nOptional param 'id_field' can be specified."
        )
        usage.def_arg("instance", f"Instance in {CONFIG_FILE} to index into.")
        usage.def_param('id_field', usage='field to be used as unique id')
        usage.def_param('DDHHMM_key', usage='All commands require key equal to current day, hour, minute', is_num=True, default="0")
        usage.def_config_tuples(OS_CONFIG_TUPLES)
        for e in examples:
            usage.def_example(e, None)
        return usage

    def __init__(self, ptok: ParsedToken, usage: Usage):
        super().__init__(ptok, usage)
        self.client = OpenSearchClient.get_client(usage)
        self.bulk_size = 500      # number of buffered actions that triggers a flush
        self.buffer = []          # pending bulk actions, not yet sent
        self.total_written = 0    # count of actions handed to helpers.bulk so far
        self.id_field = usage.get_param('id_field')
        self.ddhhmm_key = usage.get_param('DDHHMM_key')

    def execute_command(self, command: str, index: str, record: dict):
        """Run an administrative command against ``index``.

        Supported commands are 'create' (requires a non-empty 'config' entry
        in ``record``, passed as the request body) and 'delete_index'.

        Raises:
            Exception: if the DDHHMM key does not match the current time, if
                'create' has no config, or if the command is unknown.
        """
        # Safety latch: destructive commands need a key that is only valid
        # for the current minute. strftime gives e.g. "031431"; int() drops
        # the leading zero.
        # NOTE(review): assumes get_param returns a number for is_num params
        # -- confirm against Usage.get_param.
        current_ddhhmm = int(datetime.now().strftime("%d%H%M"))
        if self.ddhhmm_key != current_ddhhmm:
            raise Exception('All os commands require the DDHHMM_key corresponding to now, e.g. 031431')

        if command == 'create':
            config = record.pop('config', None)
            if not config:
                raise Exception('create command missing config object.')
            self.client.indices.create(index=index, body=config)

        elif command == 'delete_index':
            self.client.indices.delete(index=index)

        else:
            raise Exception(f'unknown os command: {command}')

    def process(self):
        """Consume the input, executing commands and bulk-indexing documents.

        Raises:
            Exception: if a record has no (or an empty) 'index' field.
        """
        for record in self.input:
            # 'index' and '_command' are meta fields; pop them so they are
            # not stored as part of the document.
            index = record.pop('index', None)
            if not index:
                raise Exception("All index records must contain an 'index' field")

            command = record.pop('_command', None)
            if command:
                # Write out documents that preceded this command first, so
                # the command (e.g. delete_index) is applied in stream order
                # rather than ahead of still-buffered documents.
                self.flush()
                self.execute_command(command, index, record)
                continue

            action = {
                "_op_type": "index",  # use "create" to fail if doc already exists
                "_index": index,
                "_source": record     # meta fields removed; store only the actual doc
            }

            # Honour falsy-but-valid ids such as 0; only a missing or empty
            # id leaves '_id' unset.
            doc_id = record.get(self.id_field, None)
            if doc_id is not None and doc_id != "":
                action['_id'] = doc_id

            self.buffer.append(action)

            if len(self.buffer) >= self.bulk_size:
                self.flush()

        # Send whatever is left after the input is exhausted.
        self.flush()

    def flush(self):
        """Send all buffered actions via helpers.bulk and empty the buffer."""
        if self.buffer:
            helpers.bulk(self.client, self.buffer)
            self.total_written += len(self.buffer)
            self.buffer.clear()

    def deep_copy(self):
        return None  # until deep sync available
|
{python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/pjk/integrations/opensearch_query_pipe.py
RENAMED
|
@@ -7,7 +7,7 @@ from typing import Optional, Iterator, Dict, Any, Iterable
|
|
|
7
7
|
from pjk.usage import ParsedToken, Usage
|
|
8
8
|
from pjk.pipes.query_pipe import QueryPipe
|
|
9
9
|
from pjk.common import Integration
|
|
10
|
-
from pjk.integrations.opensearch_client import OpenSearchClient
|
|
10
|
+
from pjk.integrations.opensearch_client import OpenSearchClient, OS_CONFIG_TUPLES
|
|
11
11
|
|
|
12
12
|
def build_body_from_string(query_string: str) -> dict:
|
|
13
13
|
if query_string == "*":
|
|
@@ -23,38 +23,24 @@ def build_body_from_string(query_string: str) -> dict:
|
|
|
23
23
|
|
|
24
24
|
class OpenSearchQueryPipe(QueryPipe, Integration):
|
|
25
25
|
name = "os_query"
|
|
26
|
-
desc = "Opensearch query pipe. Uses record['query'] or record['os_query_object'] for os query"
|
|
26
|
+
desc = ("Opensearch query pipe. Uses record['query'] or record['os_query_object'] for os query\n"
|
|
27
|
+
"An instance may define 'default_index' otherwise the query object must include an 'index' field.\n")
|
|
27
28
|
arg0 = ("instance", "instance to query over.")
|
|
28
29
|
examples = [
|
|
29
|
-
["{'query': '_ping'}", 'os_query:
|
|
30
|
-
["{'query': '*'}", 'os_query:
|
|
31
|
-
["{'query': 'dog'}", 'os_query:
|
|
32
|
-
["{'
|
|
33
|
-
["{'os_query_object': {query: {...}}", 'os_query:
|
|
30
|
+
["{'query': '_ping'}", 'os_query:myinst', '-'],
|
|
31
|
+
["{'index': 'myidx', 'query': '*'}", 'os_query:myinst', '-'],
|
|
32
|
+
["{'index': 'myidx', 'query': 'dog AND cat'}", 'os_query:myinst', '-'],
|
|
33
|
+
["{'index': 'myidx', 'query': 'dog'}", 'os_query:myinst', '-'],
|
|
34
|
+
["{'os_query_object': {query: {...}}", 'os_query:myinst', '-', ' # uses instance.default_index'],
|
|
34
35
|
]
|
|
35
36
|
|
|
36
37
|
# name, type, default
|
|
37
|
-
config_tuples =
|
|
38
|
-
("index_name", str, None),
|
|
39
|
-
("os_auth_use_aws", bool, "true"),
|
|
40
|
-
("os_scheme", str, "https"),
|
|
41
|
-
("os_verify_certs", bool, "true"),
|
|
42
|
-
("os_ca_certs", str, None),
|
|
43
|
-
("os_region", str, None),
|
|
44
|
-
("os_service", str, "es"),
|
|
45
|
-
("os_username", str, None),
|
|
46
|
-
("os_password", str, None),
|
|
47
|
-
("os_timeout", float, 30),
|
|
48
|
-
("os_ssl_assert_hostname", bool, "true"),
|
|
49
|
-
("os_ssl_show_warn", bool, "false"),
|
|
50
|
-
("os_host", str, None),
|
|
51
|
-
("os_port", int, None)
|
|
52
|
-
]
|
|
38
|
+
config_tuples = OS_CONFIG_TUPLES
|
|
53
39
|
|
|
54
40
|
def __init__(self, ptok: ParsedToken, usage: Usage):
|
|
55
41
|
super().__init__(ptok, usage)
|
|
56
42
|
|
|
57
|
-
self.index = usage.get_config("
|
|
43
|
+
self.index = usage.get_config("default_index")
|
|
58
44
|
self.client = OpenSearchClient.get_client(usage)
|
|
59
45
|
|
|
60
46
|
# Iteration state
|
|
@@ -83,6 +69,10 @@ class OpenSearchQueryPipe(QueryPipe, Integration):
|
|
|
83
69
|
|
|
84
70
|
def execute_query_returning_S_xO_iterable(self, query_record: dict) -> Iterator[Dict[str, Any]]:
|
|
85
71
|
query_string = query_record.get('query', None)
|
|
72
|
+
query_index = query_record.get('index', None)
|
|
73
|
+
if query_index:
|
|
74
|
+
self.index = query_index # overwrite the default query
|
|
75
|
+
|
|
86
76
|
query_body = None
|
|
87
77
|
|
|
88
78
|
if query_string:
|
|
@@ -131,8 +121,8 @@ class OpenSearchQueryPipe(QueryPipe, Integration):
|
|
|
131
121
|
yield {"_type": "os_query_hit", "_hit": hit}
|
|
132
122
|
|
|
133
123
|
except Exception as e:
|
|
134
|
-
print("OpenSearch query error:", e, file=sys.stderr)
|
|
135
|
-
traceback.print_exc()
|
|
124
|
+
#print("OpenSearch query error:", e, file=sys.stderr)
|
|
125
|
+
#traceback.print_exc()
|
|
136
126
|
yield {
|
|
137
127
|
"_type": "os_query_error",
|
|
138
128
|
"error": str(e),
|
|
@@ -11,7 +11,7 @@ class SelectFields(DeepCopyPipe):
|
|
|
11
11
|
def usage(cls):
|
|
12
12
|
usage = Usage(
|
|
13
13
|
name='sel',
|
|
14
|
-
desc='
|
|
14
|
+
desc='Select specific fields from each record.',
|
|
15
15
|
component_class=cls
|
|
16
16
|
)
|
|
17
17
|
usage.def_arg(name='fields', usage='Comma-separated list of fields to retain')
|
|
@@ -14,6 +14,7 @@ from pjk.sinks.tsv_sink import TSVSink
|
|
|
14
14
|
from pjk.sinks.expect import ExpectSink
|
|
15
15
|
from pjk.sinks.format_sink import FormatSink
|
|
16
16
|
from pjk.sinks.create_sink import CreateSink
|
|
17
|
+
from pjk.integrations.opensearch_index_sink import OpenSearchIndexSink
|
|
17
18
|
from pjk.sinks.user_sink_factory import UserSinkFactory
|
|
18
19
|
|
|
19
20
|
COMPONENTS = {
|
|
@@ -23,6 +24,7 @@ COMPONENTS = {
|
|
|
23
24
|
'json': JsonSink,
|
|
24
25
|
'csv': CSVSink,
|
|
25
26
|
'tsv': TSVSink,
|
|
27
|
+
'os_index': OpenSearchIndexSink,
|
|
26
28
|
'create': CreateSink
|
|
27
29
|
}
|
|
28
30
|
|
|
@@ -6,6 +6,7 @@ from pjk.usage import NoBindUsage
|
|
|
6
6
|
from pjk.components import Source
|
|
7
7
|
from pjk.sources.lazy_file import LazyFile
|
|
8
8
|
from pjk.sources.format_source import FormatSource
|
|
9
|
+
from typing import Any, Dict, Iterable, Optional
|
|
9
10
|
from pjk.log import logger
|
|
10
11
|
|
|
11
12
|
class JsonSource(FormatSource):
|
|
@@ -15,6 +16,16 @@ class JsonSource(FormatSource):
|
|
|
15
16
|
self.lazy_file = lazy_file
|
|
16
17
|
self.num_recs = 0
|
|
17
18
|
|
|
19
|
+
def as_whole_file(self) -> Iterable[Dict[str, Any]]:
    """Parse the entire file as a single JSON document and yield its records.

    A top-level JSON array yields each element in order; any other
    top-level value is yielded once as-is.

    Raises:
        json.JSONDecodeError: if the file content is not valid JSON.
    """
    with self.lazy_file.open() as f:
        # Named 'parsed' rather than 'object' to avoid shadowing the builtin.
        parsed = json.load(f)
        if isinstance(parsed, list):
            yield from parsed
        else:
            yield parsed
|
|
28
|
+
|
|
18
29
|
def __iter__(self):
|
|
19
30
|
with self.lazy_file.open() as f:
|
|
20
31
|
for line in f:
|
|
@@ -22,11 +33,11 @@ class JsonSource(FormatSource):
|
|
|
22
33
|
try:
|
|
23
34
|
yield json.loads(line)
|
|
24
35
|
except json.JSONDecodeError as e:
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
36
|
+
break
|
|
37
|
+
|
|
38
|
+
# attempt to decode the file as a whole
|
|
39
|
+
try:
|
|
40
|
+
yield from self.as_whole_file()
|
|
41
|
+
|
|
42
|
+
except:
|
|
43
|
+
logger.error(f'cannot decode {self.lazy_file.path}')
|
|
@@ -41,6 +41,14 @@ class Config:
|
|
|
41
41
|
f"{CONFIG_FILE} does not contain entry for '{instance_key}' with required params."
|
|
42
42
|
)
|
|
43
43
|
|
|
44
|
+
_alias = entry.get('_alias', None) # _alias must = another entry instance_key
|
|
45
|
+
if _alias:
|
|
46
|
+
entry = self._data.get(_alias, None)
|
|
47
|
+
if not entry:
|
|
48
|
+
raise TokenError(
|
|
49
|
+
f"'{instance_key}:_alias' in {CONFIG_FILE} points to a non-existent entry: '{_alias}'."
|
|
50
|
+
)
|
|
51
|
+
|
|
44
52
|
raw = entry.get(param, param_default)
|
|
45
53
|
|
|
46
54
|
if not raw:
|
{python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/python_jack_knife.egg-info/SOURCES.txt
RENAMED
|
@@ -13,6 +13,7 @@ src/pjk/registry.py
|
|
|
13
13
|
src/pjk/usage.py
|
|
14
14
|
src/pjk/version.py
|
|
15
15
|
src/pjk/integrations/opensearch_client.py
|
|
16
|
+
src/pjk/integrations/opensearch_index_sink.py
|
|
16
17
|
src/pjk/integrations/opensearch_query_pipe.py
|
|
17
18
|
src/pjk/integrations/postgres_pipe.py
|
|
18
19
|
src/pjk/integrations/snowflake_pipe.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/python_jack_knife.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/python_jack_knife.egg-info/requires.txt
RENAMED
|
File without changes
|
{python_jack_knife-0.6.6 → python_jack_knife-0.6.7}/src/python_jack_knife.egg-info/top_level.txt
RENAMED
|
File without changes
|