python-jack-knife 0.6.4__tar.gz → 0.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. {python_jack_knife-0.6.4/src/python_jack_knife.egg-info → python_jack_knife-0.6.5}/PKG-INFO +1 -1
  2. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/common.py +7 -28
  3. python_jack_knife-0.6.5/src/pjk/components.py +138 -0
  4. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/integrations/opensearch_client.py +15 -17
  5. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/integrations/opensearch_query_pipe.py +54 -18
  6. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/integrations/postgres_pipe.py +27 -19
  7. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/integrations/snowflake_pipe.py +27 -35
  8. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/main.py +6 -9
  9. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/man_page.py +15 -4
  10. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/parser.py +72 -42
  11. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/denorm.py +2 -1
  12. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/factory.py +2 -1
  13. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/filter.py +2 -1
  14. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/head.py +2 -1
  15. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/join.py +2 -1
  16. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/let_reduce.py +2 -1
  17. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/map.py +6 -4
  18. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/move_field.py +2 -1
  19. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/progress_pipe.py +1 -1
  20. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/query_pipe.py +18 -9
  21. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/remove_field.py +2 -1
  22. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/sample.py +2 -1
  23. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/select.py +2 -1
  24. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/sort.py +2 -1
  25. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/tail.py +2 -1
  26. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/user_pipe_factory.py +2 -1
  27. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/where.py +3 -1
  28. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/progress.py +1 -1
  29. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/registry.py +1 -1
  30. python_jack_knife-0.6.5/src/pjk/sinks/create_sink.py +107 -0
  31. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/devnull.py +2 -1
  32. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/dir_sink.py +2 -1
  33. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/expect.py +2 -1
  34. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/factory.py +2 -3
  35. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/format_sink.py +5 -4
  36. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/graph.py +2 -1
  37. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/s3_sink.py +1 -1
  38. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/sinks.py +2 -1
  39. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/stdout.py +2 -1
  40. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/tsv_sink.py +1 -1
  41. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/user_sink_factory.py +2 -1
  42. python_jack_knife-0.6.5/src/pjk/sources/configs_source.py +52 -0
  43. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/csv_source.py +2 -1
  44. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/dir_source.py +1 -1
  45. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/factory.py +6 -1
  46. python_jack_knife-0.6.5/src/pjk/sources/favorite_source.py +44 -0
  47. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/format_source.py +3 -1
  48. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/inline_source.py +2 -1
  49. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/json_source.py +2 -1
  50. python_jack_knife-0.6.5/src/pjk/sources/macro_source.py +46 -0
  51. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/npy_source.py +2 -1
  52. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/s3_source.py +1 -1
  53. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/source_list.py +1 -1
  54. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/sql_source.py +2 -1
  55. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/user_source_factory.py +2 -1
  56. python_jack_knife-0.6.4/src/pjk/base.py → python_jack_knife-0.6.5/src/pjk/usage.py +122 -191
  57. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/version.py +1 -1
  58. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5/src/python_jack_knife.egg-info}/PKG-INFO +1 -1
  59. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/python_jack_knife.egg-info/SOURCES.txt +5 -2
  60. python_jack_knife-0.6.4/src/pjk/integrations/ddb_sink.py +0 -54
  61. python_jack_knife-0.6.4/src/pjk/sinks/create_sink.py +0 -110
  62. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/LICENSE +0 -0
  63. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/README.md +0 -0
  64. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/pyproject.toml +0 -0
  65. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/setup.cfg +0 -0
  66. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/__init__.py +0 -0
  67. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/log.py +0 -0
  68. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/pipes/__init__.py +0 -0
  69. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/__init__.py +0 -0
  70. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/csv_sink.py +0 -0
  71. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/graph_bar_line.py +0 -0
  72. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/graph_cumulative.py +0 -0
  73. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/graph_hist.py +0 -0
  74. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/graph_scatter.py +0 -0
  75. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/json_sink.py +0 -0
  76. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sinks/s3_stream.py +0 -0
  77. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/__init__.py +0 -0
  78. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/lazy_file.py +0 -0
  79. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/lazy_file_local.py +0 -0
  80. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/lazy_file_s3.py +0 -0
  81. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/parquet_source.py +0 -0
  82. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/pjk/sources/tsv_source.py +0 -0
  83. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  84. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  85. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/python_jack_knife.egg-info/requires.txt +0 -0
  86. {python_jack_knife-0.6.4 → python_jack_knife-0.6.5}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.6.4
3
+ Version: 0.6.5
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -5,7 +5,13 @@ import sys, shutil, subprocess, contextlib, signal
5
5
  import os
6
6
  import re
7
7
  import yaml
8
- from pjk.base import TokenError, Integration, Source, Pipe
8
+ from pjk.usage import Usage, TokenError
9
+ from abc import ABC
10
+
11
+ # mixin
12
+ # just for distinguishing components for display
13
+ class Integration(ABC):
14
+ pass
9
15
 
10
16
  class SafeNamespace:
11
17
  def __init__(self, obj):
@@ -74,33 +80,6 @@ def highlight(text: str, color: str = 'bold', value: str = None) -> str:
74
80
  style = COLOR_CODES.get(color.lower(), COLOR_CODES['bold'])
75
81
  return text.replace(value, f"{style}{value}{RESET}")
76
82
 
77
- class Config:
78
- def __init__(self, instance_type: str, component_class: Source|Pipe, instance: str):
79
- # instance = name of the instance, e.g. 'myindexcollection', instance_type = 'index'
80
- # instance_type only used by automatic config template maker MUST BE STRING LITERAL!
81
- self.configs_yaml = os.path.expanduser('~/.pjk/component_configs.yaml')
82
- self.class_name = type(component_class).__name__
83
- self.instance = instance
84
- self._data = {}
85
- self._load()
86
-
87
- def _load(self):
88
- if os.path.exists(self.configs_yaml):
89
- with open(self.configs_yaml, 'r') as f:
90
- self._data = yaml.safe_load(f) or {}
91
- else:
92
- self._data = {}
93
-
94
- def lookup(self, param: str, default=None):
95
- instance_key = f'{self.class_name}-{self.instance}'
96
- entry = self._data.get(instance_key, None)
97
- if not entry:
98
- raise TokenError(
99
- f"~/.pjk/component_configs.yaml does not contain entry for '{instance_key}' with required params."
100
- )
101
-
102
- return entry.get(param, default)
103
-
104
83
  class ComponentFactory:
105
84
  def __init__(self, core_components: dict):
106
85
  self.num_orig = 0
@@ -0,0 +1,138 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ from abc import ABC, abstractmethod
5
+ from typing import Any, Optional, List
6
+ from pjk.usage import Usage, NoBindUsage, ParsedToken
7
+
8
+ # mixin
9
+ class KeyedSource(ABC):
10
+ @classmethod
11
+ def usage(cls):
12
+ return Usage(
13
+ name=cls.__name__,
14
+ desc=f"{cls.__name__} component"
15
+ )
16
+
17
+ @abstractmethod
18
+ def lookup(self, left_rec) -> Optional[dict]:
19
+ """Return the record associated with the given key, or None."""
20
+ pass
21
+
22
+ def get_unlookedup_records(self) -> List[Any]:
23
+ # for outer join
24
+ pass
25
+
26
+ def deep_copy(self):
27
+ return None
28
+
29
+ class Source(ABC):
30
+ @classmethod
31
+ def usage(cls):
32
+ return NoBindUsage(
33
+ name=cls.__name__,
34
+ desc=f"{cls.__name__} component",
35
+ component_class=cls
36
+ )
37
+
38
+ @abstractmethod
39
+ def __iter__(self):
40
+ raise NotImplementedError("__iter__ must be implemented by subclasses")
41
+
42
+ def __next__(self):
43
+ # lazily create an internal iterator the first time next() is called
44
+ if not hasattr(self, "_iter"):
45
+ self._iter = iter(self)
46
+ return next(self._iter)
47
+
48
+ def deep_copy(self):
49
+ return None # Default: not copyable unless overridden
50
+
51
+ def close(self):
52
+ pass
53
+
54
+ def _get_sources(self, source_list: list):
55
+ pass
56
+
57
+ class Pipe(Source):
58
+ arity: int = 1
59
+
60
+ def __init__(self, ptok: ParsedToken, usage: Usage = None):
61
+ self.ptok = ptok
62
+ self.usage = usage
63
+ self.left = None # left source for convenience
64
+ self.right = None # right source for convenience
65
+ self.inputs: List[Source] = []
66
+
67
+ def add_source(self, source: Source) -> None:
68
+ self.inputs.append(source)
69
+ # first two are assigned left, right
70
+ if self.left is None:
71
+ self.left = source
72
+ elif self.right is None:
73
+ self.right = self.left
74
+ self.left = source
75
+
76
+ def reset(self):
77
+ pass # optional hook
78
+
79
+ def deep_copy(self) -> Optional["Pipe"]:
80
+ return None
81
+
82
+ def _get_sources(self, source_list: list):
83
+ for ix in self.inputs:
84
+ source_list.append(ix)
85
+ ix._get_sources(source_list)
86
+
87
+ class DeepCopyPipe(Pipe):
88
+ def deep_copy(self):
89
+ """
90
+ Generic deep_copy: clone left source, re-instantiate
91
+ this pipe class with the same ptok/usage, and attach.
92
+ """
93
+ source_clone = self.left.deep_copy()
94
+ if not source_clone:
95
+ return None
96
+
97
+ # re-instantiate using the actual subclass
98
+ pipe = type(self)(self.ptok, self.usage)
99
+ pipe.add_source(source_clone)
100
+ return pipe
101
+
102
+ class Sink(ABC):
103
+ @classmethod
104
+ def usage(cls):
105
+ return NoBindUsage(
106
+ name=cls.__name__,
107
+ desc=f"{cls.__name__} component",
108
+ component_class=cls
109
+ )
110
+
111
+ def __init__(self, ptok: ParsedToken, usage: Usage = None):
112
+ self.ptok = ptok
113
+ self.usage = usage
114
+
115
+ def drain(self):
116
+ self.process()
117
+ self.close()
118
+
119
+ # get all inputs in the execution chain for closing
120
+ inputs = [self.input]
121
+ self.input._get_sources(inputs)
122
+ for input in inputs:
123
+ input.close()
124
+
125
+ # optional
126
+ def close(self):
127
+ pass
128
+
129
+ def add_source(self, source: Source) -> None:
130
+ self.input = source
131
+
132
+ @abstractmethod
133
+ def process(self) -> None:
134
+ pass
135
+
136
+ def deep_copy(self):
137
+ return None
138
+
@@ -1,28 +1,26 @@
1
- from pjk.common import Config
1
+ from pjk.usage import Usage
2
2
 
3
3
  class OpenSearchClient:
4
4
 
5
5
  @classmethod
6
- def get_client(cls, config: Config):
7
- aws_auth = config.lookup("os_auth_use_aws", "true") == 'true'
8
- scheme = config.lookup("os_scheme", "https")
9
- verify_certs = config.lookup("os_verify_certs", "true") == 'true'
10
- ca_certs = config.lookup("os_ca_certs", None)
11
- region = config.lookup("os_region", None)
12
- service = config.lookup("os_service", "es")
13
- username = config.lookup("os_username", None)
14
- password = config.lookup("os_password", None)
15
- timeout = float(config.lookup("os_timeout", 30))
16
- ssl_assert_hostname = config.lookup("os_ssl_assert_hostname", "true") == 'true'
17
- ssl_show_warn = config.lookup("os_ssl_show_warn", "false") == 'true'
18
- host = config.lookup("os_host", None)
19
- port = config.lookup("os_port", None)
6
+ def get_client(cls, u: Usage):
7
+ aws_auth = u.get_config("os_auth_use_aws")
8
+ scheme = u.get_config("os_scheme")
9
+ verify_certs = u.get_config("os_verify_certs")
10
+ ca_certs = u.get_config("os_ca_certs")
11
+ region = u.get_config("os_region")
12
+ service = u.get_config("os_service")
13
+ username = u.get_config("os_username")
14
+ password = u.get_config("os_password")
15
+ timeout = u.get_config("os_timeout")
16
+ ssl_assert_hostname = u.get_config("os_ssl_assert_hostname")
17
+ ssl_show_warn = u.get_config("os_ssl_show_warn")
18
+ host = u.get_config("os_host")
19
+ port = u.get_config("os_port")
20
20
 
21
21
  # Reasonable port defaults
22
22
  if port is None:
23
23
  port = 443 if scheme == "https" else 9200
24
- else:
25
- port = int(port)
26
24
 
27
25
  if host is None:
28
26
  raise ValueError("Config os_host is required (set os_host + os_port/os_scheme, or a connection profile).")
@@ -4,9 +4,9 @@ import traceback
4
4
  from copy import deepcopy
5
5
  from typing import Optional, Iterator, Dict, Any, Iterable
6
6
 
7
- from pjk.base import Pipe, ParsedToken, Usage, Integration
7
+ from pjk.usage import ParsedToken, Usage
8
8
  from pjk.pipes.query_pipe import QueryPipe
9
- from pjk.common import Config
9
+ from pjk.common import Integration
10
10
  from pjk.integrations.opensearch_client import OpenSearchClient
11
11
 
12
12
  def build_body_from_string(query_string: str) -> dict:
@@ -23,24 +23,39 @@ def build_body_from_string(query_string: str) -> dict:
23
23
 
24
24
  class OpenSearchQueryPipe(QueryPipe, Integration):
25
25
  name = "os_query"
26
- desc = "Opensearch query pipe. Uses record['query_string'] or record['query_object'] for os query"
27
- arg0 = ("index", "index to query over")
26
+ desc = "Opensearch query pipe. Uses record['query'] or record['os_query_object'] for os query"
27
+ arg0 = ("instance", "instance to query over.")
28
28
  examples = [
29
- ["{'query_string': '*'}", 'os_query:myindex', '-'],
30
- ["{'query_string': 'dog'}", 'os_query:myindex', '-'],
31
- ["{'query_string': 'dog AND cat'}", 'os_query:myindex', '-'],
32
- ["{'query_object': {query: {...}}", 'os_query:myindex', '-'],
29
+ ["{'query': '_ping'}", 'os_query:myindex', '-'],
30
+ ["{'query': '*'}", 'os_query:myindex', '-'],
31
+ ["{'query': 'dog'}", 'os_query:myindex', '-'],
32
+ ["{'query': 'dog AND cat'}", 'os_query:myindex', '-'],
33
+ ["{'os_query_object': {query: {...}}", 'os_query:myindex', '-'],
34
+ ]
35
+
36
+ # name, type, default
37
+ config_tuples = [
38
+ ("index_name", str, None),
39
+ ("os_auth_use_aws", bool, "true"),
40
+ ("os_scheme", str, "https"),
41
+ ("os_verify_certs", bool, "true"),
42
+ ("os_ca_certs", str, None),
43
+ ("os_region", str, None),
44
+ ("os_service", str, "es"),
45
+ ("os_username", str, None),
46
+ ("os_password", str, None),
47
+ ("os_timeout", float, 30),
48
+ ("os_ssl_assert_hostname", bool, "true"),
49
+ ("os_ssl_show_warn", bool, "false"),
50
+ ("os_host", str, None),
51
+ ("os_port", int, None)
33
52
  ]
34
53
 
35
54
  def __init__(self, ptok: ParsedToken, usage: Usage):
36
55
  super().__init__(ptok, usage)
37
56
 
38
- # index from arg0 or config
39
- self.index = ptok.get_arg(0)
40
-
41
- # Build the OpenSearch client (handles AWS/basic/none)
42
- config = Config('index', self, self.index)
43
- self.client = OpenSearchClient.get_client(config)
57
+ self.index = usage.get_config("index_name")
58
+ self.client = OpenSearchClient.get_client(usage)
44
59
 
45
60
  # Iteration state
46
61
  self.cur_record: Optional[Dict[str, Any]] = None
@@ -53,14 +68,35 @@ class OpenSearchQueryPipe(QueryPipe, Integration):
53
68
  def close(self):
54
69
  pass
55
70
 
56
- def execute_query_returning_Q_xR_iterable(self, query_record: dict) -> Iterator[Dict[str, Any]]:
57
- query_string = query_record.get('query_string', None)
71
+ def ping(self):
72
+ indexes = self.client.indices.get_alias(index="*")
73
+ index_list = []
74
+
75
+ yield {'num_indexes': len(indexes.keys())}
76
+ for index_name in sorted(indexes.keys()):
77
+ try:
78
+ count = self.client.count(index=index_name)["count"]
79
+ yield {'index': index_name, 'count': count}
80
+
81
+ except Exception as e:
82
+ print(f"{index_name}: failed to count ({e})")
83
+
84
+ def execute_query_returning_S_xO_iterable(self, query_record: dict) -> Iterator[Dict[str, Any]]:
85
+ query_string = query_record.get('query', None)
58
86
  query_body = None
59
87
 
60
88
  if query_string:
89
+ if query_string == '_ping':
90
+ yield from self.ping()
91
+ return
92
+
61
93
  query_body = build_body_from_string(query_string)
62
94
  else:
63
- query_body = query_record.get('query_object')
95
+ query_body = query_record.get('os_query_object')
96
+
97
+ if not query_body:
98
+ yield {'_error': "query_record missing 'query' or 'os_query_object' field"}
99
+ return
64
100
 
65
101
  try:
66
102
  # Build final request body
@@ -83,7 +119,7 @@ class OpenSearchQueryPipe(QueryPipe, Integration):
83
119
  "took_ms": took,
84
120
  "total_hits": total_hits,
85
121
  "index": self.index,
86
- "os_query_body": req_body
122
+ "os_query_object": req_body
87
123
  }
88
124
 
89
125
  # Emit each hit
@@ -9,8 +9,8 @@ import uuid
9
9
  from decimal import Decimal
10
10
  from typing import Any, Dict, Optional
11
11
 
12
- from pjk.base import Integration, ParsedToken, Usage
13
- from pjk.common import Config
12
+ from pjk.usage import ParsedToken, Usage
13
+ from pjk.common import Integration
14
14
  from pjk.pipes.query_pipe import QueryPipe
15
15
 
16
16
 
@@ -19,11 +19,11 @@ class DBClient:
19
19
  _connection = None
20
20
 
21
21
  def __init__(self, host: str, username: str, password: Optional[str],
22
- dbname: str, port: int = 5432, ssl: bool = False):
22
+ db_name: str, port: int = 5432, ssl: bool = False):
23
23
  import pg8000 # lazy import
24
24
  if DBClient._connection is None:
25
25
  try:
26
- kwargs = dict(user=username, password=password, host=host, database=dbname, port=port)
26
+ kwargs = dict(user=username, password=password, host=host, database=db_name, port=port)
27
27
  if ssl:
28
28
  import ssl as _ssl
29
29
  kwargs["ssl_context"] = _ssl.create_default_context()
@@ -92,26 +92,34 @@ def _row_to_dict(cursor, row) -> Dict[str, Any]:
92
92
 
93
93
  class PostgresPipe(QueryPipe,Integration):
94
94
  name = 'postgres'
95
- desc = "Postgres query pipe; executes SQL from input."
96
- arg0 = ("dbname", 'database name.')
95
+ desc = "Postgres query pipe; executes SQL over input record['query']."
96
+ arg0 = ("instance", 'instance of database.')
97
97
  examples = [
98
98
  ['myquery.sql', 'postgres:mydb', '-'],
99
99
  ["{'query': 'SELECT * from MY_TABLE;'}", 'postgres:mydb', '-'],
100
100
  ["{'query': 'SELECT * FROM pg_catalog.pg_tables;'}", 'postgres:mydb']
101
101
  ]
102
102
 
103
- def __init__(self, ptok: ParsedToken, usage: Usage):
104
- super().__init__(ptok, usage)
103
+ # name, type, default
104
+ config_tuples = [
105
+ ('db_name', str, None),
106
+ ('host', str, None),
107
+ ('user', str, None),
108
+ ('password', str, None),
109
+ ('port', int, 5432),
110
+ ('ssl', bool, False)
111
+ ]
112
+
113
+ def __init__(self, ptok: ParsedToken, u: Usage):
114
+ super().__init__(ptok, u)
105
115
 
106
- self.dbname = usage.get_arg("dbname")
107
- config = Config('dbname', self, self.dbname)
108
- self.db_host = config.lookup("host")
109
- self.db_user = config.lookup("user")
110
- self.db_pass = config.lookup("password")
111
- self.db_port = int(config.lookup("port", 5432))
112
- self.db_ssl = bool(config.lookup("ssl", False))
116
+ self.db_name = u.get_config('db_name')
117
+ self.db_host = u.get_config("host")
118
+ self.db_user = u.get_config("user")
119
+ self.db_pass = u.get_config("password")
120
+ self.db_port = u.get_config("port")
121
+ self.db_ssl = u.get_config("ssl")
113
122
 
114
- self.query_field = usage.get_param('query_field')
115
123
  self.params_field = "params" # optional: list/tuple (positional) or dict (named)
116
124
 
117
125
  def reset(self):
@@ -124,7 +132,7 @@ class PostgresPipe(QueryPipe,Integration):
124
132
  Figures out result, rowcount, function automatically.
125
133
  """
126
134
  h = {
127
- "db": self.dbname,
135
+ "db": self.db_name,
128
136
  "dbhost": self.db_host,
129
137
  }
130
138
  if params:
@@ -145,12 +153,12 @@ class PostgresPipe(QueryPipe,Integration):
145
153
 
146
154
  return h
147
155
 
148
- def execute_query_returning_Q_xR_iterable(self, record):
156
+ def execute_query_returning_S_xO_iterable(self, record):
149
157
  client = DBClient(
150
158
  host=self.db_host,
151
159
  username=self.db_user,
152
160
  password=self.db_pass,
153
- dbname=self.dbname,
161
+ db_name=self.db_name,
154
162
  port=self.db_port,
155
163
  ssl=self.db_ssl,
156
164
  )
@@ -9,10 +9,9 @@ import uuid
9
9
  from decimal import Decimal
10
10
  from typing import Any, Dict, Optional
11
11
 
12
- from pjk.base import ParsedToken, TokenError, Usage, Integration
12
+ from pjk.usage import ParsedToken, TokenError, Usage
13
13
  from pjk.pipes.query_pipe import QueryPipe
14
- from pjk.common import Config
15
-
14
+ from pjk.common import Integration
16
15
 
17
16
  # ---------- utilities ----------
18
17
 
@@ -128,41 +127,34 @@ class SnowflakePipe(QueryPipe, Integration):
128
127
  Connection/session settings are pulled from ~/.pjk/component_configs.yaml under the arg name.
129
128
  """
130
129
  name = 'snowflake'
131
- desc = "Snowflake query pipe; executes an SQL query for each input record."
132
- arg0 = ('dbname', 'database name.')
130
+ desc = "Snowflake query pipe; executes SQL over input record['query']."
131
+ arg0 = ('instance', 'instance of the database.')
133
132
  examples = [
134
133
  ["{'query': 'SELECT CURRENT_ROLE();'}", "snowflake:EDLDB", "-"],
135
134
  ["myquery.sql", "snowflake:EDLDB", "-"]
136
135
  ]
137
136
 
138
- def __init__(self, ptok: ParsedToken, usage: Usage):
139
- super().__init__(ptok, usage)
140
-
141
- self.dbname = usage.get_arg('dbname')
142
- config = Config('dbname', self, self.dbname)
143
- self.sf_account = config.lookup("account")
144
- self.sf_user = config.lookup("user")
145
- self.sf_auth = config.lookup("authenticator")
146
- self.sf_role = config.lookup("role")
147
- self.sf_wh = config.lookup("warehouse")
148
- self.sf_schema = config.lookup("schema")
149
- self.sf_db = self.dbname
150
-
151
- # Basic validation
152
- missing = [k for k, v in [
153
- ("account", self.sf_account),
154
- ("user", self.sf_user),
155
- ("authenticator|password", self.sf_auth or self.sf_password),
156
- ("role", self.sf_role),
157
- ("warehouse", self.sf_wh),
158
- ("schema", self.sf_schema),
159
- ] if not v]
160
- if missing:
161
- raise TokenError(
162
- f"config entry '{self.dbname}' missing: {', '.join(missing)}"
163
- )
164
-
165
- self.query_field = usage.get_param('query_field')
137
+ # name, type, default
138
+ config_tuples = [
139
+ ("account", str, None),
140
+ ("user", str, None),
141
+ ("authenticator", str, None),
142
+ ("role", str, None),
143
+ ("warehouse", str, None),
144
+ ("schema", str, None),
145
+ ('db_name', str, None)
146
+ ]
147
+
148
+ def __init__(self, ptok: ParsedToken, u: Usage):
149
+ super().__init__(ptok, u)
150
+ self.sf_account = u.get_config("account")
151
+ self.sf_user = u.get_config("user")
152
+ self.sf_auth = u.get_config("authenticator")
153
+ self.sf_role = u.get_config("role")
154
+ self.sf_wh = u.get_config("warehouse")
155
+ self.sf_schema = u.get_config("schema")
156
+ self.sf_db = u.get_config('db_name')
157
+
166
158
  self.params_field = "params" # optional: list/tuple (positional) or dict (named)
167
159
 
168
160
  def reset(self):
@@ -174,7 +166,7 @@ class SnowflakePipe(QueryPipe, Integration):
174
166
  Build a header record with query metadata and session context.
175
167
  """
176
168
  h: Dict[str, Any] = {
177
- "db": self.dbname,
169
+ "db_name": self.sf_db,
178
170
  "account": self.sf_account,
179
171
  "role": self.sf_role,
180
172
  "warehouse": self.sf_wh,
@@ -198,7 +190,7 @@ class SnowflakePipe(QueryPipe, Integration):
198
190
  h["result"] = "ok"
199
191
  return h
200
192
 
201
- def execute_query_returning_Q_xR_iterable(self, record):
193
+ def execute_query_returning_S_xO_iterable(self, record):
202
194
  client = SnowflakeClient(
203
195
  account=self.sf_account,
204
196
  user=self.sf_user,
@@ -4,15 +4,14 @@
4
4
  #!/usr/bin/env python
5
5
  import sys
6
6
  import os
7
- import signal
8
7
  import shlex
8
+ import shutil
9
9
  from typing import List
10
10
  from pjk.parser import ExpressionParser
11
- from pjk.base import UsageError
11
+ from pjk.usage import UsageError
12
12
  from pjk.log import init as init_logging
13
13
  from datetime import datetime
14
14
  from pathlib import Path
15
- import shutil
16
15
  import traceback
17
16
  import concurrent.futures
18
17
  from pjk.registry import ComponentRegistry
@@ -70,12 +69,10 @@ def execute_threaded(sinks, stop_progress=None):
70
69
  def initialize():
71
70
  init_logging()
72
71
 
73
- '''
74
- src = Path("src/pjk/resources/component_configs.tmpl")
75
- dst_dir = Path.home() / ".pjk"
76
- dst_dir.mkdir(parents=True, exist_ok=True)
77
- shutil.copy(src, dst_dir / src.name)
78
- '''
72
+ #src = Path("src/pjk/resources/configs.tmpl")
73
+ #dst_dir = Path.home() / ".pjk"
74
+ #dst_dir.mkdir(parents=True, exist_ok=True)
75
+ #shutil.copy(src, dst_dir / src.name)
79
76
 
80
77
  def execute(command: str):
81
78
  tokens = shlex.split(command, comments=True, posix=True)
@@ -5,11 +5,21 @@ from pjk.pipes.factory import PipeFactory
5
5
  from pjk.sources.factory import SourceFactory
6
6
  from pjk.sinks.factory import SinkFactory
7
7
  from pjk.parser import ExpressionParser
8
- from pjk.base import Usage
8
+ from pjk.components import Source, Pipe, Sink
9
+ from pjk.usage import Usage, ParsedToken
9
10
  from pjk.registry import ComponentRegistry
10
11
  from pjk.common import pager_stdout, highlight
11
12
  from contextlib import nullcontext
12
13
 
14
+ def get_base_class(usage: Usage, as_string: bool = False):
15
+ if issubclass(usage.comp_class, Sink):
16
+ return 'sink' if as_string else Sink
17
+ elif issubclass(usage.comp_class, Pipe):
18
+ return 'pipe' if as_string else Pipe
19
+ elif issubclass(usage.comp_class, Source):
20
+ return 'source' if as_string else Source
21
+ raise 'improper class'
22
+
13
23
  def smart_print(expr_tokens: list[str], name: str):
14
24
  import re
15
25
  SAFE_UNQUOTED_RE = re.compile(r"^[a-zA-Z0-9._/:=+-]+$")
@@ -59,13 +69,13 @@ def do_all_man(registry: ComponentRegistry, no_pager: bool = True):
59
69
  print()
60
70
 
61
71
  def print_man(registry: ComponentRegistry, name: str, usage: Usage):
62
- comp_type = usage.get_base_class(as_string=True)
72
+ comp_type = get_base_class(usage, as_string=True)
63
73
  header = f'{name} is a {comp_type}'
74
+
64
75
  print('===================================')
65
76
  print(' ', highlight(header, 'bold', name))
66
77
  print('===================================')
67
78
 
68
- print()
69
79
  print(usage.get_usage_text())
70
80
 
71
81
  examples = usage.get_examples()
@@ -88,8 +98,9 @@ def do_examples(token:str, registry: ComponentRegistry):
88
98
  for name, comp_class in factory.get_component_name_class_tuples():
89
99
  usage = comp_class.usage()
90
100
 
91
- comp_type = usage.get_base_class(as_string=True)
101
+ comp_type = get_base_class(usage, as_string=True)
92
102
  header = f'{name} is a {comp_type}'
103
+
93
104
  print('===================================')
94
105
  print(' ', highlight(header, 'bold', name))
95
106
  print('===================================')