python-jack-knife 0.7.6__tar.gz → 0.7.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/PKG-INFO +1 -1
  2. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/__init__.py +2 -2
  3. python_jack_knife-0.7.7/src/pjk/engine.py +94 -0
  4. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/history.py +4 -0
  5. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/integrations/snowflake_pipe.py +1 -1
  6. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/log.py +37 -12
  7. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/man_page.py +6 -5
  8. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/parser.py +5 -3
  9. python_jack_knife-0.7.7/src/pjk/paths.py +53 -0
  10. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/registry.py +6 -3
  11. python_jack_knife-0.7.7/src/pjk/runtime.py +18 -0
  12. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/create_sink.py +2 -1
  13. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/dict_list_source.py +1 -1
  14. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/favorite_source.py +3 -16
  15. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/usage.py +22 -12
  16. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/version.py +1 -1
  17. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/python_jack_knife.egg-info/PKG-INFO +1 -1
  18. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/python_jack_knife.egg-info/SOURCES.txt +2 -0
  19. python_jack_knife-0.7.6/src/pjk/engine.py +0 -51
  20. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/LICENSE +0 -0
  21. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/README.md +0 -0
  22. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/pyproject.toml +0 -0
  23. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/setup.cfg +0 -0
  24. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/common.py +0 -0
  25. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/components.py +0 -0
  26. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/integrations/opensearch_client.py +0 -0
  27. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/integrations/opensearch_index_sink.py +0 -0
  28. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/integrations/opensearch_query_pipe.py +0 -0
  29. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/integrations/postgres_pipe.py +0 -0
  30. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/main.py +0 -0
  31. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/parse_pjk_file.py +0 -0
  32. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/__init__.py +0 -0
  33. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/ddiff.py +0 -0
  34. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/denorm.py +0 -0
  35. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/factory.py +0 -0
  36. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/filter.py +0 -0
  37. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/head.py +0 -0
  38. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/join.py +0 -0
  39. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/let_reduce.py +0 -0
  40. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/map.py +0 -0
  41. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/move_field.py +0 -0
  42. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/progress_pipe.py +0 -0
  43. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/query_pipe.py +0 -0
  44. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/remove_field.py +0 -0
  45. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/sample.py +0 -0
  46. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/select.py +0 -0
  47. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/sort.py +0 -0
  48. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/tail.py +0 -0
  49. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/user_pipe_factory.py +0 -0
  50. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/pipes/where.py +0 -0
  51. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/progress.py +0 -0
  52. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/__init__.py +0 -0
  53. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/csv_sink.py +0 -0
  54. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/devnull.py +0 -0
  55. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/dir_sink.py +0 -0
  56. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/expect.py +0 -0
  57. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/factory.py +0 -0
  58. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/format_sink.py +0 -0
  59. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/graph.py +0 -0
  60. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/graph_axis.py +0 -0
  61. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/graph_bar_line.py +0 -0
  62. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/graph_cumulative.py +0 -0
  63. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/graph_hist.py +0 -0
  64. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/graph_scatter.py +0 -0
  65. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/json_sink.py +0 -0
  66. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/s3_sink.py +0 -0
  67. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/s3_stream.py +0 -0
  68. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/sinks.py +0 -0
  69. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/stdout.py +0 -0
  70. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/tsv_sink.py +0 -0
  71. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sinks/user_sink_factory.py +0 -0
  72. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/__init__.py +0 -0
  73. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/csv_source.py +0 -0
  74. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/dir_source.py +0 -0
  75. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/factory.py +0 -0
  76. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/format_source.py +0 -0
  77. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/http_source.py +0 -0
  78. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/inline_source.py +0 -0
  79. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/json_source.py +0 -0
  80. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/lazy_file.py +0 -0
  81. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/lazy_file_local.py +0 -0
  82. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/lazy_file_s3.py +0 -0
  83. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/npy_source.py +0 -0
  84. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/parquet_source.py +0 -0
  85. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/s3_select_source.py +0 -0
  86. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/s3_source.py +0 -0
  87. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/source_list.py +0 -0
  88. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/sql_source.py +0 -0
  89. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/tsv_source.py +0 -0
  90. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/pjk/sources/user_source_factory.py +0 -0
  91. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/python_jack_knife.egg-info/dependency_links.txt +0 -0
  92. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/python_jack_knife.egg-info/entry_points.txt +0 -0
  93. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/python_jack_knife.egg-info/requires.txt +0 -0
  94. {python_jack_knife-0.7.6 → python_jack_knife-0.7.7}/src/python_jack_knife.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.7.6
3
+ Version: 0.7.7
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -1,6 +1,6 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
  from .version import __version__
4
- from .engine import PjkEngine
4
+ from .engine import PjkStream
5
5
 
6
- __all__ = ["__version__", "PjkEngine"]
6
+ __all__ = ["__version__", "PjkStream"]
@@ -0,0 +1,94 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import shlex
5
+ from pathlib import Path
6
+ from typing import Iterator, List, Optional, Sequence, Union
7
+
8
+ from pjk.log import init_stream
9
+ from pjk.parser import ExpressionParser, expand_macros
10
+ from pjk.registry import ComponentRegistry
11
+ from pjk.runtime import enter_pjk_stream, exit_pjk_stream
12
+ from pjk.sources.dict_list_source import DictListSource
13
+
14
+
15
class PjkStream:
    """
    Lazily iterable stream of records produced by a pjk pipeline.

    Create an instance through one of the named factories and loop over it:

        for rec in PjkStream.expression('{foo: 1} select:foo -'):
            ...

    The expression must end with a sink token so that it parses, but records
    are pulled directly from the pipe chain feeding that sink (sink.drain is
    not called).

    When inrecs is given, those records replace the pipeline source. The
    expression may then either start with a source (which is overridden) or
    consist of pipes only.

    Iterating a PjkStream marks the pjk stream runtime flag for the duration
    of the iteration and initializes stderr logging via init_stream().
    """

    def __init__(self, tokens: Sequence[str], inrecs: Optional[List[dict]] = None):
        # Copy the tokens so later mutation of the caller's sequence has no effect.
        self._tokens = list(tokens)
        self._inrecs = inrecs

    @classmethod
    def expression(cls, expr: str, *, inrecs: Optional[List[dict]] = None) -> "PjkStream":
        """Build a stream from a CLI-style expression string (split with shlex).

        Raises ValueError if the string cannot be tokenized (e.g. unbalanced quotes).
        """
        try:
            parts = shlex.split(expr, comments=True, posix=True)
        except ValueError as e:
            raise ValueError(f"Invalid pjk expression: {e}") from e
        else:
            return cls(parts, inrecs)

    @classmethod
    def tokens(cls, tokens: Sequence[str], *, inrecs: Optional[List[dict]] = None) -> "PjkStream":
        """Build a stream from an already-tokenized expression."""
        return cls([*tokens], inrecs)

    @classmethod
    def file(cls, path: Union[Path, str], *, inrecs: Optional[List[dict]] = None) -> "PjkStream":
        """Build a stream from a .pjk file path.

        (m: macros are supported as a single token via tokens().)
        Raises ValueError when the path does not carry the .pjk suffix.
        """
        pjk_path = Path(path)
        if pjk_path.suffix == ".pjk":
            return cls([str(pjk_path)], inrecs)
        raise ValueError(f"PjkStream.file() expects a .pjk path, got: {pjk_path}")

    def __iter__(self) -> Iterator[dict]:
        # Flag the stream as active for the whole iteration; the flag is
        # restored in the finally clause even if the consumer stops early.
        active_token = enter_pjk_stream()
        init_stream()
        try:
            yield from self._iter_records()
        finally:
            exit_pjk_stream(active_token)

    def _iter_records(self) -> Iterator[dict]:
        """Parse the expression and yield records from the sink's input chain."""
        registry = ComponentRegistry()
        parser = ExpressionParser(registry)
        expanded = expand_macros(self._tokens)

        source_override = None
        if self._inrecs is not None:
            source_override = DictListSource(self._inrecs)
            # Probe whether the expression already leads with a source token;
            # any failure while probing is treated as "pipes-only expression".
            try:
                leads_with_source = registry.create_source(expanded[0]) is not None
            except Exception:
                leads_with_source = False
            remainder = expanded[1:] if leads_with_source else expanded
            # Placeholder source token; the parser swaps in source_override.
            expanded = ["{to_override: 'true'}"] + remainder

        sink = parser.parse(expanded, source_override=source_override)

        upstream = sink.input
        inputs = [upstream]
        upstream._get_sources(inputs)
        try:
            for record in upstream:
                yield record
        finally:
            # Close every collected input regardless of how iteration ended.
            for opened in inputs:
                opened.close()
@@ -3,6 +3,7 @@ import os
3
3
  import shlex
4
4
  from typing import List, Set
5
5
  from pjk.common import pager_stdout, highlight
6
+ from pjk.runtime import pjk_stream_active
6
7
 
7
8
  LOG_FILE = '.pjk-history.txt'
8
9
 
@@ -50,6 +51,9 @@ def read_history(log_path: str) -> List[int]:
50
51
  return clist, cset
51
52
 
52
53
  def write_history(tokens: list):
54
+ if pjk_stream_active():
55
+ return
56
+
53
57
  if os.environ.get("PJK_NO_HISTORY") == "1":
54
58
  return
55
59
 
@@ -124,7 +124,7 @@ class SnowflakeClient:
124
124
  class SnowflakePipe(QueryPipe, Integration):
125
125
  """
126
126
  Snowflake query pipe; executes SQL found in input record['query'] and streams rows.
127
- Connection/session settings are pulled from ~/.pjk/component_configs.yaml under the arg name.
127
+ Connection/session settings are pulled from configs.yaml under PJK_HOME (or PJK_CONFIG_FILE).
128
128
  """
129
129
  name = 'snowflake'
130
130
  desc = "Snowflake query pipe; executes SQL over input record['query']."
@@ -1,22 +1,37 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
- import logging, os, tempfile
4
+ import logging, os, sys
5
5
  from logging.handlers import RotatingFileHandler
6
6
  from pathlib import Path
7
7
  from typing import Optional
8
8
 
9
+ from pjk.paths import logs_dir_path
10
+
9
11
  logger = logging.getLogger("pjk")
10
12
 
11
13
  def _truthy(v: Optional[str]) -> bool:
12
14
  return str(v).lower() in ("1", "true", "yes", "on")
13
15
 
16
+ def _level_from_env(explicit: Optional[int]) -> int:
17
+ if explicit is not None:
18
+ return explicit
19
+ return logging.DEBUG if _truthy(os.getenv("DJK_DEBUG")) else logging.INFO
20
+
21
+ def _configure_logger(logger_obj, handlers, level: int) -> None:
22
+ logger_obj.handlers.clear()
23
+ for handler in handlers:
24
+ handler.setLevel(level)
25
+ logger_obj.addHandler(handler)
26
+ logger_obj.setLevel(level)
27
+ logger_obj.propagate = False
28
+
14
29
  def init(force: bool = False, level: Optional[int] = None):
15
30
  """
16
31
  Initialize 'pjk' logging.
17
32
 
18
33
  - Rotates at DJK_LOG_MAX_MB (default 2 MB), keeps DJK_LOG_BACKUPS (default 3).
19
- - Files under ~/.pjk/logs by default; override with DJK_LOG_DIR / DJK_LOG_FILE.
34
+ - Files under PJK_HOME/logs by default; override with DJK_LOG_DIR / DJK_LOG_FILE.
20
35
  - Set DJK_DEBUG=1|true|yes for DEBUG, else INFO (or pass explicit level).
21
36
  - If the log directory is not writable, fall back to console logging
22
37
  (stderr → CloudWatch in AWS).
@@ -26,13 +41,13 @@ def init(force: bool = False, level: Optional[int] = None):
26
41
  return
27
42
  logger.handlers.clear()
28
43
 
29
- level = level or (logging.DEBUG if _truthy(os.getenv("DJK_DEBUG")) else logging.INFO)
44
+ level = _level_from_env(level)
30
45
  fmt = "[%(levelname)s] [%(threadName)s] %(message)s"
31
46
  formatter = logging.Formatter(fmt)
32
47
 
33
48
  try:
34
- # Preferred: rotating file handler under ~/.pjk/logs
35
- log_dir = Path(os.getenv("DJK_LOG_DIR", Path.home() / ".pjk" / "logs"))
49
+ # Preferred: rotating file handler under PJK_HOME/logs
50
+ log_dir = Path(os.getenv("DJK_LOG_DIR", logs_dir_path()))
36
51
  log_dir.mkdir(parents=True, exist_ok=True)
37
52
 
38
53
  log_file = log_dir / os.getenv("DJK_LOG_FILE", "pjk.log")
@@ -46,17 +61,27 @@ def init(force: bool = False, level: Optional[int] = None):
46
61
  encoding="utf-8",
47
62
  delay=False,
48
63
  )
49
- fh.setLevel(level)
50
64
  fh.setFormatter(formatter)
51
- logger.addHandler(fh)
65
+ _configure_logger(logger, [fh], level)
52
66
  except Exception:
53
67
  # Fallback: console handler
54
68
  ch = logging.StreamHandler()
55
- ch.setLevel(level)
56
69
  ch.setFormatter(formatter)
57
- logger.addHandler(ch)
70
+ _configure_logger(logger, [ch], level)
58
71
  logger.warning("Falling back to console logging (log file not writable)")
59
72
 
60
- logger.setLevel(level)
61
- # Do not propagate to root
62
- logger.propagate = False
73
def init_stream(force: bool = False, level: Optional[int] = None):
    """
    Set up console-only 'pjk' logging for PjkStream and other library use.

    A single stderr handler is installed, so AWS Lambda, ECS, and Fargate can
    ship the output to CloudWatch without needing PJK_HOME/logs or a writable
    home directory.

    Does nothing when the logger already has handlers, unless force is True.
    The level is the explicit `level` if given, otherwise derived from the
    environment by _level_from_env().
    """
    already_configured = bool(logger.handlers)
    if already_configured and not force:
        return

    level = _level_from_env(level)
    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setFormatter(
        logging.Formatter("[%(levelname)s] [%(threadName)s] %(message)s")
    )
    _configure_logger(logger, [stderr_handler], level)
@@ -1,9 +1,10 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
- from pjk.parser import ExpressionParser, MACRO_PREFIX, MACROS_FILE, read_macros
4
+ from pjk.parser import ExpressionParser, MACRO_PREFIX, read_macros
5
5
  from pjk.components import Source, Pipe, Sink
6
- from pjk.usage import Usage, CONFIG_FILE
6
+ from pjk.paths import config_file_path, macros_file_path
7
+ from pjk.usage import Usage
7
8
  from pjk.registry import ComponentRegistry
8
9
  from pjk.common import pager_stdout, highlight, ComponentOrigin
9
10
  from contextlib import nullcontext
@@ -88,7 +89,7 @@ def print_man(registry: ComponentRegistry, name: str, usage: Usage):
88
89
  print_example(registry, expr_tokens, expect, name)
89
90
 
90
91
  def display_configs():
91
- path = Path(CONFIG_FILE).expanduser()
92
+ path = config_file_path()
92
93
 
93
94
  with pager_stdout():
94
95
  with path.open("r", encoding="utf-8") as f:
@@ -96,7 +97,7 @@ def display_configs():
96
97
  if not isinstance(data, dict):
97
98
  raise ValueError("Top-level YAML must be a mapping of records")
98
99
 
99
- print(f'Component configs defined in {CONFIG_FILE}')
100
+ print(f'Component configs defined in {path}')
100
101
  print()
101
102
  for name, body_dict in data.items():
102
103
  print('=========================================')
@@ -123,7 +124,7 @@ def display_macros():
123
124
  macros = read_macros()
124
125
 
125
126
  with pager_stdout():
126
- print(f"Macros defined in '{MACROS_FILE}'")
127
+ print(f"Macros defined in '{macros_file_path()}'")
127
128
 
128
129
  print(f"Usage: pjk [...] {MACRO_PREFIX}:<macro_name> [...]")
129
130
  print()
@@ -16,11 +16,13 @@ from pjk.progress import ProgressIgnore
16
16
  from pjk.parse_pjk_file import handle_pjk_file
17
17
  from pjk.common import SafeNamespace
18
18
 
19
- MACROS_FILE = '~/.pjk/macros.txt'
19
+ from pjk.paths import macros_file_path
20
+
20
21
  MACRO_PREFIX = 'm'
21
- def read_macros(file_name: str = MACROS_FILE) -> Dict[str, str]:
22
+
23
+ def read_macros(file_name: str | None = None) -> Dict[str, str]:
22
24
  out: Dict[str, str] = {}
23
- path = Path(file_name).expanduser()
25
+ path = Path(file_name) if file_name else macros_file_path()
24
26
  with path.open(encoding="utf-8") as f:
25
27
  for raw in f:
26
28
  line = raw.split("#", 1)[0].strip()
@@ -0,0 +1,53 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import os
5
+ from pathlib import Path
6
+
7
# Names of the environment variables that override the default locations.
PJK_HOME_ENV = "PJK_HOME"
PJK_CONFIG_FILE_ENV = "PJK_CONFIG_FILE"
PJK_MACROS_FILE_ENV = "PJK_MACROS_FILE"
PJK_PLUGINS_DIR_ENV = "PJK_PLUGINS_DIR"


def pjk_home() -> Path:
    """
    Return the root directory for pjk runtime files.

    Uses PJK_HOME (user-expanded and resolved) when it is set to a non-empty
    value, otherwise ~/.pjk (dev default).
    """
    configured = os.environ.get(PJK_HOME_ENV)
    if not configured:
        return Path.home() / ".pjk"
    return Path(configured).expanduser().resolve()


def _override_or_home_child(env_name: str, child_name: str) -> Path:
    """Return the path from env var `env_name`, else `child_name` under pjk_home()."""
    configured = os.environ.get(env_name)
    if not configured:
        return pjk_home() / child_name
    return Path(configured).expanduser().resolve()


def config_file_path() -> Path:
    """Component instance configs (OpenSearchQueryPipe-products, etc.)."""
    return _override_or_home_child(PJK_CONFIG_FILE_ENV, "configs.yaml")


def macros_file_path() -> Path:
    """Macros file: PJK_MACROS_FILE if set, otherwise macros.txt under pjk_home()."""
    return _override_or_home_child(PJK_MACROS_FILE_ENV, "macros.txt")


def plugins_dir_path() -> Path:
    """Plugins directory: PJK_PLUGINS_DIR if set, otherwise plugins under pjk_home()."""
    return _override_or_home_child(PJK_PLUGINS_DIR_ENV, "plugins")


def logs_dir_path() -> Path:
    """Logs directory under pjk_home(); this function reads no override variable."""
    return pjk_home() / "logs"


def config_file_display() -> str:
    """Human-readable config path for errors and man pages."""
    resolved = config_file_path()
    return str(resolved)
@@ -11,6 +11,7 @@ from pjk.sources.format_source import FormatSource
11
11
  import importlib.util
12
12
  import importlib
13
13
  from pjk.components import Pipe, Source, Sink
14
+ from pjk.paths import plugins_dir_path
14
15
  from pjk.common import ComponentFactory, highlight, ComponentOrigin, pager_stdout
15
16
  from typing import List, Type
16
17
 
@@ -73,7 +74,7 @@ class ComponentRegistry:
73
74
 
74
75
  self.print_non_core([ComponentOrigin.CORE,ComponentOrigin.EXTERNAL], is_integration=True, header='integrations')
75
76
  self.print_non_core([ComponentOrigin.EXTERNAL], is_integration=False, header='apps')
76
- self.print_non_core([ComponentOrigin.USER], is_integration=None, header='user components (~/.pjk/plugins)')
77
+ self.print_non_core([ComponentOrigin.USER], is_integration=None, header=f'user components ({plugins_dir_path()})')
77
78
 
78
79
  # is_integration = True|False|None None=don't care
79
80
  def print_non_core(self, origin_list: List[ComponentOrigin], is_integration: bool, header:str):
@@ -96,7 +97,9 @@ class ComponentRegistry:
96
97
  line = f' {name:<17} {temp:<15} {lines[0]}'
97
98
  print(line)
98
99
 
99
- def load_user_components(self, path=os.path.expanduser("~/.pjk/plugins")):
100
+ def load_user_components(self, path=None):
101
+ if path is None:
102
+ path = str(plugins_dir_path())
100
103
  if not os.path.isdir(path):
101
104
  return
102
105
 
@@ -113,7 +116,7 @@ class ComponentRegistry:
113
116
  sys.modules[spec.name] = module
114
117
  spec.loader.exec_module(module)
115
118
  except Exception as e:
116
- print(f"[pjk] Failed to load {fname} from ~/.pjk/plugins: {e}")
119
+ print(f"[pjk] Failed to load {fname} from {path}: {e}")
117
120
  continue
118
121
 
119
122
  for obj in vars(module).values():
@@ -0,0 +1,18 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2024 Mike Schultz
3
+
4
+ import contextvars
5
+
6
# Number of PjkStream iterations currently active in this context.
# A depth counter (rather than a boolean restored via Token.reset) keeps the
# flag correct when streams overlap and finish out of order, and avoids the
# ValueError that ContextVar.reset() raises when a generator is finalized in
# a different Context from the one that created the token.
_pjk_stream = contextvars.ContextVar("pjk_stream", default=0)


def pjk_stream_active() -> bool:
    """Return True while at least one PjkStream iteration is in progress."""
    return _pjk_stream.get() > 0


def enter_pjk_stream():
    """Mark the start of a PjkStream iteration.

    Returns the contextvars Token from the update; callers pass it back to
    exit_pjk_stream().
    """
    return _pjk_stream.set(_pjk_stream.get() + 1)


def exit_pjk_stream(token) -> None:
    """Mark the end of a PjkStream iteration.

    `token` is accepted for backward compatibility with existing callers but
    is not used: the depth is decremented (never below zero) instead of being
    reset to a captured earlier value.
    """
    _pjk_stream.set(max(_pjk_stream.get() - 1, 0))
@@ -2,6 +2,7 @@
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
4
  from pjk.components import Sink
5
+ from pjk.paths import plugins_dir_path
5
6
  from pjk.usage import ParsedToken, Usage
6
7
  from importlib.resources import files
7
8
  from pathlib import Path
@@ -45,7 +46,7 @@ class CreateSink(Sink):
45
46
  desc=(
46
47
  "Write a bare bones source, pipe or sink python file.\n"
47
48
  "Requires input record of the form: \"{<type>: '<name>'}\". E.g. \"{pipe: 'mypipe'}\"\n"
48
- "User components can be used inline or deposited in ~/.pjk/plugins"
49
+ f"User components can be used inline or deposited in {plugins_dir_path()}"
49
50
  ),
50
51
  component_class=cls
51
52
  )
@@ -5,7 +5,7 @@ from pjk.components import Source
5
5
 
6
6
 
7
7
  class DictListSource(Source):
8
- """Source that yields records from a list of dicts. Used by PjkEngine."""
8
+ """Source that yields records from a list of dicts. Used by PjkStream."""
9
9
 
10
10
  def __init__(self, records: list):
11
11
  super().__init__(root=None)
@@ -1,28 +1,15 @@
1
+ from pjk.parser import read_macros
2
+ from pjk.paths import macros_file_path
1
3
  from pjk.usage import Usage, ParsedToken
2
4
  from pjk.components import Source
3
- from pathlib import Path
4
5
  from typing import Dict
5
6
 
6
- MACRO_FILE = '~/.pjk/macros.txt'
7
-
8
- def read_macros(file_name: str = MACRO_FILE) -> Dict[str, str]:
9
- out: Dict[str, str] = {}
10
- path = Path(file_name).expanduser()
11
- with path.open(encoding="utf-8") as f:
12
- for raw in f:
13
- line = raw.split("#", 1)[0].strip()
14
- if not line or ":" not in line:
15
- continue
16
- key, val = line.split(":", 1)
17
- out[key.strip()] = val.strip()
18
- return out
19
-
20
7
  class MacroSource(Source):
21
8
  @classmethod
22
9
  def usage(cls):
23
10
  u = Usage(
24
11
  name='macros',
25
- desc=f"Source to list the macro expressions stored in {MACRO_FILE}.\n"
12
+ desc=f"Source to list the macro expressions stored in {macros_file_path()}.\n"
26
13
  "A specific macro is referenced using 'm:<instance>, e.g. pjk m:hw -",
27
14
  component_class=cls
28
15
  )
@@ -2,18 +2,27 @@ from typing import Optional, Set, List
2
2
  import os
3
3
  import yaml
4
4
 
5
- CONFIG_FILE = '~/.pjk/configs.yaml'
5
+ from pjk.paths import config_file_display, config_file_path
6
+
6
7
  EXTENDS_KEY = '_extends'
7
8
 
9
+ # Display path for errors; resolved via PJK_HOME or PJK_CONFIG_FILE.
10
+ CONFIG_FILE = config_file_display()
11
+
8
12
  class Config:
9
13
  def __init__(self):
10
- self.configs_yaml = os.path.expanduser(CONFIG_FILE)
14
+ self._loaded_path: Optional[str] = None
11
15
  self._data = {}
12
- self._load()
13
-
14
- def _load(self):
15
- if os.path.exists(self.configs_yaml):
16
- with open(self.configs_yaml, 'r') as f:
16
+
17
+ def _ensure_loaded(self):
18
+ path = str(config_file_path())
19
+ if path != self._loaded_path:
20
+ self._loaded_path = path
21
+ self._load(path)
22
+
23
+ def _load(self, path: str):
24
+ if os.path.exists(path):
25
+ with open(path, 'r') as f:
17
26
  self._data = yaml.safe_load(f) or {}
18
27
  else:
19
28
  self._data = {}
@@ -38,14 +47,14 @@ class Config:
38
47
  visiting = set()
39
48
  if instance_key in visiting:
40
49
  raise TokenError(
41
- f"Cycle in {CONFIG_FILE} config inheritance involving '{instance_key}'."
50
+ f"Cycle in {config_file_display()} config inheritance involving '{instance_key}'."
42
51
  )
43
52
  visiting.add(instance_key)
44
53
 
45
54
  base_entry = self._resolve_entry(base_key, visiting)
46
55
  if base_entry is None:
47
56
  raise TokenError(
48
- f"'{instance_key}:{EXTENDS_KEY}' in {CONFIG_FILE} points to a non-existent entry: '{base_key}'. "
57
+ f"'{instance_key}:{EXTENDS_KEY}' in {config_file_display()} points to a non-existent entry: '{base_key}'. "
49
58
  f"Add '{base_key}' or define params directly on '{instance_key}'."
50
59
  )
51
60
 
@@ -56,10 +65,11 @@ class Config:
56
65
  return merged
57
66
 
58
67
  def lookup(self, usage: "Usage", param: str):
68
+ self._ensure_loaded()
59
69
  # this should be advertised as a well-known requirement: usage must define a 'instance' arg
60
70
  instance = usage.get_arg("instance")
61
71
  if not instance:
62
- raise TokenError(f"'instance' arg must be defined when using configs in {CONFIG_FILE}")
72
+ raise TokenError(f"'instance' arg must be defined when using configs in {config_file_display()}")
63
73
 
64
74
  component_class = usage.get_component_class()
65
75
  class_name = component_class.__name__
@@ -76,7 +86,7 @@ class Config:
76
86
  entry = self._data.get(instance_key, None)
77
87
  if not entry:
78
88
  raise TokenError(
79
- f"{CONFIG_FILE} does not contain entry for '{instance_key}' with required params."
89
+ f"{config_file_display()} does not contain entry for '{instance_key}' with required params."
80
90
  )
81
91
 
82
92
  resolved = self._resolve_entry(instance_key)
@@ -86,7 +96,7 @@ class Config:
86
96
  if hint:
87
97
  base_key = entry.get(EXTENDS_KEY)
88
98
  raise TokenError(
89
- f"'{param}' missing from '{instance_key}' in {CONFIG_FILE}{hint}. "
99
+ f"'{param}' missing from '{instance_key}' in {config_file_display()}{hint}. "
90
100
  f"Set it on '{base_key}' or override it on '{instance_key}'."
91
101
  )
92
102
 
@@ -1,4 +1,4 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  # Copyright 2024 Mike Schultz
3
3
 
4
- __version__ = "0.7.6"
4
+ __version__ = "0.7.7"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-jack-knife
3
- Version: 0.7.6
3
+ Version: 0.7.7
4
4
  Summary: Python Jack Knife – a command line data processor
5
5
  Author-email: Mike Schultz <mike.schultz@gmail.com>
6
6
  License:
@@ -11,8 +11,10 @@ src/pjk/main.py
11
11
  src/pjk/man_page.py
12
12
  src/pjk/parse_pjk_file.py
13
13
  src/pjk/parser.py
14
+ src/pjk/paths.py
14
15
  src/pjk/progress.py
15
16
  src/pjk/registry.py
17
+ src/pjk/runtime.py
16
18
  src/pjk/usage.py
17
19
  src/pjk/version.py
18
20
  src/pjk/integrations/opensearch_client.py
@@ -1,51 +0,0 @@
1
- # SPDX-License-Identifier: Apache-2.0
2
- # Copyright 2024 Mike Schultz
3
-
4
- from typing import Iterator, List, Optional
5
-
6
- from pjk.parser import ExpressionParser, expand_macros
7
- from pjk.registry import ComponentRegistry
8
- from pjk.sources.dict_list_source import DictListSource
9
-
10
-
11
- class PjkEngine:
12
- """
13
- Run a pjk pipeline from a .pjk file, optionally with supplied input records.
14
-
15
- - inrecs supplied: the source in the .pjk file is replaced with inrecs.
16
- Expression may be full (source + pipes + sink) or pipes-only.
17
- - inrecs=None: expression.pjk is fully self-contained (source, pipes, sink)
18
- """
19
-
20
- def __init__(self, inrecs: Optional[List[dict]] = None, pjk_file: str = ""):
21
- self.inrecs = inrecs
22
- self.pjk_file = pjk_file
23
-
24
- def __iter__(self) -> Iterator[dict]:
25
- registry = ComponentRegistry()
26
- parser = ExpressionParser(registry)
27
- expanded = expand_macros([self.pjk_file])
28
-
29
- if self.inrecs is not None:
30
- source_override = DictListSource(self.inrecs)
31
- try:
32
- first_is_source = registry.create_source(expanded[0]) is not None
33
- except Exception:
34
- first_is_source = False
35
- if first_is_source:
36
- expanded = ["{to_override: 'true'}"] + expanded[1:]
37
- else:
38
- expanded = ["{to_override: 'true'}"] + expanded
39
- else:
40
- source_override = None
41
-
42
- sink = parser.parse(expanded, source_override=source_override)
43
-
44
- inputs = [sink.input]
45
- sink.input._get_sources(inputs)
46
- try:
47
- for record in sink.input:
48
- yield record
49
- finally:
50
- for inp in inputs:
51
- inp.close()