ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (90)
  1. ai_pipeline_core/__init__.py +83 -119
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +14 -15
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +349 -1062
  30. ai_pipeline_core/documents/mime_type.py +40 -85
  31. ai_pipeline_core/documents/utils.py +62 -7
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +5 -3
  36. ai_pipeline_core/llm/ai_messages.py +284 -73
  37. ai_pipeline_core/llm/client.py +462 -209
  38. ai_pipeline_core/llm/model_options.py +86 -53
  39. ai_pipeline_core/llm/model_response.py +187 -241
  40. ai_pipeline_core/llm/model_types.py +34 -54
  41. ai_pipeline_core/logging/__init__.py +2 -9
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -43
  44. ai_pipeline_core/logging/logging_mixin.py +17 -51
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/observability/tracing.py +640 -0
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +26 -105
  70. ai_pipeline_core/settings.py +41 -32
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -240
  76. ai_pipeline_core/documents/flow_document.py +0 -128
  77. ai_pipeline_core/documents/task_document.py +0 -133
  78. ai_pipeline_core/documents/temporary_document.py +0 -95
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -314
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -717
  83. ai_pipeline_core/prefect.py +0 -54
  84. ai_pipeline_core/simple_runner/__init__.py +0 -24
  85. ai_pipeline_core/simple_runner/cli.py +0 -255
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -385
  87. ai_pipeline_core/tracing.py +0 -475
  88. ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
  89. ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
  90. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
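The shape of the release is visible in the list above: the prefect.py re-export shim (item 83) and the whole simple_runner package (items 84-86) are deleted, while pipeline/decorators.py, deployment/, document_store/, and observability/ arrive. The removed docstrings below name pipeline_task and pipeline_flow (from ai_pipeline_core.pipeline, re-exported at the package root) as the preferred decorators. A minimal sketch of that style, assuming the 0.4.1 decorators keep the import path and async-only contract those docstrings describe:

# Sketch only — assumes pipeline_task / pipeline_flow keep the top-level
# import path and async-only contract described in the deleted prefect.py
# docstring further down this diff.
from ai_pipeline_core import pipeline_flow, pipeline_task

@pipeline_task
async def extract_keywords(text: str) -> list[str]:
    # The pipeline decorators require async functions; tracing is built in.
    return text.split()[:5]

@pipeline_flow
async def analyze(text: str) -> list[str]:
    return await extract_keywords(text)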
ai_pipeline_core/prefect.py (deleted)
@@ -1,54 +0,0 @@
- """Prefect core features for pipeline orchestration.
-
- This module provides clean re-exports of essential Prefect functionality.
-
- IMPORTANT: You should NEVER use the `task` and `flow` decorators directly
- unless it is 100% impossible to use `pipeline_task` and `pipeline_flow`.
- The standard Prefect decorators are exported here only for extremely
- limited edge cases where the pipeline decorators cannot be used.
-
- Always prefer:
-     >>> from ai_pipeline_core import pipeline_task, pipeline_flow
-     >>>
-     >>> @pipeline_task
-     >>> async def my_task(...): ...
-     >>>
-     >>> @pipeline_flow
-     >>> async def my_flow(...): ...
-
- The `task` and `flow` decorators should only be used when:
- - You absolutely cannot convert to async (pipeline decorators require async)
- - You have a very specific Prefect integration that conflicts with tracing
- - You are writing test utilities or infrastructure code
-
- Exported components:
-     task: Prefect task decorator (AVOID - use pipeline_task instead).
-     flow: Prefect flow decorator (AVOID - use pipeline_flow instead).
-     disable_run_logger: Context manager to suppress Prefect logging.
-     prefect_test_harness: Test harness for unit testing flows/tasks.
-
- Testing utilities (use as fixtures):
-     The disable_run_logger and prefect_test_harness should be used as
-     pytest fixtures as shown in tests/conftest.py:
-
-     >>> @pytest.fixture(autouse=True, scope="session")
-     >>> def prefect_test_fixture():
-     ...     with prefect_test_harness():
-     ...         yield
-     >>>
-     >>> @pytest.fixture(autouse=True)
-     >>> def disable_prefect_logging():
-     ...     with disable_run_logger():
-     ...         yield
-
- Note:
-     The pipeline_task and pipeline_flow decorators from
-     ai_pipeline_core.pipeline provide async-only execution with
-     integrated LMNR tracing and are the standard for this library.
- """
-
- from prefect import flow, task
- from prefect.logging import disable_run_logger
- from prefect.testing.utilities import prefect_test_harness
-
- __all__ = ["task", "flow", "disable_run_logger", "prefect_test_harness"]
ai_pipeline_core/simple_runner/__init__.py (deleted)
@@ -1,24 +0,0 @@
- """Simple pipeline execution for local development.
-
- Utilities for running AI pipelines locally without full Prefect orchestration.
- """
-
- from .cli import run_cli
- from .simple_runner import (
-     ConfigSequence,
-     FlowSequence,
-     load_documents_from_directory,
-     run_pipeline,
-     run_pipelines,
-     save_documents_to_directory,
- )
-
- __all__ = [
-     "run_cli",
-     "run_pipeline",
-     "run_pipelines",
-     "load_documents_from_directory",
-     "save_documents_to_directory",
-     "FlowSequence",
-     "ConfigSequence",
- ]
ai_pipeline_core/simple_runner/cli.py (deleted)
@@ -1,255 +0,0 @@
- """Command-line interface for simple pipeline execution."""
-
- from __future__ import annotations
-
- import asyncio
- import os
- import sys
- from contextlib import ExitStack
- from pathlib import Path
- from typing import Callable, Type, TypeVar, cast
-
- from lmnr import Laminar
- from pydantic import ValidationError
- from pydantic_settings import CliPositionalArg, SettingsConfigDict
-
- from ai_pipeline_core.documents import DocumentList
- from ai_pipeline_core.flow.options import FlowOptions
- from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
- from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
- from ai_pipeline_core.settings import settings
-
- from .simple_runner import ConfigSequence, FlowSequence, run_pipelines, save_documents_to_directory
-
- logger = get_pipeline_logger(__name__)
-
- TOptions = TypeVar("TOptions", bound=FlowOptions)
- """Type variable for FlowOptions subclasses used in CLI."""
-
- InitializerFunc = Callable[[FlowOptions], tuple[str, DocumentList]] | None
- """Function type for custom pipeline initialization.
-
- Initializers can create initial documents or set up project state
- before flow execution begins.
-
- Args:
-     FlowOptions: Parsed CLI options
-
- Returns:
-     Tuple of (project_name, initial_documents) or None
- """
-
-
- def _initialize_environment() -> None:
-     """Initialize logging and observability systems.
-
-     Sets up the pipeline logging configuration and attempts to
-     initialize LMNR (Laminar) for distributed tracing. Failures
-     in LMNR initialization are logged but don't stop execution.
-
-     Side effects:
-         - Configures Python logging system
-         - Initializes Laminar SDK if API key is available
-         - Logs initialization status
-
-     Note:
-         Called automatically by run_cli before parsing arguments.
-     """
-     setup_logging()
-     try:
-         Laminar.initialize()
-         logger.info("LMNR tracing initialized.")
-     except Exception as e:
-         logger.warning(f"Failed to initialize LMNR tracing: {e}")
-
-
- def _running_under_pytest() -> bool:
-     """Check if code is running under pytest.
-
-     Detects pytest execution context to determine whether test
-     fixtures will provide necessary contexts (like Prefect test
-     harness). This prevents duplicate context setup.
-
-     Returns:
-         True if running under pytest, False otherwise.
-
-     Detection methods:
-         - PYTEST_CURRENT_TEST environment variable (set by pytest)
-         - 'pytest' module in sys.modules (imported by test runner)
-
-     Note:
-         Used to avoid setting up test harness when pytest fixtures
-         already provide it.
-     """
-     return "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules
-
-
- def run_cli(
-     *,
-     flows: FlowSequence,
-     flow_configs: ConfigSequence,
-     options_cls: Type[TOptions],
-     initializer: InitializerFunc = None,
-     trace_name: str | None = None,
- ) -> None:
-     """Execute pipeline flows from command-line arguments.
-
-     Environment setup:
-     - Initializes logging system
-     - Sets up LMNR tracing (if API key configured)
-     - Creates Prefect test harness (if no API key and not in pytest)
-     - Manages context stack for proper cleanup
-
-     Raises:
-         ValueError: If project name is empty after initialization.
-
-     Example:
-         >>> # In __main__.py
-         >>> from ai_pipeline_core.simple_runner import run_cli
-         >>> from .flows import AnalysisFlow, SummaryFlow
-         >>> from .config import AnalysisConfig, AnalysisOptions
-         >>>
-         >>> if __name__ == "__main__":
-         ...     run_cli(
-         ...         flows=[AnalysisFlow, SummaryFlow],
-         ...         flow_configs=[
-         ...             (AnalysisConfig, AnalysisOptions),
-         ...             (AnalysisConfig, AnalysisOptions)
-         ...         ],
-         ...         options_cls=AnalysisOptions,
-         ...         trace_name="document-analysis"
-         ...     )
-
-     Command line:
-         $ python -m my_module ./output --temperature 0.5 --model gpt-5
-         $ python -m my_module ./output --start 2 # Skip first flow
-
-     Note:
-     - Field names are converted to kebab-case for CLI (max_tokens → --max-tokens)
-     - Boolean fields become flags (--verbose/--no-verbose)
-     - Field descriptions from Pydantic become help text
-     - Type hints are enforced during parsing
-     - Validation errors show helpful messages with field names
-     - Includes hints for common error types (numbers, ranges)
-     - Exits with status 1 on error
-     - Shows --help when no arguments provided
-     """
-     # Check if no arguments provided before initialization
-     if len(sys.argv) == 1:
-         # Add --help to show usage when run without arguments
-         sys.argv.append("--help")
-
-     _initialize_environment()
-
-     class _RunnerOptions(  # type: ignore[reportRedeclaration]
-         options_cls,
-         cli_parse_args=True,
-         cli_kebab_case=True,
-         cli_exit_on_error=True,  # Let it exit normally on error
-         cli_prog_name="ai-pipeline",
-         cli_use_class_docs_for_groups=True,
-     ):
-         """Internal options class combining user options with CLI arguments.
-
-         Dynamically created class that inherits from user's options_cls
-         and adds standard CLI arguments for pipeline execution.
-         """
-
-         working_directory: CliPositionalArg[Path]
-         project_name: str | None = None
-         start: int = 1
-         end: int | None = None
-
-         model_config = SettingsConfigDict(frozen=True, extra="ignore")
-
-     try:
-         opts = cast(FlowOptions, _RunnerOptions())  # type: ignore[reportCallIssue]
-     except ValidationError as e:
-         print("\nError: Invalid command line arguments\n", file=sys.stderr)
-         for error in e.errors():
-             field = " -> ".join(str(loc) for loc in error["loc"])
-             msg = error["msg"]
-             value = error.get("input", "")
-
-             # Format the field name nicely (convert from snake_case to kebab-case for CLI)
-             cli_field = field.replace("_", "-")
-
-             print(f" --{cli_field}: {msg}", file=sys.stderr)
-             if value:
-                 print(f" Provided value: '{value}'", file=sys.stderr)
-
-             # Add helpful hints for common errors
-             if error["type"] == "float_parsing":
-                 print(" Hint: Please provide a valid number (e.g., 0.7)", file=sys.stderr)
-             elif error["type"] == "int_parsing":
-                 print(" Hint: Please provide a valid integer (e.g., 10)", file=sys.stderr)
-             elif error["type"] == "literal_error":
-                 ctx = error.get("ctx", {})
-                 expected = ctx.get("expected", "valid options")
-                 print(f" Hint: Valid options are: {expected}", file=sys.stderr)
-             elif error["type"] in [
-                 "less_than_equal",
-                 "greater_than_equal",
-                 "less_than",
-                 "greater_than",
-             ]:
-                 ctx = error.get("ctx", {})
-                 if "le" in ctx:
-                     print(f" Hint: Value must be ≤ {ctx['le']}", file=sys.stderr)
-                 elif "ge" in ctx:
-                     print(f" Hint: Value must be ≥ {ctx['ge']}", file=sys.stderr)
-                 elif "lt" in ctx:
-                     print(f" Hint: Value must be < {ctx['lt']}", file=sys.stderr)
-                 elif "gt" in ctx:
-                     print(f" Hint: Value must be > {ctx['gt']}", file=sys.stderr)
-
-         print("\nRun with --help to see all available options\n", file=sys.stderr)
-         sys.exit(1)
-
-     wd: Path = cast(Path, getattr(opts, "working_directory"))
-     wd.mkdir(parents=True, exist_ok=True)
-
-     # Get project name from options or use directory basename
-     project_name = getattr(opts, "project_name", None)
-     if not project_name:  # None or empty string
-         project_name = wd.name
-
-     # Ensure project_name is not empty
-     if not project_name:
-         raise ValueError("Project name cannot be empty")
-
-     # Use initializer if provided, otherwise use defaults
-     initial_documents = DocumentList([])
-     if initializer:
-         init_result = initializer(opts)
-         # Always expect tuple format from initializer
-         _, initial_documents = init_result  # Ignore project name from initializer
-
-     # Save initial documents if starting from first step
-     if getattr(opts, "start", 1) == 1 and initial_documents:
-         save_documents_to_directory(wd, initial_documents)
-
-     # Setup context stack with optional test harness and tracing
-     with ExitStack() as stack:
-         if trace_name:
-             stack.enter_context(
-                 Laminar.start_as_current_span(
-                     name=f"{trace_name}-{project_name}", input=[opts.model_dump_json()]
-                 )
-             )
-
-         if not settings.prefect_api_key and not _running_under_pytest():
-             stack.enter_context(prefect_test_harness())
-             stack.enter_context(disable_run_logger())
-
-         asyncio.run(
-             run_pipelines(
-                 project_name=project_name,
-                 output_dir=wd,
-                 flows=flows,
-                 flow_configs=flow_configs,
-                 flow_options=opts,
-                 start_step=getattr(opts, "start", 1),
-                 end_step=getattr(opts, "end", None),
-             )
-         )
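For reference, the entry point the run_cli docstring sketches in doctest form, written out as the plain __main__.py a 0.1.12 caller would have used. AnalysisFlow, SummaryFlow, AnalysisConfig, and AnalysisOptions are the docstring's own hypothetical names, and the optional initializer is inferred from the body above (run_cli ignores the name it returns); the entire simple_runner API is removed in 0.4.1:

# __main__.py — 0.1.12-era usage reconstructed from the docstring above.
# Flow, config, and options names are the docstring's hypothetical ones.
from ai_pipeline_core.documents import DocumentList
from ai_pipeline_core.simple_runner import run_cli

from .config import AnalysisConfig, AnalysisOptions
from .flows import AnalysisFlow, SummaryFlow

def init(opts: AnalysisOptions) -> tuple[str, DocumentList]:
    # run_cli discards the returned project name; the documents are
    # written to the working directory only when --start is 1.
    return ("ignored", DocumentList([]))

if __name__ == "__main__":
    run_cli(
        flows=[AnalysisFlow, SummaryFlow],
        flow_configs=[
            (AnalysisConfig, AnalysisOptions),
            (AnalysisConfig, AnalysisOptions),
        ],
        options_cls=AnalysisOptions,
        initializer=init,
        trace_name="document-analysis",
    )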