ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +83 -119
- ai_pipeline_core/deployment/__init__.py +34 -0
- ai_pipeline_core/deployment/base.py +861 -0
- ai_pipeline_core/deployment/contract.py +80 -0
- ai_pipeline_core/deployment/deploy.py +561 -0
- ai_pipeline_core/deployment/helpers.py +97 -0
- ai_pipeline_core/deployment/progress.py +126 -0
- ai_pipeline_core/deployment/remote.py +116 -0
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +14 -15
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +349 -1062
- ai_pipeline_core/documents/mime_type.py +40 -85
- ai_pipeline_core/documents/utils.py +62 -7
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +309 -0
- ai_pipeline_core/images/_processing.py +151 -0
- ai_pipeline_core/llm/__init__.py +5 -3
- ai_pipeline_core/llm/ai_messages.py +284 -73
- ai_pipeline_core/llm/client.py +462 -209
- ai_pipeline_core/llm/model_options.py +86 -53
- ai_pipeline_core/llm/model_response.py +187 -241
- ai_pipeline_core/llm/model_types.py +34 -54
- ai_pipeline_core/logging/__init__.py +2 -9
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -43
- ai_pipeline_core/logging/logging_mixin.py +17 -51
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/observability/_debug/_config.py +95 -0
- ai_pipeline_core/observability/_debug/_content.py +764 -0
- ai_pipeline_core/observability/_debug/_processor.py +98 -0
- ai_pipeline_core/observability/_debug/_summary.py +312 -0
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/observability/_debug/_writer.py +843 -0
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/observability/tracing.py +640 -0
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +26 -105
- ai_pipeline_core/settings.py +41 -32
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
- ai_pipeline_core/documents/document_list.py +0 -240
- ai_pipeline_core/documents/flow_document.py +0 -128
- ai_pipeline_core/documents/task_document.py +0 -133
- ai_pipeline_core/documents/temporary_document.py +0 -95
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -314
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -717
- ai_pipeline_core/prefect.py +0 -54
- ai_pipeline_core/simple_runner/__init__.py +0 -24
- ai_pipeline_core/simple_runner/cli.py +0 -255
- ai_pipeline_core/simple_runner/simple_runner.py +0 -385
- ai_pipeline_core/tracing.py +0 -475
- ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
- ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/prefect.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
"""Prefect core features for pipeline orchestration.
|
|
2
|
-
|
|
3
|
-
This module provides clean re-exports of essential Prefect functionality.
|
|
4
|
-
|
|
5
|
-
IMPORTANT: You should NEVER use the `task` and `flow` decorators directly
|
|
6
|
-
unless it is 100% impossible to use `pipeline_task` and `pipeline_flow`.
|
|
7
|
-
The standard Prefect decorators are exported here only for extremely
|
|
8
|
-
limited edge cases where the pipeline decorators cannot be used.
|
|
9
|
-
|
|
10
|
-
Always prefer:
|
|
11
|
-
>>> from ai_pipeline_core import pipeline_task, pipeline_flow
|
|
12
|
-
>>>
|
|
13
|
-
>>> @pipeline_task
|
|
14
|
-
>>> async def my_task(...): ...
|
|
15
|
-
>>>
|
|
16
|
-
>>> @pipeline_flow
|
|
17
|
-
>>> async def my_flow(...): ...
|
|
18
|
-
|
|
19
|
-
The `task` and `flow` decorators should only be used when:
|
|
20
|
-
- You absolutely cannot convert to async (pipeline decorators require async)
|
|
21
|
-
- You have a very specific Prefect integration that conflicts with tracing
|
|
22
|
-
- You are writing test utilities or infrastructure code
|
|
23
|
-
|
|
24
|
-
Exported components:
|
|
25
|
-
task: Prefect task decorator (AVOID - use pipeline_task instead).
|
|
26
|
-
flow: Prefect flow decorator (AVOID - use pipeline_flow instead).
|
|
27
|
-
disable_run_logger: Context manager to suppress Prefect logging.
|
|
28
|
-
prefect_test_harness: Test harness for unit testing flows/tasks.
|
|
29
|
-
|
|
30
|
-
Testing utilities (use as fixtures):
|
|
31
|
-
The disable_run_logger and prefect_test_harness should be used as
|
|
32
|
-
pytest fixtures as shown in tests/conftest.py:
|
|
33
|
-
|
|
34
|
-
>>> @pytest.fixture(autouse=True, scope="session")
|
|
35
|
-
>>> def prefect_test_fixture():
|
|
36
|
-
... with prefect_test_harness():
|
|
37
|
-
... yield
|
|
38
|
-
>>>
|
|
39
|
-
>>> @pytest.fixture(autouse=True)
|
|
40
|
-
>>> def disable_prefect_logging():
|
|
41
|
-
... with disable_run_logger():
|
|
42
|
-
... yield
|
|
43
|
-
|
|
44
|
-
Note:
|
|
45
|
-
The pipeline_task and pipeline_flow decorators from
|
|
46
|
-
ai_pipeline_core.pipeline provide async-only execution with
|
|
47
|
-
integrated LMNR tracing and are the standard for this library.
|
|
48
|
-
"""
|
|
49
|
-
|
|
50
|
-
from prefect import flow, task
|
|
51
|
-
from prefect.logging import disable_run_logger
|
|
52
|
-
from prefect.testing.utilities import prefect_test_harness
|
|
53
|
-
|
|
54
|
-
__all__ = ["task", "flow", "disable_run_logger", "prefect_test_harness"]
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
"""Simple pipeline execution for local development.
|
|
2
|
-
|
|
3
|
-
Utilities for running AI pipelines locally without full Prefect orchestration.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
from .cli import run_cli
|
|
7
|
-
from .simple_runner import (
|
|
8
|
-
ConfigSequence,
|
|
9
|
-
FlowSequence,
|
|
10
|
-
load_documents_from_directory,
|
|
11
|
-
run_pipeline,
|
|
12
|
-
run_pipelines,
|
|
13
|
-
save_documents_to_directory,
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
__all__ = [
|
|
17
|
-
"run_cli",
|
|
18
|
-
"run_pipeline",
|
|
19
|
-
"run_pipelines",
|
|
20
|
-
"load_documents_from_directory",
|
|
21
|
-
"save_documents_to_directory",
|
|
22
|
-
"FlowSequence",
|
|
23
|
-
"ConfigSequence",
|
|
24
|
-
]
|
|
@@ -1,255 +0,0 @@
|
|
|
1
|
-
"""Command-line interface for simple pipeline execution."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import asyncio
|
|
6
|
-
import os
|
|
7
|
-
import sys
|
|
8
|
-
from contextlib import ExitStack
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from typing import Callable, Type, TypeVar, cast
|
|
11
|
-
|
|
12
|
-
from lmnr import Laminar
|
|
13
|
-
from pydantic import ValidationError
|
|
14
|
-
from pydantic_settings import CliPositionalArg, SettingsConfigDict
|
|
15
|
-
|
|
16
|
-
from ai_pipeline_core.documents import DocumentList
|
|
17
|
-
from ai_pipeline_core.flow.options import FlowOptions
|
|
18
|
-
from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
|
|
19
|
-
from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
|
|
20
|
-
from ai_pipeline_core.settings import settings
|
|
21
|
-
|
|
22
|
-
from .simple_runner import ConfigSequence, FlowSequence, run_pipelines, save_documents_to_directory
|
|
23
|
-
|
|
24
|
-
logger = get_pipeline_logger(__name__)

TOptions = TypeVar("TOptions", bound=FlowOptions)
"""Generic parameter for the FlowOptions subclass handed to the CLI."""

InitializerFunc = Callable[[FlowOptions], tuple[str, DocumentList]] | None
"""Signature of the optional start-up hook accepted by run_cli.

The hook is called once with the parsed CLI options, before any flow
runs, and is the place to build the initial documents or prepare
project state.

Args:
    FlowOptions: The options object parsed from the command line.

Returns:
    A ``(project_name, initial_documents)`` tuple.

Note:
    The trailing ``| None`` means the hook itself may be omitted. A hook
    that *is* supplied must return the 2-tuple: run_cli unpacks the
    result unconditionally and discards the project name element.
"""
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def _initialize_environment() -> None:
    """Bring up logging, then make a best-effort attempt to start LMNR tracing.

    ``setup_logging()`` runs first so that the outcome of the tracing
    attempt can be logged. ``Laminar.initialize()`` is tried afterwards;
    whatever it raises is reported as a warning and swallowed, so the CLI
    keeps running without tracing.

    Note:
        run_cli calls this before it parses the command-line arguments.
    """
    setup_logging()

    try:
        Laminar.initialize()
        logger.info("LMNR tracing initialized.")
    except Exception as exc:  # deliberate catch-all: tracing is optional
        logger.warning(f"Failed to initialize LMNR tracing: {exc}")
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def _running_under_pytest() -> bool:
|
|
67
|
-
"""Check if code is running under pytest.
|
|
68
|
-
|
|
69
|
-
Detects pytest execution context to determine whether test
|
|
70
|
-
fixtures will provide necessary contexts (like Prefect test
|
|
71
|
-
harness). This prevents duplicate context setup.
|
|
72
|
-
|
|
73
|
-
Returns:
|
|
74
|
-
True if running under pytest, False otherwise.
|
|
75
|
-
|
|
76
|
-
Detection methods:
|
|
77
|
-
- PYTEST_CURRENT_TEST environment variable (set by pytest)
|
|
78
|
-
- 'pytest' module in sys.modules (imported by test runner)
|
|
79
|
-
|
|
80
|
-
Note:
|
|
81
|
-
Used to avoid setting up test harness when pytest fixtures
|
|
82
|
-
already provide it.
|
|
83
|
-
"""
|
|
84
|
-
return "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def _print_validation_errors(exc: ValidationError) -> None:
    """Write a per-field report of CLI validation failures to stderr.

    Args:
        exc: The pydantic ValidationError raised while parsing the options.
    """
    # Checked in this order; only the first bound found in the error
    # context produces a hint.
    range_symbols = (("le", "≤"), ("ge", "≥"), ("lt", "<"), ("gt", ">"))
    range_error_types = ("less_than_equal", "greater_than_equal", "less_than", "greater_than")

    print("\nError: Invalid command line arguments\n", file=sys.stderr)
    for error in exc.errors():
        field = " -> ".join(str(loc) for loc in error["loc"])
        msg = error["msg"]
        value = error.get("input", "")

        # Options are exposed in kebab-case on the command line.
        cli_field = field.replace("_", "-")

        print(f" --{cli_field}: {msg}", file=sys.stderr)
        if value:
            print(f" Provided value: '{value}'", file=sys.stderr)

        # Extra hints for the most common mistakes.
        error_type = error["type"]
        if error_type == "float_parsing":
            print(" Hint: Please provide a valid number (e.g., 0.7)", file=sys.stderr)
        elif error_type == "int_parsing":
            print(" Hint: Please provide a valid integer (e.g., 10)", file=sys.stderr)
        elif error_type == "literal_error":
            ctx = error.get("ctx", {})
            expected = ctx.get("expected", "valid options")
            print(f" Hint: Valid options are: {expected}", file=sys.stderr)
        elif error_type in range_error_types:
            ctx = error.get("ctx", {})
            for key, symbol in range_symbols:
                if key in ctx:
                    print(f" Hint: Value must be {symbol} {ctx[key]}", file=sys.stderr)
                    break

    print("\nRun with --help to see all available options\n", file=sys.stderr)


def run_cli(
    *,
    flows: FlowSequence,
    flow_configs: ConfigSequence,
    options_cls: Type[TOptions],
    initializer: InitializerFunc = None,
    trace_name: str | None = None,
) -> None:
    """Execute pipeline flows from command-line arguments.

    Args:
        flows: Flows to run; forwarded unchanged to run_pipelines.
        flow_configs: Flow configurations; forwarded unchanged to
            run_pipelines.
        options_cls: Options class whose fields become CLI options. It is
            subclassed internally to add the positional working directory
            plus ``--project-name``, ``--start`` and ``--end``.
        initializer: Optional callable invoked with the parsed options. It
            must return a ``(project_name, documents)`` tuple; the returned
            project name is ignored and only the documents are used.
        trace_name: When set, the whole run is wrapped in a Laminar span
            named ``"{trace_name}-{project_name}"``.

    Environment setup:
        - Initializes logging system
        - Sets up LMNR tracing (failures are logged, not raised)
        - Creates Prefect test harness (if no API key and not in pytest)
        - Manages context stack for proper cleanup

    Raises:
        ValueError: If no project name can be determined, i.e. none was
            supplied and the working directory has no name even after
            being resolved (for example the filesystem root).
        SystemExit: With status 1 when the command-line options fail
            validation.

    Example:
        >>> # In __main__.py
        >>> from ai_pipeline_core.simple_runner import run_cli
        >>> from .flows import AnalysisFlow, SummaryFlow
        >>> from .config import AnalysisConfig, AnalysisOptions
        >>>
        >>> if __name__ == "__main__":
        ...     run_cli(
        ...         flows=[AnalysisFlow, SummaryFlow],
        ...         flow_configs=[
        ...             (AnalysisConfig, AnalysisOptions),
        ...             (AnalysisConfig, AnalysisOptions)
        ...         ],
        ...         options_cls=AnalysisOptions,
        ...         trace_name="document-analysis"
        ...     )

    Command line:
        $ python -m my_module ./output --temperature 0.5 --model gpt-5
        $ python -m my_module ./output --start 2  # Skip first flow

    Note:
        - Field names are converted to kebab-case for CLI (max_tokens → --max-tokens)
        - Validation errors are listed per field on stderr, with hints for
          common error types (numbers, literals, ranges)
        - Shows --help when no arguments provided
    """
    # With no arguments at all, show the usage text instead of failing on
    # the missing positional argument.
    if len(sys.argv) == 1:
        sys.argv.append("--help")

    _initialize_environment()

    class _RunnerOptions(  # type: ignore[reportRedeclaration]
        options_cls,
        cli_parse_args=True,
        cli_kebab_case=True,
        cli_exit_on_error=True,  # Let it exit normally on error
        cli_prog_name="ai-pipeline",
        cli_use_class_docs_for_groups=True,
    ):
        """Internal options class combining user options with CLI arguments.

        Created on each run_cli call as a subclass of the caller's
        options_cls, adding the standard arguments for pipeline execution.
        """

        working_directory: CliPositionalArg[Path]
        project_name: str | None = None
        start: int = 1
        end: int | None = None

        model_config = SettingsConfigDict(frozen=True, extra="ignore")

    try:
        opts = cast(FlowOptions, _RunnerOptions())  # type: ignore[reportCallIssue]
    except ValidationError as e:
        _print_validation_errors(e)
        sys.exit(1)

    wd: Path = cast(Path, getattr(opts, "working_directory"))
    wd.mkdir(parents=True, exist_ok=True)

    # Get project name from options or use directory basename.
    project_name = getattr(opts, "project_name", None)
    if not project_name:  # None or empty string
        # Path(".").name is "", so a working directory of "." used to end
        # in the ValueError below; resolving it yields the real directory
        # name. Paths that already have a name are left exactly as before.
        project_name = wd.name or wd.resolve().name

    # Ensure project_name is not empty
    if not project_name:
        raise ValueError("Project name cannot be empty")

    # Use initializer if provided, otherwise start with no documents.
    initial_documents = DocumentList([])
    if initializer:
        # The initializer's own project name is deliberately ignored.
        _, initial_documents = initializer(opts)

    start_step = getattr(opts, "start", 1)

    # Save initial documents only when starting from the first step.
    if start_step == 1 and initial_documents:
        save_documents_to_directory(wd, initial_documents)

    # Setup context stack with optional tracing and test harness.
    with ExitStack() as stack:
        if trace_name:
            stack.enter_context(
                Laminar.start_as_current_span(
                    name=f"{trace_name}-{project_name}", input=[opts.model_dump_json()]
                )
            )

        # No Prefect API key configured: run against the local test harness,
        # unless pytest is running (its fixtures are expected to provide one).
        if not settings.prefect_api_key and not _running_under_pytest():
            stack.enter_context(prefect_test_harness())
            stack.enter_context(disable_run_logger())

        asyncio.run(
            run_pipelines(
                project_name=project_name,
                output_dir=wd,
                flows=flows,
                flow_configs=flow_configs,
                flow_options=opts,
                start_step=start_step,
                end_step=getattr(opts, "end", None),
            )
        )
|