ai-pipeline-core 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +86 -4
- ai_pipeline_core/documents/__init__.py +11 -0
- ai_pipeline_core/documents/document.py +1107 -131
- ai_pipeline_core/documents/document_list.py +147 -38
- ai_pipeline_core/documents/flow_document.py +112 -11
- ai_pipeline_core/documents/mime_type.py +173 -15
- ai_pipeline_core/documents/task_document.py +117 -12
- ai_pipeline_core/documents/temporary_document.py +95 -0
- ai_pipeline_core/documents/utils.py +41 -9
- ai_pipeline_core/exceptions.py +47 -11
- ai_pipeline_core/flow/__init__.py +2 -0
- ai_pipeline_core/flow/config.py +250 -23
- ai_pipeline_core/flow/options.py +50 -1
- ai_pipeline_core/llm/__init__.py +6 -0
- ai_pipeline_core/llm/ai_messages.py +125 -27
- ai_pipeline_core/llm/client.py +278 -26
- ai_pipeline_core/llm/model_options.py +130 -1
- ai_pipeline_core/llm/model_response.py +239 -35
- ai_pipeline_core/llm/model_types.py +67 -0
- ai_pipeline_core/logging/__init__.py +13 -0
- ai_pipeline_core/logging/logging_config.py +72 -20
- ai_pipeline_core/logging/logging_mixin.py +38 -32
- ai_pipeline_core/pipeline.py +308 -60
- ai_pipeline_core/prefect.py +48 -1
- ai_pipeline_core/prompt_manager.py +215 -24
- ai_pipeline_core/settings.py +108 -4
- ai_pipeline_core/simple_runner/__init__.py +5 -0
- ai_pipeline_core/simple_runner/cli.py +145 -17
- ai_pipeline_core/simple_runner/simple_runner.py +244 -6
- ai_pipeline_core/tracing.py +232 -30
- ai_pipeline_core-0.1.11.dist-info/METADATA +450 -0
- ai_pipeline_core-0.1.11.dist-info/RECORD +36 -0
- ai_pipeline_core-0.1.8.dist-info/METADATA +0 -558
- ai_pipeline_core-0.1.8.dist-info/RECORD +0 -35
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.8.dist-info → ai_pipeline_core-0.1.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,3 +1,49 @@
|
|
|
1
|
+
"""Jinja2-based prompt template management system.
|
|
2
|
+
|
|
3
|
+
@public
|
|
4
|
+
|
|
5
|
+
This module provides the PromptManager class for loading and rendering
|
|
6
|
+
Jinja2 templates used as prompts for language models. It implements a
|
|
7
|
+
smart search strategy that looks for templates in both local and shared
|
|
8
|
+
directories.
|
|
9
|
+
|
|
10
|
+
Search strategy:
|
|
11
|
+
1. Local directory (same as calling module)
|
|
12
|
+
2. Local 'prompts' subdirectory
|
|
13
|
+
3. Parent 'prompts' directories (up to package boundary)
|
|
14
|
+
|
|
15
|
+
Key features:
|
|
16
|
+
- Automatic template discovery
|
|
17
|
+
- Jinja2 template rendering with context
|
|
18
|
+
- Smart path resolution (.jinja2/.jinja extension handling)
|
|
19
|
+
- Clear error messages for missing templates
|
|
20
|
+
|
|
21
|
+
Example:
|
|
22
|
+
>>> from ai_pipeline_core import PromptManager
|
|
23
|
+
>>>
|
|
24
|
+
>>> # Initialize at module level (not inside functions)
|
|
25
|
+
>>> pm = PromptManager(__file__)
|
|
26
|
+
>>>
|
|
27
|
+
>>> # Render a template
|
|
28
|
+
>>> prompt = pm.get(
|
|
29
|
+
... "analyze.jinja2",
|
|
30
|
+
... document=doc,
|
|
31
|
+
... instructions="Extract key points"
|
|
32
|
+
... )
|
|
33
|
+
|
|
34
|
+
Template organization:
|
|
35
|
+
project/
|
|
36
|
+
├── my_module.py # Can use local templates
|
|
37
|
+
├── analyze.jinja2 # Local template (same directory)
|
|
38
|
+
└── prompts/ # Shared prompts directory
|
|
39
|
+
├── summarize.jinja2
|
|
40
|
+
└── extract.jinja2
|
|
41
|
+
|
|
42
|
+
Note:
|
|
43
|
+
Templates should use .jinja2 or .jinja extension.
|
|
44
|
+
The extension can be omitted when calling get().
|
|
45
|
+
"""
|
|
46
|
+
|
|
1
47
|
from pathlib import Path
|
|
2
48
|
from typing import Any
|
|
3
49
|
|
|
@@ -5,29 +51,125 @@ import jinja2
|
|
|
5
51
|
|
|
6
52
|
from ai_pipeline_core.logging import get_pipeline_logger
|
|
7
53
|
|
|
8
|
-
from .exceptions import PromptNotFoundError, PromptRenderError
|
|
54
|
+
from .exceptions import PromptError, PromptNotFoundError, PromptRenderError
|
|
9
55
|
|
|
10
56
|
logger = get_pipeline_logger(__name__)
|
|
11
57
|
|
|
12
58
|
|
|
13
59
|
class PromptManager:
|
|
14
|
-
"""
|
|
60
|
+
"""Manages Jinja2 prompt templates with smart path resolution.
|
|
61
|
+
|
|
62
|
+
@public
|
|
63
|
+
|
|
64
|
+
PromptManager provides a convenient interface for loading and rendering
|
|
65
|
+
Jinja2 templates used as prompts for LLMs. It automatically searches for
|
|
66
|
+
templates in multiple locations, supporting both local (module-specific)
|
|
67
|
+
and shared (project-wide) templates.
|
|
15
68
|
|
|
16
|
-
|
|
17
|
-
|
|
69
|
+
Search hierarchy:
|
|
70
|
+
1. Same directory as the calling module (for local templates)
|
|
71
|
+
2. 'prompts' subdirectory in the calling module's directory
|
|
72
|
+
3. 'prompts' directories in parent packages (up to package boundary)
|
|
73
|
+
|
|
74
|
+
Attributes:
|
|
75
|
+
search_paths: List of directories where templates are searched.
|
|
76
|
+
env: Jinja2 Environment configured for prompt rendering.
|
|
77
|
+
|
|
78
|
+
Example:
|
|
79
|
+
>>> # BEST PRACTICE: Instantiate at module scope (top level), not inside functions
|
|
80
|
+
>>> # In flow/my_flow.py
|
|
81
|
+
>>> from ai_pipeline_core import PromptManager
|
|
82
|
+
>>> pm = PromptManager(__file__) # Module-level initialization
|
|
83
|
+
>>>
|
|
84
|
+
>>> # WRONG - Don't instantiate inside handlers or hot paths:
|
|
85
|
+
>>> # async def process():
|
|
86
|
+
>>> # pm = PromptManager(__file__) # NO! Creates new instance each call
|
|
87
|
+
>>>
|
|
88
|
+
>>> # Uses flow/prompts/analyze.jinja2 if it exists,
|
|
89
|
+
>>> # otherwise searches parent directories
|
|
90
|
+
>>> prompt = pm.get("analyze", context=data)
|
|
91
|
+
>>>
|
|
92
|
+
>>> # Can also use templates in same directory as module
|
|
93
|
+
>>> prompt = pm.get("local_template.jinja2")
|
|
94
|
+
|
|
95
|
+
Template format:
|
|
96
|
+
Templates use standard Jinja2 syntax:
|
|
97
|
+
```jinja2
|
|
98
|
+
Analyze the following document:
|
|
99
|
+
{{ document.name }}
|
|
100
|
+
|
|
101
|
+
{% if instructions %}
|
|
102
|
+
Instructions: {{ instructions }}
|
|
103
|
+
{% endif %}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Note:
|
|
107
|
+
- Autoescape is disabled for prompts (raw text output)
|
|
108
|
+
- Whitespace control is enabled (trim_blocks, lstrip_blocks)
|
|
109
|
+
|
|
110
|
+
Template Inheritance:
|
|
111
|
+
Templates support standard Jinja2 inheritance. Templates are searched
|
|
112
|
+
in order of search_paths, so templates in earlier paths override later ones.
|
|
113
|
+
Precedence (first match wins):
|
|
114
|
+
1. Same directory as module
|
|
115
|
+
2. Module's prompts/ subdirectory
|
|
116
|
+
3. Parent prompts/ directories (nearest to farthest)
|
|
117
|
+
- Templates are cached by Jinja2 for performance
|
|
18
118
|
"""
|
|
19
119
|
|
|
20
|
-
def __init__(self,
|
|
21
|
-
"""Initialize PromptManager with
|
|
120
|
+
def __init__(self, current_file: str, prompts_dir: str = "prompts"):
|
|
121
|
+
"""Initialize PromptManager with smart template discovery.
|
|
122
|
+
|
|
123
|
+
@public
|
|
124
|
+
|
|
125
|
+
Sets up the Jinja2 environment with a FileSystemLoader that searches
|
|
126
|
+
multiple directories for templates. The search starts from the calling
|
|
127
|
+
module's location and extends to parent package directories.
|
|
22
128
|
|
|
23
129
|
Args:
|
|
24
|
-
|
|
25
|
-
|
|
130
|
+
current_file: The __file__ path of the calling module. Must be
|
|
131
|
+
a valid file path (not __name__). Used as the
|
|
132
|
+
starting point for template discovery.
|
|
133
|
+
prompts_dir: Name of the prompts subdirectory to search for
|
|
134
|
+
in each package level. Defaults to "prompts".
|
|
135
|
+
Do not pass prompts_dir='prompts' because it is already the default.
|
|
136
|
+
|
|
137
|
+
Raises:
|
|
138
|
+
PromptError: If current_file is not a valid file path (e.g.,
|
|
139
|
+
if __name__ was passed instead of __file__).
|
|
140
|
+
|
|
141
|
+
Note:
|
|
142
|
+
Search behavior - Given a module at /project/flows/my_flow.py:
|
|
143
|
+
1. /project/flows/ (local templates)
|
|
144
|
+
2. /project/flows/prompts/ (if exists)
|
|
145
|
+
3. /project/prompts/ (if /project has __init__.py)
|
|
146
|
+
|
|
147
|
+
Search stops when no __init__.py is found (package boundary).
|
|
148
|
+
|
|
149
|
+
Example:
|
|
150
|
+
>>> # Correct usage
|
|
151
|
+
>>> pm = PromptManager(__file__)
|
|
152
|
+
>>>
|
|
153
|
+
>>> # Custom prompts directory name
|
|
154
|
+
>>> pm = PromptManager(__file__, prompts_dir="templates")
|
|
155
|
+
>>>
|
|
156
|
+
>>> # Common mistake (will raise PromptError)
|
|
157
|
+
>>> pm = PromptManager(__name__) # Wrong!
|
|
158
|
+
|
|
159
|
+
Note:
|
|
160
|
+
The search is limited to 4 parent levels to prevent
|
|
161
|
+
excessive filesystem traversal.
|
|
26
162
|
"""
|
|
27
163
|
search_paths: list[Path] = []
|
|
28
164
|
|
|
29
165
|
# Start from the directory containing the calling file
|
|
30
|
-
current_path = Path(
|
|
166
|
+
current_path = Path(current_file).resolve()
|
|
167
|
+
if not current_path.exists():
|
|
168
|
+
raise PromptError(
|
|
169
|
+
f"PromptManager expected __file__ (a valid file path), "
|
|
170
|
+
f"but got {current_file!r}. Did you pass __name__ instead?"
|
|
171
|
+
)
|
|
172
|
+
|
|
31
173
|
if current_path.is_file():
|
|
32
174
|
current_path = current_path.parent
|
|
33
175
|
|
|
@@ -74,32 +216,81 @@ class PromptManager:
|
|
|
74
216
|
)
|
|
75
217
|
|
|
76
218
|
def get(self, prompt_path: str, **kwargs: Any) -> str:
|
|
77
|
-
"""
|
|
78
|
-
|
|
219
|
+
"""Load and render a Jinja2 template with the given context.
|
|
220
|
+
|
|
221
|
+
@public
|
|
222
|
+
|
|
223
|
+
Searches for the template in all configured search paths and renders
|
|
224
|
+
it with the provided context variables. Automatically tries adding
|
|
225
|
+
.jinja2, .jinja, or .j2 extensions if the file is not found.
|
|
79
226
|
|
|
80
227
|
Args:
|
|
81
|
-
prompt_path:
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
228
|
+
prompt_path: Path to the template file, relative to any search
|
|
229
|
+
directory. Can be a simple filename ("analyze")
|
|
230
|
+
or include subdirectories ("tasks/summarize").
|
|
231
|
+
Extensions (.jinja2, .jinja, .j2) are optional.
|
|
232
|
+
**kwargs: Context variables passed to the template. These become
|
|
233
|
+
available as variables within the Jinja2 template.
|
|
85
234
|
|
|
86
235
|
Returns:
|
|
87
|
-
The rendered
|
|
236
|
+
The rendered template as a string, ready to be sent to an LLM.
|
|
237
|
+
|
|
238
|
+
Raises:
|
|
239
|
+
PromptNotFoundError: If the template file cannot be found in
|
|
240
|
+
any search path.
|
|
241
|
+
PromptRenderError: If the template contains errors or if
|
|
242
|
+
rendering fails (e.g., missing variables,
|
|
243
|
+
syntax errors).
|
|
244
|
+
|
|
245
|
+
Note:
|
|
246
|
+
Template resolution - Given prompt_path="analyze":
|
|
247
|
+
1. Try "analyze" as-is
|
|
248
|
+
2. Try "analyze.jinja2"
|
|
249
|
+
3. Try "analyze.jinja", then "analyze.j2"
|
|
250
|
+
|
|
251
|
+
The first matching file is used.
|
|
252
|
+
|
|
253
|
+
Example:
|
|
254
|
+
>>> pm = PromptManager(__file__)
|
|
255
|
+
>>>
|
|
256
|
+
>>> # Simple rendering
|
|
257
|
+
>>> prompt = pm.get("summarize", text="Long document...")
|
|
258
|
+
>>>
|
|
259
|
+
>>> # With complex context
|
|
260
|
+
>>> prompt = pm.get(
|
|
261
|
+
... "analyze",
|
|
262
|
+
... document=doc,
|
|
263
|
+
... max_length=500,
|
|
264
|
+
... style="technical",
|
|
265
|
+
... options={"include_metadata": True}
|
|
266
|
+
... )
|
|
267
|
+
>>>
|
|
268
|
+
>>> # Nested template path
|
|
269
|
+
>>> prompt = pm.get("flows/extraction/extract_entities")
|
|
270
|
+
|
|
271
|
+
Template example:
|
|
272
|
+
```jinja2
|
|
273
|
+
Summarize the following text in {{ max_length }} words:
|
|
274
|
+
|
|
275
|
+
{{ text }}
|
|
276
|
+
|
|
277
|
+
{% if style %}
|
|
278
|
+
Style: {{ style }}
|
|
279
|
+
{% endif %}
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
Note:
|
|
283
|
+
All Jinja2 features are available: loops, conditionals,
|
|
284
|
+
filters, macros, inheritance, etc.
|
|
88
285
|
"""
|
|
89
286
|
try:
|
|
90
287
|
template = self.env.get_template(prompt_path)
|
|
91
288
|
return template.render(**kwargs)
|
|
92
289
|
except jinja2.TemplateNotFound:
|
|
93
290
|
# If the template wasn't found as given, retry with each known template extension appended
|
|
94
|
-
|
|
95
|
-
try:
|
|
96
|
-
template = self.env.get_template(prompt_path + ".jinja2")
|
|
97
|
-
return template.render(**kwargs)
|
|
98
|
-
except jinja2.TemplateNotFound:
|
|
99
|
-
pass # Fall through to the original error
|
|
100
|
-
if not prompt_path.endswith(".jinja"):
|
|
291
|
+
for extension in [".jinja2", ".jinja", ".j2"]:
|
|
101
292
|
try:
|
|
102
|
-
template = self.env.get_template(prompt_path +
|
|
293
|
+
template = self.env.get_template(prompt_path + extension)
|
|
103
294
|
return template.render(**kwargs)
|
|
104
295
|
except jinja2.TemplateNotFound:
|
|
105
296
|
pass # Fall through to the original error
|
ai_pipeline_core/settings.py
CHANGED
|
@@ -1,12 +1,115 @@
|
|
|
1
|
-
"""Core configuration settings for pipeline operations.
|
|
1
|
+
"""Core configuration settings for pipeline operations.
|
|
2
|
+
|
|
3
|
+
@public
|
|
4
|
+
|
|
5
|
+
This module provides the Settings base class for configuration management.
|
|
6
|
+
Applications should inherit from Settings to create their own ProjectSettings
|
|
7
|
+
class with additional configuration fields.
|
|
8
|
+
|
|
9
|
+
Environment variables:
|
|
10
|
+
OPENAI_BASE_URL: LiteLLM proxy endpoint (e.g., http://localhost:4000)
|
|
11
|
+
OPENAI_API_KEY: API key for LiteLLM proxy authentication
|
|
12
|
+
PREFECT_API_URL: Prefect server endpoint for flow orchestration
|
|
13
|
+
PREFECT_API_KEY: Prefect API authentication key
|
|
14
|
+
LMNR_PROJECT_API_KEY: Laminar project key for observability
|
|
15
|
+
|
|
16
|
+
Configuration precedence:
|
|
17
|
+
1. Environment variables (highest priority)
|
|
18
|
+
2. .env file in current directory
|
|
19
|
+
3. Default values (empty strings)
|
|
20
|
+
|
|
21
|
+
Example:
|
|
22
|
+
>>> from ai_pipeline_core import Settings
|
|
23
|
+
>>>
|
|
24
|
+
>>> # Create your project's settings class
|
|
25
|
+
>>> class ProjectSettings(Settings):
|
|
26
|
+
... app_name: str = "my-app"
|
|
27
|
+
... debug_mode: bool = False
|
|
28
|
+
>>>
|
|
29
|
+
>>> # Create singleton instance
|
|
30
|
+
>>> settings = ProjectSettings()
|
|
31
|
+
>>>
|
|
32
|
+
>>> # Access configuration
|
|
33
|
+
>>> print(settings.openai_base_url)
|
|
34
|
+
>>> print(settings.app_name)
|
|
35
|
+
|
|
36
|
+
.env file format:
|
|
37
|
+
OPENAI_BASE_URL=http://localhost:4000
|
|
38
|
+
OPENAI_API_KEY=sk-1234567890
|
|
39
|
+
PREFECT_API_URL=http://localhost:4200/api
|
|
40
|
+
PREFECT_API_KEY=pnu_abc123
|
|
41
|
+
LMNR_PROJECT_API_KEY=lmnr_proj_xyz
|
|
42
|
+
APP_NAME=production-app
|
|
43
|
+
DEBUG_MODE=false
|
|
44
|
+
|
|
45
|
+
Note:
|
|
46
|
+
Settings are loaded once at initialization and frozen. There is no
|
|
47
|
+
built-in reload mechanism - the process must be restarted to pick up
|
|
48
|
+
changes to environment variables or .env file. This is by design to
|
|
49
|
+
ensure consistency during execution.
|
|
50
|
+
"""
|
|
2
51
|
|
|
3
52
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
4
53
|
|
|
5
54
|
|
|
6
55
|
class Settings(BaseSettings):
|
|
7
|
-
"""
|
|
56
|
+
"""Base configuration class for AI Pipeline applications.
|
|
57
|
+
|
|
58
|
+
@public
|
|
59
|
+
|
|
60
|
+
Settings is designed to be inherited by your application's configuration
|
|
61
|
+
class. It provides core AI Pipeline settings and type-safe configuration
|
|
62
|
+
management with automatic loading from environment variables and .env files.
|
|
63
|
+
All settings are immutable after initialization.
|
|
64
|
+
|
|
65
|
+
Inherit from Settings to add your application-specific configuration:
|
|
66
|
+
|
|
67
|
+
>>> from ai_pipeline_core import Settings
|
|
68
|
+
>>>
|
|
69
|
+
>>> class ProjectSettings(Settings):
|
|
70
|
+
... # Your custom settings
|
|
71
|
+
... app_name: str = "my-app"
|
|
72
|
+
... max_retries: int = 3
|
|
73
|
+
... enable_cache: bool = True
|
|
74
|
+
>>>
|
|
75
|
+
>>> # Create singleton instance for your app
|
|
76
|
+
>>> settings = ProjectSettings()
|
|
77
|
+
|
|
78
|
+
Core Attributes:
|
|
79
|
+
openai_base_url: LiteLLM proxy URL for OpenAI-compatible API.
|
|
80
|
+
Required for all LLM operations. Usually
|
|
81
|
+
http://localhost:4000 for local development.
|
|
82
|
+
|
|
83
|
+
openai_api_key: Authentication key for LiteLLM proxy. Required
|
|
84
|
+
for LLM operations. Format depends on proxy config.
|
|
85
|
+
|
|
86
|
+
prefect_api_url: Prefect server API endpoint. Required for flow
|
|
87
|
+
deployment and remote execution. Leave empty for
|
|
88
|
+
local-only execution.
|
|
89
|
+
|
|
90
|
+
prefect_api_key: Prefect API authentication key. Required only
|
|
91
|
+
when connecting to Prefect Cloud or secured server.
|
|
92
|
+
|
|
93
|
+
lmnr_project_api_key: Laminar (LMNR) project API key for tracing
|
|
94
|
+
and observability. Optional but recommended
|
|
95
|
+
for production monitoring.
|
|
96
|
+
|
|
97
|
+
Configuration sources:
|
|
98
|
+
- Environment variables (highest priority)
|
|
99
|
+
- .env file in current directory
|
|
100
|
+
- Default values in class definition
|
|
101
|
+
|
|
102
|
+
Note:
|
|
103
|
+
Empty strings are used as defaults to allow optional services.
|
|
104
|
+
Check for empty values before using service-specific settings.
|
|
105
|
+
"""
|
|
8
106
|
|
|
9
|
-
model_config = SettingsConfigDict(
|
|
107
|
+
model_config = SettingsConfigDict(
|
|
108
|
+
env_file=".env",
|
|
109
|
+
env_file_encoding="utf-8",
|
|
110
|
+
extra="ignore",
|
|
111
|
+
frozen=True, # Settings are immutable after initialization
|
|
112
|
+
)
|
|
10
113
|
|
|
11
114
|
# LLM API Configuration
|
|
12
115
|
openai_base_url: str = ""
|
|
@@ -20,5 +123,6 @@ class Settings(BaseSettings):
|
|
|
20
123
|
lmnr_project_api_key: str = ""
|
|
21
124
|
|
|
22
125
|
|
|
23
|
-
#
|
|
126
|
+
# Legacy: Module-level instance for backwards compatibility
|
|
127
|
+
# Applications should create their own settings instance
|
|
24
128
|
settings = Settings()
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Command-line interface for simple pipeline execution."""
|
|
2
|
+
|
|
1
3
|
from __future__ import annotations
|
|
2
4
|
|
|
3
5
|
import asyncio
|
|
@@ -8,6 +10,7 @@ from pathlib import Path
|
|
|
8
10
|
from typing import Callable, Type, TypeVar, cast
|
|
9
11
|
|
|
10
12
|
from lmnr import Laminar
|
|
13
|
+
from pydantic import ValidationError
|
|
11
14
|
from pydantic_settings import CliPositionalArg, SettingsConfigDict
|
|
12
15
|
|
|
13
16
|
from ai_pipeline_core.documents import DocumentList
|
|
@@ -21,10 +24,37 @@ from .simple_runner import ConfigSequence, FlowSequence, run_pipelines, save_doc
|
|
|
21
24
|
logger = get_pipeline_logger(__name__)
|
|
22
25
|
|
|
23
26
|
TOptions = TypeVar("TOptions", bound=FlowOptions)
|
|
27
|
+
"""Type variable for FlowOptions subclasses used in CLI."""
|
|
28
|
+
|
|
24
29
|
InitializerFunc = Callable[[FlowOptions], tuple[str, DocumentList]] | None
|
|
30
|
+
"""Function type for custom pipeline initialization.
|
|
31
|
+
|
|
32
|
+
Initializers can create initial documents or setup project state
|
|
33
|
+
before flow execution begins.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
FlowOptions: Parsed CLI options
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
Tuple of (project_name, initial_documents) or None
|
|
40
|
+
"""
|
|
25
41
|
|
|
26
42
|
|
|
27
43
|
def _initialize_environment() -> None:
|
|
44
|
+
"""Initialize logging and observability systems.
|
|
45
|
+
|
|
46
|
+
Sets up the pipeline logging configuration and attempts to
|
|
47
|
+
initialize LMNR (Laminar) for distributed tracing. Failures
|
|
48
|
+
in LMNR initialization are logged but don't stop execution.
|
|
49
|
+
|
|
50
|
+
Side effects:
|
|
51
|
+
- Configures Python logging system
|
|
52
|
+
- Initializes Laminar SDK if API key is available
|
|
53
|
+
- Logs initialization status
|
|
54
|
+
|
|
55
|
+
Note:
|
|
56
|
+
Called automatically by run_cli before parsing arguments.
|
|
57
|
+
"""
|
|
28
58
|
setup_logging()
|
|
29
59
|
try:
|
|
30
60
|
Laminar.initialize()
|
|
@@ -33,8 +63,24 @@ def _initialize_environment() -> None:
|
|
|
33
63
|
logger.warning(f"Failed to initialize LMNR tracing: {e}")
|
|
34
64
|
|
|
35
65
|
|
|
36
|
-
def _running_under_pytest() -> bool:
|
|
37
|
-
"""
|
|
66
|
+
def _running_under_pytest() -> bool:
|
|
67
|
+
"""Check if code is running under pytest.
|
|
68
|
+
|
|
69
|
+
Detects pytest execution context to determine whether test
|
|
70
|
+
fixtures will provide necessary contexts (like Prefect test
|
|
71
|
+
harness). This prevents duplicate context setup.
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
True if running under pytest, False otherwise.
|
|
75
|
+
|
|
76
|
+
Detection methods:
|
|
77
|
+
- PYTEST_CURRENT_TEST environment variable (set by pytest)
|
|
78
|
+
- 'pytest' module in sys.modules (imported by test runner)
|
|
79
|
+
|
|
80
|
+
Note:
|
|
81
|
+
Used to avoid setting up test harness when pytest fixtures
|
|
82
|
+
already provide it.
|
|
83
|
+
"""
|
|
38
84
|
return "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules
|
|
39
85
|
|
|
40
86
|
|
|
@@ -46,17 +92,51 @@ def run_cli(
|
|
|
46
92
|
initializer: InitializerFunc = None,
|
|
47
93
|
trace_name: str | None = None,
|
|
48
94
|
) -> None:
|
|
49
|
-
"""
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
95
|
+
"""Execute pipeline flows from command-line arguments.
|
|
96
|
+
|
|
97
|
+
Environment setup:
|
|
98
|
+
- Initializes logging system
|
|
99
|
+
- Sets up LMNR tracing (if API key configured)
|
|
100
|
+
- Creates Prefect test harness (if no API key and not in pytest)
|
|
101
|
+
- Manages context stack for proper cleanup
|
|
102
|
+
|
|
103
|
+
Raises:
|
|
104
|
+
ValueError: If project name is empty after initialization.
|
|
105
|
+
|
|
106
|
+
Example:
|
|
107
|
+
>>> # In __main__.py
|
|
108
|
+
>>> from ai_pipeline_core.simple_runner import run_cli
|
|
109
|
+
>>> from .flows import AnalysisFlow, SummaryFlow
|
|
110
|
+
>>> from .config import AnalysisConfig, AnalysisOptions
|
|
111
|
+
>>>
|
|
112
|
+
>>> if __name__ == "__main__":
|
|
113
|
+
... run_cli(
|
|
114
|
+
... flows=[AnalysisFlow, SummaryFlow],
|
|
115
|
+
... flow_configs=[
|
|
116
|
+
... (AnalysisConfig, AnalysisOptions),
|
|
117
|
+
... (AnalysisConfig, AnalysisOptions)
|
|
118
|
+
... ],
|
|
119
|
+
... options_cls=AnalysisOptions,
|
|
120
|
+
... trace_name="document-analysis"
|
|
121
|
+
... )
|
|
122
|
+
|
|
123
|
+
Command line:
|
|
124
|
+
$ python -m my_module ./output --temperature 0.5 --model gpt-5
|
|
125
|
+
$ python -m my_module ./output --start 2 # Skip first flow
|
|
126
|
+
|
|
127
|
+
Note:
|
|
128
|
+
- Field names are converted to kebab-case for CLI (max_tokens → --max-tokens)
|
|
129
|
+
- Boolean fields become flags (--verbose/--no-verbose)
|
|
130
|
+
- Field descriptions from Pydantic become help text
|
|
131
|
+
- Type hints are enforced during parsing
|
|
132
|
+
- Validation errors show helpful messages with field names
|
|
133
|
+
- Includes hints for common error types (numbers, ranges)
|
|
134
|
+
- Exits with status 1 on error
|
|
135
|
+
- Shows --help when no arguments provided
|
|
56
136
|
"""
|
|
57
137
|
# Check if no arguments provided before initialization
|
|
58
138
|
if len(sys.argv) == 1:
|
|
59
|
-
# Add --help to show usage
|
|
139
|
+
# Add --help to show usage when run without arguments
|
|
60
140
|
sys.argv.append("--help")
|
|
61
141
|
|
|
62
142
|
_initialize_environment()
|
|
@@ -69,6 +149,12 @@ def run_cli(
|
|
|
69
149
|
cli_prog_name="ai-pipeline",
|
|
70
150
|
cli_use_class_docs_for_groups=True,
|
|
71
151
|
):
|
|
152
|
+
"""Internal options class combining user options with CLI arguments.
|
|
153
|
+
|
|
154
|
+
Dynamically created class that inherits from user's options_cls
|
|
155
|
+
and adds standard CLI arguments for pipeline execution.
|
|
156
|
+
"""
|
|
157
|
+
|
|
72
158
|
working_directory: CliPositionalArg[Path]
|
|
73
159
|
project_name: str | None = None
|
|
74
160
|
start: int = 1
|
|
@@ -76,7 +162,49 @@ def run_cli(
|
|
|
76
162
|
|
|
77
163
|
model_config = SettingsConfigDict(frozen=True, extra="ignore")
|
|
78
164
|
|
|
79
|
-
|
|
165
|
+
try:
|
|
166
|
+
opts = cast(FlowOptions, _RunnerOptions()) # type: ignore[reportCallIssue]
|
|
167
|
+
except ValidationError as e:
|
|
168
|
+
print("\nError: Invalid command line arguments\n", file=sys.stderr)
|
|
169
|
+
for error in e.errors():
|
|
170
|
+
field = " -> ".join(str(loc) for loc in error["loc"])
|
|
171
|
+
msg = error["msg"]
|
|
172
|
+
value = error.get("input", "")
|
|
173
|
+
|
|
174
|
+
# Format the field name nicely (convert from snake_case to kebab-case for CLI)
|
|
175
|
+
cli_field = field.replace("_", "-")
|
|
176
|
+
|
|
177
|
+
print(f" --{cli_field}: {msg}", file=sys.stderr)
|
|
178
|
+
if value:
|
|
179
|
+
print(f" Provided value: '{value}'", file=sys.stderr)
|
|
180
|
+
|
|
181
|
+
# Add helpful hints for common errors
|
|
182
|
+
if error["type"] == "float_parsing":
|
|
183
|
+
print(" Hint: Please provide a valid number (e.g., 0.7)", file=sys.stderr)
|
|
184
|
+
elif error["type"] == "int_parsing":
|
|
185
|
+
print(" Hint: Please provide a valid integer (e.g., 10)", file=sys.stderr)
|
|
186
|
+
elif error["type"] == "literal_error":
|
|
187
|
+
ctx = error.get("ctx", {})
|
|
188
|
+
expected = ctx.get("expected", "valid options")
|
|
189
|
+
print(f" Hint: Valid options are: {expected}", file=sys.stderr)
|
|
190
|
+
elif error["type"] in [
|
|
191
|
+
"less_than_equal",
|
|
192
|
+
"greater_than_equal",
|
|
193
|
+
"less_than",
|
|
194
|
+
"greater_than",
|
|
195
|
+
]:
|
|
196
|
+
ctx = error.get("ctx", {})
|
|
197
|
+
if "le" in ctx:
|
|
198
|
+
print(f" Hint: Value must be ≤ {ctx['le']}", file=sys.stderr)
|
|
199
|
+
elif "ge" in ctx:
|
|
200
|
+
print(f" Hint: Value must be ≥ {ctx['ge']}", file=sys.stderr)
|
|
201
|
+
elif "lt" in ctx:
|
|
202
|
+
print(f" Hint: Value must be < {ctx['lt']}", file=sys.stderr)
|
|
203
|
+
elif "gt" in ctx:
|
|
204
|
+
print(f" Hint: Value must be > {ctx['gt']}", file=sys.stderr)
|
|
205
|
+
|
|
206
|
+
print("\nRun with --help to see all available options\n", file=sys.stderr)
|
|
207
|
+
sys.exit(1)
|
|
80
208
|
|
|
81
209
|
wd: Path = cast(Path, getattr(opts, "working_directory"))
|
|
82
210
|
wd.mkdir(parents=True, exist_ok=True)
|
|
@@ -97,23 +225,23 @@ def run_cli(
|
|
|
97
225
|
# Always expect tuple format from initializer
|
|
98
226
|
_, initial_documents = init_result # Ignore project name from initializer
|
|
99
227
|
|
|
228
|
+
# Save initial documents if starting from first step
|
|
100
229
|
if getattr(opts, "start", 1) == 1 and initial_documents:
|
|
101
230
|
save_documents_to_directory(wd, initial_documents)
|
|
102
231
|
|
|
103
232
|
# Setup context stack with optional test harness and tracing
|
|
104
|
-
|
|
105
233
|
with ExitStack() as stack:
|
|
106
|
-
if not settings.prefect_api_key and not _running_under_pytest():
|
|
107
|
-
stack.enter_context(prefect_test_harness())
|
|
108
|
-
stack.enter_context(disable_run_logger())
|
|
109
|
-
|
|
110
234
|
if trace_name:
|
|
111
235
|
stack.enter_context(
|
|
112
|
-
Laminar.
|
|
236
|
+
Laminar.start_as_current_span(
|
|
113
237
|
name=f"{trace_name}-{project_name}", input=[opts.model_dump_json()]
|
|
114
238
|
)
|
|
115
239
|
)
|
|
116
240
|
|
|
241
|
+
if not settings.prefect_api_key and not _running_under_pytest():
|
|
242
|
+
stack.enter_context(prefect_test_harness())
|
|
243
|
+
stack.enter_context(disable_run_logger())
|
|
244
|
+
|
|
117
245
|
asyncio.run(
|
|
118
246
|
run_pipelines(
|
|
119
247
|
project_name=project_name,
|