ai-pipeline-core 0.2.6__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. ai_pipeline_core/__init__.py +78 -125
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +37 -82
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +130 -81
  37. ai_pipeline_core/llm/client.py +327 -193
  38. ai_pipeline_core/llm/model_options.py +14 -86
  39. ai_pipeline_core/llm/model_response.py +60 -103
  40. ai_pipeline_core/llm/model_types.py +16 -34
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -283
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -483
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/simple_runner/__init__.py +0 -14
  85. ai_pipeline_core/simple_runner/cli.py +0 -254
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -247
  87. ai_pipeline_core/storage/__init__.py +0 -8
  88. ai_pipeline_core/storage/storage.py +0 -628
  89. ai_pipeline_core/utils/__init__.py +0 -8
  90. ai_pipeline_core/utils/deploy.py +0 -373
  91. ai_pipeline_core/utils/remote_deployment.py +0 -269
  92. ai_pipeline_core-0.2.6.dist-info/METADATA +0 -500
  93. ai_pipeline_core-0.2.6.dist-info/RECORD +0 -41
  94. {ai_pipeline_core-0.2.6.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,16 @@
1
+ """Flow options base class for pipeline execution."""
2
+
3
+ from pydantic_settings import BaseSettings, SettingsConfigDict
4
+
5
+
6
+ class FlowOptions(BaseSettings):
7
+ """Base configuration for pipeline flows.
8
+
9
+ Subclass to add flow-specific parameters. Uses pydantic-settings
10
+ for environment variable overrides. Immutable after creation.
11
+ """
12
+
13
+ model_config = SettingsConfigDict(frozen=True, extra="allow")
14
+
15
+
16
+ __all__ = ["FlowOptions"]
@@ -1,7 +1,5 @@
1
1
  """Jinja2-based prompt template management system.
2
2
 
3
- @public
4
-
5
3
  This module provides the PromptManager class for loading and rendering
6
4
  Jinja2 templates used as prompts for language models. It implements a
7
5
  smart search strategy that looks for templates in both local and shared
@@ -13,27 +11,6 @@ Search strategy:
13
11
  3. Parent 'prompts' directories (search ascends parent packages up to the package
14
12
  boundary or after 4 parent levels, whichever comes first)
15
13
 
16
- Key features:
17
- - Automatic template discovery
18
- - Jinja2 template rendering with context
19
- - Smart path resolution (.jinja2/.jinja extension handling)
20
- - Clear error messages for missing templates
21
- - Built-in global variables:
22
- - current_date: Current date in format "03 January 2025" (string)
23
-
24
- Example:
25
- >>> from ai_pipeline_core import PromptManager
26
- >>>
27
- >>> # Initialize at module level (not inside functions)
28
- >>> pm = PromptManager(__file__)
29
- >>>
30
- >>> # Render a template
31
- >>> prompt = pm.get(
32
- ... "analyze.jinja2",
33
- ... document=doc,
34
- ... instructions="Extract key points"
35
- ... )
36
-
37
14
  Template organization:
38
15
  project/
39
16
  ├── my_module.py # Can use local templates
@@ -42,9 +19,8 @@ Template organization:
42
19
  ├── summarize.jinja2
43
20
  └── extract.jinja2
44
21
 
45
- Note:
46
- Templates should use .jinja2 or .jinja extension.
47
- The extension can be omitted when calling get().
22
+ Templates should use .jinja2 or .jinja extension.
23
+ The extension can be omitted when calling get().
48
24
  """
49
25
 
50
26
  from datetime import datetime
@@ -63,8 +39,6 @@ logger = get_pipeline_logger(__name__)
63
39
  class PromptManager:
64
40
  """Manages Jinja2 prompt templates with smart path resolution.
65
41
 
66
- @public
67
-
68
42
  PromptManager provides a convenient interface for loading and rendering
69
43
  Jinja2 templates used as prompts for LLMs. It automatically searches for
70
44
  templates in multiple locations, supporting both local (module-specific)
@@ -80,23 +54,6 @@ class PromptManager:
80
54
  search_paths: List of directories where templates are searched.
81
55
  env: Jinja2 Environment configured for prompt rendering.
82
56
 
83
- Example:
84
- >>> # BEST PRACTICE: Instantiate at module scope (top level), not inside functions
85
- >>> # In flow/my_flow.py
86
- >>> from ai_pipeline_core import PromptManager
87
- >>> pm = PromptManager(__file__) # Module-level initialization
88
- >>>
89
- >>> # WRONG - Don't instantiate inside handlers or hot paths:
90
- >>> # async def process():
91
- >>> # pm = PromptManager(__file__) # NO! Creates new instance each call
92
- >>>
93
- >>> # Uses flow/prompts/analyze.jinja2 if it exists,
94
- >>> # otherwise searches parent directories
95
- >>> prompt = pm.get("analyze", context=data)
96
- >>>
97
- >>> # Can also use templates in same directory as module
98
- >>> prompt = pm.get("local_template.jinja2")
99
-
100
57
  Template format:
101
58
  Templates use standard Jinja2 syntax:
102
59
  ```jinja2
@@ -110,9 +67,8 @@ class PromptManager:
110
67
  Date: {{ current_date }} # Current date in format "03 January 2025"
111
68
  ```
112
69
 
113
- Note:
114
- - Autoescape is disabled for prompts (raw text output)
115
- - Whitespace control is enabled (trim_blocks, lstrip_blocks)
70
+ Autoescape is disabled for prompts (raw text output).
71
+ Whitespace control is enabled (trim_blocks, lstrip_blocks).
116
72
 
117
73
  Template Inheritance:
118
74
  Templates support standard Jinja2 inheritance. Templates are searched
@@ -127,8 +83,6 @@ class PromptManager:
127
83
  def __init__(self, current_file: str, prompts_dir: str = "prompts"):
128
84
  """Initialize PromptManager with smart template discovery.
129
85
 
130
- @public
131
-
132
86
  Sets up the Jinja2 environment with a FileSystemLoader that searches
133
87
  multiple directories for templates. The search starts from the calling
134
88
  module's location and extends to parent package directories.
@@ -145,34 +99,19 @@ class PromptManager:
145
99
  PromptError: If current_file is not a valid file path (e.g.,
146
100
  if __name__ was passed instead of __file__).
147
101
 
148
- Note:
149
- Search behavior - Given a module at /project/flows/my_flow.py:
150
- 1. /project/flows/ (local templates)
151
- 2. /project/flows/prompts/ (if exists)
102
+ Search behavior - Given a module at /project/tasks/my_task.py:
103
+ 1. /project/tasks/ (local templates)
104
+ 2. /project/tasks/prompts/ (if exists)
152
105
  3. /project/prompts/ (if /project has __init__.py)
153
-
154
- Search ascends parent packages up to the package boundary or after 4 parent
155
- levels, whichever comes first.
156
-
157
- Example:
158
- >>> # Correct usage
159
- >>> pm = PromptManager(__file__)
160
- >>>
161
- >>> # Custom prompts directory name
162
- >>> pm = PromptManager(__file__, prompts_dir="templates")
163
- >>>
164
- >>> # Common mistake (will raise PromptError)
165
- >>> pm = PromptManager(__name__) # Wrong!
106
+ Search ascends parent packages up to the package boundary or after 4 parent
107
+ levels, whichever comes first.
166
108
  """
167
109
  search_paths: list[Path] = []
168
110
 
169
111
  # Start from the directory containing the calling file
170
112
  current_path = Path(current_file).resolve()
171
113
  if not current_path.exists():
172
- raise PromptError(
173
- f"PromptManager expected __file__ (a valid file path), "
174
- f"but got {current_file!r}. Did you pass __name__ instead?"
175
- )
114
+ raise PromptError(f"PromptManager expected __file__ (a valid file path), but got {current_file!r}. Did you pass __name__ instead?")
176
115
 
177
116
  if current_path.is_file():
178
117
  current_path = current_path.parent
@@ -225,8 +164,6 @@ class PromptManager:
225
164
  def get(self, prompt_path: str, **kwargs: Any) -> str:
226
165
  """Load and render a Jinja2 template with the given context.
227
166
 
228
- @public
229
-
230
167
  Searches for the template in all configured search paths and renders
231
168
  it with the provided context variables. Automatically tries adding
232
169
  .jinja2 or .jinja extensions if the file is not found.
@@ -249,31 +186,11 @@ class PromptManager:
249
186
  rendering fails (e.g., missing variables,
250
187
  syntax errors).
251
188
 
252
- Note:
253
- Template resolution - Given prompt_path="analyze":
189
+ Template resolution - Given prompt_path="analyze":
254
190
  1. Try "analyze" as-is
255
191
  2. Try "analyze.jinja2"
256
192
  3. Try "analyze.jinja"
257
-
258
- The first matching file is used.
259
-
260
- Example:
261
- >>> pm = PromptManager(__file__)
262
- >>>
263
- >>> # Simple rendering
264
- >>> prompt = pm.get("summarize", text="Long document...")
265
- >>>
266
- >>> # With complex context
267
- >>> prompt = pm.get(
268
- ... "analyze",
269
- ... document=doc,
270
- ... max_length=500,
271
- ... style="technical",
272
- ... options={"include_metadata": True}
273
- ... )
274
- >>>
275
- >>> # Nested template path
276
- >>> prompt = pm.get("flows/extraction/extract_entities")
193
+ The first matching file is used.
277
194
 
278
195
  Template example:
279
196
  ```jinja2
@@ -286,9 +203,8 @@ class PromptManager:
286
203
  {% endif %}
287
204
  ```
288
205
 
289
- Note:
290
- All Jinja2 features are available: loops, conditionals,
291
- filters, macros, inheritance, etc.
206
+ All Jinja2 features are available: loops, conditionals,
207
+ filters, macros, inheritance, etc.
292
208
  """
293
209
  try:
294
210
  template = self.env.get_template(prompt_path)
@@ -301,13 +217,11 @@ class PromptManager:
301
217
  return template.render(**kwargs)
302
218
  except jinja2.TemplateNotFound:
303
219
  pass # Fall through to the original error
304
- raise PromptNotFoundError(
305
- f"Prompt template '{prompt_path}' not found (searched in {self.search_paths})."
306
- )
220
+ raise PromptNotFoundError(f"Prompt template '{prompt_path}' not found (searched in {self.search_paths}).") from None
307
221
  except jinja2.TemplateError as e:
308
222
  raise PromptRenderError(f"Template error in '{prompt_path}': {e}") from e
309
223
  except PromptNotFoundError:
310
224
  raise # Re-raise our custom exception
311
- except (KeyError, TypeError, AttributeError, IOError, ValueError) as e:
225
+ except (OSError, KeyError, TypeError, AttributeError, ValueError) as e:
312
226
  logger.error(f"Unexpected error rendering '{prompt_path}'", exc_info=True)
313
227
  raise PromptRenderError(f"Failed to render prompt '{prompt_path}': {e}") from e
@@ -1,7 +1,5 @@
1
1
  """Core configuration settings for pipeline operations.
2
2
 
3
- @public
4
-
5
3
  This module provides the Settings base class for configuration management.
6
4
  Applications should inherit from Settings to create their own ProjectSettings
7
5
  class with additional configuration fields.
@@ -12,43 +10,27 @@ Environment variables:
12
10
  PREFECT_API_URL: Prefect server endpoint for flow orchestration
13
11
  PREFECT_API_KEY: Prefect API authentication key
14
12
  LMNR_PROJECT_API_KEY: Laminar project key for observability
15
- GCS_SERVICE_ACCOUNT_FILE: Path to GCS service account JSON file
13
+ GCS_SERVICE_ACCOUNT_FILE: Path to GCS service account JSON file (for Prefect deployment bundles)
16
14
 
17
15
  Configuration precedence:
18
16
  1. Environment variables (highest priority)
19
17
  2. .env file in current directory
20
18
  3. Default values (empty strings)
21
19
 
22
- Example:
23
- >>> from ai_pipeline_core import Settings
24
- >>>
25
- >>> # Create your project's settings class
26
- >>> class ProjectSettings(Settings):
27
- ... app_name: str = "my-app"
28
- ... debug_mode: bool = False
29
- >>>
30
- >>> # Create singleton instance
31
- >>> settings = ProjectSettings()
32
- >>>
33
- >>> # Access configuration
34
- >>> print(settings.openai_base_url)
35
- >>> print(settings.app_name)
36
-
37
20
  .env file format:
38
21
  OPENAI_BASE_URL=http://localhost:4000
39
22
  OPENAI_API_KEY=sk-1234567890
40
23
  PREFECT_API_URL=http://localhost:4200/api
41
24
  PREFECT_API_KEY=pnu_abc123
42
25
  LMNR_PROJECT_API_KEY=lmnr_proj_xyz
43
- GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
26
+ GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # For Prefect deployment
44
27
  APP_NAME=production-app
45
28
  DEBUG_MODE=false
46
29
 
47
- Note:
48
- Settings are loaded once at initialization and frozen. There is no
49
- built-in reload mechanism - the process must be restarted to pick up
50
- changes to environment variables or .env file. This is by design to
51
- ensure consistency during execution.
30
+ Settings are loaded once at initialization and frozen. There is no
31
+ built-in reload mechanism - the process must be restarted to pick up
32
+ changes to environment variables or .env file. This is by design to
33
+ ensure consistency during execution.
52
34
  """
53
35
 
54
36
  from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -57,8 +39,6 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
57
39
  class Settings(BaseSettings):
58
40
  """Base configuration class for AI Pipeline applications.
59
41
 
60
- @public
61
-
62
42
  Settings is designed to be inherited by your application's configuration
63
43
  class. It provides core AI Pipeline settings and type-safe configuration
64
44
  management with automatic loading from environment variables and .env files.
@@ -99,7 +79,7 @@ class Settings(BaseSettings):
99
79
  enable debug-level logging. Empty string by default.
100
80
 
101
81
  gcs_service_account_file: Path to GCS service account JSON file.
102
- Used for authenticating with Google Cloud Storage.
82
+ Used for Prefect deployment bundles to GCS.
103
83
  Optional - if not set, default credentials will be used.
104
84
 
105
85
  Configuration sources:
@@ -107,9 +87,8 @@ class Settings(BaseSettings):
107
87
  - .env file in current directory
108
88
  - Default values in class definition
109
89
 
110
- Note:
111
- Empty strings are used as defaults to allow optional services.
112
- Check for empty values before using service-specific settings.
90
+ Empty strings are used as defaults to allow optional services.
91
+ Check for empty values before using service-specific settings.
113
92
  """
114
93
 
115
94
  model_config = SettingsConfigDict(
@@ -135,8 +114,24 @@ class Settings(BaseSettings):
135
114
  lmnr_project_api_key: str = ""
136
115
  lmnr_debug: str = ""
137
116
 
138
- # Storage Configuration
117
+ # GCS (for Prefect deployment bundles)
139
118
  gcs_service_account_file: str = "" # Path to GCS service account JSON file
140
119
 
120
+ # ClickHouse tracking
121
+ clickhouse_host: str = ""
122
+ clickhouse_port: int = 8443
123
+ clickhouse_database: str = "default"
124
+ clickhouse_user: str = "default"
125
+ clickhouse_password: str = ""
126
+ clickhouse_secure: bool = True
127
+
128
+ # Tracking behavior
129
+ tracking_enabled: bool = True
130
+ tracking_summary_model: str = "gemini-3-flash"
131
+
132
+ # Document summary generation (store-level)
133
+ doc_summary_enabled: bool = True
134
+ doc_summary_model: str = "gemini-3-flash"
135
+
141
136
 
142
137
  settings = Settings()
@@ -0,0 +1,9 @@
1
+ """Test utilities for pipeline development.
2
+
3
+ Re-exports Prefect testing helpers used in pipeline test suites.
4
+ """
5
+
6
+ from prefect.logging import disable_run_logger
7
+ from prefect.testing.utilities import prefect_test_harness
8
+
9
+ __all__ = ["disable_run_logger", "prefect_test_harness"]