ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. ai_pipeline_core/__init__.py +83 -119
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +14 -15
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +349 -1062
  30. ai_pipeline_core/documents/mime_type.py +40 -85
  31. ai_pipeline_core/documents/utils.py +62 -7
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +5 -3
  36. ai_pipeline_core/llm/ai_messages.py +284 -73
  37. ai_pipeline_core/llm/client.py +462 -209
  38. ai_pipeline_core/llm/model_options.py +86 -53
  39. ai_pipeline_core/llm/model_response.py +187 -241
  40. ai_pipeline_core/llm/model_types.py +34 -54
  41. ai_pipeline_core/logging/__init__.py +2 -9
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -43
  44. ai_pipeline_core/logging/logging_mixin.py +17 -51
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/observability/tracing.py +640 -0
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +26 -105
  70. ai_pipeline_core/settings.py +41 -32
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -240
  76. ai_pipeline_core/documents/flow_document.py +0 -128
  77. ai_pipeline_core/documents/task_document.py +0 -133
  78. ai_pipeline_core/documents/temporary_document.py +0 -95
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -314
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -717
  83. ai_pipeline_core/prefect.py +0 -54
  84. ai_pipeline_core/simple_runner/__init__.py +0 -24
  85. ai_pipeline_core/simple_runner/cli.py +0 -255
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -385
  87. ai_pipeline_core/tracing.py +0 -475
  88. ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
  89. ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
  90. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,16 @@
1
+ """Flow options base class for pipeline execution."""
2
+
3
+ from pydantic_settings import BaseSettings, SettingsConfigDict
4
+
5
+
6
+ class FlowOptions(BaseSettings):
7
+ """Base configuration for pipeline flows.
8
+
9
+ Subclass to add flow-specific parameters. Uses pydantic-settings
10
+ for environment variable overrides. Immutable after creation.
11
+ """
12
+
13
+ model_config = SettingsConfigDict(frozen=True, extra="allow")
14
+
15
+
16
+ __all__ = ["FlowOptions"]
@@ -1,7 +1,5 @@
1
1
  """Jinja2-based prompt template management system.
2
2
 
3
- @public
4
-
5
3
  This module provides the PromptManager class for loading and rendering
6
4
  Jinja2 templates used as prompts for language models. It implements a
7
5
  smart search strategy that looks for templates in both local and shared
@@ -10,26 +8,8 @@ directories.
10
8
  Search strategy:
11
9
  1. Local directory (same as calling module)
12
10
  2. Local 'prompts' subdirectory
13
- 3. Parent 'prompts' directories (up to package boundary)
14
-
15
- Key features:
16
- - Automatic template discovery
17
- - Jinja2 template rendering with context
18
- - Smart path resolution (.jinja2/.jinja extension handling)
19
- - Clear error messages for missing templates
20
-
21
- Example:
22
- >>> from ai_pipeline_core import PromptManager
23
- >>>
24
- >>> # Initialize at module level (not inside functions)
25
- >>> pm = PromptManager(__file__)
26
- >>>
27
- >>> # Render a template
28
- >>> prompt = pm.get(
29
- ... "analyze.jinja2",
30
- ... document=doc,
31
- ... instructions="Extract key points"
32
- ... )
11
+ 3. Parent 'prompts' directories (search ascends parent packages up to the package
12
+ boundary or after 4 parent levels, whichever comes first)
33
13
 
34
14
  Template organization:
35
15
  project/
@@ -39,11 +19,11 @@ Template organization:
39
19
  ├── summarize.jinja2
40
20
  └── extract.jinja2
41
21
 
42
- Note:
43
- Templates should use .jinja2 or .jinja extension.
44
- The extension can be omitted when calling get().
22
+ Templates should use .jinja2 or .jinja extension.
23
+ The extension can be omitted when calling get().
45
24
  """
46
25
 
26
+ from datetime import datetime
47
27
  from pathlib import Path
48
28
  from typing import Any
49
29
 
@@ -59,8 +39,6 @@ logger = get_pipeline_logger(__name__)
59
39
  class PromptManager:
60
40
  """Manages Jinja2 prompt templates with smart path resolution.
61
41
 
62
- @public
63
-
64
42
  PromptManager provides a convenient interface for loading and rendering
65
43
  Jinja2 templates used as prompts for LLMs. It automatically searches for
66
44
  templates in multiple locations, supporting both local (module-specific)
@@ -69,29 +47,13 @@ class PromptManager:
69
47
  Search hierarchy:
70
48
  1. Same directory as the calling module (for local templates)
71
49
  2. 'prompts' subdirectory in the calling module's directory
72
- 3. 'prompts' directories in parent packages (up to package boundary)
50
+ 3. 'prompts' directories in parent packages (search ascends parent packages up to the
51
+ package boundary or after 4 parent levels, whichever comes first)
73
52
 
74
53
  Attributes:
75
54
  search_paths: List of directories where templates are searched.
76
55
  env: Jinja2 Environment configured for prompt rendering.
77
56
 
78
- Example:
79
- >>> # BEST PRACTICE: Instantiate at module scope (top level), not inside functions
80
- >>> # In flow/my_flow.py
81
- >>> from ai_pipeline_core import PromptManager
82
- >>> pm = PromptManager(__file__) # Module-level initialization
83
- >>>
84
- >>> # WRONG - Don't instantiate inside handlers or hot paths:
85
- >>> # async def process():
86
- >>> # pm = PromptManager(__file__) # NO! Creates new instance each call
87
- >>>
88
- >>> # Uses flow/prompts/analyze.jinja2 if it exists,
89
- >>> # otherwise searches parent directories
90
- >>> prompt = pm.get("analyze", context=data)
91
- >>>
92
- >>> # Can also use templates in same directory as module
93
- >>> prompt = pm.get("local_template.jinja2")
94
-
95
57
  Template format:
96
58
  Templates use standard Jinja2 syntax:
97
59
  ```jinja2
@@ -101,11 +63,12 @@ class PromptManager:
101
63
  {% if instructions %}
102
64
  Instructions: {{ instructions }}
103
65
  {% endif %}
66
+
67
+ Date: {{ current_date }} # Current date in format "03 January 2025"
104
68
  ```
105
69
 
106
- Note:
107
- - Autoescape is disabled for prompts (raw text output)
108
- - Whitespace control is enabled (trim_blocks, lstrip_blocks)
70
+ Autoescape is disabled for prompts (raw text output).
71
+ Whitespace control is enabled (trim_blocks, lstrip_blocks).
109
72
 
110
73
  Template Inheritance:
111
74
  Templates support standard Jinja2 inheritance. Templates are searched
@@ -120,8 +83,6 @@ class PromptManager:
120
83
  def __init__(self, current_file: str, prompts_dir: str = "prompts"):
121
84
  """Initialize PromptManager with smart template discovery.
122
85
 
123
- @public
124
-
125
86
  Sets up the Jinja2 environment with a FileSystemLoader that searches
126
87
  multiple directories for templates. The search starts from the calling
127
88
  module's location and extends to parent package directories.
@@ -138,37 +99,19 @@ class PromptManager:
138
99
  PromptError: If current_file is not a valid file path (e.g.,
139
100
  if __name__ was passed instead of __file__).
140
101
 
141
- Note:
142
- Search behavior - Given a module at /project/flows/my_flow.py:
143
- 1. /project/flows/ (local templates)
144
- 2. /project/flows/prompts/ (if exists)
102
+ Search behavior - Given a module at /project/tasks/my_task.py:
103
+ 1. /project/tasks/ (local templates)
104
+ 2. /project/tasks/prompts/ (if exists)
145
105
  3. /project/prompts/ (if /project has __init__.py)
146
-
147
- Search stops when no __init__.py is found (package boundary).
148
-
149
- Example:
150
- >>> # Correct usage
151
- >>> pm = PromptManager(__file__)
152
- >>>
153
- >>> # Custom prompts directory name
154
- >>> pm = PromptManager(__file__, prompts_dir="templates")
155
- >>>
156
- >>> # Common mistake (will raise PromptError)
157
- >>> pm = PromptManager(__name__) # Wrong!
158
-
159
- Note:
160
- The search is limited to 4 parent levels to prevent
161
- excessive filesystem traversal.
106
+ Search ascends parent packages up to the package boundary or after 4 parent
107
+ levels, whichever comes first.
162
108
  """
163
109
  search_paths: list[Path] = []
164
110
 
165
111
  # Start from the directory containing the calling file
166
112
  current_path = Path(current_file).resolve()
167
113
  if not current_path.exists():
168
- raise PromptError(
169
- f"PromptManager expected __file__ (a valid file path), "
170
- f"but got {current_file!r}. Did you pass __name__ instead?"
171
- )
114
+ raise PromptError(f"PromptManager expected __file__ (a valid file path), but got {current_file!r}. Did you pass __name__ instead?")
172
115
 
173
116
  if current_path.is_file():
174
117
  current_path = current_path.parent
@@ -215,11 +158,12 @@ class PromptManager:
215
158
  autoescape=False, # Important for prompt engineering
216
159
  )
217
160
 
161
+ # Add current_date as a global string (format: "03 January 2025")
162
+ self.env.globals["current_date"] = datetime.now().strftime("%d %B %Y") # type: ignore[assignment]
163
+
218
164
  def get(self, prompt_path: str, **kwargs: Any) -> str:
219
165
  """Load and render a Jinja2 template with the given context.
220
166
 
221
- @public
222
-
223
167
  Searches for the template in all configured search paths and renders
224
168
  it with the provided context variables. Automatically tries adding
225
169
  .jinja2 or .jinja extensions if the file is not found.
@@ -242,31 +186,11 @@ class PromptManager:
242
186
  rendering fails (e.g., missing variables,
243
187
  syntax errors).
244
188
 
245
- Note:
246
- Template resolution - Given prompt_path="analyze":
189
+ Template resolution - Given prompt_path="analyze":
247
190
  1. Try "analyze" as-is
248
191
  2. Try "analyze.jinja2"
249
192
  3. Try "analyze.jinja"
250
-
251
- The first matching file is used.
252
-
253
- Example:
254
- >>> pm = PromptManager(__file__)
255
- >>>
256
- >>> # Simple rendering
257
- >>> prompt = pm.get("summarize", text="Long document...")
258
- >>>
259
- >>> # With complex context
260
- >>> prompt = pm.get(
261
- ... "analyze",
262
- ... document=doc,
263
- ... max_length=500,
264
- ... style="technical",
265
- ... options={"include_metadata": True}
266
- ... )
267
- >>>
268
- >>> # Nested template path
269
- >>> prompt = pm.get("flows/extraction/extract_entities")
193
+ The first matching file is used.
270
194
 
271
195
  Template example:
272
196
  ```jinja2
@@ -279,9 +203,8 @@ class PromptManager:
279
203
  {% endif %}
280
204
  ```
281
205
 
282
- Note:
283
- All Jinja2 features are available: loops, conditionals,
284
- filters, macros, inheritance, etc.
206
+ All Jinja2 features are available: loops, conditionals,
207
+ filters, macros, inheritance, etc.
285
208
  """
286
209
  try:
287
210
  template = self.env.get_template(prompt_path)
@@ -294,13 +217,11 @@ class PromptManager:
294
217
  return template.render(**kwargs)
295
218
  except jinja2.TemplateNotFound:
296
219
  pass # Fall through to the original error
297
- raise PromptNotFoundError(
298
- f"Prompt template '{prompt_path}' not found (searched in {self.search_paths})."
299
- )
220
+ raise PromptNotFoundError(f"Prompt template '{prompt_path}' not found (searched in {self.search_paths}).") from None
300
221
  except jinja2.TemplateError as e:
301
222
  raise PromptRenderError(f"Template error in '{prompt_path}': {e}") from e
302
223
  except PromptNotFoundError:
303
224
  raise # Re-raise our custom exception
304
- except (KeyError, TypeError, AttributeError, IOError, ValueError) as e:
225
+ except (OSError, KeyError, TypeError, AttributeError, ValueError) as e:
305
226
  logger.error(f"Unexpected error rendering '{prompt_path}'", exc_info=True)
306
227
  raise PromptRenderError(f"Failed to render prompt '{prompt_path}': {e}") from e
@@ -1,7 +1,5 @@
1
1
  """Core configuration settings for pipeline operations.
2
2
 
3
- @public
4
-
5
3
  This module provides the Settings base class for configuration management.
6
4
  Applications should inherit from Settings to create their own ProjectSettings
7
5
  class with additional configuration fields.
@@ -12,41 +10,27 @@ Environment variables:
12
10
  PREFECT_API_URL: Prefect server endpoint for flow orchestration
13
11
  PREFECT_API_KEY: Prefect API authentication key
14
12
  LMNR_PROJECT_API_KEY: Laminar project key for observability
13
+ GCS_SERVICE_ACCOUNT_FILE: Path to GCS service account JSON file (for Prefect deployment bundles)
15
14
 
16
15
  Configuration precedence:
17
16
  1. Environment variables (highest priority)
18
17
  2. .env file in current directory
19
18
  3. Default values (empty strings)
20
19
 
21
- Example:
22
- >>> from ai_pipeline_core import Settings
23
- >>>
24
- >>> # Create your project's settings class
25
- >>> class ProjectSettings(Settings):
26
- ... app_name: str = "my-app"
27
- ... debug_mode: bool = False
28
- >>>
29
- >>> # Create singleton instance
30
- >>> settings = ProjectSettings()
31
- >>>
32
- >>> # Access configuration
33
- >>> print(settings.openai_base_url)
34
- >>> print(settings.app_name)
35
-
36
20
  .env file format:
37
21
  OPENAI_BASE_URL=http://localhost:4000
38
22
  OPENAI_API_KEY=sk-1234567890
39
23
  PREFECT_API_URL=http://localhost:4200/api
40
24
  PREFECT_API_KEY=pnu_abc123
41
25
  LMNR_PROJECT_API_KEY=lmnr_proj_xyz
26
+ GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # For Prefect deployment
42
27
  APP_NAME=production-app
43
28
  DEBUG_MODE=false
44
29
 
45
- Note:
46
- Settings are loaded once at initialization and frozen. There is no
47
- built-in reload mechanism - the process must be restarted to pick up
48
- changes to environment variables or .env file. This is by design to
49
- ensure consistency during execution.
30
+ Settings are loaded once at initialization and frozen. There is no
31
+ built-in reload mechanism - the process must be restarted to pick up
32
+ changes to environment variables or .env file. This is by design to
33
+ ensure consistency during execution.
50
34
  """
51
35
 
52
36
  from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -55,8 +39,6 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
55
39
  class Settings(BaseSettings):
56
40
  """Base configuration class for AI Pipeline applications.
57
41
 
58
- @public
59
-
60
42
  Settings is designed to be inherited by your application's configuration
61
43
  class. It provides core AI Pipeline settings and type-safe configuration
62
44
  management with automatic loading from environment variables and .env files.
@@ -90,18 +72,23 @@ class Settings(BaseSettings):
90
72
  prefect_api_key: Prefect API authentication key. Required only
91
73
  when connecting to Prefect Cloud or secured server.
92
74
 
93
- lmnr_project_api_key: Laminar (LMNR) project API key for tracing
94
- and observability. Optional but recommended
95
- for production monitoring.
75
+ lmnr_project_api_key: Laminar (LMNR) project API key for observability.
76
+ Optional but recommended for production monitoring.
77
+
78
+ lmnr_debug: Debug mode flag for Laminar. Set to "true" to
79
+ enable debug-level logging. Empty string by default.
80
+
81
+ gcs_service_account_file: Path to GCS service account JSON file.
82
+ Used for Prefect deployment bundles to GCS.
83
+ Optional - if not set, default credentials will be used.
96
84
 
97
85
  Configuration sources:
98
86
  - Environment variables (highest priority)
99
87
  - .env file in current directory
100
88
  - Default values in class definition
101
89
 
102
- Note:
103
- Empty strings are used as defaults to allow optional services.
104
- Check for empty values before using service-specific settings.
90
+ Empty strings are used as defaults to allow optional services.
91
+ Check for empty values before using service-specific settings.
105
92
  """
106
93
 
107
94
  model_config = SettingsConfigDict(
@@ -118,11 +105,33 @@ class Settings(BaseSettings):
118
105
  # Prefect Configuration
119
106
  prefect_api_url: str = ""
120
107
  prefect_api_key: str = ""
108
+ prefect_api_auth_string: str = ""
109
+ prefect_work_pool_name: str = "default"
110
+ prefect_work_queue_name: str = "default"
111
+ prefect_gcs_bucket: str = ""
121
112
 
122
113
  # Observability
123
114
  lmnr_project_api_key: str = ""
115
+ lmnr_debug: str = ""
116
+
117
+ # GCS (for Prefect deployment bundles)
118
+ gcs_service_account_file: str = "" # Path to GCS service account JSON file
119
+
120
+ # ClickHouse tracking
121
+ clickhouse_host: str = ""
122
+ clickhouse_port: int = 8443
123
+ clickhouse_database: str = "default"
124
+ clickhouse_user: str = "default"
125
+ clickhouse_password: str = ""
126
+ clickhouse_secure: bool = True
127
+
128
+ # Tracking behavior
129
+ tracking_enabled: bool = True
130
+ tracking_summary_model: str = "gemini-3-flash"
131
+
132
+ # Document summary generation (store-level)
133
+ doc_summary_enabled: bool = True
134
+ doc_summary_model: str = "gemini-3-flash"
124
135
 
125
136
 
126
- # Legacy: Module-level instance for backwards compatibility
127
- # Applications should create their own settings instance
128
137
  settings = Settings()
@@ -0,0 +1,9 @@
1
+ """Test utilities for pipeline development.
2
+
3
+ Re-exports Prefect testing helpers used in pipeline test suites.
4
+ """
5
+
6
+ from prefect.logging import disable_run_logger
7
+ from prefect.testing.utilities import prefect_test_harness
8
+
9
+ __all__ = ["disable_run_logger", "prefect_test_harness"]