ai-pipeline-core 0.1.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. ai_pipeline_core/__init__.py +83 -119
  2. ai_pipeline_core/deployment/__init__.py +34 -0
  3. ai_pipeline_core/deployment/base.py +861 -0
  4. ai_pipeline_core/deployment/contract.py +80 -0
  5. ai_pipeline_core/deployment/deploy.py +561 -0
  6. ai_pipeline_core/deployment/helpers.py +97 -0
  7. ai_pipeline_core/deployment/progress.py +126 -0
  8. ai_pipeline_core/deployment/remote.py +116 -0
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +14 -15
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +349 -1062
  30. ai_pipeline_core/documents/mime_type.py +40 -85
  31. ai_pipeline_core/documents/utils.py +62 -7
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +309 -0
  34. ai_pipeline_core/images/_processing.py +151 -0
  35. ai_pipeline_core/llm/__init__.py +5 -3
  36. ai_pipeline_core/llm/ai_messages.py +284 -73
  37. ai_pipeline_core/llm/client.py +462 -209
  38. ai_pipeline_core/llm/model_options.py +86 -53
  39. ai_pipeline_core/llm/model_response.py +187 -241
  40. ai_pipeline_core/llm/model_types.py +34 -54
  41. ai_pipeline_core/logging/__init__.py +2 -9
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -43
  44. ai_pipeline_core/logging/logging_mixin.py +17 -51
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/observability/_debug/_config.py +95 -0
  49. ai_pipeline_core/observability/_debug/_content.py +764 -0
  50. ai_pipeline_core/observability/_debug/_processor.py +98 -0
  51. ai_pipeline_core/observability/_debug/_summary.py +312 -0
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/observability/_debug/_writer.py +843 -0
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/observability/tracing.py +640 -0
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +26 -105
  70. ai_pipeline_core/settings.py +41 -32
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.1.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.1.dist-info/RECORD +76 -0
  74. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/WHEEL +1 -1
  75. ai_pipeline_core/documents/document_list.py +0 -240
  76. ai_pipeline_core/documents/flow_document.py +0 -128
  77. ai_pipeline_core/documents/task_document.py +0 -133
  78. ai_pipeline_core/documents/temporary_document.py +0 -95
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -314
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -717
  83. ai_pipeline_core/prefect.py +0 -54
  84. ai_pipeline_core/simple_runner/__init__.py +0 -24
  85. ai_pipeline_core/simple_runner/cli.py +0 -255
  86. ai_pipeline_core/simple_runner/simple_runner.py +0 -385
  87. ai_pipeline_core/tracing.py +0 -475
  88. ai_pipeline_core-0.1.12.dist-info/METADATA +0 -450
  89. ai_pipeline_core-0.1.12.dist-info/RECORD +0 -36
  90. {ai_pipeline_core-0.1.12.dist-info → ai_pipeline_core-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +0,0 @@
1
- """Flow configuration and options for Prefect-based pipeline flows."""
2
-
3
- from .config import FlowConfig
4
- from .options import FlowOptions
5
-
6
- __all__ = [
7
- "FlowConfig",
8
- "FlowOptions",
9
- ]
@@ -1,314 +0,0 @@
1
- """Flow configuration system for type-safe pipeline definitions.
2
-
3
- @public
4
-
5
- This module provides the FlowConfig abstract base class that enforces
6
- type safety for flow inputs and outputs in the pipeline system.
7
-
8
- Best Practice:
9
- Always finish @pipeline_flow functions with create_and_validate_output()
10
- to ensure type safety and proper validation of output documents.
11
- """
12
-
13
- from abc import ABC
14
- from typing import Any, ClassVar, Iterable
15
-
16
- from ai_pipeline_core.documents import DocumentList, FlowDocument
17
- from ai_pipeline_core.exceptions import DocumentValidationError
18
-
19
-
20
- class FlowConfig(ABC):
21
- """Abstract base class for type-safe flow configuration.
22
-
23
- @public
24
-
25
- FlowConfig defines the contract for flow inputs and outputs, ensuring
26
- type safety and preventing circular dependencies in pipeline flows.
27
- Each flow must have a corresponding FlowConfig subclass that specifies
28
- its input document types and output document type.
29
-
30
- CRITICAL RULE: OUTPUT_DOCUMENT_TYPE must NEVER be in INPUT_DOCUMENT_TYPES!
31
- This prevents circular dependencies as flows chain together.
32
- Each flow transforms input types to a DIFFERENT output type.
33
-
34
- Class Variables:
35
- INPUT_DOCUMENT_TYPES: List of FlowDocument types this flow accepts
36
- OUTPUT_DOCUMENT_TYPE: Single FlowDocument type this flow produces
37
-
38
- Validation Rules:
39
- - INPUT_DOCUMENT_TYPES and OUTPUT_DOCUMENT_TYPE must be defined
40
- - OUTPUT_DOCUMENT_TYPE cannot be in INPUT_DOCUMENT_TYPES (prevents cycles)
41
- - Field names must be exact (common typos are detected)
42
-
43
- Why this matters:
44
- Flows connect in pipelines where one flow's output becomes another's input.
45
- Same input/output types would create infinite loops or circular dependencies.
46
-
47
- Example:
48
- >>> # CORRECT - Different output type from inputs
49
- >>> class ProcessingFlowConfig(FlowConfig):
50
- ... INPUT_DOCUMENT_TYPES = [RawDataDocument]
51
- ... OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Different type!
52
- >>>
53
- >>> # Use in @pipeline_flow - RECOMMENDED PATTERN
54
- >>> @pipeline_flow(name="processing")
55
- >>> async def process(config: ProcessingFlowConfig, docs: DocumentList) -> DocumentList:
56
- ... outputs = []
57
- ... # ... processing logic ...
58
- ... return config.create_and_validate_output(outputs)
59
-
60
- >>> # WRONG - Will raise TypeError
61
- >>> class BadConfig(FlowConfig):
62
- ... INPUT_DOCUMENT_TYPES = [DataDocument]
63
- ... OUTPUT_DOCUMENT_TYPE = DataDocument # SAME TYPE - NOT ALLOWED!
64
-
65
- Note:
66
- - Validation happens at class definition time
67
- - Helps catch configuration errors early
68
- - Used by simple_runner to manage document flow
69
- """
70
-
71
- INPUT_DOCUMENT_TYPES: ClassVar[list[type[FlowDocument]]]
72
- OUTPUT_DOCUMENT_TYPE: ClassVar[type[FlowDocument]]
73
-
74
- def __init_subclass__(cls, **kwargs: Any):
75
- """Validate flow configuration at subclass definition time.
76
-
77
- Performs comprehensive validation when a FlowConfig subclass is defined:
78
- 1. Checks for common field name mistakes (typos)
79
- 2. Ensures required fields are defined
80
- 3. Prevents circular dependencies (output != input)
81
-
82
- Args:
83
- **kwargs: Additional arguments for parent __init_subclass__.
84
-
85
- Raises:
86
- TypeError: If configuration violates any validation rules:
87
- - Missing required fields
88
- - Incorrect field names
89
- - Circular dependency detected
90
-
91
- Note:
92
- This runs at class definition time, not instantiation,
93
- providing immediate feedback during development.
94
- """
95
- super().__init_subclass__(**kwargs)
96
-
97
- # Skip validation for the abstract base class itself
98
- if cls.__name__ == "FlowConfig":
99
- return
100
-
101
- # Check for invalid field names (common mistakes)
102
- allowed_fields = {"INPUT_DOCUMENT_TYPES", "OUTPUT_DOCUMENT_TYPE"}
103
- class_attrs = {name for name in dir(cls) if not name.startswith("_") and name.isupper()}
104
-
105
- # Find fields that look like they might be mistakes
106
- suspicious_fields = class_attrs - allowed_fields
107
- common_mistakes = {
108
- "OUTPUT_DOCUMENT_TYPES": "OUTPUT_DOCUMENT_TYPE",
109
- "INPUT_DOCUMENT_TYPE": "INPUT_DOCUMENT_TYPES",
110
- }
111
-
112
- for field in suspicious_fields:
113
- # Skip inherited attributes from parent classes
114
- if any(hasattr(base, field) for base in cls.__bases__):
115
- continue
116
-
117
- if field in common_mistakes:
118
- raise TypeError(
119
- f"FlowConfig {cls.__name__}: Found '{field}' but expected "
120
- f"'{common_mistakes[field]}'. Please use the correct field name."
121
- )
122
- elif "DOCUMENT" in field:
123
- raise TypeError(
124
- f"FlowConfig {cls.__name__}: Invalid field '{field}'. "
125
- f"Only 'INPUT_DOCUMENT_TYPES' and 'OUTPUT_DOCUMENT_TYPE' are allowed."
126
- )
127
-
128
- # Ensure required attributes are defined
129
- if not hasattr(cls, "INPUT_DOCUMENT_TYPES"):
130
- raise TypeError(f"FlowConfig {cls.__name__} must define INPUT_DOCUMENT_TYPES")
131
- if not hasattr(cls, "OUTPUT_DOCUMENT_TYPE"):
132
- raise TypeError(f"FlowConfig {cls.__name__} must define OUTPUT_DOCUMENT_TYPE")
133
-
134
- # Validate that output type is not in input types
135
- if cls.OUTPUT_DOCUMENT_TYPE in cls.INPUT_DOCUMENT_TYPES:
136
- raise TypeError(
137
- f"FlowConfig {cls.__name__}: OUTPUT_DOCUMENT_TYPE "
138
- f"({cls.OUTPUT_DOCUMENT_TYPE.__name__}) cannot be in INPUT_DOCUMENT_TYPES"
139
- )
140
-
141
- @classmethod
142
- def get_input_document_types(cls) -> list[type[FlowDocument]]:
143
- """Get the list of input document types this flow accepts.
144
-
145
- Returns:
146
- List of FlowDocument subclasses that this flow requires
147
- as input.
148
-
149
- Example:
150
- >>> types = MyFlowConfig.get_input_document_types()
151
- >>> print([t.__name__ for t in types])
152
- ['InputDoc', 'ConfigDoc']
153
- """
154
- return cls.INPUT_DOCUMENT_TYPES
155
-
156
- @classmethod
157
- def get_output_document_type(cls) -> type[FlowDocument]:
158
- """Get the output document type this flow produces.
159
-
160
- Returns:
161
- Single FlowDocument subclass that this flow outputs.
162
-
163
- Example:
164
- >>> output_type = MyFlowConfig.get_output_document_type()
165
- >>> print(output_type.__name__)
166
- 'ProcessedDataDocument'
167
- """
168
- return cls.OUTPUT_DOCUMENT_TYPE
169
-
170
- @classmethod
171
- def has_input_documents(cls, documents: DocumentList) -> bool:
172
- """Check if all required input documents are present.
173
-
174
- Verifies that the document list contains at least one instance
175
- of each required input document type.
176
-
177
- Args:
178
- documents: DocumentList to check for required inputs.
179
-
180
- Returns:
181
- True if all required document types are present,
182
- False if any are missing.
183
-
184
- Example:
185
- >>> docs = DocumentList([input_doc, config_doc])
186
- >>> if MyFlowConfig.has_input_documents(docs):
187
- ... # Safe to proceed with flow
188
- ... pass
189
-
190
- Note:
191
- Use this before get_input_documents() to avoid exceptions.
192
- """
193
- for doc_cls in cls.INPUT_DOCUMENT_TYPES:
194
- if not any(isinstance(doc, doc_cls) for doc in documents):
195
- return False
196
- return True
197
-
198
- @classmethod
199
- def get_input_documents(cls, documents: DocumentList) -> DocumentList:
200
- """Extract and return all required input documents.
201
-
202
- Filters the provided document list to return only documents
203
- matching the required input types. Returns all matching documents,
204
- not just the first of each type.
205
-
206
- Args:
207
- documents: DocumentList containing mixed document types.
208
-
209
- Returns:
210
- DocumentList containing only the required input documents.
211
-
212
- Raises:
213
- ValueError: If any required document type is missing.
214
-
215
- Example:
216
- >>> all_docs = DocumentList([input1, input2, other_doc])
217
- >>> input_docs = MyFlowConfig.get_input_documents(all_docs)
218
- >>> len(input_docs) # Contains only input1 and input2
219
- 2
220
-
221
- Note:
222
- Call has_input_documents() first to check availability.
223
- """
224
- input_documents = DocumentList()
225
- for doc_cls in cls.INPUT_DOCUMENT_TYPES:
226
- filtered_documents = [doc for doc in documents if isinstance(doc, doc_cls)]
227
- if not filtered_documents:
228
- raise ValueError(f"No input document found for class {doc_cls.__name__}")
229
- input_documents.extend(filtered_documents)
230
- return input_documents
231
-
232
- @classmethod
233
- def validate_output_documents(cls, documents: Any) -> None:
234
- """Validate that output documents match the expected type.
235
-
236
- Ensures all documents in the list are instances of the
237
- declared OUTPUT_DOCUMENT_TYPE.
238
-
239
- Args:
240
- documents: DocumentList to validate.
241
-
242
- Raises:
243
- DocumentValidationError: If documents is not a DocumentList or if any
244
- document has incorrect type.
245
-
246
- Example:
247
- >>> output = DocumentList([ProcessedDoc(...)])
248
- >>> MyFlowConfig.validate_output_documents(output)
249
- >>> # No exception means valid
250
-
251
- Note:
252
- Used internally by create_and_validate_output().
253
- Uses explicit exceptions for validation (works with python -O).
254
- """
255
- if not isinstance(documents, DocumentList):
256
- raise DocumentValidationError("Documents must be a DocumentList")
257
-
258
- output_document_class = cls.get_output_document_type()
259
-
260
- for doc in documents:
261
- if not isinstance(doc, output_document_class):
262
- raise DocumentValidationError(
263
- f"Document '{doc.name}' has incorrect type. "
264
- f"Expected: {output_document_class.__name__}, "
265
- f"Got: {type(doc).__name__}"
266
- )
267
-
268
- @classmethod
269
- def create_and_validate_output(
270
- cls, output: FlowDocument | Iterable[FlowDocument] | DocumentList
271
- ) -> DocumentList:
272
- """Create and validate flow output documents.
273
-
274
- @public
275
-
276
- RECOMMENDED: Always use this method at the end of @pipeline_flow functions
277
- to ensure type safety and proper output validation.
278
-
279
- Convenience method that wraps output in a DocumentList if needed
280
- and validates it matches the expected OUTPUT_DOCUMENT_TYPE.
281
-
282
- Args:
283
- output: Single document, iterable of documents, or DocumentList.
284
-
285
- Returns:
286
- Validated DocumentList containing the output documents.
287
-
288
- Raises:
289
- DocumentValidationError: If output type doesn't match OUTPUT_DOCUMENT_TYPE.
290
-
291
- Example:
292
- >>> @pipeline_flow(name="my_flow")
293
- >>> async def process_flow(config: MyFlowConfig, ...) -> DocumentList:
294
- >>> outputs = []
295
- >>> # ... processing logic ...
296
- >>> outputs.append(OutputDoc(...))
297
- >>>
298
- >>> # Always finish with this validation
299
- >>> return config.create_and_validate_output(outputs)
300
-
301
- Note:
302
- This is the recommended pattern for all @pipeline_flow functions.
303
- It ensures type safety and catches output errors immediately.
304
- """
305
- documents: DocumentList
306
- if isinstance(output, FlowDocument):
307
- documents = DocumentList([output])
308
- elif isinstance(output, DocumentList):
309
- documents = output
310
- else:
311
- # Handle any iterable of FlowDocuments
312
- documents = DocumentList(list(output)) # type: ignore[arg-type]
313
- cls.validate_output_documents(documents)
314
- return documents
@@ -1,75 +0,0 @@
1
- """Flow options configuration for pipeline execution.
2
-
3
- @public
4
-
5
- Provides base configuration settings for AI pipeline flows,
6
- including model selection and runtime parameters.
7
- """
8
-
9
- from typing import TypeVar
10
-
11
- from pydantic import Field
12
- from pydantic_settings import BaseSettings, SettingsConfigDict
13
-
14
- from ai_pipeline_core.llm import ModelName
15
-
16
- T = TypeVar("T", bound="FlowOptions")
17
-
18
-
19
- class FlowOptions(BaseSettings):
20
- """Base configuration settings for AI pipeline flows.
21
-
22
- @public
23
-
24
- FlowOptions provides runtime configuration for pipeline flows,
25
- including model selection and other parameters. It uses pydantic-settings
26
- to support environment variable overrides and is immutable (frozen) by default.
27
-
28
- This class is designed to be subclassed for flow-specific configuration:
29
-
30
- Example:
31
- >>> class MyFlowOptions(FlowOptions):
32
- ... temperature: float = Field(0.7, ge=0, le=2)
33
- ... batch_size: int = Field(10, gt=0)
34
- ... custom_param: str = "default"
35
-
36
- >>> # Use in CLI with run_cli:
37
- >>> run_cli(
38
- ... flows=[my_flow],
39
- ... options_cls=MyFlowOptions # Will parse CLI args
40
- ... )
41
-
42
- >>> # Or create programmatically:
43
- >>> options = MyFlowOptions(
44
- ... core_model="gemini-2.5-pro",
45
- ... temperature=0.9
46
- ... )
47
-
48
- Attributes:
49
- core_model: Primary LLM for complex tasks (default: gpt-5)
50
- small_model: Fast model for simple tasks (default: gpt-5-mini)
51
-
52
- Configuration:
53
- - Frozen (immutable) after creation
54
- - Extra fields ignored (not strict)
55
- - Can be populated from environment variables
56
- - Used by simple_runner.cli for command-line parsing
57
-
58
- Note:
59
- The base class provides model selection. Subclasses should
60
- add flow-specific parameters with appropriate validation.
61
- """
62
-
63
- core_model: ModelName | str = Field(
64
- default="gpt-5",
65
- description="Primary model for complex analysis and generation tasks.",
66
- )
67
- small_model: ModelName | str = Field(
68
- default="gpt-5-mini",
69
- description="Fast, cost-effective model for simple tasks and orchestration.",
70
- )
71
-
72
- model_config = SettingsConfigDict(frozen=True, extra="ignore")
73
-
74
-
75
- __all__ = ["FlowOptions"]