ai-pipeline-core 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ai_pipeline_core/__init__.py +70 -144
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +37 -82
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +106 -81
  37. ai_pipeline_core/llm/client.py +267 -158
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,569 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: ai-pipeline-core
3
- Version: 0.3.3
4
- Summary: Core utilities for AI-powered processing pipelines using prefect
5
- Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
- Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
7
- Project-URL: Issues, https://github.com/bbarwik/ai-pipeline-core/issues
8
- Author-email: bbarwik <bbarwik@gmail.com>
9
- License: MIT
10
- License-File: LICENSE
11
- Classifier: Development Status :: 4 - Beta
12
- Classifier: Intended Audience :: Developers
13
- Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.12
16
- Classifier: Programming Language :: Python :: 3.13
17
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
- Classifier: Typing :: Typed
20
- Requires-Python: >=3.12
21
- Requires-Dist: httpx>=0.28.1
22
- Requires-Dist: jinja2>=3.1.6
23
- Requires-Dist: lmnr>=0.7.18
24
- Requires-Dist: openai>=1.109.1
25
- Requires-Dist: pillow>=10.0.0
26
- Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
27
- Requires-Dist: prefect>=3.4.21
28
- Requires-Dist: pydantic-settings>=2.10.1
29
- Requires-Dist: pydantic>=2.11.9
30
- Requires-Dist: python-magic>=0.4.27
31
- Requires-Dist: ruamel-yaml>=0.18.14
32
- Requires-Dist: tiktoken>=0.12.0
33
- Provides-Extra: dev
34
- Requires-Dist: basedpyright>=1.31.2; extra == 'dev'
35
- Requires-Dist: bump2version>=1.0.1; extra == 'dev'
36
- Requires-Dist: interrogate>=1.5.0; extra == 'dev'
37
- Requires-Dist: pre-commit>=4.3.0; extra == 'dev'
38
- Requires-Dist: pydoc-markdown[jinja]>=4.8.0; extra == 'dev'
39
- Requires-Dist: pytest-asyncio>=1.1.0; extra == 'dev'
40
- Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
41
- Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
42
- Requires-Dist: pytest-xdist>=3.8.0; extra == 'dev'
43
- Requires-Dist: pytest>=8.4.1; extra == 'dev'
44
- Requires-Dist: ruff>=0.14.1; extra == 'dev'
45
- Description-Content-Type: text/markdown
46
-
47
- # AI Pipeline Core
48
-
49
- A high-performance async framework for building type-safe AI pipelines with LLMs, document processing, and workflow orchestration.
50
-
51
- [![Python Version](https://img.shields.io/badge/python-3.12%2B-blue)](https://www.python.org/downloads/)
52
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
53
- [![Code Style: Ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
54
- [![Type Checked: Basedpyright](https://img.shields.io/badge/type%20checked-basedpyright-blue)](https://github.com/DetachHead/basedpyright)
55
-
56
- ## Overview
57
-
58
- AI Pipeline Core is a production-ready framework that combines document processing, LLM integration, and workflow orchestration into a unified system. Built with strong typing (Pydantic), automatic retries, cost tracking, and distributed tracing, it enforces best practices while maintaining high performance through fully async operations.
59
-
60
- ### Key Features
61
-
62
- - **Document Processing**: Type-safe handling of text, JSON, YAML, PDFs, and images with automatic MIME type detection and provenance tracking
63
- - **LLM Integration**: Unified interface to any model via LiteLLM proxy with configurable context caching
64
- - **Structured Output**: Type-safe generation with Pydantic model validation
65
- - **Workflow Orchestration**: Prefect-based flows and tasks with automatic retries
66
- - **Observability**: Built-in distributed tracing via Laminar (LMNR) with cost tracking for debugging and monitoring
67
- - **Deployment**: Unified pipeline execution for local, CLI, and production environments
68
-
69
- ## Installation
70
-
71
- ```bash
72
- pip install ai-pipeline-core
73
- ```
74
-
75
- ### Requirements
76
-
77
- - Python 3.12 or higher
78
- - Linux/macOS (Windows via WSL2)
79
-
80
- ### Development Installation
81
-
82
- ```bash
83
- git clone https://github.com/bbarwik/ai-pipeline-core.git
84
- cd ai-pipeline-core
85
- pip install -e ".[dev]"
86
- make install-dev # Installs pre-commit hooks
87
- ```
88
-
89
- ## Quick Start
90
-
91
- ### Basic Pipeline
92
-
93
- ```python
94
- from ai_pipeline_core import (
95
- pipeline_flow,
96
- FlowDocument,
97
- DocumentList,
98
- FlowOptions,
99
- FlowConfig,
100
- llm,
101
- AIMessages
102
- )
103
-
104
- # Define document types
105
- class InputDoc(FlowDocument):
106
- """Input document for processing."""
107
-
108
- class OutputDoc(FlowDocument):
109
- """Analysis result document."""
110
-
111
- # Define flow configuration
112
- class AnalysisConfig(FlowConfig):
113
- INPUT_DOCUMENT_TYPES = [InputDoc]
114
- OUTPUT_DOCUMENT_TYPE = OutputDoc
115
-
116
- # Create pipeline flow with required config
117
- @pipeline_flow(config=AnalysisConfig)
118
- async def analyze_flow(
119
- project_name: str,
120
- documents: DocumentList,
121
- flow_options: FlowOptions
122
- ) -> DocumentList:
123
- # Process documents
124
- outputs = []
125
- for doc in documents:
126
- # Use AIMessages for LLM interaction
127
- response = await llm.generate(
128
- model="gpt-5.1",
129
- messages=AIMessages([doc])
130
- )
131
-
132
- output = OutputDoc.create(
133
- name=f"analysis_{doc.name}",
134
- content=response.content
135
- )
136
- outputs.append(output)
137
-
138
- # RECOMMENDED: Always validate output
139
- return AnalysisConfig.create_and_validate_output(outputs)
140
- ```
141
-
142
- ### Structured Output
143
-
144
- ```python
145
- from pydantic import BaseModel
146
- from ai_pipeline_core import llm
147
-
148
- class Analysis(BaseModel):
149
- summary: str
150
- sentiment: float
151
- key_points: list[str]
152
-
153
- # Generate structured output
154
- response = await llm.generate_structured(
155
- model="gpt-5.1",
156
- response_format=Analysis,
157
- messages="Analyze this product review: ..."
158
- )
159
-
160
- # Access parsed result with type safety
161
- analysis = response.parsed
162
- print(f"Sentiment: {analysis.sentiment}")
163
- for point in analysis.key_points:
164
- print(f"- {point}")
165
- ```
166
-
167
- ### Document Handling
168
-
169
- ```python
170
- from ai_pipeline_core import FlowDocument, TemporaryDocument
171
-
172
- # Create documents with automatic conversion
173
- doc = MyDocument.create(
174
- name="data.json",
175
- content={"key": "value"} # Automatically converted to JSON bytes
176
- )
177
-
178
- # Parse back to original type
179
- data = doc.parse(dict) # Returns {"key": "value"}
180
-
181
- # Document provenance tracking
182
- doc_with_sources = MyDocument.create(
183
- name="derived.json",
184
- content={"result": "processed"},
185
- sources=[source_doc.sha256, "https://api.example.com/data"]
186
- )
187
-
188
- # Check provenance
189
- for hash in doc_with_sources.get_source_documents():
190
- print(f"Derived from document: {hash}")
191
- for ref in doc_with_sources.get_source_references():
192
- print(f"External source: {ref}")
193
-
194
- # Temporary documents (never persisted)
195
- temp = TemporaryDocument.create(
196
- name="api_response.json",
197
- content={"status": "ok"}
198
- )
199
- ```
200
-
201
- ## Core Concepts
202
-
203
- ### Documents
204
-
205
- Documents are immutable Pydantic models that wrap binary content with metadata:
206
-
207
- - **FlowDocument**: Persists across flow runs, saved to filesystem
208
- - **TaskDocument**: Temporary within task execution, not persisted
209
- - **TemporaryDocument**: Never persisted, useful for sensitive data
210
-
211
- ```python
212
- class MyDocument(FlowDocument):
213
- """Custom document type."""
214
-
215
- # Use create() for automatic conversion
216
- doc = MyDocument.create(
217
- name="data.json",
218
- content={"key": "value"} # Auto-converts to JSON
219
- )
220
-
221
- # Access content
222
- if doc.is_text:
223
- print(doc.text)
224
-
225
- # Parse structured data
226
- data = doc.as_json() # or as_yaml(), as_pydantic_model()
227
-
228
- # Convert between document types
229
- task_doc = flow_doc.model_convert(TaskDocument) # Convert FlowDocument to TaskDocument
230
- new_doc = doc.model_convert(OtherDocType, content={"new": "data"}) # With content update
231
-
232
- # Enhanced filtering
233
- filtered = documents.filter_by([Doc1, Doc2, Doc3]) # Multiple types
234
- named = documents.filter_by(["file1.txt", "file2.txt"]) # Multiple names
235
-
236
- # Immutable collections
237
- frozen_docs = DocumentList(docs, frozen=True) # Immutable document list
238
- frozen_msgs = AIMessages(messages, frozen=True) # Immutable message list
239
- ```
240
-
241
- ### LLM Integration
242
-
243
- The framework provides a unified interface for LLM interactions with smart caching:
244
-
245
- ```python
246
- from ai_pipeline_core import llm, AIMessages, ModelOptions
247
-
248
- # Simple generation
249
- response = await llm.generate(
250
- model="gpt-5.1",
251
- messages="Explain quantum computing"
252
- )
253
- print(response.content)
254
-
255
- # With context caching (saves 50-90% tokens)
256
- static_context = AIMessages([large_document])
257
-
258
- # First call: caches context
259
- r1 = await llm.generate(
260
- model="gpt-5.1",
261
- context=static_context, # Cached for 120 seconds by default
262
- messages="Summarize" # Dynamic query
263
- )
264
-
265
- # Second call: reuses cache
266
- r2 = await llm.generate(
267
- model="gpt-5.1",
268
- context=static_context, # Reused from cache!
269
- messages="Key points?" # Different query
270
- )
271
-
272
- # Custom cache TTL
273
- response = await llm.generate(
274
- model="gpt-5.1",
275
- context=static_context,
276
- messages="Analyze",
277
- options=ModelOptions(cache_ttl="300s") # Cache for 5 minutes
278
- )
279
-
280
- # Disable caching for dynamic contexts
281
- response = await llm.generate(
282
- model="gpt-5.1",
283
- context=dynamic_context,
284
- messages="Process",
285
- options=ModelOptions(cache_ttl=None) # No caching
286
- )
287
- ```
288
-
289
- ### Flow Configuration
290
-
291
- Type-safe flow configuration ensures proper document flow:
292
-
293
- ```python
294
- from ai_pipeline_core import FlowConfig
295
-
296
- class ProcessingConfig(FlowConfig):
297
- INPUT_DOCUMENT_TYPES = [RawDataDocument]
298
- OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Must be different!
299
-
300
- # Use in flows for validation
301
- @pipeline_flow(config=ProcessingConfig)
302
- async def process(
303
- project_name: str,
304
- documents: DocumentList,
305
- flow_options: FlowOptions
306
- ) -> DocumentList:
307
- # ... processing logic ...
308
- return ProcessingConfig.create_and_validate_output(outputs)
309
- ```
310
-
311
- ### Pipeline Decorators
312
-
313
- Enhanced decorators with built-in tracing and monitoring:
314
-
315
- ```python
316
- from ai_pipeline_core import pipeline_flow, pipeline_task, set_trace_cost
317
-
318
- @pipeline_task # Automatic retry, tracing, and monitoring
319
- async def process_chunk(data: str) -> str:
320
- result = await transform(data)
321
- set_trace_cost(0.05) # Track costs
322
- return result
323
-
324
- @pipeline_flow(
325
- config=MyFlowConfig,
326
- trace_trim_documents=True # Trim large documents in traces
327
- )
328
- async def main_flow(
329
- project_name: str,
330
- documents: DocumentList,
331
- flow_options: FlowOptions
332
- ) -> DocumentList:
333
- # Your pipeline logic
334
- # Large documents are automatically trimmed to 100 chars in traces
335
- # for better observability without overwhelming the tracing UI
336
- return DocumentList(results)
337
- ```
338
-
339
- ### Local Trace Debugging
340
-
341
- Save all trace spans to the local filesystem for LLM-assisted debugging:
342
-
343
- ```bash
344
- export TRACE_DEBUG_PATH=/path/to/debug/output
345
- ```
346
-
347
- This creates a hierarchical directory structure that mirrors the execution flow with automatic deduplication:
348
-
349
- ```
350
- 20260128_152932_abc12345_my_flow/
351
- ├── _trace.yaml # Trace metadata
352
- ├── _index.yaml # Span ID → path mapping
353
- ├── _summary.md # Unified summary for human inspection and LLM debugging
354
- ├── artifacts/ # Deduplicated content storage
355
- │ └── sha256/
356
- │ └── ab/cd/ # Sharded by hash prefix
357
- │ └── abcdef...1234.txt # Large content (>10KB)
358
- └── 0001_my_flow/ # Root span (numbered for execution order)
359
- ├── _span.yaml # Span metadata (timing, status, I/O refs)
360
- ├── input.yaml # Structured inputs (inline or refs)
361
- ├── output.yaml # Structured outputs (inline or refs)
362
- ├── 0002_task_1/ # Child spans nested inside parent
363
- │ ├── _span.yaml
364
- │ ├── input.yaml
365
- │ ├── output.yaml
366
- │ └── 0003_llm_call/
367
- │ ├── _span.yaml
368
- │ ├── input.yaml # LLM messages with inline/external content
369
- │ └── output.yaml
370
- └── 0004_task_2/
371
- └── ...
372
- ```
373
-
374
- **Key Features:**
375
- - **Automatic Deduplication**: Identical content (e.g., system prompts) stored once in `artifacts/`
376
- - **Smart Externalization**: Large content (>10KB) externalized with 2KB inline previews
377
- - **AI-Friendly**: Files capped at 50KB for easy LLM processing
378
- - **Lossless**: Full content reconstruction via `content_ref` pointers
379
-
380
- Example `input.yaml` with externalization:
381
- ```yaml
382
- format_version: 3
383
- type: llm_messages
384
- messages:
385
- - role: system
386
- parts:
387
- - type: text
388
- size_bytes: 28500
389
- content_ref: # Large content → artifact
390
- hash: sha256:a1b2c3d4...
391
- path: artifacts/sha256/a1/b2/a1b2c3d4...txt
392
- excerpt: "You are a helpful assistant...\n[TRUNCATED]"
393
- - role: user
394
- parts:
395
- - type: text
396
- content: "Hello!" # Small content stays inline
397
- ```
398
-
399
- Run `tree` on the output directory to visualize the entire execution hierarchy. Feed `_summary.md` to an LLM for debugging assistance - it combines high-level overview with detailed navigation for comprehensive trace analysis.
400
-
401
- ## Configuration
402
-
403
- ### Environment Variables
404
-
405
- ```bash
406
- # LLM Configuration (via LiteLLM proxy)
407
- OPENAI_BASE_URL=http://localhost:4000
408
- OPENAI_API_KEY=your-api-key
409
-
410
- # Optional: Observability
411
- LMNR_PROJECT_API_KEY=your-lmnr-key
412
- LMNR_DEBUG=true # Enable debug traces
413
-
414
- # Optional: Local Trace Debugging
415
- TRACE_DEBUG_PATH=/path/to/trace/output # Save traces locally for LLM-assisted debugging
416
-
417
- # Optional: Orchestration
418
- PREFECT_API_URL=http://localhost:4200/api
419
- PREFECT_API_KEY=your-prefect-key
420
-
421
- # Optional: Storage (for Google Cloud Storage)
422
- GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json # GCS auth file
423
- ```
424
-
425
- ### Settings Management
426
-
427
- Create custom settings by inheriting from the base Settings class:
428
-
429
- ```python
430
- from ai_pipeline_core import Settings
431
-
432
- class ProjectSettings(Settings):
433
- """Project-specific configuration."""
434
- app_name: str = "my-app"
435
- max_retries: int = 3
436
- enable_cache: bool = True
437
-
438
- # Create singleton instance
439
- settings = ProjectSettings()
440
-
441
- # Access configuration
442
- print(settings.openai_base_url)
443
- print(settings.app_name)
444
- ```
445
-
446
- ## Best Practices
447
-
448
- ### Framework Rules (90% Use Cases)
449
-
450
- 1. **Decorators**: Use `@pipeline_task` WITHOUT parameters, `@pipeline_flow` WITH config
451
- 2. **Logging**: Use `get_pipeline_logger(__name__)` - NEVER `print()` or `logging` module
452
- 3. **LLM calls**: Use `AIMessages` or `str`. Wrap Documents in `AIMessages`
453
- 4. **Options**: Omit `ModelOptions` unless specifically needed (defaults are optimal)
454
- 5. **Documents**: Create with just `name` and `content` - skip `description`
455
- 6. **FlowConfig**: `OUTPUT_DOCUMENT_TYPE` must differ from all `INPUT_DOCUMENT_TYPES`
456
- 7. **Initialization**: `PromptManager` and logger at module scope, not in functions
457
- 8. **DocumentList**: Use default constructor - no validation flags needed
458
- 9. **setup_logging()**: Only in application `main()`, never at import time
459
-
460
- ### Import Convention
461
-
462
- Always import from the top-level package:
463
-
464
- ```python
465
- # CORRECT
466
- from ai_pipeline_core import llm, pipeline_flow, FlowDocument
467
-
468
- # WRONG - Never import from submodules
469
- from ai_pipeline_core.llm import generate # NO!
470
- from ai_pipeline_core.documents import FlowDocument # NO!
471
- ```
472
-
473
- ## Development
474
-
475
- ### Running Tests
476
-
477
- ```bash
478
- make test # Run all tests
479
- make test-cov # Run with coverage report
480
- make test-showcase # Test showcase example
481
- ```
482
-
483
- ### Code Quality
484
-
485
- ```bash
486
- make lint # Run linting
487
- make format # Auto-format code
488
- make typecheck # Type checking with basedpyright
489
- ```
490
-
491
- ### Building Documentation
492
-
493
- ```bash
494
- make docs-build # Generate API.md
495
- make docs-check # Verify documentation is up-to-date
496
- ```
497
-
498
- ## Examples
499
-
500
- The `examples/` directory contains:
501
-
502
- - `showcase.py` - Comprehensive example demonstrating all major features
503
- - Run with: `cd examples && python showcase.py /path/to/documents`
504
-
505
- ## API Reference
506
-
507
- See [API.md](API.md) for complete API documentation.
508
-
509
- ### Navigation Tips
510
-
511
- For humans:
512
- ```bash
513
- grep -n '^##' API.md # List all main sections
514
- grep -n '^###' API.md # List all classes and functions
515
- ```
516
-
517
- For AI assistants:
518
- - Use pattern `^##` to find module sections
519
- - Use pattern `^###` for classes and functions
520
- - Use pattern `^####` for methods and properties
521
-
522
- ## Project Structure
523
-
524
- ```
525
- ai-pipeline-core/
526
- ├── ai_pipeline_core/
527
- │ ├── deployment/ # Pipeline deployment and execution
528
- │ ├── documents/ # Document abstraction system
529
- │ ├── flow/ # Flow configuration and options
530
- │ ├── llm/ # LLM client and response handling
531
- │ ├── logging/ # Logging infrastructure
532
- │ ├── prompt_builder/ # Document-aware prompt construction
533
- │ ├── pipeline.py # Pipeline decorators
534
- │ ├── progress.py # Intra-flow progress tracking
535
- │ ├── prompt_manager.py # Jinja2 template management
536
- │ ├── settings.py # Configuration management
537
- │ └── tracing.py # Distributed tracing
538
- ├── tests/ # Comprehensive test suite
539
- ├── examples/ # Usage examples
540
- ├── API.md # Complete API reference
541
- └── pyproject.toml # Project configuration
542
- ```
543
-
544
- ## Contributing
545
-
546
- 1. Fork the repository
547
- 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
548
- 3. Make changes following the project's style guide
549
- 4. Run tests and linting (`make test lint typecheck`)
550
- 5. Commit your changes
551
- 6. Push to the branch (`git push origin feature/amazing-feature`)
552
- 7. Open a Pull Request
553
-
554
- ## License
555
-
556
- This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
557
-
558
- ## Support
559
-
560
- - **Issues**: [GitHub Issues](https://github.com/bbarwik/ai-pipeline-core/issues)
561
- - **Discussions**: [GitHub Discussions](https://github.com/bbarwik/ai-pipeline-core/discussions)
562
- - **Documentation**: [API Reference](API.md)
563
-
564
- ## Acknowledgments
565
-
566
- - Built on [Prefect](https://www.prefect.io/) for workflow orchestration
567
- - Uses [LiteLLM](https://github.com/BerriAI/litellm) for LLM provider abstraction
568
- - Integrates [Laminar (LMNR)](https://www.lmnr.ai/) for observability
569
- - Type checking with [Pydantic](https://pydantic.dev/) and [basedpyright](https://github.com/DetachHead/basedpyright)
@@ -1,57 +0,0 @@
1
- ai_pipeline_core/__init__.py,sha256=2jzEQktQJp-A3bzDU-A7c9xntnY3x9I-1XwYcojYjYE,6452
2
- ai_pipeline_core/exceptions.py,sha256=vx-XLTw2fJSPs-vwtXVYtqoQUcOc0JeI7UmHqRqQYWU,1569
3
- ai_pipeline_core/pipeline.py,sha256=t9qH-V6umpKY5MhGuXFgUGfdzGyxzVlS0n9RoKLfnug,28704
4
- ai_pipeline_core/prefect.py,sha256=91ZgLJHsDsRUW77CpNmkKxYs3RCJuucPM3pjKmNBeDg,2199
5
- ai_pipeline_core/progress.py,sha256=Ppxk4OOm84Y0x3t-Y3CmHsL4PovQLNUxXMu24zRCD-Q,3621
6
- ai_pipeline_core/prompt_manager.py,sha256=FAtb1yK7bGuAeuIJ523LOX9bd7TrcHG-TqZ7Lz4RJC0,12087
7
- ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- ai_pipeline_core/settings.py,sha256=IMrFaX0i-WIlaOA5O53ipNSta6KQVSFHc1aJXmS3nSo,5078
9
- ai_pipeline_core/tracing.py,sha256=HJ_DJhCEk6W_u3skecjETMQVLyOmbuPcqcOuoMIJlPs,33194
10
- ai_pipeline_core/debug/__init__.py,sha256=wOc9KotFqGYzBEtZUZ7ATfJf3dXWarYm6PXs6yW9uwE,756
11
- ai_pipeline_core/debug/config.py,sha256=l5WC2xbd6PgC-CcuioZg696iva_MkqyZj4C9TFdwfMs,3205
12
- ai_pipeline_core/debug/content.py,sha256=REtA1cJnOJy3OqaGud59B3Bug8cOJszm8w1GCqdAKJs,26696
13
- ai_pipeline_core/debug/processor.py,sha256=Cvm1HKc6lKRm80Xx7WXi_Z8pWoKH6actVZvntP9Mons,3935
14
- ai_pipeline_core/debug/summary.py,sha256=pzXC7QoFOBeen_XZ-AMFAVvaOtDuf28YB-WwCbsHYdQ,8017
15
- ai_pipeline_core/debug/writer.py,sha256=IF5eyML10EBFBqCGqlVwcWKraFvTgfqbU8WJ_XG_RU4,33108
16
- ai_pipeline_core/deployment/__init__.py,sha256=FN2HVoM80x2GJuNs7o4DnccB8HWWibgM1pJesB942CM,1259
17
- ai_pipeline_core/deployment/base.py,sha256=JYf8XLFR73c0H24dr6atK7yUcoE0vLxbYZ8EkQpEwN4,24791
18
- ai_pipeline_core/deployment/contract.py,sha256=0DKt5eqNE-grcITwMNq9CuBdo5WxdopEjDeQFzFZxhU,2225
19
- ai_pipeline_core/deployment/helpers.py,sha256=3nRuCyABkUEDZiL0q9u19XHpjA4527B6rsxQNOGTohw,3460
20
- ai_pipeline_core/documents/__init__.py,sha256=WHStvGZiSyybOcMTYxSV24U6MA3Am_0_Az5p-DuMFrk,738
21
- ai_pipeline_core/documents/document.py,sha256=hdTh36KGEcrDollTnQmTI66DJIqYfe4X42Y0q7Cm4fY,68153
22
- ai_pipeline_core/documents/document_list.py,sha256=Y_NCjfM_CjkIwHRD2iyGgYBuIykN8lT2IIH_uWOiGis,16254
23
- ai_pipeline_core/documents/flow_document.py,sha256=QK6RxNQu449IRAosOHSk3G_5yIq5I7yLBOSQPCd3m64,4141
24
- ai_pipeline_core/documents/mime_type.py,sha256=JFEOq4HwlIW2snobyNfWwySdT7urZSWkobiRMVs2fSE,7959
25
- ai_pipeline_core/documents/task_document.py,sha256=uASmAaxNkYtuqQrBM57vutFT9DXNTbqv0wbwwF55E3I,4300
26
- ai_pipeline_core/documents/temporary_document.py,sha256=jaz2ZHC5CmSbVbkXdI7pOB5DGEuhH16C0Yutv-lS_UI,2708
27
- ai_pipeline_core/documents/utils.py,sha256=ZyJNjFN7ihWno0K7dJZed7twYmmPLA0z40UzFw1A3A8,5465
28
- ai_pipeline_core/flow/__init__.py,sha256=2BfWYMOPYW5teGzwo-qzpn_bom1lxxry0bPsjVgcsCk,188
29
- ai_pipeline_core/flow/config.py,sha256=a9FALpgrFsdz-D7HU3diVeUzbaBvLwI8hsPviuj001s,19389
30
- ai_pipeline_core/flow/options.py,sha256=s5GBTy5lwFa1irf8BKrWO8NMZ5s_f4tqq7Wg9WQ7TTg,2302
31
- ai_pipeline_core/images/__init__.py,sha256=6R6Ncif6oRyVOH7LsdwNvEuMGHuljo-_gImY8C3Z_ow,9877
32
- ai_pipeline_core/images/_processing.py,sha256=wKSBAFe5TO-mo64ll20nmN9huazHwvVWFfNJB6g7u2Q,4421
33
- ai_pipeline_core/llm/__init__.py,sha256=3B_vtEzxrzidP1qOUNQ4RxlUmxZ2MBKQcUhQiTybM9g,661
34
- ai_pipeline_core/llm/ai_messages.py,sha256=XR2fwzguuh7v-HQ9LEJX_xwNX1D_-6f4T7E8_iNVTS4,15680
35
- ai_pipeline_core/llm/client.py,sha256=777Zf5BBRA-6g1I4Og8mitpoCPdYMp66WE52wH-1I-o,24910
36
- ai_pipeline_core/llm/model_options.py,sha256=uRNIHfVeh2sgt1mZBiOUx6hPQ6GKjB8b7TytZJ6afKg,11768
37
- ai_pipeline_core/llm/model_response.py,sha256=zEANsfuSAYVRKPwKx9gFIqHbdVG_1_JNMRHNoE43_YM,13503
38
- ai_pipeline_core/llm/model_types.py,sha256=wx-m0up7_NncTmSYmMsL-l-RgydjjJ905u7RMEAg7tI,2710
39
- ai_pipeline_core/logging/__init__.py,sha256=Nz6-ghAoENsgNmLD2ma9TW9M0U2_QfxuQ5DDW6Vt6M0,651
40
- ai_pipeline_core/logging/logging.yml,sha256=YTW48keO_K5bkkb-KXGM7ZuaYKiquLsjsURei8Ql0V4,1353
41
- ai_pipeline_core/logging/logging_config.py,sha256=pV2x6GgMPXrzPH27sicCSXfw56beio4C2JKCJ3NsXrg,6207
42
- ai_pipeline_core/logging/logging_mixin.py,sha256=OpdR3ASiM2ZwKZYGjZRJFUloGWUCv2Grnr8RqUWlYn8,8094
43
- ai_pipeline_core/prompt_builder/__init__.py,sha256=-v0SKZlir07xRzxXwv75VP66aINRUiKH0VUgB-PCDmI,195
44
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2,sha256=LPql5AaFhFWtDfhnBWvi-bWbz5vdgsWqKGzcqxWfLIM,1075
45
- ai_pipeline_core/prompt_builder/global_cache.py,sha256=9_9zoF6-sr3KBMxF5QLD3vxqXg9B2tT8o9ViplzUCNg,2811
46
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2,sha256=M8uPpwf-uLpsWWJT9DY_DnjrLToGPVnrD-gVhQrQdaQ,229
47
- ai_pipeline_core/prompt_builder/prompt_builder.py,sha256=4TrDRPiOMFwEfi6QGfriTHfjzj_CtbEjAcgQrVfRqhw,9378
48
- ai_pipeline_core/prompt_builder/system_prompt.jinja2,sha256=-1jLcfvAG07Zfl-dnYrjfVcAG4PWeeoeWpaKJGY3rKQ,3945
49
- ai_pipeline_core/storage/__init__.py,sha256=tcIkjJ3zPBLCyetwiJDewBvS2sbRJrDlBh3gEsQm08E,184
50
- ai_pipeline_core/storage/storage.py,sha256=ClMr419Y-eU2RuOjZYd51dC0stWQk28Vb56PvQaoUwc,20007
51
- ai_pipeline_core/utils/__init__.py,sha256=TJSmEm1Quf-gKwXrxM96u2IGzVolUyeNNfLMPoLstXI,254
52
- ai_pipeline_core/utils/deploy.py,sha256=N3i7B97DQJs1lwgYN3sa1UgwCNjseKXfjs50ZJUMCEI,22106
53
- ai_pipeline_core/utils/remote_deployment.py,sha256=U7MNJ1SU1mg3RrJyLqpuN_4pwqm8LSsFZbypJvjGPoo,4630
54
- ai_pipeline_core-0.3.3.dist-info/METADATA,sha256=WCRBGS2kO8916jlGc6jY_YuPwzw8diXfeNSrNFKxuvk,17893
55
- ai_pipeline_core-0.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
56
- ai_pipeline_core-0.3.3.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
57
- ai_pipeline_core-0.3.3.dist-info/RECORD,,