ai-pipeline-core 0.1.14__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +21 -13
- ai_pipeline_core/documents/document.py +93 -50
- ai_pipeline_core/documents/document_list.py +70 -23
- ai_pipeline_core/documents/flow_document.py +2 -6
- ai_pipeline_core/documents/task_document.py +0 -4
- ai_pipeline_core/documents/temporary_document.py +1 -8
- ai_pipeline_core/flow/config.py +174 -5
- ai_pipeline_core/llm/__init__.py +1 -1
- ai_pipeline_core/llm/ai_messages.py +14 -4
- ai_pipeline_core/llm/client.py +116 -59
- ai_pipeline_core/llm/model_options.py +2 -5
- ai_pipeline_core/llm/model_response.py +17 -16
- ai_pipeline_core/llm/model_types.py +0 -4
- ai_pipeline_core/logging/__init__.py +0 -2
- ai_pipeline_core/logging/logging_config.py +0 -6
- ai_pipeline_core/logging/logging_mixin.py +2 -10
- ai_pipeline_core/pipeline.py +45 -68
- ai_pipeline_core/prefect.py +12 -3
- ai_pipeline_core/prompt_manager.py +6 -7
- ai_pipeline_core/settings.py +13 -5
- ai_pipeline_core/simple_runner/__init__.py +1 -11
- ai_pipeline_core/simple_runner/cli.py +13 -12
- ai_pipeline_core/simple_runner/simple_runner.py +34 -189
- ai_pipeline_core/storage/__init__.py +8 -0
- ai_pipeline_core/storage/storage.py +628 -0
- ai_pipeline_core/tracing.py +3 -26
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.0.dist-info}/METADATA +19 -17
- ai_pipeline_core-0.2.0.dist-info/RECORD +38 -0
- ai_pipeline_core-0.1.14.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
@public
|
|
4
4
|
|
|
5
|
-
Provides enhanced response classes that
|
|
5
|
+
Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
|
|
6
6
|
with additional metadata, cost tracking, and structured output support.
|
|
7
7
|
"""
|
|
8
8
|
|
|
@@ -23,8 +23,8 @@ class ModelResponse(ChatCompletion):
|
|
|
23
23
|
|
|
24
24
|
Primary usage is adding to AIMessages for multi-turn conversations:
|
|
25
25
|
|
|
26
|
-
>>> response = await llm.generate(messages=messages)
|
|
27
|
-
>>> messages.
|
|
26
|
+
>>> response = await llm.generate("gpt-5", messages=messages)
|
|
27
|
+
>>> messages.append(response) # Add assistant response to conversation
|
|
28
28
|
>>> print(response.content) # Access generated text
|
|
29
29
|
|
|
30
30
|
The two main interactions with ModelResponse:
|
|
@@ -35,13 +35,13 @@ class ModelResponse(ChatCompletion):
|
|
|
35
35
|
like token usage and cost tracking are available but rarely needed.
|
|
36
36
|
|
|
37
37
|
Example:
|
|
38
|
-
>>> from ai_pipeline_core
|
|
38
|
+
>>> from ai_pipeline_core import llm, AIMessages
|
|
39
39
|
>>>
|
|
40
|
-
>>> messages = AIMessages("Explain quantum computing")
|
|
41
|
-
>>> response = await generate(messages=messages)
|
|
40
|
+
>>> messages = AIMessages(["Explain quantum computing"])
|
|
41
|
+
>>> response = await llm.generate("gpt-5", messages=messages)
|
|
42
42
|
>>>
|
|
43
43
|
>>> # Primary usage: add to conversation
|
|
44
|
-
>>> messages.
|
|
44
|
+
>>> messages.append(response)
|
|
45
45
|
>>>
|
|
46
46
|
>>> # Access generated text
|
|
47
47
|
>>> print(response.content)
|
|
@@ -96,17 +96,17 @@ class ModelResponse(ChatCompletion):
|
|
|
96
96
|
@public
|
|
97
97
|
|
|
98
98
|
Primary property for accessing the LLM's response text.
|
|
99
|
-
This
|
|
99
|
+
This is the main property you'll use with ModelResponse.
|
|
100
100
|
|
|
101
101
|
Returns:
|
|
102
102
|
Generated text from the model, or empty string if none.
|
|
103
103
|
|
|
104
104
|
Example:
|
|
105
|
-
>>> response = await generate(messages="Hello")
|
|
105
|
+
>>> response = await generate("gpt-5", messages="Hello")
|
|
106
106
|
>>> text = response.content # The generated response
|
|
107
107
|
>>>
|
|
108
108
|
>>> # Common pattern: add to messages then use content
|
|
109
|
-
>>> messages.
|
|
109
|
+
>>> messages.append(response)
|
|
110
110
|
>>> if "error" in response.content.lower():
|
|
111
111
|
... # Handle error case
|
|
112
112
|
"""
|
|
@@ -189,8 +189,7 @@ class ModelResponse(ChatCompletion):
|
|
|
189
189
|
>>> response = await llm.generate(
|
|
190
190
|
... "gpt-5",
|
|
191
191
|
... context=large_doc,
|
|
192
|
-
... messages="Summarize this"
|
|
193
|
-
... options=ModelOptions(cache_ttl="300s")
|
|
192
|
+
... messages="Summarize this"
|
|
194
193
|
... )
|
|
195
194
|
>>>
|
|
196
195
|
>>> # Get comprehensive metadata
|
|
@@ -292,6 +291,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
|
|
|
292
291
|
... summary: str
|
|
293
292
|
>>>
|
|
294
293
|
>>> response = await generate_structured(
|
|
294
|
+
... "gpt-5",
|
|
295
295
|
... response_format=Analysis,
|
|
296
296
|
... messages="Analyze this text..."
|
|
297
297
|
... )
|
|
@@ -301,7 +301,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
|
|
|
301
301
|
>>> print(f"Sentiment: {analysis.sentiment}")
|
|
302
302
|
>>>
|
|
303
303
|
>>> # Can add to messages for conversation
|
|
304
|
-
>>> messages.
|
|
304
|
+
>>> messages.append(response)
|
|
305
305
|
|
|
306
306
|
The two main interactions:
|
|
307
307
|
1. Accessing .parsed property for the structured data
|
|
@@ -377,6 +377,7 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
|
|
|
377
377
|
... age: int
|
|
378
378
|
>>>
|
|
379
379
|
>>> response = await generate_structured(
|
|
380
|
+
... "gpt-5",
|
|
380
381
|
... response_format=UserInfo,
|
|
381
382
|
... messages="Extract user info..."
|
|
382
383
|
... )
|
|
@@ -386,11 +387,11 @@ class StructuredModelResponse(ModelResponse, Generic[T]):
|
|
|
386
387
|
>>> print(f"{user.name} is {user.age} years old")
|
|
387
388
|
>>>
|
|
388
389
|
>>> # Can also add to messages
|
|
389
|
-
>>> messages.
|
|
390
|
+
>>> messages.append(response)
|
|
390
391
|
|
|
391
392
|
Note:
|
|
392
|
-
Type-safe with full IDE support. This property
|
|
393
|
-
|
|
393
|
+
Type-safe with full IDE support. This is the main property
|
|
394
|
+
you'll use with structured responses.
|
|
394
395
|
"""
|
|
395
396
|
if self._parsed_value is not None:
|
|
396
397
|
return self._parsed_value
|
|
@@ -79,8 +79,4 @@ Note:
|
|
|
79
79
|
The ModelName type includes both predefined literals and str,
|
|
80
80
|
allowing full flexibility while maintaining IDE support for
|
|
81
81
|
common models.
|
|
82
|
-
|
|
83
|
-
See Also:
|
|
84
|
-
- llm.generate: Main generation function
|
|
85
|
-
- ModelOptions: Model configuration options
|
|
86
82
|
"""
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
"""Centralized logging configuration for AI Pipeline Core.
|
|
2
2
|
|
|
3
|
-
@public
|
|
4
|
-
|
|
5
3
|
Provides logging configuration management that integrates with Prefect's logging system.
|
|
6
4
|
"""
|
|
7
5
|
|
|
@@ -26,8 +24,6 @@ DEFAULT_LOG_LEVELS = {
|
|
|
26
24
|
class LoggingConfig:
|
|
27
25
|
"""Manages logging configuration for the pipeline.
|
|
28
26
|
|
|
29
|
-
@public
|
|
30
|
-
|
|
31
27
|
Provides centralized logging configuration with Prefect integration.
|
|
32
28
|
|
|
33
29
|
Configuration precedence:
|
|
@@ -144,8 +140,6 @@ _logging_config: Optional[LoggingConfig] = None
|
|
|
144
140
|
def setup_logging(config_path: Optional[Path] = None, level: Optional[str] = None):
|
|
145
141
|
"""Setup logging for the AI Pipeline Core library.
|
|
146
142
|
|
|
147
|
-
@public
|
|
148
|
-
|
|
149
143
|
Initializes logging configuration for the pipeline system.
|
|
150
144
|
|
|
151
145
|
IMPORTANT: Call setup_logging exactly once in your application entry point
|
|
@@ -1,7 +1,4 @@
|
|
|
1
|
-
"""Logging mixin for consistent logging across components using Prefect logging.
|
|
2
|
-
|
|
3
|
-
@public
|
|
4
|
-
"""
|
|
1
|
+
"""Logging mixin for consistent logging across components using Prefect logging."""
|
|
5
2
|
|
|
6
3
|
import contextlib
|
|
7
4
|
import time
|
|
@@ -17,8 +14,6 @@ from prefect.logging import get_logger
|
|
|
17
14
|
class LoggerMixin:
|
|
18
15
|
"""Mixin class that provides consistent logging functionality using Prefect's logging system.
|
|
19
16
|
|
|
20
|
-
@public
|
|
21
|
-
|
|
22
17
|
Note for users: In your code, always obtain loggers via get_pipeline_logger(__name__).
|
|
23
18
|
The mixin's internal behavior routes to the appropriate backend; you should not call
|
|
24
19
|
logging.getLogger directly.
|
|
@@ -94,10 +89,7 @@ class LoggerMixin:
|
|
|
94
89
|
|
|
95
90
|
|
|
96
91
|
class StructuredLoggerMixin(LoggerMixin):
|
|
97
|
-
"""Extended mixin for structured logging with Prefect.
|
|
98
|
-
|
|
99
|
-
@public
|
|
100
|
-
"""
|
|
92
|
+
"""Extended mixin for structured logging with Prefect."""
|
|
101
93
|
|
|
102
94
|
def log_event(self, event: str, **kwargs: Any) -> None:
|
|
103
95
|
"""Log a structured event.
|
ai_pipeline_core/pipeline.py
CHANGED
|
@@ -36,6 +36,7 @@ from prefect.utilities.annotations import NotSet
|
|
|
36
36
|
from typing_extensions import TypeAlias
|
|
37
37
|
|
|
38
38
|
from ai_pipeline_core.documents import DocumentList
|
|
39
|
+
from ai_pipeline_core.flow.config import FlowConfig
|
|
39
40
|
from ai_pipeline_core.flow.options import FlowOptions
|
|
40
41
|
from ai_pipeline_core.tracing import TraceLevel, set_trace_cost, trace
|
|
41
42
|
|
|
@@ -100,7 +101,6 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
|
|
|
100
101
|
project_name: Name of the project/pipeline.
|
|
101
102
|
documents: Input DocumentList to process.
|
|
102
103
|
flow_options: Configuration options (FlowOptions or subclass).
|
|
103
|
-
*args, **kwargs: Additional flow-specific parameters.
|
|
104
104
|
|
|
105
105
|
Returns:
|
|
106
106
|
DocumentList: Processed documents.
|
|
@@ -114,8 +114,6 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
|
|
|
114
114
|
project_name: str,
|
|
115
115
|
documents: DocumentList,
|
|
116
116
|
flow_options: FO_contra,
|
|
117
|
-
*args: Any,
|
|
118
|
-
**kwargs: Any,
|
|
119
117
|
) -> Coroutine[Any, Any, DocumentList]: ...
|
|
120
118
|
|
|
121
119
|
|
|
@@ -146,8 +144,6 @@ class _FlowLike(Protocol[FO_contra]):
|
|
|
146
144
|
project_name: str,
|
|
147
145
|
documents: DocumentList,
|
|
148
146
|
flow_options: FO_contra,
|
|
149
|
-
*args: Any,
|
|
150
|
-
**kwargs: Any,
|
|
151
147
|
) -> Coroutine[Any, Any, DocumentList]: ...
|
|
152
148
|
|
|
153
149
|
name: str | None
|
|
@@ -420,10 +416,6 @@ def pipeline_task(
|
|
|
420
416
|
set_trace_cost(trace_cost)
|
|
421
417
|
return result
|
|
422
418
|
|
|
423
|
-
# Preserve the original function name for Prefect
|
|
424
|
-
_wrapper.__name__ = fname
|
|
425
|
-
_wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
|
|
426
|
-
|
|
427
419
|
traced_fn = trace(
|
|
428
420
|
level=trace_level,
|
|
429
421
|
name=name or fname,
|
|
@@ -470,11 +462,10 @@ def pipeline_task(
|
|
|
470
462
|
# --------------------------------------------------------------------------- #
|
|
471
463
|
# @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
|
|
472
464
|
# --------------------------------------------------------------------------- #
|
|
473
|
-
@overload
|
|
474
|
-
def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
|
|
475
|
-
@overload
|
|
476
465
|
def pipeline_flow(
|
|
477
466
|
*,
|
|
467
|
+
# config
|
|
468
|
+
config: type[FlowConfig],
|
|
478
469
|
# tracing
|
|
479
470
|
trace_level: TraceLevel = "always",
|
|
480
471
|
trace_ignore_input: bool = False,
|
|
@@ -503,42 +494,7 @@ def pipeline_flow(
|
|
|
503
494
|
on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
|
|
504
495
|
on_crashed: list[FlowStateHook[Any, Any]] | None = None,
|
|
505
496
|
on_running: list[FlowStateHook[Any, Any]] | None = None,
|
|
506
|
-
) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
def pipeline_flow(
|
|
510
|
-
__fn: _DocumentsFlowCallable[FO_contra] | None = None,
|
|
511
|
-
/,
|
|
512
|
-
*,
|
|
513
|
-
# tracing
|
|
514
|
-
trace_level: TraceLevel = "always",
|
|
515
|
-
trace_ignore_input: bool = False,
|
|
516
|
-
trace_ignore_output: bool = False,
|
|
517
|
-
trace_ignore_inputs: list[str] | None = None,
|
|
518
|
-
trace_input_formatter: Callable[..., str] | None = None,
|
|
519
|
-
trace_output_formatter: Callable[..., str] | None = None,
|
|
520
|
-
trace_cost: float | None = None,
|
|
521
|
-
# prefect passthrough
|
|
522
|
-
name: str | None = None,
|
|
523
|
-
version: str | None = None,
|
|
524
|
-
flow_run_name: Union[Callable[[], str], str] | None = None,
|
|
525
|
-
retries: int | None = None,
|
|
526
|
-
retry_delay_seconds: int | float | None = None,
|
|
527
|
-
task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
|
|
528
|
-
description: str | None = None,
|
|
529
|
-
timeout_seconds: int | float | None = None,
|
|
530
|
-
validate_parameters: bool = True,
|
|
531
|
-
persist_result: bool | None = None,
|
|
532
|
-
result_storage: ResultStorage | str | None = None,
|
|
533
|
-
result_serializer: ResultSerializer | str | None = None,
|
|
534
|
-
cache_result_in_memory: bool = True,
|
|
535
|
-
log_prints: bool | None = None,
|
|
536
|
-
on_completion: list[FlowStateHook[Any, Any]] | None = None,
|
|
537
|
-
on_failure: list[FlowStateHook[Any, Any]] | None = None,
|
|
538
|
-
on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
|
|
539
|
-
on_crashed: list[FlowStateHook[Any, Any]] | None = None,
|
|
540
|
-
on_running: list[FlowStateHook[Any, Any]] | None = None,
|
|
541
|
-
) -> _FlowLike[FO_contra] | Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
|
|
497
|
+
) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
|
|
542
498
|
"""Decorate an async flow for document processing.
|
|
543
499
|
|
|
544
500
|
@public
|
|
@@ -558,16 +514,15 @@ def pipeline_flow(
|
|
|
558
514
|
project_name: str, # Project/pipeline identifier
|
|
559
515
|
documents: DocumentList, # Input documents to process
|
|
560
516
|
flow_options: FlowOptions, # Configuration (or subclass)
|
|
561
|
-
*args, # Additional positional args for custom parameters
|
|
562
|
-
**kwargs # Additional keyword args for custom parameters
|
|
563
517
|
) -> DocumentList # Must return DocumentList
|
|
564
518
|
|
|
565
|
-
Note: *args and **kwargs allow for defining custom parameters on your flow
|
|
566
|
-
function, which can be passed during execution for flow-specific needs.
|
|
567
|
-
|
|
568
519
|
Args:
|
|
569
520
|
__fn: Function to decorate (when used without parentheses).
|
|
570
521
|
|
|
522
|
+
Config parameter:
|
|
523
|
+
config: Required FlowConfig class for document loading/saving. Enables
|
|
524
|
+
automatic loading from string paths and saving outputs.
|
|
525
|
+
|
|
571
526
|
Tracing parameters:
|
|
572
527
|
trace_level: When to trace ("always", "debug", "off").
|
|
573
528
|
- "always": Always trace (default)
|
|
@@ -608,10 +563,14 @@ def pipeline_flow(
|
|
|
608
563
|
while enforcing document processing conventions.
|
|
609
564
|
|
|
610
565
|
Example:
|
|
611
|
-
>>> from ai_pipeline_core import FlowOptions
|
|
566
|
+
>>> from ai_pipeline_core import FlowOptions, FlowConfig
|
|
612
567
|
>>>
|
|
613
|
-
>>>
|
|
614
|
-
|
|
568
|
+
>>> class MyFlowConfig(FlowConfig):
|
|
569
|
+
... INPUT_DOCUMENT_TYPES = [InputDoc]
|
|
570
|
+
... OUTPUT_DOCUMENT_TYPE = OutputDoc
|
|
571
|
+
>>>
|
|
572
|
+
>>> # Standard usage with config
|
|
573
|
+
>>> @pipeline_flow(config=MyFlowConfig)
|
|
615
574
|
>>> async def analyze_documents(
|
|
616
575
|
... project_name: str,
|
|
617
576
|
... documents: DocumentList,
|
|
@@ -624,8 +583,8 @@ def pipeline_flow(
|
|
|
624
583
|
... results.append(result)
|
|
625
584
|
... return DocumentList(results)
|
|
626
585
|
>>>
|
|
627
|
-
>>> # With parameters
|
|
628
|
-
>>> @pipeline_flow(retries=2)
|
|
586
|
+
>>> # With additional parameters:
|
|
587
|
+
>>> @pipeline_flow(config=MyFlowConfig, retries=2)
|
|
629
588
|
>>> async def critical_flow(
|
|
630
589
|
... project_name: str,
|
|
631
590
|
... documents: DocumentList,
|
|
@@ -682,14 +641,19 @@ def pipeline_flow(
|
|
|
682
641
|
"'project_name, documents, flow_options' as its first three parameters"
|
|
683
642
|
)
|
|
684
643
|
|
|
644
|
+
@wraps(fn)
|
|
685
645
|
async def _wrapper(
|
|
686
646
|
project_name: str,
|
|
687
|
-
documents: DocumentList,
|
|
647
|
+
documents: str | DocumentList,
|
|
688
648
|
flow_options: FO_contra,
|
|
689
|
-
*args: Any,
|
|
690
|
-
**kwargs: Any,
|
|
691
649
|
) -> DocumentList:
|
|
692
|
-
|
|
650
|
+
save_path: str | None = None
|
|
651
|
+
if isinstance(documents, str):
|
|
652
|
+
save_path = documents
|
|
653
|
+
documents = await config.load_documents(documents)
|
|
654
|
+
result = await fn(project_name, documents, flow_options)
|
|
655
|
+
if save_path:
|
|
656
|
+
await config.save_documents(save_path, result)
|
|
693
657
|
if trace_cost is not None and trace_cost > 0:
|
|
694
658
|
set_trace_cost(trace_cost)
|
|
695
659
|
if not isinstance(result, DocumentList): # pyright: ignore[reportUnnecessaryIsInstance]
|
|
@@ -698,10 +662,6 @@ def pipeline_flow(
|
|
|
698
662
|
)
|
|
699
663
|
return result
|
|
700
664
|
|
|
701
|
-
# Preserve the original function name for Prefect
|
|
702
|
-
_wrapper.__name__ = fname
|
|
703
|
-
_wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
|
|
704
|
-
|
|
705
665
|
traced = trace(
|
|
706
666
|
level=trace_level,
|
|
707
667
|
name=name or fname,
|
|
@@ -712,7 +672,21 @@ def pipeline_flow(
|
|
|
712
672
|
output_formatter=trace_output_formatter,
|
|
713
673
|
)(_wrapper)
|
|
714
674
|
|
|
715
|
-
|
|
675
|
+
# --- Publish a schema where `documents` accepts str (path) OR DocumentList ---
|
|
676
|
+
_sig = inspect.signature(fn)
|
|
677
|
+
_params = [
|
|
678
|
+
p.replace(annotation=(str | DocumentList)) if p.name == "documents" else p
|
|
679
|
+
for p in _sig.parameters.values()
|
|
680
|
+
]
|
|
681
|
+
if hasattr(traced, "__signature__"):
|
|
682
|
+
setattr(traced, "__signature__", _sig.replace(parameters=_params))
|
|
683
|
+
if hasattr(traced, "__annotations__"):
|
|
684
|
+
traced.__annotations__ = {
|
|
685
|
+
**getattr(traced, "__annotations__", {}),
|
|
686
|
+
"documents": str | DocumentList,
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
flow_obj = cast(
|
|
716
690
|
_FlowLike[FO_contra],
|
|
717
691
|
flow_decorator(
|
|
718
692
|
name=name or fname,
|
|
@@ -736,8 +710,11 @@ def pipeline_flow(
|
|
|
736
710
|
on_running=on_running,
|
|
737
711
|
)(traced),
|
|
738
712
|
)
|
|
713
|
+
# Attach config to the flow object for later access
|
|
714
|
+
flow_obj.config = config # type: ignore[attr-defined]
|
|
715
|
+
return flow_obj
|
|
739
716
|
|
|
740
|
-
return _apply
|
|
717
|
+
return _apply
|
|
741
718
|
|
|
742
719
|
|
|
743
720
|
__all__ = ["pipeline_task", "pipeline_flow"]
|
ai_pipeline_core/prefect.py
CHANGED
|
@@ -47,8 +47,17 @@ Note:
|
|
|
47
47
|
integrated LMNR tracing and are the standard for this library.
|
|
48
48
|
"""
|
|
49
49
|
|
|
50
|
-
from prefect import flow, task
|
|
50
|
+
from prefect import deploy, flow, serve, task
|
|
51
51
|
from prefect.logging import disable_run_logger
|
|
52
52
|
from prefect.testing.utilities import prefect_test_harness
|
|
53
|
-
|
|
54
|
-
|
|
53
|
+
from prefect.types.entrypoint import EntrypointType
|
|
54
|
+
|
|
55
|
+
__all__ = [
|
|
56
|
+
"task",
|
|
57
|
+
"flow",
|
|
58
|
+
"disable_run_logger",
|
|
59
|
+
"prefect_test_harness",
|
|
60
|
+
"serve",
|
|
61
|
+
"deploy",
|
|
62
|
+
"EntrypointType",
|
|
63
|
+
]
|
|
@@ -10,7 +10,8 @@ directories.
|
|
|
10
10
|
Search strategy:
|
|
11
11
|
1. Local directory (same as calling module)
|
|
12
12
|
2. Local 'prompts' subdirectory
|
|
13
|
-
3. Parent 'prompts' directories (up to package
|
|
13
|
+
3. Parent 'prompts' directories (search ascends parent packages up to the package
|
|
14
|
+
boundary or after 4 parent levels, whichever comes first)
|
|
14
15
|
|
|
15
16
|
Key features:
|
|
16
17
|
- Automatic template discovery
|
|
@@ -69,7 +70,8 @@ class PromptManager:
|
|
|
69
70
|
Search hierarchy:
|
|
70
71
|
1. Same directory as the calling module (for local templates)
|
|
71
72
|
2. 'prompts' subdirectory in the calling module's directory
|
|
72
|
-
3. 'prompts' directories in parent packages (up to
|
|
73
|
+
3. 'prompts' directories in parent packages (search ascends parent packages up to the
|
|
74
|
+
package boundary or after 4 parent levels, whichever comes first)
|
|
73
75
|
|
|
74
76
|
Attributes:
|
|
75
77
|
search_paths: List of directories where templates are searched.
|
|
@@ -144,7 +146,8 @@ class PromptManager:
|
|
|
144
146
|
2. /project/flows/prompts/ (if exists)
|
|
145
147
|
3. /project/prompts/ (if /project has __init__.py)
|
|
146
148
|
|
|
147
|
-
Search
|
|
149
|
+
Search ascends parent packages up to the package boundary or after 4 parent
|
|
150
|
+
levels, whichever comes first.
|
|
148
151
|
|
|
149
152
|
Example:
|
|
150
153
|
>>> # Correct usage
|
|
@@ -155,10 +158,6 @@ class PromptManager:
|
|
|
155
158
|
>>>
|
|
156
159
|
>>> # Common mistake (will raise PromptError)
|
|
157
160
|
>>> pm = PromptManager(__name__) # Wrong!
|
|
158
|
-
|
|
159
|
-
Note:
|
|
160
|
-
The search is limited to 4 parent levels to prevent
|
|
161
|
-
excessive filesystem traversal.
|
|
162
161
|
"""
|
|
163
162
|
search_paths: list[Path] = []
|
|
164
163
|
|
ai_pipeline_core/settings.py
CHANGED
|
@@ -12,6 +12,7 @@ Environment variables:
|
|
|
12
12
|
PREFECT_API_URL: Prefect server endpoint for flow orchestration
|
|
13
13
|
PREFECT_API_KEY: Prefect API authentication key
|
|
14
14
|
LMNR_PROJECT_API_KEY: Laminar project key for observability
|
|
15
|
+
GCS_SERVICE_ACCOUNT_FILE: Path to GCS service account JSON file
|
|
15
16
|
|
|
16
17
|
Configuration precedence:
|
|
17
18
|
1. Environment variables (highest priority)
|
|
@@ -39,6 +40,7 @@ Example:
|
|
|
39
40
|
PREFECT_API_URL=http://localhost:4200/api
|
|
40
41
|
PREFECT_API_KEY=pnu_abc123
|
|
41
42
|
LMNR_PROJECT_API_KEY=lmnr_proj_xyz
|
|
43
|
+
GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
|
|
42
44
|
APP_NAME=production-app
|
|
43
45
|
DEBUG_MODE=false
|
|
44
46
|
|
|
@@ -90,12 +92,15 @@ class Settings(BaseSettings):
|
|
|
90
92
|
prefect_api_key: Prefect API authentication key. Required only
|
|
91
93
|
when connecting to Prefect Cloud or secured server.
|
|
92
94
|
|
|
93
|
-
lmnr_project_api_key: Laminar (LMNR) project API key for
|
|
94
|
-
|
|
95
|
-
for production monitoring.
|
|
95
|
+
lmnr_project_api_key: Laminar (LMNR) project API key for observability.
|
|
96
|
+
Optional but recommended for production monitoring.
|
|
96
97
|
|
|
97
|
-
lmnr_debug: Debug mode flag for Laminar
|
|
98
|
-
enable debug-level
|
|
98
|
+
lmnr_debug: Debug mode flag for Laminar. Set to "true" to
|
|
99
|
+
enable debug-level logging. Empty string by default.
|
|
100
|
+
|
|
101
|
+
gcs_service_account_file: Path to GCS service account JSON file.
|
|
102
|
+
Used for authenticating with Google Cloud Storage.
|
|
103
|
+
Optional - if not set, default credentials will be used.
|
|
99
104
|
|
|
100
105
|
Configuration sources:
|
|
101
106
|
- Environment variables (highest priority)
|
|
@@ -126,6 +131,9 @@ class Settings(BaseSettings):
|
|
|
126
131
|
lmnr_project_api_key: str = ""
|
|
127
132
|
lmnr_debug: str = ""
|
|
128
133
|
|
|
134
|
+
# Storage Configuration
|
|
135
|
+
gcs_service_account_file: str = "" # Path to GCS service account JSON file
|
|
136
|
+
|
|
129
137
|
|
|
130
138
|
# Legacy: Module-level instance for backwards compatibility
|
|
131
139
|
# Applications should create their own settings instance
|
|
@@ -4,21 +4,11 @@ Utilities for running AI pipelines locally without full Prefect orchestration.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from .cli import run_cli
|
|
7
|
-
from .simple_runner import
|
|
8
|
-
ConfigSequence,
|
|
9
|
-
FlowSequence,
|
|
10
|
-
load_documents_from_directory,
|
|
11
|
-
run_pipeline,
|
|
12
|
-
run_pipelines,
|
|
13
|
-
save_documents_to_directory,
|
|
14
|
-
)
|
|
7
|
+
from .simple_runner import FlowSequence, run_pipeline, run_pipelines
|
|
15
8
|
|
|
16
9
|
__all__ = [
|
|
17
10
|
"run_cli",
|
|
18
11
|
"run_pipeline",
|
|
19
12
|
"run_pipelines",
|
|
20
|
-
"load_documents_from_directory",
|
|
21
|
-
"save_documents_to_directory",
|
|
22
13
|
"FlowSequence",
|
|
23
|
-
"ConfigSequence",
|
|
24
14
|
]
|
|
@@ -19,7 +19,7 @@ from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
|
|
|
19
19
|
from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
|
|
20
20
|
from ai_pipeline_core.settings import settings
|
|
21
21
|
|
|
22
|
-
from .simple_runner import
|
|
22
|
+
from .simple_runner import FlowSequence, run_pipelines
|
|
23
23
|
|
|
24
24
|
logger = get_pipeline_logger(__name__)
|
|
25
25
|
|
|
@@ -87,7 +87,6 @@ def _running_under_pytest() -> bool:
|
|
|
87
87
|
def run_cli(
|
|
88
88
|
*,
|
|
89
89
|
flows: FlowSequence,
|
|
90
|
-
flow_configs: ConfigSequence,
|
|
91
90
|
options_cls: Type[TOptions],
|
|
92
91
|
initializer: InitializerFunc = None,
|
|
93
92
|
trace_name: str | None = None,
|
|
@@ -105,17 +104,13 @@ def run_cli(
|
|
|
105
104
|
|
|
106
105
|
Example:
|
|
107
106
|
>>> # In __main__.py
|
|
108
|
-
>>> from ai_pipeline_core
|
|
107
|
+
>>> from ai_pipeline_core import simple_runner
|
|
109
108
|
>>> from .flows import AnalysisFlow, SummaryFlow
|
|
110
|
-
>>> from .config import
|
|
109
|
+
>>> from .config import AnalysisOptions
|
|
111
110
|
>>>
|
|
112
111
|
>>> if __name__ == "__main__":
|
|
113
|
-
... run_cli(
|
|
112
|
+
... simple_runner.run_cli(
|
|
114
113
|
... flows=[AnalysisFlow, SummaryFlow],
|
|
115
|
-
... flow_configs=[
|
|
116
|
-
... (AnalysisConfig, AnalysisOptions),
|
|
117
|
-
... (AnalysisConfig, AnalysisOptions)
|
|
118
|
-
... ],
|
|
119
114
|
... options_cls=AnalysisOptions,
|
|
120
115
|
... trace_name="document-analysis"
|
|
121
116
|
... )
|
|
@@ -226,8 +221,15 @@ def run_cli(
|
|
|
226
221
|
_, initial_documents = init_result # Ignore project name from initializer
|
|
227
222
|
|
|
228
223
|
# Save initial documents if starting from first step
|
|
229
|
-
if getattr(opts, "start", 1) == 1 and initial_documents:
|
|
230
|
-
|
|
224
|
+
if getattr(opts, "start", 1) == 1 and initial_documents and flows:
|
|
225
|
+
# Get config from the first flow
|
|
226
|
+
first_flow_config = getattr(flows[0], "config", None)
|
|
227
|
+
if first_flow_config:
|
|
228
|
+
asyncio.run(
|
|
229
|
+
first_flow_config.save_documents(
|
|
230
|
+
str(wd), initial_documents, validate_output_type=False
|
|
231
|
+
)
|
|
232
|
+
)
|
|
231
233
|
|
|
232
234
|
# Setup context stack with optional test harness and tracing
|
|
233
235
|
with ExitStack() as stack:
|
|
@@ -247,7 +249,6 @@ def run_cli(
|
|
|
247
249
|
project_name=project_name,
|
|
248
250
|
output_dir=wd,
|
|
249
251
|
flows=flows,
|
|
250
|
-
flow_configs=flow_configs,
|
|
251
252
|
flow_options=opts,
|
|
252
253
|
start_step=getattr(opts, "start", 1),
|
|
253
254
|
end_step=getattr(opts, "end", None),
|