ai-pipeline-core 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +25 -14
- ai_pipeline_core/documents/__init__.py +2 -1
- ai_pipeline_core/documents/document.py +317 -49
- ai_pipeline_core/documents/document_list.py +136 -33
- ai_pipeline_core/documents/flow_document.py +8 -29
- ai_pipeline_core/documents/task_document.py +6 -27
- ai_pipeline_core/documents/temporary_document.py +6 -27
- ai_pipeline_core/documents/utils.py +64 -1
- ai_pipeline_core/flow/config.py +174 -5
- ai_pipeline_core/flow/options.py +2 -2
- ai_pipeline_core/llm/__init__.py +6 -1
- ai_pipeline_core/llm/ai_messages.py +14 -7
- ai_pipeline_core/llm/client.py +143 -55
- ai_pipeline_core/llm/model_options.py +20 -5
- ai_pipeline_core/llm/model_response.py +77 -29
- ai_pipeline_core/llm/model_types.py +38 -40
- ai_pipeline_core/logging/__init__.py +0 -2
- ai_pipeline_core/logging/logging_config.py +0 -6
- ai_pipeline_core/logging/logging_mixin.py +2 -10
- ai_pipeline_core/pipeline.py +68 -65
- ai_pipeline_core/prefect.py +12 -3
- ai_pipeline_core/prompt_manager.py +6 -7
- ai_pipeline_core/settings.py +13 -5
- ai_pipeline_core/simple_runner/__init__.py +1 -11
- ai_pipeline_core/simple_runner/cli.py +13 -12
- ai_pipeline_core/simple_runner/simple_runner.py +34 -172
- ai_pipeline_core/storage/__init__.py +8 -0
- ai_pipeline_core/storage/storage.py +628 -0
- ai_pipeline_core/tracing.py +110 -26
- {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/METADATA +60 -23
- ai_pipeline_core-0.2.0.dist-info/RECORD +38 -0
- ai_pipeline_core-0.1.13.dist-info/RECORD +0 -36
- {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.1.13.dist-info → ai_pipeline_core-0.2.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/pipeline.py
CHANGED
|
@@ -10,6 +10,7 @@ from __future__ import annotations
|
|
|
10
10
|
|
|
11
11
|
import datetime
|
|
12
12
|
import inspect
|
|
13
|
+
from functools import wraps
|
|
13
14
|
from typing import (
|
|
14
15
|
Any,
|
|
15
16
|
Callable,
|
|
@@ -35,8 +36,9 @@ from prefect.utilities.annotations import NotSet
|
|
|
35
36
|
from typing_extensions import TypeAlias
|
|
36
37
|
|
|
37
38
|
from ai_pipeline_core.documents import DocumentList
|
|
39
|
+
from ai_pipeline_core.flow.config import FlowConfig
|
|
38
40
|
from ai_pipeline_core.flow.options import FlowOptions
|
|
39
|
-
from ai_pipeline_core.tracing import TraceLevel, trace
|
|
41
|
+
from ai_pipeline_core.tracing import TraceLevel, set_trace_cost, trace
|
|
40
42
|
|
|
41
43
|
# --------------------------------------------------------------------------- #
|
|
42
44
|
# Public callback aliases (Prefect stubs omit these exact types)
|
|
@@ -99,7 +101,6 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
|
|
|
99
101
|
project_name: Name of the project/pipeline.
|
|
100
102
|
documents: Input DocumentList to process.
|
|
101
103
|
flow_options: Configuration options (FlowOptions or subclass).
|
|
102
|
-
*args, **kwargs: Additional flow-specific parameters.
|
|
103
104
|
|
|
104
105
|
Returns:
|
|
105
106
|
DocumentList: Processed documents.
|
|
@@ -113,8 +114,6 @@ class _DocumentsFlowCallable(Protocol[FO_contra]):
|
|
|
113
114
|
project_name: str,
|
|
114
115
|
documents: DocumentList,
|
|
115
116
|
flow_options: FO_contra,
|
|
116
|
-
*args: Any,
|
|
117
|
-
**kwargs: Any,
|
|
118
117
|
) -> Coroutine[Any, Any, DocumentList]: ...
|
|
119
118
|
|
|
120
119
|
|
|
@@ -145,8 +144,6 @@ class _FlowLike(Protocol[FO_contra]):
|
|
|
145
144
|
project_name: str,
|
|
146
145
|
documents: DocumentList,
|
|
147
146
|
flow_options: FO_contra,
|
|
148
|
-
*args: Any,
|
|
149
|
-
**kwargs: Any,
|
|
150
147
|
) -> Coroutine[Any, Any, DocumentList]: ...
|
|
151
148
|
|
|
152
149
|
name: str | None
|
|
@@ -224,6 +221,7 @@ def pipeline_task(
|
|
|
224
221
|
trace_ignore_inputs: list[str] | None = None,
|
|
225
222
|
trace_input_formatter: Callable[..., str] | None = None,
|
|
226
223
|
trace_output_formatter: Callable[..., str] | None = None,
|
|
224
|
+
trace_cost: float | None = None,
|
|
227
225
|
# prefect passthrough
|
|
228
226
|
name: str | None = None,
|
|
229
227
|
description: str | None = None,
|
|
@@ -263,6 +261,7 @@ def pipeline_task(
|
|
|
263
261
|
trace_ignore_inputs: list[str] | None = None,
|
|
264
262
|
trace_input_formatter: Callable[..., str] | None = None,
|
|
265
263
|
trace_output_formatter: Callable[..., str] | None = None,
|
|
264
|
+
trace_cost: float | None = None,
|
|
266
265
|
# prefect passthrough
|
|
267
266
|
name: str | None = None,
|
|
268
267
|
description: str | None = None,
|
|
@@ -316,6 +315,9 @@ def pipeline_task(
|
|
|
316
315
|
trace_ignore_inputs: List of parameter names to exclude from tracing.
|
|
317
316
|
trace_input_formatter: Custom formatter for input tracing.
|
|
318
317
|
trace_output_formatter: Custom formatter for output tracing.
|
|
318
|
+
trace_cost: Optional cost value to track in metadata. When provided and > 0,
|
|
319
|
+
sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
|
|
320
|
+
Also forces trace level to "always" if not already set.
|
|
319
321
|
|
|
320
322
|
Prefect task parameters:
|
|
321
323
|
name: Task name (defaults to function name).
|
|
@@ -405,6 +407,15 @@ def pipeline_task(
|
|
|
405
407
|
)
|
|
406
408
|
|
|
407
409
|
fname = _callable_name(fn, "task")
|
|
410
|
+
|
|
411
|
+
# Create wrapper to handle trace_cost if provided
|
|
412
|
+
@wraps(fn)
|
|
413
|
+
async def _wrapper(*args: Any, **kwargs: Any) -> R_co:
|
|
414
|
+
result = await fn(*args, **kwargs)
|
|
415
|
+
if trace_cost is not None and trace_cost > 0:
|
|
416
|
+
set_trace_cost(trace_cost)
|
|
417
|
+
return result
|
|
418
|
+
|
|
408
419
|
traced_fn = trace(
|
|
409
420
|
level=trace_level,
|
|
410
421
|
name=name or fname,
|
|
@@ -413,7 +424,7 @@ def pipeline_task(
|
|
|
413
424
|
ignore_inputs=trace_ignore_inputs,
|
|
414
425
|
input_formatter=trace_input_formatter,
|
|
415
426
|
output_formatter=trace_output_formatter,
|
|
416
|
-
)(
|
|
427
|
+
)(_wrapper)
|
|
417
428
|
|
|
418
429
|
return cast(
|
|
419
430
|
_TaskLike[R_co],
|
|
@@ -451,45 +462,10 @@ def pipeline_task(
|
|
|
451
462
|
# --------------------------------------------------------------------------- #
|
|
452
463
|
# @pipeline_flow — async-only, traced, returns Prefect's flow wrapper
|
|
453
464
|
# --------------------------------------------------------------------------- #
|
|
454
|
-
@overload
|
|
455
|
-
def pipeline_flow(__fn: _DocumentsFlowCallable[FO_contra], /) -> _FlowLike[FO_contra]: ...
|
|
456
|
-
@overload
|
|
457
|
-
def pipeline_flow(
|
|
458
|
-
*,
|
|
459
|
-
# tracing
|
|
460
|
-
trace_level: TraceLevel = "always",
|
|
461
|
-
trace_ignore_input: bool = False,
|
|
462
|
-
trace_ignore_output: bool = False,
|
|
463
|
-
trace_ignore_inputs: list[str] | None = None,
|
|
464
|
-
trace_input_formatter: Callable[..., str] | None = None,
|
|
465
|
-
trace_output_formatter: Callable[..., str] | None = None,
|
|
466
|
-
# prefect passthrough
|
|
467
|
-
name: str | None = None,
|
|
468
|
-
version: str | None = None,
|
|
469
|
-
flow_run_name: Union[Callable[[], str], str] | None = None,
|
|
470
|
-
retries: int | None = None,
|
|
471
|
-
retry_delay_seconds: int | float | None = None,
|
|
472
|
-
task_runner: TaskRunner[PrefectFuture[Any]] | None = None,
|
|
473
|
-
description: str | None = None,
|
|
474
|
-
timeout_seconds: int | float | None = None,
|
|
475
|
-
validate_parameters: bool = True,
|
|
476
|
-
persist_result: bool | None = None,
|
|
477
|
-
result_storage: ResultStorage | str | None = None,
|
|
478
|
-
result_serializer: ResultSerializer | str | None = None,
|
|
479
|
-
cache_result_in_memory: bool = True,
|
|
480
|
-
log_prints: bool | None = None,
|
|
481
|
-
on_completion: list[FlowStateHook[Any, Any]] | None = None,
|
|
482
|
-
on_failure: list[FlowStateHook[Any, Any]] | None = None,
|
|
483
|
-
on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
|
|
484
|
-
on_crashed: list[FlowStateHook[Any, Any]] | None = None,
|
|
485
|
-
on_running: list[FlowStateHook[Any, Any]] | None = None,
|
|
486
|
-
) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]: ...
|
|
487
|
-
|
|
488
|
-
|
|
489
465
|
def pipeline_flow(
|
|
490
|
-
__fn: _DocumentsFlowCallable[FO_contra] | None = None,
|
|
491
|
-
/,
|
|
492
466
|
*,
|
|
467
|
+
# config
|
|
468
|
+
config: type[FlowConfig],
|
|
493
469
|
# tracing
|
|
494
470
|
trace_level: TraceLevel = "always",
|
|
495
471
|
trace_ignore_input: bool = False,
|
|
@@ -497,6 +473,7 @@ def pipeline_flow(
|
|
|
497
473
|
trace_ignore_inputs: list[str] | None = None,
|
|
498
474
|
trace_input_formatter: Callable[..., str] | None = None,
|
|
499
475
|
trace_output_formatter: Callable[..., str] | None = None,
|
|
476
|
+
trace_cost: float | None = None,
|
|
500
477
|
# prefect passthrough
|
|
501
478
|
name: str | None = None,
|
|
502
479
|
version: str | None = None,
|
|
@@ -517,7 +494,7 @@ def pipeline_flow(
|
|
|
517
494
|
on_cancellation: list[FlowStateHook[Any, Any]] | None = None,
|
|
518
495
|
on_crashed: list[FlowStateHook[Any, Any]] | None = None,
|
|
519
496
|
on_running: list[FlowStateHook[Any, Any]] | None = None,
|
|
520
|
-
) ->
|
|
497
|
+
) -> Callable[[_DocumentsFlowCallable[FO_contra]], _FlowLike[FO_contra]]:
|
|
521
498
|
"""Decorate an async flow for document processing.
|
|
522
499
|
|
|
523
500
|
@public
|
|
@@ -537,16 +514,15 @@ def pipeline_flow(
|
|
|
537
514
|
project_name: str, # Project/pipeline identifier
|
|
538
515
|
documents: DocumentList, # Input documents to process
|
|
539
516
|
flow_options: FlowOptions, # Configuration (or subclass)
|
|
540
|
-
*args, # Additional positional args for custom parameters
|
|
541
|
-
**kwargs # Additional keyword args for custom parameters
|
|
542
517
|
) -> DocumentList # Must return DocumentList
|
|
543
518
|
|
|
544
|
-
Note: *args and **kwargs allow for defining custom parameters on your flow
|
|
545
|
-
function, which can be passed during execution for flow-specific needs.
|
|
546
|
-
|
|
547
519
|
Args:
|
|
548
520
|
__fn: Function to decorate (when used without parentheses).
|
|
549
521
|
|
|
522
|
+
Config parameter:
|
|
523
|
+
config: Required FlowConfig class for document loading/saving. Enables
|
|
524
|
+
automatic loading from string paths and saving outputs.
|
|
525
|
+
|
|
550
526
|
Tracing parameters:
|
|
551
527
|
trace_level: When to trace ("always", "debug", "off").
|
|
552
528
|
- "always": Always trace (default)
|
|
@@ -557,6 +533,9 @@ def pipeline_flow(
|
|
|
557
533
|
trace_ignore_inputs: Parameter names to exclude from tracing.
|
|
558
534
|
trace_input_formatter: Custom input formatter.
|
|
559
535
|
trace_output_formatter: Custom output formatter.
|
|
536
|
+
trace_cost: Optional cost value to track in metadata. When provided and > 0,
|
|
537
|
+
sets gen_ai.usage.output_cost, gen_ai.usage.cost, and cost metadata.
|
|
538
|
+
Also forces trace level to "always" if not already set.
|
|
560
539
|
|
|
561
540
|
Prefect flow parameters:
|
|
562
541
|
name: Flow name (defaults to function name).
|
|
@@ -584,10 +563,14 @@ def pipeline_flow(
|
|
|
584
563
|
while enforcing document processing conventions.
|
|
585
564
|
|
|
586
565
|
Example:
|
|
587
|
-
>>> from ai_pipeline_core import FlowOptions
|
|
566
|
+
>>> from ai_pipeline_core import FlowOptions, FlowConfig
|
|
588
567
|
>>>
|
|
589
|
-
>>>
|
|
590
|
-
|
|
568
|
+
>>> class MyFlowConfig(FlowConfig):
|
|
569
|
+
... INPUT_DOCUMENT_TYPES = [InputDoc]
|
|
570
|
+
... OUTPUT_DOCUMENT_TYPE = OutputDoc
|
|
571
|
+
>>>
|
|
572
|
+
>>> # Standard usage with config
|
|
573
|
+
>>> @pipeline_flow(config=MyFlowConfig)
|
|
591
574
|
>>> async def analyze_documents(
|
|
592
575
|
... project_name: str,
|
|
593
576
|
... documents: DocumentList,
|
|
@@ -600,8 +583,8 @@ def pipeline_flow(
|
|
|
600
583
|
... results.append(result)
|
|
601
584
|
... return DocumentList(results)
|
|
602
585
|
>>>
|
|
603
|
-
>>> # With parameters
|
|
604
|
-
>>> @pipeline_flow(retries=2)
|
|
586
|
+
>>> # With additional parameters:
|
|
587
|
+
>>> @pipeline_flow(config=MyFlowConfig, retries=2)
|
|
605
588
|
>>> async def critical_flow(
|
|
606
589
|
... project_name: str,
|
|
607
590
|
... documents: DocumentList,
|
|
@@ -658,24 +641,27 @@ def pipeline_flow(
|
|
|
658
641
|
"'project_name, documents, flow_options' as its first three parameters"
|
|
659
642
|
)
|
|
660
643
|
|
|
644
|
+
@wraps(fn)
|
|
661
645
|
async def _wrapper(
|
|
662
646
|
project_name: str,
|
|
663
|
-
documents: DocumentList,
|
|
647
|
+
documents: str | DocumentList,
|
|
664
648
|
flow_options: FO_contra,
|
|
665
|
-
*args: Any,
|
|
666
|
-
**kwargs: Any,
|
|
667
649
|
) -> DocumentList:
|
|
668
|
-
|
|
650
|
+
save_path: str | None = None
|
|
651
|
+
if isinstance(documents, str):
|
|
652
|
+
save_path = documents
|
|
653
|
+
documents = await config.load_documents(documents)
|
|
654
|
+
result = await fn(project_name, documents, flow_options)
|
|
655
|
+
if save_path:
|
|
656
|
+
await config.save_documents(save_path, result)
|
|
657
|
+
if trace_cost is not None and trace_cost > 0:
|
|
658
|
+
set_trace_cost(trace_cost)
|
|
669
659
|
if not isinstance(result, DocumentList): # pyright: ignore[reportUnnecessaryIsInstance]
|
|
670
660
|
raise TypeError(
|
|
671
661
|
f"Flow '{fname}' must return DocumentList, got {type(result).__name__}"
|
|
672
662
|
)
|
|
673
663
|
return result
|
|
674
664
|
|
|
675
|
-
# Preserve the original function name for Prefect
|
|
676
|
-
_wrapper.__name__ = fname
|
|
677
|
-
_wrapper.__qualname__ = getattr(fn, "__qualname__", fname)
|
|
678
|
-
|
|
679
665
|
traced = trace(
|
|
680
666
|
level=trace_level,
|
|
681
667
|
name=name or fname,
|
|
@@ -686,7 +672,21 @@ def pipeline_flow(
|
|
|
686
672
|
output_formatter=trace_output_formatter,
|
|
687
673
|
)(_wrapper)
|
|
688
674
|
|
|
689
|
-
|
|
675
|
+
# --- Publish a schema where `documents` accepts str (path) OR DocumentList ---
|
|
676
|
+
_sig = inspect.signature(fn)
|
|
677
|
+
_params = [
|
|
678
|
+
p.replace(annotation=(str | DocumentList)) if p.name == "documents" else p
|
|
679
|
+
for p in _sig.parameters.values()
|
|
680
|
+
]
|
|
681
|
+
if hasattr(traced, "__signature__"):
|
|
682
|
+
setattr(traced, "__signature__", _sig.replace(parameters=_params))
|
|
683
|
+
if hasattr(traced, "__annotations__"):
|
|
684
|
+
traced.__annotations__ = {
|
|
685
|
+
**getattr(traced, "__annotations__", {}),
|
|
686
|
+
"documents": str | DocumentList,
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
flow_obj = cast(
|
|
690
690
|
_FlowLike[FO_contra],
|
|
691
691
|
flow_decorator(
|
|
692
692
|
name=name or fname,
|
|
@@ -710,8 +710,11 @@ def pipeline_flow(
|
|
|
710
710
|
on_running=on_running,
|
|
711
711
|
)(traced),
|
|
712
712
|
)
|
|
713
|
+
# Attach config to the flow object for later access
|
|
714
|
+
flow_obj.config = config # type: ignore[attr-defined]
|
|
715
|
+
return flow_obj
|
|
713
716
|
|
|
714
|
-
return _apply
|
|
717
|
+
return _apply
|
|
715
718
|
|
|
716
719
|
|
|
717
720
|
__all__ = ["pipeline_task", "pipeline_flow"]
|
ai_pipeline_core/prefect.py
CHANGED
|
@@ -47,8 +47,17 @@ Note:
|
|
|
47
47
|
integrated LMNR tracing and are the standard for this library.
|
|
48
48
|
"""
|
|
49
49
|
|
|
50
|
-
from prefect import flow, task
|
|
50
|
+
from prefect import deploy, flow, serve, task
|
|
51
51
|
from prefect.logging import disable_run_logger
|
|
52
52
|
from prefect.testing.utilities import prefect_test_harness
|
|
53
|
-
|
|
54
|
-
|
|
53
|
+
from prefect.types.entrypoint import EntrypointType
|
|
54
|
+
|
|
55
|
+
__all__ = [
|
|
56
|
+
"task",
|
|
57
|
+
"flow",
|
|
58
|
+
"disable_run_logger",
|
|
59
|
+
"prefect_test_harness",
|
|
60
|
+
"serve",
|
|
61
|
+
"deploy",
|
|
62
|
+
"EntrypointType",
|
|
63
|
+
]
|
|
@@ -10,7 +10,8 @@ directories.
|
|
|
10
10
|
Search strategy:
|
|
11
11
|
1. Local directory (same as calling module)
|
|
12
12
|
2. Local 'prompts' subdirectory
|
|
13
|
-
3. Parent 'prompts' directories (up to package
|
|
13
|
+
3. Parent 'prompts' directories (search ascends parent packages up to the package
|
|
14
|
+
boundary or after 4 parent levels, whichever comes first)
|
|
14
15
|
|
|
15
16
|
Key features:
|
|
16
17
|
- Automatic template discovery
|
|
@@ -69,7 +70,8 @@ class PromptManager:
|
|
|
69
70
|
Search hierarchy:
|
|
70
71
|
1. Same directory as the calling module (for local templates)
|
|
71
72
|
2. 'prompts' subdirectory in the calling module's directory
|
|
72
|
-
3. 'prompts' directories in parent packages (up to
|
|
73
|
+
3. 'prompts' directories in parent packages (search ascends parent packages up to the
|
|
74
|
+
package boundary or after 4 parent levels, whichever comes first)
|
|
73
75
|
|
|
74
76
|
Attributes:
|
|
75
77
|
search_paths: List of directories where templates are searched.
|
|
@@ -144,7 +146,8 @@ class PromptManager:
|
|
|
144
146
|
2. /project/flows/prompts/ (if exists)
|
|
145
147
|
3. /project/prompts/ (if /project has __init__.py)
|
|
146
148
|
|
|
147
|
-
Search
|
|
149
|
+
Search ascends parent packages up to the package boundary or after 4 parent
|
|
150
|
+
levels, whichever comes first.
|
|
148
151
|
|
|
149
152
|
Example:
|
|
150
153
|
>>> # Correct usage
|
|
@@ -155,10 +158,6 @@ class PromptManager:
|
|
|
155
158
|
>>>
|
|
156
159
|
>>> # Common mistake (will raise PromptError)
|
|
157
160
|
>>> pm = PromptManager(__name__) # Wrong!
|
|
158
|
-
|
|
159
|
-
Note:
|
|
160
|
-
The search is limited to 4 parent levels to prevent
|
|
161
|
-
excessive filesystem traversal.
|
|
162
161
|
"""
|
|
163
162
|
search_paths: list[Path] = []
|
|
164
163
|
|
ai_pipeline_core/settings.py
CHANGED
|
@@ -12,6 +12,7 @@ Environment variables:
|
|
|
12
12
|
PREFECT_API_URL: Prefect server endpoint for flow orchestration
|
|
13
13
|
PREFECT_API_KEY: Prefect API authentication key
|
|
14
14
|
LMNR_PROJECT_API_KEY: Laminar project key for observability
|
|
15
|
+
GCS_SERVICE_ACCOUNT_FILE: Path to GCS service account JSON file
|
|
15
16
|
|
|
16
17
|
Configuration precedence:
|
|
17
18
|
1. Environment variables (highest priority)
|
|
@@ -39,6 +40,7 @@ Example:
|
|
|
39
40
|
PREFECT_API_URL=http://localhost:4200/api
|
|
40
41
|
PREFECT_API_KEY=pnu_abc123
|
|
41
42
|
LMNR_PROJECT_API_KEY=lmnr_proj_xyz
|
|
43
|
+
GCS_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
|
|
42
44
|
APP_NAME=production-app
|
|
43
45
|
DEBUG_MODE=false
|
|
44
46
|
|
|
@@ -90,12 +92,15 @@ class Settings(BaseSettings):
|
|
|
90
92
|
prefect_api_key: Prefect API authentication key. Required only
|
|
91
93
|
when connecting to Prefect Cloud or secured server.
|
|
92
94
|
|
|
93
|
-
lmnr_project_api_key: Laminar (LMNR) project API key for
|
|
94
|
-
|
|
95
|
-
for production monitoring.
|
|
95
|
+
lmnr_project_api_key: Laminar (LMNR) project API key for observability.
|
|
96
|
+
Optional but recommended for production monitoring.
|
|
96
97
|
|
|
97
|
-
lmnr_debug: Debug mode flag for Laminar
|
|
98
|
-
enable debug-level
|
|
98
|
+
lmnr_debug: Debug mode flag for Laminar. Set to "true" to
|
|
99
|
+
enable debug-level logging. Empty string by default.
|
|
100
|
+
|
|
101
|
+
gcs_service_account_file: Path to GCS service account JSON file.
|
|
102
|
+
Used for authenticating with Google Cloud Storage.
|
|
103
|
+
Optional - if not set, default credentials will be used.
|
|
99
104
|
|
|
100
105
|
Configuration sources:
|
|
101
106
|
- Environment variables (highest priority)
|
|
@@ -126,6 +131,9 @@ class Settings(BaseSettings):
|
|
|
126
131
|
lmnr_project_api_key: str = ""
|
|
127
132
|
lmnr_debug: str = ""
|
|
128
133
|
|
|
134
|
+
# Storage Configuration
|
|
135
|
+
gcs_service_account_file: str = "" # Path to GCS service account JSON file
|
|
136
|
+
|
|
129
137
|
|
|
130
138
|
# Legacy: Module-level instance for backwards compatibility
|
|
131
139
|
# Applications should create their own settings instance
|
|
@@ -4,21 +4,11 @@ Utilities for running AI pipelines locally without full Prefect orchestration.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from .cli import run_cli
|
|
7
|
-
from .simple_runner import
|
|
8
|
-
ConfigSequence,
|
|
9
|
-
FlowSequence,
|
|
10
|
-
load_documents_from_directory,
|
|
11
|
-
run_pipeline,
|
|
12
|
-
run_pipelines,
|
|
13
|
-
save_documents_to_directory,
|
|
14
|
-
)
|
|
7
|
+
from .simple_runner import FlowSequence, run_pipeline, run_pipelines
|
|
15
8
|
|
|
16
9
|
__all__ = [
|
|
17
10
|
"run_cli",
|
|
18
11
|
"run_pipeline",
|
|
19
12
|
"run_pipelines",
|
|
20
|
-
"load_documents_from_directory",
|
|
21
|
-
"save_documents_to_directory",
|
|
22
13
|
"FlowSequence",
|
|
23
|
-
"ConfigSequence",
|
|
24
14
|
]
|
|
@@ -19,7 +19,7 @@ from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
|
|
|
19
19
|
from ai_pipeline_core.prefect import disable_run_logger, prefect_test_harness
|
|
20
20
|
from ai_pipeline_core.settings import settings
|
|
21
21
|
|
|
22
|
-
from .simple_runner import
|
|
22
|
+
from .simple_runner import FlowSequence, run_pipelines
|
|
23
23
|
|
|
24
24
|
logger = get_pipeline_logger(__name__)
|
|
25
25
|
|
|
@@ -87,7 +87,6 @@ def _running_under_pytest() -> bool:
|
|
|
87
87
|
def run_cli(
|
|
88
88
|
*,
|
|
89
89
|
flows: FlowSequence,
|
|
90
|
-
flow_configs: ConfigSequence,
|
|
91
90
|
options_cls: Type[TOptions],
|
|
92
91
|
initializer: InitializerFunc = None,
|
|
93
92
|
trace_name: str | None = None,
|
|
@@ -105,17 +104,13 @@ def run_cli(
|
|
|
105
104
|
|
|
106
105
|
Example:
|
|
107
106
|
>>> # In __main__.py
|
|
108
|
-
>>> from ai_pipeline_core
|
|
107
|
+
>>> from ai_pipeline_core import simple_runner
|
|
109
108
|
>>> from .flows import AnalysisFlow, SummaryFlow
|
|
110
|
-
>>> from .config import
|
|
109
|
+
>>> from .config import AnalysisOptions
|
|
111
110
|
>>>
|
|
112
111
|
>>> if __name__ == "__main__":
|
|
113
|
-
... run_cli(
|
|
112
|
+
... simple_runner.run_cli(
|
|
114
113
|
... flows=[AnalysisFlow, SummaryFlow],
|
|
115
|
-
... flow_configs=[
|
|
116
|
-
... (AnalysisConfig, AnalysisOptions),
|
|
117
|
-
... (AnalysisConfig, AnalysisOptions)
|
|
118
|
-
... ],
|
|
119
114
|
... options_cls=AnalysisOptions,
|
|
120
115
|
... trace_name="document-analysis"
|
|
121
116
|
... )
|
|
@@ -226,8 +221,15 @@ def run_cli(
|
|
|
226
221
|
_, initial_documents = init_result # Ignore project name from initializer
|
|
227
222
|
|
|
228
223
|
# Save initial documents if starting from first step
|
|
229
|
-
if getattr(opts, "start", 1) == 1 and initial_documents:
|
|
230
|
-
|
|
224
|
+
if getattr(opts, "start", 1) == 1 and initial_documents and flows:
|
|
225
|
+
# Get config from the first flow
|
|
226
|
+
first_flow_config = getattr(flows[0], "config", None)
|
|
227
|
+
if first_flow_config:
|
|
228
|
+
asyncio.run(
|
|
229
|
+
first_flow_config.save_documents(
|
|
230
|
+
str(wd), initial_documents, validate_output_type=False
|
|
231
|
+
)
|
|
232
|
+
)
|
|
231
233
|
|
|
232
234
|
# Setup context stack with optional test harness and tracing
|
|
233
235
|
with ExitStack() as stack:
|
|
@@ -247,7 +249,6 @@ def run_cli(
|
|
|
247
249
|
project_name=project_name,
|
|
248
250
|
output_dir=wd,
|
|
249
251
|
flows=flows,
|
|
250
|
-
flow_configs=flow_configs,
|
|
251
252
|
flow_options=opts,
|
|
252
253
|
start_step=getattr(opts, "start", 1),
|
|
253
254
|
end_step=getattr(opts, "end", None),
|