ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
Files changed (96)
  1. ai_pipeline_core/__init__.py +64 -158
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +11 -84
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +102 -90
  37. ai_pipeline_core/llm/client.py +229 -183
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,40 +1,46 @@
  """Core classes for pipeline deployments.

- @public
-
  Provides the PipelineDeployment base class and related types for
  creating unified, type-safe pipeline deployments with:
- - Per-flow caching (skip if outputs exist)
+ - Per-flow resume (skip if outputs exist in DocumentStore)
  - Per-flow uploads (immediate, not just at end)
  - Prefect state hooks (on_running, on_completion, etc.)
- - Smart storage provisioning (override provision_storage)
  - Upload on failure (partial results saved)
  """

  import asyncio
+ import contextlib
+ import hashlib
  import os
- import re
  import sys
  from abc import abstractmethod
+ from collections.abc import Callable
  from contextlib import ExitStack
  from dataclasses import dataclass
- from datetime import datetime, timedelta, timezone
- from hashlib import sha256
+ from datetime import UTC, datetime
  from pathlib import Path
- from typing import Any, Callable, ClassVar, Generic, Protocol, TypeVar, cast, final
- from uuid import UUID
+ from typing import Any, ClassVar, Generic, Protocol, TypeVar, cast, final
+ from uuid import UUID, uuid4

  import httpx
  from lmnr import Laminar
- from prefect import get_client
+ from opentelemetry import trace as otel_trace
+ from prefect import flow, get_client, runtime
  from pydantic import BaseModel, ConfigDict, Field
  from pydantic_settings import CliPositionalArg, SettingsConfigDict

- from ai_pipeline_core.documents import DocumentList
- from ai_pipeline_core.flow.options import FlowOptions
+ from ai_pipeline_core.document_store import SummaryGenerator, create_document_store, get_document_store, set_document_store
+ from ai_pipeline_core.document_store.local import LocalDocumentStore
+ from ai_pipeline_core.document_store.memory import MemoryDocumentStore
+ from ai_pipeline_core.documents import Document
+ from ai_pipeline_core.documents.context import RunContext, reset_run_context, set_run_context
  from ai_pipeline_core.logging import get_pipeline_logger, setup_logging
- from ai_pipeline_core.prefect import disable_run_logger, flow, prefect_test_harness
+ from ai_pipeline_core.observability._debug import LocalDebugSpanProcessor, LocalTraceWriter, TraceDebugConfig
+ from ai_pipeline_core.observability._initialization import get_tracking_service, initialize_observability
+ from ai_pipeline_core.observability._tracking._models import RunStatus
+ from ai_pipeline_core.pipeline.options import FlowOptions
  from ai_pipeline_core.settings import settings
+ from ai_pipeline_core.testing import disable_run_logger, prefect_test_harness

  from .contract import CompletedRun, DeploymentResultData, FailedRun, ProgressRun
  from .helpers import (
@@ -49,8 +55,44 @@ from .helpers import (
  logger = get_pipeline_logger(__name__)


+ def _build_summary_generator() -> SummaryGenerator | None:
+ """Build a summary generator callable from settings, or None if disabled/unavailable."""
+ if not settings.doc_summary_enabled:
+ return None
+
+ from ai_pipeline_core.observability._summary import generate_document_summary
+
+ model = settings.doc_summary_model
+
+ async def _generator(name: str, excerpt: str) -> str:
+ return await generate_document_summary(name, excerpt, model=model)
+
+ return _generator
+
+
+ # Fields added by run_cli()'s _CliOptions that should not affect the run scope fingerprint
+ _CLI_FIELDS: set[str] = {"working_directory", "project_name", "start", "end", "no_trace"}
+
+
+ def _compute_run_scope(project_name: str, documents: list[Document], options: FlowOptions) -> str:
+ """Compute a run scope that fingerprints inputs and options.
+
+ Different inputs or options produce a different scope, preventing
+ stale cache hits when re-running with the same project name.
+ Falls back to just project_name when no documents are provided
+ (e.g. --start N resume without initializer).
+ """
+ if not documents:
+ return project_name
+ sha256s = sorted(doc.sha256 for doc in documents)
+ exclude = _CLI_FIELDS & set(type(options).model_fields)
+ options_json = options.model_dump_json(exclude=exclude, exclude_none=True)
+ fingerprint = hashlib.sha256(f"{':'.join(sha256s)}|{options_json}".encode()).hexdigest()[:16]
+ return f"{project_name}:{fingerprint}"
+
+
  class DeploymentContext(BaseModel):
- """@public Infrastructure configuration for deployments.
+ """Infrastructure configuration for deployments.

  Webhooks are optional - provide URLs to enable:
  - progress_webhook_url: Per-flow progress (started/completed/cached)
@@ -58,8 +100,8 @@ class DeploymentContext(BaseModel):
  - completion_webhook_url: Final result when deployment ends
  """

- input_documents_urls: list[str] = Field(default_factory=list)
- output_documents_urls: dict[str, str] = Field(default_factory=dict)
+ input_documents_urls: tuple[str, ...] = Field(default_factory=tuple)
+ output_documents_urls: dict[str, str] = Field(default_factory=dict) # nosemgrep: mutable-field-on-frozen-pydantic-model

  progress_webhook_url: str = ""
  status_webhook_url: str = ""
@@ -69,7 +111,7 @@


  class DeploymentResult(BaseModel):
- """@public Base class for deployment results."""
+ """Base class for deployment results."""

  success: bool
  error: str | None = None
@@ -84,19 +126,28 @@ TResult = TypeVar("TResult", bound=DeploymentResult)
  class FlowCallable(Protocol):
  """Protocol for @pipeline_flow decorated functions."""

- config: Any
  name: str
  __name__: str
+ input_document_types: list[type[Document]]
+ output_document_types: list[type[Document]]
+ estimated_minutes: int

- def __call__(
- self, project_name: str, documents: DocumentList, flow_options: FlowOptions
- ) -> Any: ...
+ def __call__(self, project_name: str, documents: list[Document], flow_options: FlowOptions) -> Any: # type: ignore[type-arg]
+ """Execute the flow with standard pipeline signature."""
+ ...

  def with_options(self, **kwargs: Any) -> "FlowCallable":
- """Return a copy with overridden Prefect flow options."""
+ """Return a copy with overridden Prefect flow options (e.g., hooks)."""
  ...


+ def _reattach_flow_metadata(original: FlowCallable, target: Any) -> None:
+ """Reattach custom flow attributes that Prefect's with_options() may strip."""
+ for attr in ("input_document_types", "output_document_types", "estimated_minutes"):
+ if hasattr(original, attr) and not hasattr(target, attr):
+ setattr(target, attr, getattr(original, attr))
+
+
  @dataclass(slots=True)
  class _StatusWebhookHook:
  """Prefect hook that sends status webhooks on state transitions."""
@@ -118,7 +169,7 @@ class _StatusWebhookHook:
  "flow_name": self.flow_name,
  "state": state.type.value if hasattr(state.type, "value") else str(state.type),
  "state_name": state.name or "",
- "timestamp": datetime.now(timezone.utc).isoformat(),
+ "timestamp": datetime.now(UTC).isoformat(),
  }
  try:
  async with httpx.AsyncClient(timeout=10) as client:
@@ -127,11 +178,44 @@
  logger.warning(f"Status webhook failed: {e}")


+ def _validate_flow_chain(deployment_name: str, flows: list[Any]) -> None:
+ """Validate that each flow's input types are satisfiable by preceding flows' outputs.
+
+ Simulates a type pool: starts with the first flow's input types, adds each flow's
+ output types after processing. For subsequent flows, each required input type must
+ be satisfiable by at least one type in the pool (via issubclass).
+ """
+ type_pool: set[type[Document]] = set()
+
+ for i, flow_fn in enumerate(flows):
+ input_types: list[type[Document]] = getattr(flow_fn, "input_document_types", [])
+ output_types: list[type[Document]] = getattr(flow_fn, "output_document_types", [])
+ flow_name = getattr(flow_fn, "name", getattr(flow_fn, "__name__", f"flow[{i}]"))
+
+ if i == 0:
+ # First flow: its input types seed the pool
+ type_pool.update(input_types)
+ elif input_types:
+ # Subsequent flows: at least one declared input type must be satisfiable
+ # from the pool (union semantics — flow accepts any of the declared types)
+ any_satisfied = any(any(issubclass(available, t) for available in type_pool) for t in input_types)
+ if not any_satisfied:
+ input_names = sorted(t.__name__ for t in input_types)
+ pool_names = sorted(t.__name__ for t in type_pool) if type_pool else ["(empty)"]
+ raise TypeError(
+ f"{deployment_name}: flow '{flow_name}' (step {i + 1}) requires input types "
+ f"{input_names} but none are produced by preceding flows. "
+ f"Available types: {pool_names}"
+ )
+
+ type_pool.update(output_types)
+
+
  class PipelineDeployment(Generic[TOptions, TResult]):
- """@public Base class for pipeline deployments.
+ """Base class for pipeline deployments.

- Features enabled by default when URLs/storage provided:
- - Per-flow caching: Skip flows if outputs exist in storage
+ Features enabled by default:
+ - Per-flow resume: Skip flows if outputs exist in DocumentStore
  - Per-flow uploads: Upload documents after each flow
  - Prefect hooks: Attach state hooks if status_webhook_url provided
  - Upload on failure: Save partial results if pipeline fails
@@ -153,12 +237,9 @@ class PipelineDeployment(Generic[TOptions, TResult]):

  cls.name = class_name_to_deployment_name(cls.__name__)

- options_type, result_type = extract_generic_params(cls)
+ options_type, result_type = extract_generic_params(cls, PipelineDeployment)
  if options_type is None or result_type is None:
- raise TypeError(
- f"{cls.__name__} must specify Generic parameters: "
- f"class {cls.__name__}(PipelineDeployment[MyOptions, MyResult])"
- )
+ raise TypeError(f"{cls.__name__} must specify Generic parameters: class {cls.__name__}(PipelineDeployment[MyOptions, MyResult])")

  cls.options_type = options_type
  cls.result_type = result_type
@@ -166,70 +247,38 @@
  if not cls.flows:
  raise TypeError(f"{cls.__name__}.flows cannot be empty")

- @staticmethod
- @abstractmethod
- def build_result(project_name: str, documents: DocumentList, options: TOptions) -> TResult:
- """Extract typed result from accumulated pipeline documents."""
- ...
-
- async def provision_storage(
- self,
- project_name: str,
- documents: DocumentList,
- options: TOptions,
- context: DeploymentContext,
- ) -> str:
- """Provision GCS storage bucket based on project name and content hash.
-
- Default: Creates `{project}-{date}-{hash}` bucket on GCS.
- Returns empty string if GCS is unavailable or creation fails.
- Override for custom storage provisioning logic.
- """
- if not documents:
- return ""
+ # build_result must be implemented (not still abstract from PipelineDeployment)
+ build_result_fn = getattr(cls, "build_result", None)
+ if build_result_fn is None or getattr(build_result_fn, "__isabstractmethod__", False):
+ raise TypeError(f"{cls.__name__} must implement 'build_result' static method")

- try:
- from ai_pipeline_core.storage.storage import GcsStorage # noqa: PLC0415
- except ImportError:
- return ""
-
- content_hash = sha256(b"".join(sorted(d.content for d in documents))).hexdigest()[:6]
- base = re.sub(r"[^a-z0-9-]", "-", project_name.lower()).strip("-") or "project"
- today = datetime.now(timezone.utc).strftime("%y-%m-%d")
- yesterday = (datetime.now(timezone.utc) - timedelta(days=1)).strftime("%y-%m-%d")
+ # No duplicate flows (by identity)
+ seen_ids: set[int] = set()
+ for flow_fn in cls.flows:
+ fid = id(flow_fn)
+ if fid in seen_ids:
+ flow_name = getattr(flow_fn, "name", getattr(flow_fn, "__name__", str(flow_fn)))
+ raise TypeError(f"{cls.__name__}.flows contains duplicate flow '{flow_name}'")
+ seen_ids.add(fid)

- today_bucket = f"{base[:30]}-{today}-{content_hash}"
- yesterday_bucket = f"{base[:30]}-{yesterday}-{content_hash}"
+ # Flow type chain validation: simulate a type pool
+ _validate_flow_chain(cls.__name__, cls.flows)

- # Try today's bucket, then yesterday's, then create new
- for bucket_name in (today_bucket, yesterday_bucket):
- try:
- storage = GcsStorage(bucket_name)
- if await storage.list(recursive=False):
- logger.info(f"Using existing bucket: {bucket_name}")
- return f"gs://{bucket_name}"
- except Exception:
- continue
+ @staticmethod
+ @abstractmethod
+ def build_result(project_name: str, documents: list[Document], options: TOptions) -> TResult:
+ """Extract typed result from pipeline documents loaded from DocumentStore."""
+ ...

- try:
- storage = GcsStorage(today_bucket)
- await storage.create_bucket()
- logger.info(f"Created new bucket: {today_bucket}")
- return f"gs://{today_bucket}"
- except Exception as e:
- logger.warning(f"Failed to provision GCS storage: {e}")
- return ""
-
- async def _load_cached_output(
- self, flow_fn: FlowCallable, storage_uri: str
- ) -> DocumentList | None:
- """Load cached outputs if they exist. Override for custom cache logic."""
- try:
- output_type = flow_fn.config.OUTPUT_DOCUMENT_TYPE
- docs = await flow_fn.config.load_documents_by_type(storage_uri, [output_type])
- return docs if docs else None
- except Exception:
- return None
+ def _all_document_types(self) -> list[type[Document]]:
+ """Collect all document types from all flows (inputs + outputs), deduplicated."""
+ types: dict[str, type[Document]] = {}
+ for flow_fn in self.flows:
+ for t in getattr(flow_fn, "input_document_types", []):
+ types[t.__name__] = t
+ for t in getattr(flow_fn, "output_document_types", []):
+ types[t.__name__] = t
+ return list(types.values())

  def _build_status_hooks(
  self,
@@ -262,7 +311,6 @@ class PipelineDeployment(Generic[TOptions, TResult]):
  context: DeploymentContext,
  flow_run_id: str,
  project_name: str,
- storage_uri: str,
  step: int,
  total_steps: int,
  flow_name: str,
@@ -271,15 +319,19 @@
  message: str = "",
  ) -> None:
  """Send progress webhook and update flow run labels."""
- progress = round((step - 1 + step_progress) / total_steps, 4)
+ # Use estimated_minutes for weighted progress calculation
+ flow_minutes = [getattr(f, "estimated_minutes", 1) for f in self.flows]
+ total_minutes = sum(flow_minutes) or 1
+ completed_minutes = sum(flow_minutes[: max(step - 1, 0)])
+ current_flow_minutes = flow_minutes[step - 1] if step - 1 < len(flow_minutes) else 1
+ progress = round(max(0.0, min(1.0, (completed_minutes + current_flow_minutes * step_progress) / total_minutes)), 4)

  if context.progress_webhook_url:
  payload = ProgressRun(
  flow_run_id=UUID(flow_run_id) if flow_run_id else UUID(int=0),
  project_name=project_name,
  state="RUNNING",
- timestamp=datetime.now(timezone.utc),
- storage_uri=storage_uri,
+ timestamp=datetime.now(UTC),
  step=step,
  total_steps=total_steps,
  flow_name=flow_name,
@@ -316,7 +368,6 @@
  context: DeploymentContext,
  flow_run_id: str,
  project_name: str,
- storage_uri: str,
  result: TResult | None,
  error: str | None,
  ) -> None:
@@ -324,7 +375,7 @@
  if not context.completion_webhook_url:
  return
  try:
- now = datetime.now(timezone.utc)
+ now = datetime.now(UTC)
  frid = UUID(flow_run_id) if flow_run_id else UUID(int=0)
  payload: CompletedRun | FailedRun
  if result is not None:
@@ -332,7 +383,6 @@
  flow_run_id=frid,
  project_name=project_name,
  timestamp=now,
- storage_uri=storage_uri,
  state="COMPLETED",
  result=DeploymentResultData.model_validate(result.model_dump()),
  )
@@ -341,7 +391,6 @@
  flow_run_id=frid,
  project_name=project_name,
  timestamp=now,
- storage_uri=storage_uri,
  state="FAILED",
  error=error or "Unknown error",
  )
@@ -353,27 +402,24 @@
  async def run(
  self,
  project_name: str,
- documents: str | DocumentList,
+ documents: list[Document],
  options: TOptions,
  context: DeploymentContext,
  ) -> TResult:
- """Execute flows with caching, uploads, and webhooks enabled by default."""
- from prefect import runtime # noqa: PLC0415
+ """Execute all flows with resume, per-flow uploads, and webhooks.

- total_steps = len(self.flows)
- flow_run_id = str(runtime.flow_run.get_id()) if runtime.flow_run else "" # pyright: ignore[reportAttributeAccessIssue]
+ Args:
+ project_name: Unique identifier for this pipeline run (used as run_scope).
+ documents: Initial input documents for the first flow.
+ options: Flow options passed to each flow.
+ context: Deployment context with webhook URLs and document upload config.

- # Resolve storage URI and documents
- if isinstance(documents, str):
- storage_uri = documents
- docs = await self.flows[0].config.load_documents(storage_uri)
- else:
- docs = documents
- storage_uri = await self.provision_storage(project_name, docs, options, context)
- if storage_uri and docs:
- await self.flows[0].config.save_documents(
- storage_uri, docs, validate_output_type=False
- )
+ Returns:
+ Typed deployment result built from all pipeline documents.
+ """
+ store = get_document_store()
+ total_steps = len(self.flows)
+ flow_run_id: str = str(runtime.flow_run.get_id()) if runtime.flow_run else "" # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownArgumentType]

  # Write identity labels for polling endpoint
  if flow_run_id:
@@ -381,62 +427,80 @@
  async with get_client() as client:
  await client.update_flow_run_labels(
  flow_run_id=UUID(flow_run_id),
- labels={
- "pipeline.project_name": project_name,
- "pipeline.storage_uri": storage_uri,
- },
+ labels={"pipeline.project_name": project_name},
  )
  except Exception as e:
  logger.warning(f"Identity label update failed: {e}")

  # Download additional input documents
+ input_docs = list(documents)
  if context.input_documents_urls:
- first_input_type = self.flows[0].config.INPUT_DOCUMENT_TYPES[0]
- downloaded = await download_documents(context.input_documents_urls, first_input_type)
- docs = DocumentList(list(docs) + list(downloaded))
+ downloaded = await download_documents(list(context.input_documents_urls))
+ input_docs.extend(downloaded)
+
+ # Compute run scope AFTER downloads so the fingerprint includes all inputs
+ run_scope = _compute_run_scope(project_name, input_docs, options)
+
+ if not store and total_steps > 1:
+ logger.warning("No DocumentStore configured for multi-step pipeline — intermediate outputs will not accumulate between flows")

- accumulated_docs = docs
  completion_sent = False

+ # Tracking lifecycle
+ tracking_svc = None
+ run_uuid: UUID | None = None
+ run_failed = False
  try:
+ tracking_svc = get_tracking_service()
+ if tracking_svc:
+ run_uuid = UUID(flow_run_id) if flow_run_id else uuid4()
+ tracking_svc.set_run_context(run_id=run_uuid, project_name=project_name, flow_name=self.name, run_scope=run_scope)
+ tracking_svc.track_run_start(run_id=run_uuid, project_name=project_name, flow_name=self.name, run_scope=run_scope)
+ except Exception:
+ tracking_svc = None
+
+ # Set RunContext for the entire pipeline run
+ run_token = set_run_context(RunContext(run_scope=run_scope))
+ try:
+ # Save initial input documents to store
+ if store and input_docs:
+ await store.save_batch(input_docs, run_scope)
+
  for step, flow_fn in enumerate(self.flows, start=1):
  flow_name = getattr(flow_fn, "name", flow_fn.__name__)
- flow_run_id = str(runtime.flow_run.get_id()) if runtime.flow_run else "" # pyright: ignore[reportAttributeAccessIssue]
-
- # Per-flow caching: check if outputs exist
- if storage_uri:
- cached = await self._load_cached_output(flow_fn, storage_uri)
- if cached is not None:
- logger.info(f"[{step}/{total_steps}] Cache hit: {flow_name}")
- accumulated_docs = DocumentList(list(accumulated_docs) + list(cached))
+ flow_run_id = str(runtime.flow_run.get_id()) if runtime.flow_run else "" # pyright: ignore[reportAttributeAccessIssue, reportUnknownMemberType, reportUnknownArgumentType]
+
+ # Resume check: skip if output documents already exist in store
+ output_types = getattr(flow_fn, "output_document_types", [])
+ if store and output_types:
+ all_outputs_exist = all([await store.has_documents(run_scope, ot) for ot in output_types])
+ if all_outputs_exist:
+ logger.info(f"[{step}/{total_steps}] Resume: skipping {flow_name} (outputs exist)")
  await self._send_progress(
  context,
  flow_run_id,
  project_name,
- storage_uri,
  step,
  total_steps,
  flow_name,
  "cached",
  step_progress=1.0,
- message=f"Loaded from cache: {flow_name}",
+ message=f"Resumed from store: {flow_name}",
  )
  continue

  # Prefect state hooks
  active_flow = flow_fn
  if context.status_webhook_url:
- hooks = self._build_status_hooks(
- context, flow_run_id, project_name, step, total_steps, flow_name
- )
+ hooks = self._build_status_hooks(context, flow_run_id, project_name, step, total_steps, flow_name)
  active_flow = flow_fn.with_options(**hooks)
+ _reattach_flow_metadata(flow_fn, active_flow)

  # Progress: started
  await self._send_progress(
  context,
  flow_run_id,
  project_name,
- storage_uri,
  step,
  total_steps,
  flow_name,
@@ -447,40 +511,34 @@

  logger.info(f"[{step}/{total_steps}] Starting: {flow_name}")

- # Load documents for this flow
- if storage_uri:
- current_docs = await flow_fn.config.load_documents(storage_uri)
+ # Load input documents from store
+ input_types = getattr(flow_fn, "input_document_types", [])
+ if store and input_types:
+ current_docs = await store.load(run_scope, input_types)
  else:
- current_docs = accumulated_docs
+ current_docs = input_docs

  try:
- new_docs = await active_flow(project_name, current_docs, options)
+ await active_flow(project_name, current_docs, options)
  except Exception as e:
  # Upload partial results on failure
- if context.output_documents_urls:
- await upload_documents(accumulated_docs, context.output_documents_urls)
- await self._send_completion(
- context, flow_run_id, project_name, storage_uri, result=None, error=str(e)
- )
+ if context.output_documents_urls and store:
+ all_docs = await store.load(run_scope, self._all_document_types())
+ await upload_documents(all_docs, context.output_documents_urls)
+ await self._send_completion(context, flow_run_id, project_name, result=None, error=str(e))
  completion_sent = True
  raise

- # Save to storage
- if storage_uri:
- await flow_fn.config.save_documents(storage_uri, new_docs)
-
- accumulated_docs = DocumentList(list(accumulated_docs) + list(new_docs))
-
- # Per-flow upload
- if context.output_documents_urls:
- await upload_documents(new_docs, context.output_documents_urls)
+ # Per-flow upload (load from store since @pipeline_flow saves there)
+ if context.output_documents_urls and store and output_types:
+ flow_docs = await store.load(run_scope, output_types)
+ await upload_documents(flow_docs, context.output_documents_urls)

  # Progress: completed
  await self._send_progress(
  context,
  flow_run_id,
  project_name,
- storage_uri,
  step,
  total_steps,
  flow_name,
@@ -491,43 +549,68 @@

  logger.info(f"[{step}/{total_steps}] Completed: {flow_name}")

- result = self.build_result(project_name, accumulated_docs, options)
- await self._send_completion(
- context, flow_run_id, project_name, storage_uri, result=result, error=None
- )
+ # Build result from all documents in store
+ if store:
+ all_docs = await store.load(run_scope, self._all_document_types())
+ else:
+ all_docs = input_docs
+ result = self.build_result(project_name, all_docs, options)
+ await self._send_completion(context, flow_run_id, project_name, result=result, error=None)
  return result

  except Exception as e:
+ run_failed = True
  if not completion_sent:
- await self._send_completion(
- context, flow_run_id, project_name, storage_uri, result=None, error=str(e)
- )
+ await self._send_completion(context, flow_run_id, project_name, result=None, error=str(e))
  raise
+ finally:
+ reset_run_context(run_token)
+ store = get_document_store()
+ if store:
+ with contextlib.suppress(Exception):
+ store.flush()
+ if (svc := tracking_svc) is not None and run_uuid is not None:
+ with contextlib.suppress(Exception):
+ svc.track_run_end(run_id=run_uuid, status=RunStatus.FAILED if run_failed else RunStatus.COMPLETED)
+ svc.flush()

  @final
  def run_local(
  self,
  project_name: str,
- documents: str | DocumentList,
+ documents: list[Document],
  options: TOptions,
  context: DeploymentContext | None = None,
  output_dir: Path | None = None,
  ) -> TResult:
- """Run locally with Prefect test harness."""
+ """Run locally with Prefect test harness and in-memory document store.
+
+ Args:
+ project_name: Pipeline run identifier.
+ documents: Initial input documents.
+ options: Flow options.
+ context: Optional deployment context (defaults to empty).
+ output_dir: Optional directory for writing result.json.
+
+ Returns:
+ Typed deployment result.
+ """
  if context is None:
  context = DeploymentContext()

- # If output_dir provided and documents is DocumentList, use output_dir as storage
- if output_dir and isinstance(documents, DocumentList):
+ if output_dir:
  output_dir.mkdir(parents=True, exist_ok=True)
- documents = str(output_dir)

- with prefect_test_harness():
- with disable_run_logger():
+ store = MemoryDocumentStore()
+ set_document_store(store)
+ try:
+ with prefect_test_harness(), disable_run_logger():
  result = asyncio.run(self.run(project_name, documents, options, context))
+ finally:
+ store.shutdown()
+ set_document_store(None)

  if output_dir:
- output_dir.mkdir(parents=True, exist_ok=True)
  (output_dir / "result.json").write_text(result.model_dump_json(indent=2))

  return result
@@ -535,19 +618,26 @@
  @final
  def run_cli(
  self,
- initializer: Callable[[TOptions], tuple[str, DocumentList]] | None = None,
+ initializer: Callable[[TOptions], tuple[str, list[Document]]] | None = None,
  trace_name: str | None = None,
  ) -> None:
- """Execute pipeline from CLI arguments with --start/--end step control."""
+ """Execute pipeline from CLI arguments with --start/--end step control.
+
+ Args:
+ initializer: Optional callback returning (project_name, documents) from options.
+ trace_name: Optional Laminar trace span name prefix.
+ """
  if len(sys.argv) == 1:
  sys.argv.append("--help")

  setup_logging()
  try:
- Laminar.initialize()
- logger.info("LMNR tracing initialized.")
+ initialize_observability()
+ logger.info("Observability initialized.")
  except Exception as e:
- logger.warning(f"Failed to initialize LMNR: {e}")
+ logger.warning(f"Failed to initialize observability: {e}")
+ with contextlib.suppress(Exception):
+ Laminar.initialize(export_timeout_seconds=15)

  deployment = self

@@ -563,27 +653,50 @@
  project_name: str | None = None
  start: int = 1
  end: int | None = None
+ no_trace: bool = False

  model_config = SettingsConfigDict(frozen=True, extra="ignore")

  opts = cast(TOptions, _CliOptions()) # type: ignore[reportCallIssue]

- wd: Path = getattr(opts, "working_directory")
+ wd = cast(Path, opts.working_directory) # pyright: ignore[reportAttributeAccessIssue]
  wd.mkdir(parents=True, exist_ok=True)

- project_name = getattr(opts, "project_name") or wd.name
+ project_name = cast(str, opts.project_name or wd.name) # pyright: ignore[reportAttributeAccessIssue]
  start_step = getattr(opts, "start", 1)
  end_step = getattr(opts, "end", None)
+ no_trace = getattr(opts, "no_trace", False)

- # Initialize documents and save to working directory
- if initializer and start_step == 1:
- _, documents = initializer(opts)
- if documents and self.flows:
- first_config = getattr(self.flows[0], "config", None)
- if first_config:
- asyncio.run(
- first_config.save_documents(str(wd), documents, validate_output_type=False)
- )
+ # Set up local debug tracing (writes to <working_dir>/.trace)
+ debug_processor: LocalDebugSpanProcessor | None = None
+ if not no_trace:
+ try:
+ trace_path = wd / ".trace"
+ trace_path.mkdir(parents=True, exist_ok=True)
+ debug_config = TraceDebugConfig(path=trace_path, max_traces=20)
+ debug_writer = LocalTraceWriter(debug_config)
+ debug_processor = LocalDebugSpanProcessor(debug_writer)
+ provider: Any = otel_trace.get_tracer_provider()
+ if hasattr(provider, "add_span_processor"):
+ provider.add_span_processor(debug_processor)
+ logger.info(f"Local debug tracing enabled at {trace_path}")
+ except Exception as e:
+ logger.warning(f"Failed to set up local debug tracing: {e}")
+ debug_processor = None
+
+ # Initialize document store — ClickHouse when configured, local filesystem otherwise
+ summary_generator = _build_summary_generator()
+ if settings.clickhouse_host:
+ store = create_document_store(settings, summary_generator=summary_generator)
+ else:
+ store = LocalDocumentStore(base_path=wd, summary_generator=summary_generator)
+ set_document_store(store)
+
+ # Initialize documents (always run initializer for run scope fingerprinting,
+ # even when start_step > 1, so --start N resumes find the correct scope)
+ initial_documents: list[Document] = []
+ if initializer:
+ _, initial_documents = initializer(opts)

  context = DeploymentContext()

@@ -604,11 +717,11 @@
  result = asyncio.run(
  self._run_with_steps(
  project_name=project_name,
- storage_uri=str(wd),
  options=opts,
  context=context,
  start_step=start_step,
  end_step=end_step,
+ initial_documents=initial_documents,
  )
  )

@@ -616,48 +729,106 @@
  result_file.write_text(result.model_dump_json(indent=2))
  logger.info(f"Result saved to {result_file}")

+ # Shutdown background workers (debug tracing, document summaries, tracking)
+ if debug_processor is not None:
+ debug_processor.shutdown()
+ store = get_document_store()
+ if store:
+ store.shutdown()
+ tracking_svc = get_tracking_service()
+ if tracking_svc:
+ tracking_svc.shutdown()
+
  async def _run_with_steps(
  self,
  project_name: str,
- storage_uri: str,
  options: TOptions,
  context: DeploymentContext,
  start_step: int = 1,
  end_step: int | None = None,
+ initial_documents: list[Document] | None = None,
  ) -> TResult:
- """Run pipeline with start/end step control for CLI resume support."""
+ """Run pipeline with start/end step control and DocumentStore-based resume."""
+ store = get_document_store()
  if end_step is None:
  end_step = len(self.flows)

  total_steps = len(self.flows)
- accumulated_docs = DocumentList([])
+ run_scope = _compute_run_scope(project_name, initial_documents or [], options)

- for i in range(start_step - 1, end_step):
- step = i + 1
- flow_fn = self.flows[i]
- flow_name = getattr(flow_fn, "name", flow_fn.__name__)
- logger.info(f"--- [Step {step}/{total_steps}] {flow_name} ---")
+ # Tracking lifecycle for CLI path
+ tracking_svc = None
+ run_uuid: UUID | None = None
+ run_failed = False
+ try:
+ tracking_svc = get_tracking_service()
+ if tracking_svc:
+ run_uuid = uuid4()
+ tracking_svc.set_run_context(run_id=run_uuid, project_name=project_name, flow_name=self.name, run_scope=run_scope)
+ tracking_svc.track_run_start(run_id=run_uuid, project_name=project_name, flow_name=self.name, run_scope=run_scope)
+ except Exception:
+ tracking_svc = None

- # Check cache
- cached = await self._load_cached_output(flow_fn, storage_uri)
- if cached is not None:
- logger.info(f"[{step}/{total_steps}] Cache hit: {flow_name}")
- accumulated_docs = DocumentList(list(accumulated_docs) + list(cached))
- continue
+ # Set RunContext for the entire pipeline run
+ run_token = set_run_context(RunContext(run_scope=run_scope))
+ try:
+ # Save initial documents to store
+ if store and initial_documents:
+ await store.save_batch(initial_documents, run_scope)

- current_docs = await flow_fn.config.load_documents(storage_uri)
- new_docs = await flow_fn(project_name, current_docs, options)
- await flow_fn.config.save_documents(storage_uri, new_docs)
- accumulated_docs = DocumentList(list(accumulated_docs) + list(new_docs))
+ for i in range(start_step - 1, end_step):
+ step = i + 1
+ flow_fn = self.flows[i]
+ flow_name = getattr(flow_fn, "name", flow_fn.__name__)
+ logger.info(f"--- [Step {step}/{total_steps}] {flow_name} ---")
+
+ # Resume check: skip if output documents already exist
+ output_types = getattr(flow_fn, "output_document_types", [])
+ if store and output_types:
+ all_outputs_exist = all([await store.has_documents(run_scope, ot) for ot in output_types])
+ if all_outputs_exist:
+ logger.info(f"--- [Step {step}/{total_steps}] Skipping {flow_name} (outputs exist) ---")
+ continue
+
+ # Load inputs from store
+ input_types = getattr(flow_fn, "input_document_types", [])
+ if store and input_types:
+ current_docs = await store.load(run_scope, input_types)
+ else:
+ current_docs = initial_documents or []

- return self.build_result(project_name, accumulated_docs, options)
+ await flow_fn(project_name, current_docs, options)
+
+ # Build result from all documents in store
+ if store:
+ all_docs = await store.load(run_scope, self._all_document_types())
+ else:
+ all_docs = initial_documents or []
+ return self.build_result(project_name, all_docs, options)
+ except Exception:
+ run_failed = True
+ raise
+ finally:
+ reset_run_context(run_token)
+ store = get_document_store()
+ if store:
+ with contextlib.suppress(Exception):
+ store.flush()
+ if (svc := tracking_svc) is not None and run_uuid is not None:
+ with contextlib.suppress(Exception):
+ svc.track_run_end(run_id=run_uuid, status=RunStatus.FAILED if run_failed else RunStatus.COMPLETED)
+ svc.flush()

  @final
  def as_prefect_flow(self) -> Callable[..., Any]:
- """Generate Prefect flow for production deployment."""
+ """Generate a Prefect flow for production deployment.
+
+ Returns:
+ Async Prefect flow callable that initializes DocumentStore from settings.
+ """
  deployment = self

- @flow( # pyright: ignore[reportUntypedFunctionDecorator]
+ @flow(
  name=self.name,
  flow_run_name=f"{self.name}-{{project_name}}",
  persist_result=True,
@@ -665,11 +836,20 @@
  )
  async def _deployment_flow(
  project_name: str,
- documents: str | DocumentList,
+ documents: list[Document],
  options: FlowOptions,
  context: DeploymentContext,
  ) -> DeploymentResult:
- return await deployment.run(project_name, documents, cast(Any, options), context)
+ store = create_document_store(
+ settings,
+ summary_generator=_build_summary_generator(),
+ )
+ set_document_store(store)
+ try:
+ return await deployment.run(project_name, documents, cast(Any, options), context)
+ finally:
+ store.shutdown()
+ set_document_store(None)

  return _deployment_flow
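Two of the behavioral changes above are easiest to see with small, self-contained re-implementations. The sketch below is illustrative only: it mirrors the logic of _compute_run_scope and the estimated_minutes-weighted progress calculation in _send_progress from deployment/base.py, but substitutes plain strings and a dict for Document (whose .sha256 the real helper hashes) and the FlowOptions pydantic model, so the exact fingerprints it prints will differ from what the library produces.

import hashlib
import json

# Mirrors _CLI_FIELDS in deployment/base.py: CLI-only options that must not change the fingerprint.
CLI_FIELDS = {"working_directory", "project_name", "start", "end", "no_trace"}


def compute_run_scope(project_name: str, doc_sha256s: list[str], options: dict) -> str:
    """Sketch of _compute_run_scope: same project name + same inputs/options -> same scope."""
    if not doc_sha256s:
        # No documents (e.g. a --start N resume without an initializer): plain project name
        return project_name
    filtered = {k: v for k, v in sorted(options.items()) if k not in CLI_FIELDS and v is not None}
    options_json = json.dumps(filtered)  # stand-in for FlowOptions.model_dump_json(...)
    digest = hashlib.sha256(f"{':'.join(sorted(doc_sha256s))}|{options_json}".encode()).hexdigest()[:16]
    return f"{project_name}:{digest}"


def weighted_progress(flow_minutes: list[int], step: int, step_progress: float) -> float:
    """Sketch of the estimated_minutes-weighted progress used by _send_progress."""
    total = sum(flow_minutes) or 1
    completed = sum(flow_minutes[: max(step - 1, 0)])
    current = flow_minutes[step - 1] if step - 1 < len(flow_minutes) else 1
    return round(max(0.0, min(1.0, (completed + current * step_progress) / total)), 4)


# Different inputs under the same project name yield a different run scope, so the
# DocumentStore resume check will not pick up another run's outputs.
print(compute_run_scope("demo", ["a" * 64], {"temperature": 0.2}))
print(compute_run_scope("demo", ["b" * 64], {"temperature": 0.2}))

# Halfway through the second of three flows estimated at 5, 30 and 10 minutes:
# (5 + 15) / 45 = 0.4444, instead of the flat (1 + 0.5) / 3 = 0.5 used in 0.3.4.
print(weighted_progress([5, 30, 10], step=2, step_progress=0.5))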