ai-pipeline-core 0.4.2__tar.gz → 0.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/.gitignore +0 -1
  2. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/PKG-INFO +1 -1
  3. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/__init__.py +1 -1
  4. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/deployment/base.py +69 -18
  5. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/deployment/progress.py +46 -18
  6. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/llm/client.py +2 -2
  7. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/pyproject.toml +2 -2
  8. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/LICENSE +0 -0
  9. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/README.md +0 -0
  10. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/deployment/__init__.py +0 -0
  11. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/deployment/contract.py +0 -0
  12. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/deployment/deploy.py +0 -0
  13. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/deployment/helpers.py +0 -0
  14. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/deployment/remote.py +0 -0
  15. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/docs_generator/__init__.py +0 -0
  16. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/docs_generator/__main__.py +0 -0
  17. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/docs_generator/cli.py +0 -0
  18. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/docs_generator/extractor.py +0 -0
  19. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/docs_generator/guide_builder.py +0 -0
  20. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/docs_generator/trimmer.py +0 -0
  21. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/docs_generator/validator.py +0 -0
  22. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/document_store/__init__.py +0 -0
  23. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/document_store/_summary.py +0 -0
  24. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/document_store/_summary_worker.py +0 -0
  25. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/document_store/clickhouse.py +0 -0
  26. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/document_store/factory.py +0 -0
  27. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/document_store/local.py +0 -0
  28. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/document_store/memory.py +0 -0
  29. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/document_store/protocol.py +0 -0
  30. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/documents/__init__.py +0 -0
  31. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/documents/_context_vars.py +0 -0
  32. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/documents/_hashing.py +0 -0
  33. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/documents/attachment.py +0 -0
  34. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/documents/context.py +0 -0
  35. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/documents/document.py +0 -0
  36. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/documents/mime_type.py +0 -0
  37. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/documents/utils.py +0 -0
  38. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/exceptions.py +0 -0
  39. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/images/__init__.py +0 -0
  40. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/images/_processing.py +0 -0
  41. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/llm/__init__.py +0 -0
  42. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/llm/ai_messages.py +0 -0
  43. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/llm/model_options.py +0 -0
  44. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/llm/model_response.py +0 -0
  45. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/llm/model_types.py +0 -0
  46. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/logging/__init__.py +0 -0
  47. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/logging/logging.yml +0 -0
  48. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/logging/logging_config.py +0 -0
  49. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  50. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/__init__.py +0 -0
  51. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_debug/__init__.py +0 -0
  52. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_debug/_auto_summary.py +0 -0
  53. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_debug/_config.py +0 -0
  54. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_debug/_content.py +0 -0
  55. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_debug/_processor.py +0 -0
  56. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_debug/_summary.py +0 -0
  57. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_debug/_types.py +0 -0
  58. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_debug/_writer.py +0 -0
  59. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_document_tracking.py +0 -0
  60. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_initialization.py +0 -0
  61. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_logging_bridge.py +0 -0
  62. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_summary.py +0 -0
  63. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_tracking/__init__.py +0 -0
  64. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_tracking/_client.py +0 -0
  65. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_tracking/_internal.py +0 -0
  66. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_tracking/_models.py +0 -0
  67. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_tracking/_processor.py +0 -0
  68. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_tracking/_service.py +0 -0
  69. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/_tracking/_writer.py +0 -0
  70. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/observability/tracing.py +0 -0
  71. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/pipeline/__init__.py +0 -0
  72. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/pipeline/decorators.py +0 -0
  73. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/pipeline/options.py +0 -0
  74. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/prompt_manager.py +0 -0
  75. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/py.typed +0 -0
  76. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/settings.py +0 -0
  77. {ai_pipeline_core-0.4.2 → ai_pipeline_core-0.4.4}/ai_pipeline_core/testing.py +0 -0
@@ -160,7 +160,6 @@ ehthumbs.db
160
160
  .prefect/
161
161
 
162
162
  # Test artifacts
163
- tests/test_data/
164
163
  test_output/
165
164
 
166
165
  # temporary specification
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.4.2
3
+ Version: 0.4.4
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -64,7 +64,7 @@ from .prompt_manager import PromptManager
64
64
  from .settings import Settings
65
65
  from .testing import disable_run_logger, prefect_test_harness
66
66
 
67
- __version__ = "0.4.1"
67
+ __version__ = "0.4.4"
68
68
 
69
69
  __all__ = [
70
70
  "AIMessageType",
@@ -51,6 +51,7 @@ from .helpers import (
51
51
  send_webhook,
52
52
  upload_documents,
53
53
  )
54
+ from .progress import flow_context, webhook_worker
54
55
 
55
56
  logger = get_pipeline_logger(__name__)
56
57
 
@@ -518,16 +519,39 @@ class PipelineDeployment(Generic[TOptions, TResult]):
518
519
  else:
519
520
  current_docs = input_docs
520
521
 
521
- try:
522
- await active_flow(project_name, current_docs, options.model_dump())
523
- except Exception as e:
524
- # Upload partial results on failure
525
- if context.output_documents_urls and store:
526
- all_docs = await store.load(run_scope, self._all_document_types())
527
- await upload_documents(all_docs, context.output_documents_urls)
528
- await self._send_completion(context, flow_run_id, project_name, result=None, error=str(e))
529
- completion_sent = True
530
- raise
522
+ # Set up intra-flow progress context so progress_update() works inside flows
523
+ flow_minutes = tuple(getattr(f, "estimated_minutes", 1) for f in self.flows)
524
+ completed_mins = sum(flow_minutes[: max(step - 1, 0)])
525
+ progress_queue: asyncio.Queue[ProgressRun | None] = asyncio.Queue()
526
+ wh_url = context.progress_webhook_url or ""
527
+ worker = asyncio.create_task(webhook_worker(progress_queue, wh_url)) if wh_url else None
528
+
529
+ with flow_context(
530
+ webhook_url=wh_url,
531
+ project_name=project_name,
532
+ run_id=flow_run_id,
533
+ flow_run_id=flow_run_id,
534
+ flow_name=flow_name,
535
+ step=step,
536
+ total_steps=total_steps,
537
+ flow_minutes=flow_minutes,
538
+ completed_minutes=completed_mins,
539
+ queue=progress_queue,
540
+ ):
541
+ try:
542
+ await active_flow(project_name, current_docs, options.model_dump())
543
+ except Exception as e:
544
+ # Upload partial results on failure
545
+ if context.output_documents_urls and store:
546
+ all_docs = await store.load(run_scope, self._all_document_types())
547
+ await upload_documents(all_docs, context.output_documents_urls)
548
+ await self._send_completion(context, flow_run_id, project_name, result=None, error=str(e))
549
+ completion_sent = True
550
+ raise
551
+ finally:
552
+ progress_queue.put_nowait(None)
553
+ if worker:
554
+ await worker
531
555
 
532
556
  # Per-flow upload (load from store since @pipeline_flow saves there)
533
557
  if context.output_documents_urls and store and output_types:
@@ -797,7 +821,31 @@ class PipelineDeployment(Generic[TOptions, TResult]):
797
821
  else:
798
822
  current_docs = initial_documents or []
799
823
 
800
- await flow_fn(project_name, current_docs, options)
824
+ # Set up intra-flow progress context so progress_update() works inside flows
825
+ flow_minutes = tuple(getattr(f, "estimated_minutes", 1) for f in self.flows)
826
+ completed_mins = sum(flow_minutes[: max(step - 1, 0)])
827
+ progress_queue: asyncio.Queue[ProgressRun | None] = asyncio.Queue()
828
+ wh_url = context.progress_webhook_url or ""
829
+ worker = asyncio.create_task(webhook_worker(progress_queue, wh_url)) if wh_url else None
830
+
831
+ with flow_context(
832
+ webhook_url=wh_url,
833
+ project_name=project_name,
834
+ run_id=str(run_uuid) if run_uuid else "",
835
+ flow_run_id=str(run_uuid) if run_uuid else "",
836
+ flow_name=flow_name,
837
+ step=step,
838
+ total_steps=total_steps,
839
+ flow_minutes=flow_minutes,
840
+ completed_minutes=completed_mins,
841
+ queue=progress_queue,
842
+ ):
843
+ try:
844
+ await flow_fn(project_name, current_docs, options)
845
+ finally:
846
+ progress_queue.put_nowait(None)
847
+ if worker:
848
+ await worker
801
849
 
802
850
  # Build result from all documents in store
803
851
  if store:
@@ -828,12 +876,6 @@ class PipelineDeployment(Generic[TOptions, TResult]):
828
876
  """
829
877
  deployment = self
830
878
 
831
- @flow(
832
- name=self.name,
833
- flow_run_name=f"{self.name}-{{project_name}}",
834
- persist_result=True,
835
- result_serializer="json",
836
- )
837
879
  async def _deployment_flow(
838
880
  project_name: str,
839
881
  documents: list[Document],
@@ -851,7 +893,16 @@ class PipelineDeployment(Generic[TOptions, TResult]):
851
893
  store.shutdown()
852
894
  set_document_store(None)
853
895
 
854
- return _deployment_flow
896
+ # Patch annotations so Prefect generates the parameter schema from the concrete types
897
+ _deployment_flow.__annotations__["options"] = self.options_type
898
+ _deployment_flow.__annotations__["return"] = self.result_type
899
+
900
+ return flow(
901
+ name=self.name,
902
+ flow_run_name=f"{self.name}-{{project_name}}",
903
+ persist_result=True,
904
+ result_serializer="json",
905
+ )(_deployment_flow)
855
906
 
856
907
 
857
908
  __all__ = [
@@ -9,6 +9,8 @@ from dataclasses import dataclass
9
9
  from datetime import UTC, datetime
10
10
  from uuid import UUID
11
11
 
12
+ from prefect import get_client
13
+
12
14
  from ai_pipeline_core.logging import get_pipeline_logger
13
15
 
14
16
  from .contract import ProgressRun
@@ -38,9 +40,14 @@ _context: ContextVar[ProgressContext | None] = ContextVar("progress_context", de
38
40
 
39
41
 
40
42
  async def update(fraction: float, message: str = "") -> None:
41
- """Report intra-flow progress (0.0-1.0). No-op without context."""
43
+ """Report intra-flow progress (0.0-1.0). No-op without context.
44
+
45
+ Sends webhook payload (if webhook_url configured) AND updates Prefect
46
+ flow run labels (if flow_run_id available) so both push and poll consumers
47
+ see progress, and staleness detection stays current.
48
+ """
42
49
  ctx = _context.get()
43
- if ctx is None or not ctx.webhook_url:
50
+ if ctx is None:
44
51
  return
45
52
 
46
53
  fraction = max(0.0, min(1.0, fraction))
@@ -50,22 +57,43 @@ async def update(fraction: float, message: str = "") -> None:
50
57
  else:
51
58
  overall = fraction
52
59
  overall = round(max(0.0, min(1.0, overall)), 4)
53
-
54
- payload = ProgressRun(
55
- flow_run_id=UUID(ctx.flow_run_id) if ctx.flow_run_id else UUID(int=0),
56
- project_name=ctx.project_name,
57
- state="RUNNING",
58
- timestamp=datetime.now(UTC),
59
- step=ctx.step,
60
- total_steps=ctx.total_steps,
61
- flow_name=ctx.flow_name,
62
- status="progress",
63
- progress=overall,
64
- step_progress=round(fraction, 4),
65
- message=message,
66
- )
67
-
68
- ctx.queue.put_nowait(payload)
60
+ step_progress = round(fraction, 4)
61
+
62
+ # Enqueue webhook payload for async delivery
63
+ if ctx.webhook_url:
64
+ payload = ProgressRun(
65
+ flow_run_id=UUID(ctx.flow_run_id) if ctx.flow_run_id else UUID(int=0),
66
+ project_name=ctx.project_name,
67
+ state="RUNNING",
68
+ timestamp=datetime.now(UTC),
69
+ step=ctx.step,
70
+ total_steps=ctx.total_steps,
71
+ flow_name=ctx.flow_name,
72
+ status="progress",
73
+ progress=overall,
74
+ step_progress=step_progress,
75
+ message=message,
76
+ )
77
+ ctx.queue.put_nowait(payload)
78
+
79
+ # Update Prefect labels so polling consumers and staleness detection stay current
80
+ if ctx.flow_run_id:
81
+ try:
82
+ async with get_client() as client:
83
+ await client.update_flow_run_labels(
84
+ flow_run_id=UUID(ctx.flow_run_id),
85
+ labels={
86
+ "progress.step": ctx.step,
87
+ "progress.total_steps": ctx.total_steps,
88
+ "progress.flow_name": ctx.flow_name,
89
+ "progress.status": "progress",
90
+ "progress.progress": overall,
91
+ "progress.step_progress": step_progress,
92
+ "progress.message": message,
93
+ },
94
+ )
95
+ except Exception as e:
96
+ logger.warning(f"Progress label update failed: {e}")
69
97
 
70
98
 
71
99
  async def webhook_worker(
@@ -258,10 +258,10 @@ def _model_name_to_openrouter_model(model: ModelName) -> str:
258
258
  Returns:
259
259
  OpenRouter model name.
260
260
  """
261
- if model == "gemini-3-flash-search":
262
- return "google/gemini-3-flash:online"
263
261
  if model == "sonar-pro-search":
264
262
  return "perplexity/sonar-pro-search"
263
+ if model.endswith("-search"):
264
+ model = model.replace("-search", ":online")
265
265
  if model.startswith("gemini"):
266
266
  return f"google/{model}"
267
267
  elif model.startswith("gpt"):
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ai-pipeline-core"
3
- version = "0.4.2"
3
+ version = "0.4.4"
4
4
  description = "Core utilities for AI-powered processing pipelines using prefect"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -61,7 +61,7 @@ dev = [
61
61
  minversion = "8.0"
62
62
  asyncio_mode = "auto"
63
63
  asyncio_default_fixture_loop_scope = "function"
64
- addopts = "-q -n 8 -m 'not integration and not clickhouse'"
64
+ addopts = "-q -m 'not integration and not clickhouse'"
65
65
  testpaths = ["tests"]
66
66
  markers = [
67
67
  "integration: marks tests as integration tests (deselect with '-m \"not integration\"')",