ai-pipeline-core 0.4.5__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -64,7 +64,7 @@ from .prompt_manager import PromptManager
64
64
  from .settings import Settings
65
65
  from .testing import disable_run_logger, prefect_test_harness
66
66
 
67
- __version__ = "0.4.5"
67
+ __version__ = "0.4.6"
68
68
 
69
69
  __all__ = [
70
70
  "AIMessageType",
@@ -383,7 +383,7 @@ class Deployer:
383
383
  dest_uri = f"gs://{self.config['bucket']}/{flow_folder}/{tarball.name}"
384
384
  self._info(f"Uploading to {dest_uri}")
385
385
 
386
- tarball_bytes = tarball.read_bytes() # noqa: ASYNC240
386
+ tarball_bytes = tarball.read_bytes()
387
387
  await bucket.write_path(tarball.name, tarball_bytes)
388
388
 
389
389
  self._success(f"Package uploaded to {flow_folder}/{tarball.name}")
@@ -527,15 +527,16 @@ class Deployer:
527
527
  # Phase 3: Build vendor packages from [tool.deploy].vendor_packages
528
528
  vendor_wheels = self._build_vendor_packages()
529
529
 
530
- # Also include cli_agents wheels from agent builds
531
- if agent_builds:
532
- seen_agent: set[str] = set()
533
- for build_info in agent_builds.values():
534
- for filename, filepath in build_info["files"].items():
535
- if filename.endswith(".whl") and filename not in seen_agent and "cli_agents" in filename:
536
- if filename not in {w.name for w in vendor_wheels}:
537
- vendor_wheels.append(filepath)
538
- seen_agent.add(filename)
530
+ # Build cli-agents wheel if source is configured — it's a private package
531
+ # not on PyPI, so the worker needs the wheel even when no agents are deployed
532
+ cli_agents_source = self._get_cli_agents_source()
533
+ if cli_agents_source:
534
+ cli_dir = Path(cli_agents_source).resolve()
535
+ if (cli_dir / "pyproject.toml").exists():
536
+ cli_wheel = self._build_wheel_from_source(cli_dir)
537
+ if cli_wheel.name not in {w.name for w in vendor_wheels}:
538
+ vendor_wheels.append(cli_wheel)
539
+ self._success(f"Built cli-agents vendor wheel: {cli_wheel.name}")
539
540
 
540
541
  # Phase 4: Upload flow package + vendor wheels
541
542
  await self._upload_package(tarball, vendor_wheels)
@@ -1,9 +1,10 @@
1
1
  """Remote deployment utilities for calling PipelineDeployment flows via Prefect."""
2
2
 
3
- import inspect
4
- from collections.abc import Callable
3
+ import asyncio
4
+ from collections.abc import Awaitable, Callable, Coroutine
5
5
  from functools import wraps
6
- from typing import Any, ParamSpec, TypeVar, cast
6
+ from typing import Any, TypeVar, cast
7
+ from uuid import UUID
7
8
 
8
9
  from prefect import get_client
9
10
  from prefect.client.orchestration import PrefectClient
@@ -13,14 +14,20 @@ from prefect.deployments.flow_runs import run_deployment
13
14
  from prefect.exceptions import ObjectNotFound
14
15
 
15
16
  from ai_pipeline_core.deployment import DeploymentContext, DeploymentResult, PipelineDeployment
17
+ from ai_pipeline_core.documents import Document
18
+ from ai_pipeline_core.logging import get_pipeline_logger
16
19
  from ai_pipeline_core.observability.tracing import TraceLevel, set_trace_cost, trace
17
20
  from ai_pipeline_core.pipeline.options import FlowOptions
18
21
  from ai_pipeline_core.settings import settings
19
22
 
20
- P = ParamSpec("P")
23
+ logger = get_pipeline_logger(__name__)
24
+
21
25
  TOptions = TypeVar("TOptions", bound=FlowOptions)
22
26
  TResult = TypeVar("TResult", bound=DeploymentResult)
23
27
 
28
+ ProgressCallback = Callable[[float, str], Awaitable[None]]
29
+ """Signature for remote deployment progress callbacks: (fraction, message) -> None."""
30
+
24
31
 
25
32
  def _is_already_traced(func: Callable[..., Any]) -> bool:
26
33
  """Check if function or its __wrapped__ has __is_traced__ attribute."""
@@ -30,17 +37,80 @@ def _is_already_traced(func: Callable[..., Any]) -> bool:
30
37
  return getattr(wrapped, "__is_traced__", False) if wrapped else False
31
38
 
32
39
 
33
- async def run_remote_deployment(deployment_name: str, parameters: dict[str, Any]) -> Any:
34
- """Run a remote Prefect deployment, trying local client first then remote."""
40
+ _POLL_INTERVAL = 5.0
41
+
42
+
43
+ async def _poll_remote_flow_run(
44
+ client: PrefectClient,
45
+ flow_run_id: UUID,
46
+ deployment_name: str,
47
+ poll_interval: float = _POLL_INTERVAL,
48
+ on_progress: ProgressCallback | None = None,
49
+ ) -> Any:
50
+ """Poll a remote flow run until final, invoking on_progress callback with progress.
35
51
 
36
- async def _run(client: PrefectClient, as_subflow: bool) -> Any:
37
- fr: FlowRun = await run_deployment(client=client, name=deployment_name, parameters=parameters, as_subflow=as_subflow) # type: ignore
38
- return await fr.state.result() # type: ignore
52
+ Reads the remote flow run's progress labels on each poll cycle and calls
53
+ on_progress(fraction, message) if provided. Without a callback, no progress
54
+ is reported. Only sends 1.0 on successful completion (not failure).
55
+ """
56
+ last_fraction = 0.0
57
+
58
+ while True:
59
+ try:
60
+ flow_run = await client.read_flow_run(flow_run_id)
61
+ except Exception:
62
+ logger.warning("Failed to poll remote flow run %s", flow_run_id, exc_info=True)
63
+ await asyncio.sleep(poll_interval)
64
+ continue
65
+
66
+ state = flow_run.state
67
+ if state and state.is_final():
68
+ if on_progress and state.is_completed():
69
+ await on_progress(1.0, f"[{deployment_name}] Completed")
70
+ return await state.result() # type: ignore[union-attr]
71
+
72
+ if on_progress:
73
+ labels: dict[str, Any] = flow_run.labels or {}
74
+ progress_val = labels.get("progress.progress")
75
+
76
+ if progress_val is not None:
77
+ fraction = max(float(progress_val), last_fraction)
78
+ last_fraction = fraction
79
+ flow_name = str(labels.get("progress.flow_name", ""))
80
+ message = str(labels.get("progress.message", ""))
81
+ display = f"[{deployment_name}] {flow_name}: {message}" if flow_name else f"[{deployment_name}] Running"
82
+ await on_progress(fraction, display)
83
+ else:
84
+ await on_progress(last_fraction, f"[{deployment_name}] Waiting to start")
85
+
86
+ await asyncio.sleep(poll_interval)
87
+
88
+
89
+ async def run_remote_deployment(
90
+ deployment_name: str,
91
+ parameters: dict[str, Any],
92
+ on_progress: ProgressCallback | None = None,
93
+ ) -> Any:
94
+ """Run a remote Prefect deployment with optional progress callback.
95
+
96
+ Creates the remote flow run immediately (timeout=0) then polls its state,
97
+ invoking on_progress(fraction, message) on each poll cycle if provided.
98
+ """
99
+
100
+ async def _create_and_poll(client: PrefectClient, as_subflow: bool) -> Any:
101
+ fr: FlowRun = await run_deployment(
102
+ client=client,
103
+ name=deployment_name,
104
+ parameters=parameters,
105
+ as_subflow=as_subflow,
106
+ timeout=0,
107
+ ) # type: ignore
108
+ return await _poll_remote_flow_run(client, fr.id, deployment_name, on_progress=on_progress)
39
109
 
40
110
  async with get_client() as client:
41
111
  try:
42
112
  await client.read_deployment_by_name(name=deployment_name)
43
- return await _run(client, True) # noqa: FBT003
113
+ return await _create_and_poll(client, True) # noqa: FBT003
44
114
  except ObjectNotFound:
45
115
  pass
46
116
 
@@ -56,7 +126,7 @@ async def run_remote_deployment(deployment_name: str, parameters: dict[str, Any]
56
126
  await client.read_deployment_by_name(name=deployment_name)
57
127
  ctx = AsyncClientContext.model_construct(client=client, _httpx_settings=None, _context_stack=0)
58
128
  with ctx:
59
- return await _run(client, False) # noqa: FBT003
129
+ return await _create_and_poll(client, False) # noqa: FBT003
60
130
  except ObjectNotFound:
61
131
  pass
62
132
 
@@ -70,32 +140,38 @@ def remote_deployment(
70
140
  name: str | None = None,
71
141
  trace_level: TraceLevel = "always",
72
142
  trace_cost: float | None = None,
73
- ) -> Callable[[Callable[P, TResult]], Callable[P, TResult]]:
74
- """Decorator to call PipelineDeployment flows remotely with automatic serialization."""
143
+ ) -> Callable[[Callable[..., Any]], Callable[..., Coroutine[Any, Any, TResult]]]:
144
+ """Decorator to call PipelineDeployment flows remotely with automatic serialization.
145
+
146
+ The decorated function's body is never executed — it serves as a typed stub.
147
+ The wrapper enforces the deployment contract: (project_name, documents, options, context).
148
+ """
75
149
 
76
- def decorator(func: Callable[P, TResult]) -> Callable[P, TResult]:
150
+ def decorator(func: Callable[..., Any]) -> Callable[..., Coroutine[Any, Any, TResult]]:
77
151
  fname = getattr(func, "__name__", deployment_class.name)
78
152
 
79
153
  if _is_already_traced(func):
80
154
  raise TypeError(f"@remote_deployment target '{fname}' already has @trace")
81
155
 
82
156
  @wraps(func)
83
- async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> TResult:
84
- sig = inspect.signature(func)
85
- bound = sig.bind(*args, **kwargs)
86
- bound.apply_defaults()
87
-
88
- # Pass parameters with proper types - Prefect handles Pydantic serialization
89
- parameters: dict[str, Any] = {}
90
- for pname, value in bound.arguments.items():
91
- if value is None and pname == "context":
92
- parameters[pname] = DeploymentContext()
93
- else:
94
- parameters[pname] = value
157
+ async def _wrapper(
158
+ project_name: str,
159
+ documents: list[Document],
160
+ options: TOptions,
161
+ context: DeploymentContext | None = None,
162
+ *,
163
+ on_progress: ProgressCallback | None = None,
164
+ ) -> TResult:
165
+ parameters: dict[str, Any] = {
166
+ "project_name": project_name,
167
+ "documents": documents,
168
+ "options": options,
169
+ "context": context if context is not None else DeploymentContext(),
170
+ }
95
171
 
96
172
  full_name = f"{deployment_class.name}/{deployment_name or deployment_class.name.replace('-', '_')}"
97
173
 
98
- result = await run_remote_deployment(full_name, parameters)
174
+ result = await run_remote_deployment(full_name, parameters, on_progress=on_progress)
99
175
 
100
176
  if trace_cost is not None and trace_cost > 0:
101
177
  set_trace_cost(trace_cost)
@@ -111,6 +187,6 @@ def remote_deployment(
111
187
  name=name or deployment_class.name,
112
188
  )(_wrapper)
113
189
 
114
- return traced_wrapper # type: ignore[return-value]
190
+ return traced_wrapper
115
191
 
116
192
  return decorator
@@ -38,6 +38,34 @@ def _ensure_llm_compatible_image(content: bytes, mime_type: str) -> tuple[bytes,
38
38
  return buf.getvalue(), "image/png"
39
39
 
40
40
 
41
+ def _looks_like_text(content: bytes) -> bool:
42
+ """Check if content is valid UTF-8 text (not binary).
43
+
44
+ Uses heuristics: must decode as UTF-8 and have no null bytes.
45
+ Null bytes are common in binary files but rare in text.
46
+ """
47
+ if not content:
48
+ return True
49
+ # Null bytes indicate binary content
50
+ if b"\x00" in content:
51
+ return False
52
+ try:
53
+ content.decode("utf-8")
54
+ return True
55
+ except UnicodeDecodeError:
56
+ return False
57
+
58
+
59
+ def _has_pdf_signature(content: bytes) -> bool:
60
+ """Check if content starts with PDF magic bytes (%PDF-).
61
+
62
+ Real PDFs start with %PDF- (possibly after whitespace).
63
+ This prevents false positives when a real PDF happens to be
64
+ partly UTF-8 decodable (e.g., ASCII-heavy PDF metadata).
65
+ """
66
+ return content.lstrip().startswith(b"%PDF-")
67
+
68
+
41
69
  AIMessageType = str | Document | ModelResponse
42
70
  """Type for messages in AIMessages container.
43
71
 
@@ -350,7 +378,7 @@ class AIMessages(list[AIMessageType]): # noqa: PLR0904
350
378
  return count
351
379
 
352
380
  @staticmethod
353
- def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]: # noqa: PLR0912, PLR0914
381
+ def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]: # noqa: C901, PLR0912, PLR0914, PLR0915
354
382
  """Convert a document to prompt format for LLM consumption.
355
383
 
356
384
  Renders the document as XML with text/image/PDF content, followed by any
@@ -368,8 +396,15 @@ class AIMessages(list[AIMessageType]): # noqa: PLR0904
368
396
  description = f"<description>{document.description}</description>\n" if document.description else ""
369
397
  header_text = f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
370
398
 
399
+ # Check if "PDF" is actually text (misnamed file from URL ending in .pdf)
400
+ # Real PDFs start with %PDF- magic bytes; if missing and content is UTF-8, it's text
401
+ is_text = document.is_text
402
+ if not is_text and document.is_pdf and _looks_like_text(document.content) and not _has_pdf_signature(document.content):
403
+ is_text = True
404
+ logger.debug(f"Document '{document.name}' has PDF extension but contains text content - sending as text")
405
+
371
406
  # Handle text documents
372
- if document.is_text:
407
+ if is_text:
373
408
  text_content = document.content.decode("utf-8")
374
409
  content_text = f"{header_text}<content>\n{text_content}\n</content>\n"
375
410
  prompt.append({"type": "text", "text": content_text})
@@ -407,8 +442,16 @@ class AIMessages(list[AIMessageType]): # noqa: PLR0904
407
442
  desc_attr = f' description="{att.description}"' if att.description else ""
408
443
  att_open = f'<attachment name="{att.name}"{desc_attr}>\n'
409
444
 
410
- if att.is_text:
411
- prompt.append({"type": "text", "text": f"{att_open}{att.text}\n</attachment>\n"})
445
+ # Check if "PDF" attachment is actually text (same logic as document)
446
+ att_is_text = att.is_text
447
+ if not att_is_text and att.is_pdf and _looks_like_text(att.content) and not _has_pdf_signature(att.content):
448
+ att_is_text = True
449
+ logger.debug(f"Attachment '{att.name}' has PDF extension but contains text content - sending as text")
450
+
451
+ if att_is_text:
452
+ # Use content.decode() directly - att.text property raises ValueError if is_text is False
453
+ att_text = att.content.decode("utf-8")
454
+ prompt.append({"type": "text", "text": f"{att_open}{att_text}\n</attachment>\n"})
412
455
  elif att.is_image or att.is_pdf:
413
456
  prompt.append({"type": "text", "text": att_open})
414
457
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.4.5
3
+ Version: 0.4.7
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -1,4 +1,4 @@
1
- ai_pipeline_core/__init__.py,sha256=ydSl96FjnoGhcGRC_Pnr1n7R0ub3vZWnKxyLWILJ54o,3270
1
+ ai_pipeline_core/__init__.py,sha256=QAQSyrKafsNov4dy9vNqTVarh2nDdrthMfYh7X-3Mcg,3270
2
2
  ai_pipeline_core/exceptions.py,sha256=csAl7vq6xjSFBF8-UM9WZODCbhsOdOG5zH6IbA8iteM,1280
3
3
  ai_pipeline_core/prompt_manager.py,sha256=3wFkL5rrjtUT1cLInkgyhS8hKnO4MeD1cdXAEuLhgoE,9459
4
4
  ai_pipeline_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -7,10 +7,10 @@ ai_pipeline_core/testing.py,sha256=jIRrLxNvTwdamucfJoHET2qMeRhhMZV9uEJXO5vAfis,2
7
7
  ai_pipeline_core/deployment/__init__.py,sha256=wTkVK6gcEQvqBajFMTAuodRONpN25yHbR1jtcumf0WQ,900
8
8
  ai_pipeline_core/deployment/base.py,sha256=bGSnDdrw6cLM_TItAiwptnwApbw5wkoIGY9pnwDvOTQ,37485
9
9
  ai_pipeline_core/deployment/contract.py,sha256=a1qbHhneTGB27oSOUy79CUIhOIzOoq37M63XoIMzA4Y,1952
10
- ai_pipeline_core/deployment/deploy.py,sha256=3Pojw_HonW46RbjWf6AGNtv-1F46Ed4y71pgQL0teAk,24576
10
+ ai_pipeline_core/deployment/deploy.py,sha256=y5FxKMm7nGwkjzA74pTffO7A82MaDuajx6LHGTem8bI,24662
11
11
  ai_pipeline_core/deployment/helpers.py,sha256=yVtGFUs4AFXkpLkiQ_ale0nXXt5btfWSb5PAbikQHNs,3312
12
12
  ai_pipeline_core/deployment/progress.py,sha256=rO2g8VIh7EpzxzGGAroXEpveWoWZkk66jkDW22BY4j8,4827
13
- ai_pipeline_core/deployment/remote.py,sha256=pOle4Dw_U4j9mLbk8NQdmKUys4uaGJyOKMZXhRvnw3E,4679
13
+ ai_pipeline_core/deployment/remote.py,sha256=tOBbICtPXbJHN8QA9juCqkeP9PqEc16mPyBrwaOwEt4,7434
14
14
  ai_pipeline_core/docs_generator/__init__.py,sha256=JbWbk-Lw5GgWrCMRuw8zvKNTZY2jXv7XqoMiBYudvRI,1255
15
15
  ai_pipeline_core/docs_generator/__main__.py,sha256=CH4agiM2suFJ63MhTg5m0GuXdc40z-6o4ojR72JQWVA,145
16
16
  ai_pipeline_core/docs_generator/cli.py,sha256=8OjdMtzQraPxWN3uPapSNJnKyPLPtnygKL0rF5JL2GY,7172
@@ -37,7 +37,7 @@ ai_pipeline_core/documents/utils.py,sha256=9WOW3zvKYxQPnM8LjYFy3V9-yqc6hwgCaiog3
37
37
  ai_pipeline_core/images/__init__.py,sha256=Hc2QKR27Q2Q-h5nH-EbzfxdE3dHArBm-st5_xjOKFh0,8854
38
38
  ai_pipeline_core/images/_processing.py,sha256=MrCuPGsyyEl9UlXYIPhZs0wN8CPTMZmejV2Lo2wyCZk,4362
39
39
  ai_pipeline_core/llm/__init__.py,sha256=oyRvYD5DLDl7JIRTBUaiVz6jUC5dLLujkMNFpfRp2zc,795
40
- ai_pipeline_core/llm/ai_messages.py,sha256=Ycmntk5d6NUFqVVsnNR_IDwJUFuHYEH7CPvmmDfYaJI,17424
40
+ ai_pipeline_core/llm/ai_messages.py,sha256=Ieldm2za0tVd-5ysxYTjietWq1gtJ8kWbP-AqWqNJNg,19308
41
41
  ai_pipeline_core/llm/client.py,sha256=N8eH9bY2rF28U5kGK0HQ3ibKvphcipSMLVVxtxtut8Y,30275
42
42
  ai_pipeline_core/llm/model_options.py,sha256=hg8xR0RJdJKp8QJNA4EbLnfFsnkE4HnxD85aYxc--hM,9164
43
43
  ai_pipeline_core/llm/model_response.py,sha256=Ml9wcssSssqibReJxCc9EQu488pz69Cmq_XNBs_xmak,12219
@@ -70,7 +70,7 @@ ai_pipeline_core/observability/_tracking/_writer.py,sha256=xZjwYyIxDzzzPxqkKjYAY
70
70
  ai_pipeline_core/pipeline/__init__.py,sha256=uMv1jwSyq8Ym8Hbn5097twBJLdwN1iMeqnVM4EWyrhA,282
71
71
  ai_pipeline_core/pipeline/decorators.py,sha256=CDJAeOjGLt5Ewc0Jc9zEuwLZwKyutOv89LSRS9dcXmI,37456
72
72
  ai_pipeline_core/pipeline/options.py,sha256=KF4FcT085-IwX8r649v0a9ua5xnApM0qG2wJHWbq39A,438
73
- ai_pipeline_core-0.4.5.dist-info/METADATA,sha256=U-sw_nFzaA55Z_YzxVvvh9Mb8N3L50nU7ME6YkQ9tO4,29947
74
- ai_pipeline_core-0.4.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
75
- ai_pipeline_core-0.4.5.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
76
- ai_pipeline_core-0.4.5.dist-info/RECORD,,
73
+ ai_pipeline_core-0.4.7.dist-info/METADATA,sha256=yFjXJ9fHXFtmrF2jIFx62k5spfR1PEipR_Uekbn3bmo,29947
74
+ ai_pipeline_core-0.4.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
75
+ ai_pipeline_core-0.4.7.dist-info/licenses/LICENSE,sha256=kKj8mfbdWwkyG3U6n7ztB3bAZlEwShTkAsvaY657i3I,1074
76
+ ai_pipeline_core-0.4.7.dist-info/RECORD,,