ai-pipeline-core 0.4.5__tar.gz → 0.4.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/PKG-INFO +1 -1
  2. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/__init__.py +1 -1
  3. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/deployment/deploy.py +11 -10
  4. ai_pipeline_core-0.4.7/ai_pipeline_core/deployment/remote.py +192 -0
  5. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/llm/ai_messages.py +47 -4
  6. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/pyproject.toml +2 -2
  7. ai_pipeline_core-0.4.5/ai_pipeline_core/deployment/remote.py +0 -116
  8. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/.gitignore +0 -0
  9. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/LICENSE +0 -0
  10. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/README.md +0 -0
  11. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/deployment/__init__.py +0 -0
  12. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/deployment/base.py +0 -0
  13. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/deployment/contract.py +0 -0
  14. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/deployment/helpers.py +0 -0
  15. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/deployment/progress.py +0 -0
  16. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/docs_generator/__init__.py +0 -0
  17. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/docs_generator/__main__.py +0 -0
  18. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/docs_generator/cli.py +0 -0
  19. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/docs_generator/extractor.py +0 -0
  20. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/docs_generator/guide_builder.py +0 -0
  21. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/docs_generator/trimmer.py +0 -0
  22. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/docs_generator/validator.py +0 -0
  23. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/document_store/__init__.py +0 -0
  24. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/document_store/_summary.py +0 -0
  25. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/document_store/_summary_worker.py +0 -0
  26. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/document_store/clickhouse.py +0 -0
  27. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/document_store/factory.py +0 -0
  28. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/document_store/local.py +0 -0
  29. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/document_store/memory.py +0 -0
  30. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/document_store/protocol.py +0 -0
  31. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/documents/__init__.py +0 -0
  32. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/documents/_context_vars.py +0 -0
  33. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/documents/_hashing.py +0 -0
  34. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/documents/attachment.py +0 -0
  35. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/documents/context.py +0 -0
  36. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/documents/document.py +0 -0
  37. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/documents/mime_type.py +0 -0
  38. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/documents/utils.py +0 -0
  39. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/exceptions.py +0 -0
  40. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/images/__init__.py +0 -0
  41. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/images/_processing.py +0 -0
  42. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/llm/__init__.py +0 -0
  43. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/llm/client.py +0 -0
  44. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/llm/model_options.py +0 -0
  45. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/llm/model_response.py +0 -0
  46. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/llm/model_types.py +0 -0
  47. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/logging/__init__.py +0 -0
  48. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/logging/logging.yml +0 -0
  49. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/logging/logging_config.py +0 -0
  50. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  51. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/__init__.py +0 -0
  52. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_debug/__init__.py +0 -0
  53. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_debug/_auto_summary.py +0 -0
  54. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_debug/_config.py +0 -0
  55. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_debug/_content.py +0 -0
  56. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_debug/_processor.py +0 -0
  57. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_debug/_summary.py +0 -0
  58. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_debug/_types.py +0 -0
  59. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_debug/_writer.py +0 -0
  60. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_document_tracking.py +0 -0
  61. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_initialization.py +0 -0
  62. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_logging_bridge.py +0 -0
  63. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_summary.py +0 -0
  64. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_tracking/__init__.py +0 -0
  65. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_tracking/_client.py +0 -0
  66. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_tracking/_internal.py +0 -0
  67. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_tracking/_models.py +0 -0
  68. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_tracking/_processor.py +0 -0
  69. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_tracking/_service.py +0 -0
  70. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/_tracking/_writer.py +0 -0
  71. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/observability/tracing.py +0 -0
  72. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/pipeline/__init__.py +0 -0
  73. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/pipeline/decorators.py +0 -0
  74. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/pipeline/options.py +0 -0
  75. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/prompt_manager.py +0 -0
  76. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/py.typed +0 -0
  77. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/settings.py +0 -0
  78. {ai_pipeline_core-0.4.5 → ai_pipeline_core-0.4.7}/ai_pipeline_core/testing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.4.5
3
+ Version: 0.4.7
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -64,7 +64,7 @@ from .prompt_manager import PromptManager
64
64
  from .settings import Settings
65
65
  from .testing import disable_run_logger, prefect_test_harness
66
66
 
67
- __version__ = "0.4.5"
67
+ __version__ = "0.4.6"
68
68
 
69
69
  __all__ = [
70
70
  "AIMessageType",
@@ -383,7 +383,7 @@ class Deployer:
383
383
  dest_uri = f"gs://{self.config['bucket']}/{flow_folder}/{tarball.name}"
384
384
  self._info(f"Uploading to {dest_uri}")
385
385
 
386
- tarball_bytes = tarball.read_bytes() # noqa: ASYNC240
386
+ tarball_bytes = tarball.read_bytes()
387
387
  await bucket.write_path(tarball.name, tarball_bytes)
388
388
 
389
389
  self._success(f"Package uploaded to {flow_folder}/{tarball.name}")
@@ -527,15 +527,16 @@ class Deployer:
527
527
  # Phase 3: Build vendor packages from [tool.deploy].vendor_packages
528
528
  vendor_wheels = self._build_vendor_packages()
529
529
 
530
- # Also include cli_agents wheels from agent builds
531
- if agent_builds:
532
- seen_agent: set[str] = set()
533
- for build_info in agent_builds.values():
534
- for filename, filepath in build_info["files"].items():
535
- if filename.endswith(".whl") and filename not in seen_agent and "cli_agents" in filename:
536
- if filename not in {w.name for w in vendor_wheels}:
537
- vendor_wheels.append(filepath)
538
- seen_agent.add(filename)
530
+ # Build cli-agents wheel if source is configured — it's a private package
531
+ # not on PyPI, so the worker needs the wheel even when no agents are deployed
532
+ cli_agents_source = self._get_cli_agents_source()
533
+ if cli_agents_source:
534
+ cli_dir = Path(cli_agents_source).resolve()
535
+ if (cli_dir / "pyproject.toml").exists():
536
+ cli_wheel = self._build_wheel_from_source(cli_dir)
537
+ if cli_wheel.name not in {w.name for w in vendor_wheels}:
538
+ vendor_wheels.append(cli_wheel)
539
+ self._success(f"Built cli-agents vendor wheel: {cli_wheel.name}")
539
540
 
540
541
  # Phase 4: Upload flow package + vendor wheels
541
542
  await self._upload_package(tarball, vendor_wheels)
@@ -0,0 +1,192 @@
1
+ """Remote deployment utilities for calling PipelineDeployment flows via Prefect."""
2
+
3
+ import asyncio
4
+ from collections.abc import Awaitable, Callable, Coroutine
5
+ from functools import wraps
6
+ from typing import Any, TypeVar, cast
7
+ from uuid import UUID
8
+
9
+ from prefect import get_client
10
+ from prefect.client.orchestration import PrefectClient
11
+ from prefect.client.schemas import FlowRun
12
+ from prefect.context import AsyncClientContext
13
+ from prefect.deployments.flow_runs import run_deployment
14
+ from prefect.exceptions import ObjectNotFound
15
+
16
+ from ai_pipeline_core.deployment import DeploymentContext, DeploymentResult, PipelineDeployment
17
+ from ai_pipeline_core.documents import Document
18
+ from ai_pipeline_core.logging import get_pipeline_logger
19
+ from ai_pipeline_core.observability.tracing import TraceLevel, set_trace_cost, trace
20
+ from ai_pipeline_core.pipeline.options import FlowOptions
21
+ from ai_pipeline_core.settings import settings
22
+
23
+ logger = get_pipeline_logger(__name__)
24
+
25
+ TOptions = TypeVar("TOptions", bound=FlowOptions)
26
+ TResult = TypeVar("TResult", bound=DeploymentResult)
27
+
28
+ ProgressCallback = Callable[[float, str], Awaitable[None]]
29
+ """Signature for remote deployment progress callbacks: (fraction, message) -> None."""
30
+
31
+
32
+ def _is_already_traced(func: Callable[..., Any]) -> bool:
33
+ """Check if function or its __wrapped__ has __is_traced__ attribute."""
34
+ if getattr(func, "__is_traced__", False):
35
+ return True
36
+ wrapped = getattr(func, "__wrapped__", None)
37
+ return getattr(wrapped, "__is_traced__", False) if wrapped else False
38
+
39
+
40
+ _POLL_INTERVAL = 5.0
41
+
42
+
43
+ async def _poll_remote_flow_run(
44
+ client: PrefectClient,
45
+ flow_run_id: UUID,
46
+ deployment_name: str,
47
+ poll_interval: float = _POLL_INTERVAL,
48
+ on_progress: ProgressCallback | None = None,
49
+ ) -> Any:
50
+ """Poll a remote flow run until final, invoking on_progress callback with progress.
51
+
52
+ Reads the remote flow run's progress labels on each poll cycle and calls
53
+ on_progress(fraction, message) if provided. Without a callback, no progress
54
+ is reported. Only sends 1.0 on successful completion (not failure).
55
+ """
56
+ last_fraction = 0.0
57
+
58
+ while True:
59
+ try:
60
+ flow_run = await client.read_flow_run(flow_run_id)
61
+ except Exception:
62
+ logger.warning("Failed to poll remote flow run %s", flow_run_id, exc_info=True)
63
+ await asyncio.sleep(poll_interval)
64
+ continue
65
+
66
+ state = flow_run.state
67
+ if state and state.is_final():
68
+ if on_progress and state.is_completed():
69
+ await on_progress(1.0, f"[{deployment_name}] Completed")
70
+ return await state.result() # type: ignore[union-attr]
71
+
72
+ if on_progress:
73
+ labels: dict[str, Any] = flow_run.labels or {}
74
+ progress_val = labels.get("progress.progress")
75
+
76
+ if progress_val is not None:
77
+ fraction = max(float(progress_val), last_fraction)
78
+ last_fraction = fraction
79
+ flow_name = str(labels.get("progress.flow_name", ""))
80
+ message = str(labels.get("progress.message", ""))
81
+ display = f"[{deployment_name}] {flow_name}: {message}" if flow_name else f"[{deployment_name}] Running"
82
+ await on_progress(fraction, display)
83
+ else:
84
+ await on_progress(last_fraction, f"[{deployment_name}] Waiting to start")
85
+
86
+ await asyncio.sleep(poll_interval)
87
+
88
+
89
+ async def run_remote_deployment(
90
+ deployment_name: str,
91
+ parameters: dict[str, Any],
92
+ on_progress: ProgressCallback | None = None,
93
+ ) -> Any:
94
+ """Run a remote Prefect deployment with optional progress callback.
95
+
96
+ Creates the remote flow run immediately (timeout=0) then polls its state,
97
+ invoking on_progress(fraction, message) on each poll cycle if provided.
98
+ """
99
+
100
+ async def _create_and_poll(client: PrefectClient, as_subflow: bool) -> Any:
101
+ fr: FlowRun = await run_deployment(
102
+ client=client,
103
+ name=deployment_name,
104
+ parameters=parameters,
105
+ as_subflow=as_subflow,
106
+ timeout=0,
107
+ ) # type: ignore
108
+ return await _poll_remote_flow_run(client, fr.id, deployment_name, on_progress=on_progress)
109
+
110
+ async with get_client() as client:
111
+ try:
112
+ await client.read_deployment_by_name(name=deployment_name)
113
+ return await _create_and_poll(client, True) # noqa: FBT003
114
+ except ObjectNotFound:
115
+ pass
116
+
117
+ if not settings.prefect_api_url:
118
+ raise ValueError(f"{deployment_name} not found, PREFECT_API_URL not set")
119
+
120
+ async with PrefectClient(
121
+ api=settings.prefect_api_url,
122
+ api_key=settings.prefect_api_key,
123
+ auth_string=settings.prefect_api_auth_string,
124
+ ) as client:
125
+ try:
126
+ await client.read_deployment_by_name(name=deployment_name)
127
+ ctx = AsyncClientContext.model_construct(client=client, _httpx_settings=None, _context_stack=0)
128
+ with ctx:
129
+ return await _create_and_poll(client, False) # noqa: FBT003
130
+ except ObjectNotFound:
131
+ pass
132
+
133
+ raise ValueError(f"{deployment_name} deployment not found")
134
+
135
+
136
+ def remote_deployment(
137
+ deployment_class: type[PipelineDeployment[TOptions, TResult]],
138
+ *,
139
+ deployment_name: str | None = None,
140
+ name: str | None = None,
141
+ trace_level: TraceLevel = "always",
142
+ trace_cost: float | None = None,
143
+ ) -> Callable[[Callable[..., Any]], Callable[..., Coroutine[Any, Any, TResult]]]:
144
+ """Decorator to call PipelineDeployment flows remotely with automatic serialization.
145
+
146
+ The decorated function's body is never executed — it serves as a typed stub.
147
+ The wrapper enforces the deployment contract: (project_name, documents, options, context).
148
+ """
149
+
150
+ def decorator(func: Callable[..., Any]) -> Callable[..., Coroutine[Any, Any, TResult]]:
151
+ fname = getattr(func, "__name__", deployment_class.name)
152
+
153
+ if _is_already_traced(func):
154
+ raise TypeError(f"@remote_deployment target '{fname}' already has @trace")
155
+
156
+ @wraps(func)
157
+ async def _wrapper(
158
+ project_name: str,
159
+ documents: list[Document],
160
+ options: TOptions,
161
+ context: DeploymentContext | None = None,
162
+ *,
163
+ on_progress: ProgressCallback | None = None,
164
+ ) -> TResult:
165
+ parameters: dict[str, Any] = {
166
+ "project_name": project_name,
167
+ "documents": documents,
168
+ "options": options,
169
+ "context": context if context is not None else DeploymentContext(),
170
+ }
171
+
172
+ full_name = f"{deployment_class.name}/{deployment_name or deployment_class.name.replace('-', '_')}"
173
+
174
+ result = await run_remote_deployment(full_name, parameters, on_progress=on_progress)
175
+
176
+ if trace_cost is not None and trace_cost > 0:
177
+ set_trace_cost(trace_cost)
178
+
179
+ if isinstance(result, DeploymentResult):
180
+ return cast(TResult, result)
181
+ if isinstance(result, dict):
182
+ return cast(TResult, deployment_class.result_type(**cast(dict[str, Any], result)))
183
+ raise TypeError(f"Expected DeploymentResult, got {type(result).__name__}")
184
+
185
+ traced_wrapper = trace(
186
+ level=trace_level,
187
+ name=name or deployment_class.name,
188
+ )(_wrapper)
189
+
190
+ return traced_wrapper
191
+
192
+ return decorator
@@ -38,6 +38,34 @@ def _ensure_llm_compatible_image(content: bytes, mime_type: str) -> tuple[bytes,
38
38
  return buf.getvalue(), "image/png"
39
39
 
40
40
 
41
+ def _looks_like_text(content: bytes) -> bool:
42
+ """Check if content is valid UTF-8 text (not binary).
43
+
44
+ Uses heuristics: must decode as UTF-8 and have no null bytes.
45
+ Null bytes are common in binary files but rare in text.
46
+ """
47
+ if not content:
48
+ return True
49
+ # Null bytes indicate binary content
50
+ if b"\x00" in content:
51
+ return False
52
+ try:
53
+ content.decode("utf-8")
54
+ return True
55
+ except UnicodeDecodeError:
56
+ return False
57
+
58
+
59
+ def _has_pdf_signature(content: bytes) -> bool:
60
+ """Check if content starts with PDF magic bytes (%PDF-).
61
+
62
+ Real PDFs start with %PDF- (possibly after whitespace).
63
+ This prevents false positives when a real PDF happens to be
64
+ partly UTF-8 decodable (e.g., ASCII-heavy PDF metadata).
65
+ """
66
+ return content.lstrip().startswith(b"%PDF-")
67
+
68
+
41
69
  AIMessageType = str | Document | ModelResponse
42
70
  """Type for messages in AIMessages container.
43
71
 
@@ -350,7 +378,7 @@ class AIMessages(list[AIMessageType]): # noqa: PLR0904
350
378
  return count
351
379
 
352
380
  @staticmethod
353
- def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]: # noqa: PLR0912, PLR0914
381
+ def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]: # noqa: C901, PLR0912, PLR0914, PLR0915
354
382
  """Convert a document to prompt format for LLM consumption.
355
383
 
356
384
  Renders the document as XML with text/image/PDF content, followed by any
@@ -368,8 +396,15 @@ class AIMessages(list[AIMessageType]): # noqa: PLR0904
368
396
  description = f"<description>{document.description}</description>\n" if document.description else ""
369
397
  header_text = f"<document>\n<id>{document.id}</id>\n<name>{document.name}</name>\n{description}"
370
398
 
399
+ # Check if "PDF" is actually text (misnamed file from URL ending in .pdf)
400
+ # Real PDFs start with %PDF- magic bytes; if missing and content is UTF-8, it's text
401
+ is_text = document.is_text
402
+ if not is_text and document.is_pdf and _looks_like_text(document.content) and not _has_pdf_signature(document.content):
403
+ is_text = True
404
+ logger.debug(f"Document '{document.name}' has PDF extension but contains text content - sending as text")
405
+
371
406
  # Handle text documents
372
- if document.is_text:
407
+ if is_text:
373
408
  text_content = document.content.decode("utf-8")
374
409
  content_text = f"{header_text}<content>\n{text_content}\n</content>\n"
375
410
  prompt.append({"type": "text", "text": content_text})
@@ -407,8 +442,16 @@ class AIMessages(list[AIMessageType]): # noqa: PLR0904
407
442
  desc_attr = f' description="{att.description}"' if att.description else ""
408
443
  att_open = f'<attachment name="{att.name}"{desc_attr}>\n'
409
444
 
410
- if att.is_text:
411
- prompt.append({"type": "text", "text": f"{att_open}{att.text}\n</attachment>\n"})
445
+ # Check if "PDF" attachment is actually text (same logic as document)
446
+ att_is_text = att.is_text
447
+ if not att_is_text and att.is_pdf and _looks_like_text(att.content) and not _has_pdf_signature(att.content):
448
+ att_is_text = True
449
+ logger.debug(f"Attachment '{att.name}' has PDF extension but contains text content - sending as text")
450
+
451
+ if att_is_text:
452
+ # Use content.decode() directly - att.text property raises ValueError if is_text is False
453
+ att_text = att.content.decode("utf-8")
454
+ prompt.append({"type": "text", "text": f"{att_open}{att_text}\n</attachment>\n"})
412
455
  elif att.is_image or att.is_pdf:
413
456
  prompt.append({"type": "text", "text": att_open})
414
457
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ai-pipeline-core"
3
- version = "0.4.5"
3
+ version = "0.4.7"
4
4
  description = "Core utilities for AI-powered processing pipelines using prefect"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -142,7 +142,7 @@ convention = "google"
142
142
  ".vulture_whitelist.py" = ["B018", "E402", "F401", "D"]
143
143
  "ai_pipeline_core/__init__.py" = ["RUF067"]
144
144
  "ai_pipeline_core/deployment/base.py" = ["T20", "C901", "PLR0912", "PLR0914", "PLR0915", "PLR0917", "PLR6301", "PLC0415", "UP046"]
145
- "ai_pipeline_core/deployment/deploy.py" = ["T20", "PLC2701", "S404", "S602", "S603"]
145
+ "ai_pipeline_core/deployment/deploy.py" = ["T20", "PLC2701", "S404", "S602", "S603", "ASYNC"]
146
146
  "ai_pipeline_core/deployment/remote.py" = ["UP047"]
147
147
  "ai_pipeline_core/deployment/progress.py" = ["RUF029"]
148
148
  "ai_pipeline_core/documents/document.py" = ["PLR0904"]
@@ -1,116 +0,0 @@
1
- """Remote deployment utilities for calling PipelineDeployment flows via Prefect."""
2
-
3
- import inspect
4
- from collections.abc import Callable
5
- from functools import wraps
6
- from typing import Any, ParamSpec, TypeVar, cast
7
-
8
- from prefect import get_client
9
- from prefect.client.orchestration import PrefectClient
10
- from prefect.client.schemas import FlowRun
11
- from prefect.context import AsyncClientContext
12
- from prefect.deployments.flow_runs import run_deployment
13
- from prefect.exceptions import ObjectNotFound
14
-
15
- from ai_pipeline_core.deployment import DeploymentContext, DeploymentResult, PipelineDeployment
16
- from ai_pipeline_core.observability.tracing import TraceLevel, set_trace_cost, trace
17
- from ai_pipeline_core.pipeline.options import FlowOptions
18
- from ai_pipeline_core.settings import settings
19
-
20
- P = ParamSpec("P")
21
- TOptions = TypeVar("TOptions", bound=FlowOptions)
22
- TResult = TypeVar("TResult", bound=DeploymentResult)
23
-
24
-
25
- def _is_already_traced(func: Callable[..., Any]) -> bool:
26
- """Check if function or its __wrapped__ has __is_traced__ attribute."""
27
- if getattr(func, "__is_traced__", False):
28
- return True
29
- wrapped = getattr(func, "__wrapped__", None)
30
- return getattr(wrapped, "__is_traced__", False) if wrapped else False
31
-
32
-
33
- async def run_remote_deployment(deployment_name: str, parameters: dict[str, Any]) -> Any:
34
- """Run a remote Prefect deployment, trying local client first then remote."""
35
-
36
- async def _run(client: PrefectClient, as_subflow: bool) -> Any:
37
- fr: FlowRun = await run_deployment(client=client, name=deployment_name, parameters=parameters, as_subflow=as_subflow) # type: ignore
38
- return await fr.state.result() # type: ignore
39
-
40
- async with get_client() as client:
41
- try:
42
- await client.read_deployment_by_name(name=deployment_name)
43
- return await _run(client, True) # noqa: FBT003
44
- except ObjectNotFound:
45
- pass
46
-
47
- if not settings.prefect_api_url:
48
- raise ValueError(f"{deployment_name} not found, PREFECT_API_URL not set")
49
-
50
- async with PrefectClient(
51
- api=settings.prefect_api_url,
52
- api_key=settings.prefect_api_key,
53
- auth_string=settings.prefect_api_auth_string,
54
- ) as client:
55
- try:
56
- await client.read_deployment_by_name(name=deployment_name)
57
- ctx = AsyncClientContext.model_construct(client=client, _httpx_settings=None, _context_stack=0)
58
- with ctx:
59
- return await _run(client, False) # noqa: FBT003
60
- except ObjectNotFound:
61
- pass
62
-
63
- raise ValueError(f"{deployment_name} deployment not found")
64
-
65
-
66
- def remote_deployment(
67
- deployment_class: type[PipelineDeployment[TOptions, TResult]],
68
- *,
69
- deployment_name: str | None = None,
70
- name: str | None = None,
71
- trace_level: TraceLevel = "always",
72
- trace_cost: float | None = None,
73
- ) -> Callable[[Callable[P, TResult]], Callable[P, TResult]]:
74
- """Decorator to call PipelineDeployment flows remotely with automatic serialization."""
75
-
76
- def decorator(func: Callable[P, TResult]) -> Callable[P, TResult]:
77
- fname = getattr(func, "__name__", deployment_class.name)
78
-
79
- if _is_already_traced(func):
80
- raise TypeError(f"@remote_deployment target '{fname}' already has @trace")
81
-
82
- @wraps(func)
83
- async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> TResult:
84
- sig = inspect.signature(func)
85
- bound = sig.bind(*args, **kwargs)
86
- bound.apply_defaults()
87
-
88
- # Pass parameters with proper types - Prefect handles Pydantic serialization
89
- parameters: dict[str, Any] = {}
90
- for pname, value in bound.arguments.items():
91
- if value is None and pname == "context":
92
- parameters[pname] = DeploymentContext()
93
- else:
94
- parameters[pname] = value
95
-
96
- full_name = f"{deployment_class.name}/{deployment_name or deployment_class.name.replace('-', '_')}"
97
-
98
- result = await run_remote_deployment(full_name, parameters)
99
-
100
- if trace_cost is not None and trace_cost > 0:
101
- set_trace_cost(trace_cost)
102
-
103
- if isinstance(result, DeploymentResult):
104
- return cast(TResult, result)
105
- if isinstance(result, dict):
106
- return cast(TResult, deployment_class.result_type(**cast(dict[str, Any], result)))
107
- raise TypeError(f"Expected DeploymentResult, got {type(result).__name__}")
108
-
109
- traced_wrapper = trace(
110
- level=trace_level,
111
- name=name or deployment_class.name,
112
- )(_wrapper)
113
-
114
- return traced_wrapper # type: ignore[return-value]
115
-
116
- return decorator