ai-pipeline-core 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +1 -1
- ai_pipeline_core/documents/document.py +24 -1
- ai_pipeline_core/documents/mime_type.py +4 -4
- ai_pipeline_core/llm/ai_messages.py +32 -0
- ai_pipeline_core/llm/client.py +82 -51
- ai_pipeline_core/llm/model_options.py +19 -1
- ai_pipeline_core/llm/model_response.py +113 -173
- ai_pipeline_core/llm/model_types.py +1 -1
- ai_pipeline_core/pipeline.py +0 -11
- ai_pipeline_core/settings.py +4 -2
- ai_pipeline_core/simple_runner/cli.py +0 -2
- ai_pipeline_core/tracing.py +0 -2
- ai_pipeline_core/utils/__init__.py +8 -0
- ai_pipeline_core/utils/deploy.py +373 -0
- ai_pipeline_core/utils/remote_deployment.py +269 -0
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/METADATA +4 -4
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/RECORD +19 -16
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/__init__.py
CHANGED

ai_pipeline_core/documents/document.py
CHANGED

@@ -29,6 +29,7 @@ from typing import (
     overload,
 )

+import tiktoken
 from pydantic import (
     BaseModel,
     ConfigDict,
@@ -980,7 +981,7 @@ class Document(BaseModel, ABC):
         """Detect the MIME type from document content.

         Detection strategy (in order):
-        1. Returns '
+        1. Returns 'text/plain' for empty content
         2. Extension-based detection for known text formats (preferred)
         3. python-magic content analysis for unknown extensions
         4. Fallback to extension or 'application/octet-stream'
@@ -1103,6 +1104,28 @@ class Document(BaseModel, ABC):
             raise ValueError(f"Document is not text: {self.name}")
         return self.content.decode("utf-8")

+    @property
+    def approximate_tokens_count(self) -> int:
+        """Approximate tokens count for the document content.
+
+        @public
+
+        Uses tiktoken with gpt-4 encoding to estimate token count.
+        For text documents, encodes the actual text. For non-text
+        documents (images, PDFs, etc.), returns a fixed estimate of 1024 tokens.
+
+        Returns:
+            Approximate number of tokens for this document.
+
+        Example:
+            >>> doc = MyDocument.create(name="data.txt", content="Hello world")
+            >>> doc.approximate_tokens_count  # ~2 tokens
+        """
+        if self.is_text:
+            return len(tiktoken.encoding_for_model("gpt-4").encode(self.text))
+        else:
+            return 1024  # Fixed estimate for non-text documents
+
     def as_yaml(self) -> Any:
         r"""Parse document content as YAML.
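The new property gives a cheap pre-flight size estimate before sending documents to a model. A minimal usage sketch; the `MyDocument` subclass follows the docstring example above, and the import path is an assumption based on the file listing, not the released API:

```python
# Sketch only: assumes Document can be subclassed this way and that
# create() accepts str or bytes content, as the docstrings above suggest.
from ai_pipeline_core.documents.document import Document  # assumed path

class MyDocument(Document):
    """Hypothetical concrete document type."""

text_doc = MyDocument.create(name="data.txt", content="Hello world")
print(text_doc.approximate_tokens_count)  # ~2 tokens via tiktoken

pdf_doc = MyDocument.create(name="scan.pdf", content=b"%PDF-1.4")
print(pdf_doc.approximate_tokens_count)   # 1024, the fixed non-text estimate
```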
ai_pipeline_core/documents/mime_type.py
CHANGED

@@ -43,7 +43,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
     r"""Detect MIME type from document content and filename.

     Uses a multi-stage detection strategy for maximum accuracy:
-    1. Returns '
+    1. Returns 'text/plain' for empty content
     2. Uses extension-based detection for known formats (most reliable)
     3. Falls back to python-magic content analysis
     4. Final fallback to extension or 'application/octet-stream'
@@ -57,7 +57,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
     Never returns None or empty string.

     Fallback behavior:
-    - Empty content: '
+    - Empty content: 'text/plain'
     - Unknown extension with binary content: 'application/octet-stream'
     - Magic library failure: Falls back to extension or 'application/octet-stream'

@@ -75,13 +75,13 @@ def detect_mime_type(content: bytes, name: str) -> str:
     >>> detect_mime_type(b'Hello World', "text.txt")
     'text/plain'
     >>> detect_mime_type(b'', "empty.txt")
-    '
+    'text/plain'
     >>> detect_mime_type(b'\\x89PNG', "image.xyz")
     'image/png'  # Magic detects PNG despite wrong extension
     """
     # Check for empty content
     if len(content) == 0:
-        return "
+        return "text/plain"

     # Try extension-based detection first for known formats
     # This is more reliable for text formats that magic might misidentify
ai_pipeline_core/llm/ai_messages.py
CHANGED

@@ -12,6 +12,7 @@ import json
 from copy import deepcopy
 from typing import Any, Callable, Iterable, SupportsIndex, Union

+import tiktoken
 from openai.types.chat import (
     ChatCompletionContentPartParam,
     ChatCompletionMessageParam,
@@ -301,6 +302,37 @@ class AIMessages(list[AIMessageType]):
             system_prompt = ""
         return hashlib.sha256((system_prompt + json.dumps(self.to_prompt())).encode()).hexdigest()

+    @property
+    def approximate_tokens_count(self) -> int:
+        """Approximate tokens count for the messages.
+
+        @public
+
+        Uses tiktoken with gpt-4 encoding to estimate total token count
+        across all messages in the conversation.
+
+        Returns:
+            Approximate tokens count for all messages.
+
+        Raises:
+            ValueError: If message contains unsupported type.
+
+        Example:
+            >>> messages = AIMessages(["Hello", "World"])
+            >>> messages.approximate_tokens_count  # ~2-3 tokens
+        """
+        count = 0
+        for message in self:
+            if isinstance(message, str):
+                count += len(tiktoken.encoding_for_model("gpt-4").encode(message))
+            elif isinstance(message, Document):
+                count += message.approximate_tokens_count
+            elif isinstance(message, ModelResponse):  # type: ignore
+                count += len(tiktoken.encoding_for_model("gpt-4").encode(message.content))
+            else:
+                raise ValueError(f"Unsupported message type: {type(message)}")
+        return count
+
     @staticmethod
     def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
         """Convert a document to prompt format for LLM consumption.
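A brief usage sketch of the new property; the import path is assumed from the file listing, and exact totals depend on the tiktoken encoding:

```python
# Sketch only: AIMessages sums per-item estimates (tiktoken for plain
# strings and response text; Document.approximate_tokens_count for documents).
from ai_pipeline_core.llm.ai_messages import AIMessages  # assumed path

messages = AIMessages(["Hello", "World"])
print(messages.approximate_tokens_count)  # roughly 2-3 tokens

# Illustrative pre-flight budget check; the 100k threshold is arbitrary.
if messages.approximate_tokens_count > 100_000:
    raise ValueError("Conversation likely exceeds the target context window")
```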
ai_pipeline_core/llm/client.py
CHANGED

@@ -12,15 +12,17 @@ Key functions:
 """

 import asyncio
+import time
 from typing import Any, TypeVar

 from lmnr import Laminar
 from openai import AsyncOpenAI
+from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent, ContentDoneEvent
 from openai.types.chat import (
     ChatCompletionMessageParam,
 )
 from prefect.logging import get_logger
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationError

 from ai_pipeline_core.exceptions import LLMError
 from ai_pipeline_core.settings import settings
@@ -101,6 +103,42 @@ def _process_messages(
     return processed_messages


+def _model_name_to_openrouter_model(model: ModelName) -> str:
+    """Convert a model name to an OpenRouter model name.
+
+    Args:
+        model: Model name to convert.
+
+    Returns:
+        OpenRouter model name.
+    """
+    if model == "gpt-4o-search":
+        return "openai/gpt-4o-search-preview"
+    if model == "gemini-2.5-flash-search":
+        return "google/gemini-2.5-flash:online"
+    if model == "grok-4-fast-search":
+        return "x-ai/grok-4-fast:online"
+    if model == "sonar-pro-search":
+        return "perplexity/sonar-reasoning-pro"
+    if model.startswith("gemini"):
+        return f"google/{model}"
+    elif model.startswith("gpt"):
+        return f"openai/{model}"
+    elif model.startswith("grok"):
+        return f"x-ai/{model}"
+    elif model.startswith("claude"):
+        return f"anthropic/{model}"
+    elif model.startswith("qwen3"):
+        return f"qwen/{model}"
+    elif model.startswith("deepseek-"):
+        return f"deepseek/{model}"
+    elif model.startswith("glm-"):
+        return f"z-ai/{model}"
+    elif model.startswith("kimi-"):
+        return f"moonshotai/{model}"
+    return model
+
+
 async def _generate(
     model: str, messages: list[ChatCompletionMessageParam], completion_kwargs: dict[str, Any]
 ) -> ModelResponse:
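The helper only runs when the configured base URL contains "openrouter" (see the next hunk). The expected translations below are derived directly from the branches above; the specific model names are just illustrations:

```python
# Derived from the function body above; model names are illustrative.
assert _model_name_to_openrouter_model("sonar-pro-search") == "perplexity/sonar-reasoning-pro"  # exact match
assert _model_name_to_openrouter_model("gpt-4.1") == "openai/gpt-4.1"                 # prefix rule
assert _model_name_to_openrouter_model("claude-sonnet-4") == "anthropic/claude-sonnet-4"
assert _model_name_to_openrouter_model("mistral-large") == "mistral-large"            # no rule: unchanged
```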
@@ -126,23 +164,44 @@ async def _generate(
     - Captures response headers for cost tracking
     - Response includes model options for debugging
     """
+    if "openrouter" in settings.openai_base_url.lower():
+        model = _model_name_to_openrouter_model(model)
+
     async with AsyncOpenAI(
         api_key=settings.openai_api_key,
         base_url=settings.openai_base_url,
     ) as client:
-        [... 13 removed lines not rendered in source ...]
+        start_time = time.time()
+        first_token_time = None
+        usage = None
+        async with client.chat.completions.stream(
+            model=model,
+            messages=messages,
+            **completion_kwargs,
+        ) as stream:
+            async for event in stream:
+                if isinstance(event, ContentDeltaEvent):
+                    if not first_token_time:
+                        first_token_time = time.time()
+                elif isinstance(event, ContentDoneEvent):
+                    pass
+                elif isinstance(event, ChunkEvent):
+                    if event.chunk.usage:  # used to fix a bug with missing usage data
+                        usage = event.chunk.usage
+            if not first_token_time:
+                first_token_time = time.time()
+            raw_response = await stream.get_final_completion()
+
+        metadata = {
+            "time_taken": round(time.time() - start_time, 2),
+            "first_token_time": round(first_token_time - start_time, 2),
+        }
+        response = ModelResponse(
+            raw_response,
+            model_options=completion_kwargs,
+            metadata=metadata,
+            usage=usage,
+        )
     return response
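The move from a single completion call to the streaming helper is what lets the client measure time-to-first-token and recover usage from the final chunk. A standalone sketch of the same pattern, assuming an openai-python version that exports the non-beta `chat.completions.stream()` helper and these event classes (the model name and prompt are placeholders):

```python
# Minimal sketch of the timing/usage-capture pattern used above.
import asyncio
import time

from openai import AsyncOpenAI
from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent

async def timed_completion(prompt: str) -> None:
    start = time.time()
    first_token_time = None
    usage = None
    async with AsyncOpenAI() as client:
        async with client.chat.completions.stream(
            model="gpt-4o-mini",  # placeholder model
            messages=[{"role": "user", "content": prompt}],
            stream_options={"include_usage": True},  # usage arrives on the final chunk
        ) as stream:
            async for event in stream:
                if isinstance(event, ContentDeltaEvent) and first_token_time is None:
                    first_token_time = time.time()
                elif isinstance(event, ChunkEvent) and event.chunk.usage:
                    usage = event.chunk.usage
            completion = await stream.get_final_completion()
    if first_token_time is not None:
        print(f"first token after {first_token_time - start:.2f}s")
    print(f"usage: {usage}")
    print(completion.choices[0].message.content)

asyncio.run(timed_completion("Say hello"))
```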
@@ -182,8 +241,6 @@ async def _generate_with_retry(
         context, messages, options.system_prompt, options.cache_ttl
     )
     completion_kwargs: dict[str, Any] = {
-        "model": model,
-        "messages": processed_messages,
         **options.to_openai_completion_kwargs(),
     }
@@ -197,20 +254,18 @@ async def _generate_with_retry(
         ) as span:
             response = await _generate(model, processed_messages, completion_kwargs)
             span.set_attributes(response.get_laminar_metadata())
-            Laminar.set_span_output(
-            [... 2 removed lines not rendered in source ...]
+            Laminar.set_span_output([
+                r for r in (response.reasoning_content, response.content) if r
+            ])
+            response.validate_output()
             return response
-    except (asyncio.TimeoutError, ValueError, Exception) as e:
+    except (asyncio.TimeoutError, ValueError, ValidationError, Exception) as e:
         if not isinstance(e, asyncio.TimeoutError):
             # disable cache if it's not a timeout because it may cause an error
             completion_kwargs["extra_body"]["cache"] = {"no-cache": True}

         logger.warning(
-            "LLM generation failed (attempt
-            attempt + 1,
-            options.retries,
-            e,
+            f"LLM generation failed (attempt {attempt + 1}/{options.retries}): {e}",
         )
         if attempt == options.retries - 1:
             raise LLMError("Exhausted all retry attempts for LLM generation.") from e
@@ -453,8 +508,8 @@ async def generate_structured(
     In most cases, leave as None to use framework defaults.
     Configure model behavior centrally via LiteLLM proxy settings when possible.

-    [... 2 removed lines not rendered in source ...]
+    Note:
+        Vision/PDF model compatibility considerations:
     - Images require vision-capable models that also support structured output
     - PDFs require models with both document processing AND structured output support
     - Many models support either vision OR structured output, but not both
@@ -536,28 +591,4 @@ async def generate_structured(
     except (ValueError, LLMError):
         raise  # Explicitly re-raise to satisfy DOC502

-
-    parsed_value: T | None = None
-
-    # Check if response has choices and parsed content
-    if response.choices and hasattr(response.choices[0].message, "parsed"):
-        parsed: Any = response.choices[0].message.parsed  # type: ignore[attr-defined]
-
-        # If parsed is a dict, instantiate it as the response format class
-        if isinstance(parsed, dict):
-            parsed_value = response_format(**parsed)
-        # If it's already the right type, use it
-        elif isinstance(parsed, response_format):
-            parsed_value = parsed
-        else:
-            # Otherwise try to convert it
-            raise TypeError(
-                f"Unable to convert parsed response to {response_format.__name__}: "
-                f"got type {type(parsed).__name__}"  # type: ignore[reportUnknownArgumentType]
-            )
-
-    if parsed_value is None:
-        raise ValueError("No parsed content available from the model response")
-
-    # Create a StructuredModelResponse with the parsed value
-    return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
+    return StructuredModelResponse[T].from_model_response(response)
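Per this hunk, the inline dict/type coercion now lives behind `StructuredModelResponse.from_model_response` in model_response.py, whose diff is not shown here. A rough sketch of equivalent behavior, reconstructed only from the removed lines above; the real classmethod may differ:

```python
# Hypothetical reconstruction of the logic the classmethod replaces;
# based solely on the removed inline code, not on model_response.py itself.
def parse_structured(response, response_format):
    if response.choices and hasattr(response.choices[0].message, "parsed"):
        parsed = response.choices[0].message.parsed
        if isinstance(parsed, dict):
            return response_format(**parsed)
        if isinstance(parsed, response_format):
            return parsed
        raise TypeError(f"Unable to convert parsed response to {response_format.__name__}")
    raise ValueError("No parsed content available from the model response")
```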
ai_pipeline_core/llm/model_options.py
CHANGED

@@ -88,6 +88,12 @@ class ModelOptions(BaseModel):
             and detect abuse. Maximum length is typically 256 characters.
             Useful for multi-tenant applications or per-user billing.

+        metadata: Custom metadata tags for tracking and observability.
+            Dictionary of string key-value pairs for tagging requests.
+            Useful for tracking experiments, versions, or custom attributes.
+            Maximum of 16 key-value pairs, each key/value max 64 characters.
+            Passed through to LMNR tracing and API provider metadata.
+
         extra_body: Additional provider-specific parameters to pass in request body.
             Dictionary of custom parameters not covered by standard options.
             Merged with usage_tracking if both are set.
@@ -147,6 +153,12 @@ class ModelOptions(BaseModel):
         ...     user="user_12345",  # Track costs per user
         ...     temperature=0.7
         ... )
+        >>>
+        >>> # With metadata for tracking and observability
+        >>> options = ModelOptions(
+        ...     metadata={"experiment": "v1", "version": "2.0", "feature": "search"},
+        ...     temperature=0.7
+        ... )

     Note:
         - Not all options apply to all models
@@ -165,7 +177,7 @@ class ModelOptions(BaseModel):
     search_context_size: Literal["low", "medium", "high"] | None = None
     reasoning_effort: Literal["low", "medium", "high"] | None = None
     retries: int = 3
-    retry_delay_seconds: int =
+    retry_delay_seconds: int = 20
     timeout: int = 600
     cache_ttl: str | None = "5m"
     service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
@@ -175,6 +187,7 @@ class ModelOptions(BaseModel):
     verbosity: Literal["low", "medium", "high"] | None = None
     usage_tracking: bool = True
     user: str | None = None
+    metadata: dict[str, str] | None = None
     extra_body: dict[str, Any] | None = None

     def to_openai_completion_kwargs(self) -> dict[str, Any]:
@@ -200,6 +213,7 @@ class ModelOptions(BaseModel):
         - service_tier -> service_tier
         - verbosity -> verbosity
         - user -> user (for cost tracking)
+        - metadata -> metadata (for tracking/observability)
         - extra_body -> extra_body (merged with usage tracking)

         Web Search Structure:
@@ -253,7 +267,11 @@ class ModelOptions(BaseModel):
         if self.user:
             kwargs["user"] = self.user

+        if self.metadata:
+            kwargs["metadata"] = self.metadata
+
         if self.usage_tracking:
             kwargs["extra_body"]["usage"] = {"include": True}
+            kwargs["stream_options"] = {"include_usage": True}

         return kwargs
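A sketch of how the new fields surface in the generated kwargs, assuming all other fields keep the defaults declared above (`usage_tracking=True`) and that `extra_body` is initialized earlier in the method:

```python
# Illustrative only; the exact dict shape depends on the rest of the method.
opts = ModelOptions(metadata={"experiment": "v1"})
kwargs = opts.to_openai_completion_kwargs()

assert kwargs["metadata"] == {"experiment": "v1"}
assert kwargs["extra_body"]["usage"] == {"include": True}
assert kwargs["stream_options"] == {"include_usage": True}  # usage on the final stream chunk
```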