ai-pipeline-core 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only and reflects the package contents as they appear in that registry.
- ai_pipeline_core/__init__.py +1 -1
- ai_pipeline_core/documents/document.py +24 -1
- ai_pipeline_core/documents/mime_type.py +4 -4
- ai_pipeline_core/llm/ai_messages.py +32 -0
- ai_pipeline_core/llm/client.py +82 -51
- ai_pipeline_core/llm/model_options.py +19 -1
- ai_pipeline_core/llm/model_response.py +113 -173
- ai_pipeline_core/llm/model_types.py +1 -1
- ai_pipeline_core/pipeline.py +0 -11
- ai_pipeline_core/settings.py +4 -2
- ai_pipeline_core/simple_runner/cli.py +0 -2
- ai_pipeline_core/tracing.py +0 -2
- ai_pipeline_core/utils/__init__.py +8 -0
- ai_pipeline_core/utils/deploy.py +373 -0
- ai_pipeline_core/utils/remote_deployment.py +269 -0
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/METADATA +4 -4
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/RECORD +19 -16
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.2.4.dist-info → ai_pipeline_core-0.2.6.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/llm/model_response.py
CHANGED

@@ -6,13 +6,18 @@ Provides enhanced response classes that use OpenAI-compatible base types via LiteLLM
 with additional metadata, cost tracking, and structured output support.
 """

-import
+import json
+from copy import deepcopy
 from typing import Any, Generic, TypeVar

-from openai.types.chat import ChatCompletion
-from
+from openai.types.chat import ChatCompletion
+from openai.types.completion_usage import CompletionUsage
+from pydantic import BaseModel

-T = TypeVar(
+T = TypeVar(
+    "T",
+    bound=BaseModel,
+)
 """Type parameter for structured response Pydantic models."""
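Aside (illustration, not part of the diff): binding T to BaseModel is what lets the generic response types later in this file validate JSON into a caller-supplied Pydantic model. A minimal sketch of the pattern, with a hypothetical Analysis schema:

    from typing import Generic, TypeVar
    from pydantic import BaseModel

    T = TypeVar("T", bound=BaseModel)  # any Pydantic model satisfies the bound

    class Parsed(Generic[T]):
        """Sketch: parse raw JSON into the bound model type."""
        def __init__(self, raw_json: str, model_type: type[T]) -> None:
            self.value: T = model_type.model_validate_json(raw_json)

    class Analysis(BaseModel):  # hypothetical user schema
        sentiment: str

    print(Parsed('{"sentiment": "positive"}', Analysis).value.sentiment)  # positive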
@@ -52,42 +57,48 @@ class ModelResponse(ChatCompletion):
     when absolutely necessary.
     """

-
-
-
-
-
+    def __init__(
+        self,
+        chat_completion: ChatCompletion,
+        model_options: dict[str, Any],
+        metadata: dict[str, Any],
+        usage: CompletionUsage | None = None,
+    ) -> None:
+        """Initialize ModelResponse from ChatCompletion.

-
-
-        headers dict if not provided.
+        Wraps an OpenAI ChatCompletion object with additional metadata
+        and model options for tracking and observability.

         Args:
-            chat_completion:
-
-
+            chat_completion: ChatCompletion object from the API.
+            model_options: Model configuration options used for the request.
+                Stored for metadata extraction and tracing.
+            metadata: Custom metadata for tracking (time_taken, first_token_time, etc.).
+                Includes timing information and custom tags.
+            usage: Optional usage information from streaming response.

         Example:
-            >>> #
-            >>> response = ModelResponse(chat_completion_obj)
-            >>>
-            >>> # Direct initialization (mainly for testing)
+            >>> # Usually created internally by generate()
             >>> response = ModelResponse(
-            ...
-            ...
-            ...
+            ...     chat_completion=completion,
+            ...     model_options={"temperature": 0.7, "model": "gpt-4"},
+            ...     metadata={"time_taken": 1.5, "first_token_time": 0.3}
             ... )
         """
-
-
-
-
-
-
-
-
-
-
+        data = chat_completion.model_dump()
+
+        # fixes issue where the role is "assistantassistant" instead of "assistant"
+        for i in range(len(data["choices"])):
+            if role := data["choices"][i]["message"].get("role"):
+                if role.startswith("assistant") and role != "assistant":
+                    data["choices"][i]["message"]["role"] = "assistant"
+
+        super().__init__(**data)
+
+        self._model_options = model_options
+        self._metadata = metadata
+        if usage:
+            self.usage = usage

     @property
     def content(self) -> str:
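Aside (illustration, not part of the diff): the new constructor dumps the ChatCompletion to a dict and normalizes the message role before re-validating through the parent model, since a doubled role such as "assistantassistant" would fail the parent's literal "assistant" role check. A standalone sketch of that normalization step; the payload values are made up:

    # Illustrative payload with the doubled role the diff's comment describes.
    data = {"choices": [{"message": {"role": "assistantassistant", "content": "hi"}}]}

    # Same loop as in the new __init__: collapse any "assistant..." role
    # back to the literal "assistant" before model validation.
    for i in range(len(data["choices"])):
        if role := data["choices"][i]["message"].get("role"):
            if role.startswith("assistant") and role != "assistant":
                data["choices"][i]["message"]["role"] = "assistant"

    assert data["choices"][0]["message"]["role"] == "assistant"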
@@ -113,38 +124,21 @@ class ModelResponse(ChatCompletion):
         content = self.choices[0].message.content or ""
         return content.split("</think>")[-1].strip()

-
-
-
-        Saves a deep copy of the options used for this generation,
-        excluding the messages for brevity.
-
-        Args:
-            options: Dictionary of model options from the API call.
-
-        Note:
-            Messages are removed to avoid storing large prompts.
-            Called internally by the generation functions.
-        """
-        self.model_options = copy.deepcopy(options)
-        if "messages" in self.model_options:
-            del self.model_options["messages"]
-
-    def set_headers(self, headers: dict[str, str]) -> None:
-        """Store HTTP response headers.
-
-        Saves response headers which contain LiteLLM metadata
-        including cost information and call IDs.
+    @property
+    def reasoning_content(self) -> str:
+        """Get the reasoning content.

-
-            headers: Dictionary of HTTP headers from the response.
+        @public

-
-
-            - x-litellm-call-id: Unique call identifier
-            - x-litellm-model-id: Actual model used
+        Returns:
+            The reasoning content from the model, or empty string if none.
         """
-
+        message = self.choices[0].message
+        if reasoning_content := getattr(message, "reasoning_content", None):
+            return reasoning_content
+        if not message.content or "</think>" not in message.content:
+            return ""
+        return message.content.split("</think>")[0].strip()

     def get_laminar_metadata(self) -> dict[str, str | int | float]:
         """Extract metadata for LMNR (Laminar) observability including cost tracking.
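Aside (illustration, not part of the diff): the new reasoning_content property prefers a provider-supplied reasoning_content field and otherwise falls back to the text before the closing </think> tag, mirroring how content keeps only the text after it. A small sketch with made-up text:

    raw = "Check the units first.</think>The answer is 42."

    content = raw.split("</think>")[-1].strip()  # "The answer is 42."
    reasoning = raw.split("</think>")[0].strip() if "</think>" in raw else ""
    print(content, "|", reasoning)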
@@ -224,25 +218,17 @@ class ModelResponse(ChatCompletion):
         - Cached tokens reduce actual cost but may not be reflected
         - Used internally by tracing but accessible for cost analysis
         """
-        metadata: dict[str, str | int | float] =
-
-        litellm_id = self.headers.get("x-litellm-call-id")
-        cost = float(self.headers.get("x-litellm-response-cost") or 0)
-
-        # Add all x-litellm-* headers
-        for header, value in self.headers.items():
-            if header.startswith("x-litellm-"):
-                header_name = header.replace("x-litellm-", "").lower()
-                metadata[f"litellm.{header_name}"] = value
+        metadata: dict[str, str | int | float] = deepcopy(self._metadata)

         # Add base metadata
         metadata.update({
-            "gen_ai.response.id":
+            "gen_ai.response.id": self.id,
             "gen_ai.response.model": self.model,
             "get_ai.system": "litellm",
         })

         # Add usage metadata if available
+        cost = None
         if self.usage:
             metadata.update({
                 "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
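Aside (illustration, not part of the diff): the metadata dict now starts from a deep copy of the per-request _metadata (timings, tags) instead of HTTP response headers, with gen_ai.* response and usage keys layered on top. A hypothetical consumer, assuming response came from this library's generate():

    metadata = response.get_laminar_metadata()
    print(metadata["gen_ai.response.model"])           # served model name
    print(metadata.get("gen_ai.usage.prompt_tokens"))  # present when usage exists
    for key, value in metadata.items():
        if key.startswith("model_options."):           # request options, stringified
            print(key, value)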
@@ -273,130 +259,84 @@ class ModelResponse(ChatCompletion):
             "get_ai.cost": cost,
         })

-
-
-
+        for key, value in self._model_options.items():
+            if "messages" in key:
+                continue
+            metadata[f"model_options.{key}"] = str(value)
+
+        other_fields = self.__dict__
+        for key, value in other_fields.items():
+            if key in ["_model_options", "_metadata", "choices", "usage"]:
+                continue
+            try:
+                metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
+            except Exception:
+                metadata[f"response.raw.{key}"] = str(value)
+
+        message = self.choices[0].message
+        for key, value in message.__dict__.items():
+            if key in ["content"]:
+                continue
+            metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)

         return metadata

+    def validate_output(self) -> None:
+        """Validate response output content and format.

-
-
+        Checks that response has non-empty content and validates against
+        response_format if structured output was requested.

-
-
-
+        Raises:
+            ValueError: If response content is empty.
+            ValidationError: If content doesn't match response_format schema.
+        """
+        if not self.content:
+            raise ValueError("Empty response content")

-
-
-
-            >>>
-            >>> response = await generate_structured(
-            ...     "gpt-5",
-            ...     response_format=Analysis,
-            ...     messages="Analyze this text..."
-            ... )
-            >>>
-            >>> # Primary usage: access parsed model
-            >>> analysis = response.parsed
-            >>> print(f"Sentiment: {analysis.sentiment}")
-            >>>
-            >>> # Can add to messages for conversation
-            >>> messages.append(response)
+        if response_format := self._model_options.get("response_format"):
+            if isinstance(response_format, BaseModel):
+                response_format.model_validate_json(self.content)

-    The two main interactions:
-    1. Accessing .parsed property for the structured data
-    2. Adding to AIMessages for conversation continuity

-
-
+class StructuredModelResponse(ModelResponse, Generic[T]):
+    """Response wrapper for structured/typed LLM output.

-
-        T: The Pydantic model type for the structured output.
+    @public

-
-    Extends ModelResponse with type-safe parsed data access.
-    Other inherited properties should rarely be needed.
+    Primary usage is accessing the .parsed property for the structured data.
     """

-
-
-
-        parsed_value: T | None = None,
-        **kwargs: Any,
-    ) -> None:
-        """Initialize with ChatCompletion and parsed value.
+    @classmethod
+    def from_model_response(cls, model_response: ModelResponse) -> "StructuredModelResponse[T]":
+        """Convert a ModelResponse to StructuredModelResponse.

-
-
-        from ParsedChatCompletion automatically.
+        Takes an existing ModelResponse and converts it to a StructuredModelResponse
+        for accessing parsed structured output. Used internally by generate_structured().

         Args:
-
-            parsed_value: Pre-parsed Pydantic model instance.
-                If None, attempts extraction from
-                ParsedChatCompletion.
-            **kwargs: Additional ChatCompletion parameters.
+            model_response: The ModelResponse to convert.

-
-
-        2. Extract from ParsedChatCompletion if available
-        3. Store as None (access will raise ValueError)
-
-        Note:
-            Usually created internally by generate_structured().
-            The parsed value is validated by Pydantic automatically.
+        Returns:
+            StructuredModelResponse with lazy parsing support.
         """
-
-
-
-        # Extract parsed value from ParsedChatCompletion if available
-        if chat_completion and isinstance(chat_completion, ParsedChatCompletion):
-            if chat_completion.choices:  # type: ignore[attr-defined]
-                message = chat_completion.choices[0].message  # type: ignore[attr-defined]
-                if hasattr(message, "parsed"):  # type: ignore
-                    self._parsed_value = message.parsed  # type: ignore[attr-defined]
+        model_response.__class__ = cls
+        return model_response  # type: ignore[return-value]

     @property
     def parsed(self) -> T:
-        """Get the parsed
-
-        @public
+        """Get the parsed structured output.

-
-
+        Lazily parses the JSON content into the specified Pydantic model.
+        Result is cached after first access.

         Returns:
-
+            Parsed Pydantic model instance.

         Raises:
-
-
-        Example:
-            >>> class UserInfo(BaseModel):
-            ...     name: str
-            ...     age: int
-            >>>
-            >>> response = await generate_structured(
-            ...     "gpt-5",
-            ...     response_format=UserInfo,
-            ...     messages="Extract user info..."
-            ... )
-            >>>
-            >>> # Primary usage: get the parsed model
-            >>> user = response.parsed
-            >>> print(f"{user.name} is {user.age} years old")
-            >>>
-            >>> # Can also add to messages
-            >>> messages.append(response)
-
-        Note:
-            Type-safe with full IDE support. This is the main property
-            you'll use with structured responses.
+            ValidationError: If content doesn't match the response_format schema.
         """
-        if self
-
-
-
-            "No parsed content available. This should not happen for StructuredModelResponse."
-        )
+        if not hasattr(self, "_parsed_value"):
+            response_format = self._model_options.get("response_format")
+            self._parsed_value: T = response_format.model_validate_json(self.content)  # type: ignore[return-value]
+        return self._parsed_value
ai_pipeline_core/pipeline.py
CHANGED
@@ -306,8 +306,6 @@ def pipeline_task(

     Args:
         __fn: Function to decorate (when used without parentheses).
-
-        Tracing parameters:
         trace_level: When to trace ("always", "debug", "off").
             - "always": Always trace (default)
             - "debug": Only trace when LMNR_DEBUG="true"
@@ -322,8 +320,6 @@ def pipeline_task(
             Also forces trace level to "always" if not already set.
         trace_trim_documents: Trim document content in traces to first 100 chars (default True).
             Reduces trace size with large documents.
-
-        Prefect task parameters:
         name: Task name (defaults to function name).
         description: Human-readable task description.
         tags: Tags for organization and filtering.
@@ -523,13 +519,8 @@ def pipeline_flow(
    ) -> DocumentList  # Must return DocumentList

     Args:
-        __fn: Function to decorate (when used without parentheses).
-
-        Config parameter:
         config: Required FlowConfig class for document loading/saving. Enables
             automatic loading from string paths and saving outputs.
-
-        Tracing parameters:
         trace_level: When to trace ("always", "debug", "off").
             - "always": Always trace (default)
             - "debug": Only trace when LMNR_DEBUG="true"
@@ -544,8 +535,6 @@ def pipeline_flow(
             Also forces trace level to "always" if not already set.
         trace_trim_documents: Trim document content in traces to first 100 chars (default True).
             Reduces trace size with large documents.
-
-        Prefect flow parameters:
         name: Flow name (defaults to function name).
         version: Flow version identifier.
         flow_run_name: Static or dynamic run name.
ai_pipeline_core/settings.py
CHANGED
@@ -126,6 +126,10 @@ class Settings(BaseSettings):
     # Prefect Configuration
     prefect_api_url: str = ""
     prefect_api_key: str = ""
+    prefect_api_auth_string: str = ""
+    prefect_work_pool_name: str = "default"
+    prefect_work_queue_name: str = "default"
+    prefect_gcs_bucket: str = ""

     # Observability
     lmnr_project_api_key: str = ""
@@ -135,6 +139,4 @@ class Settings(BaseSettings):
     gcs_service_account_file: str = ""  # Path to GCS service account JSON file


-# Legacy: Module-level instance for backwards compatibility
-# Applications should create their own settings instance
 settings = Settings()
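Aside (illustration, not part of the diff): the new fields follow the existing pydantic-settings pattern, so they should be configurable through same-named environment variables (assuming the class keeps the default field-name mapping with no env prefix). An illustrative sketch:

    import os

    os.environ["PREFECT_WORK_POOL_NAME"] = "gpu-pool"        # illustrative values
    os.environ["PREFECT_GCS_BUCKET"] = "my-pipeline-bucket"

    from ai_pipeline_core.settings import Settings

    settings = Settings()
    print(settings.prefect_work_pool_name)  # "gpu-pool"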
ai_pipeline_core/tracing.py
CHANGED