ai-pipeline-core 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
@@ -6,13 +6,18 @@ Provides enhanced response classes that use OpenAI-compatible base types via Lit
 with additional metadata, cost tracking, and structured output support.
 """
 
-import copy
+import json
+from copy import deepcopy
 from typing import Any, Generic, TypeVar
 
-from openai.types.chat import ChatCompletion, ParsedChatCompletion
-from pydantic import BaseModel, Field
+from openai.types.chat import ChatCompletion
+from openai.types.completion_usage import CompletionUsage
+from pydantic import BaseModel
 
-T = TypeVar("T", bound=BaseModel)
+T = TypeVar(
+    "T",
+    bound=BaseModel,
+)
 """Type parameter for structured response Pydantic models."""
 
 
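The import changes track the metadata rewrite further down: json now serializes raw response fields into trace metadata, and deepcopy (replacing the copy module) snapshots the stored metadata dict. A minimal illustration of why the deep copy matters, using stand-in data rather than package code:

    from copy import deepcopy

    metadata = {"tags": ["prod"], "time_taken": 1.5}
    snapshot = deepcopy(metadata)
    snapshot["tags"].append("debug")

    assert metadata["tags"] == ["prod"]  # the nested list in the original is untouched
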
@@ -52,42 +57,48 @@ class ModelResponse(ChatCompletion):
     when absolutely necessary.
     """
 
-    headers: dict[str, str] = Field(default_factory=dict)
-    model_options: dict[str, Any] = Field(default_factory=dict)
-
-    def __init__(self, chat_completion: ChatCompletion | None = None, **kwargs: Any) -> None:
-        """Initialize ModelResponse from ChatCompletion or kwargs.
+    def __init__(
+        self,
+        chat_completion: ChatCompletion,
+        model_options: dict[str, Any],
+        metadata: dict[str, Any],
+        usage: CompletionUsage | None = None,
+    ) -> None:
+        """Initialize ModelResponse from ChatCompletion.
 
-        Can be initialized from an existing ChatCompletion object or
-        directly from keyword arguments. Automatically initializes
-        headers dict if not provided.
+        Wraps an OpenAI ChatCompletion object with additional metadata
+        and model options for tracking and observability.
 
         Args:
-            chat_completion: Optional ChatCompletion to wrap.
-            **kwargs: Direct initialization parameters if no
-                ChatCompletion provided.
+            chat_completion: ChatCompletion object from the API.
+            model_options: Model configuration options used for the request.
+                Stored for metadata extraction and tracing.
+            metadata: Custom metadata for tracking (time_taken, first_token_time, etc.).
+                Includes timing information and custom tags.
+            usage: Optional usage information from streaming response.
 
         Example:
-            >>> # From ChatCompletion
-            >>> response = ModelResponse(chat_completion_obj)
-            >>>
-            >>> # Direct initialization (mainly for testing)
+            >>> # Usually created internally by generate()
             >>> response = ModelResponse(
-            ...     id="test",
-            ...     model="gpt-5",
-            ...     choices=[...]
+            ...     chat_completion=completion,
+            ...     model_options={"temperature": 0.7, "model": "gpt-4"},
+            ...     metadata={"time_taken": 1.5, "first_token_time": 0.3}
             ... )
         """
-        if chat_completion:
-            # Copy all attributes from the ChatCompletion instance
-            data = chat_completion.model_dump()
-            data["headers"] = {}  # Add default headers
-            super().__init__(**data)
-        else:
-            # Initialize from kwargs
-            if "headers" not in kwargs:
-                kwargs["headers"] = {}
-            super().__init__(**kwargs)
+        data = chat_completion.model_dump()
+
+        # fixes issue where the role is "assistantassistant" instead of "assistant"
+        for i in range(len(data["choices"])):
+            if role := data["choices"][i]["message"].get("role"):
+                if role.startswith("assistant") and role != "assistant":
+                    data["choices"][i]["message"]["role"] = "assistant"
+
+        super().__init__(**data)
+
+        self._model_options = model_options
+        self._metadata = metadata
+        if usage:
+            self.usage = usage
 
     @property
     def content(self) -> str:
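Besides the narrower constructor signature, 0.2.6 normalizes a malformed role field before delegating to ChatCompletion. A self-contained sketch of that normalization, using a hand-built payload in place of a real model_dump() result:

    # Some backends can return a doubled role such as "assistantassistant";
    # any "assistant*" variant is rewritten back to plain "assistant".
    data = {"choices": [{"message": {"role": "assistantassistant", "content": "hi"}}]}

    for i in range(len(data["choices"])):
        if role := data["choices"][i]["message"].get("role"):
            if role.startswith("assistant") and role != "assistant":
                data["choices"][i]["message"]["role"] = "assistant"

    assert data["choices"][0]["message"]["role"] == "assistant"
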
@@ -113,38 +124,21 @@ class ModelResponse(ChatCompletion):
         content = self.choices[0].message.content or ""
         return content.split("</think>")[-1].strip()
 
-    def set_model_options(self, options: dict[str, Any]) -> None:
-        """Store the model configuration used for generation.
-
-        Saves a deep copy of the options used for this generation,
-        excluding the messages for brevity.
-
-        Args:
-            options: Dictionary of model options from the API call.
-
-        Note:
-            Messages are removed to avoid storing large prompts.
-            Called internally by the generation functions.
-        """
-        self.model_options = copy.deepcopy(options)
-        if "messages" in self.model_options:
-            del self.model_options["messages"]
-
-    def set_headers(self, headers: dict[str, str]) -> None:
-        """Store HTTP response headers.
-
-        Saves response headers which contain LiteLLM metadata
-        including cost information and call IDs.
+    @property
+    def reasoning_content(self) -> str:
+        """Get the reasoning content.
 
-        Args:
-            headers: Dictionary of HTTP headers from the response.
+        @public
 
-        Headers of interest:
-            - x-litellm-response-cost: Generation cost
-            - x-litellm-call-id: Unique call identifier
-            - x-litellm-model-id: Actual model used
+        Returns:
+            The reasoning content from the model, or empty string if none.
         """
-        self.headers = copy.deepcopy(headers)
+        message = self.choices[0].message
+        if reasoning_content := getattr(message, "reasoning_content", None):
+            return reasoning_content
+        if not message.content or "</think>" not in message.content:
+            return ""
+        return message.content.split("</think>")[0].strip()
 
     def get_laminar_metadata(self) -> dict[str, str | int | float]:
         """Extract metadata for LMNR (Laminar) observability including cost tracking.
@@ -224,25 +218,17 @@ class ModelResponse(ChatCompletion):
         - Cached tokens reduce actual cost but may not be reflected
         - Used internally by tracing but accessible for cost analysis
         """
-        metadata: dict[str, str | int | float] = {}
-
-        litellm_id = self.headers.get("x-litellm-call-id")
-        cost = float(self.headers.get("x-litellm-response-cost") or 0)
-
-        # Add all x-litellm-* headers
-        for header, value in self.headers.items():
-            if header.startswith("x-litellm-"):
-                header_name = header.replace("x-litellm-", "").lower()
-                metadata[f"litellm.{header_name}"] = value
+        metadata: dict[str, str | int | float] = deepcopy(self._metadata)
 
         # Add base metadata
         metadata.update({
-            "gen_ai.response.id": litellm_id or self.id,
+            "gen_ai.response.id": self.id,
             "gen_ai.response.model": self.model,
             "get_ai.system": "litellm",
         })
 
         # Add usage metadata if available
+        cost = None
         if self.usage:
             metadata.update({
                 "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
@@ -273,130 +259,84 @@ class ModelResponse(ChatCompletion):
             "get_ai.cost": cost,
         })
 
-        if self.model_options:
-            for key, value in self.model_options.items():
-                metadata[f"model_options.{key}"] = str(value)
+        for key, value in self._model_options.items():
+            if "messages" in key:
+                continue
+            metadata[f"model_options.{key}"] = str(value)
+
+        other_fields = self.__dict__
+        for key, value in other_fields.items():
+            if key in ["_model_options", "_metadata", "choices", "usage"]:
+                continue
+            try:
+                metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
+            except Exception:
+                metadata[f"response.raw.{key}"] = str(value)
+
+        message = self.choices[0].message
+        for key, value in message.__dict__.items():
+            if key in ["content"]:
+                continue
+            metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)
 
         return metadata
 
+    def validate_output(self) -> None:
+        """Validate response output content and format.
 
-class StructuredModelResponse(ModelResponse, Generic[T]):
-    """Response wrapper for structured/typed LLM output.
+        Checks that response has non-empty content and validates against
+        response_format if structured output was requested.
 
-    @public
-
-    Primary usage is adding to AIMessages and accessing .parsed property:
+        Raises:
+            ValueError: If response content is empty.
+            ValidationError: If content doesn't match response_format schema.
+        """
+        if not self.content:
+            raise ValueError("Empty response content")
 
-        >>> class Analysis(BaseModel):
-        ...     sentiment: float
-        ...     summary: str
-        >>>
-        >>> response = await generate_structured(
-        ...     "gpt-5",
-        ...     response_format=Analysis,
-        ...     messages="Analyze this text..."
-        ... )
-        >>>
-        >>> # Primary usage: access parsed model
-        >>> analysis = response.parsed
-        >>> print(f"Sentiment: {analysis.sentiment}")
-        >>>
-        >>> # Can add to messages for conversation
-        >>> messages.append(response)
+        if response_format := self._model_options.get("response_format"):
+            if isinstance(response_format, BaseModel):
+                response_format.model_validate_json(self.content)
 
-    The two main interactions:
-    1. Accessing .parsed property for the structured data
-    2. Adding to AIMessages for conversation continuity
-
-    These patterns cover virtually all use cases. Advanced features exist
-    but should only be used when absolutely necessary.
+
+class StructuredModelResponse(ModelResponse, Generic[T]):
+    """Response wrapper for structured/typed LLM output.
 
-    Type Parameter:
-        T: The Pydantic model type for the structured output.
+    @public
 
-    Note:
-        Extends ModelResponse with type-safe parsed data access.
-        Other inherited properties should rarely be needed.
+    Primary usage is accessing the .parsed property for the structured data.
     """
 
-    def __init__(
-        self,
-        chat_completion: ChatCompletion | None = None,
-        parsed_value: T | None = None,
-        **kwargs: Any,
-    ) -> None:
-        """Initialize with ChatCompletion and parsed value.
+    @classmethod
+    def from_model_response(cls, model_response: ModelResponse) -> "StructuredModelResponse[T]":
+        """Convert a ModelResponse to StructuredModelResponse.
 
-        Creates a structured response from a base completion and
-        optionally a pre-parsed value. Can extract parsed value
-        from ParsedChatCompletion automatically.
+        Takes an existing ModelResponse and converts it to a StructuredModelResponse
+        for accessing parsed structured output. Used internally by generate_structured().
 
         Args:
-            chat_completion: Base chat completion response.
-            parsed_value: Pre-parsed Pydantic model instance.
-                If None, attempts extraction from
-                ParsedChatCompletion.
-            **kwargs: Additional ChatCompletion parameters.
+            model_response: The ModelResponse to convert.
 
-        Extraction behavior:
-            1. Use provided parsed_value if given
-            2. Extract from ParsedChatCompletion if available
-            3. Store as None (access will raise ValueError)
-
-        Note:
-            Usually created internally by generate_structured().
-            The parsed value is validated by Pydantic automatically.
+        Returns:
+            StructuredModelResponse with lazy parsing support.
         """
-        super().__init__(chat_completion, **kwargs)
-        self._parsed_value: T | None = parsed_value
-
-        # Extract parsed value from ParsedChatCompletion if available
-        if chat_completion and isinstance(chat_completion, ParsedChatCompletion):
-            if chat_completion.choices:  # type: ignore[attr-defined]
-                message = chat_completion.choices[0].message  # type: ignore[attr-defined]
-                if hasattr(message, "parsed"):  # type: ignore
-                    self._parsed_value = message.parsed  # type: ignore[attr-defined]
+        model_response.__class__ = cls
+        return model_response  # type: ignore[return-value]
 
     @property
     def parsed(self) -> T:
-        """Get the parsed Pydantic model instance.
-
-        @public
+        """Get the parsed structured output.
 
-        Primary property for accessing structured output.
-        This is the main reason to use generate_structured().
+        Lazily parses the JSON content into the specified Pydantic model.
+        Result is cached after first access.
 
         Returns:
-            Validated instance of the Pydantic model type T.
+            Parsed Pydantic model instance.
 
         Raises:
-            ValueError: If no parsed content available (internal error).
-
-        Example:
-            >>> class UserInfo(BaseModel):
-            ...     name: str
-            ...     age: int
-            >>>
-            >>> response = await generate_structured(
-            ...     "gpt-5",
-            ...     response_format=UserInfo,
-            ...     messages="Extract user info..."
-            ... )
-            >>>
-            >>> # Primary usage: get the parsed model
-            >>> user = response.parsed
-            >>> print(f"{user.name} is {user.age} years old")
-            >>>
-            >>> # Can also add to messages
-            >>> messages.append(response)
-
-        Note:
-            Type-safe with full IDE support. This is the main property
-            you'll use with structured responses.
+            ValidationError: If content doesn't match the response_format schema.
         """
-        if self._parsed_value is not None:
-            return self._parsed_value
-
-        raise ValueError(
-            "No parsed content available. This should not happen for StructuredModelResponse."
-        )
+        if not hasattr(self, "_parsed_value"):
+            response_format = self._model_options.get("response_format")
+            self._parsed_value: T = response_format.model_validate_json(self.content)  # type: ignore[return-value]
+        return self._parsed_value
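Parsing thus moves from eager (extracting message.parsed from a ParsedChatCompletion in __init__) to lazy (validating the JSON content against the stored response_format on first .parsed access, then caching). Reassigning __class__ in from_model_response avoids re-validating the whole payload, at the cost of mutating the original object in place. A hedged sketch of the new parsing path, with a hypothetical Analysis model and a literal JSON string standing in for real response content:

    from pydantic import BaseModel

    class Analysis(BaseModel):
        sentiment: float
        summary: str

    content = '{"sentiment": 0.9, "summary": "Positive overall."}'

    # Equivalent to what .parsed now does on first access:
    analysis = Analysis.model_validate_json(content)
    assert analysis.sentiment == 0.9
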
@@ -20,7 +20,7 @@ ModelName: TypeAlias = (
     "grok-4",
     # Small models
     "gemini-2.5-flash",
-    "gpt-5-mini",
+    "gpt-5-nano",
     "grok-4-fast",
     # Search models
     "gemini-2.5-flash-search",
@@ -306,8 +306,6 @@ def pipeline_task(
 
     Args:
         __fn: Function to decorate (when used without parentheses).
-
-    Tracing parameters:
         trace_level: When to trace ("always", "debug", "off").
            - "always": Always trace (default)
            - "debug": Only trace when LMNR_DEBUG="true"
@@ -322,8 +320,6 @@ def pipeline_task(
            Also forces trace level to "always" if not already set.
         trace_trim_documents: Trim document content in traces to first 100 chars (default True).
             Reduces trace size with large documents.
-
-    Prefect task parameters:
         name: Task name (defaults to function name).
         description: Human-readable task description.
         tags: Tags for organization and filtering.
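These two hunks only flatten the docstring: the "Tracing parameters" and "Prefect task parameters" sub-headers are folded into one continuous Args list, with no behavioral change. A hedged usage sketch for the documented arguments, assuming pipeline_task is exported from the package top level:

    from ai_pipeline_core import pipeline_task  # import path assumed

    @pipeline_task(
        name="clean-text",          # defaults to the function name
        trace_level="debug",        # trace only when LMNR_DEBUG="true"
        trace_trim_documents=True,  # trim document content in traces
        tags=["preprocess"],
    )
    async def clean_text(raw: str) -> str:
        return raw.strip()
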
@@ -523,13 +519,8 @@ def pipeline_flow(
        ) -> DocumentList  # Must return DocumentList
 
     Args:
-        __fn: Function to decorate (when used without parentheses).
-
-    Config parameter:
         config: Required FlowConfig class for document loading/saving. Enables
             automatic loading from string paths and saving outputs.
-
-    Tracing parameters:
         trace_level: When to trace ("always", "debug", "off").
            - "always": Always trace (default)
            - "debug": Only trace when LMNR_DEBUG="true"
@@ -544,8 +535,6 @@ def pipeline_flow(
            Also forces trace level to "always" if not already set.
         trace_trim_documents: Trim document content in traces to first 100 chars (default True).
             Reduces trace size with large documents.
-
-    Prefect flow parameters:
         name: Flow name (defaults to function name).
         version: Flow version identifier.
         flow_run_name: Static or dynamic run name.
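The same docstring flattening is applied to pipeline_flow. A hedged sketch of the documented usage, assuming top-level exports; the exact flow signature is only partially visible in this diff:

    from ai_pipeline_core import DocumentList, FlowConfig, pipeline_flow  # paths assumed

    class MyConfig(FlowConfig):
        ...  # document loading/saving configuration (not shown in this diff)

    @pipeline_flow(config=MyConfig, name="summarize", trace_level="always")
    async def summarize(documents: DocumentList) -> DocumentList:  # must return DocumentList
        return documents
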
@@ -126,6 +126,10 @@ class Settings(BaseSettings):
     # Prefect Configuration
     prefect_api_url: str = ""
     prefect_api_key: str = ""
+    prefect_api_auth_string: str = ""
+    prefect_work_pool_name: str = "default"
+    prefect_work_queue_name: str = "default"
+    prefect_gcs_bucket: str = ""
 
     # Observability
     lmnr_project_api_key: str = ""
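Settings extends pydantic's BaseSettings, so the four new Prefect fields should be populatable from the environment under the usual field-name mapping. A sketch assuming default pydantic-settings behavior (upper-cased field names, no custom prefix):

    import os

    os.environ["PREFECT_WORK_POOL_NAME"] = "gpu-pool"
    os.environ["PREFECT_GCS_BUCKET"] = "pipeline-artifacts"

    settings = Settings()  # Settings imported from the package's settings module
    assert settings.prefect_work_pool_name == "gpu-pool"
    assert settings.prefect_work_queue_name == "default"  # unset, falls back to the default
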
@@ -135,6 +139,4 @@ class Settings(BaseSettings):
     gcs_service_account_file: str = ""  # Path to GCS service account JSON file
 
 
-# Legacy: Module-level instance for backwards compatibility
-# Applications should create their own settings instance
 settings = Settings()
@@ -1,7 +1,5 @@
 """Command-line interface for simple pipeline execution."""
 
-from __future__ import annotations
-
 import asyncio
 import os
 import sys
@@ -6,8 +6,6 @@ This module centralizes:
 ``observe`` instrumentation, and optional support for test runs.
 """
 
-from __future__ import annotations
-
 import inspect
 import json
 import os
@@ -0,0 +1,8 @@
+"""Experimental utilities for deployment and remote execution.
+
+These features are experimental and subject to change.
+"""
+
+from .remote_deployment import remote_deployment, run_remote_deployment
+
+__all__ = ["remote_deployment", "run_remote_deployment"]
+ __all__ = ["remote_deployment", "run_remote_deployment"]