ai-pipeline-core 0.2.3__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/PKG-INFO +4 -4
  2. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/__init__.py +1 -1
  3. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/document.py +24 -1
  4. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/mime_type.py +4 -4
  5. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/ai_messages.py +32 -0
  6. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/client.py +38 -52
  7. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_options.py +19 -1
  8. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_response.py +101 -173
  9. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_types.py +1 -1
  10. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/pipeline.py +0 -11
  11. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/pyproject.toml +5 -5
  12. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/.gitignore +0 -0
  13. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/LICENSE +0 -0
  14. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/README.md +0 -0
  15. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/__init__.py +0 -0
  16. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/document_list.py +0 -0
  17. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/flow_document.py +0 -0
  18. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/task_document.py +0 -0
  19. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/temporary_document.py +0 -0
  20. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/utils.py +0 -0
  21. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/exceptions.py +0 -0
  22. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/flow/__init__.py +0 -0
  23. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/flow/config.py +0 -0
  24. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/flow/options.py +0 -0
  25. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/__init__.py +0 -0
  26. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/logging/__init__.py +0 -0
  27. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/logging/logging.yml +0 -0
  28. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/logging/logging_config.py +0 -0
  29. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  30. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/prefect.py +0 -0
  31. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/prompt_manager.py +0 -0
  32. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/py.typed +0 -0
  33. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/settings.py +0 -0
  34. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/simple_runner/__init__.py +0 -0
  35. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/simple_runner/cli.py +0 -0
  36. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/simple_runner/simple_runner.py +0 -0
  37. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/storage/__init__.py +0 -0
  38. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/storage/storage.py +0 -0
  39. {ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/tracing.py +0 -0
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ai-pipeline-core
- Version: 0.2.3
+ Version: 0.2.5
  Summary: Core utilities for AI-powered processing pipelines using prefect
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -20,7 +20,7 @@ Classifier: Typing :: Typed
  Requires-Python: >=3.12
  Requires-Dist: httpx>=0.28.1
  Requires-Dist: jinja2>=3.1.6
- Requires-Dist: lmnr>=0.7.17
+ Requires-Dist: lmnr>=0.7.18
  Requires-Dist: openai>=1.109.1
  Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
  Requires-Dist: prefect>=3.4.21
@@ -28,7 +28,7 @@ Requires-Dist: pydantic-settings>=2.10.1
  Requires-Dist: pydantic>=2.11.9
  Requires-Dist: python-magic>=0.4.27
  Requires-Dist: ruamel-yaml>=0.18.14
- Requires-Dist: tiktoken>=0.11.0
+ Requires-Dist: tiktoken>=0.12.0
  Provides-Extra: dev
  Requires-Dist: basedpyright>=1.31.2; extra == 'dev'
  Requires-Dist: bump2version>=1.0.1; extra == 'dev'
@@ -40,7 +40,7 @@ Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
  Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
  Requires-Dist: pytest-xdist>=3.8.0; extra == 'dev'
  Requires-Dist: pytest>=8.4.1; extra == 'dev'
- Requires-Dist: ruff>=0.12.9; extra == 'dev'
+ Requires-Dist: ruff>=0.14.1; extra == 'dev'
  Description-Content-Type: text/markdown

  # AI Pipeline Core
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/__init__.py
@@ -118,7 +118,7 @@ from .prompt_manager import PromptManager
  from .settings import Settings
  from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace

- __version__ = "0.2.3"
+ __version__ = "0.2.5"

  __all__ = [
  # Config/Settings
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/document.py
@@ -29,6 +29,7 @@ from typing import (
  overload,
  )

+ import tiktoken
  from pydantic import (
  BaseModel,
  ConfigDict,
@@ -980,7 +981,7 @@ class Document(BaseModel, ABC):
  """Detect the MIME type from document content.

  Detection strategy (in order):
- 1. Returns 'application/x-empty' for empty content
+ 1. Returns 'text/plain' for empty content
  2. Extension-based detection for known text formats (preferred)
  3. python-magic content analysis for unknown extensions
  4. Fallback to extension or 'application/octet-stream'
@@ -1103,6 +1104,28 @@ class Document(BaseModel, ABC):
  raise ValueError(f"Document is not text: {self.name}")
  return self.content.decode("utf-8")

+ @property
+ def approximate_tokens_count(self) -> int:
+ """Approximate tokens count for the document content.
+
+ @public
+
+ Uses tiktoken with gpt-4 encoding to estimate token count.
+ For text documents, encodes the actual text. For non-text
+ documents (images, PDFs, etc.), returns a fixed estimate of 1024 tokens.
+
+ Returns:
+ Approximate number of tokens for this document.
+
+ Example:
+ >>> doc = MyDocument.create(name="data.txt", content="Hello world")
+ >>> doc.approximate_tokens_count # ~2 tokens
+ """
+ if self.is_text:
+ return len(tiktoken.encoding_for_model("gpt-4").encode(self.text))
+ else:
+ return 1024 # Fixed estimate for non-text documents
+
  def as_yaml(self) -> Any:
  r"""Parse document content as YAML.

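Note: the new token estimate on Document (and the companion property on AIMessages later in this diff) uses the gpt-4 tiktoken encoding for text and a flat 1024-token guess for binary content; a standalone sketch of the same arithmetic (a plain function, not the library class):

    import tiktoken

    def approximate_tokens(text: str | None) -> int:
        # Text content: count real tokens with the gpt-4 encoding, as the property does.
        if text is not None:
            return len(tiktoken.encoding_for_model("gpt-4").encode(text))
        # Binary content (images, PDFs, ...): fixed 1024-token estimate, matching the property.
        return 1024

    print(approximate_tokens("Hello world"))  # ~2 tokens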
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/documents/mime_type.py
@@ -43,7 +43,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
  r"""Detect MIME type from document content and filename.

  Uses a multi-stage detection strategy for maximum accuracy:
- 1. Returns 'application/x-empty' for empty content
+ 1. Returns 'text/plain' for empty content
  2. Uses extension-based detection for known formats (most reliable)
  3. Falls back to python-magic content analysis
  4. Final fallback to extension or 'application/octet-stream'
@@ -57,7 +57,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
  Never returns None or empty string.

  Fallback behavior:
- - Empty content: 'application/x-empty'
+ - Empty content: 'text/plain'
  - Unknown extension with binary content: 'application/octet-stream'
  - Magic library failure: Falls back to extension or 'application/octet-stream'

@@ -75,13 +75,13 @@ def detect_mime_type(content: bytes, name: str) -> str:
  >>> detect_mime_type(b'Hello World', "text.txt")
  'text/plain'
  >>> detect_mime_type(b'', "empty.txt")
- 'application/x-empty'
+ 'text/plain'
  >>> detect_mime_type(b'\\x89PNG', "image.xyz")
  'image/png' # Magic detects PNG despite wrong extension
  """
  # Check for empty content
  if len(content) == 0:
- return "application/x-empty"
+ return "text/plain"

  # Try extension-based detection first for known formats
  # This is more reliable for text formats that magic might misidentify
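Note: the documented fallback chain amounts to the sketch below (illustrative only; the real module keeps a table of known text extensions and uses python-magic rather than mimetypes):

    import mimetypes

    def sketch_detect(content: bytes, name: str) -> str:
        if len(content) == 0:
            return "text/plain"  # empty content now maps to text/plain, not application/x-empty
        guessed, _ = mimetypes.guess_type(name)
        if guessed:
            return guessed  # extension-based detection is tried first
        # python-magic content sniffing runs here in the real implementation
        return "application/octet-stream"  # final fallback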
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/ai_messages.py
@@ -12,6 +12,7 @@ import json
  from copy import deepcopy
  from typing import Any, Callable, Iterable, SupportsIndex, Union

+ import tiktoken
  from openai.types.chat import (
  ChatCompletionContentPartParam,
  ChatCompletionMessageParam,
@@ -301,6 +302,37 @@ class AIMessages(list[AIMessageType]):
  system_prompt = ""
  return hashlib.sha256((system_prompt + json.dumps(self.to_prompt())).encode()).hexdigest()

+ @property
+ def approximate_tokens_count(self) -> int:
+ """Approximate tokens count for the messages.
+
+ @public
+
+ Uses tiktoken with gpt-4 encoding to estimate total token count
+ across all messages in the conversation.
+
+ Returns:
+ Approximate tokens count for all messages.
+
+ Raises:
+ ValueError: If message contains unsupported type.
+
+ Example:
+ >>> messages = AIMessages(["Hello", "World"])
+ >>> messages.approximate_tokens_count # ~2-3 tokens
+ """
+ count = 0
+ for message in self:
+ if isinstance(message, str):
+ count += len(tiktoken.encoding_for_model("gpt-4").encode(message))
+ elif isinstance(message, Document):
+ count += message.approximate_tokens_count
+ elif isinstance(message, ModelResponse): # type: ignore
+ count += len(tiktoken.encoding_for_model("gpt-4").encode(message.content))
+ else:
+ raise ValueError(f"Unsupported message type: {type(message)}")
+ return count
+
  @staticmethod
  def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
  """Convert a document to prompt format for LLM consumption.
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/client.py
@@ -12,15 +12,17 @@ Key functions:
  """

  import asyncio
+ import time
  from typing import Any, TypeVar

  from lmnr import Laminar
  from openai import AsyncOpenAI
+ from openai.lib.streaming.chat import ContentDeltaEvent, ContentDoneEvent
  from openai.types.chat import (
  ChatCompletionMessageParam,
  )
  from prefect.logging import get_logger
- from pydantic import BaseModel
+ from pydantic import BaseModel, ValidationError

  from ai_pipeline_core.exceptions import LLMError
  from ai_pipeline_core.settings import settings
@@ -130,19 +132,31 @@ async def _generate(
  api_key=settings.openai_api_key,
  base_url=settings.openai_base_url,
  ) as client:
- # Use parse for structured output, create for regular
- if completion_kwargs.get("response_format"):
- raw_response = await client.chat.completions.with_raw_response.parse( # type: ignore[var-annotated]
- **completion_kwargs,
- )
- else:
- raw_response = await client.chat.completions.with_raw_response.create( # type: ignore[var-annotated]
- **completion_kwargs
- )
-
- response = ModelResponse(raw_response.parse()) # type: ignore[arg-type]
- response.set_model_options(completion_kwargs)
- response.set_headers(dict(raw_response.headers.items())) # type: ignore[arg-type]
+ start_time, first_token_time = time.time(), None
+ async with client.chat.completions.stream(
+ model=model,
+ messages=messages,
+ **completion_kwargs,
+ ) as stream:
+ async for event in stream:
+ if isinstance(event, ContentDeltaEvent):
+ if not first_token_time:
+ first_token_time = time.time()
+ elif isinstance(event, ContentDoneEvent):
+ pass
+ if not first_token_time:
+ first_token_time = time.time()
+ raw_response = await stream.get_final_completion()
+
+ metadata = {
+ "time_taken": round(time.time() - start_time, 2),
+ "first_token_time": round(first_token_time - start_time, 2),
+ }
+ response = ModelResponse(
+ raw_response,
+ model_options=completion_kwargs,
+ metadata=metadata,
+ )
  return response

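Note: the timing metadata added here is the usual first-token/total-latency measurement taken while draining a stream; a standalone sketch of the pattern (a generic async iterable, not the exact OpenAI stream or event types):

    import time

    async def timed_drain(stream):
        # Record total latency and time-to-first-token while consuming a stream.
        start, first_token = time.time(), None
        async for _event in stream:
            if first_token is None:
                first_token = time.time()
        if first_token is None:  # stream yielded nothing
            first_token = time.time()
        return {
            "time_taken": round(time.time() - start, 2),
            "first_token_time": round(first_token - start, 2),
        }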
@@ -182,12 +196,10 @@ async def _generate_with_retry(
  context, messages, options.system_prompt, options.cache_ttl
  )
  completion_kwargs: dict[str, Any] = {
- "model": model,
- "messages": processed_messages,
  **options.to_openai_completion_kwargs(),
  }

- if context:
+ if context and options.cache_ttl:
  completion_kwargs["prompt_cache_key"] = context.get_prompt_cache_key(options.system_prompt)

  for attempt in range(options.retries):
@@ -197,20 +209,18 @@ async def _generate_with_retry(
  ) as span:
  response = await _generate(model, processed_messages, completion_kwargs)
  span.set_attributes(response.get_laminar_metadata())
- Laminar.set_span_output(response.content)
- if not response.content:
- raise ValueError(f"Model {model} returned an empty response.")
+ Laminar.set_span_output([
+ r for r in (response.reasoning_content, response.content) if r
+ ])
+ response.validate_output()
  return response
- except (asyncio.TimeoutError, ValueError, Exception) as e:
+ except (asyncio.TimeoutError, ValueError, ValidationError, Exception) as e:
  if not isinstance(e, asyncio.TimeoutError):
  # disable cache if it's not a timeout because it may cause an error
  completion_kwargs["extra_body"]["cache"] = {"no-cache": True}

  logger.warning(
- "LLM generation failed (attempt %d/%d): %s",
- attempt + 1,
- options.retries,
- e,
+ f"LLM generation failed (attempt {attempt + 1}/{options.retries}): {e}",
  )
  if attempt == options.retries - 1:
  raise LLMError("Exhausted all retry attempts for LLM generation.") from e
@@ -453,8 +463,8 @@ async def generate_structured(
  In most cases, leave as None to use framework defaults.
  Configure model behavior centrally via LiteLLM proxy settings when possible.

- VISION/PDF MODEL COMPATIBILITY:
- When using Documents with images/PDFs in structured output:
+ Note:
+ Vision/PDF model compatibility considerations:
  - Images require vision-capable models that also support structured output
  - PDFs require models with both document processing AND structured output support
  - Many models support either vision OR structured output, but not both
@@ -536,28 +546,4 @@ async def generate_structured(
  except (ValueError, LLMError):
  raise # Explicitly re-raise to satisfy DOC502

- # Extract the parsed value from the response
- parsed_value: T | None = None
-
- # Check if response has choices and parsed content
- if response.choices and hasattr(response.choices[0].message, "parsed"):
- parsed: Any = response.choices[0].message.parsed # type: ignore[attr-defined]
-
- # If parsed is a dict, instantiate it as the response format class
- if isinstance(parsed, dict):
- parsed_value = response_format(**parsed)
- # If it's already the right type, use it
- elif isinstance(parsed, response_format):
- parsed_value = parsed
- else:
- # Otherwise try to convert it
- raise TypeError(
- f"Unable to convert parsed response to {response_format.__name__}: "
- f"got type {type(parsed).__name__}" # type: ignore[reportUnknownArgumentType]
- )
-
- if parsed_value is None:
- raise ValueError("No parsed content available from the model response")
-
- # Create a StructuredModelResponse with the parsed value
- return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
+ return StructuredModelResponse[T].from_model_response(response)
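Note: for callers the structured helper behaves as before; it now simply hands the finished response to StructuredModelResponse.from_model_response, and parsing happens lazily in .parsed. A usage sketch mirroring the docstring examples elsewhere in this diff (model name and prompt are illustrative, and the import path is assumed from the module layout):

    from pydantic import BaseModel

    from ai_pipeline_core.llm.client import generate_structured  # assumed import path

    class Analysis(BaseModel):
        sentiment: float
        summary: str

    async def run() -> None:
        response = await generate_structured(
            "gpt-5",
            response_format=Analysis,
            messages="Analyze this text...",
        )
        analysis = response.parsed  # validated Analysis instance, parsed from response.content
        print(analysis.sentiment, analysis.summary)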
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_options.py
@@ -88,6 +88,12 @@ class ModelOptions(BaseModel):
  and detect abuse. Maximum length is typically 256 characters.
  Useful for multi-tenant applications or per-user billing.

+ metadata: Custom metadata tags for tracking and observability.
+ Dictionary of string key-value pairs for tagging requests.
+ Useful for tracking experiments, versions, or custom attributes.
+ Maximum of 16 key-value pairs, each key/value max 64 characters.
+ Passed through to LMNR tracing and API provider metadata.
+
  extra_body: Additional provider-specific parameters to pass in request body.
  Dictionary of custom parameters not covered by standard options.
  Merged with usage_tracking if both are set.
@@ -147,6 +153,12 @@ class ModelOptions(BaseModel):
  ... user="user_12345", # Track costs per user
  ... temperature=0.7
  ... )
+ >>>
+ >>> # With metadata for tracking and observability
+ >>> options = ModelOptions(
+ ... metadata={"experiment": "v1", "version": "2.0", "feature": "search"},
+ ... temperature=0.7
+ ... )

  Note:
  - Not all options apply to all models
@@ -165,7 +177,7 @@ class ModelOptions(BaseModel):
  search_context_size: Literal["low", "medium", "high"] | None = None
  reasoning_effort: Literal["low", "medium", "high"] | None = None
  retries: int = 3
- retry_delay_seconds: int = 10
+ retry_delay_seconds: int = 20
  timeout: int = 600
  cache_ttl: str | None = "5m"
  service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
@@ -175,6 +187,7 @@ class ModelOptions(BaseModel):
  verbosity: Literal["low", "medium", "high"] | None = None
  usage_tracking: bool = True
  user: str | None = None
+ metadata: dict[str, str] | None = None
  extra_body: dict[str, Any] | None = None

  def to_openai_completion_kwargs(self) -> dict[str, Any]:
@@ -200,6 +213,7 @@ class ModelOptions(BaseModel):
  - service_tier -> service_tier
  - verbosity -> verbosity
  - user -> user (for cost tracking)
+ - metadata -> metadata (for tracking/observability)
  - extra_body -> extra_body (merged with usage tracking)

  Web Search Structure:
@@ -253,7 +267,11 @@ class ModelOptions(BaseModel):
  if self.user:
  kwargs["user"] = self.user

+ if self.metadata:
+ kwargs["metadata"] = self.metadata
+
  if self.usage_tracking:
  kwargs["extra_body"]["usage"] = {"include": True}
+ kwargs["stream_options"] = {"include_usage": True}

  return kwargs
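Note: a quick end-to-end sketch of the new option (values are illustrative; the import path follows the module layout above):

    from ai_pipeline_core.llm.model_options import ModelOptions

    options = ModelOptions(
        metadata={"experiment": "v1", "feature": "search"},  # up to 16 pairs, 64 chars each
        temperature=0.7,
    )
    kwargs = options.to_openai_completion_kwargs()
    # kwargs["metadata"] carries the tags; with usage_tracking left at its default True,
    # kwargs["stream_options"] == {"include_usage": True} so streamed responses report usage.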
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_response.py
@@ -6,13 +6,17 @@ Provides enhanced response classes that use OpenAI-compatible base types via Lit
  with additional metadata, cost tracking, and structured output support.
  """

- import copy
+ import json
+ from copy import deepcopy
  from typing import Any, Generic, TypeVar

- from openai.types.chat import ChatCompletion, ParsedChatCompletion
- from pydantic import BaseModel, Field
+ from openai.types.chat import ChatCompletion
+ from pydantic import BaseModel

- T = TypeVar("T", bound=BaseModel)
+ T = TypeVar(
+ "T",
+ bound=BaseModel,
+ )
  """Type parameter for structured response Pydantic models."""


@@ -52,42 +56,37 @@ class ModelResponse(ChatCompletion):
  when absolutely necessary.
  """

- headers: dict[str, str] = Field(default_factory=dict)
- model_options: dict[str, Any] = Field(default_factory=dict)
-
- def __init__(self, chat_completion: ChatCompletion | None = None, **kwargs: Any) -> None:
- """Initialize ModelResponse from ChatCompletion or kwargs.
+ def __init__(
+ self,
+ chat_completion: ChatCompletion,
+ model_options: dict[str, Any],
+ metadata: dict[str, Any],
+ ) -> None:
+ """Initialize ModelResponse from ChatCompletion.

- Can be initialized from an existing ChatCompletion object or
- directly from keyword arguments. Automatically initializes
- headers dict if not provided.
+ Wraps an OpenAI ChatCompletion object with additional metadata
+ and model options for tracking and observability.

  Args:
- chat_completion: Optional ChatCompletion to wrap.
- **kwargs: Direct initialization parameters if no
- ChatCompletion provided.
+ chat_completion: ChatCompletion object from the API.
+ model_options: Model configuration options used for the request.
+ Stored for metadata extraction and tracing.
+ metadata: Custom metadata for tracking (time_taken, first_token_time, etc.).
+ Includes timing information and custom tags.

  Example:
- >>> # From ChatCompletion
- >>> response = ModelResponse(chat_completion_obj)
- >>>
- >>> # Direct initialization (mainly for testing)
+ >>> # Usually created internally by generate()
  >>> response = ModelResponse(
- ... id="test",
- ... model="gpt-5",
- ... choices=[...]
+ ... chat_completion=completion,
+ ... model_options={"temperature": 0.7, "model": "gpt-4"},
+ ... metadata={"time_taken": 1.5, "first_token_time": 0.3}
  ... )
  """
- if chat_completion:
- # Copy all attributes from the ChatCompletion instance
- data = chat_completion.model_dump()
- data["headers"] = {} # Add default headers
- super().__init__(**data)
- else:
- # Initialize from kwargs
- if "headers" not in kwargs:
- kwargs["headers"] = {}
- super().__init__(**kwargs)
+ data = chat_completion.model_dump()
+ super().__init__(**data)
+
+ self._model_options = model_options
+ self._metadata = metadata

  @property
  def content(self) -> str:
@@ -113,38 +112,21 @@ class ModelResponse(ChatCompletion):
  content = self.choices[0].message.content or ""
  return content.split("</think>")[-1].strip()

- def set_model_options(self, options: dict[str, Any]) -> None:
- """Store the model configuration used for generation.
-
- Saves a deep copy of the options used for this generation,
- excluding the messages for brevity.
-
- Args:
- options: Dictionary of model options from the API call.
-
- Note:
- Messages are removed to avoid storing large prompts.
- Called internally by the generation functions.
- """
- self.model_options = copy.deepcopy(options)
- if "messages" in self.model_options:
- del self.model_options["messages"]
-
- def set_headers(self, headers: dict[str, str]) -> None:
- """Store HTTP response headers.
-
- Saves response headers which contain LiteLLM metadata
- including cost information and call IDs.
+ @property
+ def reasoning_content(self) -> str:
+ """Get the reasoning content.

- Args:
- headers: Dictionary of HTTP headers from the response.
+ @public

- Headers of interest:
- - x-litellm-response-cost: Generation cost
- - x-litellm-call-id: Unique call identifier
- - x-litellm-model-id: Actual model used
+ Returns:
+ The reasoning content from the model, or empty string if none.
  """
- self.headers = copy.deepcopy(headers)
+ message = self.choices[0].message
+ if reasoning_content := getattr(message, "reasoning_content", None):
+ return reasoning_content
+ if not message.content or "</think>" not in message.content:
+ return ""
+ return message.content.split("</think>")[0].strip()

  def get_laminar_metadata(self) -> dict[str, str | int | float]:
  """Extract metadata for LMNR (Laminar) observability including cost tracking.
@@ -224,25 +206,17 @@ class ModelResponse(ChatCompletion):
  - Cached tokens reduce actual cost but may not be reflected
  - Used internally by tracing but accessible for cost analysis
  """
- metadata: dict[str, str | int | float] = {}
-
- litellm_id = self.headers.get("x-litellm-call-id")
- cost = float(self.headers.get("x-litellm-response-cost") or 0)
-
- # Add all x-litellm-* headers
- for header, value in self.headers.items():
- if header.startswith("x-litellm-"):
- header_name = header.replace("x-litellm-", "").lower()
- metadata[f"litellm.{header_name}"] = value
+ metadata: dict[str, str | int | float] = deepcopy(self._metadata)

  # Add base metadata
  metadata.update({
- "gen_ai.response.id": litellm_id or self.id,
+ "gen_ai.response.id": self.id,
  "gen_ai.response.model": self.model,
  "get_ai.system": "litellm",
  })

  # Add usage metadata if available
+ cost = None
  if self.usage:
  metadata.update({
  "gen_ai.usage.prompt_tokens": self.usage.prompt_tokens,
@@ -273,130 +247,84 @@
  "get_ai.cost": cost,
  })

- if self.model_options:
- for key, value in self.model_options.items():
- metadata[f"model_options.{key}"] = str(value)
+ for key, value in self._model_options.items():
+ if "messages" in key:
+ continue
+ metadata[f"model_options.{key}"] = str(value)
+
+ other_fields = self.__dict__
+ for key, value in other_fields.items():
+ if key in ["_model_options", "_metadata", "choices", "usage"]:
+ continue
+ try:
+ metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
+ except Exception:
+ metadata[f"response.raw.{key}"] = str(value)
+
+ message = self.choices[0].message
+ for key, value in message.__dict__.items():
+ if key in ["content"]:
+ continue
+ metadata[f"response.raw.message.{key}"] = json.dumps(value, indent=2, default=str)

  return metadata

+ def validate_output(self) -> None:
+ """Validate response output content and format.

- class StructuredModelResponse(ModelResponse, Generic[T]):
- """Response wrapper for structured/typed LLM output.
-
- @public
+ Checks that response has non-empty content and validates against
+ response_format if structured output was requested.

- Primary usage is adding to AIMessages and accessing .parsed property:
+ Raises:
+ ValueError: If response content is empty.
+ ValidationError: If content doesn't match response_format schema.
+ """
+ if not self.content:
+ raise ValueError("Empty response content")

- >>> class Analysis(BaseModel):
- ... sentiment: float
- ... summary: str
- >>>
- >>> response = await generate_structured(
- ... "gpt-5",
- ... response_format=Analysis,
- ... messages="Analyze this text..."
- ... )
- >>>
- >>> # Primary usage: access parsed model
- >>> analysis = response.parsed
- >>> print(f"Sentiment: {analysis.sentiment}")
- >>>
- >>> # Can add to messages for conversation
- >>> messages.append(response)
+ if response_format := self._model_options.get("response_format"):
+ if isinstance(response_format, BaseModel):
+ response_format.model_validate_json(self.content)

- The two main interactions:
- 1. Accessing .parsed property for the structured data
- 2. Adding to AIMessages for conversation continuity

- These patterns cover virtually all use cases. Advanced features exist
- but should only be used when absolutely necessary.
+ class StructuredModelResponse(ModelResponse, Generic[T]):
+ """Response wrapper for structured/typed LLM output.

- Type Parameter:
- T: The Pydantic model type for the structured output.
+ @public

- Note:
- Extends ModelResponse with type-safe parsed data access.
- Other inherited properties should rarely be needed.
+ Primary usage is accessing the .parsed property for the structured data.
  """

- def __init__(
- self,
- chat_completion: ChatCompletion | None = None,
- parsed_value: T | None = None,
- **kwargs: Any,
- ) -> None:
- """Initialize with ChatCompletion and parsed value.
+ @classmethod
+ def from_model_response(cls, model_response: ModelResponse) -> "StructuredModelResponse[T]":
+ """Convert a ModelResponse to StructuredModelResponse.

- Creates a structured response from a base completion and
- optionally a pre-parsed value. Can extract parsed value
- from ParsedChatCompletion automatically.
+ Takes an existing ModelResponse and converts it to a StructuredModelResponse
+ for accessing parsed structured output. Used internally by generate_structured().

  Args:
- chat_completion: Base chat completion response.
- parsed_value: Pre-parsed Pydantic model instance.
- If None, attempts extraction from
- ParsedChatCompletion.
- **kwargs: Additional ChatCompletion parameters.
-
- Extraction behavior:
- 1. Use provided parsed_value if given
- 2. Extract from ParsedChatCompletion if available
- 3. Store as None (access will raise ValueError)
+ model_response: The ModelResponse to convert.

- Note:
- Usually created internally by generate_structured().
- The parsed value is validated by Pydantic automatically.
+ Returns:
+ StructuredModelResponse with lazy parsing support.
  """
- super().__init__(chat_completion, **kwargs)
- self._parsed_value: T | None = parsed_value
-
- # Extract parsed value from ParsedChatCompletion if available
- if chat_completion and isinstance(chat_completion, ParsedChatCompletion):
- if chat_completion.choices: # type: ignore[attr-defined]
- message = chat_completion.choices[0].message # type: ignore[attr-defined]
- if hasattr(message, "parsed"): # type: ignore
- self._parsed_value = message.parsed # type: ignore[attr-defined]
+ model_response.__class__ = cls
+ return model_response # type: ignore[return-value]

  @property
  def parsed(self) -> T:
- """Get the parsed Pydantic model instance.
+ """Get the parsed structured output.

- @public
-
- Primary property for accessing structured output.
- This is the main reason to use generate_structured().
+ Lazily parses the JSON content into the specified Pydantic model.
+ Result is cached after first access.

  Returns:
- Validated instance of the Pydantic model type T.
+ Parsed Pydantic model instance.

  Raises:
- ValueError: If no parsed content available (internal error).
-
- Example:
- >>> class UserInfo(BaseModel):
- ... name: str
- ... age: int
- >>>
- >>> response = await generate_structured(
- ... "gpt-5",
- ... response_format=UserInfo,
- ... messages="Extract user info..."
- ... )
- >>>
- >>> # Primary usage: get the parsed model
- >>> user = response.parsed
- >>> print(f"{user.name} is {user.age} years old")
- >>>
- >>> # Can also add to messages
- >>> messages.append(response)
-
- Note:
- Type-safe with full IDE support. This is the main property
- you'll use with structured responses.
+ ValidationError: If content doesn't match the response_format schema.
  """
- if self._parsed_value is not None:
- return self._parsed_value
-
- raise ValueError(
- "No parsed content available. This should not happen for StructuredModelResponse."
- )
+ if not hasattr(self, "_parsed_value"):
+ response_format = self._model_options.get("response_format")
+ self._parsed_value: T = response_format.model_validate_json(self.content) # type: ignore[return-value]
+ return self._parsed_value
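Note: the lazy .parsed accessor reduces to validating response.content against the stored response_format; the same idea in plain Pydantic (the model class and JSON string are illustrative):

    from pydantic import BaseModel

    class UserInfo(BaseModel):
        name: str
        age: int

    content = '{"name": "Ada", "age": 36}'  # stands in for response.content
    parsed = UserInfo.model_validate_json(content)  # the call .parsed makes once, then caches
    print(parsed.name, parsed.age)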
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/llm/model_types.py
@@ -20,7 +20,7 @@ ModelName: TypeAlias = (
  "grok-4",
  # Small models
  "gemini-2.5-flash",
- "gpt-5-mini",
+ "gpt-5-nano",
  "grok-4-fast",
  # Search models
  "gemini-2.5-flash-search",
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/ai_pipeline_core/pipeline.py
@@ -306,8 +306,6 @@ def pipeline_task(

  Args:
  __fn: Function to decorate (when used without parentheses).
-
- Tracing parameters:
  trace_level: When to trace ("always", "debug", "off").
  - "always": Always trace (default)
  - "debug": Only trace when LMNR_DEBUG="true"
@@ -322,8 +320,6 @@ def pipeline_task(
  Also forces trace level to "always" if not already set.
  trace_trim_documents: Trim document content in traces to first 100 chars (default True).
  Reduces trace size with large documents.
-
- Prefect task parameters:
  name: Task name (defaults to function name).
  description: Human-readable task description.
  tags: Tags for organization and filtering.
@@ -523,13 +519,8 @@ def pipeline_flow(
  ) -> DocumentList # Must return DocumentList

  Args:
- __fn: Function to decorate (when used without parentheses).
-
- Config parameter:
  config: Required FlowConfig class for document loading/saving. Enables
  automatic loading from string paths and saving outputs.
-
- Tracing parameters:
  trace_level: When to trace ("always", "debug", "off").
  - "always": Always trace (default)
  - "debug": Only trace when LMNR_DEBUG="true"
@@ -544,8 +535,6 @@ def pipeline_flow(
  Also forces trace level to "always" if not already set.
  trace_trim_documents: Trim document content in traces to first 100 chars (default True).
  Reduces trace size with large documents.
-
- Prefect flow parameters:
  name: Flow name (defaults to function name).
  version: Flow version identifier.
  flow_run_name: Static or dynamic run name.
{ai_pipeline_core-0.2.3 → ai_pipeline_core-0.2.5}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "ai-pipeline-core"
- version = "0.2.3"
+ version = "0.2.5"
  description = "Core utilities for AI-powered processing pipelines using prefect"
  readme = "README.md"
  license = {text = "MIT"}
@@ -22,7 +22,7 @@ classifiers = [
  dependencies = [
  "httpx>=0.28.1",
  "Jinja2>=3.1.6",
- "lmnr>=0.7.17",
+ "lmnr>=0.7.18",
  "openai>=1.109.1",
  "prefect>=3.4.21",
  "prefect-gcp[cloud_storage]>=0.6.10",
@@ -30,7 +30,7 @@ dependencies = [
  "pydantic>=2.11.9",
  "python-magic>=0.4.27",
  "ruamel.yaml>=0.18.14",
- "tiktoken>=0.11.0",
+ "tiktoken>=0.12.0",
  ]

  [project.urls]
@@ -50,7 +50,7 @@ dev = [
  "pytest-mock>=3.14.0",
  "pytest-xdist>=3.8.0",
  "pytest>=8.4.1",
- "ruff>=0.12.9",
+ "ruff>=0.14.1",
  ]

  [tool.pytest.ini_options]
@@ -177,7 +177,7 @@ reportIncompatibleVariableOverride = "error"
  reportMissingParameterType = "warning"

  [tool.bumpversion]
- current_version = "0.2.3"
+ current_version = "0.2.5"
  commit = true
  tag = true
  tag_name = "v{new_version}"