ai-pipeline-core 0.2.4.tar.gz → 0.2.6.tar.gz

This diff shows the changes between publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (42)
  1. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/PKG-INFO +4 -4
  2. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/__init__.py +1 -1
  3. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/documents/document.py +24 -1
  4. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/documents/mime_type.py +4 -4
  5. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/llm/ai_messages.py +32 -0
  6. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/llm/client.py +82 -51
  7. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/llm/model_options.py +19 -1
  8. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/llm/model_response.py +113 -173
  9. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/llm/model_types.py +1 -1
  10. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/pipeline.py +0 -11
  11. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/settings.py +4 -2
  12. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/simple_runner/cli.py +0 -2
  13. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/tracing.py +0 -2
  14. ai_pipeline_core-0.2.6/ai_pipeline_core/utils/__init__.py +8 -0
  15. ai_pipeline_core-0.2.6/ai_pipeline_core/utils/deploy.py +373 -0
  16. ai_pipeline_core-0.2.6/ai_pipeline_core/utils/remote_deployment.py +269 -0
  17. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/pyproject.toml +6 -5
  18. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/.gitignore +0 -0
  19. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/LICENSE +0 -0
  20. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/README.md +0 -0
  21. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/documents/__init__.py +0 -0
  22. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/documents/document_list.py +0 -0
  23. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/documents/flow_document.py +0 -0
  24. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/documents/task_document.py +0 -0
  25. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/documents/temporary_document.py +0 -0
  26. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/documents/utils.py +0 -0
  27. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/exceptions.py +0 -0
  28. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/flow/__init__.py +0 -0
  29. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/flow/config.py +0 -0
  30. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/flow/options.py +0 -0
  31. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/llm/__init__.py +0 -0
  32. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/logging/__init__.py +0 -0
  33. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/logging/logging.yml +0 -0
  34. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/logging/logging_config.py +0 -0
  35. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  36. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/prefect.py +0 -0
  37. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/prompt_manager.py +0 -0
  38. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/py.typed +0 -0
  39. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/simple_runner/__init__.py +0 -0
  40. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/simple_runner/simple_runner.py +0 -0
  41. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/storage/__init__.py +0 -0
  42. {ai_pipeline_core-0.2.4 → ai_pipeline_core-0.2.6}/ai_pipeline_core/storage/storage.py +0 -0
--- ai_pipeline_core-0.2.4/PKG-INFO
+++ ai_pipeline_core-0.2.6/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.2.4
+Version: 0.2.6
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -20,7 +20,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.12
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: jinja2>=3.1.6
-Requires-Dist: lmnr>=0.7.17
+Requires-Dist: lmnr>=0.7.18
 Requires-Dist: openai>=1.109.1
 Requires-Dist: prefect-gcp[cloud-storage]>=0.6.10
 Requires-Dist: prefect>=3.4.21
@@ -28,7 +28,7 @@ Requires-Dist: pydantic-settings>=2.10.1
 Requires-Dist: pydantic>=2.11.9
 Requires-Dist: python-magic>=0.4.27
 Requires-Dist: ruamel-yaml>=0.18.14
-Requires-Dist: tiktoken>=0.11.0
+Requires-Dist: tiktoken>=0.12.0
 Provides-Extra: dev
 Requires-Dist: basedpyright>=1.31.2; extra == 'dev'
 Requires-Dist: bump2version>=1.0.1; extra == 'dev'
@@ -40,7 +40,7 @@ Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
 Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
 Requires-Dist: pytest-xdist>=3.8.0; extra == 'dev'
 Requires-Dist: pytest>=8.4.1; extra == 'dev'
-Requires-Dist: ruff>=0.12.9; extra == 'dev'
+Requires-Dist: ruff>=0.14.1; extra == 'dev'
 Description-Content-Type: text/markdown
 
 # AI Pipeline Core
--- ai_pipeline_core-0.2.4/ai_pipeline_core/__init__.py
+++ ai_pipeline_core-0.2.6/ai_pipeline_core/__init__.py
@@ -118,7 +118,7 @@ from .prompt_manager import PromptManager
 from .settings import Settings
 from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace
 
-__version__ = "0.2.4"
+__version__ = "0.2.6"
 
 __all__ = [
     # Config/Settings
--- ai_pipeline_core-0.2.4/ai_pipeline_core/documents/document.py
+++ ai_pipeline_core-0.2.6/ai_pipeline_core/documents/document.py
@@ -29,6 +29,7 @@ from typing import (
     overload,
 )
 
+import tiktoken
 from pydantic import (
     BaseModel,
     ConfigDict,
@@ -980,7 +981,7 @@ class Document(BaseModel, ABC):
         """Detect the MIME type from document content.
 
         Detection strategy (in order):
-        1. Returns 'application/x-empty' for empty content
+        1. Returns 'text/plain' for empty content
         2. Extension-based detection for known text formats (preferred)
         3. python-magic content analysis for unknown extensions
         4. Fallback to extension or 'application/octet-stream'
@@ -1103,6 +1104,28 @@ class Document(BaseModel, ABC):
             raise ValueError(f"Document is not text: {self.name}")
         return self.content.decode("utf-8")
 
+    @property
+    def approximate_tokens_count(self) -> int:
+        """Approximate tokens count for the document content.
+
+        @public
+
+        Uses tiktoken with gpt-4 encoding to estimate token count.
+        For text documents, encodes the actual text. For non-text
+        documents (images, PDFs, etc.), returns a fixed estimate of 1024 tokens.
+
+        Returns:
+            Approximate number of tokens for this document.
+
+        Example:
+            >>> doc = MyDocument.create(name="data.txt", content="Hello world")
+            >>> doc.approximate_tokens_count  # ~2 tokens
+        """
+        if self.is_text:
+            return len(tiktoken.encoding_for_model("gpt-4").encode(self.text))
+        else:
+            return 1024  # Fixed estimate for non-text documents
+
     def as_yaml(self) -> Any:
         r"""Parse document content as YAML.
 
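The new approximate_tokens_count property makes it cheap to budget context before a model call. A minimal usage sketch, assuming FlowDocument is exported from ai_pipeline_core.documents; the subclass name and the 8,000-token budget are illustrative, not part of the package:

    from ai_pipeline_core.documents import FlowDocument

    class ReportDocument(FlowDocument):
        """Hypothetical document type for this sketch."""

    doc = ReportDocument.create(name="report.md", content="# Q3 Report\n...")

    # Text documents are tokenized with tiktoken; non-text ones count as a flat 1024.
    if doc.approximate_tokens_count > 8_000:
        print(f"{doc.name} exceeds the budget; summarize it first")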
--- ai_pipeline_core-0.2.4/ai_pipeline_core/documents/mime_type.py
+++ ai_pipeline_core-0.2.6/ai_pipeline_core/documents/mime_type.py
@@ -43,7 +43,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
     r"""Detect MIME type from document content and filename.
 
     Uses a multi-stage detection strategy for maximum accuracy:
-    1. Returns 'application/x-empty' for empty content
+    1. Returns 'text/plain' for empty content
     2. Uses extension-based detection for known formats (most reliable)
     3. Falls back to python-magic content analysis
     4. Final fallback to extension or 'application/octet-stream'
@@ -57,7 +57,7 @@ def detect_mime_type(content: bytes, name: str) -> str:
         Never returns None or empty string.
 
     Fallback behavior:
-        - Empty content: 'application/x-empty'
+        - Empty content: 'text/plain'
         - Unknown extension with binary content: 'application/octet-stream'
         - Magic library failure: Falls back to extension or 'application/octet-stream'
 
@@ -75,13 +75,13 @@ def detect_mime_type(content: bytes, name: str) -> str:
        >>> detect_mime_type(b'Hello World', "text.txt")
        'text/plain'
        >>> detect_mime_type(b'', "empty.txt")
-       'application/x-empty'
+       'text/plain'
        >>> detect_mime_type(b'\\x89PNG', "image.xyz")
        'image/png'  # Magic detects PNG despite wrong extension
    """
    # Check for empty content
    if len(content) == 0:
-        return "application/x-empty"
+        return "text/plain"
 
    # Try extension-based detection first for known formats
    # This is more reliable for text formats that magic might misidentify
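The practical effect of this file's change: empty content is now classified as plain text instead of a distinct empty type, which matters for any caller that branches on text versus binary. A short sketch of the observable behavior, using the module path from the file list above:

    from ai_pipeline_core.documents.mime_type import detect_mime_type

    # 0.2.4 returned 'application/x-empty' here; 0.2.6 treats empty content as text.
    assert detect_mime_type(b"", "empty.txt") == "text/plain"
    assert detect_mime_type(b"Hello World", "notes.txt") == "text/plain"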
--- ai_pipeline_core-0.2.4/ai_pipeline_core/llm/ai_messages.py
+++ ai_pipeline_core-0.2.6/ai_pipeline_core/llm/ai_messages.py
@@ -12,6 +12,7 @@ import json
 from copy import deepcopy
 from typing import Any, Callable, Iterable, SupportsIndex, Union
 
+import tiktoken
 from openai.types.chat import (
     ChatCompletionContentPartParam,
     ChatCompletionMessageParam,
@@ -301,6 +302,37 @@ class AIMessages(list[AIMessageType]):
             system_prompt = ""
         return hashlib.sha256((system_prompt + json.dumps(self.to_prompt())).encode()).hexdigest()
 
+    @property
+    def approximate_tokens_count(self) -> int:
+        """Approximate tokens count for the messages.
+
+        @public
+
+        Uses tiktoken with gpt-4 encoding to estimate total token count
+        across all messages in the conversation.
+
+        Returns:
+            Approximate tokens count for all messages.
+
+        Raises:
+            ValueError: If message contains unsupported type.
+
+        Example:
+            >>> messages = AIMessages(["Hello", "World"])
+            >>> messages.approximate_tokens_count  # ~2-3 tokens
+        """
+        count = 0
+        for message in self:
+            if isinstance(message, str):
+                count += len(tiktoken.encoding_for_model("gpt-4").encode(message))
+            elif isinstance(message, Document):
+                count += message.approximate_tokens_count
+            elif isinstance(message, ModelResponse):  # type: ignore
+                count += len(tiktoken.encoding_for_model("gpt-4").encode(message.content))
+            else:
+                raise ValueError(f"Unsupported message type: {type(message)}")
+        return count
+
     @staticmethod
     def document_to_prompt(document: Document) -> list[ChatCompletionContentPartParam]:
         """Convert a document to prompt format for LLM consumption.
--- ai_pipeline_core-0.2.4/ai_pipeline_core/llm/client.py
+++ ai_pipeline_core-0.2.6/ai_pipeline_core/llm/client.py
@@ -12,15 +12,17 @@ Key functions:
 """
 
 import asyncio
+import time
 from typing import Any, TypeVar
 
 from lmnr import Laminar
 from openai import AsyncOpenAI
+from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent, ContentDoneEvent
 from openai.types.chat import (
     ChatCompletionMessageParam,
 )
 from prefect.logging import get_logger
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationError
 
 from ai_pipeline_core.exceptions import LLMError
 from ai_pipeline_core.settings import settings
@@ -101,6 +103,42 @@ def _process_messages(
     return processed_messages
 
 
+def _model_name_to_openrouter_model(model: ModelName) -> str:
+    """Convert a model name to an OpenRouter model name.
+
+    Args:
+        model: Model name to convert.
+
+    Returns:
+        OpenRouter model name.
+    """
+    if model == "gpt-4o-search":
+        return "openai/gpt-4o-search-preview"
+    if model == "gemini-2.5-flash-search":
+        return "google/gemini-2.5-flash:online"
+    if model == "grok-4-fast-search":
+        return "x-ai/grok-4-fast:online"
+    if model == "sonar-pro-search":
+        return "perplexity/sonar-reasoning-pro"
+    if model.startswith("gemini"):
+        return f"google/{model}"
+    elif model.startswith("gpt"):
+        return f"openai/{model}"
+    elif model.startswith("grok"):
+        return f"x-ai/{model}"
+    elif model.startswith("claude"):
+        return f"anthropic/{model}"
+    elif model.startswith("qwen3"):
+        return f"qwen/{model}"
+    elif model.startswith("deepseek-"):
+        return f"deepseek/{model}"
+    elif model.startswith("glm-"):
+        return f"z-ai/{model}"
+    elif model.startswith("kimi-"):
+        return f"moonshotai/{model}"
+    return model
+
+
 async def _generate(
     model: str, messages: list[ChatCompletionMessageParam], completion_kwargs: dict[str, Any]
 ) -> ModelResponse:
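_model_name_to_openrouter_model is internal and only applied when the configured base URL contains "openrouter" (see the next hunk). Its behavior reads straight off the branches above; a few illustrative cases, with model names chosen only to exercise each rule:

    assert _model_name_to_openrouter_model("gpt-4o-search") == "openai/gpt-4o-search-preview"  # fixed alias
    assert _model_name_to_openrouter_model("gemini-2.5-pro") == "google/gemini-2.5-pro"        # prefix rule
    assert _model_name_to_openrouter_model("claude-sonnet-4") == "anthropic/claude-sonnet-4"   # prefix rule
    assert _model_name_to_openrouter_model("mistral-large") == "mistral-large"                 # passthrough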
@@ -126,23 +164,44 @@ async def _generate(
     - Captures response headers for cost tracking
     - Response includes model options for debugging
     """
+    if "openrouter" in settings.openai_base_url.lower():
+        model = _model_name_to_openrouter_model(model)
+
     async with AsyncOpenAI(
         api_key=settings.openai_api_key,
         base_url=settings.openai_base_url,
     ) as client:
-        # Use parse for structured output, create for regular
-        if completion_kwargs.get("response_format"):
-            raw_response = await client.chat.completions.with_raw_response.parse(  # type: ignore[var-annotated]
-                **completion_kwargs,
-            )
-        else:
-            raw_response = await client.chat.completions.with_raw_response.create(  # type: ignore[var-annotated]
-                **completion_kwargs
-            )
-
-        response = ModelResponse(raw_response.parse())  # type: ignore[arg-type]
-        response.set_model_options(completion_kwargs)
-        response.set_headers(dict(raw_response.headers.items()))  # type: ignore[arg-type]
+        start_time = time.time()
+        first_token_time = None
+        usage = None
+        async with client.chat.completions.stream(
+            model=model,
+            messages=messages,
+            **completion_kwargs,
+        ) as stream:
+            async for event in stream:
+                if isinstance(event, ContentDeltaEvent):
+                    if not first_token_time:
+                        first_token_time = time.time()
+                elif isinstance(event, ContentDoneEvent):
+                    pass
+                elif isinstance(event, ChunkEvent):
+                    if event.chunk.usage:  # used to fix a bug with missing usage data
+                        usage = event.chunk.usage
+            if not first_token_time:
+                first_token_time = time.time()
+            raw_response = await stream.get_final_completion()
+
+        metadata = {
+            "time_taken": round(time.time() - start_time, 2),
+            "first_token_time": round(first_token_time - start_time, 2),
+        }
+        response = ModelResponse(
+            raw_response,
+            model_options=completion_kwargs,
+            metadata=metadata,
+            usage=usage,
+        )
         return response
 
 
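This is the central change of the release: _generate now drives the openai SDK's streaming helper so it can measure time-to-first-token and capture usage data that some gateways only attach to the final chunk. A self-contained sketch of the same pattern; the model name and prompt are placeholders:

    import time

    from openai import AsyncOpenAI
    from openai.lib.streaming.chat import ChunkEvent, ContentDeltaEvent

    async def timed_completion(client: AsyncOpenAI, model: str) -> dict:
        start = time.time()
        first_token = None
        usage = None
        async with client.chat.completions.stream(
            model=model,
            messages=[{"role": "user", "content": "ping"}],
            stream_options={"include_usage": True},  # ask for usage on the final chunk
        ) as stream:
            async for event in stream:
                if isinstance(event, ContentDeltaEvent) and first_token is None:
                    first_token = time.time()
                elif isinstance(event, ChunkEvent) and event.chunk.usage:
                    usage = event.chunk.usage  # may only arrive on the last chunk
            completion = await stream.get_final_completion()
        return {
            "text": completion.choices[0].message.content,
            "ttft": None if first_token is None else round(first_token - start, 2),
            "usage": usage,
        }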
@@ -182,8 +241,6 @@ async def _generate_with_retry(
         context, messages, options.system_prompt, options.cache_ttl
     )
     completion_kwargs: dict[str, Any] = {
-        "model": model,
-        "messages": processed_messages,
         **options.to_openai_completion_kwargs(),
     }
 
@@ -197,20 +254,18 @@ async def _generate_with_retry(
             ) as span:
                 response = await _generate(model, processed_messages, completion_kwargs)
                 span.set_attributes(response.get_laminar_metadata())
-                Laminar.set_span_output(response.content)
-                if not response.content:
-                    raise ValueError(f"Model {model} returned an empty response.")
+                Laminar.set_span_output([
+                    r for r in (response.reasoning_content, response.content) if r
+                ])
+                response.validate_output()
                 return response
-        except (asyncio.TimeoutError, ValueError, Exception) as e:
+        except (asyncio.TimeoutError, ValueError, ValidationError, Exception) as e:
             if not isinstance(e, asyncio.TimeoutError):
                 # disable cache if it's not a timeout because it may cause an error
                 completion_kwargs["extra_body"]["cache"] = {"no-cache": True}
 
             logger.warning(
-                "LLM generation failed (attempt %d/%d): %s",
-                attempt + 1,
-                options.retries,
-                e,
+                f"LLM generation failed (attempt {attempt + 1}/{options.retries}): {e}",
             )
             if attempt == options.retries - 1:
                 raise LLMError("Exhausted all retry attempts for LLM generation.") from e
@@ -453,8 +508,8 @@ async def generate_structured(
         In most cases, leave as None to use framework defaults.
         Configure model behavior centrally via LiteLLM proxy settings when possible.
 
-    VISION/PDF MODEL COMPATIBILITY:
-    When using Documents with images/PDFs in structured output:
+    Note:
+        Vision/PDF model compatibility considerations:
         - Images require vision-capable models that also support structured output
         - PDFs require models with both document processing AND structured output support
         - Many models support either vision OR structured output, but not both
@@ -536,28 +591,4 @@ async def generate_structured(
     except (ValueError, LLMError):
         raise  # Explicitly re-raise to satisfy DOC502
 
-    # Extract the parsed value from the response
-    parsed_value: T | None = None
-
-    # Check if response has choices and parsed content
-    if response.choices and hasattr(response.choices[0].message, "parsed"):
-        parsed: Any = response.choices[0].message.parsed  # type: ignore[attr-defined]
-
-        # If parsed is a dict, instantiate it as the response format class
-        if isinstance(parsed, dict):
-            parsed_value = response_format(**parsed)
-        # If it's already the right type, use it
-        elif isinstance(parsed, response_format):
-            parsed_value = parsed
-        else:
-            # Otherwise try to convert it
-            raise TypeError(
-                f"Unable to convert parsed response to {response_format.__name__}: "
-                f"got type {type(parsed).__name__}"  # type: ignore[reportUnknownArgumentType]
-            )
-
-    if parsed_value is None:
-        raise ValueError("No parsed content available from the model response")
-
-    # Create a StructuredModelResponse with the parsed value
-    return StructuredModelResponse[T](chat_completion=response, parsed_value=parsed_value)
+    return StructuredModelResponse[T].from_model_response(response)
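With extraction moved into StructuredModelResponse.from_model_response, calling code is unchanged. A hedged sketch of typical structured-output usage; the argument names, the model string, and the .parsed accessor are assumptions based on the surrounding diff, and the Pydantic model is illustrative:

    from pydantic import BaseModel

    from ai_pipeline_core.llm import generate_structured

    class Verdict(BaseModel):
        score: int
        rationale: str

    async def review(text: str) -> Verdict:
        response = await generate_structured(
            "gpt-5",  # placeholder model name
            messages=f"Rate this text 1-10 and explain briefly:\n{text}",
            response_format=Verdict,
        )
        return response.parsed  # assumed accessor for the parsed value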
--- ai_pipeline_core-0.2.4/ai_pipeline_core/llm/model_options.py
+++ ai_pipeline_core-0.2.6/ai_pipeline_core/llm/model_options.py
@@ -88,6 +88,12 @@ class ModelOptions(BaseModel):
             and detect abuse. Maximum length is typically 256 characters.
             Useful for multi-tenant applications or per-user billing.
 
+        metadata: Custom metadata tags for tracking and observability.
+            Dictionary of string key-value pairs for tagging requests.
+            Useful for tracking experiments, versions, or custom attributes.
+            Maximum of 16 key-value pairs, each key/value max 64 characters.
+            Passed through to LMNR tracing and API provider metadata.
+
         extra_body: Additional provider-specific parameters to pass in request body.
             Dictionary of custom parameters not covered by standard options.
             Merged with usage_tracking if both are set.
@@ -147,6 +153,12 @@ class ModelOptions(BaseModel):
         ...     user="user_12345",  # Track costs per user
         ...     temperature=0.7
         ... )
+        >>>
+        >>> # With metadata for tracking and observability
+        >>> options = ModelOptions(
+        ...     metadata={"experiment": "v1", "version": "2.0", "feature": "search"},
+        ...     temperature=0.7
+        ... )
 
     Note:
         - Not all options apply to all models
@@ -165,7 +177,7 @@ class ModelOptions(BaseModel):
     search_context_size: Literal["low", "medium", "high"] | None = None
     reasoning_effort: Literal["low", "medium", "high"] | None = None
     retries: int = 3
-    retry_delay_seconds: int = 10
+    retry_delay_seconds: int = 20
     timeout: int = 600
     cache_ttl: str | None = "5m"
     service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
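Besides the new default of 20 seconds between attempts (up from 10), these knobs stay overridable per call. A quick sketch, assuming ModelOptions is exported from ai_pipeline_core.llm; the values are illustrative:

    from ai_pipeline_core.llm import ModelOptions

    # Five attempts, 30 s apart, each capped at 300 s.
    options = ModelOptions(retries=5, retry_delay_seconds=30, timeout=300)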
@@ -175,6 +187,7 @@ class ModelOptions(BaseModel):
     verbosity: Literal["low", "medium", "high"] | None = None
     usage_tracking: bool = True
     user: str | None = None
+    metadata: dict[str, str] | None = None
     extra_body: dict[str, Any] | None = None
 
     def to_openai_completion_kwargs(self) -> dict[str, Any]:
@@ -200,6 +213,7 @@ class ModelOptions(BaseModel):
         - service_tier -> service_tier
         - verbosity -> verbosity
         - user -> user (for cost tracking)
+        - metadata -> metadata (for tracking/observability)
         - extra_body -> extra_body (merged with usage tracking)
 
         Web Search Structure:
@@ -253,7 +267,11 @@ class ModelOptions(BaseModel):
         if self.user:
             kwargs["user"] = self.user
 
+        if self.metadata:
+            kwargs["metadata"] = self.metadata
+
         if self.usage_tracking:
             kwargs["extra_body"]["usage"] = {"include": True}
+            kwargs["stream_options"] = {"include_usage": True}
 
         return kwargs
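Taken together, the ModelOptions changes are visible in the kwargs handed to the OpenAI client: metadata is forwarded as-is, and usage tracking now also requests per-chunk usage for the new streaming path. A sketch of the observable result, assuming the same export path as above:

    from ai_pipeline_core.llm import ModelOptions

    kwargs = ModelOptions(
        user="user_12345",
        metadata={"experiment": "v1"},
    ).to_openai_completion_kwargs()

    assert kwargs["user"] == "user_12345"
    assert kwargs["metadata"] == {"experiment": "v1"}
    assert kwargs["extra_body"]["usage"] == {"include": True}   # usage_tracking defaults to True
    assert kwargs["stream_options"] == {"include_usage": True}  # new in 0.2.6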