openaivec-0.10.0-py3-none-any.whl → openaivec-1.0.10-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
Files changed (45)
  1. openaivec/__init__.py +13 -4
  2. openaivec/_cache/__init__.py +12 -0
  3. openaivec/_cache/optimize.py +109 -0
  4. openaivec/_cache/proxy.py +806 -0
  5. openaivec/_di.py +326 -0
  6. openaivec/_embeddings.py +203 -0
  7. openaivec/{log.py → _log.py} +2 -2
  8. openaivec/_model.py +113 -0
  9. openaivec/{prompt.py → _prompt.py} +95 -28
  10. openaivec/_provider.py +207 -0
  11. openaivec/_responses.py +511 -0
  12. openaivec/_schema/__init__.py +9 -0
  13. openaivec/_schema/infer.py +340 -0
  14. openaivec/_schema/spec.py +350 -0
  15. openaivec/_serialize.py +234 -0
  16. openaivec/{util.py → _util.py} +25 -85
  17. openaivec/pandas_ext.py +1635 -425
  18. openaivec/spark.py +604 -335
  19. openaivec/task/__init__.py +27 -29
  20. openaivec/task/customer_support/__init__.py +9 -15
  21. openaivec/task/customer_support/customer_sentiment.py +51 -41
  22. openaivec/task/customer_support/inquiry_classification.py +86 -61
  23. openaivec/task/customer_support/inquiry_summary.py +44 -45
  24. openaivec/task/customer_support/intent_analysis.py +56 -41
  25. openaivec/task/customer_support/response_suggestion.py +49 -43
  26. openaivec/task/customer_support/urgency_analysis.py +76 -71
  27. openaivec/task/nlp/__init__.py +4 -4
  28. openaivec/task/nlp/dependency_parsing.py +19 -20
  29. openaivec/task/nlp/keyword_extraction.py +22 -24
  30. openaivec/task/nlp/morphological_analysis.py +25 -25
  31. openaivec/task/nlp/named_entity_recognition.py +26 -28
  32. openaivec/task/nlp/sentiment_analysis.py +29 -21
  33. openaivec/task/nlp/translation.py +24 -30
  34. openaivec/task/table/__init__.py +3 -0
  35. openaivec/task/table/fillna.py +183 -0
  36. openaivec-1.0.10.dist-info/METADATA +399 -0
  37. openaivec-1.0.10.dist-info/RECORD +39 -0
  38. {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
  39. openaivec/embeddings.py +0 -172
  40. openaivec/responses.py +0 -392
  41. openaivec/serialize.py +0 -225
  42. openaivec/task/model.py +0 -84
  43. openaivec-0.10.0.dist-info/METADATA +0 -546
  44. openaivec-0.10.0.dist-info/RECORD +0 -29
  45. {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
openaivec/responses.py DELETED
@@ -1,392 +0,0 @@
- import asyncio
- from dataclasses import dataclass, field
- from logging import Logger, getLogger
- from typing import Generic, List, Type, TypeVar, cast
-
- from openai import AsyncOpenAI, OpenAI, RateLimitError
- from openai.types.responses import ParsedResponse
- from pydantic import BaseModel
-
- from .log import observe
- from .task.model import PreparedTask
- from .util import backoff, backoff_async, map, map_async
-
- __all__ = [
-     "BatchResponses",
-     "AsyncBatchResponses",
- ]
-
- _LOGGER: Logger = getLogger(__name__)
-
-
- def _vectorize_system_message(system_message: str) -> str:
-     """Return the system prompt that instructs the model to work on a batch.
-
-     The returned XML‐ish prompt explains two things to the LLM:
-
-     1. The *general* system instruction coming from the caller (`system_message`)
-        is preserved verbatim.
-     2. Extra instructions describe how the model should treat the incoming JSON
-        that contains multiple user messages and how it must shape its output.
-
-     Args:
-         system_message: A single‑instance system instruction the caller would
-             normally send to the model.
-
-     Returns:
-         A long, composite system prompt with embedded examples that can be
-         supplied to the `instructions=` field of the OpenAI **JSON mode**
-         endpoint.
-     """
-     return f"""
- <SystemMessage>
- <ElementInstructions>
- <Instruction>{system_message}</Instruction>
- </ElementInstructions>
- <BatchInstructions>
- <Instruction>
- You will receive multiple user messages at once.
- Please provide an appropriate response to each message individually.
- </Instruction>
- </BatchInstructions>
- <Examples>
- <Example>
- <Input>
- {{
- "user_messages": [
- {{
- "id": 1,
- "body": "{{user_message_1}}"
- }},
- {{
- "id": 2,
- "body": "{{user_message_2}}"
- }}
- ]
- }}
- </Input>
- <Output>
- {{
- "assistant_messages": [
- {{
- "id": 1,
- "body": "{{assistant_response_1}}"
- }},
- {{
- "id": 2,
- "body": "{{assistant_response_2}}"
- }}
- ]
- }}
- </Output>
- </Example>
- </Examples>
- </SystemMessage>
- """
-
-
- T = TypeVar("T")
-
-
- class Message(BaseModel, Generic[T]):
-     id: int
-     body: T
-
-
- class Request(BaseModel):
-     user_messages: List[Message[str]]
-
-
- class Response(BaseModel, Generic[T]):
-     assistant_messages: List[Message[T]]
-
-
- @dataclass(frozen=True)
- class BatchResponses(Generic[T]):
-     """Stateless façade that turns OpenAI's JSON‑mode API into a batched API.
-
-     This wrapper allows you to submit *multiple* user prompts in one JSON‑mode
-     request and receive the answers in the original order.
-
-     Example:
-         ```python
-         vector_llm = BatchResponses(
-             client=openai_client,
-             model_name="gpt‑4o‑mini",
-             system_message="You are a helpful assistant."
-         )
-         answers = vector_llm.parse(questions, batch_size=32)
-         ```
-
-     Attributes:
-         client: Initialised ``openai.OpenAI`` client.
-         model_name: Name of the model (or Azure deployment) to invoke.
-         system_message: System prompt prepended to every request.
-         temperature: Sampling temperature passed to the model.
-         top_p: Nucleus‑sampling parameter.
-         response_format: Expected Pydantic type of each assistant message
-             (defaults to ``str``).
-
-     Notes:
-         Internally the work is delegated to two helpers:
-
-         * ``_predict_chunk`` – fragments the workload and restores ordering.
-         * ``_request_llm`` – performs a single OpenAI API call.
-     """
-
-     client: OpenAI
-     model_name: str  # it would be the name of deployment for Azure
-     system_message: str
-     temperature: float = 0.0
-     top_p: float = 1.0
-     response_format: Type[T] = str
-     _vectorized_system_message: str = field(init=False)
-     _model_json_schema: dict = field(init=False)
-
-     @classmethod
-     def of_task(cls, client: OpenAI, model_name: str, task: PreparedTask) -> "BatchResponses":
-         """Create a BatchResponses instance from a PreparedTask."""
-         return cls(
-             client=client,
-             model_name=model_name,
-             system_message=task.instructions,
-             temperature=task.temperature,
-             top_p=task.top_p,
-             response_format=task.response_format,
-         )
-
-     def __post_init__(self):
-         object.__setattr__(
-             self,
-             "_vectorized_system_message",
-             _vectorize_system_message(self.system_message),
-         )
-
-     @observe(_LOGGER)
-     @backoff(exception=RateLimitError, scale=15, max_retries=8)
-     def _request_llm(self, user_messages: List[Message[str]]) -> ParsedResponse[Response[T]]:
-         """Make a single call to the OpenAI *JSON mode* endpoint.
-
-         Args:
-             user_messages: Sequence of `Message[str]` objects representing the
-                 prompts for this minibatch. Each message carries a unique `id`
-                 so we can restore ordering later.
-
-         Returns:
-             ParsedResponse containing `Response[T]` which in turn holds the
-             assistant messages in arbitrary order.
-
-         Raises:
-             openai.RateLimitError: Transparently re‑raised after the
-                 exponential back‑off decorator exhausts all retries.
-         """
-         response_format = self.response_format
-
-         class MessageT(BaseModel):
-             id: int
-             body: response_format  # type: ignore
-
-         class ResponseT(BaseModel):
-             assistant_messages: List[MessageT]
-
-         completion: ParsedResponse[ResponseT] = self.client.responses.parse(
-             model=self.model_name,
-             instructions=self._vectorized_system_message,
-             input=Request(user_messages=user_messages).model_dump_json(),
-             temperature=self.temperature,
-             top_p=self.top_p,
-             text_format=ResponseT,
-         )
-         return cast(ParsedResponse[Response[T]], completion)
-
-     @observe(_LOGGER)
-     def _predict_chunk(self, user_messages: List[str]) -> List[T]:
-         """Helper executed for every unique minibatch.
-
-         This method:
-         1. Converts plain strings into `Message[str]` with stable indices.
-         2. Delegates the request to `_request_llm`.
-         3. Reorders the responses so they match the original indices.
-
-         The function is *pure* – it has no side‑effects and the result depends
-         only on its arguments – which allows it to be used safely in both
-         serial and parallel execution paths.
-         """
-         messages = [Message(id=i, body=message) for i, message in enumerate(user_messages)]
-         responses: ParsedResponse[Response[T]] = self._request_llm(messages)
-         response_dict = {message.id: message.body for message in responses.output_parsed.assistant_messages}
-         sorted_responses = [response_dict.get(m.id, None) for m in messages]
-         return sorted_responses
-
-     @observe(_LOGGER)
-     def parse(self, inputs: List[str], batch_size: int) -> List[T]:
-         """Public API: batched predict.
-
-         Args:
-             inputs: All prompts that require a response. Duplicate
-                 entries are de‑duplicated under the hood to save tokens.
-             batch_size: Maximum number of *unique* prompts per LLM call.
-
-         Returns:
-             A list containing the assistant responses in the same order as
-             *inputs*.
-         """
-         return map(inputs, self._predict_chunk, batch_size)
-
-
- @dataclass(frozen=True)
- class AsyncBatchResponses(Generic[T]):
-     """Stateless façade that turns OpenAI's JSON-mode API into a batched API (Async version).
-
-     This wrapper allows you to submit *multiple* user prompts in one JSON-mode
-     request and receive the answers in the original order asynchronously. It also
-     controls the maximum number of concurrent requests to the OpenAI API.
-
-     Example:
-         ```python
-         import asyncio
-         from openai import AsyncOpenAI
-         from openaivec.aio.responses import AsyncBatchResponses
-
-         # Assuming openai_async_client is an initialized AsyncOpenAI client
-         openai_async_client = AsyncOpenAI()  # Replace with your actual client initialization
-
-         vector_llm = AsyncBatchResponses(
-             client=openai_async_client,
-             model_name="gpt-4o-mini",
-             system_message="You are a helpful assistant.",
-             max_concurrency=5  # Limit concurrent requests
-         )
-         questions = ["What is the capital of France?", "Explain quantum physics simply."]
-         # Asynchronous call
-         async def main():
-             answers = await vector_llm.parse(questions, batch_size=32)
-             print(answers)
-
-         # Run the async function
-         asyncio.run(main())
-         ```
-
-     Attributes:
-         client: Initialised `openai.AsyncOpenAI` client.
-         model_name: Name of the model (or Azure deployment) to invoke.
-         system_message: System prompt prepended to every request.
-         temperature: Sampling temperature passed to the model.
-         top_p: Nucleus-sampling parameter.
-         response_format: Expected Pydantic type of each assistant message
-             (defaults to `str`).
-         max_concurrency: Maximum number of concurrent requests to the OpenAI API.
-     """
-
-     client: AsyncOpenAI
-     model_name: str  # it would be the name of deployment for Azure
-     system_message: str
-     temperature: float = 0.0
-     top_p: float = 1.0
-     response_format: Type[T] = str
-     max_concurrency: int = 8  # Default concurrency limit
-     _vectorized_system_message: str = field(init=False)
-     _model_json_schema: dict = field(init=False)
-     _semaphore: asyncio.Semaphore = field(init=False, repr=False)
-
-     @classmethod
-     def of_task(cls, client: AsyncOpenAI, model_name: str, task: PreparedTask, max_concurrency: int = 8) -> "AsyncBatchResponses":
-         """Create an AsyncBatchResponses instance from a PreparedTask."""
-         return cls(
-             client=client,
-             model_name=model_name,
-             system_message=task.instructions,
-             temperature=task.temperature,
-             top_p=task.top_p,
-             response_format=task.response_format,
-             max_concurrency=max_concurrency,
-         )
-
-     def __post_init__(self):
-         object.__setattr__(
-             self,
-             "_vectorized_system_message",
-             _vectorize_system_message(self.system_message),
-         )
-         # Initialize the semaphore after the object is created
-         # Use object.__setattr__ because the dataclass is frozen
-         object.__setattr__(self, "_semaphore", asyncio.Semaphore(self.max_concurrency))
-
-     @observe(_LOGGER)
-     @backoff_async(exception=RateLimitError, scale=15, max_retries=8)
-     async def _request_llm(self, user_messages: List[Message[str]]) -> ParsedResponse[Response[T]]:
-         """Make a single async call to the OpenAI *JSON mode* endpoint, respecting concurrency limits.
-
-         Args:
-             user_messages: Sequence of `Message[str]` objects representing the
-                 prompts for this minibatch. Each message carries a unique `id`
-                 so we can restore ordering later.
-
-         Returns:
-             ParsedResponse containing `Response[T]` which in turn holds the
-             assistant messages in arbitrary order.
-
-         Raises:
-             openai.RateLimitError: Transparently re-raised after the
-                 exponential back-off decorator exhausts all retries.
-         """
-         response_format = self.response_format
-
-         class MessageT(BaseModel):
-             id: int
-             body: response_format  # type: ignore
-
-         class ResponseT(BaseModel):
-             assistant_messages: List[MessageT]
-
-         # Acquire semaphore before making the API call
-         async with self._semaphore:
-             # Directly await the async call instead of using asyncio.run()
-             completion: ParsedResponse[ResponseT] = await self.client.responses.parse(
-                 model=self.model_name,
-                 instructions=self._vectorized_system_message,
-                 input=Request(user_messages=user_messages).model_dump_json(),
-                 temperature=self.temperature,
-                 top_p=self.top_p,
-                 text_format=ResponseT,
-             )
-             return cast(ParsedResponse[Response[T]], completion)
-
-     @observe(_LOGGER)
-     async def _predict_chunk(self, user_messages: List[str]) -> List[T]:
-         """Helper executed asynchronously for every unique minibatch.
-
-         This method:
-         1. Converts plain strings into `Message[str]` with stable indices.
-         2. Delegates the request to `_request_llm`.
-         3. Reorders the responses so they match the original indices.
-
-         The function is *pure* – it has no side-effects and the result depends
-         only on its arguments.
-         """
-         messages = [Message(id=i, body=message) for i, message in enumerate(user_messages)]
-         responses: ParsedResponse[Response[T]] = await self._request_llm(messages)
-         response_dict = {message.id: message.body for message in responses.output_parsed.assistant_messages}
-         # Ensure proper handling for missing IDs - this shouldn't happen in normal operation
-         sorted_responses = [response_dict.get(m.id, None) for m in messages]
-         return sorted_responses
-
-     @observe(_LOGGER)
-     async def parse(self, inputs: List[str], batch_size: int) -> List[T]:
-         """Asynchronous public API: batched predict.
-
-         Args:
-             inputs: All prompts that require a response. Duplicate
-                 entries are de-duplicated under the hood to save tokens.
-             batch_size: Maximum number of *unique* prompts per LLM call.
-
-         Returns:
-             A list containing the assistant responses in the same order as
-             *inputs*.
-         """
-
-         return await map_async(
-             inputs=inputs,
-             f=self._predict_chunk,
-             batch_size=batch_size,  # Use the batch_size argument passed to the method
-         )
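For reference, the removed `BatchResponses` class combined de-duplication, minibatching, and id-based reordering behind a single `parse` call. The sketch below is reconstructed from the docstrings and field definitions above and shows how a structured `response_format` was supplied in 0.10.0; the `Sentiment` model and the example prompts are illustrative, not part of the package.

```python
from openai import OpenAI
from pydantic import BaseModel

from openaivec.responses import BatchResponses  # module removed in 1.0.x


class Sentiment(BaseModel):
    # Illustrative response schema; any Pydantic model was accepted.
    label: str
    score: float


llm = BatchResponses(
    client=OpenAI(),
    model_name="gpt-4o-mini",
    system_message="Classify the sentiment of each message.",
    response_format=Sentiment,
)

# Duplicate inputs were de-duplicated before the API call, and the
# results came back in the same order and length as the input list.
reviews = ["Great product!", "Terrible support.", "Great product!"]
results = llm.parse(reviews, batch_size=32)
```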
openaivec/serialize.py DELETED
@@ -1,225 +0,0 @@
- """Serialization utilities for Pydantic BaseModel classes.
-
- This module provides utilities for converting Pydantic BaseModel classes
- to and from JSON schema representations. It supports dynamic model creation
- from JSON schemas with special handling for enum fields, which are converted
- to Literal types for better type safety and compatibility.
-
- Example:
-     Basic serialization and deserialization:
-
-     ```python
-     from pydantic import BaseModel
-     from typing import Literal
-
-     class Status(BaseModel):
-         value: Literal["active", "inactive"]
-         description: str
-
-     # Serialize to JSON schema
-     schema = serialize_base_model(Status)
-
-     # Deserialize back to BaseModel class
-     DynamicStatus = deserialize_base_model(schema)
-     instance = DynamicStatus(value="active", description="User is active")
-     ```
- """
-
- from typing import Any, Dict, List, Type, Literal
-
- from pydantic import BaseModel, Field, create_model
-
- __all__ = ["deserialize_base_model", "serialize_base_model"]
-
-
- def serialize_base_model(obj: Type[BaseModel]) -> Dict[str, Any]:
-     """Serialize a Pydantic BaseModel to JSON schema.
-
-     Args:
-         obj: The Pydantic BaseModel class to serialize.
-
-     Returns:
-         A dictionary containing the JSON schema representation of the model.
-
-     Example:
-         ```python
-         from pydantic import BaseModel
-
-         class Person(BaseModel):
-             name: str
-             age: int
-
-         schema = serialize_base_model(Person)
-         ```
-     """
-     return obj.model_json_schema()
-
-
- def dereference_json_schema(json_schema: Dict[str, Any]) -> Dict[str, Any]:
-     """Dereference JSON schema by resolving $ref pointers.
-
-     This function resolves all $ref references in a JSON schema by replacing
-     them with the actual referenced definitions from the $defs section.
-
-     Args:
-         json_schema: The JSON schema containing potential $ref references.
-
-     Returns:
-         A dereferenced JSON schema with all $ref pointers resolved.
-
-     Example:
-         ```python
-         schema = {
-             "properties": {
-                 "user": {"$ref": "#/$defs/User"}
-             },
-             "$defs": {
-                 "User": {"type": "object", "properties": {"name": {"type": "string"}}}
-             }
-         }
-         dereferenced = dereference_json_schema(schema)
-         # user property will contain the actual User definition
-         ```
-     """
-     model_map = json_schema.get("$defs", {})
-
-     def dereference(obj):
-         if isinstance(obj, dict):
-             if "$ref" in obj:
-                 ref = obj["$ref"].split("/")[-1]
-                 return dereference(model_map[ref])
-             else:
-                 return {k: dereference(v) for k, v in obj.items()}
-
-         elif isinstance(obj, list):
-             return [dereference(x) for x in obj]
-         else:
-             return obj
-
-     result = {}
-     for k, v in json_schema.items():
-         if k == "$defs":
-             continue
-
-         result[k] = dereference(v)
-
-     return result
-
-
- def parse_field(v: Dict[str, Any]) -> Any:
-     """Parse a JSON schema field definition to a Python type.
-
-     Converts JSON schema field definitions to corresponding Python types
-     for use in Pydantic model creation.
-
-     Args:
-         v: A dictionary containing the JSON schema field definition.
-
-     Returns:
-         The corresponding Python type (str, int, float, bool, dict, List, or BaseModel).
-
-     Raises:
-         ValueError: If the field type is not supported.
-
-     Example:
-         ```python
-         field_def = {"type": "string"}
-         python_type = parse_field(field_def)  # Returns str
-
-         array_def = {"type": "array", "items": {"type": "integer"}}
-         python_type = parse_field(array_def)  # Returns List[int]
-         ```
-     """
-     t = v["type"]
-     if t == "string":
-         return str
-     elif t == "integer":
-         return int
-     elif t == "number":
-         return float
-     elif t == "boolean":
-         return bool
-     elif t == "object":
-         # Check if it's a generic object (dict) or a nested model
-         if "properties" in v:
-             return deserialize_base_model(v)
-         else:
-             return dict
-     elif t == "array":
-         inner_type = parse_field(v["items"])
-         return List[inner_type]
-     else:
-         raise ValueError(f"Unsupported type: {t}")
-
-
- def deserialize_base_model(json_schema: Dict[str, Any]) -> Type[BaseModel]:
-     """Deserialize a JSON schema to a Pydantic BaseModel class.
-
-     Creates a dynamic Pydantic BaseModel class from a JSON schema definition.
-     For enum fields, this function uses Literal types instead of Enum classes
-     for better type safety and compatibility with systems like Apache Spark.
-
-     Args:
-         json_schema: A dictionary containing the JSON schema definition.
-
-     Returns:
-         A dynamically created Pydantic BaseModel class.
-
-     Example:
-         ```python
-         schema = {
-             "title": "Person",
-             "type": "object",
-             "properties": {
-                 "name": {"type": "string", "description": "Person's name"},
-                 "status": {
-                     "type": "string",
-                     "enum": ["active", "inactive"],
-                     "description": "Person's status"
-                 }
-             }
-         }
-
-         PersonModel = deserialize_base_model(schema)
-         person = PersonModel(name="John", status="active")
-         ```
-
-     Note:
-         Enum fields are converted to Literal types for improved compatibility
-         and type safety. This ensures better integration with data processing
-         frameworks like Apache Spark.
-     """
-     fields = {}
-     properties = dereference_json_schema(json_schema).get("properties", {})
-
-     for k, v in properties.items():
-         if "enum" in v:
-             enum_values = v["enum"]
-
-             # Always use Literal instead of Enum for better type safety and Spark compatibility
-             if len(enum_values) == 1:
-                 literal_type = Literal[enum_values[0]]
-             else:
-                 # Create Literal with multiple values
-                 literal_type = Literal[tuple(enum_values)]
-
-             description = v.get("description")
-             default_value = v.get("default")
-
-             if default_value is not None:
-                 field_info = Field(default=default_value, description=description) if description is not None else Field(default=default_value)
-             else:
-                 field_info = Field(description=description) if description is not None else Field()
-
-             fields[k] = (literal_type, field_info)
-         else:
-             description = v.get("description")
-             default_value = v.get("default")
-
-             if default_value is not None:
-                 field_info = Field(default=default_value, description=description) if description is not None else Field(default=default_value)
-             else:
-                 field_info = Field(description=description) if description is not None else Field()
-
-             fields[k] = (parse_field(v), field_info)
-     return create_model(json_schema["title"], **fields)
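Taken together, the two public helpers of this removed module formed a round trip: a schema produced by `serialize_base_model` could be fed back to `deserialize_base_model`, with JSON-schema enum fields resurfacing as `Literal` types on the rebuilt model. A minimal sketch of that round trip under 0.10.0 follows; the `Ticket` model is illustrative only.

```python
from typing import Literal

from pydantic import BaseModel

from openaivec.serialize import deserialize_base_model, serialize_base_model  # module removed in 1.0.x


class Ticket(BaseModel):
    # Illustrative model; the Literal field serializes to a JSON-schema enum.
    subject: str
    priority: Literal["low", "high"] = "low"


schema = serialize_base_model(Ticket)     # plain model_json_schema() output
Rebuilt = deserialize_base_model(schema)  # dynamic model; the enum comes back as a Literal
ticket = Rebuilt(subject="Login fails", priority="high")
```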
openaivec/task/model.py DELETED
@@ -1,84 +0,0 @@
- """Task model module for openaivec.
-
- This module provides predefined task models for the OpenAI API.
- The `PreparedTask` class represents a complete task configuration including
- instructions, response format, and sampling parameters.
-
- Example:
-     Basic usage with a predefined task:
-
-     ```python
-     from openai import OpenAI
-     from openaivec.task import nlp
-     from openaivec.responses import BatchResponses
-
-     translation_task: BatchResponses = BatchResponses.of_task(
-         client=OpenAI(),
-         model_name="gpt-4.1-mini",
-         task=nlp.MULTILINGUAL_TRANSLATION
-     )
-     ```
-
- Note:
-     This module uses Pydantic models for response format validation and
-     type safety.
- """
-
- from dataclasses import dataclass
- from typing import Type, TypeVar
- from pydantic import BaseModel
-
- __all__ = ['PreparedTask']
-
- T = TypeVar('T', bound=BaseModel)
-
-
-
- @dataclass(frozen=True)
- class PreparedTask:
-     """A data class representing a complete task configuration for OpenAI API calls.
-
-     This class encapsulates all the necessary parameters for executing a task,
-     including the instructions to be sent to the model, the expected response
-     format using Pydantic models, and sampling parameters for controlling
-     the model's output behavior.
-
-     Attributes:
-         instructions (str): The prompt or instructions to send to the OpenAI model.
-             This should contain clear, specific directions for the task.
-         response_format (Type[T]): A Pydantic model class that defines the expected
-             structure of the response. Must inherit from BaseModel.
-         temperature (float): Controls randomness in the model's output.
-             Range: 0.0 to 1.0. Lower values make output more deterministic.
-             Defaults to 0.0.
-         top_p (float): Controls diversity via nucleus sampling. Only tokens
-             comprising the top_p probability mass are considered.
-             Range: 0.0 to 1.0. Defaults to 1.0.
-
-     Example:
-         Creating a custom task:
-
-         ```python
-         from pydantic import BaseModel
-
-         class TranslationResponse(BaseModel):
-             translated_text: str
-             source_language: str
-             target_language: str
-
-         custom_task = PreparedTask(
-             instructions="Translate the following text to French:",
-             response_format=TranslationResponse,
-             temperature=0.1,
-             top_p=0.9
-         )
-         ```
-
-     Note:
-         This class is frozen (immutable) to ensure task configurations
-         cannot be accidentally modified after creation.
-     """
-     instructions: str
-     response_format: Type[T]
-     temperature: float = 0.0
-     top_p: float = 1.0
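The removed `PreparedTask` was the glue between the task catalogue and the batched clients shown earlier: `BatchResponses.of_task` simply unpacked its fields into the constructor. The short sketch below combines the two removed modules as they existed in 0.10.0; the `Keywords` model and the instructions text are illustrative, not part of the package.

```python
from typing import List

from openai import OpenAI
from pydantic import BaseModel

from openaivec.responses import BatchResponses  # module removed in 1.0.x
from openaivec.task.model import PreparedTask   # module removed in 1.0.x


class Keywords(BaseModel):
    # Illustrative response schema.
    keywords: List[str]


extract_keywords = PreparedTask(
    instructions="Extract up to five keywords from the text.",
    response_format=Keywords,
    temperature=0.0,
    top_p=1.0,
)

# of_task copies instructions, response_format, temperature, and top_p
# into the BatchResponses constructor.
llm = BatchResponses.of_task(client=OpenAI(), model_name="gpt-4.1-mini", task=extract_keywords)
answers = llm.parse(["openaivec batches LLM calls over lists of texts."], batch_size=8)
```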