openaivec 0.14.7__tar.gz → 0.14.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openaivec-0.14.7 → openaivec-0.14.9}/.github/copilot-instructions.md +5 -1
- {openaivec-0.14.7 → openaivec-0.14.9}/PKG-INFO +1 -1
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_di.py +10 -9
- openaivec-0.14.9/src/openaivec/_dynamic.py +350 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_embeddings.py +12 -13
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_log.py +1 -1
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_model.py +3 -3
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_optimize.py +3 -4
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_prompt.py +4 -5
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_proxy.py +34 -35
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_responses.py +29 -29
- openaivec-0.14.9/src/openaivec/_schema.py +337 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_serialize.py +19 -15
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_util.py +9 -8
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/pandas_ext.py +20 -19
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/spark.py +11 -10
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/customer_support/customer_sentiment.py +2 -2
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/customer_support/inquiry_classification.py +8 -8
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/customer_support/inquiry_summary.py +4 -4
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/customer_support/intent_analysis.py +5 -5
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/customer_support/response_suggestion.py +4 -4
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/customer_support/urgency_analysis.py +9 -9
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/nlp/dependency_parsing.py +2 -4
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/nlp/keyword_extraction.py +3 -5
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/nlp/morphological_analysis.py +4 -6
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/nlp/named_entity_recognition.py +7 -9
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/nlp/sentiment_analysis.py +3 -3
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/nlp/translation.py +1 -2
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/table/fillna.py +2 -3
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_di.py +2 -3
- openaivec-0.14.9/tests/test_dynamic.py +497 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_proxy.py +10 -11
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_proxy_suggester.py +7 -8
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_responses.py +7 -8
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_schema.py +149 -115
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_serialize.py +5 -5
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_spark.py +1 -2
- openaivec-0.14.7/src/openaivec/_schema.py +0 -476
- {openaivec-0.14.7 → openaivec-0.14.9}/.env.example +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/.github/workflows/python-mkdocs.yml +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/.github/workflows/python-package.yml +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/.github/workflows/python-test.yml +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/.github/workflows/python-update.yml +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/.gitignore +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/CODE_OF_CONDUCT.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/LICENSE +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/README.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/SECURITY.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/SUPPORT.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/main.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/pandas_ext.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/spark.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/task.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/api/tasks/nlp/translation.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/index.md +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/docs/robots.txt +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/mkdocs.yml +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/pyproject.toml +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/__init__.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_provider.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/__init__.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/customer_support/__init__.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/nlp/__init__.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/task/table/__init__.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/__init__.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_embeddings.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_optimize.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_pandas_ext.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_prompt.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_provider.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_serialize_pydantic_v2_compliance.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_task.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/tests/test_util.py +0 -0
- {openaivec-0.14.7 → openaivec-0.14.9}/uv.lock +0 -0
{openaivec-0.14.7 → openaivec-0.14.9}/.github/copilot-instructions.md

@@ -146,7 +146,11 @@ Public exports (`__init__.py`): `BatchResponses`, `AsyncBatchResponses`, `BatchE
 
 - Python ≥ 3.10; Ruff for lint/format (`line-length=120`).
 - Absolute imports (except re-export patterns in `__init__.py`) – enforced by Ruff rule TID252.
-- Modern typing (
+- Modern typing syntax (Python 3.9+):
+  - **Built-in generic types**: Use `list[T]`, `dict[K, V]`, `set[T]`, `tuple[T, ...]`, `type[T]` instead of `typing` equivalents
+  - **Union types**: Use `|` syntax (`int | str | None`) instead of `Union[...]`
+  - **Optional types**: Use `S | None` instead of `Optional[S]`
+  - **Collections.abc**: Use `collections.abc.Callable`, `collections.abc.Awaitable`, `collections.abc.Iterator` instead of `typing` equivalents
 - Prefer `@dataclass` for simple immutable-ish contracts; use Pydantic only for validation-boundaries.
 - Raise narrow exceptions (`ValueError`, `TypeError`) on contract violations—avoid broad except.
 - Public APIs: Google-style docstrings with return/raises sections.
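The updated typing guidance maps directly onto code. A minimal illustrative sketch of the convention it describes (the function and names below are invented, not taken from the package):

```python
from collections.abc import Callable


# Preferred style per the updated guidance: built-in generics and `|` unions.
def dispatch(
    handlers: dict[str, Callable[[str], None]],
    fallback: Callable[[str], None] | None = None,  # `X | None` instead of Optional[X]
) -> list[str]:
    """Return the registered handler names."""
    return list(handlers)
```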
{openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_di.py

@@ -1,6 +1,7 @@
+from collections.abc import Callable
 from dataclasses import dataclass, field
 from threading import RLock
-from typing import Any,
+from typing import Any, TypeVar
 
 __all__ = []
 
@@ -119,12 +120,12 @@ class Container:
     ```
     """
 
-    _instances:
-    _providers:
+    _instances: dict[type[Any], Any] = field(default_factory=dict)
+    _providers: dict[type[Any], Provider[Any]] = field(default_factory=dict)
     _lock: RLock = field(default_factory=RLock)
-    _resolving:
+    _resolving: set[type[Any]] = field(default_factory=set)
 
-    def register(self, cls:
+    def register(self, cls: type[T], provider: Provider[T]) -> None:
         """Register a provider function for a service type.
 
         The provider function will be called once to create the singleton instance
@@ -150,7 +151,7 @@ class Container:
 
         self._providers[cls] = provider
 
-    def register_instance(self, cls:
+    def register_instance(self, cls: type[T], instance: T) -> None:
         """Register a pre-created instance for a service type.
 
         The provided instance will be stored directly in the container and returned
@@ -178,7 +179,7 @@ class Container:
         self._instances[cls] = instance
         self._providers[cls] = lambda: instance
 
-    def resolve(self, cls:
+    def resolve(self, cls: type[T]) -> T:
         """Resolve a service instance, creating it if necessary.
 
         Returns the singleton instance for the requested service type. If this is
@@ -232,7 +233,7 @@ class Container:
         finally:
             self._resolving.discard(cls)
 
-    def is_registered(self, cls:
+    def is_registered(self, cls: type[Any]) -> bool:
         """Check if a service type is registered in the container.
 
         Args:
@@ -252,7 +253,7 @@ class Container:
         with self._lock:
            return cls in self._providers
 
-    def unregister(self, cls:
+    def unregister(self, cls: type[Any]) -> None:
         """Unregister a service type from the container.
 
         Removes the provider function and any cached singleton instance for
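For orientation, a minimal sketch of how the `Container` API shown above is used (illustrative only; it assumes the default dataclass constructor and that `Provider[T]` is a zero-argument callable, as the `lambda: instance` registration above suggests):

```python
from openaivec._di import Container  # internal module; path taken from the file list above


class Clock:
    """Hypothetical service type, used only for illustration."""


container = Container()
container.register(Clock, lambda: Clock())  # provider runs once; result is cached as a singleton
clock = container.resolve(Clock)            # same instance on every subsequent resolve
assert container.is_registered(Clock)
container.unregister(Clock)                 # drops the provider and any cached instance
```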
openaivec-0.14.9/src/openaivec/_dynamic.py (new file)

@@ -0,0 +1,350 @@
+from __future__ import annotations
+
+import re
+from enum import Enum
+from typing import Literal
+
+from pydantic import BaseModel, Field, create_model
+
+__all__: list[str] = []
+
+_MAX_ENUM_VALUES = 24
+
+
+class FieldSpec(BaseModel):
+    name: str = Field(
+        description=(
+            "Field name in lower_snake_case. Rules: (1) Use only lowercase letters, numbers, and underscores; "
+            "must start with a letter. (2) For numeric quantities append an explicit unit (e.g. 'duration_seconds', "
+            "'price_usd'). (3) Boolean fields use an affirmative 'is_' prefix (e.g. 'is_active'); avoid negative / "
+            "ambiguous forms like 'is_deleted' (prefer 'is_active', 'is_enabled'). (4) Name must be unique within the "
+            "containing object."
+        )
+    )
+    type: Literal[
+        "string",
+        "integer",
+        "float",
+        "boolean",
+        "enum",
+        "object",
+        "string_array",
+        "integer_array",
+        "float_array",
+        "boolean_array",
+        "enum_array",
+        "object_array",
+    ] = Field(
+        description=(
+            "Logical data type. Allowed values: string | integer | float | boolean | enum | object | string_array | "
+            "integer_array | float_array | boolean_array | enum_array | object_array. *_array variants represent a "
+            "homogeneous list of the base type. 'enum' / 'enum_array' require 'enum_spec'. 'object' / 'object_array' "
+            "require 'object_spec'. Primitives must not define 'enum_spec' or 'object_spec'."
+        )
+    )
+    description: str = Field(
+        description=(
+            "Human‑readable, concise explanation of the field's meaning and business intent. Should clarify units, "
+            "value semantics, and any domain constraints not captured by type. 1–2 sentences; no implementation notes."
+        )
+    )
+    enum_spec: EnumSpec | None = Field(
+        default=None,
+        description=(
+            "Enumeration specification for 'enum' / 'enum_array'. Must be provided (non-empty) for those types and "
+            "omitted for all others. Maximum size enforced by constant."
+        ),
+    )
+    object_spec: ObjectSpec | None = Field(
+        default=None,
+        description=(
+            "Nested object schema. Required for 'object' / 'object_array'; must be omitted for every other type. The "
+            "contained 'name' is used to derive the generated nested Pydantic model class name."
+        ),
+    )
+
+
+class EnumSpec(BaseModel):
+    """Enumeration specification for enum / enum_array field types.
+
+    Attributes:
+        name: Required Enum class name (UpperCamelCase). Must match ^[A-Z][A-Za-z0-9]*$. Previously optional; now
+            explicit to remove implicit coupling to the field name and make schemas self‑describing.
+        values: Raw label values (1–_MAX_ENUM_VALUES before de‑dup). Values are uppercased then
+            de-duplicated using a set; ordering of generated Enum members is not guaranteed. Any
+            casing variants collapse silently to a single member.
+    """
+
+    name: str = Field(
+        description=("Required Enum class name (UpperCamelCase). Valid pattern: ^[A-Z][A-Za-z0-9]*$."),
+    )
+    values: list[str] = Field(
+        description=(
+            f"Raw enum label values (1–{_MAX_ENUM_VALUES}). Uppercased then deduplicated; order of members "
+            "not guaranteed."
+        )
+    )
+
+
+class ObjectSpec(BaseModel):
+    name: str = Field(
+        description=(
+            "Object model class name in UpperCamelCase (singular noun). Must match ^[A-Z][A-Za-z0-9]*$ and is used "
+            "directly as the generated Pydantic model class name (no transformation)."
+        )
+    )
+    fields: list[FieldSpec] = Field(
+        description=(
+            "Non-empty list of FieldSpec definitions composing the object. Each field name must be unique; order is "
+            "preserved in the generated model."
+        )
+    )
+
+
+def _build_model(object_spec: ObjectSpec) -> type[BaseModel]:
+    lower_sname_pattern = re.compile(r"^[a-z][a-z0-9]*(?:_[a-z0-9]+)*$")
+    upper_camel_pattern = re.compile(r"^[A-Z][A-Za-z0-9]*$")
+    type_map: dict[str, type] = {
+        "string": str,
+        "integer": int,
+        "float": float,
+        "boolean": bool,
+        "string_array": list[str],
+        "integer_array": list[int],
+        "float_array": list[float],
+        "boolean_array": list[bool],
+    }
+    output_fields: dict[str, tuple[type, object]] = {}
+
+    field_names: list[str] = [field.name for field in object_spec.fields]
+
+    # Assert that names of fields are not duplicated
+    if len(field_names) != len(set(field_names)):
+        raise ValueError("Field names must be unique within the object spec.")
+
+    for field in object_spec.fields:
+        # Assert that field names are lower_snake_case
+        if not lower_sname_pattern.match(field.name):
+            raise ValueError(f"Field name '{field.name}' must be in lower_snake_case format (e.g., 'my_field_name').")
+
+        # (EnumSpec.name now mandatory; no need to derive a fallback name from the field.)
+        match field:
+            case FieldSpec(
+                name=name,
+                type="string"
+                | "integer"
+                | "float"
+                | "boolean"
+                | "string_array"
+                | "integer_array"
+                | "float_array"
+                | "boolean_array",
+                description=description,
+                enum_spec=None,
+                object_spec=None,
+            ):
+                field_type = type_map[field.type]
+                output_fields[name] = (field_type, Field(description=description))
+
+            case FieldSpec(name=name, type="enum", description=description, enum_spec=enum_spec, object_spec=None) if (
+                enum_spec
+                and 0 < len(enum_spec.values) <= _MAX_ENUM_VALUES
+                and upper_camel_pattern.match(enum_spec.name)
+            ):
+                member_names = list({v.upper() for v in enum_spec.values})
+                enum_type = Enum(enum_spec.name, member_names)
+                output_fields[name] = (enum_type, Field(description=description))
+
+            case FieldSpec(
+                name=name, type="enum_array", description=description, enum_spec=enum_spec, object_spec=None
+            ) if (
+                enum_spec
+                and 0 < len(enum_spec.values) <= _MAX_ENUM_VALUES
+                and upper_camel_pattern.match(enum_spec.name)
+            ):
+                member_names = list({v.upper() for v in enum_spec.values})
+                enum_type = Enum(enum_spec.name, member_names)
+                output_fields[name] = (list[enum_type], Field(description=description))
+
+            case FieldSpec(
+                name=name, type="object", description=description, enum_spec=None, object_spec=object_spec
+            ) if object_spec and upper_camel_pattern.match(object_spec.name):
+                nested_model = _build_model(object_spec)
+                output_fields[name] = (nested_model, Field(description=description))
+
+            case FieldSpec(
+                name=name, type="object_array", description=description, enum_spec=None, object_spec=object_spec
+            ) if object_spec and upper_camel_pattern.match(object_spec.name):
+                nested_model = _build_model(object_spec)
+                output_fields[name] = (list[nested_model], Field(description=description))
+
+            # ---- Error cases (explicit reasons) ----
+            # Enum type without enum_spec (None or empty)
+            case FieldSpec(
+                name=name,
+                type="enum",
+                enum_spec=enum_spec,
+                object_spec=None,
+            ) if not enum_spec or not enum_spec.values:
+                raise ValueError(f"Field '{name}': enum type requires non-empty enum_spec values list.")
+            # Enum type exceeding max length
+            case FieldSpec(
+                name=name,
+                type="enum",
+                enum_spec=enum_spec,
+                object_spec=None,
+            ) if enum_spec and len(enum_spec.values) > _MAX_ENUM_VALUES:
+                raise ValueError(
+                    (
+                        f"Field '{name}': enum type supports at most {_MAX_ENUM_VALUES} enum_spec values "
+                        f"(got {len(enum_spec.values)})."
+                    )
+                )
+            # Enum type invalid explicit name pattern
+            case FieldSpec(
+                name=name,
+                type="enum",
+                enum_spec=enum_spec,
+                object_spec=None,
+            ) if enum_spec and not upper_camel_pattern.match(enum_spec.name):
+                raise ValueError(
+                    (f"Field '{name}': enum_spec.name '{enum_spec.name}' invalid – must match ^[A-Z][A-Za-z0-9]*$")
+                )
+            # Enum type incorrectly provides an object_spec
+            case FieldSpec(
+                name=name,
+                type="enum",
+                enum_spec=enum_spec,
+                object_spec=object_spec,
+            ) if object_spec is not None:
+                raise ValueError(
+                    f"Field '{name}': enum type must not provide object_spec (got object_spec={object_spec!r})."
+                )
+            # Enum array type without enum_spec
+            case FieldSpec(
+                name=name,
+                type="enum_array",
+                enum_spec=enum_spec,
+                object_spec=None,
+            ) if not enum_spec or not enum_spec.values:
+                raise ValueError(f"Field '{name}': enum_array type requires non-empty enum_spec values list.")
+            # Enum array type exceeding max length
+            case FieldSpec(
+                name=name,
+                type="enum_array",
+                enum_spec=enum_spec,
+                object_spec=None,
+            ) if enum_spec and len(enum_spec.values) > _MAX_ENUM_VALUES:
+                raise ValueError(
+                    (
+                        f"Field '{name}': enum_array type supports at most {_MAX_ENUM_VALUES} enum_spec values "
+                        f"(got {len(enum_spec.values)})."
+                    )
+                )
+            # Enum array type invalid explicit name pattern
+            case FieldSpec(
+                name=name,
+                type="enum_array",
+                enum_spec=enum_spec,
+                object_spec=None,
+            ) if enum_spec and not upper_camel_pattern.match(enum_spec.name):
+                raise ValueError(
+                    (f"Field '{name}': enum_spec.name '{enum_spec.name}' invalid – must match ^[A-Z][A-Za-z0-9]*$")
+                )
+            # Enum array type incorrectly provides an object_spec
+            case FieldSpec(
+                name=name,
+                type="enum_array",
+                enum_spec=enum_spec,
+                object_spec=object_spec,
+            ) if object_spec is not None:
+                raise ValueError(
+                    f"Field '{name}': enum_array type must not provide object_spec (got object_spec={object_spec!r})."
+                )
+            # Object type missing object_spec
+            case FieldSpec(
+                name=name,
+                type="object",
+                enum_spec=enum_spec,
+                object_spec=None,
+            ):
+                raise ValueError(f"Field '{name}': object type requires object_spec (got object_spec=None).")
+            # Object array type missing object_spec
+            case FieldSpec(
+                name=name,
+                type="object_array",
+                enum_spec=enum_spec,
+                object_spec=None,
+            ):
+                raise ValueError(f"Field '{name}': object_array type requires object_spec (got object_spec=None).")
+            # Object/object_array provided but invalid name pattern
+            case FieldSpec(
+                name=name,
+                type="object" | "object_array",
+                enum_spec=enum_spec,
+                object_spec=object_spec,
+            ) if object_spec is not None and not upper_camel_pattern.match(object_spec.name):
+                raise ValueError(
+                    (
+                        f"Field '{name}': object_spec.name '{object_spec.name}' must be UpperCamelCase "
+                        "(regex ^[A-Z][A-Za-z0-9]*$) and contain only letters and digits."
+                    )
+                )
+            # Object/object_array types must not provide enum_spec
+            case FieldSpec(
+                name=name,
+                type="object" | "object_array",
+                enum_spec=enum_spec,
+                object_spec=object_spec,
+            ) if enum_spec is not None:
+                raise ValueError(
+                    f"Field '{name}': {field.type} must not define enum_spec (got enum_spec={enum_spec!r})."
+                )
+            # Primitive / simple array types must not have enum_spec
+            case FieldSpec(
+                name=name,
+                type="string"
+                | "integer"
+                | "float"
+                | "boolean"
+                | "string_array"
+                | "integer_array"
+                | "float_array"
+                | "boolean_array",
+                enum_spec=enum_spec,
+                object_spec=object_spec,
+            ) if enum_spec is not None:
+                raise ValueError(
+                    (f"Field '{name}': type '{field.type}' must not define enum_spec (got enum_spec={enum_spec!r}).")
+                )
+            # Primitive / simple array types must not have object_spec
+            case FieldSpec(
+                name=name,
+                type="string"
+                | "integer"
+                | "float"
+                | "boolean"
+                | "string_array"
+                | "integer_array"
+                | "float_array"
+                | "boolean_array",
+                enum_spec=None,
+                object_spec=object_spec,
+            ) if object_spec is not None:
+                raise ValueError(
+                    (
+                        f"Field '{name}': type '{field.type}' must not define object_spec "
+                        f"(got object_spec={object_spec!r})."
+                    )
+                )
+            # Any other unmatched combination
+            case FieldSpec() as f:
+                raise ValueError(
+                    (
+                        "Field configuration invalid / unrecognized combination: "
+                        f"name={f.name!r}, type={f.type!r}, enum_spec={'set' if f.enum_spec else None}, "
+                        f"object_spec={'set' if f.object_spec else None}."
+                    )
+                )
+
+    return create_model(object_spec.name, **output_fields)
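To make the intent of the new module concrete, here is a small sketch of the spec-to-model mapping implemented above (illustrative only; `_build_model` is a private helper and the example spec is invented):

```python
from openaivec._dynamic import EnumSpec, FieldSpec, ObjectSpec, _build_model

spec = ObjectSpec(
    name="Ticket",
    fields=[
        FieldSpec(name="title", type="string", description="Short summary of the ticket."),
        FieldSpec(
            name="priority",
            type="enum",
            description="Triage priority.",
            enum_spec=EnumSpec(name="Priority", values=["low", "medium", "high"]),
        ),
        FieldSpec(name="tags", type="string_array", description="Free-form labels."),
    ],
)

Ticket = _build_model(spec)  # dynamically created Pydantic model class named "Ticket"
# Enum labels are uppercased and de-duplicated, so Priority has members LOW / MEDIUM / HIGH.
print(Ticket.model_json_schema()["title"])  # -> "Ticket"
```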
{openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_embeddings.py

@@ -1,6 +1,5 @@
 from dataclasses import dataclass, field
 from logging import Logger, getLogger
-from typing import List
 
 import numpy as np
 from numpy.typing import NDArray
@@ -50,7 +49,7 @@ class BatchEmbeddings:
 
     @observe(_LOGGER)
     @backoff(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
-    def _embed_chunk(self, inputs:
+    def _embed_chunk(self, inputs: list[str]) -> list[NDArray[np.float32]]:
         """Embed one minibatch of strings.
 
         This private helper is the unit of work used by the map/parallel
@@ -58,23 +57,23 @@ class BatchEmbeddings:
         ``openai.RateLimitError`` is raised.
 
         Args:
-            inputs (
+            inputs (list[str]): Input strings to be embedded. Duplicates allowed.
 
         Returns:
-
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
         """
         responses = self.client.embeddings.create(input=inputs, model=self.model_name)
         return [np.array(d.embedding, dtype=np.float32) for d in responses.data]
 
     @observe(_LOGGER)
-    def create(self, inputs:
+    def create(self, inputs: list[str]) -> list[NDArray[np.float32]]:
         """Generate embeddings for inputs using cached, ordered batching.
 
         Args:
-            inputs (
+            inputs (list[str]): Input strings. Duplicates allowed.
 
         Returns:
-
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
         """
         return self.cache.map(inputs, self._embed_chunk)
 
@@ -159,7 +158,7 @@ class AsyncBatchEmbeddings:
 
     @backoff_async(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
     @observe(_LOGGER)
-    async def _embed_chunk(self, inputs:
+    async def _embed_chunk(self, inputs: list[str]) -> list[NDArray[np.float32]]:
         """Embed one minibatch of strings asynchronously.
 
         This private helper handles the actual API call for a batch of inputs.
@@ -167,10 +166,10 @@ class AsyncBatchEmbeddings:
         is raised.
 
         Args:
-            inputs (
+            inputs (list[str]): Input strings to be embedded. Duplicates allowed.
 
         Returns:
-
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
 
         Raises:
             RateLimitError: Propagated if retries are exhausted.
@@ -179,13 +178,13 @@ class AsyncBatchEmbeddings:
         return [np.array(d.embedding, dtype=np.float32) for d in responses.data]
 
     @observe(_LOGGER)
-    async def create(self, inputs:
+    async def create(self, inputs: list[str]) -> list[NDArray[np.float32]]:
         """Generate embeddings for inputs using proxy batching (async).
 
         Args:
-            inputs (
+            inputs (list[str]): Input strings. Duplicates allowed.
 
         Returns:
-
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
         """
         return await self.cache.map(inputs, self._embed_chunk)  # type: ignore[arg-type]
{openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_model.py

@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Generic,
+from typing import Generic, TypeVar
 
 __all__ = [
     "PreparedTask",
@@ -20,7 +20,7 @@ class PreparedTask(Generic[ResponseFormat]):
     Attributes:
         instructions (str): The prompt or instructions to send to the OpenAI model.
            This should contain clear, specific directions for the task.
-        response_format (
+        response_format (type[ResponseFormat]): A Pydantic model class or str type that defines the expected
            structure of the response. Can be either a BaseModel subclass or str.
         temperature (float): Controls randomness in the model's output.
            Range: 0.0 to 1.0. Lower values make output more deterministic.
@@ -54,7 +54,7 @@ class PreparedTask(Generic[ResponseFormat]):
     """
 
     instructions: str
-    response_format:
+    response_format: type[ResponseFormat]
     temperature: float = 0.0
     top_p: float = 1.0
 
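For context, `PreparedTask` pairs instructions with a structured response type. A minimal sketch based only on the fields shown in this diff (the `Sentiment` model is invented, and dataclass-style keyword construction is assumed from the `dataclass` import above):

```python
from pydantic import BaseModel

from openaivec._model import PreparedTask  # module path taken from the file list above


class Sentiment(BaseModel):
    label: str
    score: float


task = PreparedTask(
    instructions="Classify the sentiment of the given text.",
    response_format=Sentiment,  # type[ResponseFormat]: a BaseModel subclass or str
    temperature=0.0,
    top_p=1.0,
)
```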
{openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_optimize.py

@@ -3,7 +3,6 @@ import time
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
-from typing import List
 
 __all__ = []
 
@@ -24,7 +23,7 @@ class BatchSizeSuggester:
     max_duration: float = 60.0
     step_ratio: float = 0.2
     sample_size: int = 4
-    _history:
+    _history: list[PerformanceMetric] = field(default_factory=list)
     _lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
     _batch_size_changed_at: datetime | None = field(default=None, init=False)
 
@@ -65,9 +64,9 @@ class BatchSizeSuggester:
         )
 
     @property
-    def samples(self) ->
+    def samples(self) -> list[PerformanceMetric]:
         with self._lock:
-            selected:
+            selected: list[PerformanceMetric] = []
             for metric in reversed(self._history):
                 if metric.exception is not None:
                     continue
{openaivec-0.14.7 → openaivec-0.14.9}/src/openaivec/_prompt.py

@@ -44,7 +44,6 @@ this will produce an XML string that looks like this:
 
 import difflib
 import logging
-from typing import List
 from xml.etree import ElementTree
 
 from openai import OpenAI
@@ -90,8 +89,8 @@ class FewShotPrompt(BaseModel):
     """
 
     purpose: str
-    cautions:
-    examples:
+    cautions: list[str]
+    examples: list[Example]
 
 
 class Step(BaseModel):
@@ -116,7 +115,7 @@ class Request(BaseModel):
 
 
 class Response(BaseModel):
-    iterations:
+    iterations: list[Step]
 
 
 _PROMPT: str = """
@@ -358,7 +357,7 @@ class FewShotPromptBuilder:
     """
 
     _prompt: FewShotPrompt
-    _steps:
+    _steps: list[Step]
 
     def __init__(self):
         """Initialize an empty FewShotPromptBuilder.