openaivec 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/_responses.py +77 -25
- openaivec/_schema.py +413 -0
- openaivec/_serialize.py +178 -181
- openaivec/pandas_ext.py +242 -140
- openaivec/spark.py +21 -1
- openaivec/task/table/fillna.py +2 -2
- {openaivec-0.14.1.dist-info → openaivec-0.14.3.dist-info}/METADATA +1 -1
- {openaivec-0.14.1.dist-info → openaivec-0.14.3.dist-info}/RECORD +10 -9
- {openaivec-0.14.1.dist-info → openaivec-0.14.3.dist-info}/WHEEL +0 -0
- {openaivec-0.14.1.dist-info → openaivec-0.14.3.dist-info}/licenses/LICENSE +0 -0
openaivec/_serialize.py
CHANGED
@@ -1,31 +1,10 @@
   1      - """
        1 + """Refactored serialization utilities for Pydantic BaseModel classes.
   2    2
   3    3   This module provides utilities for converting Pydantic BaseModel classes
   4      - to and from JSON schema representations
   5      - from JSON schemas with special handling for enum fields, which are converted
   6      - to Literal types for better type safety and compatibility.
   7      -
   8      - Example:
   9      -     Basic serialization and deserialization:
  10      -
  11      -     ```python
  12      -     from pydantic import BaseModel
  13      -     from typing import Literal
  14      -
  15      -     class Status(BaseModel):
  16      -         value: Literal["active", "inactive"]
  17      -         description: str
  18      -
  19      -     # Serialize to JSON schema
  20      -     schema = serialize_base_model(Status)
  21      -
  22      -     # Deserialize back to BaseModel class
  23      -     DynamicStatus = deserialize_base_model(schema)
  24      -     instance = DynamicStatus(value="active", description="User is active")
  25      -     ```
        4 + to and from JSON schema representations with simplified, maintainable code.
  26    5   """
  27    6
  28      - from typing import Any, Dict, List, Literal, Type
        7 + from typing import Any, Dict, List, Literal, Tuple, Type, Union
  29    8
  30    9   from pydantic import BaseModel, Field, create_model
  31   10
@@ -33,66 +12,38 @@ __all__ = []
  33   12
  34   13
  35   14   def serialize_base_model(obj: Type[BaseModel]) -> Dict[str, Any]:
  36      -     """Serialize a Pydantic BaseModel to JSON schema.
  37      -
  38      -     Args:
  39      -         obj (Type[BaseModel]): The Pydantic BaseModel class to serialize.
  40      -
  41      -     Returns:
  42      -         A dictionary containing the JSON schema representation of the model.
  43      -
  44      -     Example:
  45      -         ```python
  46      -         from pydantic import BaseModel
  47      -
  48      -         class Person(BaseModel):
  49      -             name: str
  50      -             age: int
  51      -
  52      -         schema = serialize_base_model(Person)
  53      -         ```
  54      -     """
       15 +     """Serialize a Pydantic BaseModel to JSON schema."""
  55   16       return obj.model_json_schema()
  56   17
  57   18
  58   19   def dereference_json_schema(json_schema: Dict[str, Any]) -> Dict[str, Any]:
  59      -     """Dereference JSON schema by resolving $ref pointers.
  60      -
  61      -     This function resolves all $ref references in a JSON schema by replacing
  62      -     them with the actual referenced definitions from the $defs section.
  63      -
  64      -     Args:
  65      -         json_schema (Dict[str, Any]): The JSON schema containing potential $ref references.
  66      -
  67      -     Returns:
  68      -         A dereferenced JSON schema with all $ref pointers resolved.
  69      -
  70      -     Example:
  71      -         ```python
  72      -         schema = {
  73      -             "properties": {
  74      -                 "user": {"$ref": "#/$defs/User"}
  75      -             },
  76      -             "$defs": {
  77      -                 "User": {"type": "object", "properties": {"name": {"type": "string"}}}
  78      -             }
  79      -         }
  80      -         dereferenced = dereference_json_schema(schema)
  81      -         # user property will contain the actual User definition
  82      -         ```
  83      -     """
       20 +     """Dereference JSON schema by resolving $ref pointers with circular reference protection."""
  84   21       model_map = json_schema.get("$defs", {})
  85   22
  86      -     def dereference(obj):
       23 +     def dereference(obj, current_path=None):
       24 +         if current_path is None:
       25 +             current_path = []
       26 +
  87   27           if isinstance(obj, dict):
  88   28               if "$ref" in obj:
  89   29                   ref = obj["$ref"].split("/")[-1]
  90      -                 return dereference(model_map[ref])
  91      -             else:
  92      -                 return {k: dereference(v) for k, v in obj.items()}
  93   30
       31 +                 # Check for circular reference
       32 +                 if ref in current_path:
       33 +                     # Return a placeholder to break the cycle
       34 +                     return {"type": "object", "description": f"Circular reference to {ref}"}
       35 +
       36 +                 if ref in model_map:
       37 +                     # Add to path and recurse
       38 +                     new_path = current_path + [ref]
       39 +                     return dereference(model_map[ref], new_path)
       40 +                 else:
       41 +                     # Invalid reference, return placeholder
       42 +                     return {"type": "object", "description": f"Invalid reference to {ref}"}
       43 +             else:
       44 +                 return {k: dereference(v, current_path) for k, v in obj.items()}
  94   45           elif isinstance(obj, list):
  95      -             return [dereference(x) for x in obj]
       46 +             return [dereference(x, current_path) for x in obj]
  96   47           else:
  97   48               return obj
  98   49
@@ -100,134 +51,180 @@ def dereference_json_schema(json_schema: Dict[str, Any]) -> Dict[str, Any]:
 100   51       for k, v in json_schema.items():
 101   52           if k == "$defs":
 102   53               continue
 103      -
 104   54           result[k] = dereference(v)
 105   55
 106   56       return result
 107   57
 108   58
 109      -
       59 + # ============================================================================
       60 + # Type Resolution - Separated into focused functions
       61 + # ============================================================================
       62 +
       63 +
       64 + def _resolve_union_type(union_options: List[Dict[str, Any]]) -> Type:
       65 +     """Resolve anyOf/oneOf to Union type."""
       66 +     union_types = []
       67 +     for option in union_options:
       68 +         if option.get("type") == "null":
       69 +             union_types.append(type(None))
       70 +         else:
       71 +             union_types.append(parse_field(option))
       72 +
       73 +     if len(union_types) == 1:
       74 +         return union_types[0]
       75 +     elif len(union_types) == 2 and type(None) in union_types:
       76 +         # Optional type: T | None
       77 +         non_none_type = next(t for t in union_types if t is not type(None))
       78 +         return Union[non_none_type, type(None)] # type: ignore[return-value]
       79 +     else:
       80 +         return Union[tuple(union_types)] # type: ignore[return-value]
       81 +
       82 +
       83 + def _resolve_basic_type(type_name: str, field_def: Dict[str, Any]) -> Type:
       84 +     """Resolve basic JSON schema types to Python types."""
       85 +     type_mapping = {
       86 +         "string": str,
       87 +         "integer": int,
       88 +         "number": float,
       89 +         "boolean": bool,
       90 +         "null": type(None),
       91 +     }
       92 +
       93 +     if type_name in type_mapping:
       94 +         return type_mapping[type_name] # type: ignore[return-value]
       95 +     elif type_name == "object":
       96 +         # Check if it's a nested model or generic dict
       97 +         if "properties" in field_def:
       98 +             return deserialize_base_model(field_def)
       99 +         else:
      100 +             return dict
      101 +     elif type_name == "array":
      102 +         if "items" in field_def:
      103 +             inner_type = parse_field(field_def["items"])
      104 +             return List[inner_type]
      105 +         else:
      106 +             return List[Any]
      107 +     else:
      108 +         raise ValueError(f"Unsupported type: {type_name}")
      109 +
      110 +
      111 + def parse_field(field_def: Dict[str, Any]) -> Type:
 110  112       """Parse a JSON schema field definition to a Python type.
 111  113
 112      -
 113      -
      114 +     Simplified version with clear separation of concerns.
      115 +     """
      116 +     # Handle union types
      117 +     if "anyOf" in field_def:
      118 +         return _resolve_union_type(field_def["anyOf"])
      119 +     if "oneOf" in field_def:
      120 +         return _resolve_union_type(field_def["oneOf"])
 114  121
 115      -
 116      -
      122 +     # Handle basic types
      123 +     if "type" not in field_def:
      124 +         return Any # type: ignore[return-value]
 117  125
 118      -
 119      -         The corresponding Python type (str, int, float, bool, dict, List, or BaseModel).
      126 +     return _resolve_basic_type(field_def["type"], field_def)
 120  127
 121      -     Raises:
 122      -         ValueError: If the field type is not supported.
 123  128
 124      -
 125      -
 126      -
 127      -         python_type = parse_field(field_def) # Returns str
      129 + # ============================================================================
      130 + # Field Information Creation - Centralized logic
      131 + # ============================================================================
 128  132
 129      -
 130      -
 131      -
 132      -
 133      -
 134      -
 135      -         return str
 136      -     elif t == "integer":
 137      -         return int
 138      -     elif t == "number":
 139      -         return float
 140      -     elif t == "boolean":
 141      -         return bool
 142      -     elif t == "object":
 143      -         # Check if it's a generic object (dict) or a nested model
 144      -         if "properties" in v:
 145      -             return deserialize_base_model(v)
 146      -         else:
 147      -             return dict
 148      -     elif t == "array":
 149      -         inner_type = parse_field(v["items"])
 150      -         return List[inner_type]
      133 +
      134 + def _create_field_info(description: str | None, default_value: Any, is_required: bool) -> Field: # type: ignore[type-arg]
      135 +     """Create Field info with consistent logic."""
      136 +     if is_required and default_value is None:
      137 +         # Required field without default
      138 +         return Field(description=description) if description else Field()
 151  139       else:
 152      -
      140 +         # Optional field or field with default
      141 +         return Field(default=default_value, description=description) if description else Field(default=default_value)
      142 +
      143 +
      144 + def _make_optional_if_needed(field_type: Type, is_required: bool, has_default: bool) -> Type:
      145 +     """Make field type optional if needed."""
      146 +     if is_required or has_default:
      147 +         return field_type
      148 +
      149 +     # Check if already nullable
      150 +     if hasattr(field_type, "__origin__") and field_type.__origin__ is Union and type(None) in field_type.__args__:
      151 +         return field_type
      152 +
      153 +     # Make optional
      154 +     return Union[field_type, type(None)] # type: ignore[return-value]
      155 +
      156 +
      157 + # ============================================================================
      158 + # Field Processing - Separated enum and regular field logic
      159 + # ============================================================================
      160 +
      161 +
      162 + def _process_enum_field(field_name: str, field_def: Dict[str, Any], is_required: bool) -> Tuple[Type, Field]: # type: ignore[type-arg]
      163 +     """Process enum field with Literal type."""
      164 +     enum_values = field_def["enum"]
      165 +
      166 +     # Create Literal type
      167 +     if len(enum_values) == 1:
      168 +         literal_type = Literal[enum_values[0]]
      169 +     else:
      170 +         literal_type = Literal[tuple(enum_values)]
      171 +
      172 +     # Handle optionality
      173 +     description = field_def.get("description")
      174 +     default_value = field_def.get("default")
      175 +     has_default = default_value is not None
      176 +
      177 +     if not is_required and not has_default:
      178 +         literal_type = Union[literal_type, type(None)] # type: ignore[assignment]
      179 +         default_value = None
      180 +
      181 +     field_info = _create_field_info(description, default_value, is_required)
      182 +     return literal_type, field_info # type: ignore[return-value]
      183 +
      184 +
      185 + def _process_regular_field(field_name: str, field_def: Dict[str, Any], is_required: bool) -> Tuple[Type, Field]: # type: ignore[type-arg]
      186 +     """Process regular (non-enum) field."""
      187 +     field_type = parse_field(field_def)
      188 +     description = field_def.get("description")
      189 +     default_value = field_def.get("default")
      190 +     has_default = default_value is not None
      191 +
      192 +     # Handle optionality
      193 +     field_type = _make_optional_if_needed(field_type, is_required, has_default)
      194 +
      195 +     if not is_required and not has_default:
      196 +         default_value = None
      197 +
      198 +     field_info = _create_field_info(description, default_value, is_required)
      199 +     return field_type, field_info
      200 +
      201 +
      202 + # ============================================================================
      203 + # Main Schema Processing - Clean and focused
      204 + # ============================================================================
 153  205
 154  206
 155  207   def deserialize_base_model(json_schema: Dict[str, Any]) -> Type[BaseModel]:
 156  208       """Deserialize a JSON schema to a Pydantic BaseModel class.
 157  209
 158      -
 159      -     For enum fields, this function uses Literal types instead of Enum classes
 160      -     for better type safety and compatibility with systems like Apache Spark.
 161      -
 162      -     Args:
 163      -         json_schema (Dict[str, Any]): A dictionary containing the JSON schema definition.
 164      -
 165      -     Returns:
 166      -         A dynamically created Pydantic BaseModel class.
 167      -
 168      -     Example:
 169      -         ```python
 170      -         schema = {
 171      -             "title": "Person",
 172      -             "type": "object",
 173      -             "properties": {
 174      -                 "name": {"type": "string", "description": "Person's name"},
 175      -                 "status": {
 176      -                     "type": "string",
 177      -                     "enum": ["active", "inactive"],
 178      -                     "description": "Person's status"
 179      -                 }
 180      -             }
 181      -         }
 182      -
 183      -         PersonModel = deserialize_base_model(schema)
 184      -         person = PersonModel(name="John", status="active")
 185      -         ```
 186      -
 187      -     Note:
 188      -         Enum fields are converted to Literal types for improved compatibility
 189      -         and type safety. This ensures better integration with data processing
 190      -         frameworks like Apache Spark.
      210 +     Refactored version with clear separation of concerns and simplified logic.
 191  211       """
 192      -
 193      -
 194      -
 195      -
 196      -
 197      -             enum_values = v["enum"]
      212 +     # Basic setup
      213 +     title = json_schema.get("title", "DynamicModel")
      214 +     dereferenced_schema = dereference_json_schema(json_schema)
      215 +     properties = dereferenced_schema.get("properties", {})
      216 +     required_fields = set(dereferenced_schema.get("required", []))
 198  217
 199      -
 200      -
 201      -
 202      -
 203      -                 # Create Literal with multiple values
 204      -                 literal_type = Literal[tuple(enum_values)]
 205      -
 206      -             description = v.get("description")
 207      -             default_value = v.get("default")
 208      -
 209      -             if default_value is not None:
 210      -                 field_info = (
 211      -                     Field(default=default_value, description=description)
 212      -                     if description is not None
 213      -                     else Field(default=default_value)
 214      -                 )
 215      -             else:
 216      -                 field_info = Field(description=description) if description is not None else Field()
      218 +     # Process each field
      219 +     fields = {}
      220 +     for field_name, field_def in properties.items():
      221 +         is_required = field_name in required_fields
 217  222
 218      -
      223 +         if "enum" in field_def:
      224 +             field_type, field_info = _process_enum_field(field_name, field_def, is_required)
 219  225           else:
 220      -
 221      -
 222      -
 223      -             if default_value is not None:
 224      -                 field_info = (
 225      -                     Field(default=default_value, description=description)
 226      -                     if description is not None
 227      -                     else Field(default=default_value)
 228      -                 )
 229      -             else:
 230      -                 field_info = Field(description=description) if description is not None else Field()
      226 +             field_type, field_info = _process_regular_field(field_name, field_def, is_required)
      227 +
      228 +         fields[field_name] = (field_type, field_info)
 231  229
 232      -
 233      -     return create_model(json_schema["title"], **fields)
      230 +     return create_model(title, **fields)