openaivec 0.14.1__tar.gz → 0.14.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openaivec-0.14.1 → openaivec-0.14.2}/PKG-INFO +1 -1
- openaivec-0.14.2/src/openaivec/_serialize.py +230 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/table/fillna.py +2 -2
- openaivec-0.14.2/tests/test_serialize_pydantic_v2_compliance.py +1045 -0
- openaivec-0.14.1/src/openaivec/_serialize.py +0 -233
- {openaivec-0.14.1 → openaivec-0.14.2}/.env.example +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/.github/copilot-instructions.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/.github/workflows/python-mkdocs.yml +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/.github/workflows/python-package.yml +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/.github/workflows/python-test.yml +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/.github/workflows/python-update.yml +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/.gitignore +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/CODE_OF_CONDUCT.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/LICENSE +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/README.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/SECURITY.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/SUPPORT.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/main.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/pandas_ext.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/spark.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/task.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/translation.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/index.md +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/docs/robots.txt +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/mkdocs.yml +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/pyproject.toml +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/__init__.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_di.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_embeddings.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_log.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_model.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_optimize.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_prompt.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_provider.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_proxy.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_responses.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_util.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/pandas_ext.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/spark.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/__init__.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/__init__.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/__init__.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/translation.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/table/__init__.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/__init__.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_di.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_embeddings.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_optimize.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_pandas_ext.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_prompt.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_provider.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_proxy.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_proxy_suggester.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_responses.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_serialize.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_spark.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_task.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_util.py +0 -0
- {openaivec-0.14.1 → openaivec-0.14.2}/uv.lock +0 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""Refactored serialization utilities for Pydantic BaseModel classes.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for converting Pydantic BaseModel classes
|
|
4
|
+
to and from JSON schema representations with simplified, maintainable code.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List, Literal, Tuple, Type, Union
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field, create_model
|
|
10
|
+
|
|
11
|
+
__all__ = []
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def serialize_base_model(obj: Type[BaseModel]) -> Dict[str, Any]:
    """Return the JSON schema describing a Pydantic model class.

    Args:
        obj: The model class whose schema is requested.

    Returns:
        The dictionary produced by ``obj.model_json_schema()``.
    """
    schema = obj.model_json_schema()
    return schema
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def dereference_json_schema(json_schema: Dict[str, Any]) -> Dict[str, Any]:
    """Inline every ``$ref`` pointer of a JSON schema using its ``$defs`` table.

    The reference name is the last ``/``-separated segment of the ``$ref``
    string and is looked up in the top-level ``$defs`` mapping. The returned
    schema no longer contains the ``$defs`` key.

    Args:
        json_schema: Schema dictionary, optionally carrying a ``$defs`` table.
            A ``$ref`` may appear anywhere, including on the root itself.

    Returns:
        A new dictionary in which each resolvable ``$ref`` is replaced by a
        dereferenced copy of its definition. Keywords written next to a
        ``$ref`` (for example ``description`` or ``default``) are kept and
        take precedence over the same keyword in the definition. A reference
        that would re-enter a definition already being expanded, or that names
        an unknown definition, is replaced by a placeholder object schema.
    """
    definitions = json_schema.get("$defs", {})

    def dereference(node, current_path=()):
        if isinstance(node, list):
            return [dereference(item, current_path) for item in node]
        if not isinstance(node, dict):
            return node

        pointer = node.get("$ref")
        if not isinstance(pointer, str):
            # Either no reference at all, or a mapping that merely has a key
            # called "$ref" (e.g. a property with that name): plain recursion.
            return {key: dereference(value, current_path) for key, value in node.items()}

        ref = pointer.split("/")[-1]

        if ref in current_path:
            # Already expanding this definition: emit a placeholder to break the cycle.
            return {"type": "object", "description": f"Circular reference to {ref}"}
        if ref not in definitions:
            return {"type": "object", "description": f"Invalid reference to {ref}"}

        resolved = dereference(definitions[ref], current_path + (ref,))
        siblings = {
            key: dereference(value, current_path)
            for key, value in node.items()
            if key != "$ref"
        }
        if siblings and isinstance(resolved, dict):
            # Keywords beside the $ref refine the referenced definition.
            return {**resolved, **siblings}
        return resolved

    # Dereference the root as a whole so that a top-level "$ref" is resolved too.
    root = {key: value for key, value in json_schema.items() if key != "$defs"}
    return dereference(root)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ============================================================================
|
|
60
|
+
# Type Resolution - Separated into focused functions
|
|
61
|
+
# ============================================================================
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _resolve_union_type(union_options: List[Dict[str, Any]]) -> Type:
|
|
65
|
+
"""Resolve anyOf/oneOf to Union type."""
|
|
66
|
+
union_types = []
|
|
67
|
+
for option in union_options:
|
|
68
|
+
if option.get("type") == "null":
|
|
69
|
+
union_types.append(type(None))
|
|
70
|
+
else:
|
|
71
|
+
union_types.append(parse_field(option))
|
|
72
|
+
|
|
73
|
+
if len(union_types) == 1:
|
|
74
|
+
return union_types[0]
|
|
75
|
+
elif len(union_types) == 2 and type(None) in union_types:
|
|
76
|
+
# Optional type: T | None
|
|
77
|
+
non_none_type = next(t for t in union_types if t is not type(None))
|
|
78
|
+
return Union[non_none_type, type(None)] # type: ignore[return-value]
|
|
79
|
+
else:
|
|
80
|
+
return Union[tuple(union_types)] # type: ignore[return-value]
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _resolve_basic_type(type_name: str, field_def: Dict[str, Any]) -> Type:
|
|
84
|
+
"""Resolve basic JSON schema types to Python types."""
|
|
85
|
+
type_mapping = {
|
|
86
|
+
"string": str,
|
|
87
|
+
"integer": int,
|
|
88
|
+
"number": float,
|
|
89
|
+
"boolean": bool,
|
|
90
|
+
"null": type(None),
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
if type_name in type_mapping:
|
|
94
|
+
return type_mapping[type_name] # type: ignore[return-value]
|
|
95
|
+
elif type_name == "object":
|
|
96
|
+
# Check if it's a nested model or generic dict
|
|
97
|
+
if "properties" in field_def:
|
|
98
|
+
return deserialize_base_model(field_def)
|
|
99
|
+
else:
|
|
100
|
+
return dict
|
|
101
|
+
elif type_name == "array":
|
|
102
|
+
if "items" in field_def:
|
|
103
|
+
inner_type = parse_field(field_def["items"])
|
|
104
|
+
return List[inner_type]
|
|
105
|
+
else:
|
|
106
|
+
return List[Any]
|
|
107
|
+
else:
|
|
108
|
+
raise ValueError(f"Unsupported type: {type_name}")
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def parse_field(field_def: Dict[str, Any]) -> Type:
    """Translate one JSON schema field definition into a Python type.

    ``anyOf`` is checked before ``oneOf``; whichever is present is delegated
    to ``_resolve_union_type``. Otherwise the ``type`` keyword is delegated to
    ``_resolve_basic_type``. A definition with none of these keys yields ``Any``.
    """
    # Union keywords take priority over a plain "type".
    for union_key in ("anyOf", "oneOf"):
        if union_key in field_def:
            return _resolve_union_type(field_def[union_key])

    if "type" in field_def:
        return _resolve_basic_type(field_def["type"], field_def)

    # Nothing constrains the value.
    return Any  # type: ignore[return-value]
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ============================================================================
|
|
130
|
+
# Field Information Creation - Centralized logic
|
|
131
|
+
# ============================================================================
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _create_field_info(description: str | None, default_value: Any, is_required: bool) -> Field:  # type: ignore[type-arg]
    """Build the ``Field(...)`` declaration for one model field.

    A required field whose default is ``None`` gets no ``default`` argument;
    in every other case ``default_value`` is passed as the default. The
    description is forwarded only when it is truthy.
    """
    field_kwargs: Dict[str, Any] = {}

    declares_default = not (is_required and default_value is None)
    if declares_default:
        field_kwargs["default"] = default_value

    if description:
        field_kwargs["description"] = description

    return Field(**field_kwargs)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _make_optional_if_needed(field_type: Type, is_required: bool, has_default: bool) -> Type:
|
|
145
|
+
"""Make field type optional if needed."""
|
|
146
|
+
if is_required or has_default:
|
|
147
|
+
return field_type
|
|
148
|
+
|
|
149
|
+
# Check if already nullable
|
|
150
|
+
if hasattr(field_type, "__origin__") and field_type.__origin__ is Union and type(None) in field_type.__args__:
|
|
151
|
+
return field_type
|
|
152
|
+
|
|
153
|
+
# Make optional
|
|
154
|
+
return Union[field_type, type(None)] # type: ignore[return-value]
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# ============================================================================
|
|
158
|
+
# Field Processing - Separated enum and regular field logic
|
|
159
|
+
# ============================================================================
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _process_enum_field(field_name: str, field_def: Dict[str, Any], is_required: bool) -> Tuple[Type, Field]:  # type: ignore[type-arg]
    """Turn an ``enum`` field definition into a ``Literal`` type plus field info.

    The ``Literal`` lists the values found under ``enum``. When the field is
    not required and has no non-``None`` default, ``None`` is added to the
    type. ``field_name`` is accepted but not used in the body.
    """
    allowed_values = field_def["enum"]
    if len(allowed_values) == 1:
        literal_type = Literal[allowed_values[0]]
    else:
        literal_type = Literal[tuple(allowed_values)]

    description = field_def.get("description")
    default_value = field_def.get("default")

    # A missing (or None) default on a non-required field makes it nullable;
    # default_value is already None in that case.
    if default_value is None and not is_required:
        literal_type = Union[literal_type, type(None)]  # type: ignore[assignment]

    return literal_type, _create_field_info(description, default_value, is_required)  # type: ignore[return-value]
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _process_regular_field(field_name: str, field_def: Dict[str, Any], is_required: bool) -> Tuple[Type, Field]:  # type: ignore[type-arg]
    """Turn a non-enum field definition into a Python type plus field info.

    The type comes from ``parse_field`` and is widened with ``None`` by
    ``_make_optional_if_needed`` when the field is neither required nor has a
    non-``None`` default. ``field_name`` is accepted but not used in the body.
    """
    resolved_type = parse_field(field_def)
    default_value = field_def.get("default")

    # A default of None counts as "no default"; it is then passed on as None.
    field_type = _make_optional_if_needed(resolved_type, is_required, default_value is not None)
    field_info = _create_field_info(field_def.get("description"), default_value, is_required)
    return field_type, field_info
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# ============================================================================
|
|
203
|
+
# Main Schema Processing - Clean and focused
|
|
204
|
+
# ============================================================================
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def deserialize_base_model(json_schema: Dict[str, Any]) -> Type[BaseModel]:
    """Build a Pydantic model class from a JSON schema dictionary.

    The model is named after the schema's ``title`` (``"DynamicModel"`` when
    absent). References are inlined with ``dereference_json_schema`` first;
    each entry of ``properties`` then becomes one field, handled by
    ``_process_enum_field`` when it has an ``enum`` key and by
    ``_process_regular_field`` otherwise.
    """
    model_name = json_schema.get("title", "DynamicModel")
    schema = dereference_json_schema(json_schema)
    properties = schema.get("properties", {})
    required_names = set(schema.get("required", []))

    def build_field(name, definition):
        # Enum fields get a Literal type; everything else goes through parse_field.
        processor = _process_enum_field if "enum" in definition else _process_regular_field
        field_type, field_info = processor(name, definition, name in required_names)
        return field_type, field_info

    fields = {name: build_field(name, definition) for name, definition in properties.items()}
    return create_model(model_name, **fields)
|
|
@@ -79,7 +79,7 @@ __all__ = ["fillna", "FillNaResponse"]
|
|
|
79
79
|
def get_examples(df: pd.DataFrame, target_column_name: str, max_examples: int) -> List[Dict]:
|
|
80
80
|
examples: List[Dict] = []
|
|
81
81
|
|
|
82
|
-
samples: pd.DataFrame = df.sample(frac=1)
|
|
82
|
+
samples: pd.DataFrame = df.sample(frac=1).reset_index(drop=True).drop_duplicates()
|
|
83
83
|
samples = samples.dropna(subset=[target_column_name])
|
|
84
84
|
|
|
85
85
|
for i, row in samples.head(max_examples).iterrows():
|
|
@@ -109,7 +109,7 @@ def get_instructions(df: pd.DataFrame, target_column_name: str, max_examples: in
|
|
|
109
109
|
output_value=json.dumps({"index": row["index"], "output": row["output"]}, ensure_ascii=False),
|
|
110
110
|
)
|
|
111
111
|
|
|
112
|
-
return builder.build()
|
|
112
|
+
return builder.improve().build()
|
|
113
113
|
|
|
114
114
|
|
|
115
115
|
class FillNaResponse(BaseModel):
|