openaivec 0.14.1__tar.gz → 0.14.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. {openaivec-0.14.1 → openaivec-0.14.2}/PKG-INFO +1 -1
  2. openaivec-0.14.2/src/openaivec/_serialize.py +230 -0
  3. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/table/fillna.py +2 -2
  4. openaivec-0.14.2/tests/test_serialize_pydantic_v2_compliance.py +1045 -0
  5. openaivec-0.14.1/src/openaivec/_serialize.py +0 -233
  6. {openaivec-0.14.1 → openaivec-0.14.2}/.env.example +0 -0
  7. {openaivec-0.14.1 → openaivec-0.14.2}/.github/copilot-instructions.md +0 -0
  8. {openaivec-0.14.1 → openaivec-0.14.2}/.github/workflows/python-mkdocs.yml +0 -0
  9. {openaivec-0.14.1 → openaivec-0.14.2}/.github/workflows/python-package.yml +0 -0
  10. {openaivec-0.14.1 → openaivec-0.14.2}/.github/workflows/python-test.yml +0 -0
  11. {openaivec-0.14.1 → openaivec-0.14.2}/.github/workflows/python-update.yml +0 -0
  12. {openaivec-0.14.1 → openaivec-0.14.2}/.gitignore +0 -0
  13. {openaivec-0.14.1 → openaivec-0.14.2}/CODE_OF_CONDUCT.md +0 -0
  14. {openaivec-0.14.1 → openaivec-0.14.2}/LICENSE +0 -0
  15. {openaivec-0.14.1 → openaivec-0.14.2}/README.md +0 -0
  16. {openaivec-0.14.1 → openaivec-0.14.2}/SECURITY.md +0 -0
  17. {openaivec-0.14.1 → openaivec-0.14.2}/SUPPORT.md +0 -0
  18. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/main.md +0 -0
  19. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/pandas_ext.md +0 -0
  20. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/spark.md +0 -0
  21. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/task.md +0 -0
  22. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
  23. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
  24. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
  25. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
  26. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
  27. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
  28. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
  29. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
  30. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
  31. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
  32. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
  33. {openaivec-0.14.1 → openaivec-0.14.2}/docs/api/tasks/nlp/translation.md +0 -0
  34. {openaivec-0.14.1 → openaivec-0.14.2}/docs/index.md +0 -0
  35. {openaivec-0.14.1 → openaivec-0.14.2}/docs/robots.txt +0 -0
  36. {openaivec-0.14.1 → openaivec-0.14.2}/mkdocs.yml +0 -0
  37. {openaivec-0.14.1 → openaivec-0.14.2}/pyproject.toml +0 -0
  38. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/__init__.py +0 -0
  39. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_di.py +0 -0
  40. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_embeddings.py +0 -0
  41. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_log.py +0 -0
  42. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_model.py +0 -0
  43. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_optimize.py +0 -0
  44. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_prompt.py +0 -0
  45. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_provider.py +0 -0
  46. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_proxy.py +0 -0
  47. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_responses.py +0 -0
  48. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/_util.py +0 -0
  49. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/pandas_ext.py +0 -0
  50. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/spark.py +0 -0
  51. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/__init__.py +0 -0
  52. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/__init__.py +0 -0
  53. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
  54. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
  55. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
  56. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
  57. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
  58. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
  59. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/__init__.py +0 -0
  60. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
  61. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
  62. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
  63. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
  64. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
  65. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/nlp/translation.py +0 -0
  66. {openaivec-0.14.1 → openaivec-0.14.2}/src/openaivec/task/table/__init__.py +0 -0
  67. {openaivec-0.14.1 → openaivec-0.14.2}/tests/__init__.py +0 -0
  68. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_di.py +0 -0
  69. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_embeddings.py +0 -0
  70. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_optimize.py +0 -0
  71. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_pandas_ext.py +0 -0
  72. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_prompt.py +0 -0
  73. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_provider.py +0 -0
  74. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_proxy.py +0 -0
  75. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_proxy_suggester.py +0 -0
  76. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_responses.py +0 -0
  77. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_serialize.py +0 -0
  78. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_spark.py +0 -0
  79. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_task.py +0 -0
  80. {openaivec-0.14.1 → openaivec-0.14.2}/tests/test_util.py +0 -0
  81. {openaivec-0.14.1 → openaivec-0.14.2}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 0.14.1
3
+ Version: 0.14.2
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -0,0 +1,230 @@
1
+ """Refactored serialization utilities for Pydantic BaseModel classes.
2
+
3
+ This module provides utilities for converting Pydantic BaseModel classes
4
+ to and from JSON schema representations with simplified, maintainable code.
5
+ """
6
+
7
+ from typing import Any, Dict, List, Literal, Tuple, Type, Union
8
+
9
+ from pydantic import BaseModel, Field, create_model
10
+
11
+ __all__ = []
12
+
13
+
14
def serialize_base_model(obj: Type[BaseModel]) -> Dict[str, Any]:
    """Return the JSON schema describing a Pydantic model class.

    Args:
        obj: The model class whose schema is requested.

    Returns:
        Whatever ``obj.model_json_schema()`` produces.
    """
    schema = obj.model_json_schema()
    return schema
17
+
18
+
19
def dereference_json_schema(json_schema: Dict[str, Any]) -> Dict[str, Any]:
    """Inline every ``$ref`` pointer of a JSON schema and drop its ``$defs`` table.

    References are looked up by the last path segment of the pointer in the
    top-level ``$defs`` mapping. Keywords written next to a ``$ref`` (for
    example ``default`` or ``description``) are kept and take precedence over
    the same keywords of the referenced definition. A ``$ref`` at the root of
    the schema is resolved like any other.

    Args:
        json_schema: The schema to expand. It is not modified.

    Returns:
        A new schema without the ``$defs`` key in which each resolvable
        reference is replaced by a copy of its definition. A reference that
        points back to a definition currently being expanded becomes
        ``{"type": "object", "description": "Circular reference to <name>"}``;
        a reference to an unknown definition becomes
        ``{"type": "object", "description": "Invalid reference to <name>"}``.
    """
    model_map = json_schema.get("$defs", {})

    def dereference(obj: Any, current_path: Tuple[str, ...] = ()) -> Any:
        if isinstance(obj, list):
            return [dereference(item, current_path) for item in obj]
        if not isinstance(obj, dict):
            return obj

        target = obj.get("$ref")
        if not isinstance(target, str):
            # No pointer here. A non-string "$ref" is an ordinary key (e.g. a
            # property that happens to be named "$ref"), so just recurse.
            return {key: dereference(value, current_path) for key, value in obj.items()}

        ref = target.split("/")[-1]

        if ref in current_path:
            # Already expanding this definition: break the cycle.
            return {"type": "object", "description": f"Circular reference to {ref}"}
        if ref not in model_map:
            return {"type": "object", "description": f"Invalid reference to {ref}"}

        resolved = dereference(model_map[ref], current_path + (ref,))

        # Keywords next to "$ref" carry field-level information that the
        # shared definition cannot know about, so they must survive inlining.
        siblings = {
            key: dereference(value, current_path)
            for key, value in obj.items()
            if key != "$ref"
        }
        if siblings and isinstance(resolved, dict):
            return {**resolved, **siblings}
        return resolved

    # Treat the schema minus "$defs" as one object so that a root-level
    # "$ref" is expanded instead of being copied as a dangling pointer.
    root = {key: value for key, value in json_schema.items() if key != "$defs"}
    return dereference(root)
57
+
58
+
59
+ # ============================================================================
60
+ # Type Resolution - Separated into focused functions
61
+ # ============================================================================
62
+
63
+
64
+ def _resolve_union_type(union_options: List[Dict[str, Any]]) -> Type:
65
+ """Resolve anyOf/oneOf to Union type."""
66
+ union_types = []
67
+ for option in union_options:
68
+ if option.get("type") == "null":
69
+ union_types.append(type(None))
70
+ else:
71
+ union_types.append(parse_field(option))
72
+
73
+ if len(union_types) == 1:
74
+ return union_types[0]
75
+ elif len(union_types) == 2 and type(None) in union_types:
76
+ # Optional type: T | None
77
+ non_none_type = next(t for t in union_types if t is not type(None))
78
+ return Union[non_none_type, type(None)] # type: ignore[return-value]
79
+ else:
80
+ return Union[tuple(union_types)] # type: ignore[return-value]
81
+
82
+
83
+ def _resolve_basic_type(type_name: str, field_def: Dict[str, Any]) -> Type:
84
+ """Resolve basic JSON schema types to Python types."""
85
+ type_mapping = {
86
+ "string": str,
87
+ "integer": int,
88
+ "number": float,
89
+ "boolean": bool,
90
+ "null": type(None),
91
+ }
92
+
93
+ if type_name in type_mapping:
94
+ return type_mapping[type_name] # type: ignore[return-value]
95
+ elif type_name == "object":
96
+ # Check if it's a nested model or generic dict
97
+ if "properties" in field_def:
98
+ return deserialize_base_model(field_def)
99
+ else:
100
+ return dict
101
+ elif type_name == "array":
102
+ if "items" in field_def:
103
+ inner_type = parse_field(field_def["items"])
104
+ return List[inner_type]
105
+ else:
106
+ return List[Any]
107
+ else:
108
+ raise ValueError(f"Unsupported type: {type_name}")
109
+
110
+
111
def parse_field(field_def: Dict[str, Any]) -> Type:
    """Convert one JSON schema field definition into a Python type.

    ``anyOf`` is checked before ``oneOf``; either one is handed to
    ``_resolve_union_type``. Without a union keyword the ``type`` keyword is
    resolved by ``_resolve_basic_type``.

    Args:
        field_def: The schema fragment describing the field.

    Returns:
        The resolved type, or ``Any`` when the definition has neither a union
        keyword nor a ``type`` keyword.
    """
    for union_keyword in ("anyOf", "oneOf"):
        if union_keyword in field_def:
            return _resolve_union_type(field_def[union_keyword])

    try:
        type_name = field_def["type"]
    except KeyError:
        # Untyped schema fragment: accept anything.
        return Any  # type: ignore[return-value]

    return _resolve_basic_type(type_name, field_def)
127
+
128
+
129
+ # ============================================================================
130
+ # Field Information Creation - Centralized logic
131
+ # ============================================================================
132
+
133
+
134
def _create_field_info(description: str | None, default_value: Any, is_required: bool) -> Field:  # type: ignore[type-arg]
    """Build the ``Field(...)`` object for a model attribute.

    Args:
        description: Field description; omitted from the call when falsy.
        default_value: Default to attach. Not passed at all when the field is
            required and this is ``None``.
        is_required: Whether the schema lists the field as required.

    Returns:
        The ``Field`` result carrying the applicable keyword arguments.
    """
    field_kwargs: Dict[str, Any] = {}

    # Only a required field whose default is None is left without a default.
    if not (is_required and default_value is None):
        field_kwargs["default"] = default_value

    if description:
        field_kwargs["description"] = description

    return Field(**field_kwargs)
142
+
143
+
144
+ def _make_optional_if_needed(field_type: Type, is_required: bool, has_default: bool) -> Type:
145
+ """Make field type optional if needed."""
146
+ if is_required or has_default:
147
+ return field_type
148
+
149
+ # Check if already nullable
150
+ if hasattr(field_type, "__origin__") and field_type.__origin__ is Union and type(None) in field_type.__args__:
151
+ return field_type
152
+
153
+ # Make optional
154
+ return Union[field_type, type(None)] # type: ignore[return-value]
155
+
156
+
157
+ # ============================================================================
158
+ # Field Processing - Separated enum and regular field logic
159
+ # ============================================================================
160
+
161
+
162
def _process_enum_field(field_name: str, field_def: Dict[str, Any], is_required: bool) -> Tuple[Type, Field]:  # type: ignore[type-arg]
    """Build the ``(type, Field)`` pair for a field that declares ``enum``.

    Args:
        field_name: Name of the field (not used by the computation).
        field_def: The field definition; must contain ``enum``.
        is_required: Whether the schema lists the field as required.

    Returns:
        A ``Literal`` over the enum values together with its field info. The
        ``Literal`` is additionally unioned with ``None`` when the field is
        not required and carries no non-``None`` default.
    """
    allowed = field_def["enum"]
    literal_type = Literal[allowed[0]] if len(allowed) == 1 else Literal[tuple(allowed)]

    default_value = field_def.get("default")
    if not is_required and default_value is None:
        # Field may be omitted and has no default: allow None.
        literal_type = Union[literal_type, type(None)]  # type: ignore[assignment]

    field_info = _create_field_info(field_def.get("description"), default_value, is_required)
    return literal_type, field_info  # type: ignore[return-value]
183
+
184
+
185
def _process_regular_field(field_name: str, field_def: Dict[str, Any], is_required: bool) -> Tuple[Type, Field]:  # type: ignore[type-arg]
    """Build the ``(type, Field)`` pair for a field without an ``enum``.

    Args:
        field_name: Name of the field (not used by the computation).
        field_def: The field definition to parse.
        is_required: Whether the schema lists the field as required.

    Returns:
        The type from ``parse_field`` (passed through
        ``_make_optional_if_needed``) together with its field info.
    """
    default_value = field_def.get("default")

    # A default of None counts as "no default" for the optionality decision.
    resolved_type = _make_optional_if_needed(
        parse_field(field_def),
        is_required,
        default_value is not None,
    )

    field_info = _create_field_info(field_def.get("description"), default_value, is_required)
    return resolved_type, field_info
200
+
201
+
202
+ # ============================================================================
203
+ # Main Schema Processing - Clean and focused
204
+ # ============================================================================
205
+
206
+
207
def deserialize_base_model(json_schema: Dict[str, Any]) -> Type[BaseModel]:
    """Create a Pydantic model class from a JSON schema.

    The schema is first passed through ``dereference_json_schema``; each entry
    of ``properties`` then becomes one model field.

    Args:
        json_schema: The schema to convert. Its ``title`` names the model,
            falling back to ``"DynamicModel"``.

    Returns:
        The class returned by ``create_model``.
    """
    resolved = dereference_json_schema(json_schema)
    required_names = set(resolved.get("required", []))

    model_fields = {}
    for name, definition in resolved.get("properties", {}).items():
        # Enum fields become Literal types; everything else goes the regular route.
        build = _process_enum_field if "enum" in definition else _process_regular_field
        annotation, info = build(name, definition, name in required_names)
        model_fields[name] = (annotation, info)

    model_name = json_schema.get("title", "DynamicModel")
    return create_model(model_name, **model_fields)
@@ -79,7 +79,7 @@ __all__ = ["fillna", "FillNaResponse"]
79
79
  def get_examples(df: pd.DataFrame, target_column_name: str, max_examples: int) -> List[Dict]:
80
80
  examples: List[Dict] = []
81
81
 
82
- samples: pd.DataFrame = df.sample(frac=1)
82
+ samples: pd.DataFrame = df.sample(frac=1).reset_index(drop=True).drop_duplicates()
83
83
  samples = samples.dropna(subset=[target_column_name])
84
84
 
85
85
  for i, row in samples.head(max_examples).iterrows():
@@ -109,7 +109,7 @@ def get_instructions(df: pd.DataFrame, target_column_name: str, max_examples: in
109
109
  output_value=json.dumps({"index": row["index"], "output": row["output"]}, ensure_ascii=False),
110
110
  )
111
111
 
112
- return builder.build()
112
+ return builder.improve().build()
113
113
 
114
114
 
115
115
  class FillNaResponse(BaseModel):