openaivec 0.14.0__tar.gz → 0.14.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openaivec-0.14.0 → openaivec-0.14.2}/PKG-INFO +3 -3
- {openaivec-0.14.0 → openaivec-0.14.2}/README.md +2 -2
- openaivec-0.14.2/docs/api/main.md +19 -0
- openaivec-0.14.2/docs/api/pandas_ext.md +3 -0
- openaivec-0.14.2/docs/api/spark.md +3 -0
- openaivec-0.14.2/docs/api/task.md +3 -0
- openaivec-0.14.2/docs/api/tasks/customer_support/customer_sentiment.md +3 -0
- openaivec-0.14.2/docs/api/tasks/customer_support/inquiry_classification.md +3 -0
- openaivec-0.14.2/docs/api/tasks/customer_support/inquiry_summary.md +3 -0
- openaivec-0.14.2/docs/api/tasks/customer_support/intent_analysis.md +3 -0
- openaivec-0.14.2/docs/api/tasks/customer_support/response_suggestion.md +3 -0
- openaivec-0.14.2/docs/api/tasks/customer_support/urgency_analysis.md +3 -0
- openaivec-0.14.2/docs/api/tasks/nlp/dependency_parsing.md +3 -0
- openaivec-0.14.2/docs/api/tasks/nlp/keyword_extraction.md +3 -0
- openaivec-0.14.2/docs/api/tasks/nlp/morphological_analysis.md +3 -0
- openaivec-0.14.2/docs/api/tasks/nlp/named_entity_recognition.md +3 -0
- openaivec-0.14.2/docs/api/tasks/nlp/sentiment_analysis.md +3 -0
- openaivec-0.14.2/docs/api/tasks/nlp/translation.md +3 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/docs/index.md +2 -4
- {openaivec-0.14.0 → openaivec-0.14.2}/mkdocs.yml +19 -2
- openaivec-0.14.2/src/openaivec/_serialize.py +230 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/table/fillna.py +2 -2
- openaivec-0.14.2/tests/test_serialize_pydantic_v2_compliance.py +1045 -0
- openaivec-0.14.0/docs/api/main.md +0 -118
- openaivec-0.14.0/docs/api/pandas_ext.md +0 -15
- openaivec-0.14.0/docs/api/spark.md +0 -15
- openaivec-0.14.0/docs/api/task.md +0 -19
- openaivec-0.14.0/docs/api/tasks/customer_support/customer_sentiment.md +0 -3
- openaivec-0.14.0/docs/api/tasks/customer_support/inquiry_classification.md +0 -3
- openaivec-0.14.0/docs/api/tasks/customer_support/inquiry_summary.md +0 -3
- openaivec-0.14.0/docs/api/tasks/customer_support/intent_analysis.md +0 -3
- openaivec-0.14.0/docs/api/tasks/customer_support/response_suggestion.md +0 -3
- openaivec-0.14.0/docs/api/tasks/customer_support/urgency_analysis.md +0 -3
- openaivec-0.14.0/docs/api/tasks/nlp/dependency_parsing.md +0 -15
- openaivec-0.14.0/docs/api/tasks/nlp/keyword_extraction.md +0 -15
- openaivec-0.14.0/docs/api/tasks/nlp/morphological_analysis.md +0 -15
- openaivec-0.14.0/docs/api/tasks/nlp/named_entity_recognition.md +0 -15
- openaivec-0.14.0/docs/api/tasks/nlp/sentiment_analysis.md +0 -15
- openaivec-0.14.0/docs/api/tasks/nlp/translation.md +0 -15
- openaivec-0.14.0/src/openaivec/_serialize.py +0 -233
- {openaivec-0.14.0 → openaivec-0.14.2}/.env.example +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/.github/copilot-instructions.md +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/.github/workflows/python-mkdocs.yml +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/.github/workflows/python-package.yml +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/.github/workflows/python-test.yml +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/.github/workflows/python-update.yml +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/.gitignore +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/CODE_OF_CONDUCT.md +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/LICENSE +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/SECURITY.md +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/SUPPORT.md +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/docs/robots.txt +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/pyproject.toml +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/__init__.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_di.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_embeddings.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_log.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_model.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_optimize.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_prompt.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_provider.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_proxy.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_responses.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_util.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/pandas_ext.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/spark.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/__init__.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/__init__.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/__init__.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/translation.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/table/__init__.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/__init__.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_di.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_embeddings.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_optimize.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_pandas_ext.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_prompt.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_provider.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_proxy.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_proxy_suggester.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_responses.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_serialize.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_spark.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_task.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_util.py +0 -0
- {openaivec-0.14.0 → openaivec-0.14.2}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openaivec
|
|
3
|
-
Version: 0.14.0
|
|
3
|
+
Version: 0.14.2
|
|
4
4
|
Summary: Generative mutation for tabular calculation
|
|
5
5
|
Project-URL: Homepage, https://microsoft.github.io/openaivec/
|
|
6
6
|
Project-URL: Repository, https://github.com/microsoft/openaivec
|
|
@@ -98,7 +98,7 @@ survey_responses.assign(
|
|
|
98
98
|
).ai.extract("structured") # Auto-expands to columns
|
|
99
99
|
```
|
|
100
100
|
|
|
101
|
-
📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/)**
|
|
101
|
+
📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
|
|
102
102
|
|
|
103
103
|
# Overview
|
|
104
104
|
|
|
@@ -746,7 +746,7 @@ uv run ruff check . --fix
|
|
|
746
746
|
📓 **[Survey data transformation →](https://microsoft.github.io/openaivec/examples/survey_transformation/)** - Unstructured to structured data
|
|
747
747
|
📓 **[Asynchronous processing examples →](https://microsoft.github.io/openaivec/examples/aio/)** - High-performance async workflows
|
|
748
748
|
📓 **[Auto-generate FAQs from documents →](https://microsoft.github.io/openaivec/examples/generate_faq/)** - Create FAQs using AI
|
|
749
|
-
📓 **[All examples →](https://microsoft.github.io/openaivec/examples/)** - Complete collection of tutorials and use cases
|
|
749
|
+
📓 **[All examples →](https://microsoft.github.io/openaivec/examples/pandas/)** - Complete collection of tutorials and use cases
|
|
750
750
|
|
|
751
751
|
## Community
|
|
752
752
|
|
|
@@ -72,7 +72,7 @@ survey_responses.assign(
|
|
|
72
72
|
).ai.extract("structured") # Auto-expands to columns
|
|
73
73
|
```
|
|
74
74
|
|
|
75
|
-
📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/)**
|
|
75
|
+
📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
|
|
76
76
|
|
|
77
77
|
# Overview
|
|
78
78
|
|
|
@@ -720,7 +720,7 @@ uv run ruff check . --fix
|
|
|
720
720
|
📓 **[Survey data transformation →](https://microsoft.github.io/openaivec/examples/survey_transformation/)** - Unstructured to structured data
|
|
721
721
|
📓 **[Asynchronous processing examples →](https://microsoft.github.io/openaivec/examples/aio/)** - High-performance async workflows
|
|
722
722
|
📓 **[Auto-generate FAQs from documents →](https://microsoft.github.io/openaivec/examples/generate_faq/)** - Create FAQs using AI
|
|
723
|
-
📓 **[All examples →](https://microsoft.github.io/openaivec/examples/)** - Complete collection of tutorials and use cases
|
|
723
|
+
📓 **[All examples →](https://microsoft.github.io/openaivec/examples/pandas/)** - Complete collection of tutorials and use cases
|
|
724
724
|
|
|
725
725
|
## Community
|
|
726
726
|
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Main Package API
|
|
2
|
+
|
|
3
|
+
The main `openaivec` package provides the core classes for AI-powered data processing.
|
|
4
|
+
|
|
5
|
+
## Core Classes
|
|
6
|
+
|
|
7
|
+
All core functionality is accessible through the main package imports:
|
|
8
|
+
|
|
9
|
+
::: openaivec.BatchResponses
|
|
10
|
+
|
|
11
|
+
::: openaivec.AsyncBatchResponses
|
|
12
|
+
|
|
13
|
+
::: openaivec.BatchEmbeddings
|
|
14
|
+
|
|
15
|
+
::: openaivec.AsyncBatchEmbeddings
|
|
16
|
+
|
|
17
|
+
## Prompt Building
|
|
18
|
+
|
|
19
|
+
::: openaivec.FewShotPromptBuilder
|
|
@@ -67,12 +67,10 @@ Get started with these comprehensive examples:
|
|
|
67
67
|
|
|
68
68
|
Detailed documentation for all components:
|
|
69
69
|
|
|
70
|
+
🔗 **[Main Package](api/main.md)** - Core classes (BatchResponses, BatchEmbeddings, FewShotPromptBuilder)
|
|
70
71
|
🔗 **[pandas_ext](api/pandas_ext.md)** - Pandas Series and DataFrame extensions
|
|
71
72
|
🔗 **[spark](api/spark.md)** - Apache Spark UDF builders
|
|
72
|
-
🔗 **[
|
|
73
|
-
🔗 **[embeddings](api/embeddings.md)** - Batch embedding generation
|
|
74
|
-
🔗 **[prompt](api/prompt.md)** - Few-shot prompt building
|
|
75
|
-
🔗 **[util](api/util.md)** - Utility functions and helpers
|
|
73
|
+
🔗 **[task](api/task.md)** - Pre-built task modules for NLP and customer support
|
|
76
74
|
|
|
77
75
|
## Quick Start
|
|
78
76
|
|
|
@@ -131,8 +131,25 @@ plugins:
|
|
|
131
131
|
python:
|
|
132
132
|
paths:
|
|
133
133
|
- src
|
|
134
|
-
|
|
135
|
-
|
|
134
|
+
options:
|
|
135
|
+
docstring_style: google
|
|
136
|
+
show_submodules: true
|
|
137
|
+
show_source: true
|
|
138
|
+
show_root_heading: true
|
|
139
|
+
show_root_toc_entry: true
|
|
140
|
+
heading_level: 2
|
|
141
|
+
members_order: source
|
|
142
|
+
show_signature_annotations: true
|
|
143
|
+
separate_signature: true
|
|
144
|
+
show_bases: true
|
|
145
|
+
show_docstring_parameters: true
|
|
146
|
+
show_docstring_returns: true
|
|
147
|
+
show_docstring_examples: true
|
|
148
|
+
show_category_heading: true
|
|
149
|
+
group_by_category: true
|
|
150
|
+
show_if_no_docstring: false
|
|
151
|
+
inherited_members: false
|
|
152
|
+
merge_init_into_class: true
|
|
136
153
|
|
|
137
154
|
markdown_extensions:
|
|
138
155
|
- abbr
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""Refactored serialization utilities for Pydantic BaseModel classes.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for converting Pydantic BaseModel classes
|
|
4
|
+
to and from JSON schema representations with simplified, maintainable code.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List, Literal, Tuple, Type, Union
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field, create_model
|
|
10
|
+
|
|
11
|
+
__all__ = []
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def serialize_base_model(obj: Type[BaseModel]) -> Dict[str, Any]:
|
|
15
|
+
"""Serialize a Pydantic BaseModel to JSON schema."""
|
|
16
|
+
return obj.model_json_schema()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def dereference_json_schema(json_schema: Dict[str, Any]) -> Dict[str, Any]:
|
|
20
|
+
"""Dereference JSON schema by resolving $ref pointers with circular reference protection."""
|
|
21
|
+
model_map = json_schema.get("$defs", {})
|
|
22
|
+
|
|
23
|
+
def dereference(obj, current_path=None):
|
|
24
|
+
if current_path is None:
|
|
25
|
+
current_path = []
|
|
26
|
+
|
|
27
|
+
if isinstance(obj, dict):
|
|
28
|
+
if "$ref" in obj:
|
|
29
|
+
ref = obj["$ref"].split("/")[-1]
|
|
30
|
+
|
|
31
|
+
# Check for circular reference
|
|
32
|
+
if ref in current_path:
|
|
33
|
+
# Return a placeholder to break the cycle
|
|
34
|
+
return {"type": "object", "description": f"Circular reference to {ref}"}
|
|
35
|
+
|
|
36
|
+
if ref in model_map:
|
|
37
|
+
# Add to path and recurse
|
|
38
|
+
new_path = current_path + [ref]
|
|
39
|
+
return dereference(model_map[ref], new_path)
|
|
40
|
+
else:
|
|
41
|
+
# Invalid reference, return placeholder
|
|
42
|
+
return {"type": "object", "description": f"Invalid reference to {ref}"}
|
|
43
|
+
else:
|
|
44
|
+
return {k: dereference(v, current_path) for k, v in obj.items()}
|
|
45
|
+
elif isinstance(obj, list):
|
|
46
|
+
return [dereference(x, current_path) for x in obj]
|
|
47
|
+
else:
|
|
48
|
+
return obj
|
|
49
|
+
|
|
50
|
+
result = {}
|
|
51
|
+
for k, v in json_schema.items():
|
|
52
|
+
if k == "$defs":
|
|
53
|
+
continue
|
|
54
|
+
result[k] = dereference(v)
|
|
55
|
+
|
|
56
|
+
return result
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ============================================================================
|
|
60
|
+
# Type Resolution - Separated into focused functions
|
|
61
|
+
# ============================================================================
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _resolve_union_type(union_options: List[Dict[str, Any]]) -> Type:
|
|
65
|
+
"""Resolve anyOf/oneOf to Union type."""
|
|
66
|
+
union_types = []
|
|
67
|
+
for option in union_options:
|
|
68
|
+
if option.get("type") == "null":
|
|
69
|
+
union_types.append(type(None))
|
|
70
|
+
else:
|
|
71
|
+
union_types.append(parse_field(option))
|
|
72
|
+
|
|
73
|
+
if len(union_types) == 1:
|
|
74
|
+
return union_types[0]
|
|
75
|
+
elif len(union_types) == 2 and type(None) in union_types:
|
|
76
|
+
# Optional type: T | None
|
|
77
|
+
non_none_type = next(t for t in union_types if t is not type(None))
|
|
78
|
+
return Union[non_none_type, type(None)] # type: ignore[return-value]
|
|
79
|
+
else:
|
|
80
|
+
return Union[tuple(union_types)] # type: ignore[return-value]
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _resolve_basic_type(type_name: str, field_def: Dict[str, Any]) -> Type:
|
|
84
|
+
"""Resolve basic JSON schema types to Python types."""
|
|
85
|
+
type_mapping = {
|
|
86
|
+
"string": str,
|
|
87
|
+
"integer": int,
|
|
88
|
+
"number": float,
|
|
89
|
+
"boolean": bool,
|
|
90
|
+
"null": type(None),
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
if type_name in type_mapping:
|
|
94
|
+
return type_mapping[type_name] # type: ignore[return-value]
|
|
95
|
+
elif type_name == "object":
|
|
96
|
+
# Check if it's a nested model or generic dict
|
|
97
|
+
if "properties" in field_def:
|
|
98
|
+
return deserialize_base_model(field_def)
|
|
99
|
+
else:
|
|
100
|
+
return dict
|
|
101
|
+
elif type_name == "array":
|
|
102
|
+
if "items" in field_def:
|
|
103
|
+
inner_type = parse_field(field_def["items"])
|
|
104
|
+
return List[inner_type]
|
|
105
|
+
else:
|
|
106
|
+
return List[Any]
|
|
107
|
+
else:
|
|
108
|
+
raise ValueError(f"Unsupported type: {type_name}")
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def parse_field(field_def: Dict[str, Any]) -> Type:
|
|
112
|
+
"""Parse a JSON schema field definition to a Python type.
|
|
113
|
+
|
|
114
|
+
Simplified version with clear separation of concerns.
|
|
115
|
+
"""
|
|
116
|
+
# Handle union types
|
|
117
|
+
if "anyOf" in field_def:
|
|
118
|
+
return _resolve_union_type(field_def["anyOf"])
|
|
119
|
+
if "oneOf" in field_def:
|
|
120
|
+
return _resolve_union_type(field_def["oneOf"])
|
|
121
|
+
|
|
122
|
+
# Handle basic types
|
|
123
|
+
if "type" not in field_def:
|
|
124
|
+
return Any # type: ignore[return-value]
|
|
125
|
+
|
|
126
|
+
return _resolve_basic_type(field_def["type"], field_def)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ============================================================================
|
|
130
|
+
# Field Information Creation - Centralized logic
|
|
131
|
+
# ============================================================================
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _create_field_info(description: str | None, default_value: Any, is_required: bool) -> Field: # type: ignore[type-arg]
|
|
135
|
+
"""Create Field info with consistent logic."""
|
|
136
|
+
if is_required and default_value is None:
|
|
137
|
+
# Required field without default
|
|
138
|
+
return Field(description=description) if description else Field()
|
|
139
|
+
else:
|
|
140
|
+
# Optional field or field with default
|
|
141
|
+
return Field(default=default_value, description=description) if description else Field(default=default_value)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _make_optional_if_needed(field_type: Type, is_required: bool, has_default: bool) -> Type:
|
|
145
|
+
"""Make field type optional if needed."""
|
|
146
|
+
if is_required or has_default:
|
|
147
|
+
return field_type
|
|
148
|
+
|
|
149
|
+
# Check if already nullable
|
|
150
|
+
if hasattr(field_type, "__origin__") and field_type.__origin__ is Union and type(None) in field_type.__args__:
|
|
151
|
+
return field_type
|
|
152
|
+
|
|
153
|
+
# Make optional
|
|
154
|
+
return Union[field_type, type(None)] # type: ignore[return-value]
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# ============================================================================
|
|
158
|
+
# Field Processing - Separated enum and regular field logic
|
|
159
|
+
# ============================================================================
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _process_enum_field(field_name: str, field_def: Dict[str, Any], is_required: bool) -> Tuple[Type, Field]: # type: ignore[type-arg]
|
|
163
|
+
"""Process enum field with Literal type."""
|
|
164
|
+
enum_values = field_def["enum"]
|
|
165
|
+
|
|
166
|
+
# Create Literal type
|
|
167
|
+
if len(enum_values) == 1:
|
|
168
|
+
literal_type = Literal[enum_values[0]]
|
|
169
|
+
else:
|
|
170
|
+
literal_type = Literal[tuple(enum_values)]
|
|
171
|
+
|
|
172
|
+
# Handle optionality
|
|
173
|
+
description = field_def.get("description")
|
|
174
|
+
default_value = field_def.get("default")
|
|
175
|
+
has_default = default_value is not None
|
|
176
|
+
|
|
177
|
+
if not is_required and not has_default:
|
|
178
|
+
literal_type = Union[literal_type, type(None)] # type: ignore[assignment]
|
|
179
|
+
default_value = None
|
|
180
|
+
|
|
181
|
+
field_info = _create_field_info(description, default_value, is_required)
|
|
182
|
+
return literal_type, field_info # type: ignore[return-value]
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _process_regular_field(field_name: str, field_def: Dict[str, Any], is_required: bool) -> Tuple[Type, Field]: # type: ignore[type-arg]
|
|
186
|
+
"""Process regular (non-enum) field."""
|
|
187
|
+
field_type = parse_field(field_def)
|
|
188
|
+
description = field_def.get("description")
|
|
189
|
+
default_value = field_def.get("default")
|
|
190
|
+
has_default = default_value is not None
|
|
191
|
+
|
|
192
|
+
# Handle optionality
|
|
193
|
+
field_type = _make_optional_if_needed(field_type, is_required, has_default)
|
|
194
|
+
|
|
195
|
+
if not is_required and not has_default:
|
|
196
|
+
default_value = None
|
|
197
|
+
|
|
198
|
+
field_info = _create_field_info(description, default_value, is_required)
|
|
199
|
+
return field_type, field_info
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# ============================================================================
|
|
203
|
+
# Main Schema Processing - Clean and focused
|
|
204
|
+
# ============================================================================
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def deserialize_base_model(json_schema: Dict[str, Any]) -> Type[BaseModel]:
|
|
208
|
+
"""Deserialize a JSON schema to a Pydantic BaseModel class.
|
|
209
|
+
|
|
210
|
+
Refactored version with clear separation of concerns and simplified logic.
|
|
211
|
+
"""
|
|
212
|
+
# Basic setup
|
|
213
|
+
title = json_schema.get("title", "DynamicModel")
|
|
214
|
+
dereferenced_schema = dereference_json_schema(json_schema)
|
|
215
|
+
properties = dereferenced_schema.get("properties", {})
|
|
216
|
+
required_fields = set(dereferenced_schema.get("required", []))
|
|
217
|
+
|
|
218
|
+
# Process each field
|
|
219
|
+
fields = {}
|
|
220
|
+
for field_name, field_def in properties.items():
|
|
221
|
+
is_required = field_name in required_fields
|
|
222
|
+
|
|
223
|
+
if "enum" in field_def:
|
|
224
|
+
field_type, field_info = _process_enum_field(field_name, field_def, is_required)
|
|
225
|
+
else:
|
|
226
|
+
field_type, field_info = _process_regular_field(field_name, field_def, is_required)
|
|
227
|
+
|
|
228
|
+
fields[field_name] = (field_type, field_info)
|
|
229
|
+
|
|
230
|
+
return create_model(title, **fields)
|
|
@@ -79,7 +79,7 @@ __all__ = ["fillna", "FillNaResponse"]
|
|
|
79
79
|
def get_examples(df: pd.DataFrame, target_column_name: str, max_examples: int) -> List[Dict]:
|
|
80
80
|
examples: List[Dict] = []
|
|
81
81
|
|
|
82
|
-
samples: pd.DataFrame = df.sample(frac=1)
|
|
82
|
+
samples: pd.DataFrame = df.sample(frac=1).reset_index(drop=True).drop_duplicates()
|
|
83
83
|
samples = samples.dropna(subset=[target_column_name])
|
|
84
84
|
|
|
85
85
|
for i, row in samples.head(max_examples).iterrows():
|
|
@@ -109,7 +109,7 @@ def get_instructions(df: pd.DataFrame, target_column_name: str, max_examples: in
|
|
|
109
109
|
output_value=json.dumps({"index": row["index"], "output": row["output"]}, ensure_ascii=False),
|
|
110
110
|
)
|
|
111
111
|
|
|
112
|
-
return builder.build()
|
|
112
|
+
return builder.improve().build()
|
|
113
113
|
|
|
114
114
|
|
|
115
115
|
class FillNaResponse(BaseModel):
|