openaivec 0.14.0__tar.gz → 0.14.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. {openaivec-0.14.0 → openaivec-0.14.2}/PKG-INFO +3 -3
  2. {openaivec-0.14.0 → openaivec-0.14.2}/README.md +2 -2
  3. openaivec-0.14.2/docs/api/main.md +19 -0
  4. openaivec-0.14.2/docs/api/pandas_ext.md +3 -0
  5. openaivec-0.14.2/docs/api/spark.md +3 -0
  6. openaivec-0.14.2/docs/api/task.md +3 -0
  7. openaivec-0.14.2/docs/api/tasks/customer_support/customer_sentiment.md +3 -0
  8. openaivec-0.14.2/docs/api/tasks/customer_support/inquiry_classification.md +3 -0
  9. openaivec-0.14.2/docs/api/tasks/customer_support/inquiry_summary.md +3 -0
  10. openaivec-0.14.2/docs/api/tasks/customer_support/intent_analysis.md +3 -0
  11. openaivec-0.14.2/docs/api/tasks/customer_support/response_suggestion.md +3 -0
  12. openaivec-0.14.2/docs/api/tasks/customer_support/urgency_analysis.md +3 -0
  13. openaivec-0.14.2/docs/api/tasks/nlp/dependency_parsing.md +3 -0
  14. openaivec-0.14.2/docs/api/tasks/nlp/keyword_extraction.md +3 -0
  15. openaivec-0.14.2/docs/api/tasks/nlp/morphological_analysis.md +3 -0
  16. openaivec-0.14.2/docs/api/tasks/nlp/named_entity_recognition.md +3 -0
  17. openaivec-0.14.2/docs/api/tasks/nlp/sentiment_analysis.md +3 -0
  18. openaivec-0.14.2/docs/api/tasks/nlp/translation.md +3 -0
  19. {openaivec-0.14.0 → openaivec-0.14.2}/docs/index.md +2 -4
  20. {openaivec-0.14.0 → openaivec-0.14.2}/mkdocs.yml +19 -2
  21. openaivec-0.14.2/src/openaivec/_serialize.py +230 -0
  22. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/table/fillna.py +2 -2
  23. openaivec-0.14.2/tests/test_serialize_pydantic_v2_compliance.py +1045 -0
  24. openaivec-0.14.0/docs/api/main.md +0 -118
  25. openaivec-0.14.0/docs/api/pandas_ext.md +0 -15
  26. openaivec-0.14.0/docs/api/spark.md +0 -15
  27. openaivec-0.14.0/docs/api/task.md +0 -19
  28. openaivec-0.14.0/docs/api/tasks/customer_support/customer_sentiment.md +0 -3
  29. openaivec-0.14.0/docs/api/tasks/customer_support/inquiry_classification.md +0 -3
  30. openaivec-0.14.0/docs/api/tasks/customer_support/inquiry_summary.md +0 -3
  31. openaivec-0.14.0/docs/api/tasks/customer_support/intent_analysis.md +0 -3
  32. openaivec-0.14.0/docs/api/tasks/customer_support/response_suggestion.md +0 -3
  33. openaivec-0.14.0/docs/api/tasks/customer_support/urgency_analysis.md +0 -3
  34. openaivec-0.14.0/docs/api/tasks/nlp/dependency_parsing.md +0 -15
  35. openaivec-0.14.0/docs/api/tasks/nlp/keyword_extraction.md +0 -15
  36. openaivec-0.14.0/docs/api/tasks/nlp/morphological_analysis.md +0 -15
  37. openaivec-0.14.0/docs/api/tasks/nlp/named_entity_recognition.md +0 -15
  38. openaivec-0.14.0/docs/api/tasks/nlp/sentiment_analysis.md +0 -15
  39. openaivec-0.14.0/docs/api/tasks/nlp/translation.md +0 -15
  40. openaivec-0.14.0/src/openaivec/_serialize.py +0 -233
  41. {openaivec-0.14.0 → openaivec-0.14.2}/.env.example +0 -0
  42. {openaivec-0.14.0 → openaivec-0.14.2}/.github/copilot-instructions.md +0 -0
  43. {openaivec-0.14.0 → openaivec-0.14.2}/.github/workflows/python-mkdocs.yml +0 -0
  44. {openaivec-0.14.0 → openaivec-0.14.2}/.github/workflows/python-package.yml +0 -0
  45. {openaivec-0.14.0 → openaivec-0.14.2}/.github/workflows/python-test.yml +0 -0
  46. {openaivec-0.14.0 → openaivec-0.14.2}/.github/workflows/python-update.yml +0 -0
  47. {openaivec-0.14.0 → openaivec-0.14.2}/.gitignore +0 -0
  48. {openaivec-0.14.0 → openaivec-0.14.2}/CODE_OF_CONDUCT.md +0 -0
  49. {openaivec-0.14.0 → openaivec-0.14.2}/LICENSE +0 -0
  50. {openaivec-0.14.0 → openaivec-0.14.2}/SECURITY.md +0 -0
  51. {openaivec-0.14.0 → openaivec-0.14.2}/SUPPORT.md +0 -0
  52. {openaivec-0.14.0 → openaivec-0.14.2}/docs/robots.txt +0 -0
  53. {openaivec-0.14.0 → openaivec-0.14.2}/pyproject.toml +0 -0
  54. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/__init__.py +0 -0
  55. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_di.py +0 -0
  56. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_embeddings.py +0 -0
  57. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_log.py +0 -0
  58. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_model.py +0 -0
  59. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_optimize.py +0 -0
  60. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_prompt.py +0 -0
  61. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_provider.py +0 -0
  62. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_proxy.py +0 -0
  63. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_responses.py +0 -0
  64. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/_util.py +0 -0
  65. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/pandas_ext.py +0 -0
  66. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/spark.py +0 -0
  67. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/__init__.py +0 -0
  68. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/__init__.py +0 -0
  69. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
  70. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
  71. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
  72. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
  73. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
  74. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
  75. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/__init__.py +0 -0
  76. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
  77. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
  78. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
  79. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
  80. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
  81. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/nlp/translation.py +0 -0
  82. {openaivec-0.14.0 → openaivec-0.14.2}/src/openaivec/task/table/__init__.py +0 -0
  83. {openaivec-0.14.0 → openaivec-0.14.2}/tests/__init__.py +0 -0
  84. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_di.py +0 -0
  85. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_embeddings.py +0 -0
  86. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_optimize.py +0 -0
  87. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_pandas_ext.py +0 -0
  88. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_prompt.py +0 -0
  89. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_provider.py +0 -0
  90. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_proxy.py +0 -0
  91. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_proxy_suggester.py +0 -0
  92. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_responses.py +0 -0
  93. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_serialize.py +0 -0
  94. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_spark.py +0 -0
  95. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_task.py +0 -0
  96. {openaivec-0.14.0 → openaivec-0.14.2}/tests/test_util.py +0 -0
  97. {openaivec-0.14.0 → openaivec-0.14.2}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 0.14.0
3
+ Version: 0.14.2
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -98,7 +98,7 @@ survey_responses.assign(
98
98
  ).ai.extract("structured") # Auto-expands to columns
99
99
  ```
100
100
 
101
- 📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/)**
101
+ 📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
102
102
 
103
103
  # Overview
104
104
 
@@ -746,7 +746,7 @@ uv run ruff check . --fix
746
746
  📓 **[Survey data transformation →](https://microsoft.github.io/openaivec/examples/survey_transformation/)** - Unstructured to structured data
747
747
  📓 **[Asynchronous processing examples →](https://microsoft.github.io/openaivec/examples/aio/)** - High-performance async workflows
748
748
  📓 **[Auto-generate FAQs from documents →](https://microsoft.github.io/openaivec/examples/generate_faq/)** - Create FAQs using AI
749
- 📓 **[All examples →](https://microsoft.github.io/openaivec/examples/)** - Complete collection of tutorials and use cases
749
+ 📓 **[All examples →](https://microsoft.github.io/openaivec/examples/pandas/)** - Complete collection of tutorials and use cases
750
750
 
751
751
  ## Community
752
752
 
@@ -72,7 +72,7 @@ survey_responses.assign(
72
72
  ).ai.extract("structured") # Auto-expands to columns
73
73
  ```
74
74
 
75
- 📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/)**
75
+ 📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
76
76
 
77
77
  # Overview
78
78
 
@@ -720,7 +720,7 @@ uv run ruff check . --fix
720
720
  📓 **[Survey data transformation →](https://microsoft.github.io/openaivec/examples/survey_transformation/)** - Unstructured to structured data
721
721
  📓 **[Asynchronous processing examples →](https://microsoft.github.io/openaivec/examples/aio/)** - High-performance async workflows
722
722
  📓 **[Auto-generate FAQs from documents →](https://microsoft.github.io/openaivec/examples/generate_faq/)** - Create FAQs using AI
723
- 📓 **[All examples →](https://microsoft.github.io/openaivec/examples/)** - Complete collection of tutorials and use cases
723
+ 📓 **[All examples →](https://microsoft.github.io/openaivec/examples/pandas/)** - Complete collection of tutorials and use cases
724
724
 
725
725
  ## Community
726
726
 
@@ -0,0 +1,19 @@
1
+ # Main Package API
2
+
3
+ The main `openaivec` package provides the core classes for AI-powered data processing.
4
+
5
+ ## Core Classes
6
+
7
+ All core functionality is accessible through the main package imports:
8
+
9
+ ::: openaivec.BatchResponses
10
+
11
+ ::: openaivec.AsyncBatchResponses
12
+
13
+ ::: openaivec.BatchEmbeddings
14
+
15
+ ::: openaivec.AsyncBatchEmbeddings
16
+
17
+ ## Prompt Building
18
+
19
+ ::: openaivec.FewShotPromptBuilder
@@ -0,0 +1,3 @@
1
+ # Pandas Extension
2
+
3
+ ::: openaivec.pandas_ext
@@ -0,0 +1,3 @@
1
+ # Spark Extension
2
+
3
+ ::: openaivec.spark
@@ -0,0 +1,3 @@
1
+ # Task Module
2
+
3
+ ::: openaivec.task
@@ -0,0 +1,3 @@
1
+ # Customer Sentiment Analysis
2
+
3
+ ::: openaivec.task.customer_support.customer_sentiment
@@ -0,0 +1,3 @@
1
+ # Inquiry Classification
2
+
3
+ ::: openaivec.task.customer_support.inquiry_classification
@@ -0,0 +1,3 @@
1
+ # Inquiry Summary
2
+
3
+ ::: openaivec.task.customer_support.inquiry_summary
@@ -0,0 +1,3 @@
1
+ # Intent Analysis
2
+
3
+ ::: openaivec.task.customer_support.intent_analysis
@@ -0,0 +1,3 @@
1
+ # Response Suggestion
2
+
3
+ ::: openaivec.task.customer_support.response_suggestion
@@ -0,0 +1,3 @@
1
+ # Urgency Analysis
2
+
3
+ ::: openaivec.task.customer_support.urgency_analysis
@@ -0,0 +1,3 @@
1
+ # Dependency Parsing Task
2
+
3
+ ::: openaivec.task.nlp.dependency_parsing
@@ -0,0 +1,3 @@
1
+ # Keyword Extraction Task
2
+
3
+ ::: openaivec.task.nlp.keyword_extraction
@@ -0,0 +1,3 @@
1
+ # Morphological Analysis Task
2
+
3
+ ::: openaivec.task.nlp.morphological_analysis
@@ -0,0 +1,3 @@
1
+ # Named Entity Recognition Task
2
+
3
+ ::: openaivec.task.nlp.named_entity_recognition
@@ -0,0 +1,3 @@
1
+ # Sentiment Analysis Task
2
+
3
+ ::: openaivec.task.nlp.sentiment_analysis
@@ -0,0 +1,3 @@
1
+ # Translation Task
2
+
3
+ ::: openaivec.task.nlp.translation
@@ -67,12 +67,10 @@ Get started with these comprehensive examples:
67
67
 
68
68
  Detailed documentation for all components:
69
69
 
70
+ 🔗 **[Main Package](api/main.md)** - Core classes (BatchResponses, BatchEmbeddings, FewShotPromptBuilder)
70
71
  🔗 **[pandas_ext](api/pandas_ext.md)** - Pandas Series and DataFrame extensions
71
72
  🔗 **[spark](api/spark.md)** - Apache Spark UDF builders
72
- 🔗 **[responses](api/responses.md)** - Batch response processing
73
- 🔗 **[embeddings](api/embeddings.md)** - Batch embedding generation
74
- 🔗 **[prompt](api/prompt.md)** - Few-shot prompt building
75
- 🔗 **[util](api/util.md)** - Utility functions and helpers
73
+ 🔗 **[task](api/task.md)** - Pre-built task modules for NLP and customer support
76
74
 
77
75
  ## Quick Start
78
76
 
@@ -131,8 +131,25 @@ plugins:
131
131
  python:
132
132
  paths:
133
133
  - src
134
- docstring_style: google
135
- show_submodules: true
134
+ options:
135
+ docstring_style: google
136
+ show_submodules: true
137
+ show_source: true
138
+ show_root_heading: true
139
+ show_root_toc_entry: true
140
+ heading_level: 2
141
+ members_order: source
142
+ show_signature_annotations: true
143
+ separate_signature: true
144
+ show_bases: true
145
+ show_docstring_parameters: true
146
+ show_docstring_returns: true
147
+ show_docstring_examples: true
148
+ show_category_heading: true
149
+ group_by_category: true
150
+ show_if_no_docstring: false
151
+ inherited_members: false
152
+ merge_init_into_class: true
136
153
 
137
154
  markdown_extensions:
138
155
  - abbr
@@ -0,0 +1,230 @@
1
+ """Refactored serialization utilities for Pydantic BaseModel classes.
2
+
3
+ This module provides utilities for converting Pydantic BaseModel classes
4
+ to and from JSON schema representations with simplified, maintainable code.
5
+ """
6
+
7
+ from typing import Any, Dict, List, Literal, Tuple, Type, Union
8
+
9
+ from pydantic import BaseModel, Field, create_model
10
+
11
+ __all__ = []
12
+
13
+
14
+ def serialize_base_model(obj: Type[BaseModel]) -> Dict[str, Any]:
15
+ """Serialize a Pydantic BaseModel to JSON schema."""
16
+ return obj.model_json_schema()
17
+
18
+
19
+ def dereference_json_schema(json_schema: Dict[str, Any]) -> Dict[str, Any]:
20
+ """Dereference JSON schema by resolving $ref pointers with circular reference protection."""
21
+ model_map = json_schema.get("$defs", {})
22
+
23
+ def dereference(obj, current_path=None):
24
+ if current_path is None:
25
+ current_path = []
26
+
27
+ if isinstance(obj, dict):
28
+ if "$ref" in obj:
29
+ ref = obj["$ref"].split("/")[-1]
30
+
31
+ # Check for circular reference
32
+ if ref in current_path:
33
+ # Return a placeholder to break the cycle
34
+ return {"type": "object", "description": f"Circular reference to {ref}"}
35
+
36
+ if ref in model_map:
37
+ # Add to path and recurse
38
+ new_path = current_path + [ref]
39
+ return dereference(model_map[ref], new_path)
40
+ else:
41
+ # Invalid reference, return placeholder
42
+ return {"type": "object", "description": f"Invalid reference to {ref}"}
43
+ else:
44
+ return {k: dereference(v, current_path) for k, v in obj.items()}
45
+ elif isinstance(obj, list):
46
+ return [dereference(x, current_path) for x in obj]
47
+ else:
48
+ return obj
49
+
50
+ result = {}
51
+ for k, v in json_schema.items():
52
+ if k == "$defs":
53
+ continue
54
+ result[k] = dereference(v)
55
+
56
+ return result
57
+
58
+
59
+ # ============================================================================
60
+ # Type Resolution - Separated into focused functions
61
+ # ============================================================================
62
+
63
+
64
+ def _resolve_union_type(union_options: List[Dict[str, Any]]) -> Type:
65
+ """Resolve anyOf/oneOf to Union type."""
66
+ union_types = []
67
+ for option in union_options:
68
+ if option.get("type") == "null":
69
+ union_types.append(type(None))
70
+ else:
71
+ union_types.append(parse_field(option))
72
+
73
+ if len(union_types) == 1:
74
+ return union_types[0]
75
+ elif len(union_types) == 2 and type(None) in union_types:
76
+ # Optional type: T | None
77
+ non_none_type = next(t for t in union_types if t is not type(None))
78
+ return Union[non_none_type, type(None)] # type: ignore[return-value]
79
+ else:
80
+ return Union[tuple(union_types)] # type: ignore[return-value]
81
+
82
+
83
+ def _resolve_basic_type(type_name: str, field_def: Dict[str, Any]) -> Type:
84
+ """Resolve basic JSON schema types to Python types."""
85
+ type_mapping = {
86
+ "string": str,
87
+ "integer": int,
88
+ "number": float,
89
+ "boolean": bool,
90
+ "null": type(None),
91
+ }
92
+
93
+ if type_name in type_mapping:
94
+ return type_mapping[type_name] # type: ignore[return-value]
95
+ elif type_name == "object":
96
+ # Check if it's a nested model or generic dict
97
+ if "properties" in field_def:
98
+ return deserialize_base_model(field_def)
99
+ else:
100
+ return dict
101
+ elif type_name == "array":
102
+ if "items" in field_def:
103
+ inner_type = parse_field(field_def["items"])
104
+ return List[inner_type]
105
+ else:
106
+ return List[Any]
107
+ else:
108
+ raise ValueError(f"Unsupported type: {type_name}")
109
+
110
+
111
+ def parse_field(field_def: Dict[str, Any]) -> Type:
112
+ """Parse a JSON schema field definition to a Python type.
113
+
114
+ Simplified version with clear separation of concerns.
115
+ """
116
+ # Handle union types
117
+ if "anyOf" in field_def:
118
+ return _resolve_union_type(field_def["anyOf"])
119
+ if "oneOf" in field_def:
120
+ return _resolve_union_type(field_def["oneOf"])
121
+
122
+ # Handle basic types
123
+ if "type" not in field_def:
124
+ return Any # type: ignore[return-value]
125
+
126
+ return _resolve_basic_type(field_def["type"], field_def)
127
+
128
+
129
+ # ============================================================================
130
+ # Field Information Creation - Centralized logic
131
+ # ============================================================================
132
+
133
+
134
+ def _create_field_info(description: str | None, default_value: Any, is_required: bool) -> Field: # type: ignore[type-arg]
135
+ """Create Field info with consistent logic."""
136
+ if is_required and default_value is None:
137
+ # Required field without default
138
+ return Field(description=description) if description else Field()
139
+ else:
140
+ # Optional field or field with default
141
+ return Field(default=default_value, description=description) if description else Field(default=default_value)
142
+
143
+
144
+ def _make_optional_if_needed(field_type: Type, is_required: bool, has_default: bool) -> Type:
145
+ """Make field type optional if needed."""
146
+ if is_required or has_default:
147
+ return field_type
148
+
149
+ # Check if already nullable
150
+ if hasattr(field_type, "__origin__") and field_type.__origin__ is Union and type(None) in field_type.__args__:
151
+ return field_type
152
+
153
+ # Make optional
154
+ return Union[field_type, type(None)] # type: ignore[return-value]
155
+
156
+
157
+ # ============================================================================
158
+ # Field Processing - Separated enum and regular field logic
159
+ # ============================================================================
160
+
161
+
162
+ def _process_enum_field(field_name: str, field_def: Dict[str, Any], is_required: bool) -> Tuple[Type, Field]: # type: ignore[type-arg]
163
+ """Process enum field with Literal type."""
164
+ enum_values = field_def["enum"]
165
+
166
+ # Create Literal type
167
+ if len(enum_values) == 1:
168
+ literal_type = Literal[enum_values[0]]
169
+ else:
170
+ literal_type = Literal[tuple(enum_values)]
171
+
172
+ # Handle optionality
173
+ description = field_def.get("description")
174
+ default_value = field_def.get("default")
175
+ has_default = default_value is not None
176
+
177
+ if not is_required and not has_default:
178
+ literal_type = Union[literal_type, type(None)] # type: ignore[assignment]
179
+ default_value = None
180
+
181
+ field_info = _create_field_info(description, default_value, is_required)
182
+ return literal_type, field_info # type: ignore[return-value]
183
+
184
+
185
+ def _process_regular_field(field_name: str, field_def: Dict[str, Any], is_required: bool) -> Tuple[Type, Field]: # type: ignore[type-arg]
186
+ """Process regular (non-enum) field."""
187
+ field_type = parse_field(field_def)
188
+ description = field_def.get("description")
189
+ default_value = field_def.get("default")
190
+ has_default = default_value is not None
191
+
192
+ # Handle optionality
193
+ field_type = _make_optional_if_needed(field_type, is_required, has_default)
194
+
195
+ if not is_required and not has_default:
196
+ default_value = None
197
+
198
+ field_info = _create_field_info(description, default_value, is_required)
199
+ return field_type, field_info
200
+
201
+
202
+ # ============================================================================
203
+ # Main Schema Processing - Clean and focused
204
+ # ============================================================================
205
+
206
+
207
+ def deserialize_base_model(json_schema: Dict[str, Any]) -> Type[BaseModel]:
208
+ """Deserialize a JSON schema to a Pydantic BaseModel class.
209
+
210
+ Refactored version with clear separation of concerns and simplified logic.
211
+ """
212
+ # Basic setup
213
+ title = json_schema.get("title", "DynamicModel")
214
+ dereferenced_schema = dereference_json_schema(json_schema)
215
+ properties = dereferenced_schema.get("properties", {})
216
+ required_fields = set(dereferenced_schema.get("required", []))
217
+
218
+ # Process each field
219
+ fields = {}
220
+ for field_name, field_def in properties.items():
221
+ is_required = field_name in required_fields
222
+
223
+ if "enum" in field_def:
224
+ field_type, field_info = _process_enum_field(field_name, field_def, is_required)
225
+ else:
226
+ field_type, field_info = _process_regular_field(field_name, field_def, is_required)
227
+
228
+ fields[field_name] = (field_type, field_info)
229
+
230
+ return create_model(title, **fields)
@@ -79,7 +79,7 @@ __all__ = ["fillna", "FillNaResponse"]
79
79
  def get_examples(df: pd.DataFrame, target_column_name: str, max_examples: int) -> List[Dict]:
80
80
  examples: List[Dict] = []
81
81
 
82
- samples: pd.DataFrame = df.sample(frac=1)
82
+ samples: pd.DataFrame = df.sample(frac=1).reset_index(drop=True).drop_duplicates()
83
83
  samples = samples.dropna(subset=[target_column_name])
84
84
 
85
85
  for i, row in samples.head(max_examples).iterrows():
@@ -109,7 +109,7 @@ def get_instructions(df: pd.DataFrame, target_column_name: str, max_examples: in
109
109
  output_value=json.dumps({"index": row["index"], "output": row["output"]}, ensure_ascii=False),
110
110
  )
111
111
 
112
- return builder.build()
112
+ return builder.improve().build()
113
113
 
114
114
 
115
115
  class FillNaResponse(BaseModel):