openaivec 0.99.2__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openaivec-0.99.2 → openaivec-1.0.0}/.gitignore +3 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/PKG-INFO +3 -3
- {openaivec-0.99.2 → openaivec-1.0.0}/README.md +2 -2
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/__init__.py +2 -2
- openaivec-1.0.0/src/openaivec/_cache/__init__.py +12 -0
- openaivec-0.99.2/src/openaivec/_proxy.py → openaivec-1.0.0/src/openaivec/_cache/proxy.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/_embeddings.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/_prompt.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/_responses.py +1 -1
- openaivec-1.0.0/src/openaivec/_schema/__init__.py +9 -0
- openaivec-0.99.2/src/openaivec/_schema.py → openaivec-1.0.0/src/openaivec/_schema/infer.py +6 -6
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/pandas_ext.py +12 -12
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/spark.py +4 -4
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/__init__.py +8 -6
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/customer_support/customer_sentiment.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/customer_support/inquiry_classification.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/customer_support/inquiry_summary.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/customer_support/intent_analysis.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/customer_support/response_suggestion.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/customer_support/urgency_analysis.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/nlp/dependency_parsing.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/nlp/keyword_extraction.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/nlp/morphological_analysis.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/nlp/named_entity_recognition.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/nlp/sentiment_analysis.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/nlp/translation.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/table/fillna.py +1 -1
- {openaivec-0.99.2/tests → openaivec-1.0.0/tests/_cache}/test_optimize.py +1 -1
- {openaivec-0.99.2/tests → openaivec-1.0.0/tests/_cache}/test_proxy.py +22 -22
- {openaivec-0.99.2/tests → openaivec-1.0.0/tests/_cache}/test_proxy_suggester.py +1 -1
- openaivec-0.99.2/tests/test_schema.py → openaivec-1.0.0/tests/_schema/test_infer.py +10 -10
- openaivec-0.99.2/tests/test_dynamic.py → openaivec-1.0.0/tests/_schema/test_spec.py +1 -1
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/conftest.py +2 -2
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_pandas_ext.py +4 -4
- {openaivec-0.99.2 → openaivec-1.0.0}/.env.example +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/.github/copilot-instructions.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/.github/dependabot.yml +0 -0
- /openaivec-0.99.2/.github/workflows/python-mkdocs.yml → /openaivec-1.0.0/.github/workflows/docs.yml +0 -0
- /openaivec-0.99.2/.github/workflows/python-package.yml → /openaivec-1.0.0/.github/workflows/publish.yml +0 -0
- /openaivec-0.99.2/.github/workflows/python-test.yml → /openaivec-1.0.0/.github/workflows/test.yml +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/AGENTS.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/CODE_OF_CONDUCT.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/LICENSE +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/SECURITY.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/SUPPORT.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/main.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/pandas_ext.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/spark.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/task.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/nlp/translation.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/contributor-guide.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/index.md +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/docs/robots.txt +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/mkdocs.yml +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/pyproject.toml +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/pytest.ini +0 -0
- /openaivec-0.99.2/src/openaivec/_optimize.py → /openaivec-1.0.0/src/openaivec/_cache/optimize.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/_di.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/_log.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/_model.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/_provider.py +0 -0
- /openaivec-0.99.2/src/openaivec/_dynamic.py → /openaivec-1.0.0/src/openaivec/_schema/spec.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/_serialize.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/_util.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/customer_support/__init__.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/nlp/__init__.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/src/openaivec/task/table/__init__.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/__init__.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_di.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_embeddings.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_prompt.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_provider.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_responses.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_serialize.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_serialize_pydantic_v2_compliance.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_spark.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_task.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/tests/test_util.py +0 -0
- {openaivec-0.99.2 → openaivec-1.0.0}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openaivec
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Generative mutation for tabular calculation
|
|
5
5
|
Project-URL: Homepage, https://microsoft.github.io/openaivec/
|
|
6
6
|
Project-URL: Repository, https://github.com/microsoft/openaivec
|
|
@@ -49,7 +49,7 @@ Description-Content-Type: text/markdown
|
|
|
49
49
|
- Drop-in `.ai` and `.aio` DataFrame accessors keep pandas analysts in their favorite tools.
|
|
50
50
|
- Smart batching (`BatchingMapProxy`) deduplicates prompts, enforces ordered outputs, and shortens runtimes without manual tuning.
|
|
51
51
|
- Built-in caches, retry logic, and reasoning model safeguards cut noisy boilerplate from production pipelines.
|
|
52
|
-
- Ready-made Spark UDF
|
|
52
|
+
- Ready-made Spark UDF helpers and Microsoft Fabric guides take AI workloads from notebooks into enterprise-scale ETL.
|
|
53
53
|
- Pre-configured task library and `FewShotPromptBuilder` ship curated prompts and structured outputs validated by Pydantic.
|
|
54
54
|
- Supports OpenAI and Azure OpenAI clients interchangeably, including async workloads and embeddings.
|
|
55
55
|
|
|
@@ -142,7 +142,7 @@ automatically in notebook environments when `show_progress=True`.
|
|
|
142
142
|
- Vectorized request batching with automatic deduplication, retries, and cache hooks for any OpenAI-compatible client.
|
|
143
143
|
- pandas `.ai` and `.aio` accessors for synchronous and asynchronous DataFrame pipelines, including `ai.extract` helpers.
|
|
144
144
|
- Task library with Pydantic-backed schemas for consistent structured outputs across pandas and Spark jobs.
|
|
145
|
-
- Spark UDF
|
|
145
|
+
- Spark UDF helpers (`responses_udf`, `embeddings_udf`, `parse_udf`, `task_udf`, etc.) for large-scale ETL and BI.
|
|
146
146
|
- Embeddings, token counting, and similarity utilities for search and retrieval use cases.
|
|
147
147
|
- Prompt tooling (`FewShotPromptBuilder`, `improve`) to craft and iterate production-ready instructions.
|
|
148
148
|
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
- Drop-in `.ai` and `.aio` DataFrame accessors keep pandas analysts in their favorite tools.
|
|
24
24
|
- Smart batching (`BatchingMapProxy`) deduplicates prompts, enforces ordered outputs, and shortens runtimes without manual tuning.
|
|
25
25
|
- Built-in caches, retry logic, and reasoning model safeguards cut noisy boilerplate from production pipelines.
|
|
26
|
-
- Ready-made Spark UDF
|
|
26
|
+
- Ready-made Spark UDF helpers and Microsoft Fabric guides take AI workloads from notebooks into enterprise-scale ETL.
|
|
27
27
|
- Pre-configured task library and `FewShotPromptBuilder` ship curated prompts and structured outputs validated by Pydantic.
|
|
28
28
|
- Supports OpenAI and Azure OpenAI clients interchangeably, including async workloads and embeddings.
|
|
29
29
|
|
|
@@ -116,7 +116,7 @@ automatically in notebook environments when `show_progress=True`.
|
|
|
116
116
|
- Vectorized request batching with automatic deduplication, retries, and cache hooks for any OpenAI-compatible client.
|
|
117
117
|
- pandas `.ai` and `.aio` accessors for synchronous and asynchronous DataFrame pipelines, including `ai.extract` helpers.
|
|
118
118
|
- Task library with Pydantic-backed schemas for consistent structured outputs across pandas and Spark jobs.
|
|
119
|
-
- Spark UDF
|
|
119
|
+
- Spark UDF helpers (`responses_udf`, `embeddings_udf`, `parse_udf`, `task_udf`, etc.) for large-scale ETL and BI.
|
|
120
120
|
- Embeddings, token counting, and similarity utilities for search and retrieval use cases.
|
|
121
121
|
- Prompt tooling (`FewShotPromptBuilder`, `improve`) to craft and iterate production-ready instructions.
|
|
122
122
|
|
|
@@ -2,7 +2,7 @@ from ._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
|
|
|
2
2
|
from ._model import PreparedTask
|
|
3
3
|
from ._prompt import FewShotPrompt, FewShotPromptBuilder
|
|
4
4
|
from ._responses import AsyncBatchResponses, BatchResponses
|
|
5
|
-
from ._schema import
|
|
5
|
+
from ._schema import SchemaInferenceInput, SchemaInferenceOutput, SchemaInferer
|
|
6
6
|
|
|
7
7
|
__all__ = [
|
|
8
8
|
"AsyncBatchEmbeddings",
|
|
@@ -11,7 +11,7 @@ __all__ = [
|
|
|
11
11
|
"BatchResponses",
|
|
12
12
|
"FewShotPrompt",
|
|
13
13
|
"FewShotPromptBuilder",
|
|
14
|
-
"
|
|
14
|
+
"SchemaInferenceOutput",
|
|
15
15
|
"PreparedTask",
|
|
16
16
|
"SchemaInferenceInput",
|
|
17
17
|
"SchemaInferer",
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Caching utilities used across OpenAIVec."""
|
|
2
|
+
|
|
3
|
+
from .optimize import BatchSizeSuggester, PerformanceMetric
|
|
4
|
+
from .proxy import AsyncBatchingMapProxy, BatchingMapProxy, ProxyBase
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"AsyncBatchingMapProxy",
|
|
8
|
+
"BatchSizeSuggester",
|
|
9
|
+
"BatchingMapProxy",
|
|
10
|
+
"PerformanceMetric",
|
|
11
|
+
"ProxyBase",
|
|
12
|
+
]
|
|
@@ -4,7 +4,7 @@ from collections.abc import Awaitable, Callable, Hashable
|
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
5
|
from typing import Any, Generic, TypeVar
|
|
6
6
|
|
|
7
|
-
from openaivec.
|
|
7
|
+
from openaivec._cache import BatchSizeSuggester
|
|
8
8
|
|
|
9
9
|
__all__ = []
|
|
10
10
|
|
|
@@ -5,8 +5,8 @@ import numpy as np
|
|
|
5
5
|
from numpy.typing import NDArray
|
|
6
6
|
from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
|
|
7
7
|
|
|
8
|
+
from openaivec._cache import AsyncBatchingMapProxy, BatchingMapProxy
|
|
8
9
|
from openaivec._log import observe
|
|
9
|
-
from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
|
|
10
10
|
from openaivec._util import backoff, backoff_async
|
|
11
11
|
|
|
12
12
|
__all__ = [
|
|
@@ -6,7 +6,7 @@ construction of a prompt in a structured way, including setting the
|
|
|
6
6
|
purpose, adding cautions, and providing examples.
|
|
7
7
|
|
|
8
8
|
```python
|
|
9
|
-
from openaivec
|
|
9
|
+
from openaivec import FewShotPromptBuilder
|
|
10
10
|
|
|
11
11
|
prompt_str: str = (
|
|
12
12
|
FewShotPromptBuilder()
|
|
@@ -7,9 +7,9 @@ from openai import AsyncOpenAI, BadRequestError, InternalServerError, OpenAI, Ra
|
|
|
7
7
|
from openai.types.responses import ParsedResponse
|
|
8
8
|
from pydantic import BaseModel
|
|
9
9
|
|
|
10
|
+
from openaivec._cache import AsyncBatchingMapProxy, BatchingMapProxy
|
|
10
11
|
from openaivec._log import observe
|
|
11
12
|
from openaivec._model import PreparedTask, ResponseFormat
|
|
12
|
-
from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
|
|
13
13
|
from openaivec._util import backoff, backoff_async
|
|
14
14
|
|
|
15
15
|
__all__ = [
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Schema inference package.
|
|
2
|
+
|
|
3
|
+
Internal helpers now live in :mod:`openaivec._schema.infer`; this module simply
|
|
4
|
+
re-exports the main entry points so ``from openaivec._schema import ...`` still
|
|
5
|
+
behaves the same."""
|
|
6
|
+
|
|
7
|
+
from .infer import SchemaInferenceInput, SchemaInferenceOutput, SchemaInferer
|
|
8
|
+
|
|
9
|
+
__all__ = ["SchemaInferenceOutput", "SchemaInferenceInput", "SchemaInferer"]
|
|
@@ -61,14 +61,14 @@ from openai import OpenAI
|
|
|
61
61
|
from openai.types.responses import ParsedResponse
|
|
62
62
|
from pydantic import BaseModel, Field
|
|
63
63
|
|
|
64
|
-
from openaivec._dynamic import ObjectSpec, _build_model
|
|
65
64
|
from openaivec._model import PreparedTask
|
|
65
|
+
from openaivec._schema.spec import ObjectSpec, _build_model
|
|
66
66
|
|
|
67
67
|
# Internal module: explicitly not part of public API
|
|
68
68
|
__all__: list[str] = []
|
|
69
69
|
|
|
70
70
|
|
|
71
|
-
class
|
|
71
|
+
class SchemaInferenceOutput(BaseModel):
|
|
72
72
|
"""Result of a schema inference round.
|
|
73
73
|
|
|
74
74
|
Contains the normalized *instructions*, objective *examples_summary*, the root
|
|
@@ -123,7 +123,7 @@ class InferredSchema(BaseModel):
|
|
|
123
123
|
)
|
|
124
124
|
|
|
125
125
|
@classmethod
|
|
126
|
-
def load(cls, path: str) -> "
|
|
126
|
+
def load(cls, path: str) -> "SchemaInferenceOutput":
|
|
127
127
|
"""Load an inferred schema from a JSON file.
|
|
128
128
|
|
|
129
129
|
Args:
|
|
@@ -265,7 +265,7 @@ class SchemaInferer:
|
|
|
265
265
|
client: OpenAI
|
|
266
266
|
model_name: str
|
|
267
267
|
|
|
268
|
-
def infer_schema(self, data: SchemaInferenceInput, *args, max_retries: int = 8, **kwargs) ->
|
|
268
|
+
def infer_schema(self, data: SchemaInferenceInput, *args, max_retries: int = 8, **kwargs) -> SchemaInferenceOutput:
|
|
269
269
|
"""Infer a validated schema from representative examples.
|
|
270
270
|
|
|
271
271
|
Workflow:
|
|
@@ -315,11 +315,11 @@ class SchemaInferer:
|
|
|
315
315
|
)
|
|
316
316
|
instructions = _INFER_INSTRUCTIONS + "\n\n" + "\n".join(feedback_lines)
|
|
317
317
|
|
|
318
|
-
response: ParsedResponse[
|
|
318
|
+
response: ParsedResponse[SchemaInferenceOutput] = self.client.responses.parse(
|
|
319
319
|
model=self.model_name,
|
|
320
320
|
instructions=instructions,
|
|
321
321
|
input=data.model_dump_json(),
|
|
322
|
-
text_format=
|
|
322
|
+
text_format=SchemaInferenceOutput,
|
|
323
323
|
*args,
|
|
324
324
|
**kwargs,
|
|
325
325
|
)
|
|
@@ -54,12 +54,12 @@ import tiktoken
|
|
|
54
54
|
from openai import AsyncOpenAI, OpenAI
|
|
55
55
|
from pydantic import BaseModel
|
|
56
56
|
|
|
57
|
+
from openaivec._cache import AsyncBatchingMapProxy, BatchingMapProxy
|
|
57
58
|
from openaivec._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
|
|
58
59
|
from openaivec._model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
|
|
59
60
|
from openaivec._provider import CONTAINER, _check_azure_v1_api_url
|
|
60
|
-
from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
|
|
61
61
|
from openaivec._responses import AsyncBatchResponses, BatchResponses
|
|
62
|
-
from openaivec._schema import
|
|
62
|
+
from openaivec._schema import SchemaInferenceInput, SchemaInferenceOutput, SchemaInferer
|
|
63
63
|
from openaivec.task.table import FillNaResponse, fillna
|
|
64
64
|
|
|
65
65
|
__all__ = [
|
|
@@ -308,7 +308,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
308
308
|
|
|
309
309
|
Example:
|
|
310
310
|
```python
|
|
311
|
-
from openaivec.
|
|
311
|
+
from openaivec._cache import BatchingMapProxy
|
|
312
312
|
import numpy as np
|
|
313
313
|
|
|
314
314
|
# Create a shared cache with custom batch size
|
|
@@ -387,7 +387,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
387
387
|
|
|
388
388
|
Example:
|
|
389
389
|
```python
|
|
390
|
-
from openaivec.
|
|
390
|
+
from openaivec._cache import BatchingMapProxy
|
|
391
391
|
shared_cache = BatchingMapProxy(batch_size=64)
|
|
392
392
|
reviews.ai.task_with_cache(sentiment_task, cache=shared_cache)
|
|
393
393
|
```
|
|
@@ -503,7 +503,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
503
503
|
schema model, aligned with the original Series index.
|
|
504
504
|
"""
|
|
505
505
|
|
|
506
|
-
schema:
|
|
506
|
+
schema: SchemaInferenceOutput | None = None
|
|
507
507
|
if response_format is None:
|
|
508
508
|
schema = self.infer_schema(instructions=instructions, max_examples=max_examples, **api_kwargs)
|
|
509
509
|
|
|
@@ -588,7 +588,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
588
588
|
**api_kwargs,
|
|
589
589
|
)
|
|
590
590
|
|
|
591
|
-
def infer_schema(self, instructions: str, max_examples: int = 100, **api_kwargs) ->
|
|
591
|
+
def infer_schema(self, instructions: str, max_examples: int = 100, **api_kwargs) -> SchemaInferenceOutput:
|
|
592
592
|
"""Infer a structured data schema from Series content using AI.
|
|
593
593
|
|
|
594
594
|
This method analyzes a sample of Series values to automatically generate
|
|
@@ -730,7 +730,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
730
730
|
|
|
731
731
|
Example:
|
|
732
732
|
```python
|
|
733
|
-
from openaivec.
|
|
733
|
+
from openaivec._cache import BatchingMapProxy
|
|
734
734
|
|
|
735
735
|
# Create a shared cache with custom batch size
|
|
736
736
|
shared_cache = BatchingMapProxy(batch_size=64)
|
|
@@ -990,7 +990,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
990
990
|
**api_kwargs,
|
|
991
991
|
)
|
|
992
992
|
|
|
993
|
-
def infer_schema(self, instructions: str, max_examples: int = 100, **api_kwargs) ->
|
|
993
|
+
def infer_schema(self, instructions: str, max_examples: int = 100, **api_kwargs) -> SchemaInferenceOutput:
|
|
994
994
|
"""Infer a structured data schema from DataFrame rows using AI.
|
|
995
995
|
|
|
996
996
|
This method analyzes a sample of DataFrame rows to automatically infer
|
|
@@ -1317,7 +1317,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
1317
1317
|
|
|
1318
1318
|
Example:
|
|
1319
1319
|
```python
|
|
1320
|
-
from openaivec.
|
|
1320
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
1321
1321
|
import numpy as np
|
|
1322
1322
|
|
|
1323
1323
|
# Create a shared cache with custom batch size and concurrency
|
|
@@ -1424,7 +1424,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
1424
1424
|
Example:
|
|
1425
1425
|
```python
|
|
1426
1426
|
from openaivec._model import PreparedTask
|
|
1427
|
-
from openaivec.
|
|
1427
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
1428
1428
|
|
|
1429
1429
|
# Create a shared cache with custom batch size and concurrency
|
|
1430
1430
|
shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
|
|
@@ -1556,7 +1556,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
1556
1556
|
Note:
|
|
1557
1557
|
This is an asynchronous method and must be awaited.
|
|
1558
1558
|
"""
|
|
1559
|
-
schema:
|
|
1559
|
+
schema: SchemaInferenceOutput | None = None
|
|
1560
1560
|
if response_format is None:
|
|
1561
1561
|
# Use synchronous schema inference
|
|
1562
1562
|
schema = self._obj.ai.infer_schema(instructions=instructions, max_examples=max_examples)
|
|
@@ -1650,7 +1650,7 @@ class AsyncOpenAIVecDataFrameAccessor:
|
|
|
1650
1650
|
|
|
1651
1651
|
Example:
|
|
1652
1652
|
```python
|
|
1653
|
-
from openaivec.
|
|
1653
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
1654
1654
|
|
|
1655
1655
|
# Create a shared cache with custom batch size and concurrency
|
|
1656
1656
|
shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
|
|
@@ -142,10 +142,10 @@ from pyspark.sql.udf import UserDefinedFunction
|
|
|
142
142
|
from typing_extensions import Literal
|
|
143
143
|
|
|
144
144
|
from openaivec import pandas_ext
|
|
145
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
145
146
|
from openaivec._model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
|
|
146
147
|
from openaivec._provider import CONTAINER
|
|
147
|
-
from openaivec.
|
|
148
|
-
from openaivec._schema import InferredSchema, SchemaInferenceInput, SchemaInferer
|
|
148
|
+
from openaivec._schema import SchemaInferenceInput, SchemaInferenceOutput, SchemaInferer
|
|
149
149
|
from openaivec._serialize import deserialize_base_model, serialize_base_model
|
|
150
150
|
from openaivec._util import TextChunker
|
|
151
151
|
|
|
@@ -518,7 +518,7 @@ def infer_schema(
|
|
|
518
518
|
example_table_name: str,
|
|
519
519
|
example_field_name: str,
|
|
520
520
|
max_examples: int = 100,
|
|
521
|
-
) ->
|
|
521
|
+
) -> SchemaInferenceOutput:
|
|
522
522
|
"""Infer the schema for a response format based on example data.
|
|
523
523
|
|
|
524
524
|
This function retrieves examples from a Spark table and infers the schema
|
|
@@ -606,7 +606,7 @@ def parse_udf(
|
|
|
606
606
|
if not response_format and not (example_field_name and example_table_name):
|
|
607
607
|
raise ValueError("Either response_format or example_table_name and example_field_name must be provided.")
|
|
608
608
|
|
|
609
|
-
schema:
|
|
609
|
+
schema: SchemaInferenceOutput | None = None
|
|
610
610
|
|
|
611
611
|
if not response_format:
|
|
612
612
|
schema = infer_schema(
|
|
@@ -32,7 +32,7 @@ Specialized tasks for customer service operations:
|
|
|
32
32
|
### Quick Start with Default Tasks
|
|
33
33
|
```python
|
|
34
34
|
from openai import OpenAI
|
|
35
|
-
from openaivec
|
|
35
|
+
from openaivec import BatchResponses
|
|
36
36
|
from openaivec.task import nlp, customer_support
|
|
37
37
|
|
|
38
38
|
client = OpenAI()
|
|
@@ -90,15 +90,17 @@ results_df = df.ai.extract("sentiment")
|
|
|
90
90
|
|
|
91
91
|
### Spark Integration
|
|
92
92
|
```python
|
|
93
|
-
from openaivec.spark import
|
|
93
|
+
from openaivec.spark import task_udf
|
|
94
94
|
|
|
95
95
|
# Register UDF for large-scale processing
|
|
96
96
|
spark.udf.register(
|
|
97
97
|
"analyze_sentiment",
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
model_name="gpt-4.1-mini"
|
|
101
|
-
|
|
98
|
+
task_udf(
|
|
99
|
+
task=nlp.SENTIMENT_ANALYSIS,
|
|
100
|
+
model_name="gpt-4.1-mini",
|
|
101
|
+
batch_size=64,
|
|
102
|
+
max_concurrency=8,
|
|
103
|
+
),
|
|
102
104
|
)
|
|
103
105
|
|
|
104
106
|
# Use in Spark SQL
|
|
@@ -5,7 +5,7 @@ import time
|
|
|
5
5
|
|
|
6
6
|
import pytest
|
|
7
7
|
|
|
8
|
-
from openaivec.
|
|
8
|
+
from openaivec._cache import AsyncBatchingMapProxy, BatchingMapProxy
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def test_batching_map_proxy_batches_calls_by_batch_size():
|
|
@@ -115,7 +115,7 @@ def test_batching_map_proxy_rechecks_cache_within_batch_iteration():
|
|
|
115
115
|
|
|
116
116
|
|
|
117
117
|
def test_batching_map_proxy_map_func_length_mismatch_raises_and_releases():
|
|
118
|
-
from openaivec.
|
|
118
|
+
from openaivec._cache import BatchingMapProxy
|
|
119
119
|
|
|
120
120
|
p = BatchingMapProxy[int, int](batch_size=3)
|
|
121
121
|
|
|
@@ -134,14 +134,14 @@ def test_batching_map_proxy_map_func_length_mismatch_raises_and_releases():
|
|
|
134
134
|
|
|
135
135
|
# -------------------- Internal methods tests --------------------
|
|
136
136
|
def test_internal_unique_in_order():
|
|
137
|
-
from openaivec.
|
|
137
|
+
from openaivec._cache import BatchingMapProxy
|
|
138
138
|
|
|
139
139
|
p = BatchingMapProxy[int, int]()
|
|
140
140
|
assert p._unique_in_order([1, 1, 2, 3, 2, 4]) == [1, 2, 3, 4]
|
|
141
141
|
|
|
142
142
|
|
|
143
143
|
def test_internal_normalized_batch_size():
|
|
144
|
-
from openaivec.
|
|
144
|
+
from openaivec._cache import BatchingMapProxy
|
|
145
145
|
|
|
146
146
|
p = BatchingMapProxy[int, int]()
|
|
147
147
|
assert p._normalized_batch_size(5) == 5 # default None => total
|
|
@@ -152,7 +152,7 @@ def test_internal_normalized_batch_size():
|
|
|
152
152
|
|
|
153
153
|
|
|
154
154
|
def test_internal_all_cached_and_values():
|
|
155
|
-
from openaivec.
|
|
155
|
+
from openaivec._cache import BatchingMapProxy
|
|
156
156
|
|
|
157
157
|
p = BatchingMapProxy[int, int]()
|
|
158
158
|
# fill cache via public API
|
|
@@ -167,7 +167,7 @@ def test_internal_all_cached_and_values():
|
|
|
167
167
|
def test_internal_acquire_ownership():
|
|
168
168
|
import threading
|
|
169
169
|
|
|
170
|
-
from openaivec.
|
|
170
|
+
from openaivec._cache import BatchingMapProxy
|
|
171
171
|
|
|
172
172
|
p = BatchingMapProxy[int, int]()
|
|
173
173
|
# Cache 1; mark 2 inflight; 3 is missing
|
|
@@ -185,7 +185,7 @@ def test_internal_acquire_ownership():
|
|
|
185
185
|
def test_internal_finalize_success_and_failure():
|
|
186
186
|
import threading
|
|
187
187
|
|
|
188
|
-
from openaivec.
|
|
188
|
+
from openaivec._cache import BatchingMapProxy
|
|
189
189
|
|
|
190
190
|
p = BatchingMapProxy[int, int]()
|
|
191
191
|
inflight = getattr(p, "_inflight")
|
|
@@ -214,7 +214,7 @@ def test_internal_finalize_success_and_failure():
|
|
|
214
214
|
|
|
215
215
|
|
|
216
216
|
def test_internal_process_owned_batches_and_skip_cached():
|
|
217
|
-
from openaivec.
|
|
217
|
+
from openaivec._cache import BatchingMapProxy
|
|
218
218
|
|
|
219
219
|
calls: list[list[int]] = []
|
|
220
220
|
|
|
@@ -242,7 +242,7 @@ def test_internal_wait_for_with_inflight_event():
|
|
|
242
242
|
import threading
|
|
243
243
|
import time
|
|
244
244
|
|
|
245
|
-
from openaivec.
|
|
245
|
+
from openaivec._cache import BatchingMapProxy
|
|
246
246
|
|
|
247
247
|
p = BatchingMapProxy[int, int]()
|
|
248
248
|
|
|
@@ -284,7 +284,7 @@ async def _afunc_echo(xs: list[int]) -> list[int]:
|
|
|
284
284
|
|
|
285
285
|
|
|
286
286
|
def test_async_localproxy_basic(event_loop=None):
|
|
287
|
-
from openaivec.
|
|
287
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
288
288
|
|
|
289
289
|
calls: list[list[int]] = []
|
|
290
290
|
|
|
@@ -304,7 +304,7 @@ def test_async_localproxy_basic(event_loop=None):
|
|
|
304
304
|
|
|
305
305
|
|
|
306
306
|
def test_async_localproxy_dedup_and_cache(event_loop=None):
|
|
307
|
-
from openaivec.
|
|
307
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
308
308
|
|
|
309
309
|
calls: list[list[int]] = []
|
|
310
310
|
|
|
@@ -326,7 +326,7 @@ def test_async_localproxy_dedup_and_cache(event_loop=None):
|
|
|
326
326
|
|
|
327
327
|
|
|
328
328
|
def test_async_localproxy_concurrent_requests(event_loop=None):
|
|
329
|
-
from openaivec.
|
|
329
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
330
330
|
|
|
331
331
|
calls: list[list[int]] = []
|
|
332
332
|
|
|
@@ -355,7 +355,7 @@ def test_async_localproxy_concurrent_requests(event_loop=None):
|
|
|
355
355
|
|
|
356
356
|
|
|
357
357
|
def test_async_localproxy_max_concurrency_limit(event_loop=None):
|
|
358
|
-
from openaivec.
|
|
358
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
359
359
|
|
|
360
360
|
current = 0
|
|
361
361
|
peak = 0
|
|
@@ -383,7 +383,7 @@ def test_async_localproxy_max_concurrency_limit(event_loop=None):
|
|
|
383
383
|
|
|
384
384
|
|
|
385
385
|
def test_async_localproxy_map_func_length_mismatch_raises_and_releases(event_loop=None):
|
|
386
|
-
from openaivec.
|
|
386
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
387
387
|
|
|
388
388
|
async def bad(xs: list[int]) -> list[int]:
|
|
389
389
|
return xs[:-1]
|
|
@@ -426,7 +426,7 @@ def test_sync_clear_releases_memory_and_recomputes():
|
|
|
426
426
|
|
|
427
427
|
def test_batch_size_maximization_with_cache_hits():
|
|
428
428
|
"""Test that batch_size is maximized even when some items are cached."""
|
|
429
|
-
from openaivec.
|
|
429
|
+
from openaivec._cache import BatchingMapProxy
|
|
430
430
|
|
|
431
431
|
calls: list[list[int]] = []
|
|
432
432
|
|
|
@@ -458,7 +458,7 @@ def test_batch_size_maximization_with_cache_hits():
|
|
|
458
458
|
|
|
459
459
|
def test_batch_size_maximization_complex_scenario():
|
|
460
460
|
"""Test batch_size maximization with more complex cache hit patterns."""
|
|
461
|
-
from openaivec.
|
|
461
|
+
from openaivec._cache import BatchingMapProxy
|
|
462
462
|
|
|
463
463
|
calls: list[list[int]] = []
|
|
464
464
|
|
|
@@ -513,7 +513,7 @@ async def test_async_clear_releases_memory_and_recomputes():
|
|
|
513
513
|
@pytest.mark.asyncio
|
|
514
514
|
async def test_async_batch_size_maximization_with_cache_hits():
|
|
515
515
|
"""Test that batch_size is maximized even when some items are cached (async version)."""
|
|
516
|
-
from openaivec.
|
|
516
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
517
517
|
|
|
518
518
|
calls: list[list[int]] = []
|
|
519
519
|
|
|
@@ -547,7 +547,7 @@ async def test_async_batch_size_maximization_with_cache_hits():
|
|
|
547
547
|
@pytest.mark.asyncio
|
|
548
548
|
async def test_async_batch_size_maximization_complex_scenario():
|
|
549
549
|
"""Test batch_size maximization with more complex cache hit patterns (async version)."""
|
|
550
|
-
from openaivec.
|
|
550
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
551
551
|
|
|
552
552
|
calls: list[list[int]] = []
|
|
553
553
|
|
|
@@ -584,7 +584,7 @@ async def test_async_batch_size_maximization_complex_scenario():
|
|
|
584
584
|
|
|
585
585
|
def test_notebook_environment_detection():
|
|
586
586
|
"""Test notebook environment detection functionality."""
|
|
587
|
-
from openaivec.
|
|
587
|
+
from openaivec._cache import ProxyBase
|
|
588
588
|
|
|
589
589
|
proxy = ProxyBase()
|
|
590
590
|
# The method should return a boolean and not raise an exception
|
|
@@ -594,7 +594,7 @@ def test_notebook_environment_detection():
|
|
|
594
594
|
|
|
595
595
|
def test_progress_bar_methods():
|
|
596
596
|
"""Test progress bar creation and management methods."""
|
|
597
|
-
from openaivec.
|
|
597
|
+
from openaivec._cache import ProxyBase
|
|
598
598
|
|
|
599
599
|
proxy = ProxyBase()
|
|
600
600
|
proxy.show_progress = True
|
|
@@ -677,7 +677,7 @@ async def test_async_batching_proxy_with_progress_enabled():
|
|
|
677
677
|
|
|
678
678
|
def test_progress_bar_with_forced_notebook_environment():
|
|
679
679
|
"""Test progress bar functionality with forced notebook environment."""
|
|
680
|
-
from openaivec.
|
|
680
|
+
from openaivec._cache import ProxyBase
|
|
681
681
|
|
|
682
682
|
# Monkey patch the notebook detection to return True
|
|
683
683
|
original_method = ProxyBase._is_notebook_environment
|
|
@@ -707,7 +707,7 @@ def test_progress_bar_with_forced_notebook_environment():
|
|
|
707
707
|
@pytest.mark.asyncio
|
|
708
708
|
async def test_async_progress_bar_with_forced_notebook_environment():
|
|
709
709
|
"""Test async progress bar functionality with forced notebook environment."""
|
|
710
|
-
from openaivec.
|
|
710
|
+
from openaivec._cache import ProxyBase
|
|
711
711
|
|
|
712
712
|
# Monkey patch the notebook detection to return True
|
|
713
713
|
original_method = ProxyBase._is_notebook_environment
|
|
@@ -4,8 +4,8 @@ from typing import get_args, get_origin
|
|
|
4
4
|
import pytest
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
|
-
from openaivec.
|
|
8
|
-
from openaivec._schema import
|
|
7
|
+
from openaivec._schema import SchemaInferenceInput, SchemaInferenceOutput, SchemaInferer # type: ignore
|
|
8
|
+
from openaivec._schema.spec import EnumSpec, FieldSpec, ObjectSpec # internal types for constructing test schemas
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
@pytest.fixture(scope="session")
|
|
@@ -136,7 +136,7 @@ class TestInferredSchemaBuildModel:
|
|
|
136
136
|
|
|
137
137
|
def test_build_model_primitive_types(self):
|
|
138
138
|
"""Test that all primitive types are correctly mapped to Python types."""
|
|
139
|
-
schema =
|
|
139
|
+
schema = SchemaInferenceOutput(
|
|
140
140
|
instructions="Test primitive types",
|
|
141
141
|
examples_summary="Various primitive type examples",
|
|
142
142
|
examples_instructions_alignment="Primitive examples justify coverage of all base types",
|
|
@@ -167,7 +167,7 @@ class TestInferredSchemaBuildModel:
|
|
|
167
167
|
|
|
168
168
|
def test_build_model_enum_field(self):
|
|
169
169
|
"""Test that enum fields generate proper Enum classes."""
|
|
170
|
-
schema =
|
|
170
|
+
schema = SchemaInferenceOutput(
|
|
171
171
|
instructions="Test enum types",
|
|
172
172
|
examples_summary="Enum examples",
|
|
173
173
|
examples_instructions_alignment="Stable status labels appear repeatedly, supporting enum creation",
|
|
@@ -207,7 +207,7 @@ class TestInferredSchemaBuildModel:
|
|
|
207
207
|
FieldSpec(name="second_field", type="boolean", description="Second field"),
|
|
208
208
|
]
|
|
209
209
|
|
|
210
|
-
schema =
|
|
210
|
+
schema = SchemaInferenceOutput(
|
|
211
211
|
instructions="Test field ordering",
|
|
212
212
|
examples_summary="Field ordering examples",
|
|
213
213
|
examples_instructions_alignment="Ordering matters for deterministic downstream column alignment",
|
|
@@ -223,7 +223,7 @@ class TestInferredSchemaBuildModel:
|
|
|
223
223
|
|
|
224
224
|
def test_build_model_field_descriptions(self):
|
|
225
225
|
"""Test that field descriptions are correctly included in the model."""
|
|
226
|
-
schema =
|
|
226
|
+
schema = SchemaInferenceOutput(
|
|
227
227
|
instructions="Test field descriptions",
|
|
228
228
|
examples_summary="Description examples",
|
|
229
229
|
examples_instructions_alignment="Descriptions guide extraction disambiguation",
|
|
@@ -246,7 +246,7 @@ class TestInferredSchemaBuildModel:
|
|
|
246
246
|
|
|
247
247
|
def test_build_model_empty_fields(self):
|
|
248
248
|
"""Test behavior with empty fields list."""
|
|
249
|
-
schema =
|
|
249
|
+
schema = SchemaInferenceOutput(
|
|
250
250
|
instructions="Test empty fields",
|
|
251
251
|
examples_summary="Empty examples",
|
|
252
252
|
examples_instructions_alignment="Edge case of no extractable signals",
|
|
@@ -266,7 +266,7 @@ class TestInferredSchemaBuildModel:
|
|
|
266
266
|
|
|
267
267
|
def test_build_model_mixed_enum_and_regular_fields(self):
|
|
268
268
|
"""Test a complex scenario with both enum and regular fields of all types."""
|
|
269
|
-
schema =
|
|
269
|
+
schema = SchemaInferenceOutput(
|
|
270
270
|
instructions="Test mixed field types",
|
|
271
271
|
examples_summary="Mixed type examples",
|
|
272
272
|
examples_instructions_alignment="Examples demonstrate diverse field types including enums",
|
|
@@ -313,7 +313,7 @@ class TestInferredSchemaBuildModel:
|
|
|
313
313
|
|
|
314
314
|
def test_build_model_multiple_calls_independence(self):
|
|
315
315
|
"""Test that multiple calls to build_model return independent model classes."""
|
|
316
|
-
schema =
|
|
316
|
+
schema = SchemaInferenceOutput(
|
|
317
317
|
instructions="Test independence",
|
|
318
318
|
examples_summary="Independence examples",
|
|
319
319
|
examples_instructions_alignment="Independence ensures rebuilding yields fresh class objects",
|
|
@@ -338,7 +338,7 @@ class TestInferredSchemaBuildModel:
|
|
|
338
338
|
|
|
339
339
|
def test_build_model_array_types(self):
|
|
340
340
|
"""Test that *_array types map to list element annotations and proper JSON Schema arrays."""
|
|
341
|
-
schema =
|
|
341
|
+
schema = SchemaInferenceOutput(
|
|
342
342
|
instructions="Test array types",
|
|
343
343
|
examples_summary="Array type examples",
|
|
344
344
|
examples_instructions_alignment="Examples justify homogeneous primitive arrays",
|
|
@@ -5,7 +5,7 @@ from typing import get_args, get_origin
|
|
|
5
5
|
|
|
6
6
|
import pytest
|
|
7
7
|
|
|
8
|
-
from openaivec.
|
|
8
|
+
from openaivec._schema.spec import _MAX_ENUM_VALUES, EnumSpec, FieldSpec, ObjectSpec, _build_model
|
|
9
9
|
|
|
10
10
|
# ----------------------------- Success Cases -----------------------------
|
|
11
11
|
|
|
@@ -269,7 +269,7 @@ def performance_timer():
|
|
|
269
269
|
@pytest.fixture
|
|
270
270
|
def batch_cache():
|
|
271
271
|
"""BatchingMapProxy cache for testing."""
|
|
272
|
-
from openaivec.
|
|
272
|
+
from openaivec._cache import BatchingMapProxy
|
|
273
273
|
|
|
274
274
|
return BatchingMapProxy(batch_size=32)
|
|
275
275
|
|
|
@@ -277,7 +277,7 @@ def batch_cache():
|
|
|
277
277
|
@pytest.fixture
|
|
278
278
|
def async_batch_cache():
|
|
279
279
|
"""AsyncBatchingMapProxy cache for testing."""
|
|
280
|
-
from openaivec.
|
|
280
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
281
281
|
|
|
282
282
|
return AsyncBatchingMapProxy(batch_size=32, max_concurrency=4)
|
|
283
283
|
|
|
@@ -532,7 +532,7 @@ class TestPandasExt:
|
|
|
532
532
|
|
|
533
533
|
def test_shared_cache_responses_sync(self):
|
|
534
534
|
"""Test shared cache functionality for responses."""
|
|
535
|
-
from openaivec.
|
|
535
|
+
from openaivec._cache import BatchingMapProxy
|
|
536
536
|
|
|
537
537
|
shared_cache = BatchingMapProxy(batch_size=32)
|
|
538
538
|
series1 = pd.Series(["cat", "dog", "elephant"])
|
|
@@ -557,7 +557,7 @@ class TestPandasExt:
|
|
|
557
557
|
|
|
558
558
|
def test_shared_cache_embeddings_sync(self):
|
|
559
559
|
"""Test shared cache functionality for embeddings."""
|
|
560
|
-
from openaivec.
|
|
560
|
+
from openaivec._cache import BatchingMapProxy
|
|
561
561
|
|
|
562
562
|
shared_cache = BatchingMapProxy(batch_size=32)
|
|
563
563
|
series1 = pd.Series(["apple", "banana", "cherry"])
|
|
@@ -582,7 +582,7 @@ class TestPandasExt:
|
|
|
582
582
|
|
|
583
583
|
def test_shared_cache_async(self):
|
|
584
584
|
"""Test shared cache functionality for async methods."""
|
|
585
|
-
from openaivec.
|
|
585
|
+
from openaivec._cache import AsyncBatchingMapProxy
|
|
586
586
|
|
|
587
587
|
async def run_test():
|
|
588
588
|
shared_cache = AsyncBatchingMapProxy(batch_size=32, max_concurrency=4)
|
|
@@ -723,7 +723,7 @@ class TestPandasExt:
|
|
|
723
723
|
|
|
724
724
|
def test_parse_with_cache_methods(self):
|
|
725
725
|
"""Test parse_with_cache methods for both Series and DataFrame."""
|
|
726
|
-
from openaivec.
|
|
726
|
+
from openaivec._cache import BatchingMapProxy
|
|
727
727
|
|
|
728
728
|
# Test Series parse_with_cache
|
|
729
729
|
series = pd.Series(["Good product", "Bad experience"])
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
/openaivec-0.99.2/.github/workflows/python-mkdocs.yml → /openaivec-1.0.0/.github/workflows/docs.yml
RENAMED
|
File without changes
|
|
File without changes
|
/openaivec-0.99.2/.github/workflows/python-test.yml → /openaivec-1.0.0/.github/workflows/test.yml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{openaivec-0.99.2 → openaivec-1.0.0}/docs/api/tasks/customer_support/inquiry_classification.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
/openaivec-0.99.2/src/openaivec/_optimize.py → /openaivec-1.0.0/src/openaivec/_cache/optimize.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
/openaivec-0.99.2/src/openaivec/_dynamic.py → /openaivec-1.0.0/src/openaivec/_schema/spec.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|