openaivec 1.0.6.tar.gz → 1.0.8.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openaivec-1.0.6 → openaivec-1.0.8}/PKG-INFO +4 -3
- {openaivec-1.0.6 → openaivec-1.0.8}/README.md +3 -2
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/index.md +7 -2
- openaivec-1.0.8/docs/overrides/main.html +10 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/mkdocs.yml +4 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/uv.lock +353 -342
- {openaivec-1.0.6 → openaivec-1.0.8}/.env.example +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/.github/copilot-instructions.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/.github/dependabot.yml +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/.github/workflows/docs.yml +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/.github/workflows/publish.yml +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/.github/workflows/test.yml +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/.gitignore +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/AGENTS.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/CODE_OF_CONDUCT.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/LICENSE +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/SECURITY.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/SUPPORT.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/main.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/pandas_ext.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/spark.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/task.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/api/tasks/nlp/translation.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/contributor-guide.md +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/docs/robots.txt +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/pyproject.toml +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/pytest.ini +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/__init__.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_cache/__init__.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_cache/optimize.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_cache/proxy.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_di.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_embeddings.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_log.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_model.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_prompt.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_provider.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_responses.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_schema/__init__.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_schema/infer.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_schema/spec.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_serialize.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/_util.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/pandas_ext.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/spark.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/__init__.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/customer_support/__init__.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/nlp/__init__.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/nlp/translation.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/table/__init__.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/src/openaivec/task/table/fillna.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/__init__.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/_cache/test_optimize.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/_cache/test_proxy.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/_cache/test_proxy_suggester.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/_schema/test_infer.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/_schema/test_spec.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/conftest.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_di.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_embeddings.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_pandas_ext.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_prompt.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_provider.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_responses.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_serialize.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_serialize_pydantic_v2_compliance.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_spark.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_task.py +0 -0
- {openaivec-1.0.6 → openaivec-1.0.8}/tests/test_util.py +0 -0
{openaivec-1.0.6 → openaivec-1.0.8}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openaivec
-Version: 1.0.6
+Version: 1.0.8
 Summary: Generative mutation for tabular calculation
 Project-URL: Homepage, https://microsoft.github.io/openaivec/
 Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -26,7 +26,7 @@ Description-Content-Type: text/markdown

 # openaivec

-Transform pandas and Spark workflows with AI-powered text processing—batching, caching, and guardrails included.
+Transform pandas and Spark workflows with AI-powered text processing—batching, caching, and guardrails included. Built for OpenAI batch pipelines so you can group prompts, cut API overhead, and keep outputs aligned with your data.

 [Contributor guidelines](AGENTS.md)

@@ -92,6 +92,7 @@ Batching alone removes most HTTP overhead, and letting batching overlap with con
 ## Why openaivec?

 - Drop-in `.ai` and `.aio` accessors keep pandas analysts in familiar tooling.
+- OpenAI batch-optimized: `BatchingMapProxy`/`AsyncBatchingMapProxy` coalesce requests, dedupe prompts, and keep column order stable.
 - Smart batching (`BatchingMapProxy`/`AsyncBatchingMapProxy`) dedupes prompts, preserves order, and releases waiters on failure.
 - Reasoning support mirrors the OpenAI SDK; structured outputs accept Pydantic `response_format`.
 - Built-in caches and retries remove boilerplate; helpers reuse caches across pandas, Spark, and async flows.
@@ -100,7 +101,7 @@ Batching alone removes most HTTP overhead, and letting batching overlap with con

 # Overview

-Vectorized OpenAI
+Vectorized OpenAI batch processing so you handle many inputs per call instead of one-by-one. Batching proxies dedupe inputs, enforce ordered outputs, and unblock waiters even on upstream errors. Cache helpers (`responses_with_cache`, Spark UDF builders) plug into the same layer so expensive prompts are reused across pandas, Spark, and async flows. Reasoning models honor SDK semantics. Requires Python 3.10+.

 ## Core Workflows

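The new "Why openaivec?" bullet above describes what the batching proxies do: coalesce many prompts into one request, drop duplicates, and return results in the caller's original order, which is also where the advertised cost savings come from, since a repeated prompt is only sent once. Below is a minimal sketch of that dedupe-and-reorder idea, independent of openaivec's actual `BatchingMapProxy` implementation; the function name and batch size are illustrative only.

```python
from typing import Callable, TypeVar

S = TypeVar("S")
T = TypeVar("T")


def batched_map(inputs: list[S], call: Callable[[list[S]], list[T]], batch_size: int = 128) -> list[T]:
    """Illustrative sketch: dedupe inputs, batch the calls, then restore the caller's order."""
    unique = list(dict.fromkeys(inputs))  # first-seen order, duplicates removed
    answers: dict[S, T] = {}
    for start in range(0, len(unique), batch_size):
        chunk = unique[start : start + batch_size]
        answers.update(zip(chunk, call(chunk)))  # one round-trip per chunk
    return [answers[item] for item in inputs]  # aligned with the original inputs
```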
{openaivec-1.0.6 → openaivec-1.0.8}/README.md

@@ -1,6 +1,6 @@
 # openaivec

-Transform pandas and Spark workflows with AI-powered text processing—batching, caching, and guardrails included.
+Transform pandas and Spark workflows with AI-powered text processing—batching, caching, and guardrails included. Built for OpenAI batch pipelines so you can group prompts, cut API overhead, and keep outputs aligned with your data.

 [Contributor guidelines](AGENTS.md)

@@ -66,6 +66,7 @@ Batching alone removes most HTTP overhead, and letting batching overlap with con
 ## Why openaivec?

 - Drop-in `.ai` and `.aio` accessors keep pandas analysts in familiar tooling.
+- OpenAI batch-optimized: `BatchingMapProxy`/`AsyncBatchingMapProxy` coalesce requests, dedupe prompts, and keep column order stable.
 - Smart batching (`BatchingMapProxy`/`AsyncBatchingMapProxy`) dedupes prompts, preserves order, and releases waiters on failure.
 - Reasoning support mirrors the OpenAI SDK; structured outputs accept Pydantic `response_format`.
 - Built-in caches and retries remove boilerplate; helpers reuse caches across pandas, Spark, and async flows.
@@ -74,7 +75,7 @@ Batching alone removes most HTTP overhead, and letting batching overlap with con

 # Overview

-Vectorized OpenAI
+Vectorized OpenAI batch processing so you handle many inputs per call instead of one-by-one. Batching proxies dedupe inputs, enforce ordered outputs, and unblock waiters even on upstream errors. Cache helpers (`responses_with_cache`, Spark UDF builders) plug into the same layer so expensive prompts are reused across pandas, Spark, and async flows. Reasoning models honor SDK semantics. Requires Python 3.10+.

 ## Core Workflows

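The README bullets above pair the pandas `.ai` accessor with Pydantic `response_format` for structured outputs, so downstream code can rely on a fixed schema instead of parsing free-form text. The following is a hedged sketch of how that pattern might look; the `responses` method name and the accessor-registering import side effect are assumptions inferred from these bullets, so check the openaivec API reference before relying on them.

```python
# Hedged sketch only: `responses` and `response_format=` are assumed names inferred
# from the README bullets, not confirmed signatures.
import pandas as pd
from pydantic import BaseModel

from openaivec import pandas_ext  # noqa: F401  # assumed to register the .ai accessor on import


class Sentiment(BaseModel):
    label: str   # e.g. "positive" or "negative"
    score: float


df = pd.DataFrame({"review": ["Great value", "Arrived broken", "Great value"]})

# Duplicate prompts ("Great value") should be collapsed by the batching layer
# described above, and results come back aligned with the DataFrame index.
df["sentiment"] = df["review"].ai.responses(
    "Classify the sentiment of this customer review.",
    response_format=Sentiment,
)
```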
{openaivec-1.0.6 → openaivec-1.0.8}/docs/index.md

@@ -1,6 +1,10 @@
-
+---
+title: OpenAI Batch Processing for Pandas & Spark
+---

-
+# OpenAI Batch Processing for Pandas & Spark
+
+Welcome to **openaivec** - Transform your data analysis with OpenAI's language models and batch-first pipelines! This library enables seamless integration of AI text processing, sentiment analysis, NLP tasks, and embeddings into your [**Pandas**](https://pandas.pydata.org/) DataFrames and [**Apache Spark**](https://spark.apache.org/) workflows for scalable data insights, while automatically handling OpenAI batch orchestration.

 ## 🚀 Quick Start Example

@@ -41,6 +45,7 @@ Perfect for **data scientists**, **analysts**, and **ML engineers** who want to

 - **🚀 Vectorized Processing**: Handle thousands of records in minutes, not hours
 - **⚡ Asynchronous Interface**: `.aio` accessor with `batch_size` and `max_concurrency` control
+- **📦 OpenAI Batch Friendly**: `BatchingMapProxy` groups prompts, dedupes inputs, and keeps outputs aligned for pandas and Spark
 - **💰 Cost Efficient**: Automatic deduplication significantly reduces API costs
 - **🔗 Seamless Integration**: Works within existing pandas/Spark workflows
 - **📈 Enterprise Scale**: From 100s to millions of records
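The feature list above highlights the asynchronous `.aio` accessor with `batch_size` and `max_concurrency` control. Here is a hedged sketch of how those two knobs might be exercised; only the keyword names come from the documented bullet, while the `responses` method name and the import side effect are again assumptions to verify against the API docs.

```python
# Hedged sketch: only `batch_size` and `max_concurrency` are documented above;
# the `responses` method name is assumed rather than confirmed.
import asyncio

import pandas as pd

from openaivec import pandas_ext  # noqa: F401  # assumed to register the .aio accessor on import


async def main() -> None:
    feedback = pd.Series(["fast shipping", "battery died after a week", "fast shipping"])
    summaries = await feedback.aio.responses(
        "Summarize this feedback in five words or fewer.",
        batch_size=64,       # how many prompts are grouped into one request
        max_concurrency=8,   # how many batches may be in flight at once
    )
    print(summaries)


asyncio.run(main())
```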
openaivec-1.0.8/docs/overrides/main.html (new file)

@@ -0,0 +1,10 @@
+{% extends "base.html" %}
+
+{% block extrahead %}
+{{ super() }}
+{%- set site_meta = config.extra.get("meta", []) -%}
+{%- set page_meta = page.meta.get("meta", []) if page and page.meta else [] -%}
+{%- for meta in site_meta + page_meta %}
+<meta{% for attr, value in meta.items() %} {{ attr }}="{{ value }}"{% endfor %}>
+{%- endfor %}
+{% endblock %}
{openaivec-1.0.6 → openaivec-1.0.8}/mkdocs.yml

@@ -8,6 +8,7 @@ edit_uri: edit/main/docs/
 theme:
   name: material
   language: en
+  custom_dir: docs/overrides
   palette:
     # Palette toggle for light mode
     - media: "(prefers-color-scheme: light)"
@@ -116,6 +117,9 @@ extra:
   analytics:
     provider: google
     property: G-ZZ7FDHLKYS
+  meta:
+    - name: google-site-verification
+      content: UZhByQkwHoP8ke9kNHhrVrXNM_nnHFGd6ycOKKcBRcs
   social:
     - icon: fontawesome/brands/github
       link: https://github.com/microsoft/openaivec