openaivec 0.13.6__tar.gz → 0.13.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {openaivec-0.13.6 → openaivec-0.13.7}/.github/copilot-instructions.md +26 -0
  2. {openaivec-0.13.6 → openaivec-0.13.7}/PKG-INFO +1 -1
  3. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/__init__.py +7 -2
  4. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/di.py +2 -0
  5. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/log.py +1 -1
  6. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/model.py +4 -0
  7. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/optimize.py +3 -1
  8. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/pandas_ext.py +7 -0
  9. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/provider.py +2 -0
  10. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/proxy.py +2 -0
  11. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/serialize.py +1 -1
  12. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/spark.py +9 -0
  13. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/util.py +2 -0
  14. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_optimize.py +1 -1
  15. {openaivec-0.13.6 → openaivec-0.13.7}/.env.example +0 -0
  16. {openaivec-0.13.6 → openaivec-0.13.7}/.github/workflows/python-mkdocs.yml +0 -0
  17. {openaivec-0.13.6 → openaivec-0.13.7}/.github/workflows/python-package.yml +0 -0
  18. {openaivec-0.13.6 → openaivec-0.13.7}/.github/workflows/python-test.yml +0 -0
  19. {openaivec-0.13.6 → openaivec-0.13.7}/.github/workflows/python-update.yml +0 -0
  20. {openaivec-0.13.6 → openaivec-0.13.7}/.gitignore +0 -0
  21. {openaivec-0.13.6 → openaivec-0.13.7}/CODE_OF_CONDUCT.md +0 -0
  22. {openaivec-0.13.6 → openaivec-0.13.7}/LICENSE +0 -0
  23. {openaivec-0.13.6 → openaivec-0.13.7}/README.md +0 -0
  24. {openaivec-0.13.6 → openaivec-0.13.7}/SECURITY.md +0 -0
  25. {openaivec-0.13.6 → openaivec-0.13.7}/SUPPORT.md +0 -0
  26. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/di.md +0 -0
  27. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/embeddings.md +0 -0
  28. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/pandas_ext.md +0 -0
  29. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/prompt.md +0 -0
  30. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/proxy.md +0 -0
  31. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/responses.md +0 -0
  32. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/spark.md +0 -0
  33. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/task.md +0 -0
  34. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
  35. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
  36. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
  37. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
  38. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
  39. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
  40. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
  41. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
  42. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
  43. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
  44. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
  45. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/tasks/nlp/translation.md +0 -0
  46. {openaivec-0.13.6 → openaivec-0.13.7}/docs/api/util.md +0 -0
  47. {openaivec-0.13.6 → openaivec-0.13.7}/docs/index.md +0 -0
  48. {openaivec-0.13.6 → openaivec-0.13.7}/docs/robots.txt +0 -0
  49. {openaivec-0.13.6 → openaivec-0.13.7}/mkdocs.yml +0 -0
  50. {openaivec-0.13.6 → openaivec-0.13.7}/pyproject.toml +0 -0
  51. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/embeddings.py +0 -0
  52. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/prompt.py +0 -0
  53. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/responses.py +0 -0
  54. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/__init__.py +0 -0
  55. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/customer_support/__init__.py +0 -0
  56. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
  57. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
  58. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
  59. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
  60. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
  61. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
  62. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/nlp/__init__.py +0 -0
  63. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
  64. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
  65. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
  66. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
  67. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
  68. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/nlp/translation.py +0 -0
  69. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/table/__init__.py +0 -0
  70. {openaivec-0.13.6 → openaivec-0.13.7}/src/openaivec/task/table/fillna.py +0 -0
  71. {openaivec-0.13.6 → openaivec-0.13.7}/tests/__init__.py +0 -0
  72. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_di.py +0 -0
  73. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_embeddings.py +0 -0
  74. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_pandas_ext.py +0 -0
  75. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_prompt.py +0 -0
  76. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_provider.py +0 -0
  77. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_proxy.py +0 -0
  78. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_proxy_suggester.py +0 -0
  79. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_responses.py +0 -0
  80. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_serialize.py +0 -0
  81. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_spark.py +0 -0
  82. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_task.py +0 -0
  83. {openaivec-0.13.6 → openaivec-0.13.7}/tests/test_util.py +0 -0
  84. {openaivec-0.13.6 → openaivec-0.13.7}/uv.lock +0 -0
@@ -138,6 +138,32 @@ Don’t
138
138
  - Use `asyncio.run` in async tests (mirrors existing tests)
139
139
  - Optional integration tests can run with valid API keys; keep unit tests independent of network
140
140
 
141
+ ## Package Visibility Guidelines (`__all__`)
142
+
143
+ ### Public API Modules
144
+ These modules are part of the public API and should have comprehensive `__all__` declarations:
145
+
146
+ - `embeddings.py` - Batch embedding processing
147
+ - `model.py` - Task configuration models
148
+ - `prompt.py` - Few-shot prompt building
149
+ - `responses.py` - Batch response processing
150
+ - `spark.py` - Apache Spark UDF builders
151
+ - `pandas_ext.py` - Pandas DataFrame/Series extensions
152
+ - `task/*` - All task modules (NLP, customer support, table operations)
153
+
154
+ ### Internal Modules
155
+ These modules are for internal use only and should have `__all__ = []`:
156
+
157
+ - All other modules not listed above (util.py, serialize.py, log.py, provider.py, proxy.py, di.py, optimize.py, etc.)
158
+
159
+ ### `__all__` Best Practices
160
+
161
+ 1. **Public modules**: Include all classes, functions, and constants intended for external use
162
+ 2. **Internal modules**: Use `__all__ = []` to explicitly mark as internal-only
163
+ 3. **Task modules**: Each task module should export its main classes/functions
164
+ 4. **Package `__init__.py`**: Re-export public API from all public modules
165
+ 5. **Consistency**: Maintain alphabetical ordering within `__all__` lists
166
+
141
167
  ## Documentation (MkDocs)
142
168
 
143
169
  - For new developer-facing APIs, update `docs/api/` and consider a short example under `docs/examples/`
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 0.13.6
3
+ Version: 0.13.7
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -1,9 +1,14 @@
1
1
  from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
2
+ from .model import PreparedTask
3
+ from .prompt import FewShotPrompt, FewShotPromptBuilder
2
4
  from .responses import AsyncBatchResponses, BatchResponses
3
5
 
4
6
  __all__ = [
5
- "BatchResponses",
7
+ "AsyncBatchEmbeddings",
6
8
  "AsyncBatchResponses",
7
9
  "BatchEmbeddings",
8
- "AsyncBatchEmbeddings",
10
+ "BatchResponses",
11
+ "FewShotPrompt",
12
+ "FewShotPromptBuilder",
13
+ "PreparedTask",
9
14
  ]
@@ -2,6 +2,8 @@ from dataclasses import dataclass, field
2
2
  from threading import RLock
3
3
  from typing import Any, Callable, Dict, Set, Type, TypeVar
4
4
 
5
+ __all__ = []
6
+
5
7
  """Simple dependency injection container with singleton lifecycle management.
6
8
 
7
9
  This module provides a lightweight dependency injection container that manages
@@ -5,7 +5,7 @@ import uuid
5
5
  from logging import Logger
6
6
  from typing import Callable
7
7
 
8
- __all__ = ["observe"]
8
+ __all__ = []
9
9
 
10
10
 
11
11
  def observe(logger: Logger):
@@ -1,6 +1,10 @@
1
1
  from dataclasses import dataclass
2
2
  from typing import Generic, Type, TypeVar
3
3
 
4
+ __all__ = [
5
+ "PreparedTask",
6
+ ]
7
+
4
8
  ResponseFormat = TypeVar("ResponseFormat")
5
9
 
6
10
 
@@ -5,6 +5,8 @@ from dataclasses import dataclass, field
5
5
  from datetime import datetime, timezone
6
6
  from typing import List
7
7
 
8
+ __all__ = []
9
+
8
10
 
9
11
  @dataclass(frozen=True)
10
12
  class PerformanceMetric:
@@ -20,7 +22,7 @@ class BatchSizeSuggester:
20
22
  min_batch_size: int = 10
21
23
  min_duration: float = 30.0
22
24
  max_duration: float = 60.0
23
- step_ratio: float = 0.1
25
+ step_ratio: float = 0.2
24
26
  sample_size: int = 4
25
27
  _history: List[PerformanceMetric] = field(default_factory=list)
26
28
  _lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
@@ -48,6 +48,13 @@ import numpy as np
48
48
  import pandas as pd
49
49
  import tiktoken
50
50
  from openai import AsyncOpenAI, OpenAI
51
+
52
+ __all__ = [
53
+ "embeddings_model",
54
+ "responses_model",
55
+ "use",
56
+ "use_async",
57
+ ]
51
58
  from pydantic import BaseModel
52
59
 
53
60
  from openaivec.embeddings import AsyncBatchEmbeddings, BatchEmbeddings
@@ -15,6 +15,8 @@ from openaivec.model import (
15
15
  )
16
16
  from openaivec.util import TextChunker
17
17
 
18
+ __all__ = []
19
+
18
20
  CONTAINER = di.Container()
19
21
 
20
22
 
@@ -6,6 +6,8 @@ from typing import Any, Awaitable, Callable, Dict, Generic, List, TypeVar
6
6
 
7
7
  from openaivec.optimize import BatchSizeSuggester
8
8
 
9
+ __all__ = []
10
+
9
11
  S = TypeVar("S", bound=Hashable)
10
12
  T = TypeVar("T")
11
13
 
@@ -29,7 +29,7 @@ from typing import Any, Dict, List, Literal, Type
29
29
 
30
30
  from pydantic import BaseModel, Field, create_model
31
31
 
32
- __all__ = ["deserialize_base_model", "serialize_base_model"]
32
+ __all__ = []
33
33
 
34
34
 
35
35
  def serialize_base_model(obj: Type[BaseModel]) -> Dict[str, Any]:
@@ -12,6 +12,15 @@ improved performance in I/O-bound operations.
12
12
  automatically cache duplicate inputs within each partition, significantly reducing
13
13
  API calls and costs when processing datasets with overlapping content.
14
14
 
15
+ __all__ = [
16
+ "count_tokens_udf",
17
+ "embeddings_udf",
18
+ "responses_udf",
19
+ "similarity_udf",
20
+ "split_to_chunks_udf",
21
+ "task_udf",
22
+ ]
23
+
15
24
  ## Setup
16
25
 
17
26
  First, obtain a Spark session and configure authentication:
@@ -8,6 +8,8 @@ from typing import Awaitable, Callable, List, Type, TypeVar
8
8
  import numpy as np
9
9
  import tiktoken
10
10
 
11
+ __all__ = []
12
+
11
13
  T = TypeVar("T")
12
14
  U = TypeVar("U")
13
15
  V = TypeVar("V")
@@ -34,7 +34,7 @@ class TestBatchSizeSuggester:
34
34
  assert suggester.min_batch_size == 10
35
35
  assert suggester.min_duration == 30.0
36
36
  assert suggester.max_duration == 60.0
37
- assert suggester.step_ratio == 0.1
37
+ assert suggester.step_ratio == 0.2
38
38
  assert suggester.sample_size == 4
39
39
  assert len(suggester._history) == 0
40
40
  assert suggester._batch_size_changed_at is None
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes