openaivec 0.14.10__tar.gz → 0.14.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {openaivec-0.14.10 → openaivec-0.14.12}/.github/copilot-instructions.md +18 -3
  2. {openaivec-0.14.10 → openaivec-0.14.12}/PKG-INFO +40 -16
  3. {openaivec-0.14.10 → openaivec-0.14.12}/README.md +39 -15
  4. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_di.py +21 -0
  5. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_provider.py +8 -29
  6. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/pandas_ext.py +1 -0
  7. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/spark.py +241 -96
  8. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_provider.py +12 -13
  9. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_spark.py +144 -5
  10. {openaivec-0.14.10 → openaivec-0.14.12}/.env.example +0 -0
  11. {openaivec-0.14.10 → openaivec-0.14.12}/.github/workflows/python-mkdocs.yml +0 -0
  12. {openaivec-0.14.10 → openaivec-0.14.12}/.github/workflows/python-package.yml +0 -0
  13. {openaivec-0.14.10 → openaivec-0.14.12}/.github/workflows/python-test.yml +0 -0
  14. {openaivec-0.14.10 → openaivec-0.14.12}/.github/workflows/python-update.yml +0 -0
  15. {openaivec-0.14.10 → openaivec-0.14.12}/.gitignore +0 -0
  16. {openaivec-0.14.10 → openaivec-0.14.12}/CODE_OF_CONDUCT.md +0 -0
  17. {openaivec-0.14.10 → openaivec-0.14.12}/LICENSE +0 -0
  18. {openaivec-0.14.10 → openaivec-0.14.12}/SECURITY.md +0 -0
  19. {openaivec-0.14.10 → openaivec-0.14.12}/SUPPORT.md +0 -0
  20. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/main.md +0 -0
  21. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/pandas_ext.md +0 -0
  22. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/spark.md +0 -0
  23. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/task.md +0 -0
  24. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
  25. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
  26. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
  27. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
  28. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
  29. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
  30. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
  31. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
  32. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
  33. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
  34. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
  35. {openaivec-0.14.10 → openaivec-0.14.12}/docs/api/tasks/nlp/translation.md +0 -0
  36. {openaivec-0.14.10 → openaivec-0.14.12}/docs/index.md +0 -0
  37. {openaivec-0.14.10 → openaivec-0.14.12}/docs/robots.txt +0 -0
  38. {openaivec-0.14.10 → openaivec-0.14.12}/mkdocs.yml +0 -0
  39. {openaivec-0.14.10 → openaivec-0.14.12}/pyproject.toml +0 -0
  40. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/__init__.py +0 -0
  41. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_dynamic.py +0 -0
  42. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_embeddings.py +0 -0
  43. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_log.py +0 -0
  44. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_model.py +0 -0
  45. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_optimize.py +0 -0
  46. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_prompt.py +0 -0
  47. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_proxy.py +0 -0
  48. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_responses.py +0 -0
  49. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_schema.py +0 -0
  50. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_serialize.py +0 -0
  51. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/_util.py +0 -0
  52. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/__init__.py +0 -0
  53. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/customer_support/__init__.py +0 -0
  54. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
  55. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
  56. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
  57. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
  58. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
  59. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
  60. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/nlp/__init__.py +0 -0
  61. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
  62. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
  63. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
  64. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
  65. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
  66. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/nlp/translation.py +0 -0
  67. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/table/__init__.py +0 -0
  68. {openaivec-0.14.10 → openaivec-0.14.12}/src/openaivec/task/table/fillna.py +0 -0
  69. {openaivec-0.14.10 → openaivec-0.14.12}/tests/__init__.py +0 -0
  70. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_di.py +0 -0
  71. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_dynamic.py +0 -0
  72. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_embeddings.py +0 -0
  73. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_optimize.py +0 -0
  74. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_pandas_ext.py +0 -0
  75. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_prompt.py +0 -0
  76. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_proxy.py +0 -0
  77. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_proxy_suggester.py +0 -0
  78. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_responses.py +0 -0
  79. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_schema.py +0 -0
  80. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_serialize.py +0 -0
  81. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_serialize_pydantic_v2_compliance.py +0 -0
  82. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_task.py +0 -0
  83. {openaivec-0.14.10 → openaivec-0.14.12}/tests/test_util.py +0 -0
  84. {openaivec-0.14.10 → openaivec-0.14.12}/uv.lock +0 -0
@@ -24,7 +24,10 @@ Entry points:
24
24
  - Spark UDF builders in `spark.py`
25
25
  - Structured tasks under `task/`
26
26
 
27
- Azure note: Use deployment name as `model`. Warn if base URL not v1. Behavior otherwise mirrors OpenAI.
27
+ Azure note: Use deployment name as `model`. Standard Azure OpenAI configuration uses:
28
+ - Base URL: `https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/`
29
+ - API Version: `"preview"`
30
+ Warn if base URL not v1. Behavior otherwise mirrors OpenAI.
28
31
 
29
32
  ---
30
33
 
@@ -137,7 +140,16 @@ Public exports (`__init__.py`): `BatchResponses`, `AsyncBatchResponses`, `BatchE
137
140
  ## 10. Provider / Azure Rules
138
141
 
139
142
  - Auto-detect provider from env variables; deployment name = model for Azure.
140
- - Warn (don’t fail) if Azure base URL not v1 format; still proceed.
143
+ - Standard Azure OpenAI configuration:
144
+ - Base URL: `https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/`
145
+ - API Version: `"preview"`
146
+ - Environment variables:
147
+ ```bash
148
+ export AZURE_OPENAI_API_KEY="your-azure-key"
149
+ export AZURE_OPENAI_BASE_URL="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
150
+ export AZURE_OPENAI_API_VERSION="preview"
151
+ ```
152
+ - Warn (don't fail) if Azure base URL not v1 format; still proceed.
141
153
  - Keep code paths unified; avoid forking logic unless behavior diverges.
142
154
 
143
155
  ---
@@ -348,6 +360,9 @@ uv run mkdocs serve
348
360
  Environment setup notes:
349
361
 
350
362
  - Set `OPENAI_API_KEY` or Azure trio (`AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_BASE_URL`, `AZURE_OPENAI_API_VERSION`).
363
+ - Standard Azure OpenAI configuration:
364
+ - `AZURE_OPENAI_BASE_URL="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"`
365
+ - `AZURE_OPENAI_API_VERSION="preview"`
351
366
  - Tests auto-skip live paths when credentials absent.
352
367
  - Use separate shell profiles per provider if switching frequently.
353
- - Azure canonical base URL should end with `/openai/v1/` (e.g. `https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/`); non‑v1 forms emit a warning.
368
+ - Azure canonical base URL must end with `/openai/v1/` (e.g. `https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/`); non‑v1 forms emit a warning.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 0.14.10
3
+ Version: 0.14.12
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -334,26 +334,34 @@ Scale to enterprise datasets with distributed processing:
334
334
  First, obtain a Spark session and configure authentication:
335
335
 
336
336
  ```python
337
- import os
338
337
  from pyspark.sql import SparkSession
338
+ from openaivec.spark import setup, setup_azure
339
339
 
340
340
  spark = SparkSession.builder.getOrCreate()
341
- sc = spark.sparkContext
342
341
 
343
- # Configure authentication via SparkContext environment variables
344
342
  # Option 1: Using OpenAI
345
- sc.environment["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")
343
+ setup(
344
+ spark,
345
+ api_key="your-openai-api-key",
346
+ responses_model_name="gpt-4.1-mini", # Optional: set default model
347
+ embeddings_model_name="text-embedding-3-small" # Optional: set default model
348
+ )
346
349
 
347
350
  # Option 2: Using Azure OpenAI
348
- # sc.environment["AZURE_OPENAI_API_KEY"] = os.environ.get("AZURE_OPENAI_API_KEY")
349
- # sc.environment["AZURE_OPENAI_BASE_URL"] = os.environ.get("AZURE_OPENAI_BASE_URL")
350
- # sc.environment["AZURE_OPENAI_API_VERSION"] = os.environ.get("AZURE_OPENAI_API_VERSION")
351
+ # setup_azure(
352
+ # spark,
353
+ # api_key="your-azure-openai-api-key",
354
+ # base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
355
+ # api_version="preview",
356
+ # responses_model_name="my-gpt4-deployment", # Optional: set default deployment
357
+ # embeddings_model_name="my-embedding-deployment" # Optional: set default deployment
358
+ # )
351
359
  ```
352
360
 
353
361
  Next, create and register UDFs using the provided functions:
354
362
 
355
363
  ```python
356
- from openaivec.spark import responses_udf, task_udf, embeddings_udf, count_tokens_udf
364
+ from openaivec.spark import responses_udf, task_udf, embeddings_udf, count_tokens_udf, similarity_udf, parse_udf
357
365
  from pydantic import BaseModel
358
366
 
359
367
  # --- Register Responses UDF (String Output) ---
@@ -387,6 +395,9 @@ spark.udf.register(
387
395
  # --- Register Token Counting UDF ---
388
396
  spark.udf.register("count_tokens", count_tokens_udf())
389
397
 
398
+ # --- Register Similarity UDF ---
399
+ spark.udf.register("compute_similarity", similarity_udf())
400
+
390
401
  # --- Register UDFs with Pre-configured Tasks ---
391
402
  from openaivec.task import nlp, customer_support
392
403
 
@@ -414,6 +425,17 @@ spark.udf.register(
414
425
  )
415
426
  )
416
427
 
428
+ # --- Register Parse UDF (Dynamic Schema Inference) ---
429
+ spark.udf.register(
430
+ "parse_dynamic",
431
+ parse_udf(
432
+ instructions="Extract key entities and attributes from the text",
433
+ example_table_name="sample_texts", # Infer schema from examples
434
+ example_field_name="text",
435
+ max_examples=50
436
+ )
437
+ )
438
+
417
439
  ```
418
440
 
419
441
  You can now use these UDFs in Spark SQL:
@@ -691,17 +713,19 @@ steps:
691
713
  - In the notebook, import and use `openaivec.spark` functions as you normally would. For example:
692
714
 
693
715
  ```python
694
- import os
695
- from openaivec.spark import responses_udf, embeddings_udf
716
+ from openaivec.spark import setup_azure, responses_udf, embeddings_udf
696
717
 
697
718
  # In Microsoft Fabric, spark session is automatically available
698
719
  # spark = SparkSession.builder.getOrCreate()
699
- sc = spark.sparkContext
700
-
720
+
701
721
  # Configure Azure OpenAI authentication
702
- sc.environment["AZURE_OPENAI_API_KEY"] = "<your-api-key>"
703
- sc.environment["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
704
- sc.environment["AZURE_OPENAI_API_VERSION"] = "preview"
722
+ setup_azure(
723
+ spark,
724
+ api_key="<your-api-key>",
725
+ base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
726
+ api_version="preview",
727
+ responses_model_name="my-gpt4-deployment" # Your Azure deployment name
728
+ )
705
729
 
706
730
  # Register UDFs
707
731
  spark.udf.register(
@@ -308,26 +308,34 @@ Scale to enterprise datasets with distributed processing:
308
308
  First, obtain a Spark session and configure authentication:
309
309
 
310
310
  ```python
311
- import os
312
311
  from pyspark.sql import SparkSession
312
+ from openaivec.spark import setup, setup_azure
313
313
 
314
314
  spark = SparkSession.builder.getOrCreate()
315
- sc = spark.sparkContext
316
315
 
317
- # Configure authentication via SparkContext environment variables
318
316
  # Option 1: Using OpenAI
319
- sc.environment["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")
317
+ setup(
318
+ spark,
319
+ api_key="your-openai-api-key",
320
+ responses_model_name="gpt-4.1-mini", # Optional: set default model
321
+ embeddings_model_name="text-embedding-3-small" # Optional: set default model
322
+ )
320
323
 
321
324
  # Option 2: Using Azure OpenAI
322
- # sc.environment["AZURE_OPENAI_API_KEY"] = os.environ.get("AZURE_OPENAI_API_KEY")
323
- # sc.environment["AZURE_OPENAI_BASE_URL"] = os.environ.get("AZURE_OPENAI_BASE_URL")
324
- # sc.environment["AZURE_OPENAI_API_VERSION"] = os.environ.get("AZURE_OPENAI_API_VERSION")
325
+ # setup_azure(
326
+ # spark,
327
+ # api_key="your-azure-openai-api-key",
328
+ # base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
329
+ # api_version="preview",
330
+ # responses_model_name="my-gpt4-deployment", # Optional: set default deployment
331
+ # embeddings_model_name="my-embedding-deployment" # Optional: set default deployment
332
+ # )
325
333
  ```
326
334
 
327
335
  Next, create and register UDFs using the provided functions:
328
336
 
329
337
  ```python
330
- from openaivec.spark import responses_udf, task_udf, embeddings_udf, count_tokens_udf
338
+ from openaivec.spark import responses_udf, task_udf, embeddings_udf, count_tokens_udf, similarity_udf, parse_udf
331
339
  from pydantic import BaseModel
332
340
 
333
341
  # --- Register Responses UDF (String Output) ---
@@ -361,6 +369,9 @@ spark.udf.register(
361
369
  # --- Register Token Counting UDF ---
362
370
  spark.udf.register("count_tokens", count_tokens_udf())
363
371
 
372
+ # --- Register Similarity UDF ---
373
+ spark.udf.register("compute_similarity", similarity_udf())
374
+
364
375
  # --- Register UDFs with Pre-configured Tasks ---
365
376
  from openaivec.task import nlp, customer_support
366
377
 
@@ -388,6 +399,17 @@ spark.udf.register(
388
399
  )
389
400
  )
390
401
 
402
+ # --- Register Parse UDF (Dynamic Schema Inference) ---
403
+ spark.udf.register(
404
+ "parse_dynamic",
405
+ parse_udf(
406
+ instructions="Extract key entities and attributes from the text",
407
+ example_table_name="sample_texts", # Infer schema from examples
408
+ example_field_name="text",
409
+ max_examples=50
410
+ )
411
+ )
412
+
391
413
  ```
392
414
 
393
415
  You can now use these UDFs in Spark SQL:
@@ -665,17 +687,19 @@ steps:
665
687
  - In the notebook, import and use `openaivec.spark` functions as you normally would. For example:
666
688
 
667
689
  ```python
668
- import os
669
- from openaivec.spark import responses_udf, embeddings_udf
690
+ from openaivec.spark import setup_azure, responses_udf, embeddings_udf
670
691
 
671
692
  # In Microsoft Fabric, spark session is automatically available
672
693
  # spark = SparkSession.builder.getOrCreate()
673
- sc = spark.sparkContext
674
-
694
+
675
695
  # Configure Azure OpenAI authentication
676
- sc.environment["AZURE_OPENAI_API_KEY"] = "<your-api-key>"
677
- sc.environment["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
678
- sc.environment["AZURE_OPENAI_API_VERSION"] = "preview"
696
+ setup_azure(
697
+ spark,
698
+ api_key="<your-api-key>",
699
+ base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
700
+ api_version="preview",
701
+ responses_model_name="my-gpt4-deployment" # Your Azure deployment name
702
+ )
679
703
 
680
704
  # Register UDFs
681
705
  spark.udf.register(
@@ -303,3 +303,24 @@ class Container:
303
303
  self._providers.clear()
304
304
  self._instances.clear()
305
305
  self._resolving.clear()
306
+
307
+ def clear_singletons(self) -> None:
308
+ """Clear all cached singleton instances from the container.
309
+
310
+ Removes all cached singleton instances while keeping the registered
311
+ providers intact. After calling this method, the next resolve call
312
+ for any service will create a new instance using the provider function.
313
+
314
+ Example:
315
+ ```python
316
+ container = Container()
317
+ container.register(str, lambda: "Hello")
318
+ instance1 = container.resolve(str)
319
+ container.clear_singletons()
320
+ instance2 = container.resolve(str)
321
+ print(instance1 is instance2)
322
+ # False - different instances after clearing singletons
323
+ ```
324
+ """
325
+ with self._lock:
326
+ self._instances.clear()
@@ -130,35 +130,9 @@ def provide_async_openai_client() -> AsyncOpenAI:
130
130
  )
131
131
 
132
132
 
133
- CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName("gpt-4.1-mini"))
134
- CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName("text-embedding-3-small"))
135
- CONTAINER.register(OpenAIAPIKey, lambda: OpenAIAPIKey(os.getenv("OPENAI_API_KEY")))
136
- CONTAINER.register(AzureOpenAIAPIKey, lambda: AzureOpenAIAPIKey(os.getenv("AZURE_OPENAI_API_KEY")))
137
- CONTAINER.register(AzureOpenAIBaseURL, lambda: AzureOpenAIBaseURL(os.getenv("AZURE_OPENAI_BASE_URL")))
138
- CONTAINER.register(
139
- cls=AzureOpenAIAPIVersion,
140
- provider=lambda: AzureOpenAIAPIVersion(os.getenv("AZURE_OPENAI_API_VERSION", "preview")),
141
- )
142
- CONTAINER.register(OpenAI, provide_openai_client)
143
- CONTAINER.register(AsyncOpenAI, provide_async_openai_client)
144
- CONTAINER.register(tiktoken.Encoding, lambda: tiktoken.get_encoding("o200k_base"))
145
- CONTAINER.register(TextChunker, lambda: TextChunker(CONTAINER.resolve(tiktoken.Encoding)))
146
- CONTAINER.register(
147
- SchemaInferer,
148
- lambda: SchemaInferer(
149
- client=CONTAINER.resolve(OpenAI),
150
- model_name=CONTAINER.resolve(ResponsesModelName).value,
151
- ),
152
- )
153
-
154
-
155
- def reset_environment_registrations():
156
- """Reset environment variable related registrations in the container.
157
-
158
- This function re-registers environment variable dependent services to pick up
159
- current environment variable values. Useful for testing when environment
160
- variables are changed after initial container setup.
161
- """
133
+ def set_default_registrations():
134
+ CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName("gpt-4.1-mini"))
135
+ CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName("text-embedding-3-small"))
162
136
  CONTAINER.register(OpenAIAPIKey, lambda: OpenAIAPIKey(os.getenv("OPENAI_API_KEY")))
163
137
  CONTAINER.register(AzureOpenAIAPIKey, lambda: AzureOpenAIAPIKey(os.getenv("AZURE_OPENAI_API_KEY")))
164
138
  CONTAINER.register(AzureOpenAIBaseURL, lambda: AzureOpenAIBaseURL(os.getenv("AZURE_OPENAI_BASE_URL")))
@@ -168,6 +142,8 @@ def reset_environment_registrations():
168
142
  )
169
143
  CONTAINER.register(OpenAI, provide_openai_client)
170
144
  CONTAINER.register(AsyncOpenAI, provide_async_openai_client)
145
+ CONTAINER.register(tiktoken.Encoding, lambda: tiktoken.get_encoding("o200k_base"))
146
+ CONTAINER.register(TextChunker, lambda: TextChunker(CONTAINER.resolve(tiktoken.Encoding)))
171
147
  CONTAINER.register(
172
148
  SchemaInferer,
173
149
  lambda: SchemaInferer(
@@ -175,3 +151,6 @@ def reset_environment_registrations():
175
151
  model_name=CONTAINER.resolve(ResponsesModelName).value,
176
152
  ),
177
153
  )
154
+
155
+
156
+ set_default_registrations()
@@ -454,6 +454,7 @@ class OpenAIVecSeriesAccessor:
454
454
  """Parse Series values using an LLM with a provided cache.
455
455
  This method allows you to parse the Series content into structured data
456
456
  using an LLM, optionally inferring a schema based on the provided purpose.
457
+
457
458
  Args:
458
459
  instructions (str): System prompt for the LLM.
459
460
  cache (BatchingMapProxy[str, BaseModel]): Explicit cache instance for