openaivec 0.14.3__tar.gz → 0.14.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openaivec-0.14.3 → openaivec-0.14.4}/PKG-INFO +1 -1
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_proxy.py +24 -2
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_schema.py +47 -6
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/pandas_ext.py +372 -338
- openaivec-0.14.4/tests/test_schema.py +371 -0
- openaivec-0.14.3/tests/test_schema.py +0 -103
- {openaivec-0.14.3 → openaivec-0.14.4}/.env.example +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/.github/copilot-instructions.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/.github/workflows/python-mkdocs.yml +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/.github/workflows/python-package.yml +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/.github/workflows/python-test.yml +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/.github/workflows/python-update.yml +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/.gitignore +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/CODE_OF_CONDUCT.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/LICENSE +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/README.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/SECURITY.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/SUPPORT.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/main.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/pandas_ext.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/spark.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/task.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/api/tasks/nlp/translation.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/index.md +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/docs/robots.txt +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/mkdocs.yml +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/pyproject.toml +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/__init__.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_di.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_embeddings.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_log.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_model.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_optimize.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_prompt.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_provider.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_responses.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_serialize.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/_util.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/spark.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/__init__.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/customer_support/__init__.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/nlp/__init__.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/nlp/translation.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/table/__init__.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/src/openaivec/task/table/fillna.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/__init__.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_di.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_embeddings.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_optimize.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_pandas_ext.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_prompt.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_provider.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_proxy.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_proxy_suggester.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_responses.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_serialize.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_serialize_pydantic_v2_compliance.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_spark.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_task.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/tests/test_util.py +0 -0
- {openaivec-0.14.3 → openaivec-0.14.4}/uv.lock +0 -0
|
@@ -460,7 +460,20 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
|
|
|
460
460
|
self.__process_owned(owned, map_func)
|
|
461
461
|
self.__wait_for(wait_for, map_func)
|
|
462
462
|
|
|
463
|
-
|
|
463
|
+
# Fetch results before purging None entries
|
|
464
|
+
results = self.__values(items)
|
|
465
|
+
|
|
466
|
+
# Remove None values from cache so they are recomputed on future calls
|
|
467
|
+
with self._lock:
|
|
468
|
+
if self._cache: # micro-optimization
|
|
469
|
+
for k in set(items):
|
|
470
|
+
try:
|
|
471
|
+
if self._cache.get(k, object()) is None:
|
|
472
|
+
del self._cache[k]
|
|
473
|
+
except KeyError:
|
|
474
|
+
pass
|
|
475
|
+
|
|
476
|
+
return results
|
|
464
477
|
|
|
465
478
|
|
|
466
479
|
@dataclass
|
|
@@ -745,4 +758,13 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
|
|
|
745
758
|
await self.__process_owned(owned, map_func)
|
|
746
759
|
await self.__wait_for(wait_for, map_func)
|
|
747
760
|
|
|
748
|
-
|
|
761
|
+
results = await self.__values(items)
|
|
762
|
+
|
|
763
|
+
# Remove None values from cache after retrieval to avoid persisting incomplete results
|
|
764
|
+
async with self._lock:
|
|
765
|
+
if self._cache:
|
|
766
|
+
for k in set(items):
|
|
767
|
+
if self._cache.get(k, object()) is None:
|
|
768
|
+
self._cache.pop(k, None)
|
|
769
|
+
|
|
770
|
+
return results
|
|
@@ -128,6 +128,12 @@ class InferredSchema(BaseModel):
|
|
|
128
128
|
redundancy removed).
|
|
129
129
|
examples_summary: Neutral description of structural / semantic patterns
|
|
130
130
|
observed in the examples (domain, recurring signals, constraints).
|
|
131
|
+
examples_purpose_alignment: Analytical explanation of how the concrete
|
|
132
|
+
recurring patterns in the provided examples *justify*, *constrain*,
|
|
133
|
+
or *refine* the stated purpose. Should map purpose facets to
|
|
134
|
+
observed evidence (or explicitly note gaps) to discourage
|
|
135
|
+
hallucinated fields and anchor extraction scope. This is an
|
|
136
|
+
internal quality aid – downstream consumers typically ignore it.
|
|
131
137
|
fields: Ordered list of ``FieldSpec`` objects comprising the schema's
|
|
132
138
|
sole authoritative contract.
|
|
133
139
|
inference_prompt: Self-contained extraction instructions enforcing an
|
|
@@ -147,6 +153,13 @@ class InferredSchema(BaseModel):
|
|
|
147
153
|
"patterns, and notable constraints."
|
|
148
154
|
)
|
|
149
155
|
)
|
|
156
|
+
examples_purpose_alignment: str = Field(
|
|
157
|
+
description=(
|
|
158
|
+
"Explanation of how observable recurring patterns in the examples substantiate and bound the stated "
|
|
159
|
+
"purpose. Should reference purpose facets and cite supporting example evidence (or note any gaps) to "
|
|
160
|
+
"reduce hallucinated fields. Internal diagnostic / quality aid; not required for downstream extraction."
|
|
161
|
+
)
|
|
162
|
+
)
|
|
150
163
|
fields: List[FieldSpec] = Field(
|
|
151
164
|
description=(
|
|
152
165
|
"Ordered list of proposed fields derived strictly from observable, repeatable signals in the "
|
|
@@ -234,7 +247,7 @@ class InferredSchema(BaseModel):
|
|
|
234
247
|
py_type = enum_cls
|
|
235
248
|
else:
|
|
236
249
|
py_type = type_map[spec.type]
|
|
237
|
-
fields[spec.name] = (py_type, ...)
|
|
250
|
+
fields[spec.name] = (py_type, Field(description=spec.description))
|
|
238
251
|
|
|
239
252
|
model = create_model("InferredSchema", **fields) # type: ignore[call-arg]
|
|
240
253
|
return model
|
|
@@ -281,11 +294,15 @@ You are a schema inference engine.
|
|
|
281
294
|
Task:
|
|
282
295
|
1. Normalize the user's purpose (eliminate ambiguity, redundancy, contradictions).
|
|
283
296
|
2. Objectively summarize observable patterns in the example texts.
|
|
284
|
-
3. Propose a minimal flat set of scalar fields (no nesting / arrays) that are reliably extractable.
|
|
285
|
-
|
|
286
|
-
|
|
297
|
+
3. Produce an "examples_purpose_alignment" explanation that explicitly maps purpose facets
|
|
298
|
+
to concrete recurring evidence in the examples (or flags gaps). Use concise bullet‑style
|
|
299
|
+
sentences (still a plain string) such as: "purpose facet -> supporting pattern / gap".
|
|
300
|
+
This MUST NOT introduce new domain facts beyond the examples & purpose.
|
|
301
|
+
4. Propose a minimal flat set of scalar fields (no nesting / arrays) that are reliably extractable.
|
|
302
|
+
5. Skip fields likely missing in a large share (>~20%) of realistic inputs.
|
|
303
|
+
6. Provide enum_values ONLY when a small stable closed categorical set (2–24 lowercase tokens)
|
|
287
304
|
is clearly evidenced; never invent.
|
|
288
|
-
|
|
305
|
+
7. If the purpose indicates prediction (predict / probability / likelihood), output only
|
|
289
306
|
explanatory features (no target restatement).
|
|
290
307
|
|
|
291
308
|
Rules:
|
|
@@ -305,6 +322,7 @@ Output contract:
|
|
|
305
322
|
Return exactly an InferredSchema object with JSON keys:
|
|
306
323
|
- purpose (string)
|
|
307
324
|
- examples_summary (string)
|
|
325
|
+
- examples_purpose_alignment (string)
|
|
308
326
|
- fields (array of FieldSpec objects: name, type, description, enum_values?)
|
|
309
327
|
- inference_prompt (string)
|
|
310
328
|
""".strip()
|
|
@@ -359,10 +377,31 @@ class SchemaInferer:
|
|
|
359
377
|
raise ValueError("max_retries must be >= 1")
|
|
360
378
|
|
|
361
379
|
last_err: Exception | None = None
|
|
380
|
+
previous_errors: list[str] = []
|
|
362
381
|
for attempt in range(max_retries):
|
|
382
|
+
if attempt == 0:
|
|
383
|
+
instructions = _INFER_INSTRUCTIONS
|
|
384
|
+
else:
|
|
385
|
+
# Provide structured feedback for correction. Keep concise and prohibit speculative expansion.
|
|
386
|
+
feedback_lines = [
|
|
387
|
+
"--- PRIOR VALIDATION FEEDBACK ---",
|
|
388
|
+
]
|
|
389
|
+
for i, err in enumerate(previous_errors[-5:], 1): # include last up to 5 errors
|
|
390
|
+
feedback_lines.append(f"{i}. {err}")
|
|
391
|
+
feedback_lines.extend(
|
|
392
|
+
[
|
|
393
|
+
"Adjust ONLY listed issues; avoid adding brand-new fields unless essential.",
|
|
394
|
+
"Don't hallucinate or broaden enum_values unless enum rule caused failure.",
|
|
395
|
+
"Duplicate names: minimally rename; keep semantics.",
|
|
396
|
+
"Unsupported type: change to string|integer|float|boolean (no new facts).",
|
|
397
|
+
"Bad enum length: drop enum or constrain to 2–24 evidenced tokens.",
|
|
398
|
+
]
|
|
399
|
+
)
|
|
400
|
+
instructions = _INFER_INSTRUCTIONS + "\n\n" + "\n".join(feedback_lines)
|
|
401
|
+
|
|
363
402
|
response: ParsedResponse[InferredSchema] = self.client.responses.parse(
|
|
364
403
|
model=self.model_name,
|
|
365
|
-
instructions=_INFER_INSTRUCTIONS,
|
|
404
|
+
instructions=instructions,
|
|
366
405
|
input=data.model_dump_json(),
|
|
367
406
|
text_format=InferredSchema,
|
|
368
407
|
*args,
|
|
@@ -371,8 +410,10 @@ class SchemaInferer:
|
|
|
371
410
|
parsed = response.output_parsed
|
|
372
411
|
try:
|
|
373
412
|
_basic_field_list_validation(parsed)
|
|
413
|
+
parsed.build_model() # ensure dynamic model creation succeeds
|
|
374
414
|
except ValueError as e:
|
|
375
415
|
last_err = e
|
|
416
|
+
previous_errors.append(str(e))
|
|
376
417
|
if attempt == max_retries - 1:
|
|
377
418
|
raise
|
|
378
419
|
continue
|