PyPI - openaivec - Versions diffs - 0.14.12__py3-none-any.whl → 0.14.13__py3-none-any.whl - Mend

openaivec 0.14.12__py3-none-any.whl → 0.14.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

openaivec/_embeddings.py +17 -4
openaivec/_model.py +7 -12
openaivec/_prompt.py +3 -6
openaivec/_responses.py +39 -117
openaivec/_schema.py +27 -23
openaivec/pandas_ext.py +355 -343
openaivec/spark.py +32 -39
openaivec/task/__init__.py +1 -1
openaivec/task/customer_support/customer_sentiment.py +4 -9
openaivec/task/customer_support/inquiry_classification.py +5 -8
openaivec/task/customer_support/inquiry_summary.py +5 -6
openaivec/task/customer_support/intent_analysis.py +5 -7
openaivec/task/customer_support/response_suggestion.py +5 -8
openaivec/task/customer_support/urgency_analysis.py +5 -8
openaivec/task/nlp/dependency_parsing.py +1 -2
openaivec/task/nlp/keyword_extraction.py +1 -2
openaivec/task/nlp/morphological_analysis.py +1 -2
openaivec/task/nlp/named_entity_recognition.py +1 -2
openaivec/task/nlp/sentiment_analysis.py +1 -2
openaivec/task/nlp/translation.py +1 -1
openaivec/task/table/fillna.py +8 -3
{openaivec-0.14.12.dist-info → openaivec-0.14.13.dist-info}/METADATA +1 -1
openaivec-0.14.13.dist-info/RECORD +37 -0
openaivec-0.14.12.dist-info/RECORD +0 -37
{openaivec-0.14.12.dist-info → openaivec-0.14.13.dist-info}/WHEEL +0 -0
{openaivec-0.14.12.dist-info → openaivec-0.14.13.dist-info}/licenses/LICENSE +0 -0

openaivec/_schema.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """Internal schema inference & dynamic model materialization utilities.
 This (non-public) module converts a small *representative* sample of free‑text
-examples plus a *purpose* statement into:
+examples plus an *instructions* statement into:
 1. A vetted hierarchical object specification (``ObjectSpec``) whose recursively
      defined ``fields`` (``FieldSpec``) capture reliably extractable signals.
@@ -45,7 +45,7 @@ Example (conceptual):
         schema = inferer.infer_schema(
                 SchemaInferenceInput(
                         examples=["Order #123 delayed due to weather", "Order #456 delivered"],
-                        purpose="Extract operational status signals for logistics analytics",
+                        instructions="Extract operational status signals for logistics analytics",
                 )
         )
         Model = schema.model  # dynamic Pydantic model
@@ -71,16 +71,16 @@ __all__: list[str] = []
 class InferredSchema(BaseModel):
     """Result of a schema inference round.
-    Contains the normalized *purpose*, objective *examples_summary*, the root
+    Contains the normalized *instructions*, objective *examples_summary*, the root
     hierarchical ``object_spec`` contract, and the canonical reusable
     ``inference_prompt``. The prompt MUST be fully derivable from the other
     components (no new unstated facts) to preserve traceability.
     Attributes:
-        purpose: Unambiguous restatement of the user's objective.
+        instructions: Unambiguous restatement of the user's objective.
         examples_summary: Neutral description of structural / semantic patterns
             observed in the examples.
-        examples_purpose_alignment: Mapping from purpose facets to concrete
+        examples_instructions_alignment: Mapping from instructions facets to concrete
             recurring evidence (or explicit gaps) anchoring extraction scope.
         object_spec: Root ``ObjectSpec`` (UpperCamelCase name) whose ``fields``
             recursively define the extraction schema.
@@ -88,7 +88,7 @@ class InferredSchema(BaseModel):
             hierarchy, and types (no additions/removals/renames).
     """
-    purpose: str = Field(
+    instructions: str = Field(
         description=(
             "Normalized, unambiguous restatement of the user objective with redundant, vague, or "
             "conflicting phrasing removed."
@@ -100,24 +100,25 @@ class InferredSchema(BaseModel):
             "patterns, and notable constraints."
         )
     )
-    examples_purpose_alignment: str = Field(
+    examples_instructions_alignment: str = Field(
         description=(
             "Explanation of how observable recurring patterns in the examples substantiate and bound the stated "
-            "purpose. Should reference purpose facets and cite supporting example evidence (or note any gaps) to "
-            "reduce hallucinated fields. Internal diagnostic / quality aid; not required for downstream extraction."
+            "instructions. Should reference instructions facets and cite supporting example evidence (or note any "
+            "gaps) to reduce hallucinated fields. Internal diagnostic / quality aid; not required for downstream "
+            "extraction."
         )
     )
     object_spec: ObjectSpec = Field(
         description=(
             "Root ObjectSpec (recursive). Each contained object's field list is unique-name ordered and derived "
-            "strictly from observable, repeatable signals aligned with the purpose."
+            "strictly from observable, repeatable signals aligned with the instructions."
         )
     )
     inference_prompt: str = Field(
         description=(
-            "Canonical, reusable extraction prompt. Must be derivable from purpose + summaries + object_spec. Enforces "
-            "exact hierarchical field set (names, order per object, types) forbidding additions, removals, renames, or "
-            "subjective language. Self-contained (no TODOs, external refs, or placeholders)."
+            "Canonical, reusable extraction prompt. Must be derivable from instructions + summaries + object_spec. "
+            "Enforces exact hierarchical field set (names, order per object, types) forbidding additions, removals, "
+            "renames, or subjective language. Self-contained (no TODOs, external refs, or placeholders)."
         )
     )
@@ -153,7 +154,9 @@ class InferredSchema(BaseModel):
             PreparedTask: Ready for batched structured extraction calls.
         """
         return PreparedTask(
-            instructions=self.inference_prompt, response_format=self.model, top_p=None, temperature=None
+            instructions=self.inference_prompt,
+            response_format=self.model,
+            api_kwargs={"top_p": None, "temperature": None},
         )
     def build_model(self) -> type[BaseModel]:
@@ -176,7 +179,7 @@ class SchemaInferenceInput(BaseModel):
         examples: Representative sample texts restricted to the in‑scope
             distribution (exclude outliers / noise). Size should be *minimal*
             yet sufficient to surface recurring patterns.
-        purpose: Plain language description of downstream usage (analytics,
+        instructions: Plain language description of downstream usage (analytics,
             filtering, enrichment, feature engineering, etc.). Guides field
             relevance & exclusion of outcome labels.
     """
@@ -187,7 +190,7 @@ class SchemaInferenceInput(BaseModel):
             "exclude outliers not in scope."
         )
     )
-    purpose: str = Field(
+    instructions: str = Field(
         description=(
             "Plain language statement describing the downstream use of the extracted structured data (e.g. "
             "analytics, filtering, enrichment)."
@@ -199,15 +202,16 @@ _INFER_INSTRUCTIONS = """
 You are a schema inference engine.
 Task:
-1. Normalize the user's purpose (eliminate ambiguity, redundancy, contradictions).
+1. Normalize the user's instructions (eliminate ambiguity, redundancy, contradictions).
 2. Objectively summarize observable patterns in the example texts.
-3. Produce an "examples_purpose_alignment" explanation mapping purpose facets to concrete recurring evidence (or gaps).
+3. Produce an "examples_instructions_alignment" explanation mapping instructions facets to concrete recurring
+     evidence (or gaps).
 4. Propose a minimal hierarchical schema (root ObjectSpec) comprised of reliably extractable fields. Use nesting ONLY
      when a group of fields forms a cohesive sub-entity repeated in the data; otherwise keep flat.
 5. Skip fields likely missing in a large share (>~20%) of realistic inputs.
 6. Provide enum_spec ONLY when a small stable closed categorical set (1–{_MAX_ENUM_VALUES} raw tokens) is clearly
      evidenced; never invent unseen categories.
-7. If the purpose indicates prediction (predict / probability / likelihood),
+7. If the instructions indicate prediction (predict / probability / likelihood),
    output only explanatory features (no target restatement).
 Rules:
@@ -229,9 +233,9 @@ Rules:
 Output contract:
 Return exactly an InferredSchema JSON object with keys:
-        - purpose (string)
+        - instructions (string)
         - examples_summary (string)
-        - examples_purpose_alignment (string)
+        - examples_instructions_alignment (string)
         - object_spec (ObjectSpec: name, fields[list[FieldSpec]])
         - inference_prompt (string)
 Where each FieldSpec includes: name, type, description, optional enum_spec (for
@@ -272,14 +276,14 @@ class SchemaInferer:
                 3. Retry (up to ``max_retries``) on validation failure.
         Args:
-            data (SchemaInferenceInput): Representative examples + purpose.
+            data (SchemaInferenceInput): Representative examples + instructions.
             *args: Positional passthrough to ``client.responses.parse``.
             max_retries (int, optional): Attempts before surfacing the last validation error
                 (must be >= 1). Defaults to 3.
             **kwargs: Keyword passthrough to ``client.responses.parse``.
         Returns:
-            InferredSchema: Fully validated schema (purpose, examples summary,
+            InferredSchema: Fully validated schema (instructions, examples summary,
             ordered fields, extraction prompt).
         Raises:

openaivec 0.14.12__py3-none-any.whl → 0.14.13__py3-none-any.whl

openaivec 0.14.12__py3-none-any.whl → 0.14.13__py3-none-any.whl