openaivec 0.14.12__py3-none-any.whl → 0.14.14__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
openaivec/_schema.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Internal schema inference & dynamic model materialization utilities.
2
2
 
3
3
  This (non-public) module converts a small *representative* sample of free‑text
4
- examples plus a *purpose* statement into:
4
+ examples plus an *instructions* statement into:
5
5
 
6
6
  1. A vetted hierarchical object specification (``ObjectSpec``) whose recursively
7
7
  defined ``fields`` (``FieldSpec``) capture reliably extractable signals.
@@ -45,7 +45,7 @@ Example (conceptual):
45
45
  schema = inferer.infer_schema(
46
46
  SchemaInferenceInput(
47
47
  examples=["Order #123 delayed due to weather", "Order #456 delivered"],
48
- purpose="Extract operational status signals for logistics analytics",
48
+ instructions="Extract operational status signals for logistics analytics",
49
49
  )
50
50
  )
51
51
  Model = schema.model # dynamic Pydantic model
@@ -71,16 +71,16 @@ __all__: list[str] = []
71
71
  class InferredSchema(BaseModel):
72
72
  """Result of a schema inference round.
73
73
 
74
- Contains the normalized *purpose*, objective *examples_summary*, the root
74
+ Contains the normalized *instructions*, objective *examples_summary*, the root
75
75
  hierarchical ``object_spec`` contract, and the canonical reusable
76
76
  ``inference_prompt``. The prompt MUST be fully derivable from the other
77
77
  components (no new unstated facts) to preserve traceability.
78
78
 
79
79
  Attributes:
80
- purpose: Unambiguous restatement of the user's objective.
80
+ instructions: Unambiguous restatement of the user's objective.
81
81
  examples_summary: Neutral description of structural / semantic patterns
82
82
  observed in the examples.
83
- examples_purpose_alignment: Mapping from purpose facets to concrete
83
+ examples_instructions_alignment: Mapping from instructions facets to concrete
84
84
  recurring evidence (or explicit gaps) anchoring extraction scope.
85
85
  object_spec: Root ``ObjectSpec`` (UpperCamelCase name) whose ``fields``
86
86
  recursively define the extraction schema.
@@ -88,7 +88,7 @@ class InferredSchema(BaseModel):
88
88
  hierarchy, and types (no additions/removals/renames).
89
89
  """
90
90
 
91
- purpose: str = Field(
91
+ instructions: str = Field(
92
92
  description=(
93
93
  "Normalized, unambiguous restatement of the user objective with redundant, vague, or "
94
94
  "conflicting phrasing removed."
@@ -100,24 +100,25 @@ class InferredSchema(BaseModel):
100
100
  "patterns, and notable constraints."
101
101
  )
102
102
  )
103
- examples_purpose_alignment: str = Field(
103
+ examples_instructions_alignment: str = Field(
104
104
  description=(
105
105
  "Explanation of how observable recurring patterns in the examples substantiate and bound the stated "
106
- "purpose. Should reference purpose facets and cite supporting example evidence (or note any gaps) to "
107
- "reduce hallucinated fields. Internal diagnostic / quality aid; not required for downstream extraction."
106
+ "instructions. Should reference instructions facets and cite supporting example evidence (or note any "
107
+ "gaps) to reduce hallucinated fields. Internal diagnostic / quality aid; not required for downstream "
108
+ "extraction."
108
109
  )
109
110
  )
110
111
  object_spec: ObjectSpec = Field(
111
112
  description=(
112
113
  "Root ObjectSpec (recursive). Each contained object's field list is unique-name ordered and derived "
113
- "strictly from observable, repeatable signals aligned with the purpose."
114
+ "strictly from observable, repeatable signals aligned with the instructions."
114
115
  )
115
116
  )
116
117
  inference_prompt: str = Field(
117
118
  description=(
118
- "Canonical, reusable extraction prompt. Must be derivable from purpose + summaries + object_spec. Enforces "
119
- "exact hierarchical field set (names, order per object, types) forbidding additions, removals, renames, or "
120
- "subjective language. Self-contained (no TODOs, external refs, or placeholders)."
119
+ "Canonical, reusable extraction prompt. Must be derivable from instructions + summaries + object_spec. "
120
+ "Enforces exact hierarchical field set (names, order per object, types) forbidding additions, removals, "
121
+ "renames, or subjective language. Self-contained (no TODOs, external refs, or placeholders)."
121
122
  )
122
123
  )
123
124
 
@@ -153,7 +154,9 @@ class InferredSchema(BaseModel):
153
154
  PreparedTask: Ready for batched structured extraction calls.
154
155
  """
155
156
  return PreparedTask(
156
- instructions=self.inference_prompt, response_format=self.model, top_p=None, temperature=None
157
+ instructions=self.inference_prompt,
158
+ response_format=self.model,
159
+ api_kwargs={"top_p": None, "temperature": None},
157
160
  )
158
161
 
159
162
  def build_model(self) -> type[BaseModel]:
@@ -176,7 +179,7 @@ class SchemaInferenceInput(BaseModel):
176
179
  examples: Representative sample texts restricted to the in‑scope
177
180
  distribution (exclude outliers / noise). Size should be *minimal*
178
181
  yet sufficient to surface recurring patterns.
179
- purpose: Plain language description of downstream usage (analytics,
182
+ instructions: Plain language description of downstream usage (analytics,
180
183
  filtering, enrichment, feature engineering, etc.). Guides field
181
184
  relevance & exclusion of outcome labels.
182
185
  """
@@ -187,7 +190,7 @@ class SchemaInferenceInput(BaseModel):
187
190
  "exclude outliers not in scope."
188
191
  )
189
192
  )
190
- purpose: str = Field(
193
+ instructions: str = Field(
191
194
  description=(
192
195
  "Plain language statement describing the downstream use of the extracted structured data (e.g. "
193
196
  "analytics, filtering, enrichment)."
@@ -199,15 +202,16 @@ _INFER_INSTRUCTIONS = """
199
202
  You are a schema inference engine.
200
203
 
201
204
  Task:
202
- 1. Normalize the user's purpose (eliminate ambiguity, redundancy, contradictions).
205
+ 1. Normalize the user's instructions (eliminate ambiguity, redundancy, contradictions).
203
206
  2. Objectively summarize observable patterns in the example texts.
204
- 3. Produce an "examples_purpose_alignment" explanation mapping purpose facets to concrete recurring evidence (or gaps).
207
+ 3. Produce an "examples_instructions_alignment" explanation mapping instructions facets to concrete recurring
208
+ evidence (or gaps).
205
209
  4. Propose a minimal hierarchical schema (root ObjectSpec) comprised of reliably extractable fields. Use nesting ONLY
206
210
  when a group of fields forms a cohesive sub-entity repeated in the data; otherwise keep flat.
207
211
  5. Skip fields likely missing in a large share (>~20%) of realistic inputs.
208
212
  6. Provide enum_spec ONLY when a small stable closed categorical set (1–{_MAX_ENUM_VALUES} raw tokens) is clearly
209
213
  evidenced; never invent unseen categories.
210
- 7. If the purpose indicates prediction (predict / probability / likelihood),
214
+ 7. If the instructions indicate prediction (predict / probability / likelihood),
211
215
  output only explanatory features (no target restatement).
212
216
 
213
217
  Rules:
@@ -229,9 +233,9 @@ Rules:
229
233
 
230
234
  Output contract:
231
235
  Return exactly an InferredSchema JSON object with keys:
232
- - purpose (string)
236
+ - instructions (string)
233
237
  - examples_summary (string)
234
- - examples_purpose_alignment (string)
238
+ - examples_instructions_alignment (string)
235
239
  - object_spec (ObjectSpec: name, fields[list[FieldSpec]])
236
240
  - inference_prompt (string)
237
241
  Where each FieldSpec includes: name, type, description, optional enum_spec (for
@@ -272,14 +276,14 @@ class SchemaInferer:
272
276
  3. Retry (up to ``max_retries``) on validation failure.
273
277
 
274
278
  Args:
275
- data (SchemaInferenceInput): Representative examples + purpose.
279
+ data (SchemaInferenceInput): Representative examples + instructions.
276
280
  *args: Positional passthrough to ``client.responses.parse``.
277
281
  max_retries (int, optional): Attempts before surfacing the last validation error
278
282
  (must be >= 1). Defaults to 3.
279
283
  **kwargs: Keyword passthrough to ``client.responses.parse``.
280
284
 
281
285
  Returns:
282
- InferredSchema: Fully validated schema (purpose, examples summary,
286
+ InferredSchema: Fully validated schema (instructions, examples summary,
283
287
  ordered fields, extraction prompt).
284
288
 
285
289
  Raises: