openai-sdk-helpers 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openai_sdk_helpers/__init__.py +45 -41
- openai_sdk_helpers/agent/__init__.py +4 -6
- openai_sdk_helpers/agent/base.py +110 -191
- openai_sdk_helpers/agent/{config.py → configuration.py} +24 -32
- openai_sdk_helpers/agent/{coordination.py → coordinator.py} +22 -23
- openai_sdk_helpers/agent/runner.py +3 -45
- openai_sdk_helpers/agent/search/base.py +54 -76
- openai_sdk_helpers/agent/search/vector.py +92 -108
- openai_sdk_helpers/agent/search/web.py +104 -82
- openai_sdk_helpers/agent/summarizer.py +22 -28
- openai_sdk_helpers/agent/translator.py +22 -24
- openai_sdk_helpers/agent/{validation.py → validator.py} +19 -23
- openai_sdk_helpers/cli.py +8 -22
- openai_sdk_helpers/environment.py +8 -13
- openai_sdk_helpers/errors.py +9 -0
- openai_sdk_helpers/extract/__init__.py +23 -0
- openai_sdk_helpers/extract/extractor.py +157 -0
- openai_sdk_helpers/extract/generator.py +476 -0
- openai_sdk_helpers/prompt/extractor_config_agent_instructions.jinja +6 -0
- openai_sdk_helpers/prompt/extractor_config_generator.jinja +37 -0
- openai_sdk_helpers/prompt/extractor_config_generator_instructions.jinja +9 -0
- openai_sdk_helpers/prompt/extractor_prompt_optimizer_agent_instructions.jinja +4 -0
- openai_sdk_helpers/prompt/extractor_prompt_optimizer_request.jinja +11 -0
- openai_sdk_helpers/prompt/vector_planner.jinja +7 -0
- openai_sdk_helpers/prompt/vector_search.jinja +6 -0
- openai_sdk_helpers/prompt/vector_writer.jinja +7 -0
- openai_sdk_helpers/response/__init__.py +3 -7
- openai_sdk_helpers/response/base.py +89 -98
- openai_sdk_helpers/response/{config.py → configuration.py} +45 -20
- openai_sdk_helpers/response/files.py +2 -0
- openai_sdk_helpers/response/planner.py +1 -1
- openai_sdk_helpers/response/prompter.py +1 -1
- openai_sdk_helpers/response/runner.py +1 -48
- openai_sdk_helpers/response/tool_call.py +0 -141
- openai_sdk_helpers/response/vector_store.py +8 -5
- openai_sdk_helpers/streamlit_app/__init__.py +1 -1
- openai_sdk_helpers/streamlit_app/app.py +17 -18
- openai_sdk_helpers/streamlit_app/{config.py → configuration.py} +13 -13
- openai_sdk_helpers/structure/__init__.py +16 -0
- openai_sdk_helpers/structure/base.py +239 -278
- openai_sdk_helpers/structure/extraction.py +1228 -0
- openai_sdk_helpers/structure/plan/plan.py +0 -20
- openai_sdk_helpers/structure/plan/task.py +0 -33
- openai_sdk_helpers/structure/prompt.py +16 -0
- openai_sdk_helpers/structure/responses.py +2 -2
- openai_sdk_helpers/structure/web_search.py +0 -10
- openai_sdk_helpers/tools.py +346 -99
- openai_sdk_helpers/types.py +3 -3
- openai_sdk_helpers/utils/__init__.py +9 -6
- openai_sdk_helpers/utils/json/base_model.py +316 -33
- openai_sdk_helpers/utils/json/data_class.py +1 -1
- openai_sdk_helpers/utils/langextract.py +194 -0
- openai_sdk_helpers/utils/registry.py +19 -15
- openai_sdk_helpers/vector_storage/storage.py +1 -1
- {openai_sdk_helpers-0.4.2.dist-info → openai_sdk_helpers-0.5.0.dist-info}/METADATA +25 -11
- openai_sdk_helpers-0.5.0.dist-info/RECORD +95 -0
- openai_sdk_helpers/agent/prompt_utils.py +0 -15
- openai_sdk_helpers/context_manager.py +0 -241
- openai_sdk_helpers/deprecation.py +0 -167
- openai_sdk_helpers/retry.py +0 -175
- openai_sdk_helpers/streamlit_app/streamlit_web_search.py +0 -75
- openai_sdk_helpers/utils/deprecation.py +0 -167
- openai_sdk_helpers-0.4.2.dist-info/RECORD +0 -88
- /openai_sdk_helpers/{logging_config.py → logging.py} +0 -0
- /openai_sdk_helpers/{config.py → settings.py} +0 -0
- {openai_sdk_helpers-0.4.2.dist-info → openai_sdk_helpers-0.5.0.dist-info}/WHEEL +0 -0
- {openai_sdk_helpers-0.4.2.dist-info → openai_sdk_helpers-0.5.0.dist-info}/entry_points.txt +0 -0
- {openai_sdk_helpers-0.4.2.dist-info → openai_sdk_helpers-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -7,14 +7,20 @@ customizable _serialize_fields/_deserialize_fields hooks.
|
|
|
7
7
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
|
+
from enum import Enum
|
|
10
11
|
import json
|
|
11
12
|
from pathlib import Path
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
from
|
|
13
|
+
import inspect
|
|
14
|
+
import logging
|
|
15
|
+
import ast
|
|
16
|
+
from typing import Any, ClassVar, TypeVar, get_args, get_origin
|
|
17
|
+
from pydantic import BaseModel, ConfigDict
|
|
18
|
+
from ...logging import log
|
|
19
|
+
|
|
20
|
+
from .utils import customJSONEncoder
|
|
16
21
|
|
|
17
22
|
P = TypeVar("P", bound="BaseModelJSONSerializable")
|
|
23
|
+
_SENTINEL = object()
|
|
18
24
|
|
|
19
25
|
|
|
20
26
|
class BaseModelJSONSerializable(BaseModel):
|
|
@@ -49,6 +55,89 @@ class BaseModelJSONSerializable(BaseModel):
|
|
|
49
55
|
{'name': 'test', 'value': 42}
|
|
50
56
|
"""
|
|
51
57
|
|
|
58
|
+
@staticmethod
|
|
59
|
+
def format_output(label: str, *, value: Any) -> str:
|
|
60
|
+
"""
|
|
61
|
+
Format a label and value for string output.
|
|
62
|
+
|
|
63
|
+
Handles None values and lists appropriately.
|
|
64
|
+
|
|
65
|
+
Parameters
|
|
66
|
+
----------
|
|
67
|
+
label : str
|
|
68
|
+
Label describing the value.
|
|
69
|
+
value : Any
|
|
70
|
+
Value to format for display.
|
|
71
|
+
|
|
72
|
+
Returns
|
|
73
|
+
-------
|
|
74
|
+
str
|
|
75
|
+
Formatted string (for example ``"- Label: Value"``).
|
|
76
|
+
"""
|
|
77
|
+
if value is None:
|
|
78
|
+
return f"- {label}: None"
|
|
79
|
+
if isinstance(value, list):
|
|
80
|
+
formatted = ", ".join(str(v) for v in value)
|
|
81
|
+
return f"- {label}: {formatted or '[]'}"
|
|
82
|
+
return f"- {label}: {str(value)}"
|
|
83
|
+
|
|
84
|
+
def __repr__(self) -> str:
    """
    Render every dumped field as its own ``- name: value`` line.

    Returns
    -------
    str
        Newline-joined ``format_output`` result for each item of
        ``self.model_dump()``.
    """
    dumped = self.model_dump()
    lines = []
    for name, content in dumped.items():
        lines.append(BaseModelJSONSerializable.format_output(name, value=content))
    return "\n".join(lines)
|
|
99
|
+
|
|
100
|
+
def __str__(self) -> str:
    """
    Return the same text that ``__repr__`` produces.

    Returns
    -------
    str
        Formatted string for the model fields.
    """
    text = self.__repr__()
    return text
|
|
110
|
+
|
|
111
|
+
def to_markdown(self) -> str:
    """
    Return the model fields as a markdown bullet list.

    The output is exactly what ``__repr__`` returns.

    Returns
    -------
    str
        Formatted markdown string for the model fields.
    """
    markdown = self.__repr__()
    return markdown
|
|
121
|
+
|
|
122
|
+
@classmethod
def _get_all_fields(cls) -> dict[Any, Any]:
    """Gather the fields declared anywhere in the class hierarchy.

    Every ``BaseModel`` subclass in the method resolution order that has
    a ``model_fields`` attribute contributes its entries.

    Returns
    -------
    dict[Any, Any]
        Mapping of field names to the entries found in ``model_fields``.
    """
    collected: dict[Any, Any] = {}
    # Walk from the root of the MRO towards ``cls`` so entries from
    # more-derived classes overwrite those of their ancestors.
    for ancestor in cls.__mro__[::-1]:
        if not issubclass(ancestor, BaseModel):
            continue
        if not hasattr(ancestor, "model_fields"):
            continue
        collected.update(ancestor.model_fields)
    return collected
|
|
140
|
+
|
|
52
141
|
def to_json(self) -> dict[str, Any]:
|
|
53
142
|
"""Return a JSON-compatible dict representation.
|
|
54
143
|
|
|
@@ -57,11 +146,7 @@ class BaseModelJSONSerializable(BaseModel):
|
|
|
57
146
|
dict[str, Any]
|
|
58
147
|
Serialized model data.
|
|
59
148
|
"""
|
|
60
|
-
|
|
61
|
-
data = getattr(self, "model_dump")()
|
|
62
|
-
else:
|
|
63
|
-
data = self.__dict__.copy()
|
|
64
|
-
return self._serialize_fields(_to_jsonable(data))
|
|
149
|
+
return self.model_dump()
|
|
65
150
|
|
|
66
151
|
def to_json_file(self, filepath: str | Path) -> str:
|
|
67
152
|
"""Write serialized JSON data to a file path.
|
|
@@ -76,6 +161,8 @@ class BaseModelJSONSerializable(BaseModel):
|
|
|
76
161
|
str
|
|
77
162
|
Absolute path to the written file.
|
|
78
163
|
"""
|
|
164
|
+
from .. import check_filepath
|
|
165
|
+
|
|
79
166
|
target = Path(filepath)
|
|
80
167
|
check_filepath(fullfilepath=str(target))
|
|
81
168
|
with open(target, "w", encoding="utf-8") as handle:
|
|
@@ -88,62 +175,216 @@ class BaseModelJSONSerializable(BaseModel):
|
|
|
88
175
|
)
|
|
89
176
|
return str(target)
|
|
90
177
|
|
|
91
|
-
|
|
92
|
-
|
|
178
|
+
@classmethod
def _extract_enum_class(cls, field_type: Any) -> type[Enum] | None:
    """Find the Enum class referenced by a type annotation, if any.

    Covers a bare Enum class, ``list[Enum]``, and any other parameterised
    annotation (such as an optional/union) whose arguments are searched
    recursively in order.

    Parameters
    ----------
    field_type : Any
        Type annotation of a field.

    Returns
    -------
    type[Enum] or None
        Enum class if found, otherwise None.
    """
    if inspect.isclass(field_type) and issubclass(field_type, Enum):
        return field_type

    origin = get_origin(field_type)
    if origin is None:
        # Not parameterised, so there is nothing to look inside.
        return None

    args = get_args(field_type)
    if origin is list and args:
        first = args[0]
        if inspect.isclass(first) and issubclass(first, Enum):
            return first

    # Union-like and nested annotations: first argument that yields an Enum wins.
    for candidate in args:
        found = cls._extract_enum_class(candidate)
        if found is not None:
            return found
    return None
|
|
107
213
|
|
|
108
214
|
@classmethod
def _try_coerce_value(cls, field_name: str, field_type: Any, raw_value: Any) -> Any:
    """Try to convert a raw value into one specific field type.

    Parameters
    ----------
    field_name : str
        Field name being converted.
    field_type : Any
        Field type annotation to coerce into.
    raw_value : Any
        Value to coerce.

    Returns
    -------
    Any
        Coerced value when conversion is possible, otherwise the
        ``_SENTINEL`` marker meaning no conversion was applied.
    """
    if inspect.isclass(field_type):
        if issubclass(field_type, Enum):
            return cls._coerce_enum_value(field_name, field_type, raw_value)
        if issubclass(field_type, BaseModelJSONSerializable):
            if isinstance(raw_value, field_type):
                return raw_value
            if isinstance(raw_value, dict):
                return field_type.from_json(raw_value)
        return _SENTINEL

    item_types = get_args(field_type)
    if get_origin(field_type) is not list or not item_types:
        return _SENTINEL
    if not isinstance(raw_value, list):
        return _SENTINEL

    element_type = item_types[0]
    # For enum-bearing element types, items that coerce to None are dropped.
    drops_missing = cls._extract_enum_class(element_type) is not None
    coerced = [
        cls._coerce_field_value(field_name, element_type, element)
        for element in raw_value
    ]
    if drops_missing:
        coerced = [entry for entry in coerced if entry is not None]
    return coerced
|
|
259
|
+
|
|
260
|
+
@classmethod
def _coerce_field_value(
    cls, field_name: str, field_type: Any, raw_value: Any
) -> Any:
    """Coerce a raw value based on the field's type annotation.

    Plain annotations and ``list[...]`` are handed straight to
    ``_try_coerce_value``. ``Union``/``Optional`` (including ``X | Y``)
    and ``Annotated`` wrappers are unwrapped and each non-``None``
    argument is tried in declaration order. Any other parameterised
    generic (for example ``dict[str, Model]``, ``set[Enum]`` or
    ``tuple[...]``) is returned untouched: its type arguments describe
    the container's contents, not alternatives for the value itself, so
    trying them against the whole value would mis-coerce it or replace
    it with ``None``.

    Parameters
    ----------
    field_name : str
        Field name being converted.
    field_type : Any
        Field type annotation to coerce into.
    raw_value : Any
        Value to coerce.

    Returns
    -------
    Any
        Coerced value when conversion is possible, otherwise the original
        raw value.
    """
    import types
    import typing

    origin = get_origin(field_type)

    if origin is None or origin is list:
        converted = cls._try_coerce_value(field_name, field_type, raw_value)
        return raw_value if converted is _SENTINEL else converted

    # ``types.UnionType`` (PEP 604) and ``typing.Annotated`` may be absent
    # on older interpreters, so look them up defensively.
    wrapper_origins = (
        typing.Union,
        getattr(types, "UnionType", None),
        getattr(typing, "Annotated", None),
    )
    if not any(
        origin is wrapper for wrapper in wrapper_origins if wrapper is not None
    ):
        # Container generics: leave the value as-is.
        return raw_value

    for arg in get_args(field_type):
        if arg is type(None):
            continue
        converted = cls._try_coerce_value(field_name, arg, raw_value)
        if converted is not _SENTINEL:
            return converted
    return raw_value
|
|
295
|
+
|
|
296
|
+
@classmethod
def _build_enum_field_mapping(cls) -> dict[str, type[Enum]]:
    """Map each enum-typed field name to its Enum class.

    Each entry of ``cls.model_fields`` is inspected with
    ``_extract_enum_class``; fields without an Enum in their annotation
    are left out.

    Returns
    -------
    dict[str, type[Enum]]
        Mapping of field names to Enum types.
    """
    candidates = (
        (name, cls._extract_enum_class(info.annotation))
        for name, info in cls.model_fields.items()
    )
    return {name: enum_cls for name, enum_cls in candidates if enum_cls is not None}
|
|
318
|
+
|
|
319
|
+
@classmethod
def _coerce_enum_value(
    cls, field_name: str, enum_cls: type[Enum], raw_value: Any
) -> Enum | None:
    """Coerce a raw enum value into an Enum member.

    Strings are matched first against member values and then against
    member names. Non-string inputs (for example the integer ``1`` for a
    member declared as ``LOW = 1``) are resolved by value through the
    normal ``enum_cls(value)`` lookup, so enums with non-string values
    are coerced as well.

    Parameters
    ----------
    field_name : str
        Field name being converted (used in the warning message only).
    enum_cls : type[Enum]
        Enum class to coerce into.
    raw_value : Any
        Value to coerce into an Enum member.

    Returns
    -------
    Enum or None
        Enum member when conversion succeeds, otherwise None (a warning
        is logged in that case).
    """
    if isinstance(raw_value, enum_cls):
        return raw_value

    if isinstance(raw_value, str):
        if raw_value in enum_cls._value2member_map_:
            return enum_cls(raw_value)
        if raw_value in enum_cls.__members__:
            return enum_cls.__members__[raw_value]
    else:
        try:
            return enum_cls(raw_value)
        except (ValueError, TypeError):
            # Not a valid value for this enum; fall through to the warning.
            pass

    log(
        message=(
            f"[{cls.__name__}] Invalid value for '{field_name}': "
            f"'{raw_value}' not in {enum_cls.__name__}"
        ),
        level=logging.WARNING,
    )
    return None
|
|
125
354
|
|
|
126
355
|
@classmethod
def from_json(cls: type[P], data: dict[str, Any]) -> P:
    """Build an instance from a dictionary of raw input data.

    A shallow copy of ``data`` is made. Each declared field that is
    present with a non-``None`` value is passed through
    ``_coerce_field_value`` (which performs the enum conversion), and the
    result is handed to the class constructor.

    Parameters
    ----------
    data : dict[str, Any]
        Raw input data dictionary from API response.

    Returns
    -------
    P
        Validated instance of the model class.

    Examples
    --------
    >>> raw_data = {"title": "Test", "score": 0.95}
    >>> instance = MyStructure.from_json(raw_data)
    """
    prepared = data.copy()
    for name, info in cls.model_fields.items():
        current = prepared.get(name)
        if current is not None:
            prepared[name] = cls._coerce_field_value(name, info.annotation, current)
    return cls(**prepared)
|
|
147
388
|
|
|
148
389
|
@classmethod
|
|
149
390
|
def from_json_file(cls: type[P], filepath: str | Path) -> P:
|
|
@@ -166,7 +407,7 @@ class BaseModelJSONSerializable(BaseModel):
|
|
|
166
407
|
|
|
167
408
|
Examples
|
|
168
409
|
--------
|
|
169
|
-
>>> instance = MyConfig.from_json_file("
|
|
410
|
+
>>> instance = MyConfig.from_json_file("configuration.json")
|
|
170
411
|
"""
|
|
171
412
|
target = Path(filepath)
|
|
172
413
|
if not target.exists():
|
|
@@ -177,5 +418,47 @@ class BaseModelJSONSerializable(BaseModel):
|
|
|
177
418
|
|
|
178
419
|
return cls.from_json(data)
|
|
179
420
|
|
|
421
|
+
@classmethod
def from_string(cls: type[P], arguments: str) -> P:
    """Parse tool call arguments which may not be valid JSON.

    The OpenAI API is expected to return well-formed JSON for tool arguments,
    but minor formatting issues (such as the use of single quotes) can occur.
    This helper first tries ``json.loads`` and falls back to
    ``ast.literal_eval`` for simple cases. The parsed payload must be a
    mapping, because it is forwarded to ``from_json``.

    Parameters
    ----------
    arguments : str
        Raw argument string from the tool call.

    Returns
    -------
    P
        Parsed model instance from the arguments.

    Raises
    ------
    ValueError
        If the arguments cannot be parsed as JSON or a Python literal, or
        if the parsed payload is not a dictionary.

    Examples
    --------
    >>> MyModel.from_string('{"key": "value"}').key
    'value'
    """
    try:
        structured_data = json.loads(arguments)
    except json.JSONDecodeError:
        try:
            structured_data = ast.literal_eval(arguments)
        except (SyntaxError, ValueError, TypeError) as exc:
            # literal_eval is documented to raise TypeError for some
            # malformed inputs as well; report all of them uniformly.
            raise ValueError(
                f"Invalid JSON arguments: {arguments}. "
                f"Expected valid JSON or Python literal."
            ) from exc

    if not isinstance(structured_data, dict):
        # from_json needs a mapping; fail with the documented exception
        # type instead of an AttributeError from deeper in the call.
        raise ValueError(
            f"Invalid JSON arguments: {arguments}. "
            f"Expected a JSON object."
        )
    return cls.from_json(structured_data)
|
|
462
|
+
|
|
180
463
|
|
|
181
464
|
__all__ = ["BaseModelJSONSerializable"]
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""LangExtract integration helpers.
|
|
2
|
+
|
|
3
|
+
This module provides a thin adapter around LangExtract-style extractors to
|
|
4
|
+
normalize how extraction results are collected and validated.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any, Protocol, TypeVar
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel
|
|
13
|
+
|
|
14
|
+
TModel = TypeVar("TModel", bound=BaseModel)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class LangExtractCallable(Protocol):
    """Structural type for a LangExtract extractor that is itself callable.

    Methods
    -------
    __call__
        Extract structured data from text.
    """

    def __call__(self, text: str, **kwargs: Any) -> Any:
        """Run the extractor on ``text``.

        Parameters
        ----------
        text : str
            Source text to extract from.
        **kwargs : Any
            Extra keyword arguments forwarded to the extractor.

        Returns
        -------
        Any
            Extracted structured data.
        """
        ...
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class LangExtractExtractor(Protocol):
    """Structural type for a LangExtract extractor object with ``extract``.

    Methods
    -------
    extract
        Extract structured data from text.
    """

    def extract(self, text: str, **kwargs: Any) -> Any:
        """Run the extractor on ``text``.

        Parameters
        ----------
        text : str
            Source text to extract from.
        **kwargs : Any
            Extra keyword arguments forwarded to the extractor.

        Returns
        -------
        Any
            Extracted structured data.
        """
        ...
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass(frozen=True)
class LangExtractAdapter:
    """Wrap a LangExtract extractor behind one uniform interface.

    Parameters
    ----------
    extractor : LangExtractCallable | LangExtractExtractor
        Callable or object providing an ``extract`` method.

    Methods
    -------
    extract
        Extract structured data from text with the configured extractor.
    extract_to_model
        Extract structured data and validate it into a Pydantic model.
    """

    extractor: LangExtractCallable | LangExtractExtractor

    def extract(self, text: str, **kwargs: Any) -> Any:
        """Run the configured extractor on ``text``.

        An ``extract`` attribute on the extractor takes precedence; only
        when it is absent is the extractor itself called.

        Parameters
        ----------
        text : str
            Source text to extract from.
        **kwargs : Any
            Extra keyword arguments forwarded to the underlying extractor.

        Returns
        -------
        Any
            Extracted structured data.

        Raises
        ------
        TypeError
            If the configured extractor cannot be called.
        """
        target = self.extractor
        if not hasattr(target, "extract"):
            if not callable(target):
                raise TypeError(
                    "LangExtract extractor must be callable or expose extract()."
                )
            return target(text, **kwargs)
        return target.extract(text, **kwargs)  # type: ignore[union-attr]

    def extract_to_model(
        self,
        text: str,
        model: type[TModel],
        **kwargs: Any,
    ) -> TModel:
        """Run the extractor and validate its output with ``model``.

        Parameters
        ----------
        text : str
            Source text to extract from.
        model : type[BaseModel]
            Pydantic model class to validate the extracted data.
        **kwargs : Any
            Extra keyword arguments forwarded to the underlying extractor.

        Returns
        -------
        BaseModel
            Validated Pydantic model instance.
        """
        return model.model_validate(self.extract(text, **kwargs))
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def build_langextract_adapter(
    extractor: LangExtractCallable | LangExtractExtractor | None = None,
) -> LangExtractAdapter:
    """Create a ``LangExtractAdapter`` for an extractor or the module default.

    Parameters
    ----------
    extractor : LangExtractCallable | LangExtractExtractor, optional
        Explicit extractor instance or callable. If omitted, the
        ``langextract`` module is imported and its ``extract`` attribute
        is used when present, otherwise an instance of its ``Extractor``.

    Returns
    -------
    LangExtractAdapter
        Configured LangExtract adapter.

    Raises
    ------
    ImportError
        If LangExtract cannot be imported.
    AttributeError
        If no supported extractor can be resolved.
    """
    if extractor is not None:
        return LangExtractAdapter(extractor=extractor)

    module = _import_langextract_module()
    if hasattr(module, "extract"):
        return LangExtractAdapter(extractor=module.extract)
    if hasattr(module, "Extractor"):
        return LangExtractAdapter(extractor=module.Extractor())
    raise AttributeError("LangExtract module does not expose extract or Extractor.")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _import_langextract_module() -> Any:
    """Load and return the ``langextract`` module.

    Returns
    -------
    Any
        Imported LangExtract module.

    Raises
    ------
    ImportError
        If LangExtract is not installed or cannot be imported.
    """
    from importlib import import_module

    return import_module("langextract")
|