openai-sdk-helpers 0.5.1__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openai_sdk_helpers/agent/__init__.py +2 -0
- openai_sdk_helpers/agent/classifier.py +268 -0
- openai_sdk_helpers/prompt/classifier.jinja +26 -0
- openai_sdk_helpers/response/base.py +42 -17
- openai_sdk_helpers/structure/__init__.py +12 -0
- openai_sdk_helpers/structure/classification.py +305 -0
- {openai_sdk_helpers-0.5.1.dist-info → openai_sdk_helpers-0.6.0.dist-info}/METADATA +1 -1
- {openai_sdk_helpers-0.5.1.dist-info → openai_sdk_helpers-0.6.0.dist-info}/RECORD +11 -8
- {openai_sdk_helpers-0.5.1.dist-info → openai_sdk_helpers-0.6.0.dist-info}/WHEEL +0 -0
- {openai_sdk_helpers-0.5.1.dist-info → openai_sdk_helpers-0.6.0.dist-info}/entry_points.txt +0 -0
- {openai_sdk_helpers-0.5.1.dist-info → openai_sdk_helpers-0.6.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -7,6 +7,7 @@ from ..structure.plan.enum import AgentEnum
|
|
|
7
7
|
from .coordinator import CoordinatorAgent
|
|
8
8
|
from .runner import run_sync, run_async
|
|
9
9
|
from .search.base import SearchPlanner, SearchToolAgent, SearchWriter
|
|
10
|
+
from .classifier import TaxonomyClassifierAgent
|
|
10
11
|
from .summarizer import SummarizerAgent
|
|
11
12
|
from .translator import TranslatorAgent
|
|
12
13
|
from .validator import ValidatorAgent
|
|
@@ -27,6 +28,7 @@ __all__ = [
|
|
|
27
28
|
"SearchPlanner",
|
|
28
29
|
"SearchToolAgent",
|
|
29
30
|
"SearchWriter",
|
|
31
|
+
"TaxonomyClassifierAgent",
|
|
30
32
|
"SummarizerAgent",
|
|
31
33
|
"TranslatorAgent",
|
|
32
34
|
"ValidatorAgent",
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""Agent for taxonomy-driven text classification."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, Iterable, Optional, Sequence
|
|
7
|
+
|
|
8
|
+
from ..structure import (
|
|
9
|
+
ClassificationResult,
|
|
10
|
+
ClassificationStep,
|
|
11
|
+
ClassificationStopReason,
|
|
12
|
+
TaxonomyNode,
|
|
13
|
+
)
|
|
14
|
+
from .base import AgentBase
|
|
15
|
+
from .configuration import AgentConfiguration
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TaxonomyClassifierAgent(AgentBase):
    """Classify text by descending through a taxonomy one level at a time.

    At every level the current candidate nodes are rendered into the prompt
    and the model answers with a ``ClassificationStep``. That answer decides
    whether the walk descends into the chosen node's children or ends.

    Parameters
    ----------
    template_path : Path | str | None, default=None
        Prompt template to render. When falsy, the path returned by
        ``_default_template_path()`` is used instead.
    model : str | None, default=None
        Model identifier forwarded to ``AgentConfiguration``.

    Methods
    -------
    run_agent(text, taxonomy, context, max_depth)
        Walk the taxonomy and return the resulting classification.
    """

    def __init__(
        self,
        *,
        template_path: Path | str | None = None,
        model: str | None = None,
    ) -> None:
        """Build the agent configuration and hand it to ``AgentBase``.

        Parameters
        ----------
        template_path : Path | str | None, default=None
            Prompt template to render; falls back to the bundled template.
        model : str | None, default=None
            Model identifier forwarded to ``AgentConfiguration``.

        Notes
        -----
        No validation of ``model`` happens in this method; the value is
        passed through unchanged.
        NOTE(review): any ``ValueError`` for a missing model would have to
        come from ``AgentConfiguration``/``AgentBase`` - confirm there.

        Examples
        --------
        >>> classifier = TaxonomyClassifierAgent(model="gpt-4o-mini")
        """
        configuration = AgentConfiguration(
            name="taxonomy_classifier",
            instructions="Agent instructions",
            description="Classify text by traversing taxonomy levels.",
            # A falsy template_path (None or "") selects the bundled template.
            template_path=template_path or _default_template_path(),
            output_structure=ClassificationStep,
            model=model,
        )
        super().__init__(configuration=configuration)

    async def run_agent(
        self,
        text: str,
        taxonomy: TaxonomyNode | Sequence[TaxonomyNode],
        *,
        context: Optional[Dict[str, Any]] = None,
        max_depth: Optional[int] = None,
    ) -> ClassificationResult:
        """Classify ``text`` by walking the taxonomy level by level.

        Parameters
        ----------
        text : str
            Source text to classify; sent as the input of every step.
        taxonomy : TaxonomyNode or Sequence[TaxonomyNode]
            A single root node or a sequence of root nodes.
        context : dict or None, default=None
            Extra values merged into the template context of every step.
        max_depth : int or None, default=None
            Number of levels that may be evaluated. ``None`` means no limit.

        Returns
        -------
        ClassificationResult
            The recorded steps, the reason the walk ended, and the
            id/label/confidence of the last recorded step (all ``None``
            when no step was recorded).

        Raises
        ------
        ValueError
            If ``taxonomy`` yields no nodes.
        """
        candidates = _normalize_roots(taxonomy)
        if not candidates:
            raise ValueError("taxonomy must include at least one node")

        steps: list[ClassificationStep] = []
        level = 0
        outcome = ClassificationStopReason.NO_MATCH

        while candidates:
            # The depth limit is checked before the model is asked, so a
            # limit of 0 produces a result with an empty path.
            depth_exhausted = max_depth is not None and level >= max_depth
            if depth_exhausted:
                outcome = ClassificationStopReason.MAX_DEPTH
                break

            answer: ClassificationStep = await self.run_async(
                input=text,
                context=_build_context(
                    current_nodes=candidates,
                    path=steps,
                    depth=level,
                    context=context,
                ),
                output_structure=ClassificationStep,
            )
            steps.append(answer)
            outcome = answer.stop_reason

            if outcome.is_terminal:
                break

            # The model asked to continue: map its answer back to a node.
            chosen = _resolve_node(candidates, answer)
            if chosen is None:
                # The answer named nothing at this level; the step stays
                # recorded but the walk ends as "no match".
                outcome = ClassificationStopReason.NO_MATCH
                break
            if not chosen.children:
                outcome = ClassificationStopReason.NO_CHILDREN
                break

            candidates = list(chosen.children)
            level += 1

        final_id, final_label, confidence = _final_values(steps)
        return ClassificationResult(
            final_id=final_id,
            final_label=final_label,
            confidence=confidence,
            stop_reason=outcome,
            path=steps,
        )
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _normalize_roots(
    taxonomy: TaxonomyNode | Sequence[TaxonomyNode],
) -> list[TaxonomyNode]:
    """Coerce the taxonomy argument into a list of root nodes.

    Parameters
    ----------
    taxonomy : TaxonomyNode or Sequence[TaxonomyNode]
        Either one root node or a sequence of root nodes.

    Returns
    -------
    list[TaxonomyNode]
        A one-element list for a single node; otherwise the sequence's
        entries in order with ``None`` entries dropped.
    """
    if not isinstance(taxonomy, TaxonomyNode):
        return [entry for entry in taxonomy if entry is not None]
    return [taxonomy]
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _default_template_path() -> Path:
|
|
173
|
+
"""Return the built-in classifier prompt template path.
|
|
174
|
+
|
|
175
|
+
Returns
|
|
176
|
+
-------
|
|
177
|
+
Path
|
|
178
|
+
Path to the bundled classifier Jinja template.
|
|
179
|
+
"""
|
|
180
|
+
return Path(__file__).resolve().parents[1] / "prompt" / "classifier.jinja"
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _build_context(
|
|
184
|
+
*,
|
|
185
|
+
current_nodes: Iterable[TaxonomyNode],
|
|
186
|
+
path: Sequence[ClassificationStep],
|
|
187
|
+
depth: int,
|
|
188
|
+
context: Optional[Dict[str, Any]],
|
|
189
|
+
) -> Dict[str, Any]:
|
|
190
|
+
"""Build the template context for a classification step.
|
|
191
|
+
|
|
192
|
+
Parameters
|
|
193
|
+
----------
|
|
194
|
+
current_nodes : Iterable[TaxonomyNode]
|
|
195
|
+
Nodes available at the current taxonomy level.
|
|
196
|
+
path : Sequence[ClassificationStep]
|
|
197
|
+
Steps recorded so far in the traversal.
|
|
198
|
+
depth : int
|
|
199
|
+
Current traversal depth.
|
|
200
|
+
context : dict or None
|
|
201
|
+
Optional additional context values.
|
|
202
|
+
|
|
203
|
+
Returns
|
|
204
|
+
-------
|
|
205
|
+
dict[str, Any]
|
|
206
|
+
Context dictionary for prompt rendering.
|
|
207
|
+
"""
|
|
208
|
+
template_context: Dict[str, Any] = {
|
|
209
|
+
"taxonomy_nodes": list(current_nodes),
|
|
210
|
+
"path": [step.as_summary() for step in path],
|
|
211
|
+
"depth": depth,
|
|
212
|
+
}
|
|
213
|
+
if context:
|
|
214
|
+
template_context.update(context)
|
|
215
|
+
return template_context
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _resolve_node(
|
|
219
|
+
nodes: Sequence[TaxonomyNode],
|
|
220
|
+
step: ClassificationStep,
|
|
221
|
+
) -> Optional[TaxonomyNode]:
|
|
222
|
+
"""Resolve the selected node for a classification step.
|
|
223
|
+
|
|
224
|
+
Parameters
|
|
225
|
+
----------
|
|
226
|
+
nodes : Sequence[TaxonomyNode]
|
|
227
|
+
Candidate nodes at the current level.
|
|
228
|
+
step : ClassificationStep
|
|
229
|
+
Classification step output to resolve.
|
|
230
|
+
|
|
231
|
+
Returns
|
|
232
|
+
-------
|
|
233
|
+
TaxonomyNode or None
|
|
234
|
+
Matching taxonomy node if found.
|
|
235
|
+
"""
|
|
236
|
+
if step.selected_id:
|
|
237
|
+
for node in nodes:
|
|
238
|
+
if node.id == step.selected_id:
|
|
239
|
+
return node
|
|
240
|
+
if step.selected_label:
|
|
241
|
+
for node in nodes:
|
|
242
|
+
if node.label == step.selected_label:
|
|
243
|
+
return node
|
|
244
|
+
return None
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _final_values(
|
|
248
|
+
path: Sequence[ClassificationStep],
|
|
249
|
+
) -> tuple[Optional[str], Optional[str], Optional[float]]:
|
|
250
|
+
"""Return the final selection values from the path.
|
|
251
|
+
|
|
252
|
+
Parameters
|
|
253
|
+
----------
|
|
254
|
+
path : Sequence[ClassificationStep]
|
|
255
|
+
Recorded classification steps.
|
|
256
|
+
|
|
257
|
+
Returns
|
|
258
|
+
-------
|
|
259
|
+
tuple[str or None, str or None, float or None]
|
|
260
|
+
Final identifier, label, and confidence.
|
|
261
|
+
"""
|
|
262
|
+
if not path:
|
|
263
|
+
return None, None, None
|
|
264
|
+
last_step = path[-1]
|
|
265
|
+
return last_step.selected_id, last_step.selected_label, last_step.confidence
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
__all__ = ["TaxonomyClassifierAgent"]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
You are a taxonomy classification assistant.
|
|
2
|
+
|
|
3
|
+
Instructions:
|
|
4
|
+
- Review the text and select the best matching taxonomy node from the list.
|
|
5
|
+
- If a child level should be explored, set stop_reason to "continue".
|
|
6
|
+
- If no appropriate node exists, set stop_reason to "no_match" and leave selected_id empty.
|
|
7
|
+
- If you are confident this is the final level, set stop_reason to "stop".
|
|
8
|
+
- Provide a concise rationale in one or two sentences.
|
|
9
|
+
|
|
10
|
+
Current depth: {{ depth }}
|
|
11
|
+
|
|
12
|
+
Previous path:
|
|
13
|
+
{% if path %}
|
|
14
|
+
{% for step in path %}
|
|
15
|
+
- {{ step.selected_label }} (id={{ step.selected_id }}, confidence={{ step.confidence }})
|
|
16
|
+
{% endfor %}
|
|
17
|
+
{% else %}
|
|
18
|
+
- None
|
|
19
|
+
{% endif %}
|
|
20
|
+
|
|
21
|
+
Candidate taxonomy nodes:
|
|
22
|
+
{% for node in taxonomy_nodes %}
|
|
23
|
+
- id: {{ node.id }}
|
|
24
|
+
label: {{ node.label }}
|
|
25
|
+
description: {{ node.description or "None" }}
|
|
26
|
+
{% endfor %}
|
|
@@ -450,6 +450,9 @@ class ResponseBase(Generic[T]):
|
|
|
450
450
|
When use_vector_store is True, this method automatically creates
|
|
451
451
|
a vector store and adds a file_search tool for document retrieval.
|
|
452
452
|
Images are always base64-encoded regardless of this setting.
|
|
453
|
+
When multiple content strings are provided, file attachments are
|
|
454
|
+
included only with the first message to avoid duplicating input
|
|
455
|
+
files across messages.
|
|
453
456
|
|
|
454
457
|
Examples
|
|
455
458
|
--------
|
|
@@ -473,8 +476,17 @@ class ResponseBase(Generic[T]):
|
|
|
473
476
|
self, all_files, use_vector_store
|
|
474
477
|
)
|
|
475
478
|
|
|
476
|
-
|
|
477
|
-
|
|
479
|
+
attachments: list[
|
|
480
|
+
ResponseInputFileParam
|
|
481
|
+
| ResponseInputFileContentParam
|
|
482
|
+
| ResponseInputImageContentParam
|
|
483
|
+
] = []
|
|
484
|
+
attachments.extend(vector_file_refs)
|
|
485
|
+
attachments.extend(base64_files)
|
|
486
|
+
attachments.extend(image_contents)
|
|
487
|
+
|
|
488
|
+
# Add each content as a separate message.
|
|
489
|
+
for index, raw_content in enumerate(contents):
|
|
478
490
|
processed_text = raw_content.strip()
|
|
479
491
|
input_content: list[
|
|
480
492
|
ResponseInputTextParam
|
|
@@ -483,14 +495,8 @@ class ResponseBase(Generic[T]):
|
|
|
483
495
|
| ResponseInputImageContentParam
|
|
484
496
|
] = [ResponseInputTextParam(type="input_text", text=processed_text)]
|
|
485
497
|
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
# Add base64 files
|
|
490
|
-
input_content.extend(base64_files)
|
|
491
|
-
|
|
492
|
-
# Add images
|
|
493
|
-
input_content.extend(image_contents)
|
|
498
|
+
if index == 0:
|
|
499
|
+
input_content.extend(attachments)
|
|
494
500
|
|
|
495
501
|
message = cast(
|
|
496
502
|
ResponseInputItemParam,
|
|
@@ -503,6 +509,7 @@ class ResponseBase(Generic[T]):
|
|
|
503
509
|
content: str | list[str],
|
|
504
510
|
files: str | list[str] | None = None,
|
|
505
511
|
use_vector_store: bool = False,
|
|
512
|
+
save_messages: bool = True,
|
|
506
513
|
) -> T | str:
|
|
507
514
|
"""Generate a response asynchronously from the OpenAI API.
|
|
508
515
|
|
|
@@ -525,6 +532,9 @@ class ResponseBase(Generic[T]):
|
|
|
525
532
|
use_vector_store : bool, default False
|
|
526
533
|
If True, non-image files are uploaded to a vector store
|
|
527
534
|
for RAG-enabled search instead of inline base64 encoding.
|
|
535
|
+
save_messages : bool, default True
|
|
536
|
+
When True, persist the message history after each response or
|
|
537
|
+
tool call.
|
|
528
538
|
|
|
529
539
|
Returns
|
|
530
540
|
-------
|
|
@@ -615,7 +625,8 @@ class ResponseBase(Generic[T]):
|
|
|
615
625
|
self.messages.add_tool_message(
|
|
616
626
|
content=response_output, output=tool_output
|
|
617
627
|
)
|
|
618
|
-
|
|
628
|
+
if save_messages:
|
|
629
|
+
self.save()
|
|
619
630
|
except Exception as exc:
|
|
620
631
|
log(
|
|
621
632
|
f"Error executing tool handler '{tool_name}': {exc}",
|
|
@@ -640,7 +651,8 @@ class ResponseBase(Generic[T]):
|
|
|
640
651
|
self.messages.add_assistant_message(
|
|
641
652
|
response_output, metadata=kwargs
|
|
642
653
|
)
|
|
643
|
-
|
|
654
|
+
if save_messages:
|
|
655
|
+
self.save()
|
|
644
656
|
if hasattr(response, "output_text") and response.output_text:
|
|
645
657
|
raw_text = response.output_text
|
|
646
658
|
log("No tool call. Parsing output_text.")
|
|
@@ -676,6 +688,7 @@ class ResponseBase(Generic[T]):
|
|
|
676
688
|
*,
|
|
677
689
|
files: str | list[str] | None = None,
|
|
678
690
|
use_vector_store: bool = False,
|
|
691
|
+
save_messages: bool = True,
|
|
679
692
|
) -> T | str:
|
|
680
693
|
"""Execute run_async synchronously with proper event loop handling.
|
|
681
694
|
|
|
@@ -698,6 +711,9 @@ class ResponseBase(Generic[T]):
|
|
|
698
711
|
use_vector_store : bool, default False
|
|
699
712
|
If True, non-image files are uploaded to a vector store
|
|
700
713
|
for RAG-enabled search instead of inline base64 encoding.
|
|
714
|
+
save_messages : bool, default True
|
|
715
|
+
When True, persist the message history after each response or
|
|
716
|
+
tool call.
|
|
701
717
|
|
|
702
718
|
Returns
|
|
703
719
|
-------
|
|
@@ -733,6 +749,7 @@ class ResponseBase(Generic[T]):
|
|
|
733
749
|
content=content,
|
|
734
750
|
files=files,
|
|
735
751
|
use_vector_store=use_vector_store,
|
|
752
|
+
save_messages=save_messages,
|
|
736
753
|
)
|
|
737
754
|
|
|
738
755
|
try:
|
|
@@ -865,9 +882,11 @@ class ResponseBase(Generic[T]):
|
|
|
865
882
|
|
|
866
883
|
Notes
|
|
867
884
|
-----
|
|
868
|
-
If no filepath is provided, the save operation
|
|
869
|
-
|
|
870
|
-
|
|
885
|
+
If no filepath is provided, the save operation writes to the
|
|
886
|
+
session data path. If the configured data path already ends with
|
|
887
|
+
the response name, it writes to data_path / uuid.json. Otherwise,
|
|
888
|
+
it writes to data_path / name / uuid.json. The data path is
|
|
889
|
+
configured during initialization and defaults to get_data_path().
|
|
871
890
|
|
|
872
891
|
Raises
|
|
873
892
|
------
|
|
@@ -883,7 +902,7 @@ class ResponseBase(Generic[T]):
|
|
|
883
902
|
target = Path(filepath)
|
|
884
903
|
else:
|
|
885
904
|
filename = f"{str(self.uuid).lower()}.json"
|
|
886
|
-
target = self.
|
|
905
|
+
target = self._session_path(filename)
|
|
887
906
|
|
|
888
907
|
checked = check_filepath(filepath=target)
|
|
889
908
|
self.messages.to_json_file(str(checked))
|
|
@@ -913,12 +932,18 @@ class ResponseBase(Generic[T]):
|
|
|
913
932
|
traceback.format_exception(type(exc), exc, exc.__traceback__)
|
|
914
933
|
)
|
|
915
934
|
filename = f"{str(self.uuid).lower()}_error.txt"
|
|
916
|
-
target = self.
|
|
935
|
+
target = self._session_path(filename)
|
|
917
936
|
checked = check_filepath(filepath=target)
|
|
918
937
|
checked.write_text(error_text, encoding="utf-8")
|
|
919
938
|
log(f"Saved error details to {checked}")
|
|
920
939
|
return checked
|
|
921
940
|
|
|
941
|
+
def _session_path(self, filename: str) -> Path:
|
|
942
|
+
"""Return the resolved session filepath for a given filename."""
|
|
943
|
+
if self._data_path.name == self._name:
|
|
944
|
+
return self._data_path / filename
|
|
945
|
+
return self._data_path / self._name / filename
|
|
946
|
+
|
|
922
947
|
def __repr__(self) -> str:
|
|
923
948
|
"""Return a detailed string representation of the response session.
|
|
924
949
|
|
|
@@ -76,6 +76,13 @@ from __future__ import annotations
|
|
|
76
76
|
|
|
77
77
|
from .agent_blueprint import AgentBlueprint
|
|
78
78
|
from .base import *
|
|
79
|
+
from .classification import (
|
|
80
|
+
ClassificationResult,
|
|
81
|
+
ClassificationStep,
|
|
82
|
+
ClassificationStopReason,
|
|
83
|
+
TaxonomyNode,
|
|
84
|
+
flatten_taxonomy,
|
|
85
|
+
)
|
|
79
86
|
from .extraction import (
|
|
80
87
|
AnnotatedDocumentStructure,
|
|
81
88
|
AttributeStructure,
|
|
@@ -98,6 +105,11 @@ __all__ = [
|
|
|
98
105
|
"spec_field",
|
|
99
106
|
"AgentBlueprint",
|
|
100
107
|
"AgentEnum",
|
|
108
|
+
"ClassificationResult",
|
|
109
|
+
"ClassificationStep",
|
|
110
|
+
"ClassificationStopReason",
|
|
111
|
+
"TaxonomyNode",
|
|
112
|
+
"flatten_taxonomy",
|
|
101
113
|
"TaskStructure",
|
|
102
114
|
"PlanStructure",
|
|
103
115
|
"create_plan",
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""Structured taxonomy and classification result models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from typing import Any, Iterable, Optional
|
|
7
|
+
|
|
8
|
+
from .base import StructureBase, spec_field
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TaxonomyNode(StructureBase):
    """Represent a taxonomy node with optional child categories.

    Attributes
    ----------
    id : str
        Unique identifier for the taxonomy node.
    label : str
        Human-readable label for the taxonomy node.
    description : str or None
        Optional description of the node.
    children : list[TaxonomyNode]
        Child nodes in the taxonomy.

    Methods
    -------
    build_path(parent_path)
        Build a computed path using the provided parent path segments.
    computed_path
        Return the computed path for the node.
    is_leaf
        Return True when the taxonomy node has no children.
    child_by_id(node_id)
        Return the child node matching the provided identifier.
    """

    # NOTE(review): the class docstring and field descriptions are kept
    # verbatim - presumably StructureBase surfaces them in a generated
    # schema; confirm before rewording them.
    id: str = spec_field("id", description="Unique identifier for the taxonomy.")
    label: str = spec_field(
        "label", description="Human-readable label for the taxonomy node."
    )
    description: Optional[str] = spec_field(
        "description",
        description="Optional description of the taxonomy node.",
        default=None,
    )
    children: list["TaxonomyNode"] = spec_field(
        "children",
        description="Child nodes in the taxonomy.",
        default_factory=list,
    )

    @property
    def is_leaf(self) -> bool:
        """Tell whether this node is a leaf.

        Returns
        -------
        bool
            True when ``children`` is empty.
        """
        has_children = bool(self.children)
        return not has_children

    def build_path(self, parent_path: Iterable[str] | None = None) -> list[str]:
        """Append this node's label to the given parent path segments.

        Parameters
        ----------
        parent_path : Iterable[str] or None, default=None
            Segments leading to this node; ``None`` means no prefix.

        Returns
        -------
        list[str]
            A new list holding the parent segments followed by ``label``.
        """
        segments = [] if parent_path is None else list(parent_path)
        segments.append(self.label)
        return segments

    @property
    def computed_path(self) -> list[str]:
        """Return the path for this node when no parent path is supplied.

        Returns
        -------
        list[str]
            The result of ``build_path()`` with no prefix, i.e. a
            single-element list holding ``label``.
        """
        return self.build_path()

    def child_by_id(self, node_id: str | None) -> Optional["TaxonomyNode"]:
        """Look up a direct child by identifier.

        Parameters
        ----------
        node_id : str or None
            Identifier to search for; ``None`` never matches.

        Returns
        -------
        TaxonomyNode or None
            The first direct child whose ``id`` equals ``node_id``, else
            ``None``.
        """
        if node_id is None:
            return None
        for candidate in self.children:
            if candidate.id == node_id:
                return candidate
        return None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class ClassificationStopReason(str, Enum):
    """Enumerate stop reasons for taxonomy classification.

    Methods
    -------
    is_terminal
        Return True if the stop reason should halt traversal.
    """

    # NOTE(review): class docstring kept verbatim - schema generators may
    # expose an enum's docstring as its description; confirm before
    # rewording.
    CONTINUE = "continue"
    STOP = "stop"
    NO_MATCH = "no_match"
    MAX_DEPTH = "max_depth"
    NO_CHILDREN = "no_children"

    @property
    def is_terminal(self) -> bool:
        """Tell whether this reason ends the traversal.

        Returns
        -------
        bool
            False for ``CONTINUE``; True for every other member.
        """
        # CONTINUE is the only member that keeps the walk going.
        return self is not ClassificationStopReason.CONTINUE
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class ClassificationStep(StructureBase):
    """Represent a single classification step within a taxonomy level.

    Attributes
    ----------
    selected_id : str or None
        Identifier of the selected taxonomy node.
    selected_label : str or None
        Label of the selected taxonomy node.
    confidence : float or None
        Confidence score between 0 and 1.
    stop_reason : ClassificationStopReason
        Reason for stopping or continuing traversal.
    rationale : str or None
        Optional rationale for the classification decision.

    Methods
    -------
    as_summary()
        Return a dictionary summary of the classification step.
    """

    # NOTE(review): the class docstring and field descriptions are kept
    # verbatim - presumably StructureBase surfaces them in a generated
    # schema; confirm before rewording them.
    selected_id: Optional[str] = spec_field(
        "selected_id",
        description="Identifier of the selected taxonomy node.",
        default=None,
    )
    selected_label: Optional[str] = spec_field(
        "selected_label",
        description="Label of the selected taxonomy node.",
        default=None,
    )
    confidence: Optional[float] = spec_field(
        "confidence",
        description="Confidence score between 0 and 1.",
        default=None,
    )
    stop_reason: ClassificationStopReason = spec_field(
        "stop_reason",
        description="Reason for stopping or continuing traversal.",
        default=ClassificationStopReason.STOP,
    )
    rationale: Optional[str] = spec_field(
        "rationale",
        description="Optional rationale for the classification decision.",
        default=None,
    )

    def as_summary(self) -> dict[str, Any]:
        """Condense the step into a plain dictionary.

        Returns
        -------
        dict[str, Any]
            ``selected_id``, ``selected_label``, ``confidence`` and the
            string value of ``stop_reason``; ``rationale`` is left out.
        """
        summary: dict[str, Any] = {
            name: getattr(self, name)
            for name in ("selected_id", "selected_label", "confidence")
        }
        # Store the enum's raw string rather than the member itself.
        summary["stop_reason"] = self.stop_reason.value
        return summary
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class ClassificationResult(StructureBase):
    """Represent the final result of taxonomy traversal.

    Attributes
    ----------
    final_id : str or None
        Identifier of the final taxonomy node selection.
    final_label : str or None
        Label of the final taxonomy node selection.
    confidence : float or None
        Confidence score for the final selection.
    stop_reason : ClassificationStopReason
        Reason the traversal ended.
    path : list[ClassificationStep]
        Ordered list of classification steps.

    Methods
    -------
    depth
        Return the number of classification steps recorded.
    path_labels
        Return the labels selected at each step.
    """

    # NOTE(review): the class docstring and field descriptions are kept
    # verbatim - presumably StructureBase surfaces them in a generated
    # schema; confirm before rewording them.
    final_id: Optional[str] = spec_field(
        "final_id",
        description="Identifier of the final taxonomy node selection.",
        default=None,
    )
    final_label: Optional[str] = spec_field(
        "final_label",
        description="Label of the final taxonomy node selection.",
        default=None,
    )
    confidence: Optional[float] = spec_field(
        "confidence",
        description="Confidence score for the final selection.",
        default=None,
    )
    stop_reason: ClassificationStopReason = spec_field(
        "stop_reason",
        description="Reason the traversal ended.",
        default=ClassificationStopReason.STOP,
    )
    path: list[ClassificationStep] = spec_field(
        "path",
        description="Ordered list of classification steps.",
        default_factory=list,
    )

    @property
    def depth(self) -> int:
        """Count the recorded classification steps.

        Returns
        -------
        int
            Length of ``path``.
        """
        recorded = self.path
        return len(recorded)

    @property
    def path_labels(self) -> list[str]:
        """Collect the labels chosen along the path.

        Returns
        -------
        list[str]
            ``selected_label`` of each step in order, skipping steps whose
            label is empty or ``None``.
        """
        labels: list[str] = []
        for taken in self.path:
            chosen = taken.selected_label
            if chosen:
                labels.append(chosen)
        return labels
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def flatten_taxonomy(nodes: Iterable[TaxonomyNode]) -> list[TaxonomyNode]:
    """Flatten a taxonomy forest into a single list.

    Parameters
    ----------
    nodes : Iterable[TaxonomyNode]
        Root nodes to traverse.

    Returns
    -------
    list[TaxonomyNode]
        Every node in depth-first pre-order: each node is followed by its
        descendants before its next sibling.
    """
    ordered: list[TaxonomyNode] = []
    # Explicit stack; entries are pushed reversed so that popping yields
    # siblings in their original left-to-right order.
    pending = list(nodes)
    pending.reverse()
    while pending:
        current = pending.pop()
        ordered.append(current)
        offspring = current.children
        if offspring:
            pending.extend(reversed(list(offspring)))
    return ordered
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
__all__ = [
|
|
300
|
+
"ClassificationResult",
|
|
301
|
+
"ClassificationStep",
|
|
302
|
+
"ClassificationStopReason",
|
|
303
|
+
"TaxonomyNode",
|
|
304
|
+
"flatten_taxonomy",
|
|
305
|
+
]
|
|
@@ -8,8 +8,9 @@ openai_sdk_helpers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
8
8
|
openai_sdk_helpers/settings.py,sha256=xK_u0YNKgtPrLrZrVr4F4k0CvSuYbsmkqqw9mCMdyF8,10932
|
|
9
9
|
openai_sdk_helpers/tools.py,sha256=8hhcytpmDfoXV16UQbDmDVV0rhLOn8c_VjXO8XaTFLQ,19000
|
|
10
10
|
openai_sdk_helpers/types.py,sha256=ejCG0rYqJhjOQvKLoNnzq-TzcKCFt69GVfi7y805NkU,1451
|
|
11
|
-
openai_sdk_helpers/agent/__init__.py,sha256=
|
|
11
|
+
openai_sdk_helpers/agent/__init__.py,sha256=Nyvm8MJB-FhxhOfXD6ohDveSEUDR4KK76ffASCtgNIk,1119
|
|
12
12
|
openai_sdk_helpers/agent/base.py,sha256=8LIwi7zuYcOsXBjpsNdFTdpY8Ih-iAYXkqzLn4wkd1w,26144
|
|
13
|
+
openai_sdk_helpers/agent/classifier.py,sha256=CA2gB7Qm1UVerZXXUTdBWafTKwfmtcbUvBTrLtCQS8U,7729
|
|
13
14
|
openai_sdk_helpers/agent/configuration.py,sha256=FU3xnb8-8qoezLW47WwxZg7z2AxNXRW1Svl0FMsk8kc,14244
|
|
14
15
|
openai_sdk_helpers/agent/coordinator.py,sha256=lVjA0yI-GhGKlqbNR_k9GOCrUjFoZ0QoqRaafHckyME,18052
|
|
15
16
|
openai_sdk_helpers/agent/runner.py,sha256=l2NPS9VA9d4RISuBfanFfKxXNYSHQ7MTjRsuzx4APls,3473
|
|
@@ -28,6 +29,7 @@ openai_sdk_helpers/extract/extractor.py,sha256=vmRJyhKDEYAVfRk0KMgLH5hTqUfDAUyWB
|
|
|
28
29
|
openai_sdk_helpers/extract/generator.py,sha256=K9Euq0IaWs82oe5aRm73_18DelLKYyuH8VhfZ1_ZCEU,14695
|
|
29
30
|
openai_sdk_helpers/prompt/__init__.py,sha256=MOqgKwG9KLqKudoKRlUfLxiSmdOi2aD6hNrWDFqLHkk,418
|
|
30
31
|
openai_sdk_helpers/prompt/base.py,sha256=6X0zeopEvO0ba8207O8Nnj1QvFZEZier7kNNh4qkcmE,7782
|
|
32
|
+
openai_sdk_helpers/prompt/classifier.jinja,sha256=bKW-rG2xP6nF2G1vFAmVH3ONOrsCvzaJ14cbKSJaX3c,809
|
|
31
33
|
openai_sdk_helpers/prompt/extractor_config_agent_instructions.jinja,sha256=vCrsoUnsgHWSr7OS_ojMUjmPtHfbyv9bzKfaMaCJ99E,329
|
|
32
34
|
openai_sdk_helpers/prompt/extractor_config_generator.jinja,sha256=9rZ1PZdoQtnxDxFUlKRb0SooIEfNw4_Em99n9xvFyyU,960
|
|
33
35
|
openai_sdk_helpers/prompt/extractor_config_generator_instructions.jinja,sha256=GqV3DrGObyER_Fa-GMGGqhWBrQIH9FFlyKdgTjidyzg,534
|
|
@@ -40,7 +42,7 @@ openai_sdk_helpers/prompt/vector_planner.jinja,sha256=szzuJu6ZawYWuARgQn4DykBLig
|
|
|
40
42
|
openai_sdk_helpers/prompt/vector_search.jinja,sha256=KPEYQDRKsUesadSyQcBBiqYQEDL1NLN6BQsqw-GcKMA,249
|
|
41
43
|
openai_sdk_helpers/prompt/vector_writer.jinja,sha256=q5osfexGvt1xn8ZPtBWUP36n_1HK_Ziu8dkmCZDVamc,342
|
|
42
44
|
openai_sdk_helpers/response/__init__.py,sha256=YFrGpnMIfatnLWXAZgZDMvDx7Yjsqjat8W9INxKuPxY,1728
|
|
43
|
-
openai_sdk_helpers/response/base.py,sha256=
|
|
45
|
+
openai_sdk_helpers/response/base.py,sha256=tR_COd8VziI7y8bEtL6LysjQgpZQfeBNEUwswzp0QxU,38719
|
|
44
46
|
openai_sdk_helpers/response/configuration.py,sha256=jxneKd7oj08D40ceOWETB3TeUHd7Cnz-ooQp0akI9fA,10465
|
|
45
47
|
openai_sdk_helpers/response/files.py,sha256=O--boEPdFGsf9pHXPuNtG0aVJG2ZzwR4L1CZDW0hBP4,14450
|
|
46
48
|
openai_sdk_helpers/response/messages.py,sha256=qX3sW79rLuJEys28zyv5MovZikwGOaLevzdVN0VYMRE,10104
|
|
@@ -52,9 +54,10 @@ openai_sdk_helpers/response/vector_store.py,sha256=HClp6O_g20uklQTY7trC4age3rtDm
|
|
|
52
54
|
openai_sdk_helpers/streamlit_app/__init__.py,sha256=3yAkl6qV71cqtT5YFZuC9Bkqit0NtffDV6jmMWpT1k4,812
|
|
53
55
|
openai_sdk_helpers/streamlit_app/app.py,sha256=kkjtdCKVwrJ9nZWuBArm3dhvcjMESX0TMqAiF61_JLM,17402
|
|
54
56
|
openai_sdk_helpers/streamlit_app/configuration.py,sha256=0KeJ4HqCNFthBHsedV6ptqHluAcTPBb5_TujFOGkIUU,16685
|
|
55
|
-
openai_sdk_helpers/structure/__init__.py,sha256
|
|
57
|
+
openai_sdk_helpers/structure/__init__.py,sha256=-_bEFvvKhg99bgsMnimpxx7RpLQpQyReSMquOc-2Ts8,4173
|
|
56
58
|
openai_sdk_helpers/structure/agent_blueprint.py,sha256=VyJWkgPNzAYKRDMeR1M4kE6qqQURnwqtrrEn0TRJf0g,9698
|
|
57
59
|
openai_sdk_helpers/structure/base.py,sha256=nYViUME9pvzMRFIvNORTgwNFcjJdCjd4R1mLMo0nMSM,24822
|
|
60
|
+
openai_sdk_helpers/structure/classification.py,sha256=KKho8rmGeVVC2SvtmKbrUoyMf6CisjyK_a7e-v_9_1c,8548
|
|
58
61
|
openai_sdk_helpers/structure/extraction.py,sha256=wODP0iLAhhsdQkMWRYPYTiLUMU8bFMKiBjPl3PKUleg,37335
|
|
59
62
|
openai_sdk_helpers/structure/prompt.py,sha256=ZfsaHdA0hj5zmZDrOdpXjCsC8U-jjzwFG4JBsWYiaH4,1535
|
|
60
63
|
openai_sdk_helpers/structure/responses.py,sha256=WUwh0DhXj24pkvgqH1FMkdx5V2ArdvdtrDN_fuMBtDU,4882
|
|
@@ -88,8 +91,8 @@ openai_sdk_helpers/vector_storage/__init__.py,sha256=L5LxO09puh9_yBB9IDTvc1CvVkA
|
|
|
88
91
|
openai_sdk_helpers/vector_storage/cleanup.py,sha256=sZ4ZSTlnjF52o9Cc8A9dTX37ZYXXDxS_fdIpoOBWvrg,3666
|
|
89
92
|
openai_sdk_helpers/vector_storage/storage.py,sha256=t_ukacaXRa9EXE4-3BxsrB4Rjhu6nTu7NA9IjCJBIpQ,24259
|
|
90
93
|
openai_sdk_helpers/vector_storage/types.py,sha256=jTCcOYMeOpZWvcse0z4T3MVs-RBOPC-fqWTBeQrgafU,1639
|
|
91
|
-
openai_sdk_helpers-0.
|
|
92
|
-
openai_sdk_helpers-0.
|
|
93
|
-
openai_sdk_helpers-0.
|
|
94
|
-
openai_sdk_helpers-0.
|
|
95
|
-
openai_sdk_helpers-0.
|
|
94
|
+
openai_sdk_helpers-0.6.0.dist-info/METADATA,sha256=CbfzWHMqcXc6Pvur9uzPr1N8dUImDAw4ErUryzEN78g,24185
|
|
95
|
+
openai_sdk_helpers-0.6.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
96
|
+
openai_sdk_helpers-0.6.0.dist-info/entry_points.txt,sha256=gEOD1ZeXe8d2OP-KzUlG-b_9D9yUZTCt-GFW3EDbIIY,63
|
|
97
|
+
openai_sdk_helpers-0.6.0.dist-info/licenses/LICENSE,sha256=CUhc1NrE50bs45tcXF7OcTQBKEvkUuLqeOHgrWQ5jaA,1067
|
|
98
|
+
openai_sdk_helpers-0.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|