openai-sdk-helpers 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openai_sdk_helpers/agent/classifier.py +54 -103
- openai_sdk_helpers/prompt/classifier.jinja +5 -6
- openai_sdk_helpers/structure/__init__.py +4 -2
- openai_sdk_helpers/structure/classification.py +194 -113
- {openai_sdk_helpers-0.6.2.dist-info → openai_sdk_helpers-0.6.4.dist-info}/METADATA +1 -1
- {openai_sdk_helpers-0.6.2.dist-info → openai_sdk_helpers-0.6.4.dist-info}/RECORD +9 -9
- {openai_sdk_helpers-0.6.2.dist-info → openai_sdk_helpers-0.6.4.dist-info}/WHEEL +0 -0
- {openai_sdk_helpers-0.6.2.dist-info → openai_sdk_helpers-0.6.4.dist-info}/entry_points.txt +0 -0
- {openai_sdk_helpers-0.6.2.dist-info → openai_sdk_helpers-0.6.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -10,12 +10,17 @@ from enum import Enum
|
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from typing import Any, Awaitable, Dict, Iterable, Optional, Sequence, cast
|
|
12
12
|
|
|
13
|
+
from agents.model_settings import ModelSettings
|
|
14
|
+
|
|
13
15
|
from ..structure import (
|
|
14
16
|
ClassificationResult,
|
|
15
17
|
ClassificationStep,
|
|
16
18
|
ClassificationStopReason,
|
|
17
19
|
StructureBase,
|
|
20
|
+
Taxonomy,
|
|
18
21
|
TaxonomyNode,
|
|
22
|
+
format_path_identifier,
|
|
23
|
+
split_path_identifier,
|
|
19
24
|
)
|
|
20
25
|
from ..utils import ensure_list
|
|
21
26
|
from .base import AgentBase
|
|
@@ -31,14 +36,14 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
31
36
|
Optional template file path for prompt rendering.
|
|
32
37
|
model : str | None, default=None
|
|
33
38
|
Model identifier to use for classification.
|
|
39
|
+
model_settings : ModelSettings | None, default=None
|
|
40
|
+
Optional model settings to apply to the classifier agent.
|
|
34
41
|
|
|
35
42
|
Methods
|
|
36
43
|
-------
|
|
37
|
-
|
|
38
|
-
Classify text by recursively walking the taxonomy tree.
|
|
39
|
-
run_async(input, context, max_depth, confidence_threshold, single_class)
|
|
44
|
+
run_async(input, context, max_depth, confidence_threshold)
|
|
40
45
|
Classify text asynchronously using taxonomy traversal.
|
|
41
|
-
run_sync(input, context, max_depth, confidence_threshold
|
|
46
|
+
run_sync(input, context, max_depth, confidence_threshold)
|
|
42
47
|
Classify text synchronously using taxonomy traversal.
|
|
43
48
|
|
|
44
49
|
Examples
|
|
@@ -57,6 +62,7 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
57
62
|
*,
|
|
58
63
|
template_path: Path | str | None = None,
|
|
59
64
|
model: str | None = None,
|
|
65
|
+
model_settings: ModelSettings | None = None,
|
|
60
66
|
taxonomy: TaxonomyNode | Sequence[TaxonomyNode],
|
|
61
67
|
) -> None:
|
|
62
68
|
"""Initialize the taxonomy classifier agent configuration.
|
|
@@ -67,6 +73,8 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
67
73
|
Optional template file path for prompt rendering.
|
|
68
74
|
model : str | None, default=None
|
|
69
75
|
Model identifier to use for classification.
|
|
76
|
+
model_settings : ModelSettings | None, default=None
|
|
77
|
+
Optional model settings to apply to the classifier agent.
|
|
70
78
|
taxonomy : TaxonomyNode | Sequence[TaxonomyNode]
|
|
71
79
|
Root taxonomy node or list of root nodes.
|
|
72
80
|
|
|
@@ -91,10 +99,11 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
91
99
|
template_path=resolved_template_path,
|
|
92
100
|
output_structure=ClassificationStep,
|
|
93
101
|
model=model,
|
|
102
|
+
model_settings=model_settings,
|
|
94
103
|
)
|
|
95
104
|
super().__init__(configuration=configuration)
|
|
96
105
|
|
|
97
|
-
async def
|
|
106
|
+
async def _run_agent(
|
|
98
107
|
self,
|
|
99
108
|
text: str,
|
|
100
109
|
*,
|
|
@@ -102,7 +111,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
102
111
|
file_ids: str | Sequence[str] | None = None,
|
|
103
112
|
max_depth: Optional[int] = None,
|
|
104
113
|
confidence_threshold: float | None = None,
|
|
105
|
-
single_class: bool = False,
|
|
106
114
|
session: Optional[Any] = None,
|
|
107
115
|
) -> ClassificationResult:
|
|
108
116
|
"""Classify ``text`` by recursively walking taxonomy levels.
|
|
@@ -119,8 +127,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
119
127
|
Maximum depth to traverse before stopping.
|
|
120
128
|
confidence_threshold : float or None, default=None
|
|
121
129
|
Minimum confidence required to accept a classification step.
|
|
122
|
-
single_class : bool, default=False
|
|
123
|
-
Whether to keep only the highest-priority selection per step.
|
|
124
130
|
session : Session or None, default=None
|
|
125
131
|
Optional session for maintaining conversation history across runs.
|
|
126
132
|
|
|
@@ -147,21 +153,17 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
147
153
|
file_ids=file_ids,
|
|
148
154
|
max_depth=max_depth,
|
|
149
155
|
confidence_threshold=confidence_threshold,
|
|
150
|
-
single_class=single_class,
|
|
151
156
|
session=session,
|
|
152
157
|
state=state,
|
|
153
158
|
)
|
|
154
159
|
|
|
155
160
|
final_nodes_value = state.final_nodes or None
|
|
156
|
-
final_node = state.final_nodes[0] if state.final_nodes else None
|
|
157
161
|
stop_reason = _resolve_stop_reason(state)
|
|
158
162
|
return ClassificationResult(
|
|
159
|
-
final_node=final_node,
|
|
160
163
|
final_nodes=final_nodes_value,
|
|
161
164
|
confidence=state.best_confidence,
|
|
162
165
|
stop_reason=stop_reason,
|
|
163
|
-
|
|
164
|
-
path_nodes=state.path_nodes,
|
|
166
|
+
steps=state.steps,
|
|
165
167
|
)
|
|
166
168
|
|
|
167
169
|
async def run_async(
|
|
@@ -174,7 +176,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
174
176
|
file_ids: str | Sequence[str] | None = None,
|
|
175
177
|
max_depth: Optional[int] = None,
|
|
176
178
|
confidence_threshold: float | None = None,
|
|
177
|
-
single_class: bool = False,
|
|
178
179
|
) -> ClassificationResult:
|
|
179
180
|
"""Classify ``input`` asynchronously with taxonomy traversal.
|
|
180
181
|
|
|
@@ -194,8 +195,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
194
195
|
Maximum depth to traverse before stopping.
|
|
195
196
|
confidence_threshold : float or None, default=None
|
|
196
197
|
Minimum confidence required to accept a classification step.
|
|
197
|
-
single_class : bool, default=False
|
|
198
|
-
Whether to keep only the highest-priority selection per step.
|
|
199
198
|
|
|
200
199
|
Returns
|
|
201
200
|
-------
|
|
@@ -211,11 +210,10 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
211
210
|
"file_ids": file_ids,
|
|
212
211
|
"max_depth": max_depth,
|
|
213
212
|
"confidence_threshold": confidence_threshold,
|
|
214
|
-
"single_class": single_class,
|
|
215
213
|
}
|
|
216
214
|
if session is not None:
|
|
217
215
|
kwargs["session"] = session
|
|
218
|
-
return await self.
|
|
216
|
+
return await self._run_agent(input, **kwargs)
|
|
219
217
|
|
|
220
218
|
def run_sync(
|
|
221
219
|
self,
|
|
@@ -227,7 +225,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
227
225
|
file_ids: str | Sequence[str] | None = None,
|
|
228
226
|
max_depth: Optional[int] = None,
|
|
229
227
|
confidence_threshold: float | None = None,
|
|
230
|
-
single_class: bool = False,
|
|
231
228
|
) -> ClassificationResult:
|
|
232
229
|
"""Classify ``input`` synchronously with taxonomy traversal.
|
|
233
230
|
|
|
@@ -247,8 +244,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
247
244
|
Maximum depth to traverse before stopping.
|
|
248
245
|
confidence_threshold : float or None, default=None
|
|
249
246
|
Minimum confidence required to accept a classification step.
|
|
250
|
-
single_class : bool, default=False
|
|
251
|
-
Whether to keep only the highest-priority selection per step.
|
|
252
247
|
|
|
253
248
|
Returns
|
|
254
249
|
-------
|
|
@@ -264,13 +259,12 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
264
259
|
"file_ids": file_ids,
|
|
265
260
|
"max_depth": max_depth,
|
|
266
261
|
"confidence_threshold": confidence_threshold,
|
|
267
|
-
"single_class": single_class,
|
|
268
262
|
}
|
|
269
263
|
if session is not None:
|
|
270
264
|
kwargs["session"] = session
|
|
271
265
|
|
|
272
266
|
async def runner() -> ClassificationResult:
|
|
273
|
-
return await self.
|
|
267
|
+
return await self._run_agent(input, **kwargs)
|
|
274
268
|
|
|
275
269
|
try:
|
|
276
270
|
asyncio.get_running_loop()
|
|
@@ -342,7 +336,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
342
336
|
file_ids: str | Sequence[str] | None,
|
|
343
337
|
max_depth: Optional[int],
|
|
344
338
|
confidence_threshold: float | None,
|
|
345
|
-
single_class: bool,
|
|
346
339
|
session: Optional[Any],
|
|
347
340
|
state: "_TraversalState",
|
|
348
341
|
) -> None:
|
|
@@ -364,8 +357,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
364
357
|
Maximum traversal depth before stopping.
|
|
365
358
|
confidence_threshold : float or None
|
|
366
359
|
Minimum confidence required to accept a classification step.
|
|
367
|
-
single_class : bool
|
|
368
|
-
Whether to keep only the highest-priority selection per step.
|
|
369
360
|
session : Session or None
|
|
370
361
|
Optional session for maintaining conversation history across runs.
|
|
371
362
|
state : _TraversalState
|
|
@@ -380,7 +371,7 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
380
371
|
node_paths = _build_node_path_map(nodes, parent_path)
|
|
381
372
|
template_context = _build_context(
|
|
382
373
|
node_descriptors=_build_node_descriptors(node_paths),
|
|
383
|
-
|
|
374
|
+
steps=state.steps,
|
|
384
375
|
depth=depth,
|
|
385
376
|
context=context,
|
|
386
377
|
)
|
|
@@ -392,7 +383,7 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
392
383
|
session=session,
|
|
393
384
|
)
|
|
394
385
|
step = _normalize_step_output(raw_step, step_structure)
|
|
395
|
-
state.
|
|
386
|
+
state.steps.append(step)
|
|
396
387
|
|
|
397
388
|
if (
|
|
398
389
|
confidence_threshold is not None
|
|
@@ -402,10 +393,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
402
393
|
return
|
|
403
394
|
|
|
404
395
|
resolved_nodes = _resolve_nodes(node_paths, step)
|
|
405
|
-
if resolved_nodes:
|
|
406
|
-
if single_class:
|
|
407
|
-
resolved_nodes = resolved_nodes[:1]
|
|
408
|
-
state.path_nodes.extend(resolved_nodes)
|
|
409
396
|
|
|
410
397
|
if step.stop_reason.is_terminal:
|
|
411
398
|
if resolved_nodes:
|
|
@@ -419,8 +406,7 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
419
406
|
if not resolved_nodes:
|
|
420
407
|
return
|
|
421
408
|
|
|
422
|
-
|
|
423
|
-
base_path_nodes_len = len(state.path_nodes)
|
|
409
|
+
base_steps_len = len(state.steps)
|
|
424
410
|
child_tasks: list[tuple[Awaitable["_TraversalState"], int]] = []
|
|
425
411
|
for node in resolved_nodes:
|
|
426
412
|
if node.children:
|
|
@@ -439,7 +425,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
439
425
|
file_ids=file_ids,
|
|
440
426
|
max_depth=max_depth,
|
|
441
427
|
confidence_threshold=confidence_threshold,
|
|
442
|
-
single_class=single_class,
|
|
443
428
|
session=session,
|
|
444
429
|
state=sub_state,
|
|
445
430
|
),
|
|
@@ -459,8 +444,7 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
459
444
|
for child_state, (_, base_final_nodes_len) in zip(
|
|
460
445
|
child_states, child_tasks, strict=True
|
|
461
446
|
):
|
|
462
|
-
state.
|
|
463
|
-
state.path_nodes.extend(child_state.path_nodes[base_path_nodes_len:])
|
|
447
|
+
state.steps.extend(child_state.steps[base_steps_len:])
|
|
464
448
|
state.final_nodes.extend(child_state.final_nodes[base_final_nodes_len:])
|
|
465
449
|
state.best_confidence = _max_confidence(
|
|
466
450
|
state.best_confidence, child_state.best_confidence
|
|
@@ -514,6 +498,7 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
514
498
|
sub_agent = TaxonomyClassifierAgent(
|
|
515
499
|
template_path=self._template_path,
|
|
516
500
|
model=self._model,
|
|
501
|
+
model_settings=self._model_settings,
|
|
517
502
|
taxonomy=list(nodes),
|
|
518
503
|
)
|
|
519
504
|
sub_agent._run_step_async = self._run_step_async
|
|
@@ -531,7 +516,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
531
516
|
file_ids: str | Sequence[str] | None,
|
|
532
517
|
max_depth: Optional[int],
|
|
533
518
|
confidence_threshold: float | None,
|
|
534
|
-
single_class: bool,
|
|
535
519
|
session: Optional[Any],
|
|
536
520
|
state: "_TraversalState",
|
|
537
521
|
) -> "_TraversalState":
|
|
@@ -557,8 +541,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
557
541
|
Maximum traversal depth before stopping.
|
|
558
542
|
confidence_threshold : float or None
|
|
559
543
|
Minimum confidence required to accept a classification step.
|
|
560
|
-
single_class : bool
|
|
561
|
-
Whether to keep only the highest-priority selection per step.
|
|
562
544
|
session : Session or None
|
|
563
545
|
Optional session for maintaining conversation history across runs.
|
|
564
546
|
state : _TraversalState
|
|
@@ -578,7 +560,6 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
578
560
|
file_ids=file_ids,
|
|
579
561
|
max_depth=max_depth,
|
|
580
562
|
confidence_threshold=confidence_threshold,
|
|
581
|
-
single_class=single_class,
|
|
582
563
|
session=session,
|
|
583
564
|
state=state,
|
|
584
565
|
)
|
|
@@ -589,8 +570,7 @@ class TaxonomyClassifierAgent(AgentBase):
|
|
|
589
570
|
class _TraversalState:
|
|
590
571
|
"""Track recursive traversal state."""
|
|
591
572
|
|
|
592
|
-
|
|
593
|
-
path_nodes: list[TaxonomyNode] = field(default_factory=list)
|
|
573
|
+
steps: list[ClassificationStep] = field(default_factory=list)
|
|
594
574
|
final_nodes: list[TaxonomyNode] = field(default_factory=list)
|
|
595
575
|
best_confidence: float | None = None
|
|
596
576
|
saw_max_depth: bool = False
|
|
@@ -612,8 +592,7 @@ def _copy_traversal_state(state: _TraversalState) -> _TraversalState:
|
|
|
612
592
|
Cloned traversal state with copied collections.
|
|
613
593
|
"""
|
|
614
594
|
return _TraversalState(
|
|
615
|
-
|
|
616
|
-
path_nodes=list(state.path_nodes),
|
|
595
|
+
steps=list(state.steps),
|
|
617
596
|
final_nodes=list(state.final_nodes),
|
|
618
597
|
best_confidence=state.best_confidence,
|
|
619
598
|
saw_max_depth=state.saw_max_depth,
|
|
@@ -663,6 +642,8 @@ def _normalize_roots(
|
|
|
663
642
|
list[TaxonomyNode]
|
|
664
643
|
Normalized list of root nodes.
|
|
665
644
|
"""
|
|
645
|
+
if isinstance(taxonomy, Taxonomy):
|
|
646
|
+
return [node for node in taxonomy.children if node is not None]
|
|
666
647
|
if isinstance(taxonomy, TaxonomyNode):
|
|
667
648
|
return [taxonomy]
|
|
668
649
|
return [node for node in taxonomy if node is not None]
|
|
@@ -682,7 +663,7 @@ def _default_template_path() -> Path:
|
|
|
682
663
|
def _build_context(
|
|
683
664
|
*,
|
|
684
665
|
node_descriptors: Iterable[dict[str, Any]],
|
|
685
|
-
|
|
666
|
+
steps: Sequence[ClassificationStep],
|
|
686
667
|
depth: int,
|
|
687
668
|
context: Optional[Dict[str, Any]],
|
|
688
669
|
) -> Dict[str, Any]:
|
|
@@ -692,7 +673,7 @@ def _build_context(
|
|
|
692
673
|
----------
|
|
693
674
|
node_descriptors : Iterable[dict[str, Any]]
|
|
694
675
|
Node descriptors available at the current taxonomy level.
|
|
695
|
-
|
|
676
|
+
steps : Sequence[ClassificationStep]
|
|
696
677
|
Steps recorded so far in the traversal.
|
|
697
678
|
depth : int
|
|
698
679
|
Current traversal depth.
|
|
@@ -704,9 +685,14 @@ def _build_context(
|
|
|
704
685
|
dict[str, Any]
|
|
705
686
|
Context dictionary for prompt rendering.
|
|
706
687
|
"""
|
|
688
|
+
summarized_steps = [
|
|
689
|
+
step.as_summary()
|
|
690
|
+
for step in steps
|
|
691
|
+
if step.selected_nodes and any(node is not None for node in step.selected_nodes)
|
|
692
|
+
]
|
|
707
693
|
template_context: Dict[str, Any] = {
|
|
708
694
|
"taxonomy_nodes": list(node_descriptors),
|
|
709
|
-
"
|
|
695
|
+
"steps": summarized_steps,
|
|
710
696
|
"depth": depth,
|
|
711
697
|
}
|
|
712
698
|
if context:
|
|
@@ -754,7 +740,7 @@ def _build_node_path_map(
|
|
|
754
740
|
path_map: dict[str, TaxonomyNode] = {}
|
|
755
741
|
seen: dict[str, int] = {}
|
|
756
742
|
for node in nodes:
|
|
757
|
-
base_path =
|
|
743
|
+
base_path = format_path_identifier([*parent_path, node.label])
|
|
758
744
|
count = seen.get(base_path, 0) + 1
|
|
759
745
|
seen[base_path] = count
|
|
760
746
|
path = f"{base_path} ({count})" if count > 1 else base_path
|
|
@@ -783,33 +769,12 @@ def _build_node_descriptors(
|
|
|
783
769
|
{
|
|
784
770
|
"identifier": path_id,
|
|
785
771
|
"label": node.label,
|
|
786
|
-
"
|
|
772
|
+
"computed_description": node.computed_description,
|
|
787
773
|
}
|
|
788
774
|
)
|
|
789
775
|
return descriptors
|
|
790
776
|
|
|
791
777
|
|
|
792
|
-
def _format_path_identifier(path_segments: Sequence[str]) -> str:
|
|
793
|
-
"""Format path segments into a safe identifier string.
|
|
794
|
-
|
|
795
|
-
Parameters
|
|
796
|
-
----------
|
|
797
|
-
path_segments : Sequence[str]
|
|
798
|
-
Path segments to format.
|
|
799
|
-
|
|
800
|
-
Returns
|
|
801
|
-
-------
|
|
802
|
-
str
|
|
803
|
-
Escaped path identifier string.
|
|
804
|
-
"""
|
|
805
|
-
delimiter = " > "
|
|
806
|
-
escape_token = "\\>"
|
|
807
|
-
escaped_segments = [
|
|
808
|
-
segment.replace(delimiter, escape_token) for segment in path_segments
|
|
809
|
-
]
|
|
810
|
-
return delimiter.join(escaped_segments)
|
|
811
|
-
|
|
812
|
-
|
|
813
778
|
def _build_taxonomy_enum(name: str, values: Sequence[str]) -> type[Enum]:
|
|
814
779
|
"""Build a safe Enum from taxonomy node values.
|
|
815
780
|
|
|
@@ -834,25 +799,6 @@ def _build_taxonomy_enum(name: str, values: Sequence[str]) -> type[Enum]:
|
|
|
834
799
|
return cast(type[Enum], Enum(name, members))
|
|
835
800
|
|
|
836
801
|
|
|
837
|
-
def _split_taxonomy_path(value: str) -> list[str]:
|
|
838
|
-
"""Split a taxonomy identifier into its path segments.
|
|
839
|
-
|
|
840
|
-
Parameters
|
|
841
|
-
----------
|
|
842
|
-
value : str
|
|
843
|
-
Taxonomy path identifier to split.
|
|
844
|
-
|
|
845
|
-
Returns
|
|
846
|
-
-------
|
|
847
|
-
list[str]
|
|
848
|
-
Path segments with escaped delimiters restored.
|
|
849
|
-
"""
|
|
850
|
-
delimiter = " > "
|
|
851
|
-
escape_token = "\\>"
|
|
852
|
-
segments = value.split(delimiter)
|
|
853
|
-
return [segment.replace(escape_token, delimiter) for segment in segments]
|
|
854
|
-
|
|
855
|
-
|
|
856
802
|
def _sanitize_enum_member(
|
|
857
803
|
value: str,
|
|
858
804
|
index: int,
|
|
@@ -875,7 +821,7 @@ def _sanitize_enum_member(
|
|
|
875
821
|
Sanitized enum member name.
|
|
876
822
|
"""
|
|
877
823
|
normalized_segments: list[str] = []
|
|
878
|
-
for segment in
|
|
824
|
+
for segment in split_path_identifier(value):
|
|
879
825
|
normalized = re.sub(r"[^0-9a-zA-Z]+", "_", segment).strip("_").upper()
|
|
880
826
|
if not normalized:
|
|
881
827
|
normalized = "VALUE"
|
|
@@ -933,7 +879,9 @@ def _build_input_payload(
|
|
|
933
879
|
str or list[dict[str, Any]]
|
|
934
880
|
Input payload suitable for the Agents SDK.
|
|
935
881
|
"""
|
|
936
|
-
normalized_file_ids = [
|
|
882
|
+
normalized_file_ids = [
|
|
883
|
+
file_id for file_id in dict.fromkeys(ensure_list(file_ids)) if file_id
|
|
884
|
+
]
|
|
937
885
|
if not normalized_file_ids:
|
|
938
886
|
return text
|
|
939
887
|
attachments = [
|
|
@@ -1038,17 +986,20 @@ def _selected_nodes(step: ClassificationStep) -> list[str]:
|
|
|
1038
986
|
list[str]
|
|
1039
987
|
Selected identifiers in priority order.
|
|
1040
988
|
"""
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
989
|
+
enum_cls: type[Enum] | None = None
|
|
990
|
+
step_cls = step.__class__
|
|
991
|
+
if hasattr(step_cls, "model_fields"):
|
|
992
|
+
field = step_cls.model_fields.get("selected_nodes")
|
|
993
|
+
if field is not None:
|
|
994
|
+
enum_cls = step_cls._extract_enum_class(field.annotation)
|
|
995
|
+
if enum_cls is None:
|
|
996
|
+
enum_cls = Enum
|
|
997
|
+
selected_nodes = [
|
|
998
|
+
str(_normalize_enum_value(selected_node, enum_cls))
|
|
999
|
+
for selected_node in step.selected_nodes or []
|
|
1000
|
+
if selected_node
|
|
1001
|
+
]
|
|
1002
|
+
return selected_nodes
|
|
1052
1003
|
|
|
1053
1004
|
|
|
1054
1005
|
def _max_confidence(
|
|
@@ -3,7 +3,6 @@ You are a taxonomy classification assistant.
|
|
|
3
3
|
Instructions:
|
|
4
4
|
- Review the text and select all matching taxonomy nodes from the list.
|
|
5
5
|
- Populate selected_nodes as a list of taxonomy node ids for multi-class matches.
|
|
6
|
-
- Use selected_node when a single best match is appropriate.
|
|
7
6
|
- Provide a confidence score between 0 and 1 for the selections; higher means more certain.
|
|
8
7
|
- Interpret confidence as:
|
|
9
8
|
- 0.90–1.00: explicit lexical match.
|
|
@@ -30,10 +29,10 @@ Instructions:
|
|
|
30
29
|
|
|
31
30
|
Current depth: {{ depth }}
|
|
32
31
|
|
|
33
|
-
Previous
|
|
34
|
-
{% if
|
|
35
|
-
{% for step in
|
|
36
|
-
- {{ step.
|
|
32
|
+
Previous steps:
|
|
33
|
+
{% if steps %}
|
|
34
|
+
{% for step in steps %}
|
|
35
|
+
- {{ step.selected_nodes | map('string') | join(', ') }} (confidence={{ step.confidence }}, stop_reason={{ step.stop_reason }})
|
|
37
36
|
{% endfor %}
|
|
38
37
|
{% else %}
|
|
39
38
|
- None
|
|
@@ -43,5 +42,5 @@ Candidate taxonomy nodes:
|
|
|
43
42
|
{% for node in taxonomy_nodes %}
|
|
44
43
|
- identifier: {{ node.identifier }}
|
|
45
44
|
label: {{ node.label }}
|
|
46
|
-
description: {{ node.
|
|
45
|
+
description: {{ node.computed_description }}
|
|
47
46
|
{% endfor %}
|
|
@@ -82,7 +82,8 @@ from .classification import (
|
|
|
82
82
|
ClassificationStopReason,
|
|
83
83
|
Taxonomy,
|
|
84
84
|
TaxonomyNode,
|
|
85
|
-
|
|
85
|
+
format_path_identifier,
|
|
86
|
+
split_path_identifier,
|
|
86
87
|
taxonomy_enum_path,
|
|
87
88
|
)
|
|
88
89
|
from .extraction import (
|
|
@@ -112,7 +113,8 @@ __all__ = [
|
|
|
112
113
|
"ClassificationStopReason",
|
|
113
114
|
"Taxonomy",
|
|
114
115
|
"TaxonomyNode",
|
|
115
|
-
"
|
|
116
|
+
"format_path_identifier",
|
|
117
|
+
"split_path_identifier",
|
|
116
118
|
"taxonomy_enum_path",
|
|
117
119
|
"TaskStructure",
|
|
118
120
|
"PlanStructure",
|
|
@@ -3,7 +3,10 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
from enum import Enum
|
|
6
|
-
from typing import Any, Iterable, Optional, cast
|
|
6
|
+
from typing import Any, Iterable, Optional, Sequence, cast
|
|
7
|
+
|
|
8
|
+
PATH_DELIMITER = " > "
|
|
9
|
+
PATH_ESCAPE_TOKEN = "\\>"
|
|
7
10
|
|
|
8
11
|
from .base import StructureBase, spec_field
|
|
9
12
|
|
|
@@ -30,18 +33,26 @@ class TaxonomyNode(StructureBase):
|
|
|
30
33
|
Return True when the taxonomy node has no children.
|
|
31
34
|
child_by_path(path)
|
|
32
35
|
Return the child node matching the provided path.
|
|
36
|
+
path_identifier
|
|
37
|
+
Return the path identifier string for the node.
|
|
38
|
+
keywords
|
|
39
|
+
Return a list of keywords for the node.
|
|
40
|
+
computed_description
|
|
41
|
+
Return the computed description for the node.
|
|
42
|
+
flattened_nodes
|
|
43
|
+
Return a flattened list of all taxonomy nodes.
|
|
33
44
|
"""
|
|
34
45
|
|
|
35
46
|
label: str = spec_field(
|
|
36
|
-
"label", description="Human-readable label for the taxonomy node."
|
|
47
|
+
name="label", description="Human-readable label for the taxonomy node."
|
|
37
48
|
)
|
|
38
49
|
description: str | None = spec_field(
|
|
39
|
-
"description",
|
|
50
|
+
name="description",
|
|
40
51
|
description="Optional description of the taxonomy node.",
|
|
41
52
|
default=None,
|
|
42
53
|
)
|
|
43
54
|
children: list["TaxonomyNode"] = spec_field(
|
|
44
|
-
"children",
|
|
55
|
+
name="children",
|
|
45
56
|
description="Child nodes in the taxonomy.",
|
|
46
57
|
default_factory=list,
|
|
47
58
|
)
|
|
@@ -103,7 +114,7 @@ class TaxonomyNode(StructureBase):
|
|
|
103
114
|
if path is None:
|
|
104
115
|
return None
|
|
105
116
|
if isinstance(path, str):
|
|
106
|
-
path_segments =
|
|
117
|
+
path_segments = split_path_identifier(path)
|
|
107
118
|
else:
|
|
108
119
|
path_segments = list(path)
|
|
109
120
|
last_segment = path_segments[-1] if path_segments else None
|
|
@@ -114,50 +125,142 @@ class TaxonomyNode(StructureBase):
|
|
|
114
125
|
None,
|
|
115
126
|
)
|
|
116
127
|
|
|
128
|
+
@property
|
|
129
|
+
def path_identifier(self) -> str:
|
|
130
|
+
"""Return the path identifier string for this node.
|
|
131
|
+
|
|
132
|
+
Returns
|
|
133
|
+
-------
|
|
134
|
+
str
|
|
135
|
+
Delimited path identifier.
|
|
136
|
+
"""
|
|
137
|
+
return format_path_identifier(self.computed_path)
|
|
138
|
+
|
|
139
|
+
@property
|
|
140
|
+
def keywords(self) -> list[str]:
|
|
141
|
+
"""Return a list of keywords for this node.
|
|
142
|
+
|
|
143
|
+
Returns
|
|
144
|
+
-------
|
|
145
|
+
list[str]
|
|
146
|
+
Unique list of keywords derived from the node and descendants.
|
|
147
|
+
"""
|
|
148
|
+
keywords = [self.label]
|
|
149
|
+
for child in self.children:
|
|
150
|
+
keywords.extend(child.keywords)
|
|
151
|
+
return list(dict.fromkeys(filter(None, keywords)))
|
|
152
|
+
|
|
153
|
+
@property
|
|
154
|
+
def computed_description(self) -> str:
|
|
155
|
+
"""Return the computed description for this node.
|
|
156
|
+
|
|
157
|
+
Returns
|
|
158
|
+
-------
|
|
159
|
+
str
|
|
160
|
+
Node description with optional keyword context.
|
|
161
|
+
"""
|
|
162
|
+
keywords = self.keywords
|
|
163
|
+
base = self.description or self.label
|
|
164
|
+
if len(keywords) == 1 and keywords[0] == self.label:
|
|
165
|
+
return base
|
|
166
|
+
if keywords:
|
|
167
|
+
return f"{base}\nKeywords: {', '.join(keywords)}"
|
|
168
|
+
return base
|
|
169
|
+
|
|
170
|
+
@property
|
|
171
|
+
def flattened_nodes(self) -> list[TaxonomyNode]:
|
|
172
|
+
"""Return a flattened list of all taxonomy nodes.
|
|
173
|
+
|
|
174
|
+
Returns
|
|
175
|
+
-------
|
|
176
|
+
list[TaxonomyNode]
|
|
177
|
+
Depth-first list of taxonomy nodes.
|
|
178
|
+
"""
|
|
179
|
+
flattened: list[TaxonomyNode] = []
|
|
180
|
+
for node in self.children:
|
|
181
|
+
flattened.append(node)
|
|
182
|
+
flattened.extend(node.flattened_nodes)
|
|
183
|
+
return flattened
|
|
184
|
+
|
|
117
185
|
|
|
118
|
-
class Taxonomy(
|
|
186
|
+
class Taxonomy(TaxonomyNode):
|
|
119
187
|
"""Represent a taxonomy with metadata and root nodes.
|
|
120
188
|
|
|
121
189
|
Attributes
|
|
122
190
|
----------
|
|
123
|
-
|
|
191
|
+
label : str
|
|
124
192
|
Human-readable taxonomy name.
|
|
125
193
|
description : str | None
|
|
126
194
|
Optional description of the taxonomy.
|
|
127
|
-
|
|
195
|
+
children : list[TaxonomyNode]
|
|
128
196
|
Root taxonomy nodes.
|
|
129
197
|
|
|
130
198
|
Methods
|
|
131
199
|
-------
|
|
200
|
+
root(label, *children)
|
|
201
|
+
Create a taxonomy from root nodes.
|
|
202
|
+
build_path(parent_path)
|
|
203
|
+
Build a computed path using the provided parent path segments.
|
|
204
|
+
computed_path
|
|
205
|
+
Return the computed path for the node.
|
|
206
|
+
is_leaf
|
|
207
|
+
Return True when the taxonomy node has no children.
|
|
208
|
+
child_by_path(path)
|
|
209
|
+
Return the child node matching the provided path.
|
|
210
|
+
path_identifier
|
|
211
|
+
Return the path identifier string for the node.
|
|
212
|
+
keywords
|
|
213
|
+
Return a list of keywords for the node.
|
|
214
|
+
computed_description
|
|
215
|
+
Return the computed description for the node.
|
|
132
216
|
flattened_nodes
|
|
133
217
|
Return a flattened list of all taxonomy nodes.
|
|
134
218
|
"""
|
|
135
219
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
"nodes
|
|
144
|
-
description="Root taxonomy nodes.",
|
|
145
|
-
default_factory=list,
|
|
146
|
-
)
|
|
220
|
+
def __init__(
|
|
221
|
+
self,
|
|
222
|
+
*,
|
|
223
|
+
label: str,
|
|
224
|
+
description: str | None = None,
|
|
225
|
+
children: list[TaxonomyNode] | None = None,
|
|
226
|
+
) -> None:
|
|
227
|
+
"""Initialize a taxonomy with name and root nodes.
|
|
147
228
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
229
|
+
Parameters
|
|
230
|
+
----------
|
|
231
|
+
label : str
|
|
232
|
+
Human-readable taxonomy name.
|
|
233
|
+
description : str or None, default=None
|
|
234
|
+
Optional description of the taxonomy.
|
|
235
|
+
children : list[TaxonomyNode] or None, default=None
|
|
236
|
+
Root taxonomy nodes. Defaults to an empty list.
|
|
237
|
+
"""
|
|
238
|
+
super().__init__(
|
|
239
|
+
label=label,
|
|
240
|
+
description=description,
|
|
241
|
+
children=children or [],
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
@classmethod
|
|
245
|
+
def root(cls, label: str, *children: TaxonomyNode) -> "Taxonomy":
|
|
246
|
+
"""Create a taxonomy from root nodes.
|
|
247
|
+
|
|
248
|
+
Parameters
|
|
249
|
+
----------
|
|
250
|
+
label : str
|
|
251
|
+
Human-readable taxonomy name.
|
|
252
|
+
*children : TaxonomyNode
|
|
253
|
+
Root taxonomy nodes.
|
|
151
254
|
|
|
152
255
|
Returns
|
|
153
256
|
-------
|
|
154
|
-
|
|
155
|
-
|
|
257
|
+
Taxonomy
|
|
258
|
+
Taxonomy instance with provided root nodes.
|
|
156
259
|
"""
|
|
157
|
-
return
|
|
260
|
+
return cls(label=label, children=list(children))
|
|
158
261
|
|
|
159
262
|
|
|
160
|
-
def
|
|
263
|
+
def split_path_identifier(path: str) -> list[str]:
|
|
161
264
|
"""Split a path identifier into label segments.
|
|
162
265
|
|
|
163
266
|
Parameters
|
|
@@ -170,10 +273,27 @@ def _split_path_identifier(path: str) -> list[str]:
|
|
|
170
273
|
list[str]
|
|
171
274
|
Label segments extracted from the path identifier.
|
|
172
275
|
"""
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
276
|
+
segments = path.split(PATH_DELIMITER) if path else []
|
|
277
|
+
return [segment.replace(PATH_ESCAPE_TOKEN, PATH_DELIMITER) for segment in segments]
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def format_path_identifier(path_segments: Sequence[str]) -> str:
|
|
281
|
+
"""Format path segments into a safe identifier string.
|
|
282
|
+
|
|
283
|
+
Parameters
|
|
284
|
+
----------
|
|
285
|
+
path_segments : Sequence[str]
|
|
286
|
+
Path segments to format.
|
|
287
|
+
|
|
288
|
+
Returns
|
|
289
|
+
-------
|
|
290
|
+
str
|
|
291
|
+
Escaped path identifier string.
|
|
292
|
+
"""
|
|
293
|
+
escaped_segments = [
|
|
294
|
+
segment.replace(PATH_DELIMITER, PATH_ESCAPE_TOKEN) for segment in path_segments
|
|
295
|
+
]
|
|
296
|
+
return PATH_DELIMITER.join(escaped_segments)
|
|
177
297
|
|
|
178
298
|
|
|
179
299
|
class ClassificationStopReason(str, Enum):
|
|
@@ -213,8 +333,6 @@ class ClassificationStep(StructureBase):
|
|
|
213
333
|
|
|
214
334
|
Attributes
|
|
215
335
|
----------
|
|
216
|
-
selected_node : Enum or None
|
|
217
|
-
Enum value of the selected taxonomy node.
|
|
218
336
|
selected_nodes : list[Enum] or None
|
|
219
337
|
Enum values of selected taxonomy nodes for multi-class classification.
|
|
220
338
|
confidence : float or None
|
|
@@ -246,11 +364,6 @@ class ClassificationStep(StructureBase):
|
|
|
246
364
|
[<NodeEnum.BILLING: 'billing'>]
|
|
247
365
|
"""
|
|
248
366
|
|
|
249
|
-
selected_node: Enum | None = spec_field(
|
|
250
|
-
"selected_node",
|
|
251
|
-
description="Path identifier of the selected taxonomy node.",
|
|
252
|
-
default=None,
|
|
253
|
-
)
|
|
254
367
|
selected_nodes: list[Enum] | None = spec_field(
|
|
255
368
|
"selected_nodes",
|
|
256
369
|
description="Path identifiers of selected taxonomy nodes.",
|
|
@@ -289,14 +402,8 @@ class ClassificationStep(StructureBase):
|
|
|
289
402
|
"""
|
|
290
403
|
namespace: dict[str, Any] = {
|
|
291
404
|
"__annotations__": {
|
|
292
|
-
"selected_node": enum_cls | None,
|
|
293
405
|
"selected_nodes": list[enum_cls] | None,
|
|
294
406
|
},
|
|
295
|
-
"selected_node": spec_field(
|
|
296
|
-
"selected_node",
|
|
297
|
-
description="Path identifier of the selected taxonomy node.",
|
|
298
|
-
default=None,
|
|
299
|
-
),
|
|
300
407
|
"selected_nodes": spec_field(
|
|
301
408
|
"selected_nodes",
|
|
302
409
|
description="Path identifiers of selected taxonomy nodes.",
|
|
@@ -317,16 +424,14 @@ class ClassificationStep(StructureBase):
|
|
|
317
424
|
--------
|
|
318
425
|
>>> NodeEnum = Enum("NodeEnum", {"ROOT": "root"})
|
|
319
426
|
>>> StepEnum = ClassificationStep.build_for_enum(NodeEnum)
|
|
320
|
-
>>> step = StepEnum(
|
|
321
|
-
>>> step.as_summary()["
|
|
322
|
-
<NodeEnum.ROOT: 'root'>
|
|
427
|
+
>>> step = StepEnum(selected_nodes=[NodeEnum.ROOT])
|
|
428
|
+
>>> step.as_summary()["selected_nodes"]
|
|
429
|
+
[<NodeEnum.ROOT: 'root'>]
|
|
323
430
|
"""
|
|
324
|
-
selected_node = _normalize_enum_value(self.selected_node)
|
|
325
431
|
selected_nodes = [
|
|
326
432
|
_normalize_enum_value(item) for item in self.selected_nodes or []
|
|
327
433
|
]
|
|
328
434
|
return {
|
|
329
|
-
"selected_node": selected_node,
|
|
330
435
|
"selected_nodes": selected_nodes or None,
|
|
331
436
|
"confidence": self.confidence,
|
|
332
437
|
"stop_reason": self.stop_reason.value,
|
|
@@ -356,25 +461,25 @@ class ClassificationResult(StructureBase):
|
|
|
356
461
|
|
|
357
462
|
Attributes
|
|
358
463
|
----------
|
|
359
|
-
final_node : TaxonomyNode or None
|
|
360
|
-
Resolved taxonomy node for the final selection.
|
|
361
464
|
final_nodes : list[TaxonomyNode] or None
|
|
362
465
|
Resolved taxonomy nodes for the final selections across branches.
|
|
363
466
|
confidence : float or None
|
|
364
467
|
Confidence score for the final selection.
|
|
365
468
|
stop_reason : ClassificationStopReason
|
|
366
469
|
Reason the traversal ended.
|
|
367
|
-
|
|
470
|
+
steps : list[ClassificationStep]
|
|
368
471
|
Ordered list of classification steps.
|
|
369
|
-
path_nodes : list[TaxonomyNode]
|
|
370
|
-
Resolved taxonomy nodes selected across the path.
|
|
371
472
|
|
|
372
473
|
Methods
|
|
373
474
|
-------
|
|
374
475
|
depth
|
|
375
476
|
Return the number of classification steps recorded.
|
|
376
|
-
|
|
377
|
-
Return the
|
|
477
|
+
final_node
|
|
478
|
+
Return the first resolved taxonomy node, if available.
|
|
479
|
+
iter_selected_nodes
|
|
480
|
+
Yield selected identifiers across all steps.
|
|
481
|
+
selected_nodes
|
|
482
|
+
Return the selected identifiers across all steps.
|
|
378
483
|
|
|
379
484
|
Examples
|
|
380
485
|
--------
|
|
@@ -382,7 +487,6 @@ class ClassificationResult(StructureBase):
|
|
|
382
487
|
|
|
383
488
|
>>> node = TaxonomyNode(label="Tax")
|
|
384
489
|
>>> result = ClassificationResult(
|
|
385
|
-
... final_node=node,
|
|
386
490
|
... final_nodes=[node],
|
|
387
491
|
... confidence=0.91,
|
|
388
492
|
... stop_reason=ClassificationStopReason.STOP,
|
|
@@ -391,11 +495,6 @@ class ClassificationResult(StructureBase):
|
|
|
391
495
|
[TaxonomyNode(label='Tax', description=None, children=[])]
|
|
392
496
|
"""
|
|
393
497
|
|
|
394
|
-
final_node: TaxonomyNode | None = spec_field(
|
|
395
|
-
"final_node",
|
|
396
|
-
description="Resolved taxonomy node for the final selection.",
|
|
397
|
-
default=None,
|
|
398
|
-
)
|
|
399
498
|
final_nodes: list[TaxonomyNode] | None = spec_field(
|
|
400
499
|
"final_nodes",
|
|
401
500
|
description="Resolved taxonomy nodes for the final selections.",
|
|
@@ -411,16 +510,11 @@ class ClassificationResult(StructureBase):
|
|
|
411
510
|
description="Reason the traversal ended.",
|
|
412
511
|
default=ClassificationStopReason.STOP,
|
|
413
512
|
)
|
|
414
|
-
|
|
415
|
-
"
|
|
513
|
+
steps: list[ClassificationStep] = spec_field(
|
|
514
|
+
"steps",
|
|
416
515
|
description="Ordered list of classification steps.",
|
|
417
516
|
default_factory=list,
|
|
418
517
|
)
|
|
419
|
-
path_nodes: list[TaxonomyNode] = spec_field(
|
|
420
|
-
"path_nodes",
|
|
421
|
-
description="Resolved taxonomy nodes selected across the path.",
|
|
422
|
-
default_factory=list,
|
|
423
|
-
)
|
|
424
518
|
|
|
425
519
|
@property
|
|
426
520
|
def depth(self) -> int:
|
|
@@ -431,59 +525,45 @@ class ClassificationResult(StructureBase):
|
|
|
431
525
|
int
|
|
432
526
|
Count of classification steps.
|
|
433
527
|
"""
|
|
434
|
-
return len(self.
|
|
528
|
+
return len(self.steps)
|
|
435
529
|
|
|
436
530
|
@property
|
|
437
|
-
def
|
|
438
|
-
"""Return the
|
|
531
|
+
def final_node(self) -> TaxonomyNode | None:
|
|
532
|
+
"""Return the first resolved taxonomy node.
|
|
439
533
|
|
|
440
534
|
Returns
|
|
441
535
|
-------
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
Examples
|
|
446
|
-
--------
|
|
447
|
-
>>> steps = [
|
|
448
|
-
... ClassificationStep(selected_node="Root"),
|
|
449
|
-
... ClassificationStep(selected_nodes=["Root > Leaf", "Root > Branch"]),
|
|
450
|
-
... ]
|
|
451
|
-
>>> ClassificationResult(
|
|
452
|
-
... stop_reason=ClassificationStopReason.STOP,
|
|
453
|
-
... path=steps,
|
|
454
|
-
... ).path_identifiers
|
|
455
|
-
['Root', 'Root > Leaf', 'Root > Branch']
|
|
536
|
+
TaxonomyNode or None
|
|
537
|
+
First resolved taxonomy node, if available.
|
|
456
538
|
"""
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
identifiers.extend(
|
|
461
|
-
_normalize_enum_value(value) for value in step.selected_nodes
|
|
462
|
-
)
|
|
463
|
-
elif step.selected_node:
|
|
464
|
-
identifiers.append(_normalize_enum_value(step.selected_node))
|
|
465
|
-
return [identifier for identifier in identifiers if identifier]
|
|
539
|
+
if not self.final_nodes:
|
|
540
|
+
return None
|
|
541
|
+
return self.final_nodes[0]
|
|
466
542
|
|
|
543
|
+
@property
|
|
544
|
+
def selected_nodes(self) -> list[str]:
|
|
545
|
+
"""Return the selected identifiers across all steps.
|
|
467
546
|
|
|
468
|
-
|
|
469
|
-
|
|
547
|
+
Returns
|
|
548
|
+
-------
|
|
549
|
+
list[str]
|
|
550
|
+
Selected identifiers in traversal order.
|
|
551
|
+
"""
|
|
552
|
+
return list(self.iter_selected_nodes())
|
|
470
553
|
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
nodes : Iterable[TaxonomyNode]
|
|
474
|
-
Root nodes to traverse.
|
|
554
|
+
def iter_selected_nodes(self) -> Iterable[str]:
|
|
555
|
+
"""Yield selected identifiers across all steps.
|
|
475
556
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
return flattened
|
|
557
|
+
Yields
|
|
558
|
+
------
|
|
559
|
+
str
|
|
560
|
+
Selected identifier in traversal order.
|
|
561
|
+
"""
|
|
562
|
+
for step in self.steps:
|
|
563
|
+
for value in step.selected_nodes or []:
|
|
564
|
+
normalized = _normalize_enum_value(value)
|
|
565
|
+
if normalized:
|
|
566
|
+
yield normalized
|
|
487
567
|
|
|
488
568
|
|
|
489
569
|
def taxonomy_enum_path(value: Enum | str | None) -> list[str]:
|
|
@@ -513,7 +593,7 @@ def taxonomy_enum_path(value: Enum | str | None) -> list[str]:
|
|
|
513
593
|
return []
|
|
514
594
|
if not isinstance(normalized_value, str):
|
|
515
595
|
normalized_value = str(normalized_value)
|
|
516
|
-
return
|
|
596
|
+
return split_path_identifier(normalized_value)
|
|
517
597
|
|
|
518
598
|
|
|
519
599
|
__all__ = [
|
|
@@ -522,6 +602,7 @@ __all__ = [
|
|
|
522
602
|
"ClassificationStopReason",
|
|
523
603
|
"Taxonomy",
|
|
524
604
|
"TaxonomyNode",
|
|
525
|
-
"
|
|
605
|
+
"format_path_identifier",
|
|
606
|
+
"split_path_identifier",
|
|
526
607
|
"taxonomy_enum_path",
|
|
527
608
|
]
|
|
@@ -10,7 +10,7 @@ openai_sdk_helpers/tools.py,sha256=8hhcytpmDfoXV16UQbDmDVV0rhLOn8c_VjXO8XaTFLQ,1
|
|
|
10
10
|
openai_sdk_helpers/types.py,sha256=ejCG0rYqJhjOQvKLoNnzq-TzcKCFt69GVfi7y805NkU,1451
|
|
11
11
|
openai_sdk_helpers/agent/__init__.py,sha256=qyzKzPhD8KsEl6d79XERK32AK5It_BZNOqChOpBdmhg,1199
|
|
12
12
|
openai_sdk_helpers/agent/base.py,sha256=vLs0oALhxsd_Xy5dGjSZTUFTug-YwZkF1LabQ2ruLxk,29508
|
|
13
|
-
openai_sdk_helpers/agent/classifier.py,sha256=
|
|
13
|
+
openai_sdk_helpers/agent/classifier.py,sha256=PHUnA5dSDWBQeRxwo0Qe8xIe7Ren3xSRAOmsQrRK_oA,33241
|
|
14
14
|
openai_sdk_helpers/agent/configuration.py,sha256=ZeH4ErgVe-BZamjUeNONbQi60ViolgYAWh-c8hNAQTw,15810
|
|
15
15
|
openai_sdk_helpers/agent/coordinator.py,sha256=lVjA0yI-GhGKlqbNR_k9GOCrUjFoZ0QoqRaafHckyME,18052
|
|
16
16
|
openai_sdk_helpers/agent/files.py,sha256=H7UfSZSjFUbv1cjRvNld9kZwIjc5wPq4vynqU8HgGJE,4478
|
|
@@ -30,7 +30,7 @@ openai_sdk_helpers/extract/extractor.py,sha256=vmRJyhKDEYAVfRk0KMgLH5hTqUfDAUyWB
|
|
|
30
30
|
openai_sdk_helpers/extract/generator.py,sha256=K9Euq0IaWs82oe5aRm73_18DelLKYyuH8VhfZ1_ZCEU,14695
|
|
31
31
|
openai_sdk_helpers/prompt/__init__.py,sha256=MOqgKwG9KLqKudoKRlUfLxiSmdOi2aD6hNrWDFqLHkk,418
|
|
32
32
|
openai_sdk_helpers/prompt/base.py,sha256=6X0zeopEvO0ba8207O8Nnj1QvFZEZier7kNNh4qkcmE,7782
|
|
33
|
-
openai_sdk_helpers/prompt/classifier.jinja,sha256=
|
|
33
|
+
openai_sdk_helpers/prompt/classifier.jinja,sha256=u4gTwImOdKHu9qRdnnhl2aX23ns2THYYh6l8bc2EeNo,1866
|
|
34
34
|
openai_sdk_helpers/prompt/extractor_config_agent_instructions.jinja,sha256=vCrsoUnsgHWSr7OS_ojMUjmPtHfbyv9bzKfaMaCJ99E,329
|
|
35
35
|
openai_sdk_helpers/prompt/extractor_config_generator.jinja,sha256=9rZ1PZdoQtnxDxFUlKRb0SooIEfNw4_Em99n9xvFyyU,960
|
|
36
36
|
openai_sdk_helpers/prompt/extractor_config_generator_instructions.jinja,sha256=GqV3DrGObyER_Fa-GMGGqhWBrQIH9FFlyKdgTjidyzg,534
|
|
@@ -55,10 +55,10 @@ openai_sdk_helpers/response/vector_store.py,sha256=HClp6O_g20uklQTY7trC4age3rtDm
|
|
|
55
55
|
openai_sdk_helpers/streamlit_app/__init__.py,sha256=3yAkl6qV71cqtT5YFZuC9Bkqit0NtffDV6jmMWpT1k4,812
|
|
56
56
|
openai_sdk_helpers/streamlit_app/app.py,sha256=kkjtdCKVwrJ9nZWuBArm3dhvcjMESX0TMqAiF61_JLM,17402
|
|
57
57
|
openai_sdk_helpers/streamlit_app/configuration.py,sha256=0KeJ4HqCNFthBHsedV6ptqHluAcTPBb5_TujFOGkIUU,16685
|
|
58
|
-
openai_sdk_helpers/structure/__init__.py,sha256=
|
|
58
|
+
openai_sdk_helpers/structure/__init__.py,sha256=w27ezTYVLzZdDMFfA8mawE82h8zO53idFBCiCfYfh7s,4321
|
|
59
59
|
openai_sdk_helpers/structure/agent_blueprint.py,sha256=VyJWkgPNzAYKRDMeR1M4kE6qqQURnwqtrrEn0TRJf0g,9698
|
|
60
60
|
openai_sdk_helpers/structure/base.py,sha256=UrnNNU9qQ9mEES8MB9y6QESbDgPXH47XW8LVWSxYUYM,25280
|
|
61
|
-
openai_sdk_helpers/structure/classification.py,sha256=
|
|
61
|
+
openai_sdk_helpers/structure/classification.py,sha256=SYrrsv0Y2A2kXhL3jbn7lWnTb5jB_UE-cx-sJSRCxEA,17312
|
|
62
62
|
openai_sdk_helpers/structure/extraction.py,sha256=wODP0iLAhhsdQkMWRYPYTiLUMU8bFMKiBjPl3PKUleg,37335
|
|
63
63
|
openai_sdk_helpers/structure/prompt.py,sha256=ZfsaHdA0hj5zmZDrOdpXjCsC8U-jjzwFG4JBsWYiaH4,1535
|
|
64
64
|
openai_sdk_helpers/structure/responses.py,sha256=WUwh0DhXj24pkvgqH1FMkdx5V2ArdvdtrDN_fuMBtDU,4882
|
|
@@ -92,8 +92,8 @@ openai_sdk_helpers/vector_storage/__init__.py,sha256=L5LxO09puh9_yBB9IDTvc1CvVkA
|
|
|
92
92
|
openai_sdk_helpers/vector_storage/cleanup.py,sha256=sZ4ZSTlnjF52o9Cc8A9dTX37ZYXXDxS_fdIpoOBWvrg,3666
|
|
93
93
|
openai_sdk_helpers/vector_storage/storage.py,sha256=t_ukacaXRa9EXE4-3BxsrB4Rjhu6nTu7NA9IjCJBIpQ,24259
|
|
94
94
|
openai_sdk_helpers/vector_storage/types.py,sha256=jTCcOYMeOpZWvcse0z4T3MVs-RBOPC-fqWTBeQrgafU,1639
|
|
95
|
-
openai_sdk_helpers-0.6.
|
|
96
|
-
openai_sdk_helpers-0.6.
|
|
97
|
-
openai_sdk_helpers-0.6.
|
|
98
|
-
openai_sdk_helpers-0.6.
|
|
99
|
-
openai_sdk_helpers-0.6.
|
|
95
|
+
openai_sdk_helpers-0.6.4.dist-info/METADATA,sha256=l5XBsVFPOrOSDskGR0ZhgKHjJFLtO_-ZgWrRjXi1_bU,24622
|
|
96
|
+
openai_sdk_helpers-0.6.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
97
|
+
openai_sdk_helpers-0.6.4.dist-info/entry_points.txt,sha256=gEOD1ZeXe8d2OP-KzUlG-b_9D9yUZTCt-GFW3EDbIIY,63
|
|
98
|
+
openai_sdk_helpers-0.6.4.dist-info/licenses/LICENSE,sha256=CUhc1NrE50bs45tcXF7OcTQBKEvkUuLqeOHgrWQ5jaA,1067
|
|
99
|
+
openai_sdk_helpers-0.6.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|