hamtaa-texttools 1.1.16__py3-none-any.whl → 1.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.16.dist-info → hamtaa_texttools-1.1.18.dist-info}/METADATA +3 -2
- {hamtaa_texttools-1.1.16.dist-info → hamtaa_texttools-1.1.18.dist-info}/RECORD +17 -15
- texttools/__init__.py +1 -1
- texttools/batch/batch_runner.py +75 -64
- texttools/{tools/internals → internals}/async_operator.py +96 -48
- texttools/internals/exceptions.py +28 -0
- texttools/{tools/internals → internals}/models.py +63 -56
- texttools/internals/prompt_loader.py +80 -0
- texttools/{tools/internals → internals}/sync_operator.py +92 -47
- texttools/prompts/propositionize.yaml +15 -0
- texttools/tools/async_tools.py +627 -321
- texttools/tools/sync_tools.py +625 -319
- texttools/tools/internals/prompt_loader.py +0 -56
- {hamtaa_texttools-1.1.16.dist-info → hamtaa_texttools-1.1.18.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-1.1.16.dist-info → hamtaa_texttools-1.1.18.dist-info}/licenses/LICENSE +0 -0
- {hamtaa_texttools-1.1.16.dist-info → hamtaa_texttools-1.1.18.dist-info}/top_level.txt +0 -0
- /texttools/{tools/internals → internals}/formatters.py +0 -0
- /texttools/{tools/internals → internals}/operator_utils.py +0 -0
texttools/{tools/internals → internals}/models.py

```diff
@@ -6,19 +6,24 @@ from pydantic import BaseModel, Field, create_model
 
 class ToolOutput(BaseModel):
     result: Any = None
-    analysis: str = ""
     logprobs: list[dict[str, Any]] = []
-
+    analysis: str = ""
+    process: str | None = None
    processed_at: datetime = datetime.now()
-    execution_time: float =
+    execution_time: float | None = None
     errors: list[str] = []
 
     def __repr__(self) -> str:
-        return f"
+        return f"""
+        ToolOutput(process='{self.process}', result_type='{type(self.result)}',
+        result='{self.result}', analysis='{self.analysis}',
+        logprobs='{self.logprobs}', errors='{self.errors}',
+        processed_at='{self.processed_at}', execution_time='{self.execution_time}'
+        """
 
 
 class StrOutput(BaseModel):
-    result: str = Field(..., description="The output string")
+    result: str = Field(..., description="The output string", example="text")
 
 
 class BoolOutput(BaseModel):
```
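For orientation, a minimal usage sketch of the reworked model (the import path follows the `texttools/{tools/internals → internals}` move in the file list above; the field values, and the purpose of `process`, are assumptions):

```python
# Sketch only: field names/types come from the diff above, values are invented.
from texttools.internals.models import ToolOutput

output = ToolOutput(
    process="propositionize",  # new optional field (purpose assumed: names the run)
    result=["fact one", "fact two"],
)
print(repr(output))  # uses the new multi-line __repr__ added in 1.1.18
# execution_time stays None until a tool records a duration
```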
```diff
@@ -37,13 +42,15 @@ class ListDictStrStrOutput(BaseModel):
     result: list[dict[str, str]] = Field(
         ...,
         description="List of dictionaries containing string key-value pairs",
-        example=[{"text": "Mohammad", "type": "PER"}],
+        example=[{"text": "Mohammad", "type": "PER"}, {"text": "Iran", "type": "LOC"}],
     )
 
 
 class ReasonListStrOutput(BaseModel):
     reason: str = Field(..., description="Thinking process that led to the output")
-    result: list[str] = Field(
+    result: list[str] = Field(
+        ..., description="The output list of strings", example=["text_1", "text_2"]
+    )
 
 
 class Node(BaseModel):
```
```diff
@@ -51,14 +58,44 @@ class Node(BaseModel):
     name: str
     level: int
     parent_id: int | None
-    description: str
+    description: str
 
 
 class CategoryTree:
     def __init__(self, tree_name):
-        self.
-
-
+        self._root = Node(
+            node_id=0, name=tree_name, level=0, parent_id=None, description="Root node"
+        )
+        self._all_nodes: list[Node] = [self._root]
+        self._new_id = 1
+
+    def get_all_nodes(self) -> list[Node]:
+        return self._all_nodes
+
+    def get_level_count(self) -> int:
+        return max([item.level for item in self._all_nodes])
+
+    def get_node(self, identifier: int | str) -> Node | None:
+        if isinstance(identifier, str):
+            for node in self.get_all_nodes():
+                if node.name == identifier:
+                    return node
+            return None
+        elif isinstance(identifier, int):
+            for node in self.get_all_nodes():
+                if node.node_id == identifier:
+                    return node
+            return None
+        else:
+            return None
+
+    def get_children(self, parent_node: Node) -> list[Node] | None:
+        children = [
+            node
+            for node in self.get_all_nodes()
+            if parent_node.node_id == node.parent_id
+        ]
+        return children if children else None
 
     def add_node(
         self,
@@ -66,12 +103,12 @@ class CategoryTree:
         parent_name: str | None = None,
         description: str | None = None,
     ) -> None:
-        if self.find_node(node_name):
+        if self.get_node(node_name):
             raise ValueError(f"{node_name} has been chosen for another category before")
 
         if parent_name:
-            parent_node = self.find_node(parent_name)
-            if parent_node
+            parent_node = self.get_node(parent_name)
+            if not parent_node:
                 raise ValueError(f"Parent category '{parent_name}' not found")
             parent_id = parent_node.node_id
             level = parent_node.level + 1
@@ -80,61 +117,31 @@ class CategoryTree:
             parent_id = 0
 
         node_data = {
-            "node_id": self.new_id,
+            "node_id": self._new_id,
             "name": node_name,
             "level": level,
             "parent_id": parent_id,
+            "description": description if description else "No description provided",
         }
 
-
-
-
-        self.all_nodes.append(Node(**node_data))
-        self.new_id += 1
-
-    def get_nodes(self) -> list[Node]:
-        return self.all_nodes
-
-    def get_level_count(self) -> int:
-        return max([item.level for item in self.all_nodes])
-
-    def find_node(self, identifier: int | str) -> Node | None:
-        if isinstance(identifier, str):
-            for node in self.get_nodes():
-                if node.name == identifier:
-                    return node
-            return None
-        elif isinstance(identifier, int):
-            for node in self.get_nodes():
-                if node.node_id == identifier:
-                    return node
-            return None
-        else:
-            return None
-
-    def find_children(self, parent_node: Node) -> list[Node] | None:
-        children = [
-            node for node in self.get_nodes() if parent_node.node_id == node.parent_id
-        ]
-        return children if children else None
+        self._all_nodes.append(Node(**node_data))
+        self._new_id += 1
 
     def remove_node(self, identifier: int | str) -> None:
-        node = self.find_node(identifier)
+        node = self.get_node(identifier)
 
-        if node
+        if node:
             # Remove node's children recursively
-            children = self.find_children(node)
+            children = self.get_children(node)
 
-
-
-                self.all_nodes.remove(node)
+            if not children:
+                self._all_nodes.remove(node)
                 return
 
             for child in children:
                 self.remove_node(child.name)
 
-
-            self.all_nodes.remove(node)
+            self._all_nodes.remove(node)
         else:
             raise ValueError(f"Node with identifier: '{identifier}' not found.")
 
@@ -142,7 +149,7 @@ class CategoryTree:
         def build_dict(node: Node) -> dict:
             children = [
                 build_dict(child)
-                for child in self.all_nodes
+                for child in self._all_nodes
                 if child.parent_id == node.node_id
             ]
             return {
@@ -153,7 +160,7 @@ class CategoryTree:
                 "children": children,
             }
 
-        return {"category_tree": build_dict(self.
+        return {"category_tree": build_dict(self._root)["children"]}
 
 
 # This function is needed to create CategorizerOutput with dynamic categories
```
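A usage sketch of the renamed CategoryTree API (get_node/get_children replacing find_node/find_children); the category names and values are invented:

```python
# Sketch against the CategoryTree shown above; names/values are invented.
from texttools.internals.models import CategoryTree

tree = CategoryTree("topics")
tree.add_node("science", description="Top-level science bucket")
tree.add_node("physics", parent_name="science")  # description now auto-fills

physics = tree.get_node("physics")               # by name; int ids also work
children = tree.get_children(tree.get_node("science"))
tree.remove_node("science")                      # children are removed recursively
print(tree.get_all_nodes())                      # only the root remains
```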
texttools/internals/prompt_loader.py (new file)

```diff
@@ -0,0 +1,80 @@
+from functools import lru_cache
+from pathlib import Path
+import yaml
+
+from texttools.internals.exceptions import PromptError
+
+
+class PromptLoader:
+    """
+    Utility for loading and formatting YAML prompt templates.
+
+    Responsibilities:
+    - Load and parse YAML prompt definitions.
+    - Select the right template (by mode, if applicable).
+    - Inject variables (`{input}`, plus any extra kwargs) into the templates.
+    """
+
+    MAIN_TEMPLATE = "main_template"
+    ANALYZE_TEMPLATE = "analyze_template"
+
+    @staticmethod
+    def _build_format_args(text: str, **extra_kwargs) -> dict[str, str]:
+        # Base formatting args
+        format_args = {"input": text}
+        # Merge extras
+        format_args.update(extra_kwargs)
+        return format_args
+
+    # Use lru_cache to load each file once
+    @lru_cache(maxsize=32)
+    def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
+        """
+        Loads prompt templates from YAML file with optional mode selection.
+        """
+        try:
+            base_dir = Path(__file__).parent.parent / Path("prompts")
+            prompt_path = base_dir / prompt_file
+
+            if not prompt_path.exists():
+                raise PromptError(f"Prompt file not found: {prompt_file}")
+
+            data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
+
+            if self.MAIN_TEMPLATE not in data:
+                raise PromptError(f"Missing 'main_template' in {prompt_file}")
+
+            if mode and mode not in data.get(self.MAIN_TEMPLATE, {}):
+                raise PromptError(f"Mode '{mode}' not found in {prompt_file}")
+
+            return {
+                self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]
+                if mode and isinstance(data[self.MAIN_TEMPLATE], dict)
+                else data[self.MAIN_TEMPLATE],
+                self.ANALYZE_TEMPLATE: data.get(self.ANALYZE_TEMPLATE, {}).get(mode)
+                if mode and isinstance(data.get(self.ANALYZE_TEMPLATE), dict)
+                else data.get(self.ANALYZE_TEMPLATE, ""),
+            }
+
+        except yaml.YAMLError as e:
+            raise PromptError(f"Invalid YAML in {prompt_file}: {e}")
+        except Exception as e:
+            raise PromptError(f"Failed to load prompt {prompt_file}: {e}")
+
+    def load(
+        self, prompt_file: str, text: str, mode: str, **extra_kwargs
+    ) -> dict[str, str]:
+        try:
+            template_configs = self._load_templates(prompt_file, mode)
+            format_args = self._build_format_args(text, **extra_kwargs)
+
+            # Inject variables inside each template
+            for key in template_configs.keys():
+                template_configs[key] = template_configs[key].format(**format_args)
+
+            return template_configs
+
+        except KeyError as e:
+            raise PromptError(f"Missing template variable: {e}")
+        except Exception as e:
+            raise PromptError(f"Failed to format prompt: {e}")
```
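A sketch of driving the loader directly (its real call sites are the sync/async operators; passing mode=None for a prompt without per-mode variants is an assumption):

```python
# Sketch: the loader caches each YAML file via lru_cache, then substitutes
# {input} (and any extra kwargs) into both templates.
from texttools.internals.prompt_loader import PromptLoader

loader = PromptLoader()
configs = loader.load(
    prompt_file="propositionize.yaml",
    text="Tehran is the capital of Iran.",
    mode=None,  # assumed: propositionize.yaml defines no per-mode variants
)
print(configs["main_template"])     # main prompt with the text substituted
print(configs["analyze_template"])  # companion analysis prompt
```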
texttools/{tools/internals → internals}/sync_operator.py

```diff
@@ -5,15 +5,21 @@ import logging
 from openai import OpenAI
 from pydantic import BaseModel
 
-from texttools.tools.internals.models import ToolOutput
-from texttools.tools.internals.operator_utils import OperatorUtils
-from texttools.tools.internals.formatters import Formatter
-from texttools.tools.internals.prompt_loader import PromptLoader
+from texttools.internals.models import ToolOutput
+from texttools.internals.operator_utils import OperatorUtils
+from texttools.internals.formatters import Formatter
+from texttools.internals.prompt_loader import PromptLoader
+from texttools.internals.exceptions import (
+    TextToolsError,
+    LLMError,
+    ValidationError,
+    PromptError,
+)
 
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-logger = logging.getLogger("texttools.
+logger = logging.getLogger("texttools.sync_operator")
 
 
 class Operator:
```
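The new texttools/internals/exceptions.py (+28 lines) is not expanded in this view; given the import block above, its shape is presumably a small hierarchy along these lines (a sketch, not the actual file):

```python
# Sketch of the assumed exception hierarchy; only the four class names are
# confirmed by the import block above.
class TextToolsError(Exception):
    """Base class for all texttools errors."""


class PromptError(TextToolsError):
    """Raised when a prompt file is missing, malformed, or mis-templated."""


class LLMError(TextToolsError):
    """Raised when the LLM call fails or returns an unusable response."""


class ValidationError(TextToolsError):
    """Raised when output fails the caller-supplied validator after retries."""
```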
```diff
@@ -35,15 +41,33 @@ class Operator:
         Calls OpenAI API for analysis using the configured prompt template.
         Returns the analyzed content as a string.
         """
-
-
-
-
-
-
-
-
-
+        try:
+            analyze_prompt = prompt_configs["analyze_template"]
+
+            if not analyze_prompt:
+                raise PromptError("Analyze template is empty")
+
+            analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
+            completion = self._client.chat.completions.create(
+                model=self._model,
+                messages=analyze_message,
+                temperature=temperature,
+            )
+
+            if not completion.choices:
+                raise LLMError("No choices returned from LLM")
+
+            analysis = completion.choices[0].message.content.strip()
+
+            if not analysis:
+                raise LLMError("Empty analysis response")
+
+            return analysis.strip()
+
+        except Exception as e:
+            if isinstance(e, (PromptError, LLMError)):
+                raise
+            raise LLMError(f"Analysis failed: {e}")
 
     def _parse_completion(
         self,
@@ -58,23 +82,35 @@ class Operator:
         Parses a chat completion using OpenAI's structured output format.
         Returns both the parsed object and the raw completion for logprobs.
         """
-
-
-
-
-
-
+        try:
+            request_kwargs = {
+                "model": self._model,
+                "messages": message,
+                "response_format": output_model,
+                "temperature": temperature,
+            }
+
+            if logprobs:
+                request_kwargs["logprobs"] = True
+                request_kwargs["top_logprobs"] = top_logprobs
+            if priority:
+                request_kwargs["extra_body"] = {"priority": priority}
+            completion = self._client.beta.chat.completions.parse(**request_kwargs)
+
+            if not completion.choices:
+                raise LLMError("No choices returned from LLM")
+
+            parsed = completion.choices[0].message.parsed
 
-
-
-                request_kwargs["top_logprobs"] = top_logprobs
+            if not parsed:
+                raise LLMError("Failed to parse LLM response")
 
-
-            request_kwargs["extra_body"] = {"priority": priority}
+            return parsed, completion
 
-
-
-
+        except Exception as e:
+            if isinstance(e, LLMError):
+                raise
+            raise LLMError(f"Completion failed: {e}")
 
     def run(
         self,
```
```diff
@@ -96,12 +132,13 @@ class Operator:
         **extra_kwargs,
     ) -> ToolOutput:
         """
-        Execute the LLM pipeline with the given input text.
+        Execute the LLM pipeline with the given input text. (Sync)
         """
-        prompt_loader = PromptLoader()
-        formatter = Formatter()
-        output = ToolOutput()
         try:
+            prompt_loader = PromptLoader()
+            formatter = Formatter()
+            output = ToolOutput()
+
             # Prompt configs contain two keys: main_template and analyze template, both are string
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
@@ -140,6 +177,9 @@ class Operator:
 
             messages = formatter.user_merge_format(messages)
 
+            if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
+                raise ValueError("top_logprobs should be an integer greater than 1")
+
             parsed, completion = self._parse_completion(
                 messages, output_model, temperature, logprobs, top_logprobs, priority
             )
@@ -148,6 +188,15 @@ class Operator:
 
             # Retry logic if validation fails
             if validator and not validator(output.result):
+                if (
+                    not isinstance(max_validation_retries, int)
+                    or max_validation_retries < 1
+                ):
+                    raise ValueError(
+                        "max_validation_retries should be a positive integer"
+                    )
+
+                succeeded = False
                 for attempt in range(max_validation_retries):
                     logger.warning(
                         f"Validation failed, retrying for the {attempt + 1} time."
@@ -155,6 +204,7 @@ class Operator:
 
                     # Generate new temperature for retry
                     retry_temperature = OperatorUtils.get_retry_temp(temperature)
+
                     try:
                         parsed, completion = self._parse_completion(
                             messages,
@@ -162,28 +212,23 @@ class Operator:
                             retry_temperature,
                             logprobs,
                             top_logprobs,
+                            priority=priority,
                         )
 
                         output.result = parsed.result
 
                         # Check if retry was successful
                         if validator(output.result):
-
-                                f"Validation passed on retry attempt {attempt + 1}"
-                            )
+                            succeeded = True
                             break
-                        else:
-                            logger.warning(
-                                f"Validation still failing after retry attempt {attempt + 1}"
-                            )
 
-                    except
+                    except LLMError as e:
                         logger.error(f"Retry attempt {attempt + 1} failed: {e}")
-                        # Continue to next retry attempt if this one fails
 
-
-
-
+                if not succeeded:
+                    raise ValidationError(
+                        f"Validation failed after {max_validation_retries} retries"
+                    )
 
             if logprobs:
                 output.logprobs = OperatorUtils.extract_logprobs(completion)
```
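Taken together, the retry flow now fails loudly with ValidationError instead of silently exhausting retries. A hypothetical call (argument names are pieced together from the fragments of run() visible in this diff and may not match the full signature; `operator` is an already-built Operator):

```python
# Hypothetical: demonstrates the new fail-loudly contract of Operator.run().
from texttools.internals.exceptions import ValidationError
from texttools.internals.models import ReasonListStrOutput

def non_empty(result) -> bool:
    return bool(result)

try:
    output = operator.run(
        text="Tehran is the capital of Iran.",  # assumed parameter name
        prompt_file="propositionize.yaml",
        output_model=ReasonListStrOutput,
        validator=non_empty,
        max_validation_retries=3,
    )
except ValidationError as e:
    # 1.1.18 raises here; 1.1.16 returned the output with errors appended
    print(f"gave up after retries: {e}")
```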
```diff
@@ -195,7 +240,7 @@ class Operator:
 
             return output
 
+        except (PromptError, LLMError, ValidationError):
+            raise
         except Exception as e:
-
-            output.errors.append(str(e))
-            return output
+            raise TextToolsError(f"Unexpected error in operator: {e}")
```
texttools/prompts/propositionize.yaml (new file)

```diff
@@ -0,0 +1,15 @@
+main_template: |
+  You are an expert in breaking down text into atomic propositions in that language.
+  An atomic proposition is a single, self-contained fact that is concise, verifiable,
+  and does not rely on external context.
+  Each proposition must stand alone.
+  Rewrite sentences if needed to keep the context saved in each sentence.
+  Extract the atomic propositions of this text:
+  {input}
+
+analyze_template: |
+  We want to analyze this text snippet and think about where we can split sentence to atomic meaningful propositions.
+  An atomic proposition is a single, self-contained fact that is concise,
+  verifiable, and does not rely on external context.
+  You just have to think around the possible propositions in the text and how a proposition can be made.
+  {input}
```
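As an invented illustration of what this prompt asks the model to produce (not from the package or its tests):

```python
# Invented input/output pair showing the intended "atomic proposition" split.
text = "Marie Curie, who was born in Warsaw, won two Nobel Prizes."
expected_propositions = [
    "Marie Curie was born in Warsaw.",
    "Marie Curie won two Nobel Prizes.",
]
```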