hamtaa-texttools 1.3.2__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/METADATA +40 -47
- hamtaa_texttools-2.1.0.dist-info/RECORD +30 -0
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/WHEEL +1 -1
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/licenses/LICENSE +1 -1
- texttools/__init__.py +1 -1
- texttools/core/internal_models.py +16 -7
- texttools/core/operators/async_operator.py +10 -16
- texttools/core/operators/sync_operator.py +10 -16
- texttools/core/utils.py +260 -0
- texttools/models.py +77 -22
- texttools/prompts/{rewrite.yaml → augment.yaml} +3 -3
- texttools/prompts/categorize.yaml +7 -8
- texttools/prompts/extract_entities.yaml +2 -2
- texttools/prompts/extract_keywords.yaml +4 -2
- texttools/prompts/{check_fact.yaml → is_fact.yaml} +5 -4
- texttools/prompts/is_question.yaml +1 -1
- texttools/prompts/merge_questions.yaml +8 -6
- texttools/prompts/propositionize.yaml +11 -7
- texttools/prompts/run_custom.yaml +3 -1
- texttools/prompts/summarize.yaml +3 -3
- texttools/prompts/to_question.yaml +60 -0
- texttools/prompts/translate.yaml +4 -4
- texttools/tools/async_tools.py +152 -169
- texttools/tools/sync_tools.py +138 -150
- hamtaa_texttools-1.3.2.dist-info/RECORD +0 -31
- texttools/core/engine.py +0 -262
- texttools/prompts/subject_to_question.yaml +0 -26
- texttools/prompts/text_to_question.yaml +0 -26
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/top_level.txt +0 -0
texttools/core/utils.py
ADDED
@@ -0,0 +1,260 @@
+import asyncio
+import math
+import random
+import re
+from functools import lru_cache
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from .exceptions import PromptError
+
+
+class OperatorUtils:
+    """
+    Collection of utilities used in operators
+    """
+
+    @staticmethod
+    @lru_cache(maxsize=32)
+    def _load_prompt_yaml(prompt_file: str) -> dict:
+        base_dir = Path(__file__).parent.parent / "prompts"
+        prompt_path = base_dir / prompt_file
+
+        if not prompt_path.exists():
+            raise PromptError(f"Prompt file not found: {prompt_file}")
+
+        try:
+            return yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
+        except yaml.YAMLError as e:
+            raise PromptError(f"Invalid YAML in {prompt_file}: {e}")
+
+    @staticmethod
+    def load_prompt(
+        prompt_file: str, text: str, mode: str, **extra_kwargs
+    ) -> dict[str, str]:
+        try:
+            data = OperatorUtils._load_prompt_yaml(prompt_file)
+
+            if "main_template" not in data:
+                raise PromptError(f"Missing 'main_template' in {prompt_file}")
+
+            if "analyze_template" not in data:
+                raise PromptError(f"Missing 'analyze_template' in {prompt_file}")
+
+            if mode and mode not in data.get("main_template", {}):
+                raise PromptError(f"Mode '{mode}' not found in {prompt_file}")
+
+            main_template = (
+                data["main_template"][mode]
+                if mode and isinstance(data["main_template"], dict)
+                else data["main_template"]
+            )
+
+            analyze_template = (
+                data["analyze_template"][mode]
+                if mode and isinstance(data["analyze_template"], dict)
+                else data["analyze_template"]
+            )
+
+            if not main_template or not main_template.strip():
+                raise PromptError(
+                    f"Empty main_template in {prompt_file}"
+                    + (f" for mode '{mode}'" if mode else "")
+                )
+
+            template_configs = {
+                "main_template": main_template,
+                "analyze_template": analyze_template,
+            }
+
+            format_args = {"text": text}
+            format_args.update(extra_kwargs)
+
+            # Inject variables into the templates
+            for key, value in template_configs.items():
+                template_configs[key] = value.format(**format_args)
+
+            return template_configs
+
+        except yaml.YAMLError as e:
+            raise PromptError(f"Invalid YAML in {prompt_file}: {e}")
+        except KeyError as e:
+            raise PromptError(f"Missing template variable: {e}")
+        except Exception as e:
+            raise PromptError(f"Failed to load prompt {prompt_file}: {e}")
+
+    @staticmethod
+    def build_main_prompt(
+        main_template: str,
+        analysis: str | None,
+        output_lang: str | None,
+        user_prompt: str | None,
+    ) -> str:
+        parts = []
+
+        if analysis:
+            parts.append(f"Based on this analysis: {analysis}")
+        if output_lang:
+            parts.append(f"Respond only in the {output_lang} language.")
+        if user_prompt:
+            parts.append(f"Consider this instruction: {user_prompt}")
+
+        parts.append(main_template)
+        return "\n".join(parts)
+
+    @staticmethod
+    def build_message(prompt: str) -> list[dict[str, str]]:
+        return [{"role": "user", "content": prompt}]
+
+    @staticmethod
+    def extract_logprobs(completion: Any) -> list[dict]:
+        """
+        Extracts and filters logprobs from completion.
+        Skips punctuation and structural tokens.
+        """
+        logprobs_data = []
+
+        ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
+
+        for choice in completion.choices:
+            if not getattr(choice, "logprobs", None):
+                raise ValueError("Your model does not support logprobs")
+
+            for logprob_item in choice.logprobs.content:
+                if ignore_pattern.match(logprob_item.token):
+                    continue
+                token_entry = {
+                    "token": logprob_item.token,
+                    "prob": round(math.exp(logprob_item.logprob), 8),
+                    "top_alternatives": [],
+                }
+                for alt in logprob_item.top_logprobs:
+                    if ignore_pattern.match(alt.token):
+                        continue
+                    token_entry["top_alternatives"].append(
+                        {
+                            "token": alt.token,
+                            "prob": round(math.exp(alt.logprob), 8),
+                        }
+                    )
+                logprobs_data.append(token_entry)
+
+        return logprobs_data
+
+    @staticmethod
+    def get_retry_temp(base_temp: float) -> float:
+        new_temp = base_temp + random.choice([-1, 1]) * random.uniform(0.1, 0.9)
+        return max(0.0, min(new_temp, 1.5))
+
+
+class TheToolUtils:
+    """
+    Collection of utilities used in TheTool's tools
+    """
+
+    @staticmethod
+    def to_chunks(text: str, size: int, overlap: int) -> list[str]:
+        separators = ["\n\n", "\n", " ", ""]
+        is_separator_regex = False
+        keep_separator = True
+        length_function = len
+        strip_whitespace = True
+        chunk_size = size
+        chunk_overlap = overlap
+
+        def _split_text_with_regex(
+            text: str, separator: str, keep_separator: bool
+        ) -> list[str]:
+            if not separator:
+                return [text]
+            if not keep_separator:
+                return re.split(separator, text)
+            _splits = re.split(f"({separator})", text)
+            splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
+            if len(_splits) % 2 == 0:
+                splits += [_splits[-1]]
+            return [_splits[0]] + splits if _splits[0] else splits
+
+        def _join_docs(docs: list[str], separator: str) -> str | None:
+            text = separator.join(docs)
+            if strip_whitespace:
+                text = text.strip()
+            return text if text else None
+
+        def _merge_splits(splits: list[str], separator: str) -> list[str]:
+            separator_len = length_function(separator)
+            docs = []
+            current_doc = []
+            total = 0
+            for d in splits:
+                len_ = length_function(d)
+                if total + len_ + (separator_len if current_doc else 0) > chunk_size:
+                    if total > chunk_size:
+                        pass
+                    if current_doc:
+                        doc = _join_docs(current_doc, separator)
+                        if doc is not None:
+                            docs.append(doc)
+                        while total > chunk_overlap or (
+                            total + len_ + (separator_len if current_doc else 0)
+                            > chunk_size
+                            and total > 0
+                        ):
+                            total -= length_function(current_doc[0]) + (
+                                separator_len if len(current_doc) > 1 else 0
+                            )
+                            current_doc = current_doc[1:]
+                current_doc.append(d)
+                total += len_ + (separator_len if len(current_doc) > 1 else 0)
+            doc = _join_docs(current_doc, separator)
+            if doc is not None:
+                docs.append(doc)
+            return docs
+
+        def _split_text(text: str, separators: list[str]) -> list[str]:
+            final_chunks = []
+            separator = separators[-1]
+            new_separators = []
+            for i, _s in enumerate(separators):
+                separator_ = _s if is_separator_regex else re.escape(_s)
+                if not _s:
+                    separator = _s
+                    break
+                if re.search(separator_, text):
+                    separator = _s
+                    new_separators = separators[i + 1 :]
+                    break
+            separator_ = separator if is_separator_regex else re.escape(separator)
+            splits = _split_text_with_regex(text, separator_, keep_separator)
+            _separator = "" if keep_separator else separator
+            good_splits = []
+            for s in splits:
+                if length_function(s) < chunk_size:
+                    good_splits.append(s)
+                else:
+                    if good_splits:
+                        merged_text = _merge_splits(good_splits, _separator)
+                        final_chunks.extend(merged_text)
+                        good_splits = []
+                    if not new_separators:
+                        final_chunks.append(s)
+                    else:
+                        other_info = _split_text(s, new_separators)
+                        final_chunks.extend(other_info)
+            if good_splits:
+                merged_text = _merge_splits(good_splits, _separator)
+                final_chunks.extend(merged_text)
+            return final_chunks
+
+        return _split_text(text, separators)
+
+    @staticmethod
+    async def run_with_timeout(coro: Any, timeout: float | None) -> Any:
+        if timeout is None:
+            return await coro
+        try:
+            return await asyncio.wait_for(coro, timeout=timeout)
+        except asyncio.TimeoutError:
+            raise TimeoutError(f"Operation exceeded timeout of {timeout} seconds")
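Usage note: a minimal sketch of how the new chunking helper behaves, assuming the wheel is installed; the sample text and sizes below are illustrative, not from the package:

    from texttools.core.utils import TheToolUtils

    # Recursive splitting: tries "\n\n" first, then "\n", then " ", then ""
    sample = (
        "TheTool splits long inputs before sending them to a model. "
        "It prefers paragraph breaks, then line breaks, then spaces.\n\n"
    ) * 5
    chunks = TheToolUtils.to_chunks(sample, size=200, overlap=40)
    print(len(chunks), [len(c) for c in chunks])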
texttools/models.py
CHANGED
@@ -3,12 +3,12 @@ from __future__ import annotations
 from datetime import datetime
 from typing import Any

-from pydantic import BaseModel
+from pydantic import BaseModel, Field


 class ToolOutputMetadata(BaseModel):
     tool_name: str
-    processed_at: datetime = datetime.now
+    processed_at: datetime = Field(default_factory=datetime.now)
     execution_time: float | None = None


@@ -19,22 +19,26 @@ class ToolOutput(BaseModel):
     errors: list[str] = []
     metadata: ToolOutputMetadata | None = None

-    def
-        return
+    def is_successful(self) -> bool:
+        return not self.errors and self.result is not None

+    def to_dict(self, exclude_none: bool = False) -> dict:
+        return self.model_dump(exclude_none=exclude_none)

-
-
-
-
-
-
-
+    def to_json(self, indent: int = 2, exclude_none: bool = False) -> str:
+        return self.model_dump_json(indent=indent, exclude_none=exclude_none)
+
+
+class Node(BaseModel):
+    name: str
+    description: str | None
+    level: int
+    children: dict[str, Node] | None = Field(default_factory=dict)


 class CategoryTree:
     def __init__(self):
-        self._root = Node(name="root", description="root", level=0
+        self._root = Node(name="root", description="root", level=0)
         self._all_nodes = {"root": self._root}

     def get_all_nodes(self) -> dict[str, Node]:
@@ -56,33 +60,84 @@
         raise ValueError(f"Cannot add {name} category twice")

         parent = self.get_node(parent_name)
-
         if not parent:
-            raise ValueError(f"Parent category
+            raise ValueError(f"Parent category {parent_name} not found")

         node_data = {
             "name": name,
             "description": description if description else "No description provided",
             "level": parent.level + 1,
-            "parent": parent,
         }

         new_node = Node(**node_data)
         parent.children[name] = new_node
         self._all_nodes[name] = new_node

-    def
+    def _find_parent(self, name: str) -> Node | None:
+        def traverse(node: Node) -> Node | None:
+            if name in node.children:
+                return node
+            for child in node.children.values():
+                found = traverse(child)
+                if found:
+                    return found
+            return None
+
+        if name == "root":
+            return None
+
+        return traverse(self._root)
+
+    def remove_node(self, name: str, remove_children: bool = True) -> None:
         if name == "root":
             raise ValueError("Cannot remove the root node")

         node = self.get_node(name)
         if not node:
-            raise ValueError(f"Category:
+            raise ValueError(f"Category: {name} not found")
+
+        parent = self._find_parent(name)
+        if not parent and name != "root":
+            raise ValueError("Parent not found, tree inconsistent")
+
+        if remove_children:
+            # Recursively remove children
+            for child_name in list(node.children.keys()):
+                self.remove_node(child_name, remove_children=True)
+        else:
+            # Move children to parent (grandparent for the children)
+            for child_name, child in list(node.children.items()):
+                if child_name in parent.children:
+                    raise ValueError(f"Name conflict when moving child {child_name}")
+                parent.children[child_name] = child
+
+                # Update levels for moved subtree
+                def update_levels(n: Node, new_level: int):
+                    n.level = new_level
+                    for c in n.children.values():
+                        update_levels(c, new_level + 1)
+
+                update_levels(child, parent.level + 1)
+
+        del parent.children[name]
+        del self._all_nodes[name]

-
-
+    def dump_tree(self) -> dict:
+        return self._root.model_dump()

-
-
+    def _index_subtree(self, node: Node):
+        if node.name in self._all_nodes:
+            raise ValueError(f"Duplicate node name: {node.name}")

-
+        self._all_nodes[node.name] = node
+
+        for child in node.children.values():
+            self._index_subtree(child)
+
+    @classmethod
+    def from_dict(cls, root: dict) -> CategoryTree:
+        tree = cls()
+        tree._root = Node.model_validate(root)
+        tree._all_nodes = {}
+        tree._index_subtree(tree._root)
+        return tree
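Usage note: a minimal sketch of the new CategoryTree serialization round-trip. from_dict, remove_node, get_all_nodes, and dump_tree all appear in this diff; the category names below are hypothetical:

    from texttools.models import CategoryTree

    spec = {
        "name": "root",
        "description": "root",
        "level": 0,
        "children": {
            "science": {"name": "science", "description": None, "level": 1},
        },
    }
    tree = CategoryTree.from_dict(spec)   # validates the dict and re-indexes every node
    tree.remove_node("science")           # removes the node (and its children by default)
    assert "science" not in tree.get_all_nodes()
    print(tree.dump_tree())               # plain dict via Node.model_dump()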
texttools/prompts/{rewrite.yaml → augment.yaml}
CHANGED
@@ -15,7 +15,7 @@ main_template:
     - Avoid Minor Changes: Do not just add/remove a few words or swap names. Create a fundamentally different sentence.

     Respond only in JSON format:
-    {{"result": "
+    {{"result": "rewriteen_text"}}

     Anchor Text:
     "{text}"
@@ -32,7 +32,7 @@ main_template:
     - Maintain Similar Length: The generated sentence should be of roughly the same length and level of detail as the Anchor.

     Respond only in JSON format:
-    {{"result": "
+    {{"result": "rewriteen_text"}}

     Anchor Text:
     "{text}"
@@ -53,7 +53,7 @@ main_template:
     - Maintain Similar Length: The generated sentence should be of roughly the same length and level of detail as the Anchor.

     Respond only in JSON format:
-    {{"result": "
+    {{"result": "rewriteen_text"}}

     Anchor Text:
     "{text}"
texttools/prompts/categorize.yaml
CHANGED
@@ -14,23 +14,22 @@ main_template: |
   - If descriptions are missing or empty, rely on the category name.
   - If the correct answer cannot be determined with certainty, choose the most likely one.

-
+  Respond only in JSON format:
   {{
-  "reason": "
-  "result": "
+  "reason": "explanation",
+  "result": "category_name",
   }}

-  Available categories
+  Available categories:
   {category_list}

   Here is the text:
   {text}

 analyze_template: |
-
-  To improve categorization,
-  Analyze the given text and write its main idea and a short analysis of
-  Analysis should be very short.
+  The task is to categorize the given text.
+  To improve categorization, you must write an analysis of the text.
+  Analyze the given text and write its main idea and a short analysis of it.

   Here is the text:
   {text}
texttools/prompts/extract_keywords.yaml
CHANGED
@@ -12,7 +12,7 @@ main_template:
     - Output between 3 and 7 keywords based on the input length.

     Respond only in JSON format:
-    {{"result": ["keyword1", "keyword2",
+    {{"result": ["keyword1", "keyword2", ...]}}

     Here is the text:
     {text}
@@ -34,7 +34,7 @@ main_template:
     - Long texts (more than 4 paragraphs): 6–7 keywords

     Respond only in JSON format:
-    {{"result": ["keyword1", "keyword2",
+    {{"result": ["keyword1", "keyword2", ...]}}

     Here is the text:
     {text}
@@ -57,7 +57,9 @@ main_template:
     Here is the text:
     {text}

+
 analyze_template:
+
   auto: |
     Analyze the following text to identify its main topics, concepts, and important terms.
     Provide a concise summary of your findings that will help in extracting relevant keywords.
texttools/prompts/{check_fact.yaml → is_fact.yaml}
CHANGED
@@ -13,12 +13,13 @@ main_template: |
   {source_text}

 analyze_template: |
-  You
-  summarized analysis that could help in determining
-  be concluded from the source or not.
+  You must analyze a statement and a source text and provide a brief,
+  summarized analysis that could help in determining whether the statement
+  can be concluded from the source or not.

   The statement is:
   {text}

   The source text is:
-  {source_text}
+  {source_text}
+
texttools/prompts/merge_questions.yaml
CHANGED
@@ -1,6 +1,6 @@
 main_template:

-
+  simple: |
    You are a language expert.
    I will give you a list of questions that are semantically similar.
    Your task is to merge them into one unified question.
@@ -12,27 +12,29 @@ main_template:
    - Does not omit any unique idea from the originals.

    Respond only in JSON format:
-    {{"result": "
+    {{"result": "merged_question"}}

    Here is the questions:
    {text}

-
+  stepwise: |
    You are an AI assistant helping to unify semantically similar questions.
    First, briefly extract the unique intent or content from each input question.
    Then, write one merged question that combines all their content clearly and naturally, without redundancy.
+
    Step 1: Extract key ideas.
    Step 2: Write the final merged question.

    Respond only in JSON format:
-    {{"result": "
+    {{"result": "merged_question"}}

    Here is the questions:
    {text}

+
 analyze_template:

-
+  simple: |
    You are a language expert.
    Analyze the following questions to identify their core intent, key concepts,
    and the specific information they are seeking.
@@ -42,7 +44,7 @@ analyze_template:
    Here is the question:
    {text}

-
+  stepwise: |
    Analyze the following questions to identify their exact wording, phrasing,
    and the literal meaning it conveys.
    Provide a brief, summarized analysis of their linguistic structure and current meaning,
texttools/prompts/propositionize.yaml
CHANGED
@@ -6,19 +6,23 @@ main_template: |
   A single, self-contained statement of fact that is concise and verifiable.

   Strict Guidelines:
-
-
-
-
+  - Remove Meta-Data: STRICTLY EXCLUDE all citations, references, URLs, source attributions (e.g., "Source: makarem.ir"), and conversational fillers (e.g., "Based on the documents...", "In conclusion...").
+  - Resolve Context: Replace pronouns ("it", "this", "they") with the specific nouns they refer to. Each proposition must make sense in isolation.
+  - Preserve Logic: Keep conditions attached to their facts. Do not split a rule from its condition (e.g., "If X, then Y" should be one proposition).
+  - No Redundancy: Do not extract summary statements that merely repeat facts already listed.

-
+  Respond only in JSON format:
+  {{"result": ["text1", "text2", ...]}}
+
+  Here is the text:
   {text}

 analyze_template: |
-
+  You must analyze this text snippet and think about where we can split sentence to atomic meaningful propositions.
   An atomic proposition is a single, self-contained fact that is concise,
   verifiable, and does not rely on external context.
   You just have to think around the possible propositions in the text and how a proposition can be made.

   Here is the text:
-  {text}
+  {text}
+
texttools/prompts/summarize.yaml
CHANGED
@@ -1,11 +1,11 @@
 main_template: |
-  You are
+  You are an expert summarizer.
   You must summarize the given text, preserving its meaning.

   Respond only in JSON format:
-  {{"result": "
+  {{"result": "summary"}}

-
+  Here is the text:
   {text}

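A note on the doubled braces used throughout these templates: OperatorUtils.load_prompt (added in texttools/core/utils.py above) fills templates with str.format, so {{...}} survives as literal JSON braces while placeholders like {text} are substituted. A quick illustration:

    template = 'Respond only in JSON format:\n{{"result": "summary"}}\n\nHere is the text:\n{text}'
    print(template.format(text="Example input"))
    # Respond only in JSON format:
    # {"result": "summary"}
    #
    # Here is the text:
    # Example input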