hamtaa-texttools 1.1.20__py3-none-any.whl → 1.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.20.dist-info → hamtaa_texttools-1.1.21.dist-info}/METADATA +8 -27
- hamtaa_texttools-1.1.21.dist-info/RECORD +32 -0
- texttools/batch/batch_config.py +14 -1
- texttools/batch/batch_runner.py +1 -1
- texttools/internals/async_operator.py +45 -79
- texttools/internals/models.py +74 -105
- texttools/internals/operator_utils.py +2 -26
- texttools/internals/prompt_loader.py +3 -20
- texttools/internals/sync_operator.py +44 -78
- texttools/prompts/README.md +2 -2
- texttools/prompts/categorize.yaml +35 -77
- texttools/prompts/check_fact.yaml +2 -2
- texttools/prompts/extract_entities.yaml +2 -2
- texttools/prompts/extract_keywords.yaml +6 -6
- texttools/prompts/is_question.yaml +2 -2
- texttools/prompts/merge_questions.yaml +4 -4
- texttools/prompts/propositionize.yaml +2 -2
- texttools/prompts/rewrite.yaml +6 -6
- texttools/prompts/run_custom.yaml +1 -1
- texttools/prompts/subject_to_question.yaml +2 -2
- texttools/prompts/summarize.yaml +2 -2
- texttools/prompts/text_to_question.yaml +2 -2
- texttools/prompts/translate.yaml +2 -2
- texttools/tools/async_tools.py +393 -485
- texttools/tools/sync_tools.py +394 -486
- hamtaa_texttools-1.1.20.dist-info/RECORD +0 -33
- texttools/batch/internals/utils.py +0 -13
- {hamtaa_texttools-1.1.20.dist-info → hamtaa_texttools-1.1.21.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-1.1.20.dist-info → hamtaa_texttools-1.1.21.dist-info}/licenses/LICENSE +0 -0
- {hamtaa_texttools-1.1.20.dist-info → hamtaa_texttools-1.1.21.dist-info}/top_level.txt +0 -0
- /texttools/batch/{internals/batch_manager.py → batch_manager.py} +0 -0
|
@@ -12,20 +12,12 @@ class PromptLoader:
|
|
|
12
12
|
Responsibilities:
|
|
13
13
|
- Load and parse YAML prompt definitions.
|
|
14
14
|
- Select the right template (by mode, if applicable).
|
|
15
|
-
- Inject variables (`{input}`, plus any extra kwargs) into the templates.
|
|
15
|
+
- Inject variables (`{text}`, plus any extra kwargs) into the templates.
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
18
|
MAIN_TEMPLATE = "main_template"
|
|
19
19
|
ANALYZE_TEMPLATE = "analyze_template"
|
|
20
20
|
|
|
21
|
-
@staticmethod
|
|
22
|
-
def _build_format_args(text: str, **extra_kwargs) -> dict[str, str]:
|
|
23
|
-
# Base formatting args
|
|
24
|
-
format_args = {"input": text}
|
|
25
|
-
# Merge extras
|
|
26
|
-
format_args.update(extra_kwargs)
|
|
27
|
-
return format_args
|
|
28
|
-
|
|
29
21
|
# Use lru_cache to load each file once
|
|
30
22
|
@lru_cache(maxsize=32)
|
|
31
23
|
def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
|
|
@@ -69,16 +61,6 @@ class PromptLoader:
|
|
|
69
61
|
+ (f" for mode '{mode}'" if mode else "")
|
|
70
62
|
)
|
|
71
63
|
|
|
72
|
-
if (
|
|
73
|
-
not analyze_template
|
|
74
|
-
or not analyze_template.strip()
|
|
75
|
-
or analyze_template.strip() in ["{analyze_template}", "{}"]
|
|
76
|
-
):
|
|
77
|
-
raise PromptError(
|
|
78
|
-
"analyze_template cannot be empty"
|
|
79
|
-
+ (f" for mode '{mode}'" if mode else "")
|
|
80
|
-
)
|
|
81
|
-
|
|
82
64
|
return {
|
|
83
65
|
self.MAIN_TEMPLATE: main_template,
|
|
84
66
|
self.ANALYZE_TEMPLATE: analyze_template,
|
|
@@ -94,7 +76,8 @@ class PromptLoader:
|
|
|
94
76
|
) -> dict[str, str]:
|
|
95
77
|
try:
|
|
96
78
|
template_configs = self._load_templates(prompt_file, mode)
|
|
97
|
-
format_args =
|
|
79
|
+
format_args = {"text": text}
|
|
80
|
+
format_args.update(extra_kwargs)
|
|
98
81
|
|
|
99
82
|
# Inject variables inside each template
|
|
100
83
|
for key in template_configs.keys():
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
from typing import TypeVar, Type
|
|
2
2
|
from collections.abc import Callable
|
|
3
|
-
import logging
|
|
4
3
|
|
|
5
4
|
from openai import OpenAI
|
|
6
5
|
from pydantic import BaseModel
|
|
7
6
|
|
|
8
|
-
from texttools.internals.models import
|
|
7
|
+
from texttools.internals.models import OperatorOutput
|
|
9
8
|
from texttools.internals.operator_utils import OperatorUtils
|
|
10
9
|
from texttools.internals.prompt_loader import PromptLoader
|
|
11
10
|
from texttools.internals.exceptions import (
|
|
@@ -18,35 +17,23 @@ from texttools.internals.exceptions import (
|
|
|
18
17
|
# Base Model type for output models
|
|
19
18
|
T = TypeVar("T", bound=BaseModel)
|
|
20
19
|
|
|
21
|
-
logger = logging.getLogger("texttools.sync_operator")
|
|
22
|
-
|
|
23
20
|
|
|
24
21
|
class Operator:
|
|
25
22
|
"""
|
|
26
|
-
Core engine for running text-processing operations with an LLM
|
|
27
|
-
|
|
28
|
-
It wires together:
|
|
29
|
-
- `PromptLoader` → loads YAML prompt templates.
|
|
30
|
-
- `UserMergeFormatter` → applies formatting to messages (e.g., merging).
|
|
31
|
-
- OpenAI client → executes completions/parsed completions.
|
|
23
|
+
Core engine for running text-processing operations with an LLM.
|
|
32
24
|
"""
|
|
33
25
|
|
|
34
26
|
def __init__(self, client: OpenAI, model: str):
|
|
35
27
|
self._client = client
|
|
36
28
|
self._model = model
|
|
37
29
|
|
|
38
|
-
def
|
|
39
|
-
"""
|
|
40
|
-
Calls OpenAI API for analysis using the configured prompt template.
|
|
41
|
-
Returns the analyzed content as a string.
|
|
42
|
-
"""
|
|
30
|
+
def _analyze_completion(self, analyze_prompt: str, temperature: float) -> str:
|
|
43
31
|
try:
|
|
44
|
-
analyze_prompt = prompt_configs["analyze_template"]
|
|
45
|
-
|
|
46
32
|
if not analyze_prompt:
|
|
47
33
|
raise PromptError("Analyze template is empty")
|
|
48
34
|
|
|
49
|
-
analyze_message =
|
|
35
|
+
analyze_message = OperatorUtils.build_user_message(analyze_prompt)
|
|
36
|
+
|
|
50
37
|
completion = self._client.chat.completions.create(
|
|
51
38
|
model=self._model,
|
|
52
39
|
messages=analyze_message,
|
|
@@ -61,7 +48,7 @@ class Operator:
|
|
|
61
48
|
if not analysis:
|
|
62
49
|
raise LLMError("Empty analysis response")
|
|
63
50
|
|
|
64
|
-
return analysis
|
|
51
|
+
return analysis
|
|
65
52
|
|
|
66
53
|
except Exception as e:
|
|
67
54
|
if isinstance(e, (PromptError, LLMError)):
|
|
@@ -70,21 +57,23 @@ class Operator:
|
|
|
70
57
|
|
|
71
58
|
def _parse_completion(
|
|
72
59
|
self,
|
|
73
|
-
|
|
60
|
+
main_prompt: str,
|
|
74
61
|
output_model: Type[T],
|
|
75
62
|
temperature: float,
|
|
76
|
-
logprobs: bool
|
|
77
|
-
top_logprobs: int
|
|
78
|
-
priority: int
|
|
63
|
+
logprobs: bool,
|
|
64
|
+
top_logprobs: int,
|
|
65
|
+
priority: int,
|
|
79
66
|
) -> tuple[T, object]:
|
|
80
67
|
"""
|
|
81
68
|
Parses a chat completion using OpenAI's structured output format.
|
|
82
69
|
Returns both the parsed object and the raw completion for logprobs.
|
|
83
70
|
"""
|
|
84
71
|
try:
|
|
72
|
+
main_message = OperatorUtils.build_user_message(main_prompt)
|
|
73
|
+
|
|
85
74
|
request_kwargs = {
|
|
86
75
|
"model": self._model,
|
|
87
|
-
"messages":
|
|
76
|
+
"messages": main_message,
|
|
88
77
|
"response_format": output_model,
|
|
89
78
|
"temperature": temperature,
|
|
90
79
|
}
|
|
@@ -92,8 +81,10 @@ class Operator:
|
|
|
92
81
|
if logprobs:
|
|
93
82
|
request_kwargs["logprobs"] = True
|
|
94
83
|
request_kwargs["top_logprobs"] = top_logprobs
|
|
84
|
+
|
|
95
85
|
if priority:
|
|
96
86
|
request_kwargs["extra_body"] = {"priority": priority}
|
|
87
|
+
|
|
97
88
|
completion = self._client.beta.chat.completions.parse(**request_kwargs)
|
|
98
89
|
|
|
99
90
|
if not completion.choices:
|
|
@@ -120,24 +111,22 @@ class Operator:
|
|
|
120
111
|
user_prompt: str | None,
|
|
121
112
|
temperature: float,
|
|
122
113
|
logprobs: bool,
|
|
123
|
-
top_logprobs: int
|
|
114
|
+
top_logprobs: int,
|
|
124
115
|
validator: Callable[[object], bool] | None,
|
|
125
116
|
max_validation_retries: int | None,
|
|
117
|
+
priority: int,
|
|
126
118
|
# Internal parameters
|
|
127
119
|
prompt_file: str,
|
|
128
120
|
output_model: Type[T],
|
|
129
121
|
mode: str | None,
|
|
130
|
-
priority: int | None = 0,
|
|
131
122
|
**extra_kwargs,
|
|
132
|
-
) ->
|
|
123
|
+
) -> OperatorOutput:
|
|
133
124
|
"""
|
|
134
125
|
Execute the LLM pipeline with the given input text. (Sync)
|
|
135
126
|
"""
|
|
136
127
|
try:
|
|
137
128
|
prompt_loader = PromptLoader()
|
|
138
|
-
output = ToolOutput()
|
|
139
129
|
|
|
140
|
-
# Prompt configs contain two keys: main_template and analyze template, both are string
|
|
141
130
|
prompt_configs = prompt_loader.load(
|
|
142
131
|
prompt_file=prompt_file,
|
|
143
132
|
text=text.strip(),
|
|
@@ -145,47 +134,32 @@ class Operator:
|
|
|
145
134
|
**extra_kwargs,
|
|
146
135
|
)
|
|
147
136
|
|
|
148
|
-
|
|
137
|
+
main_prompt = ""
|
|
138
|
+
analysis = ""
|
|
149
139
|
|
|
150
140
|
if with_analysis:
|
|
151
|
-
analysis = self.
|
|
152
|
-
|
|
153
|
-
OperatorUtils.build_user_message(
|
|
154
|
-
f"Based on this analysis: {analysis}"
|
|
155
|
-
)
|
|
141
|
+
analysis = self._analyze_completion(
|
|
142
|
+
prompt_configs["analyze_template"], temperature
|
|
156
143
|
)
|
|
144
|
+
main_prompt += f"Based on this analysis:\n{analysis}\n"
|
|
157
145
|
|
|
158
146
|
if output_lang:
|
|
159
|
-
|
|
160
|
-
OperatorUtils.build_user_message(
|
|
161
|
-
f"Respond only in the {output_lang} language."
|
|
162
|
-
)
|
|
163
|
-
)
|
|
147
|
+
main_prompt += f"Respond only in the {output_lang} language.\n"
|
|
164
148
|
|
|
165
149
|
if user_prompt:
|
|
166
|
-
|
|
167
|
-
OperatorUtils.build_user_message(
|
|
168
|
-
f"Consider this instruction {user_prompt}"
|
|
169
|
-
)
|
|
170
|
-
)
|
|
171
|
-
|
|
172
|
-
messages.append(
|
|
173
|
-
OperatorUtils.build_user_message(prompt_configs["main_template"])
|
|
174
|
-
)
|
|
150
|
+
main_prompt += f"Consider this instruction {user_prompt}\n"
|
|
175
151
|
|
|
176
|
-
|
|
152
|
+
main_prompt += prompt_configs["main_template"]
|
|
177
153
|
|
|
178
154
|
if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
|
|
179
155
|
raise ValueError("top_logprobs should be an integer greater than 1")
|
|
180
156
|
|
|
181
157
|
parsed, completion = self._parse_completion(
|
|
182
|
-
|
|
158
|
+
main_prompt, output_model, temperature, logprobs, top_logprobs, priority
|
|
183
159
|
)
|
|
184
160
|
|
|
185
|
-
output.result = parsed.result
|
|
186
|
-
|
|
187
161
|
# Retry logic if validation fails
|
|
188
|
-
if validator and not validator(
|
|
162
|
+
if validator and not validator(parsed.result):
|
|
189
163
|
if (
|
|
190
164
|
not isinstance(max_validation_retries, int)
|
|
191
165
|
or max_validation_retries < 1
|
|
@@ -195,17 +169,13 @@ class Operator:
|
|
|
195
169
|
)
|
|
196
170
|
|
|
197
171
|
succeeded = False
|
|
198
|
-
for
|
|
199
|
-
|
|
200
|
-
f"Validation failed, retrying for the {attempt + 1} time."
|
|
201
|
-
)
|
|
202
|
-
|
|
203
|
-
# Generate new temperature for retry
|
|
172
|
+
for _ in range(max_validation_retries):
|
|
173
|
+
# Generate a new temperature to retry
|
|
204
174
|
retry_temperature = OperatorUtils.get_retry_temp(temperature)
|
|
205
175
|
|
|
206
176
|
try:
|
|
207
177
|
parsed, completion = self._parse_completion(
|
|
208
|
-
|
|
178
|
+
main_prompt,
|
|
209
179
|
output_model,
|
|
210
180
|
retry_temperature,
|
|
211
181
|
logprobs,
|
|
@@ -213,30 +183,26 @@ class Operator:
|
|
|
213
183
|
priority=priority,
|
|
214
184
|
)
|
|
215
185
|
|
|
216
|
-
output.result = parsed.result
|
|
217
|
-
|
|
218
186
|
# Check if retry was successful
|
|
219
|
-
if validator(
|
|
187
|
+
if validator(parsed.result):
|
|
220
188
|
succeeded = True
|
|
221
189
|
break
|
|
222
190
|
|
|
223
|
-
except LLMError
|
|
224
|
-
|
|
191
|
+
except LLMError:
|
|
192
|
+
pass
|
|
225
193
|
|
|
226
194
|
if not succeeded:
|
|
227
|
-
raise ValidationError(
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
output.process = prompt_file[:-5]
|
|
195
|
+
raise ValidationError("Validation failed after all retries")
|
|
196
|
+
|
|
197
|
+
operator_output = OperatorOutput(
|
|
198
|
+
result=parsed.result,
|
|
199
|
+
analysis=analysis if with_analysis else None,
|
|
200
|
+
logprobs=OperatorUtils.extract_logprobs(completion)
|
|
201
|
+
if logprobs
|
|
202
|
+
else None,
|
|
203
|
+
)
|
|
238
204
|
|
|
239
|
-
return
|
|
205
|
+
return operator_output
|
|
240
206
|
|
|
241
207
|
except (PromptError, LLMError, ValidationError):
|
|
242
208
|
raise
|
texttools/prompts/README.md
CHANGED
|
@@ -15,7 +15,7 @@ This folder contains YAML files for all prompts used in the project. Each file r
|
|
|
15
15
|
```yaml
|
|
16
16
|
main_template:
|
|
17
17
|
mode_1: |
|
|
18
|
-
Your main instructions here with placeholders like {input}.
|
|
18
|
+
Your main instructions here with placeholders like {text}.
|
|
19
19
|
mode_2: |
|
|
20
20
|
Optional reasoning instructions here.
|
|
21
21
|
|
|
@@ -30,6 +30,6 @@ analyze_template:
|
|
|
30
30
|
|
|
31
31
|
## Guidelines
|
|
32
32
|
1. **Naming**: Use descriptive names for each YAML file corresponding to the tool or task it serves.
|
|
33
|
-
2. **Placeholders**: Use `{input}` or other relevant placeholders to dynamically inject data.
|
|
33
|
+
2. **Placeholders**: Use `{text}` or other relevant placeholders to dynamically inject data.
|
|
34
34
|
3. **Modes**: If using modes, ensure both `main_template` and `analyze_template` contain the corresponding keys.
|
|
35
35
|
4. **Consistency**: Keep formatting consistent across files for easier parsing by scripts.
|
|
@@ -1,77 +1,35 @@
|
|
|
1
|
-
main_template:
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
- Read all provided categories carefully.
|
|
37
|
-
- Consider the user query, intent, and task explanation.
|
|
38
|
-
- Select exactly one category name from the list that best matches the user’s intent.
|
|
39
|
-
- Return only the category name, nothing else.
|
|
40
|
-
|
|
41
|
-
Rules:
|
|
42
|
-
- Never invent categories that are not in the list.
|
|
43
|
-
- If multiple categories seem possible, choose the closest match based on the description and user intent.
|
|
44
|
-
- If descriptions are missing or empty, rely on the category name.
|
|
45
|
-
- If the correct answer cannot be determined with certainty, choose the most likely one.
|
|
46
|
-
|
|
47
|
-
Output format:
|
|
48
|
-
{{
|
|
49
|
-
"reason": "Explanation of why the input belongs to the category"
|
|
50
|
-
"result": "<category_name_only>"
|
|
51
|
-
}}
|
|
52
|
-
|
|
53
|
-
Available categories with their descriptions at this level:
|
|
54
|
-
{category_list}
|
|
55
|
-
|
|
56
|
-
Do not include category descriptions at all. Only write the raw category.
|
|
57
|
-
|
|
58
|
-
The text that has to be categorized:
|
|
59
|
-
{input}
|
|
60
|
-
|
|
61
|
-
analyze_template:
|
|
62
|
-
|
|
63
|
-
category_list: |
|
|
64
|
-
We want to categorize the given text.
|
|
65
|
-
To improve categorization, we need an analysis of the text.
|
|
66
|
-
Analyze the given text and write its main idea and a short analysis of that.
|
|
67
|
-
Analysis should be very short.
|
|
68
|
-
Text:
|
|
69
|
-
{input}
|
|
70
|
-
|
|
71
|
-
category_tree: |
|
|
72
|
-
We want to categorize the given text.
|
|
73
|
-
To improve categorization, we need an analysis of the text.
|
|
74
|
-
Analyze the given text and write its main idea and a short analysis of that.
|
|
75
|
-
Analysis should be very short.
|
|
76
|
-
Text:
|
|
77
|
-
{input}
|
|
1
|
+
main_template: |
|
|
2
|
+
You are an expert classification agent.
|
|
3
|
+
You receive a list of categories.
|
|
4
|
+
|
|
5
|
+
Your task:
|
|
6
|
+
- Read all provided categories carefully.
|
|
7
|
+
- Consider the user query, intent, and task explanation.
|
|
8
|
+
- Select exactly one category name from the list that best matches the user’s intent.
|
|
9
|
+
- Return only the category name, nothing else.
|
|
10
|
+
|
|
11
|
+
Rules:
|
|
12
|
+
- Never invent categories that are not in the list.
|
|
13
|
+
- If multiple categories seem possible, choose the closest match based on the description and user intent.
|
|
14
|
+
- If descriptions are missing or empty, rely on the category name.
|
|
15
|
+
- If the correct answer cannot be determined with certainty, choose the most likely one.
|
|
16
|
+
|
|
17
|
+
Output format:
|
|
18
|
+
{{
|
|
19
|
+
"reason": "Explanation of why the input belongs to the category"
|
|
20
|
+
"result": "<category_name_only>"
|
|
21
|
+
}}
|
|
22
|
+
|
|
23
|
+
Available categories with their descriptions:
|
|
24
|
+
{category_list}
|
|
25
|
+
|
|
26
|
+
The text that has to be categorized:
|
|
27
|
+
{text}
|
|
28
|
+
|
|
29
|
+
analyze_template: |
|
|
30
|
+
We want to categorize the given text.
|
|
31
|
+
To improve categorization, we need an analysis of the text.
|
|
32
|
+
Analyze the given text and write its main idea and a short analysis of that.
|
|
33
|
+
Analysis should be very short.
|
|
34
|
+
Text:
|
|
35
|
+
{text}
|
|
@@ -5,7 +5,7 @@ main_template: |
|
|
|
5
5
|
Respond only in JSON format (Output should be a boolean):
|
|
6
6
|
{{"result": True/False}}
|
|
7
7
|
The statement is:
|
|
8
|
-
{input}
|
|
8
|
+
{text}
|
|
9
9
|
The source text is:
|
|
10
10
|
{source_text}
|
|
11
11
|
|
|
@@ -14,6 +14,6 @@ analyze_template: |
|
|
|
14
14
|
summarized analysis that could help in determining that can the statement
|
|
15
15
|
be concluded from the source or not.
|
|
16
16
|
The statement is:
|
|
17
|
-
{input}
|
|
17
|
+
{text}
|
|
18
18
|
The source text is:
|
|
19
19
|
{source_text}
|
|
@@ -12,9 +12,9 @@ main_template: |
|
|
|
12
12
|
]
|
|
13
13
|
}}
|
|
14
14
|
Here is the text:
|
|
15
|
-
{input}
|
|
15
|
+
{text}
|
|
16
16
|
|
|
17
17
|
analyze_template: |
|
|
18
18
|
Read the following text and identify any proper nouns, key concepts, or specific mentions that might represent named entities.
|
|
19
19
|
Provide a brief, summarized analysis that could help in categorizing these entities.
|
|
20
|
-
{input}
|
|
20
|
+
{text}
|
|
@@ -12,7 +12,7 @@ main_template:
|
|
|
12
12
|
- Respond only in JSON format:
|
|
13
13
|
{{"result": ["keyword1", "keyword2", etc.]}}
|
|
14
14
|
Here is the text:
|
|
15
|
-
{input}
|
|
15
|
+
{text}
|
|
16
16
|
|
|
17
17
|
threshold: |
|
|
18
18
|
You are an expert keyword extractor specialized in fine-grained concept identification.
|
|
@@ -32,7 +32,7 @@ main_template:
|
|
|
32
32
|
- Respond only in JSON format:
|
|
33
33
|
{{"result": ["keyword1", "keyword2", etc.]}}
|
|
34
34
|
Here is the text:
|
|
35
|
-
{input}
|
|
35
|
+
{text}
|
|
36
36
|
|
|
37
37
|
count: |
|
|
38
38
|
You are an expert keyword extractor with precise output requirements.
|
|
@@ -49,20 +49,20 @@ main_template:
|
|
|
49
49
|
{{"result": ["keyword1", "keyword2", "keyword3", ...]}}
|
|
50
50
|
|
|
51
51
|
Here is the text:
|
|
52
|
-
{input}
|
|
52
|
+
{text}
|
|
53
53
|
|
|
54
54
|
analyze_template:
|
|
55
55
|
auto: |
|
|
56
56
|
Analyze the following text to identify its main topics, concepts, and important terms.
|
|
57
57
|
Provide a concise summary of your findings that will help in extracting relevant keywords.
|
|
58
|
-
{input}
|
|
58
|
+
{text}
|
|
59
59
|
|
|
60
60
|
threshold: |
|
|
61
61
|
Analyze the following text to identify its main topics, concepts, and important terms.
|
|
62
62
|
Provide a concise summary of your findings that will help in extracting relevant keywords.
|
|
63
|
-
{input}
|
|
63
|
+
{text}
|
|
64
64
|
|
|
65
65
|
count: |
|
|
66
66
|
Analyze the following text to identify its main topics, concepts, and important terms.
|
|
67
67
|
Provide a concise summary of your findings that will help in extracting relevant keywords.
|
|
68
|
-
{input}
|
|
68
|
+
{text}
|
|
@@ -4,11 +4,11 @@ main_template: |
|
|
|
4
4
|
Respond only in JSON format (Output should be a boolean):
|
|
5
5
|
{{"result": True/False}}
|
|
6
6
|
Here is the text:
|
|
7
|
-
{input}
|
|
7
|
+
{text}
|
|
8
8
|
|
|
9
9
|
analyze_template: |
|
|
10
10
|
We want to analyze this text snippet to see if it contains any question or request of some kind or not.
|
|
11
11
|
Read the text, and reason about it being a request or not.
|
|
12
12
|
Summerized, short answer.
|
|
13
|
-
{input}
|
|
13
|
+
{text}
|
|
14
14
|
|
|
@@ -12,7 +12,7 @@ main_template:
|
|
|
12
12
|
- Respond only in JSON format:
|
|
13
13
|
{{"result": "string"}}
|
|
14
14
|
Here is the questions:
|
|
15
|
-
{input}
|
|
15
|
+
{text}
|
|
16
16
|
|
|
17
17
|
reason: |
|
|
18
18
|
You are an AI assistant helping to unify semantically similar questions.
|
|
@@ -23,7 +23,7 @@ main_template:
|
|
|
23
23
|
Respond only in JSON format:
|
|
24
24
|
{{"result": "string"}}
|
|
25
25
|
Here is the questions:
|
|
26
|
-
{input}
|
|
26
|
+
{text}
|
|
27
27
|
|
|
28
28
|
analyze_template:
|
|
29
29
|
|
|
@@ -34,7 +34,7 @@ analyze_template:
|
|
|
34
34
|
Provide a brief, summarized understanding of the questions' meaning that
|
|
35
35
|
will help in merging and rephrasing it accurately without changing its intent.
|
|
36
36
|
Here is the question:
|
|
37
|
-
{input}
|
|
37
|
+
{text}
|
|
38
38
|
|
|
39
39
|
reason: |
|
|
40
40
|
Analyze the following questions to identify their exact wording, phrasing,
|
|
@@ -42,5 +42,5 @@ analyze_template:
|
|
|
42
42
|
Provide a brief, summarized analysis of their linguistic structure and current meaning,
|
|
43
43
|
which will then be used to create a new question containing all of their contents.
|
|
44
44
|
Here is the question:
|
|
45
|
-
{input}
|
|
45
|
+
{text}
|
|
46
46
|
|
|
@@ -12,11 +12,11 @@ main_template: |
|
|
|
12
12
|
4. No Redundancy: Do not extract summary statements that merely repeat facts already listed.
|
|
13
13
|
|
|
14
14
|
Extract the atomic propositions from the following text:
|
|
15
|
-
{input}
|
|
15
|
+
{text}
|
|
16
16
|
|
|
17
17
|
analyze_template: |
|
|
18
18
|
We want to analyze this text snippet and think about where we can split sentence to atomic meaningful propositions.
|
|
19
19
|
An atomic proposition is a single, self-contained fact that is concise,
|
|
20
20
|
verifiable, and does not rely on external context.
|
|
21
21
|
You just have to think around the possible propositions in the text and how a proposition can be made.
|
|
22
|
-
{input}
|
|
22
|
+
{text}
|
texttools/prompts/rewrite.yaml
CHANGED
|
@@ -18,7 +18,7 @@ main_template:
|
|
|
18
18
|
{{"result": "str"}}
|
|
19
19
|
|
|
20
20
|
Anchor Text:
|
|
21
|
-
"{input}"
|
|
21
|
+
"{text}"
|
|
22
22
|
|
|
23
23
|
negative: |
|
|
24
24
|
You are an AI assistant designed to generate high-quality training data for semantic text embedding models.
|
|
@@ -35,7 +35,7 @@ main_template:
|
|
|
35
35
|
{{"result": "str"}}
|
|
36
36
|
|
|
37
37
|
Anchor Text:
|
|
38
|
-
"{input}"
|
|
38
|
+
"{text}"
|
|
39
39
|
|
|
40
40
|
hard_negative: |
|
|
41
41
|
You are an AI assistant designed to generate high-quality training data for semantic text embedding models.
|
|
@@ -57,7 +57,7 @@ main_template:
|
|
|
57
57
|
{{"result": "str"}}
|
|
58
58
|
|
|
59
59
|
Anchor Text:
|
|
60
|
-
"{input}"
|
|
60
|
+
"{text}"
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
analyze_template:
|
|
@@ -74,7 +74,7 @@ analyze_template:
|
|
|
74
74
|
Your analysis should capture the ESSENTIAL MEANING that must be preserved in any paraphrase.
|
|
75
75
|
|
|
76
76
|
Text:
|
|
77
|
-
{input}
|
|
77
|
+
{text}
|
|
78
78
|
|
|
79
79
|
negative: |
|
|
80
80
|
Analyze the following text to identify its SPECIFIC TOPIC and DOMAIN for creating a high-quality NEGATIVE sample.
|
|
@@ -88,7 +88,7 @@ analyze_template:
|
|
|
88
88
|
The goal is to find topics that are in the same domain but semantically unrelated to this specific text.
|
|
89
89
|
|
|
90
90
|
Text:
|
|
91
|
-
{input}
|
|
91
|
+
{text}
|
|
92
92
|
|
|
93
93
|
hard_negative: |
|
|
94
94
|
Analyze this text to identify EXACTLY ONE ELEMENT that can be changed to create a hard-negative sample.
|
|
@@ -107,5 +107,5 @@ analyze_template:
|
|
|
107
107
|
- 80-90% of the vocabulary
|
|
108
108
|
|
|
109
109
|
Text:
|
|
110
|
-
{input}
|
|
110
|
+
{text}
|
|
111
111
|
|
|
@@ -9,7 +9,7 @@ main_template: |
|
|
|
9
9
|
Respond only in JSON format:
|
|
10
10
|
{{"result": ["question1", "question2", ...], "reason": "string"}}
|
|
11
11
|
Here is the text:
|
|
12
|
-
{input}
|
|
12
|
+
{text}
|
|
13
13
|
|
|
14
14
|
analyze_template: |
|
|
15
15
|
Our goal is to generate questions from the given subject.
|
|
@@ -19,4 +19,4 @@ analyze_template: |
|
|
|
19
19
|
What is the subject about?
|
|
20
20
|
What point of views can we see and generate questoins from it? (Questions that real users might have.)
|
|
21
21
|
Here is the subject:
|
|
22
|
-
{input}
|
|
22
|
+
{text}
|
texttools/prompts/summarize.yaml
CHANGED
|
@@ -4,11 +4,11 @@ main_template: |
|
|
|
4
4
|
Respond only in JSON format:
|
|
5
5
|
{{"result": "string"}}
|
|
6
6
|
Provide a concise summary of the following text:
|
|
7
|
-
{input}
|
|
7
|
+
{text}
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
analyze_template: |
|
|
11
11
|
Read the following text and identify its main points, key arguments, and overall purpose.
|
|
12
12
|
Provide a brief, summarized analysis that will help in generating an accurate and concise summary.
|
|
13
|
-
{input}
|
|
13
|
+
{text}
|
|
14
14
|
|
|
@@ -9,7 +9,7 @@ main_template: |
|
|
|
9
9
|
Respond only in JSON format:
|
|
10
10
|
{{"result": ["question1", "question2", ...], "reason": "string"}}
|
|
11
11
|
Here is the answer:
|
|
12
|
-
{input}
|
|
12
|
+
{text}
|
|
13
13
|
|
|
14
14
|
analyze_template: |
|
|
15
15
|
Analyze the following answer to identify its key facts,
|
|
@@ -18,5 +18,5 @@ analyze_template: |
|
|
|
18
18
|
help in formulating relevant and direct questions.
|
|
19
19
|
Just mention the keypoints that was provided in the answer
|
|
20
20
|
Here is the answer:
|
|
21
|
-
{input}
|
|
21
|
+
{text}
|
|
22
22
|
|