hamtaa-texttools 1.1.10__py3-none-any.whl → 1.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.10.dist-info → hamtaa_texttools-1.1.12.dist-info}/METADATA +2 -2
- {hamtaa_texttools-1.1.10.dist-info → hamtaa_texttools-1.1.12.dist-info}/RECORD +10 -11
- texttools/tools/async_tools.py +159 -3
- texttools/tools/internals/async_operator.py +20 -22
- texttools/tools/internals/{base_operator.py → operator_utils.py} +7 -20
- texttools/tools/internals/{operator.py → sync_operator.py} +20 -20
- texttools/tools/sync_tools.py +160 -4
- texttools/tools/internals/formatters.py +0 -24
- {hamtaa_texttools-1.1.10.dist-info → hamtaa_texttools-1.1.12.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-1.1.10.dist-info → hamtaa_texttools-1.1.12.dist-info}/licenses/LICENSE +0 -0
- {hamtaa_texttools-1.1.10.dist-info → hamtaa_texttools-1.1.12.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.12
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -79,7 +79,7 @@ TextTools provides several optional flags to customize LLM behavior:
|
|
|
79
79
|
|
|
80
80
|
- **`temperature (float)`** → Determines how creative the model's responses should be. Takes a float from `0.0` to `2.0`.
|
|
81
81
|
|
|
82
|
-
- **`validator (Callable)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return bool (True if there were no problem, False if the validation failed.) If validator failed, TheTool will retry to get another output by modifying `temperature`.
|
|
82
|
+
- **`validator (Callable)`** → Forces TheTool to validate the output result with your custom validator. The validator should return a bool (True if there was no problem, False if the validation failed). If validation fails, TheTool will retry to get another output by modifying `temperature`. You can specify `max_validation_retries=<N>` to change the number of retries.
|
|
83
83
|
|
|
84
84
|
All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
|
|
85
85
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
hamtaa_texttools-1.1.
|
|
1
|
+
hamtaa_texttools-1.1.12.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
|
|
2
2
|
texttools/__init__.py,sha256=EZPPNPafVGvBaxjG9anP0piqH3gAC0DdjdAckQeAgNU,251
|
|
3
3
|
texttools/batch/batch_config.py,sha256=FCDXy9TfH7xjd1PHvn_CtdwEQSq-YO5sktiaMZEId58,740
|
|
4
4
|
texttools/batch/batch_runner.py,sha256=zzzVIXedmaq-8fqsFtGRR64F7CtYRLlhQeBu8uMwJQg,9385
|
|
@@ -16,15 +16,14 @@ texttools/prompts/subject_to_question.yaml,sha256=C7x7rNNm6U_ZG9HOn6zuzYOtvJUZ2s
|
|
|
16
16
|
texttools/prompts/summarize.yaml,sha256=o6rxGPfWtZd61Duvm8NVvCJqfq73b-wAuMSKR6UYUqY,459
|
|
17
17
|
texttools/prompts/text_to_question.yaml,sha256=UheKYpDn6iyKI8NxunHZtFpNyfCLZZe5cvkuXpurUJY,783
|
|
18
18
|
texttools/prompts/translate.yaml,sha256=mGT2uBCei6uucWqVbs4silk-UV060v3G0jnt0P6sr50,634
|
|
19
|
-
texttools/tools/async_tools.py,sha256=
|
|
20
|
-
texttools/tools/sync_tools.py,sha256=
|
|
21
|
-
texttools/tools/internals/async_operator.py,sha256=
|
|
22
|
-
texttools/tools/internals/
|
|
23
|
-
texttools/tools/internals/formatters.py,sha256=tACNLP6PeoqaRpNudVxBaHA25zyWqWYPZQuYysIu88g,941
|
|
24
|
-
texttools/tools/internals/operator.py,sha256=W0DxTGB3cbtDfzLqwMCM8x5xiVWgN0vZWX8PzJwAQKE,6795
|
|
19
|
+
texttools/tools/async_tools.py,sha256=60VAAZyVRxI2rKVFFiCnbY--F4kNtVxYQticE0RyhOs,24677
|
|
20
|
+
texttools/tools/sync_tools.py,sha256=F5TN3KQ_vlF7AC9J0vm2NzjIZC19Ox11tpc9K1SMRwQ,24448
|
|
21
|
+
texttools/tools/internals/async_operator.py,sha256=aHXhCweEhyl8uP4qTeccAQOmAxVrThFOmdtgtlChQWs,6804
|
|
22
|
+
texttools/tools/internals/operator_utils.py,sha256=w1k0RJ_W_CRbVc_J2w337VuL-opHpHiCxfhEOwtyuOo,1856
|
|
25
23
|
texttools/tools/internals/output_models.py,sha256=ekpbyocmXj_dee7ieOT1zOkMo9cPHT7xcUFCZoUaXA0,1886
|
|
26
24
|
texttools/tools/internals/prompt_loader.py,sha256=4g6-U8kqrGN7VpNaRcrBcnF-h03PXjUDBP0lL0_4EZY,1953
|
|
27
|
-
|
|
28
|
-
hamtaa_texttools-1.1.
|
|
29
|
-
hamtaa_texttools-1.1.
|
|
30
|
-
hamtaa_texttools-1.1.
|
|
25
|
+
texttools/tools/internals/sync_operator.py,sha256=ar1o5apJgDtQEPLCiGBghS_Y3IXfJMEbXOG9EeS7a9Q,6710
|
|
26
|
+
hamtaa_texttools-1.1.12.dist-info/METADATA,sha256=iPj8bakwOVrpkdHe0n2sn36ZjphP1gREP7Ud5Z0uNxU,9179
|
|
27
|
+
hamtaa_texttools-1.1.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
28
|
+
hamtaa_texttools-1.1.12.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
|
|
29
|
+
hamtaa_texttools-1.1.12.dist-info/RECORD,,
|
texttools/tools/async_tools.py
CHANGED
|
@@ -35,15 +35,27 @@ class AsyncTheTool:
|
|
|
35
35
|
logprobs: bool = False,
|
|
36
36
|
top_logprobs: int | None = None,
|
|
37
37
|
validator: Callable[[Any], bool] | None = None,
|
|
38
|
+
max_validation_retries: int | None = None,
|
|
38
39
|
) -> OM.ToolOutput:
|
|
39
40
|
"""
|
|
40
41
|
Categorize a text into a single Islamic studies domain category.
|
|
41
42
|
|
|
43
|
+
Arguments:
|
|
44
|
+
text: The input text to categorize
|
|
45
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
46
|
+
user_prompt: Additional instructions for the categorization
|
|
47
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
48
|
+
logprobs: Whether to return token probability information
|
|
49
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
50
|
+
validator: Custom validation function to validate the output
|
|
51
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
52
|
+
|
|
42
53
|
Returns:
|
|
43
54
|
ToolOutput: Object containing:
|
|
44
55
|
- result (str): The assigned Islamic studies category
|
|
45
56
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
46
57
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
58
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
47
59
|
"""
|
|
48
60
|
return await self._operator.run(
|
|
49
61
|
# User parameters
|
|
@@ -54,6 +66,7 @@ class AsyncTheTool:
|
|
|
54
66
|
logprobs=logprobs,
|
|
55
67
|
top_logprobs=top_logprobs,
|
|
56
68
|
validator=validator,
|
|
69
|
+
max_validation_retries=max_validation_retries,
|
|
57
70
|
# Internal parameters
|
|
58
71
|
prompt_file="categorizer.yaml",
|
|
59
72
|
output_model=OM.CategorizerOutput,
|
|
@@ -71,15 +84,28 @@ class AsyncTheTool:
|
|
|
71
84
|
logprobs: bool = False,
|
|
72
85
|
top_logprobs: int | None = None,
|
|
73
86
|
validator: Callable[[Any], bool] | None = None,
|
|
87
|
+
max_validation_retries: int | None = None,
|
|
74
88
|
) -> OM.ToolOutput:
|
|
75
89
|
"""
|
|
76
90
|
Extract salient keywords from text.
|
|
77
91
|
|
|
92
|
+
Arguments:
|
|
93
|
+
text: The input text to extract keywords from
|
|
94
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
95
|
+
output_lang: Language for the output response
|
|
96
|
+
user_prompt: Additional instructions for keyword extraction
|
|
97
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
98
|
+
logprobs: Whether to return token probability information
|
|
99
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
100
|
+
validator: Custom validation function to validate the output
|
|
101
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
102
|
+
|
|
78
103
|
Returns:
|
|
79
104
|
ToolOutput: Object containing:
|
|
80
105
|
- result (list[str]): List of extracted keywords
|
|
81
106
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
82
107
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
108
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
83
109
|
"""
|
|
84
110
|
return await self._operator.run(
|
|
85
111
|
# User parameters
|
|
@@ -91,6 +117,7 @@ class AsyncTheTool:
|
|
|
91
117
|
logprobs=logprobs,
|
|
92
118
|
top_logprobs=top_logprobs,
|
|
93
119
|
validator=validator,
|
|
120
|
+
max_validation_retries=max_validation_retries,
|
|
94
121
|
# Internal parameters
|
|
95
122
|
prompt_file="extract_keywords.yaml",
|
|
96
123
|
output_model=OM.ListStrOutput,
|
|
@@ -107,15 +134,28 @@ class AsyncTheTool:
|
|
|
107
134
|
logprobs: bool = False,
|
|
108
135
|
top_logprobs: int | None = None,
|
|
109
136
|
validator: Callable[[Any], bool] | None = None,
|
|
137
|
+
max_validation_retries: int | None = None,
|
|
110
138
|
) -> OM.ToolOutput:
|
|
111
139
|
"""
|
|
112
140
|
Perform Named Entity Recognition (NER) over the input text.
|
|
113
141
|
|
|
142
|
+
Arguments:
|
|
143
|
+
text: The input text to extract entities from
|
|
144
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
145
|
+
output_lang: Language for the output response
|
|
146
|
+
user_prompt: Additional instructions for entity extraction
|
|
147
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
148
|
+
logprobs: Whether to return token probability information
|
|
149
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
150
|
+
validator: Custom validation function to validate the output
|
|
151
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
152
|
+
|
|
114
153
|
Returns:
|
|
115
154
|
ToolOutput: Object containing:
|
|
116
155
|
- result (list[dict]): List of entities with 'text' and 'type' keys
|
|
117
156
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
118
157
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
158
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
119
159
|
"""
|
|
120
160
|
return await self._operator.run(
|
|
121
161
|
# User parameters
|
|
@@ -127,6 +167,7 @@ class AsyncTheTool:
|
|
|
127
167
|
logprobs=logprobs,
|
|
128
168
|
top_logprobs=top_logprobs,
|
|
129
169
|
validator=validator,
|
|
170
|
+
max_validation_retries=max_validation_retries,
|
|
130
171
|
# Internal parameters
|
|
131
172
|
prompt_file="extract_entities.yaml",
|
|
132
173
|
output_model=OM.ListDictStrStrOutput,
|
|
@@ -142,15 +183,27 @@ class AsyncTheTool:
|
|
|
142
183
|
logprobs: bool = False,
|
|
143
184
|
top_logprobs: int | None = None,
|
|
144
185
|
validator: Callable[[Any], bool] | None = None,
|
|
186
|
+
max_validation_retries: int | None = None,
|
|
145
187
|
) -> OM.ToolOutput:
|
|
146
188
|
"""
|
|
147
189
|
Detect if the input is phrased as a question.
|
|
148
190
|
|
|
191
|
+
Arguments:
|
|
192
|
+
text: The input text to analyze
|
|
193
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
194
|
+
user_prompt: Additional instructions for question detection
|
|
195
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
196
|
+
logprobs: Whether to return token probability information
|
|
197
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
198
|
+
validator: Custom validation function to validate the output
|
|
199
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
200
|
+
|
|
149
201
|
Returns:
|
|
150
202
|
ToolOutput: Object containing:
|
|
151
203
|
- result (bool): True if text is a question, False otherwise
|
|
152
204
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
153
205
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
206
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
154
207
|
"""
|
|
155
208
|
return await self._operator.run(
|
|
156
209
|
# User parameters
|
|
@@ -161,6 +214,7 @@ class AsyncTheTool:
|
|
|
161
214
|
logprobs=logprobs,
|
|
162
215
|
top_logprobs=top_logprobs,
|
|
163
216
|
validator=validator,
|
|
217
|
+
max_validation_retries=max_validation_retries,
|
|
164
218
|
# Internal parameters
|
|
165
219
|
prompt_file="is_question.yaml",
|
|
166
220
|
output_model=OM.BoolOutput,
|
|
@@ -178,15 +232,28 @@ class AsyncTheTool:
|
|
|
178
232
|
logprobs: bool = False,
|
|
179
233
|
top_logprobs: int | None = None,
|
|
180
234
|
validator: Callable[[Any], bool] | None = None,
|
|
235
|
+
max_validation_retries: int | None = None,
|
|
181
236
|
) -> OM.ToolOutput:
|
|
182
237
|
"""
|
|
183
238
|
Generate a single question from the given text.
|
|
184
239
|
|
|
240
|
+
Arguments:
|
|
241
|
+
text: The input text to generate a question from
|
|
242
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
243
|
+
output_lang: Language for the output question
|
|
244
|
+
user_prompt: Additional instructions for question generation
|
|
245
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
246
|
+
logprobs: Whether to return token probability information
|
|
247
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
248
|
+
validator: Custom validation function to validate the output
|
|
249
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
250
|
+
|
|
185
251
|
Returns:
|
|
186
252
|
ToolOutput: Object containing:
|
|
187
253
|
- result (str): The generated question
|
|
188
254
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
189
255
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
256
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
190
257
|
"""
|
|
191
258
|
return await self._operator.run(
|
|
192
259
|
# User parameters
|
|
@@ -198,6 +265,7 @@ class AsyncTheTool:
|
|
|
198
265
|
logprobs=logprobs,
|
|
199
266
|
top_logprobs=top_logprobs,
|
|
200
267
|
validator=validator,
|
|
268
|
+
max_validation_retries=max_validation_retries,
|
|
201
269
|
# Internal parameters
|
|
202
270
|
prompt_file="text_to_question.yaml",
|
|
203
271
|
output_model=OM.StrOutput,
|
|
@@ -215,15 +283,29 @@ class AsyncTheTool:
|
|
|
215
283
|
top_logprobs: int | None = None,
|
|
216
284
|
mode: Literal["default", "reason"] = "default",
|
|
217
285
|
validator: Callable[[Any], bool] | None = None,
|
|
286
|
+
max_validation_retries: int | None = None,
|
|
218
287
|
) -> OM.ToolOutput:
|
|
219
288
|
"""
|
|
220
289
|
Merge multiple questions into a single unified question.
|
|
221
290
|
|
|
291
|
+
Arguments:
|
|
292
|
+
text: List of questions to merge
|
|
293
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
294
|
+
output_lang: Language for the output merged question
|
|
295
|
+
user_prompt: Additional instructions for question merging
|
|
296
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
297
|
+
logprobs: Whether to return token probability information
|
|
298
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
299
|
+
mode: Merging strategy - 'default' for direct merge, 'reason' for reasoned merge
|
|
300
|
+
validator: Custom validation function to validate the output
|
|
301
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
302
|
+
|
|
222
303
|
Returns:
|
|
223
304
|
ToolOutput: Object containing:
|
|
224
305
|
- result (str): The merged question
|
|
225
306
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
226
307
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
308
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
227
309
|
"""
|
|
228
310
|
text = ", ".join(text)
|
|
229
311
|
return await self._operator.run(
|
|
@@ -236,6 +318,7 @@ class AsyncTheTool:
|
|
|
236
318
|
logprobs=logprobs,
|
|
237
319
|
top_logprobs=top_logprobs,
|
|
238
320
|
validator=validator,
|
|
321
|
+
max_validation_retries=max_validation_retries,
|
|
239
322
|
# Internal parameters
|
|
240
323
|
prompt_file="merge_questions.yaml",
|
|
241
324
|
output_model=OM.StrOutput,
|
|
@@ -253,15 +336,29 @@ class AsyncTheTool:
|
|
|
253
336
|
top_logprobs: int | None = None,
|
|
254
337
|
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
255
338
|
validator: Callable[[Any], bool] | None = None,
|
|
339
|
+
max_validation_retries: int | None = None,
|
|
256
340
|
) -> OM.ToolOutput:
|
|
257
341
|
"""
|
|
258
342
|
Rewrite a text with different modes.
|
|
259
343
|
|
|
344
|
+
Arguments:
|
|
345
|
+
text: The input text to rewrite
|
|
346
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
347
|
+
output_lang: Language for the output rewritten text
|
|
348
|
+
user_prompt: Additional instructions for rewriting
|
|
349
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
350
|
+
logprobs: Whether to return token probability information
|
|
351
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
352
|
+
mode: Rewriting mode - 'positive', 'negative', or 'hard_negative'
|
|
353
|
+
validator: Custom validation function to validate the output
|
|
354
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
355
|
+
|
|
260
356
|
Returns:
|
|
261
357
|
ToolOutput: Object containing:
|
|
262
358
|
- result (str): The rewritten text
|
|
263
359
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
264
360
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
361
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
265
362
|
"""
|
|
266
363
|
return await self._operator.run(
|
|
267
364
|
# User parameters
|
|
@@ -273,6 +370,7 @@ class AsyncTheTool:
|
|
|
273
370
|
logprobs=logprobs,
|
|
274
371
|
top_logprobs=top_logprobs,
|
|
275
372
|
validator=validator,
|
|
373
|
+
max_validation_retries=max_validation_retries,
|
|
276
374
|
# Internal parameters
|
|
277
375
|
prompt_file="rewrite.yaml",
|
|
278
376
|
output_model=OM.StrOutput,
|
|
@@ -290,15 +388,29 @@ class AsyncTheTool:
|
|
|
290
388
|
logprobs: bool = False,
|
|
291
389
|
top_logprobs: int | None = None,
|
|
292
390
|
validator: Callable[[Any], bool] | None = None,
|
|
391
|
+
max_validation_retries: int | None = None,
|
|
293
392
|
) -> OM.ToolOutput:
|
|
294
393
|
"""
|
|
295
394
|
Generate a list of questions about a subject.
|
|
296
395
|
|
|
396
|
+
Arguments:
|
|
397
|
+
text: The subject text to generate questions about
|
|
398
|
+
number_of_questions: Number of questions to generate
|
|
399
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
400
|
+
output_lang: Language for the output questions
|
|
401
|
+
user_prompt: Additional instructions for question generation
|
|
402
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
403
|
+
logprobs: Whether to return token probability information
|
|
404
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
405
|
+
validator: Custom validation function to validate the output
|
|
406
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
407
|
+
|
|
297
408
|
Returns:
|
|
298
409
|
ToolOutput: Object containing:
|
|
299
410
|
- result (list[str]): List of generated questions
|
|
300
411
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
301
412
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
413
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
302
414
|
"""
|
|
303
415
|
return await self._operator.run(
|
|
304
416
|
# User parameters
|
|
@@ -311,6 +423,7 @@ class AsyncTheTool:
|
|
|
311
423
|
logprobs=logprobs,
|
|
312
424
|
top_logprobs=top_logprobs,
|
|
313
425
|
validator=validator,
|
|
426
|
+
max_validation_retries=max_validation_retries,
|
|
314
427
|
# Internal parameters
|
|
315
428
|
prompt_file="subject_to_question.yaml",
|
|
316
429
|
output_model=OM.ReasonListStrOutput,
|
|
@@ -327,15 +440,28 @@ class AsyncTheTool:
|
|
|
327
440
|
logprobs: bool = False,
|
|
328
441
|
top_logprobs: int | None = None,
|
|
329
442
|
validator: Callable[[Any], bool] | None = None,
|
|
443
|
+
max_validation_retries: int | None = None,
|
|
330
444
|
) -> OM.ToolOutput:
|
|
331
445
|
"""
|
|
332
446
|
Summarize the given subject text.
|
|
333
447
|
|
|
448
|
+
Arguments:
|
|
449
|
+
text: The input text to summarize
|
|
450
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
451
|
+
output_lang: Language for the output summary
|
|
452
|
+
user_prompt: Additional instructions for summarization
|
|
453
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
454
|
+
logprobs: Whether to return token probability information
|
|
455
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
456
|
+
validator: Custom validation function to validate the output
|
|
457
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
458
|
+
|
|
334
459
|
Returns:
|
|
335
460
|
ToolOutput: Object containing:
|
|
336
461
|
- result (str): The summary text
|
|
337
462
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
338
463
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
464
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
339
465
|
"""
|
|
340
466
|
return await self._operator.run(
|
|
341
467
|
# User parameters
|
|
@@ -347,6 +473,7 @@ class AsyncTheTool:
|
|
|
347
473
|
logprobs=logprobs,
|
|
348
474
|
top_logprobs=top_logprobs,
|
|
349
475
|
validator=validator,
|
|
476
|
+
max_validation_retries=max_validation_retries,
|
|
350
477
|
# Internal parameters
|
|
351
478
|
prompt_file="summarize.yaml",
|
|
352
479
|
output_model=OM.StrOutput,
|
|
@@ -363,15 +490,28 @@ class AsyncTheTool:
|
|
|
363
490
|
logprobs: bool = False,
|
|
364
491
|
top_logprobs: int | None = None,
|
|
365
492
|
validator: Callable[[Any], bool] | None = None,
|
|
493
|
+
max_validation_retries: int | None = None,
|
|
366
494
|
) -> OM.ToolOutput:
|
|
367
495
|
"""
|
|
368
496
|
Translate text between languages.
|
|
369
497
|
|
|
498
|
+
Arguments:
|
|
499
|
+
text: The input text to translate
|
|
500
|
+
target_language: The target language for translation
|
|
501
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
502
|
+
user_prompt: Additional instructions for translation
|
|
503
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
504
|
+
logprobs: Whether to return token probability information
|
|
505
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
506
|
+
validator: Custom validation function to validate the output
|
|
507
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
508
|
+
|
|
370
509
|
Returns:
|
|
371
510
|
ToolOutput: Object containing:
|
|
372
511
|
- result (str): The translated text
|
|
373
512
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
374
513
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
514
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
375
515
|
"""
|
|
376
516
|
return await self._operator.run(
|
|
377
517
|
# User parameters
|
|
@@ -383,6 +523,7 @@ class AsyncTheTool:
|
|
|
383
523
|
logprobs=logprobs,
|
|
384
524
|
top_logprobs=top_logprobs,
|
|
385
525
|
validator=validator,
|
|
526
|
+
max_validation_retries=max_validation_retries,
|
|
386
527
|
# Internal parameters
|
|
387
528
|
prompt_file="translate.yaml",
|
|
388
529
|
output_model=OM.StrOutput,
|
|
@@ -398,13 +539,27 @@ class AsyncTheTool:
|
|
|
398
539
|
temperature: float | None = None,
|
|
399
540
|
logprobs: bool | None = None,
|
|
400
541
|
top_logprobs: int | None = None,
|
|
542
|
+
validator: Callable[[Any], bool] | None = None,
|
|
543
|
+
max_validation_retries: int | None = None,
|
|
401
544
|
) -> OM.ToolOutput:
|
|
402
545
|
"""
|
|
403
546
|
Custom tool that can do almost anything!
|
|
404
547
|
|
|
548
|
+
Arguments:
|
|
549
|
+
text: The user prompt
|
|
550
|
+
output_lang: Language for the output response
|
|
551
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
552
|
+
logprobs: Whether to return token probability information
|
|
553
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
554
|
+
validator: Custom validation function to validate the output
|
|
555
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
556
|
+
|
|
405
557
|
Returns:
|
|
406
|
-
ToolOutput: Object
|
|
407
|
-
- result (str): The
|
|
558
|
+
ToolOutput: Object containing:
|
|
559
|
+
- result (str): The output produced for the custom prompt
|
|
560
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
561
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
562
|
+
- errors (list[str] | None): Errors that occurred during the tool call
|
|
408
563
|
"""
|
|
409
564
|
return await self._operator.run(
|
|
410
565
|
# User parameters
|
|
@@ -415,10 +570,11 @@ class AsyncTheTool:
|
|
|
415
570
|
temperature=temperature,
|
|
416
571
|
logprobs=logprobs,
|
|
417
572
|
top_logprobs=top_logprobs,
|
|
573
|
+
validator=validator,
|
|
574
|
+
max_validation_retries=max_validation_retries,
|
|
418
575
|
# Internal parameters
|
|
419
576
|
prompt_file="run_custom.yaml",
|
|
420
577
|
user_prompt=None,
|
|
421
578
|
with_analysis=False,
|
|
422
579
|
mode=None,
|
|
423
|
-
validator=None,
|
|
424
580
|
)
|
|
@@ -5,8 +5,7 @@ from openai import AsyncOpenAI
|
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
7
|
from texttools.tools.internals.output_models import ToolOutput
|
|
8
|
-
from texttools.tools.internals.
|
|
9
|
-
from texttools.tools.internals.formatters import Formatter
|
|
8
|
+
from texttools.tools.internals.operator_utils import OperatorUtils
|
|
10
9
|
from texttools.tools.internals.prompt_loader import PromptLoader
|
|
11
10
|
|
|
12
11
|
# Base Model type for output models
|
|
@@ -15,7 +14,7 @@ T = TypeVar("T", bound=BaseModel)
|
|
|
15
14
|
logger = logging.getLogger("texttools.async_operator")
|
|
16
15
|
|
|
17
16
|
|
|
18
|
-
class AsyncOperator
|
|
17
|
+
class AsyncOperator:
|
|
19
18
|
"""
|
|
20
19
|
Core engine for running text-processing operations with an LLM (Async).
|
|
21
20
|
|
|
@@ -26,7 +25,8 @@ class AsyncOperator(BaseOperator):
|
|
|
26
25
|
"""
|
|
27
26
|
|
|
28
27
|
def __init__(self, client: AsyncOpenAI, model: str):
|
|
29
|
-
|
|
28
|
+
self._client = client
|
|
29
|
+
self._model = model
|
|
30
30
|
|
|
31
31
|
async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
|
|
32
32
|
"""
|
|
@@ -34,7 +34,7 @@ class AsyncOperator(BaseOperator):
|
|
|
34
34
|
Returns the analyzed content as a string.
|
|
35
35
|
"""
|
|
36
36
|
analyze_prompt = prompt_configs["analyze_template"]
|
|
37
|
-
analyze_message = [
|
|
37
|
+
analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
|
|
38
38
|
completion = await self._client.chat.completions.create(
|
|
39
39
|
model=self._model,
|
|
40
40
|
messages=analyze_message,
|
|
@@ -53,7 +53,7 @@ class AsyncOperator(BaseOperator):
|
|
|
53
53
|
) -> tuple[T, Any]:
|
|
54
54
|
"""
|
|
55
55
|
Parses a chat completion using OpenAI's structured output format.
|
|
56
|
-
Returns both the parsed object and the raw completion for
|
|
56
|
+
Returns both the parsed object and the raw completion for logprobs.
|
|
57
57
|
"""
|
|
58
58
|
request_kwargs = {
|
|
59
59
|
"model": self._model,
|
|
@@ -81,6 +81,7 @@ class AsyncOperator(BaseOperator):
|
|
|
81
81
|
logprobs: bool,
|
|
82
82
|
top_logprobs: int | None,
|
|
83
83
|
validator: Callable[[Any], bool] | None,
|
|
84
|
+
max_validation_retries: int | None,
|
|
84
85
|
# Internal parameters
|
|
85
86
|
prompt_file: str,
|
|
86
87
|
output_model: Type[T],
|
|
@@ -91,7 +92,6 @@ class AsyncOperator(BaseOperator):
|
|
|
91
92
|
Execute the async LLM pipeline with the given input text. (Async)
|
|
92
93
|
"""
|
|
93
94
|
prompt_loader = PromptLoader()
|
|
94
|
-
formatter = Formatter()
|
|
95
95
|
output = ToolOutput()
|
|
96
96
|
|
|
97
97
|
try:
|
|
@@ -108,46 +108,44 @@ class AsyncOperator(BaseOperator):
|
|
|
108
108
|
if with_analysis:
|
|
109
109
|
analysis = await self._analyze(prompt_configs, temperature)
|
|
110
110
|
messages.append(
|
|
111
|
-
|
|
111
|
+
OperatorUtils.build_user_message(
|
|
112
|
+
f"Based on this analysis: {analysis}"
|
|
113
|
+
)
|
|
112
114
|
)
|
|
113
115
|
|
|
114
116
|
if output_lang:
|
|
115
117
|
messages.append(
|
|
116
|
-
|
|
118
|
+
OperatorUtils.build_user_message(
|
|
117
119
|
f"Respond only in the {output_lang} language."
|
|
118
120
|
)
|
|
119
121
|
)
|
|
120
122
|
|
|
121
123
|
if user_prompt:
|
|
122
124
|
messages.append(
|
|
123
|
-
|
|
125
|
+
OperatorUtils.build_user_message(
|
|
126
|
+
f"Consider this instruction {user_prompt}"
|
|
127
|
+
)
|
|
124
128
|
)
|
|
125
129
|
|
|
126
|
-
messages.append(
|
|
127
|
-
|
|
130
|
+
messages.append(
|
|
131
|
+
OperatorUtils.build_user_message(prompt_configs["main_template"])
|
|
132
|
+
)
|
|
128
133
|
|
|
129
134
|
parsed, completion = await self._parse_completion(
|
|
130
135
|
messages, output_model, temperature, logprobs, top_logprobs
|
|
131
136
|
)
|
|
132
137
|
|
|
133
|
-
# Ensure output_model has a `result` field
|
|
134
|
-
if not hasattr(parsed, "result"):
|
|
135
|
-
error = "The provided output_model must define a field named 'result'"
|
|
136
|
-
logger.error(error)
|
|
137
|
-
output.errors.append(error)
|
|
138
|
-
return output
|
|
139
|
-
|
|
140
138
|
output.result = parsed.result
|
|
141
139
|
|
|
142
140
|
# Retry logic if validation fails
|
|
143
141
|
if validator and not validator(output.result):
|
|
144
|
-
for attempt in range(
|
|
142
|
+
for attempt in range(max_validation_retries):
|
|
145
143
|
logger.warning(
|
|
146
144
|
f"Validation failed, retrying for the {attempt + 1} time."
|
|
147
145
|
)
|
|
148
146
|
|
|
149
147
|
# Generate new temperature for retry
|
|
150
|
-
retry_temperature =
|
|
148
|
+
retry_temperature = OperatorUtils.get_retry_temp(temperature)
|
|
151
149
|
try:
|
|
152
150
|
parsed, completion = await self._parse_completion(
|
|
153
151
|
messages,
|
|
@@ -179,7 +177,7 @@ class AsyncOperator(BaseOperator):
|
|
|
179
177
|
output.errors.append("Validation failed after all retry attempts")
|
|
180
178
|
|
|
181
179
|
if logprobs:
|
|
182
|
-
output.logprobs =
|
|
180
|
+
output.logprobs = OperatorUtils.extract_logprobs(completion)
|
|
183
181
|
|
|
184
182
|
if with_analysis:
|
|
185
183
|
output.analysis = analysis
|
|
@@ -1,29 +1,15 @@
|
|
|
1
|
-
from typing import TypeVar, Any, Union
|
|
2
1
|
import re
|
|
3
2
|
import math
|
|
4
3
|
import random
|
|
5
4
|
|
|
6
|
-
from pydantic import BaseModel
|
|
7
|
-
from openai import OpenAI, AsyncOpenAI
|
|
8
5
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
ClientType = Union[OpenAI, AsyncOpenAI]
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class BaseOperator:
|
|
16
|
-
# Max retry in case of failed output validation
|
|
17
|
-
MAX_RETRIES = 3
|
|
18
|
-
|
|
19
|
-
def __init__(self, client: ClientType, model: str):
|
|
20
|
-
self._client = client
|
|
21
|
-
self._model = model
|
|
22
|
-
|
|
23
|
-
def _build_user_message(self, prompt: str) -> dict[str, str]:
|
|
6
|
+
class OperatorUtils:
|
|
7
|
+
@staticmethod
|
|
8
|
+
def build_user_message(prompt: str) -> dict[str, str]:
|
|
24
9
|
return {"role": "user", "content": prompt}
|
|
25
10
|
|
|
26
|
-
|
|
11
|
+
@staticmethod
|
|
12
|
+
def extract_logprobs(completion: dict) -> list[dict]:
|
|
27
13
|
"""
|
|
28
14
|
Extracts and filters token probabilities from completion logprobs.
|
|
29
15
|
Skips punctuation and structural tokens, returns cleaned probability data.
|
|
@@ -57,7 +43,8 @@ class BaseOperator:
|
|
|
57
43
|
|
|
58
44
|
return logprobs_data
|
|
59
45
|
|
|
60
|
-
|
|
46
|
+
@staticmethod
|
|
47
|
+
def get_retry_temp(base_temp: float) -> float:
|
|
61
48
|
"""
|
|
62
49
|
Calculate temperature for retry attempts.
|
|
63
50
|
"""
|
|
@@ -5,7 +5,7 @@ from openai import OpenAI
|
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
7
|
from texttools.tools.internals.output_models import ToolOutput
|
|
8
|
-
from texttools.tools.internals.
|
|
8
|
+
from texttools.tools.internals.operator_utils import OperatorUtils
|
|
9
9
|
from texttools.tools.internals.prompt_loader import PromptLoader
|
|
10
10
|
|
|
11
11
|
# Base Model type for output models
|
|
@@ -14,7 +14,7 @@ T = TypeVar("T", bound=BaseModel)
|
|
|
14
14
|
logger = logging.getLogger("texttools.operator")
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
class Operator
|
|
17
|
+
class Operator:
|
|
18
18
|
"""
|
|
19
19
|
Core engine for running text-processing operations with an LLM (Sync).
|
|
20
20
|
|
|
@@ -25,7 +25,8 @@ class Operator(BaseOperator):
|
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
27
|
def __init__(self, client: OpenAI, model: str):
|
|
28
|
-
|
|
28
|
+
self._client = client
|
|
29
|
+
self._model = model
|
|
29
30
|
|
|
30
31
|
def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
|
|
31
32
|
"""
|
|
@@ -33,7 +34,7 @@ class Operator(BaseOperator):
|
|
|
33
34
|
Returns the analyzed content as a string.
|
|
34
35
|
"""
|
|
35
36
|
analyze_prompt = prompt_configs["analyze_template"]
|
|
36
|
-
analyze_message = [
|
|
37
|
+
analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
|
|
37
38
|
completion = self._client.chat.completions.create(
|
|
38
39
|
model=self._model,
|
|
39
40
|
messages=analyze_message,
|
|
@@ -52,7 +53,7 @@ class Operator(BaseOperator):
|
|
|
52
53
|
) -> tuple[T, Any]:
|
|
53
54
|
"""
|
|
54
55
|
Parses a chat completion using OpenAI's structured output format.
|
|
55
|
-
Returns both the parsed object and the raw completion for
|
|
56
|
+
Returns both the parsed object and the raw completion for logprobs.
|
|
56
57
|
"""
|
|
57
58
|
request_kwargs = {
|
|
58
59
|
"model": self._model,
|
|
@@ -80,6 +81,7 @@ class Operator(BaseOperator):
|
|
|
80
81
|
logprobs: bool,
|
|
81
82
|
top_logprobs: int | None,
|
|
82
83
|
validator: Callable[[Any], bool] | None,
|
|
84
|
+
max_validation_retries: int | None,
|
|
83
85
|
# Internal parameters
|
|
84
86
|
prompt_file: str,
|
|
85
87
|
output_model: Type[T],
|
|
@@ -106,46 +108,44 @@ class Operator(BaseOperator):
|
|
|
106
108
|
if with_analysis:
|
|
107
109
|
analysis = self._analyze(prompt_configs, temperature)
|
|
108
110
|
messages.append(
|
|
109
|
-
|
|
111
|
+
OperatorUtils.build_user_message(
|
|
112
|
+
f"Based on this analysis: {analysis}"
|
|
113
|
+
)
|
|
110
114
|
)
|
|
111
115
|
|
|
112
116
|
if output_lang:
|
|
113
117
|
messages.append(
|
|
114
|
-
|
|
118
|
+
OperatorUtils.build_user_message(
|
|
115
119
|
f"Respond only in the {output_lang} language."
|
|
116
120
|
)
|
|
117
121
|
)
|
|
118
122
|
|
|
119
123
|
if user_prompt:
|
|
120
124
|
messages.append(
|
|
121
|
-
|
|
125
|
+
OperatorUtils.build_user_message(
|
|
126
|
+
f"Consider this instruction {user_prompt}"
|
|
127
|
+
)
|
|
122
128
|
)
|
|
123
129
|
|
|
124
|
-
messages.append(
|
|
125
|
-
|
|
130
|
+
messages.append(
|
|
131
|
+
OperatorUtils.build_user_message(prompt_configs["main_template"])
|
|
132
|
+
)
|
|
126
133
|
|
|
127
134
|
parsed, completion = self._parse_completion(
|
|
128
135
|
messages, output_model, temperature, logprobs, top_logprobs
|
|
129
136
|
)
|
|
130
137
|
|
|
131
|
-
# Ensure output_model has a `result` field
|
|
132
|
-
if not hasattr(parsed, "result"):
|
|
133
|
-
error = "The provided output_model must define a field named 'result'"
|
|
134
|
-
logger.error(error)
|
|
135
|
-
output.errors.append(error)
|
|
136
|
-
return output
|
|
137
|
-
|
|
138
138
|
output.result = parsed.result
|
|
139
139
|
|
|
140
140
|
# Retry logic if validation fails
|
|
141
141
|
if validator and not validator(output.result):
|
|
142
|
-
for attempt in range(
|
|
142
|
+
for attempt in range(max_validation_retries):
|
|
143
143
|
logger.warning(
|
|
144
144
|
f"Validation failed, retrying for the {attempt + 1} time."
|
|
145
145
|
)
|
|
146
146
|
|
|
147
147
|
# Generate new temperature for retry
|
|
148
|
-
retry_temperature =
|
|
148
|
+
retry_temperature = OperatorUtils.get_retry_temp(temperature)
|
|
149
149
|
try:
|
|
150
150
|
parsed, completion = self._parse_completion(
|
|
151
151
|
messages,
|
|
@@ -177,7 +177,7 @@ class Operator(BaseOperator):
|
|
|
177
177
|
output.errors.append("Validation failed after all retry attempts")
|
|
178
178
|
|
|
179
179
|
if logprobs:
|
|
180
|
-
output.logprobs =
|
|
180
|
+
output.logprobs = OperatorUtils.extract_logprobs(completion)
|
|
181
181
|
|
|
182
182
|
if with_analysis:
|
|
183
183
|
output.analysis = analysis
|
texttools/tools/sync_tools.py
CHANGED
|
@@ -2,7 +2,7 @@ from typing import Literal, Any, Callable
|
|
|
2
2
|
|
|
3
3
|
from openai import OpenAI
|
|
4
4
|
|
|
5
|
-
from texttools.tools.internals.
|
|
5
|
+
from texttools.tools.internals.sync_operator import Operator
|
|
6
6
|
import texttools.tools.internals.output_models as OM
|
|
7
7
|
|
|
8
8
|
|
|
@@ -33,15 +33,27 @@ class TheTool:
|
|
|
33
33
|
logprobs: bool = False,
|
|
34
34
|
top_logprobs: int | None = None,
|
|
35
35
|
validator: Callable[[Any], bool] | None = None,
|
|
36
|
+
max_validation_retries: int | None = None,
|
|
36
37
|
) -> OM.ToolOutput:
|
|
37
38
|
"""
|
|
38
39
|
Categorize a text into a single Islamic studies domain category.
|
|
39
40
|
|
|
41
|
+
Arguments:
|
|
42
|
+
text: The input text to categorize
|
|
43
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
44
|
+
user_prompt: Additional instructions for the categorization
|
|
45
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
46
|
+
logprobs: Whether to return token probability information
|
|
47
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
48
|
+
validator: Custom validation function to validate the output
|
|
49
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
50
|
+
|
|
40
51
|
Returns:
|
|
41
52
|
ToolOutput: Object containing:
|
|
42
53
|
- result (str): The assigned Islamic studies category
|
|
43
54
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
44
55
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
56
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
45
57
|
"""
|
|
46
58
|
return self._operator.run(
|
|
47
59
|
# User parameters
|
|
@@ -52,6 +64,7 @@ class TheTool:
|
|
|
52
64
|
logprobs=logprobs,
|
|
53
65
|
top_logprobs=top_logprobs,
|
|
54
66
|
validator=validator,
|
|
67
|
+
max_validation_retries=max_validation_retries,
|
|
55
68
|
# Internal parameters
|
|
56
69
|
prompt_file="categorizer.yaml",
|
|
57
70
|
output_model=OM.CategorizerOutput,
|
|
@@ -69,15 +82,28 @@ class TheTool:
|
|
|
69
82
|
logprobs: bool = False,
|
|
70
83
|
top_logprobs: int | None = None,
|
|
71
84
|
validator: Callable[[Any], bool] | None = None,
|
|
85
|
+
max_validation_retries: int | None = None,
|
|
72
86
|
) -> OM.ToolOutput:
|
|
73
87
|
"""
|
|
74
88
|
Extract salient keywords from text.
|
|
75
89
|
|
|
90
|
+
Arguments:
|
|
91
|
+
text: The input text to extract keywords from
|
|
92
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
93
|
+
output_lang: Language for the output response
|
|
94
|
+
user_prompt: Additional instructions for keyword extraction
|
|
95
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
96
|
+
logprobs: Whether to return token probability information
|
|
97
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
98
|
+
validator: Custom validation function to validate the output
|
|
99
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
100
|
+
|
|
76
101
|
Returns:
|
|
77
102
|
ToolOutput: Object containing:
|
|
78
103
|
- result (list[str]): List of extracted keywords
|
|
79
104
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
80
105
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
106
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
81
107
|
"""
|
|
82
108
|
return self._operator.run(
|
|
83
109
|
# User parameters
|
|
@@ -89,6 +115,7 @@ class TheTool:
|
|
|
89
115
|
logprobs=logprobs,
|
|
90
116
|
top_logprobs=top_logprobs,
|
|
91
117
|
validator=validator,
|
|
118
|
+
max_validation_retries=max_validation_retries,
|
|
92
119
|
# Internal parameters
|
|
93
120
|
prompt_file="extract_keywords.yaml",
|
|
94
121
|
output_model=OM.ListStrOutput,
|
|
@@ -105,15 +132,28 @@ class TheTool:
|
|
|
105
132
|
logprobs: bool = False,
|
|
106
133
|
top_logprobs: int | None = None,
|
|
107
134
|
validator: Callable[[Any], bool] | None = None,
|
|
135
|
+
max_validation_retries: int | None = None,
|
|
108
136
|
) -> OM.ToolOutput:
|
|
109
137
|
"""
|
|
110
138
|
Perform Named Entity Recognition (NER) over the input text.
|
|
111
139
|
|
|
140
|
+
Arguments:
|
|
141
|
+
text: The input text to extract entities from
|
|
142
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
143
|
+
output_lang: Language for the output response
|
|
144
|
+
user_prompt: Additional instructions for entity extraction
|
|
145
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
146
|
+
logprobs: Whether to return token probability information
|
|
147
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
148
|
+
validator: Custom validation function to validate the output
|
|
149
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
150
|
+
|
|
112
151
|
Returns:
|
|
113
152
|
ToolOutput: Object containing:
|
|
114
153
|
- result (list[dict]): List of entities with 'text' and 'type' keys
|
|
115
154
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
116
155
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
156
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
117
157
|
"""
|
|
118
158
|
return self._operator.run(
|
|
119
159
|
# User parameters
|
|
@@ -125,6 +165,7 @@ class TheTool:
|
|
|
125
165
|
logprobs=logprobs,
|
|
126
166
|
top_logprobs=top_logprobs,
|
|
127
167
|
validator=validator,
|
|
168
|
+
max_validation_retries=max_validation_retries,
|
|
128
169
|
# Internal parameters
|
|
129
170
|
prompt_file="extract_entities.yaml",
|
|
130
171
|
output_model=OM.ListDictStrStrOutput,
|
|
@@ -140,15 +181,27 @@ class TheTool:
|
|
|
140
181
|
logprobs: bool = False,
|
|
141
182
|
top_logprobs: int | None = None,
|
|
142
183
|
validator: Callable[[Any], bool] | None = None,
|
|
184
|
+
max_validation_retries: int | None = None,
|
|
143
185
|
) -> OM.ToolOutput:
|
|
144
186
|
"""
|
|
145
187
|
Detect if the input is phrased as a question.
|
|
146
188
|
|
|
189
|
+
Arguments:
|
|
190
|
+
text: The input text to analyze
|
|
191
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
192
|
+
user_prompt: Additional instructions for question detection
|
|
193
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
194
|
+
logprobs: Whether to return token probability information
|
|
195
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
196
|
+
validator: Custom validation function to validate the output
|
|
197
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
198
|
+
|
|
147
199
|
Returns:
|
|
148
200
|
ToolOutput: Object containing:
|
|
149
201
|
- result (bool): True if text is a question, False otherwise
|
|
150
202
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
151
203
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
204
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
152
205
|
"""
|
|
153
206
|
return self._operator.run(
|
|
154
207
|
# User parameters
|
|
@@ -159,6 +212,7 @@ class TheTool:
|
|
|
159
212
|
logprobs=logprobs,
|
|
160
213
|
top_logprobs=top_logprobs,
|
|
161
214
|
validator=validator,
|
|
215
|
+
max_validation_retries=max_validation_retries,
|
|
162
216
|
# Internal parameters
|
|
163
217
|
prompt_file="is_question.yaml",
|
|
164
218
|
output_model=OM.BoolOutput,
|
|
@@ -176,15 +230,28 @@ class TheTool:
|
|
|
176
230
|
logprobs: bool = False,
|
|
177
231
|
top_logprobs: int | None = None,
|
|
178
232
|
validator: Callable[[Any], bool] | None = None,
|
|
233
|
+
max_validation_retries: int | None = None,
|
|
179
234
|
) -> OM.ToolOutput:
|
|
180
235
|
"""
|
|
181
236
|
Generate a single question from the given text.
|
|
182
237
|
|
|
238
|
+
Arguments:
|
|
239
|
+
text: The input text to generate a question from
|
|
240
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
241
|
+
output_lang: Language for the output question
|
|
242
|
+
user_prompt: Additional instructions for question generation
|
|
243
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
244
|
+
logprobs: Whether to return token probability information
|
|
245
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
246
|
+
validator: Custom validation function to validate the output
|
|
247
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
248
|
+
|
|
183
249
|
Returns:
|
|
184
250
|
ToolOutput: Object containing:
|
|
185
251
|
- result (str): The generated question
|
|
186
252
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
187
253
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
254
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
188
255
|
"""
|
|
189
256
|
return self._operator.run(
|
|
190
257
|
# User parameters
|
|
@@ -196,6 +263,7 @@ class TheTool:
|
|
|
196
263
|
logprobs=logprobs,
|
|
197
264
|
top_logprobs=top_logprobs,
|
|
198
265
|
validator=validator,
|
|
266
|
+
max_validation_retries=max_validation_retries,
|
|
199
267
|
# Internal parameters
|
|
200
268
|
prompt_file="text_to_question.yaml",
|
|
201
269
|
output_model=OM.StrOutput,
|
|
@@ -213,15 +281,29 @@ class TheTool:
|
|
|
213
281
|
top_logprobs: int | None = None,
|
|
214
282
|
mode: Literal["default", "reason"] = "default",
|
|
215
283
|
validator: Callable[[Any], bool] | None = None,
|
|
284
|
+
max_validation_retries: int | None = None,
|
|
216
285
|
) -> OM.ToolOutput:
|
|
217
286
|
"""
|
|
218
287
|
Merge multiple questions into a single unified question.
|
|
219
288
|
|
|
289
|
+
Arguments:
|
|
290
|
+
text: List of questions to merge
|
|
291
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
292
|
+
output_lang: Language for the output merged question
|
|
293
|
+
user_prompt: Additional instructions for question merging
|
|
294
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
295
|
+
logprobs: Whether to return token probability information
|
|
296
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
297
|
+
mode: Merging strategy - 'default' for direct merge, 'reason' for reasoned merge
|
|
298
|
+
validator: Custom validation function to validate the output
|
|
299
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
300
|
+
|
|
220
301
|
Returns:
|
|
221
302
|
ToolOutput: Object containing:
|
|
222
303
|
- result (str): The merged question
|
|
223
304
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
224
305
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
306
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
225
307
|
"""
|
|
226
308
|
text = ", ".join(text)
|
|
227
309
|
return self._operator.run(
|
|
@@ -234,6 +316,7 @@ class TheTool:
|
|
|
234
316
|
logprobs=logprobs,
|
|
235
317
|
top_logprobs=top_logprobs,
|
|
236
318
|
validator=validator,
|
|
319
|
+
max_validation_retries=max_validation_retries,
|
|
237
320
|
# Internal parameters
|
|
238
321
|
prompt_file="merge_questions.yaml",
|
|
239
322
|
output_model=OM.StrOutput,
|
|
@@ -251,15 +334,29 @@ class TheTool:
|
|
|
251
334
|
top_logprobs: int | None = None,
|
|
252
335
|
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
253
336
|
validator: Callable[[Any], bool] | None = None,
|
|
337
|
+
max_validation_retries: int | None = None,
|
|
254
338
|
) -> OM.ToolOutput:
|
|
255
339
|
"""
|
|
256
340
|
Rewrite a text with different modes.
|
|
257
341
|
|
|
342
|
+
Arguments:
|
|
343
|
+
text: The input text to rewrite
|
|
344
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
345
|
+
output_lang: Language for the output rewritten text
|
|
346
|
+
user_prompt: Additional instructions for rewriting
|
|
347
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
348
|
+
logprobs: Whether to return token probability information
|
|
349
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
350
|
+
mode: Rewriting mode - 'positive', 'negative', or 'hard_negative'
|
|
351
|
+
validator: Custom validation function to validate the output
|
|
352
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
353
|
+
|
|
258
354
|
Returns:
|
|
259
355
|
ToolOutput: Object containing:
|
|
260
356
|
- result (str): The rewritten text
|
|
261
357
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
262
358
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
359
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
263
360
|
"""
|
|
264
361
|
return self._operator.run(
|
|
265
362
|
# User parameters
|
|
@@ -271,6 +368,7 @@ class TheTool:
|
|
|
271
368
|
logprobs=logprobs,
|
|
272
369
|
top_logprobs=top_logprobs,
|
|
273
370
|
validator=validator,
|
|
371
|
+
max_validation_retries=max_validation_retries,
|
|
274
372
|
# Internal parameters
|
|
275
373
|
prompt_file="rewrite.yaml",
|
|
276
374
|
output_model=OM.StrOutput,
|
|
@@ -288,15 +386,29 @@ class TheTool:
|
|
|
288
386
|
logprobs: bool = False,
|
|
289
387
|
top_logprobs: int | None = None,
|
|
290
388
|
validator: Callable[[Any], bool] | None = None,
|
|
389
|
+
max_validation_retries: int | None = None,
|
|
291
390
|
) -> OM.ToolOutput:
|
|
292
391
|
"""
|
|
293
392
|
Generate a list of questions about a subject.
|
|
294
393
|
|
|
394
|
+
Arguments:
|
|
395
|
+
text: The subject text to generate questions about
|
|
396
|
+
number_of_questions: Number of questions to generate
|
|
397
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
398
|
+
output_lang: Language for the output questions
|
|
399
|
+
user_prompt: Additional instructions for question generation
|
|
400
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
401
|
+
logprobs: Whether to return token probability information
|
|
402
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
403
|
+
validator: Custom validation function to validate the output
|
|
404
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
405
|
+
|
|
295
406
|
Returns:
|
|
296
407
|
ToolOutput: Object containing:
|
|
297
408
|
- result (list[str]): List of generated questions
|
|
298
409
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
299
410
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
411
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
300
412
|
"""
|
|
301
413
|
return self._operator.run(
|
|
302
414
|
# User parameters
|
|
@@ -309,6 +421,7 @@ class TheTool:
|
|
|
309
421
|
logprobs=logprobs,
|
|
310
422
|
top_logprobs=top_logprobs,
|
|
311
423
|
validator=validator,
|
|
424
|
+
max_validation_retries=max_validation_retries,
|
|
312
425
|
# Internal parameters
|
|
313
426
|
prompt_file="subject_to_question.yaml",
|
|
314
427
|
output_model=OM.ReasonListStrOutput,
|
|
@@ -325,15 +438,28 @@ class TheTool:
|
|
|
325
438
|
logprobs: bool = False,
|
|
326
439
|
top_logprobs: int | None = None,
|
|
327
440
|
validator: Callable[[Any], bool] | None = None,
|
|
441
|
+
max_validation_retries: int | None = None,
|
|
328
442
|
) -> OM.ToolOutput:
|
|
329
443
|
"""
|
|
330
444
|
Summarize the given subject text.
|
|
331
445
|
|
|
446
|
+
Arguments:
|
|
447
|
+
text: The input text to summarize
|
|
448
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
449
|
+
output_lang: Language for the output summary
|
|
450
|
+
user_prompt: Additional instructions for summarization
|
|
451
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
452
|
+
logprobs: Whether to return token probability information
|
|
453
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
454
|
+
validator: Custom validation function to validate the output
|
|
455
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
456
|
+
|
|
332
457
|
Returns:
|
|
333
458
|
ToolOutput: Object containing:
|
|
334
459
|
- result (str): The summary text
|
|
335
460
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
336
461
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
462
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
337
463
|
"""
|
|
338
464
|
return self._operator.run(
|
|
339
465
|
# User parameters
|
|
@@ -345,6 +471,7 @@ class TheTool:
|
|
|
345
471
|
logprobs=logprobs,
|
|
346
472
|
top_logprobs=top_logprobs,
|
|
347
473
|
validator=validator,
|
|
474
|
+
max_validation_retries=max_validation_retries,
|
|
348
475
|
# Internal parameters
|
|
349
476
|
prompt_file="summarize.yaml",
|
|
350
477
|
output_model=OM.StrOutput,
|
|
@@ -361,15 +488,28 @@ class TheTool:
|
|
|
361
488
|
logprobs: bool = False,
|
|
362
489
|
top_logprobs: int | None = None,
|
|
363
490
|
validator: Callable[[Any], bool] | None = None,
|
|
491
|
+
max_validation_retries: int | None = None,
|
|
364
492
|
) -> OM.ToolOutput:
|
|
365
493
|
"""
|
|
366
494
|
Translate text between languages.
|
|
367
495
|
|
|
496
|
+
Arguments:
|
|
497
|
+
text: The input text to translate
|
|
498
|
+
target_language: The target language for translation
|
|
499
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
500
|
+
user_prompt: Additional instructions for translation
|
|
501
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
502
|
+
logprobs: Whether to return token probability information
|
|
503
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
504
|
+
validator: Custom validation function to validate the output
|
|
505
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
506
|
+
|
|
368
507
|
Returns:
|
|
369
508
|
ToolOutput: Object containing:
|
|
370
509
|
- result (str): The translated text
|
|
371
510
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
372
511
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
512
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
373
513
|
"""
|
|
374
514
|
return self._operator.run(
|
|
375
515
|
# User parameters
|
|
@@ -381,6 +521,7 @@ class TheTool:
|
|
|
381
521
|
logprobs=logprobs,
|
|
382
522
|
top_logprobs=top_logprobs,
|
|
383
523
|
validator=validator,
|
|
524
|
+
max_validation_retries=max_validation_retries,
|
|
384
525
|
# Internal parameters
|
|
385
526
|
prompt_file="translate.yaml",
|
|
386
527
|
output_model=OM.StrOutput,
|
|
@@ -396,13 +537,27 @@ class TheTool:
|
|
|
396
537
|
temperature: float | None = None,
|
|
397
538
|
logprobs: bool | None = None,
|
|
398
539
|
top_logprobs: int | None = None,
|
|
540
|
+
validator: Callable[[Any], bool] | None = None,
|
|
541
|
+
max_validation_retries: int | None = None,
|
|
399
542
|
) -> OM.ToolOutput:
|
|
400
543
|
"""
|
|
401
544
|
Custom tool that can do almost anything!
|
|
402
545
|
|
|
546
|
+
Arguments:
|
|
547
|
+
text: The user prompt
|
|
548
|
+
output_lang: Language for the output response
|
|
549
|
+
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
550
|
+
logprobs: Whether to return token probability information
|
|
551
|
+
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
552
|
+
validator: Custom validation function to validate the output
|
|
553
|
+
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
554
|
+
|
|
403
555
|
Returns:
|
|
404
|
-
ToolOutput: Object
|
|
405
|
-
- result (str): The
|
|
556
|
+
ToolOutput: Object containing:
|
|
557
|
+
- result (str): The output generated for the custom prompt
|
|
558
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
559
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
560
|
+
- errors (list(str) | None): Errors occurred during tool call
|
|
406
561
|
"""
|
|
407
562
|
return self._operator.run(
|
|
408
563
|
# User parameters
|
|
@@ -413,10 +568,11 @@ class TheTool:
|
|
|
413
568
|
temperature=temperature,
|
|
414
569
|
logprobs=logprobs,
|
|
415
570
|
top_logprobs=top_logprobs,
|
|
571
|
+
validator=validator,
|
|
572
|
+
max_validation_retries=max_validation_retries,
|
|
416
573
|
# Internal parameters
|
|
417
574
|
prompt_file="run_custom.yaml",
|
|
418
575
|
user_prompt=None,
|
|
419
576
|
with_analysis=False,
|
|
420
577
|
mode=None,
|
|
421
|
-
validator=None,
|
|
422
578
|
)
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
class Formatter:
|
|
2
|
-
@staticmethod
|
|
3
|
-
def user_merge_format(messages: list[dict[str, str]]) -> list[dict[str, str]]:
|
|
4
|
-
"""
|
|
5
|
-
Merges consecutive user messages into a single message, separated by newlines.
|
|
6
|
-
|
|
7
|
-
This is useful for condensing a multi-turn user input into a single
|
|
8
|
-
message for the LLM. Assistant and system messages are left unchanged and
|
|
9
|
-
act as separators between user message groups.
|
|
10
|
-
"""
|
|
11
|
-
merged: list[dict[str, str]] = []
|
|
12
|
-
|
|
13
|
-
for message in messages:
|
|
14
|
-
role, content = message["role"], message["content"].strip()
|
|
15
|
-
|
|
16
|
-
# Merge with previous user turn
|
|
17
|
-
if merged and role == "user" and merged[-1]["role"] == "user":
|
|
18
|
-
merged[-1]["content"] += "\n" + content
|
|
19
|
-
|
|
20
|
-
# Otherwise, start a new turn
|
|
21
|
-
else:
|
|
22
|
-
merged.append({"role": role, "content": content})
|
|
23
|
-
|
|
24
|
-
return merged
|
|
File without changes
|
|
File without changes
|
|
File without changes
|