hamtaa-texttools 1.3.2__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/METADATA +40 -47
- hamtaa_texttools-2.1.0.dist-info/RECORD +30 -0
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/WHEEL +1 -1
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/licenses/LICENSE +1 -1
- texttools/__init__.py +1 -1
- texttools/core/internal_models.py +16 -7
- texttools/core/operators/async_operator.py +10 -16
- texttools/core/operators/sync_operator.py +10 -16
- texttools/core/utils.py +260 -0
- texttools/models.py +77 -22
- texttools/prompts/{rewrite.yaml → augment.yaml} +3 -3
- texttools/prompts/categorize.yaml +7 -8
- texttools/prompts/extract_entities.yaml +2 -2
- texttools/prompts/extract_keywords.yaml +4 -2
- texttools/prompts/{check_fact.yaml → is_fact.yaml} +5 -4
- texttools/prompts/is_question.yaml +1 -1
- texttools/prompts/merge_questions.yaml +8 -6
- texttools/prompts/propositionize.yaml +11 -7
- texttools/prompts/run_custom.yaml +3 -1
- texttools/prompts/summarize.yaml +3 -3
- texttools/prompts/to_question.yaml +60 -0
- texttools/prompts/translate.yaml +4 -4
- texttools/tools/async_tools.py +152 -169
- texttools/tools/sync_tools.py +138 -150
- hamtaa_texttools-1.3.2.dist-info/RECORD +0 -31
- texttools/core/engine.py +0 -262
- texttools/prompts/subject_to_question.yaml +0 -26
- texttools/prompts/text_to_question.yaml +0 -26
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/top_level.txt +0 -0
texttools/tools/async_tools.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from collections.abc import Callable
|
|
2
3
|
from time import perf_counter
|
|
3
4
|
from typing import Any, Literal
|
|
4
5
|
|
|
5
6
|
from openai import AsyncOpenAI
|
|
6
7
|
|
|
7
|
-
from ..core.engine import text_to_chunks, run_with_timeout
|
|
8
8
|
from ..core.exceptions import LLMError, PromptError, TextToolsError, ValidationError
|
|
9
9
|
from ..core.internal_models import (
|
|
10
10
|
Bool,
|
|
@@ -15,21 +15,20 @@ from ..core.internal_models import (
|
|
|
15
15
|
create_dynamic_model,
|
|
16
16
|
)
|
|
17
17
|
from ..core.operators.async_operator import AsyncOperator
|
|
18
|
+
from ..core.utils import TheToolUtils
|
|
18
19
|
from ..models import CategoryTree, ToolOutput, ToolOutputMetadata
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class AsyncTheTool:
|
|
22
|
-
"""
|
|
23
|
-
Each method configures the operator with a specific YAML prompt,
|
|
24
|
-
output schema, and flags, then delegates execution to `operator.run()`.
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
23
|
def __init__(
|
|
28
24
|
self,
|
|
29
25
|
client: AsyncOpenAI,
|
|
30
26
|
model: str,
|
|
27
|
+
raise_on_error: bool = True,
|
|
31
28
|
):
|
|
32
29
|
self._operator = AsyncOperator(client=client, model=model)
|
|
30
|
+
self.logger = logging.getLogger(self.__class__.__name__)
|
|
31
|
+
self.raise_on_error = raise_on_error
|
|
33
32
|
|
|
34
33
|
async def categorize(
|
|
35
34
|
self,
|
|
@@ -46,16 +45,14 @@ class AsyncTheTool:
|
|
|
46
45
|
timeout: float | None = None,
|
|
47
46
|
) -> ToolOutput:
|
|
48
47
|
"""
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
Important Note: category_tree mode is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
48
|
+
Classify text into given categories
|
|
52
49
|
|
|
53
50
|
Arguments:
|
|
54
51
|
text: The input text
|
|
55
52
|
categories: The category list / category tree
|
|
56
|
-
with_analysis:
|
|
53
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
57
54
|
user_prompt: Additional instructions
|
|
58
|
-
temperature: Controls randomness
|
|
55
|
+
temperature: Controls randomness
|
|
59
56
|
logprobs: Whether to return token probability information
|
|
60
57
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
61
58
|
validator: Custom validation function to validate the output
|
|
@@ -72,7 +69,7 @@ class AsyncTheTool:
|
|
|
72
69
|
|
|
73
70
|
try:
|
|
74
71
|
if isinstance(categories, list):
|
|
75
|
-
operator_output = await run_with_timeout(
|
|
72
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
76
73
|
self._operator.run(
|
|
77
74
|
# User parameters
|
|
78
75
|
text=text,
|
|
@@ -121,7 +118,7 @@ class AsyncTheTool:
|
|
|
121
118
|
]
|
|
122
119
|
category_names = list(parent_node.children.keys())
|
|
123
120
|
|
|
124
|
-
level_operator_output = await run_with_timeout(
|
|
121
|
+
level_operator_output = await TheToolUtils.run_with_timeout(
|
|
125
122
|
self._operator.run(
|
|
126
123
|
# User parameters
|
|
127
124
|
text=text,
|
|
@@ -165,6 +162,11 @@ class AsyncTheTool:
|
|
|
165
162
|
)
|
|
166
163
|
|
|
167
164
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
165
|
+
self.logger.error(str(e))
|
|
166
|
+
|
|
167
|
+
if self.raise_on_error:
|
|
168
|
+
raise
|
|
169
|
+
|
|
168
170
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
169
171
|
tool_output = ToolOutput(
|
|
170
172
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -175,28 +177,30 @@ class AsyncTheTool:
|
|
|
175
177
|
async def extract_keywords(
|
|
176
178
|
self,
|
|
177
179
|
text: str,
|
|
180
|
+
mode: Literal["auto", "threshold", "count"],
|
|
181
|
+
number_of_keywords: int | None = None,
|
|
178
182
|
with_analysis: bool = False,
|
|
179
183
|
output_lang: str | None = None,
|
|
180
184
|
user_prompt: str | None = None,
|
|
181
185
|
temperature: float | None = 0.0,
|
|
182
186
|
logprobs: bool = False,
|
|
183
187
|
top_logprobs: int = 3,
|
|
184
|
-
mode: Literal["auto", "threshold", "count"] = "auto",
|
|
185
|
-
number_of_keywords: int | None = None,
|
|
186
188
|
validator: Callable[[Any], bool] | None = None,
|
|
187
189
|
max_validation_retries: int | None = None,
|
|
188
190
|
priority: int | None = None,
|
|
189
191
|
timeout: float | None = None,
|
|
190
192
|
) -> ToolOutput:
|
|
191
193
|
"""
|
|
192
|
-
Extract
|
|
194
|
+
Extract keywords from the text
|
|
193
195
|
|
|
194
196
|
Arguments:
|
|
195
197
|
text: The input text
|
|
196
|
-
|
|
197
|
-
|
|
198
|
+
mode: auto -> choose the number of keywords automatically, threshold -> choose the number of keywords using a threshold, count -> return the number of keywords given in number_of_keywords
|
|
199
|
+
number_of_keywords: Must be set only when using "count" mode
|
|
200
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
201
|
+
output_lang: Forces the model to respond in a specific language
|
|
198
202
|
user_prompt: Additional instructions
|
|
199
|
-
temperature: Controls randomness
|
|
203
|
+
temperature: Controls randomness
|
|
200
204
|
logprobs: Whether to return token probability information
|
|
201
205
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
202
206
|
validator: Custom validation function to validate the output
|
|
@@ -211,24 +215,24 @@ class AsyncTheTool:
|
|
|
211
215
|
start = perf_counter()
|
|
212
216
|
|
|
213
217
|
try:
|
|
214
|
-
operator_output = await run_with_timeout(
|
|
218
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
215
219
|
self._operator.run(
|
|
216
220
|
# User parameters
|
|
217
221
|
text=text,
|
|
218
222
|
with_analysis=with_analysis,
|
|
223
|
+
number_of_keywords=number_of_keywords,
|
|
224
|
+
mode=mode,
|
|
219
225
|
output_lang=output_lang,
|
|
220
226
|
user_prompt=user_prompt,
|
|
221
227
|
temperature=temperature,
|
|
222
228
|
logprobs=logprobs,
|
|
223
229
|
top_logprobs=top_logprobs,
|
|
224
|
-
number_of_keywords=number_of_keywords,
|
|
225
230
|
validator=validator,
|
|
226
231
|
max_validation_retries=max_validation_retries,
|
|
227
232
|
priority=priority,
|
|
228
233
|
# Internal parameters
|
|
229
234
|
tool_name=tool_name,
|
|
230
235
|
output_model=ListStr,
|
|
231
|
-
mode=mode,
|
|
232
236
|
),
|
|
233
237
|
timeout=timeout,
|
|
234
238
|
)
|
|
@@ -244,6 +248,11 @@ class AsyncTheTool:
|
|
|
244
248
|
)
|
|
245
249
|
|
|
246
250
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
251
|
+
self.logger.error(str(e))
|
|
252
|
+
|
|
253
|
+
if self.raise_on_error:
|
|
254
|
+
raise
|
|
255
|
+
|
|
247
256
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
248
257
|
tool_output = ToolOutput(
|
|
249
258
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -254,7 +263,7 @@ class AsyncTheTool:
|
|
|
254
263
|
async def extract_entities(
|
|
255
264
|
self,
|
|
256
265
|
text: str,
|
|
257
|
-
entities: list[str]
|
|
266
|
+
entities: list[str] = ["all named entities"],
|
|
258
267
|
with_analysis: bool = False,
|
|
259
268
|
output_lang: str | None = None,
|
|
260
269
|
user_prompt: str | None = None,
|
|
@@ -267,15 +276,15 @@ class AsyncTheTool:
|
|
|
267
276
|
timeout: float | None = None,
|
|
268
277
|
) -> ToolOutput:
|
|
269
278
|
"""
|
|
270
|
-
Perform Named Entity Recognition (NER)
|
|
279
|
+
Perform Named Entity Recognition (NER)
|
|
271
280
|
|
|
272
281
|
Arguments:
|
|
273
282
|
text: The input text
|
|
274
|
-
entities: List of entities
|
|
275
|
-
with_analysis:
|
|
276
|
-
output_lang:
|
|
283
|
+
entities: List of entities
|
|
284
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
285
|
+
output_lang: Forces the model to respond in a specific language
|
|
277
286
|
user_prompt: Additional instructions
|
|
278
|
-
temperature: Controls randomness
|
|
287
|
+
temperature: Controls randomness
|
|
279
288
|
logprobs: Whether to return token probability information
|
|
280
289
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
281
290
|
validator: Custom validation function to validate the output
|
|
@@ -290,12 +299,11 @@ class AsyncTheTool:
|
|
|
290
299
|
start = perf_counter()
|
|
291
300
|
|
|
292
301
|
try:
|
|
293
|
-
operator_output = await run_with_timeout(
|
|
302
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
294
303
|
self._operator.run(
|
|
295
304
|
# User parameters
|
|
296
305
|
text=text,
|
|
297
|
-
entities=entities
|
|
298
|
-
or "all named entities (e.g., PER, ORG, LOC, DAT, etc.)",
|
|
306
|
+
entities=entities,
|
|
299
307
|
with_analysis=with_analysis,
|
|
300
308
|
output_lang=output_lang,
|
|
301
309
|
user_prompt=user_prompt,
|
|
@@ -324,6 +332,11 @@ class AsyncTheTool:
|
|
|
324
332
|
)
|
|
325
333
|
|
|
326
334
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
335
|
+
self.logger.error(str(e))
|
|
336
|
+
|
|
337
|
+
if self.raise_on_error:
|
|
338
|
+
raise
|
|
339
|
+
|
|
327
340
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
328
341
|
tool_output = ToolOutput(
|
|
329
342
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -349,9 +362,9 @@ class AsyncTheTool:
|
|
|
349
362
|
|
|
350
363
|
Arguments:
|
|
351
364
|
text: The input text
|
|
352
|
-
with_analysis:
|
|
365
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
353
366
|
user_prompt: Additional instructions
|
|
354
|
-
temperature: Controls randomness
|
|
367
|
+
temperature: Controls randomness
|
|
355
368
|
logprobs: Whether to return token probability information
|
|
356
369
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
357
370
|
validator: Custom validation function to validate the output
|
|
@@ -366,7 +379,7 @@ class AsyncTheTool:
|
|
|
366
379
|
start = perf_counter()
|
|
367
380
|
|
|
368
381
|
try:
|
|
369
|
-
operator_output = await run_with_timeout(
|
|
382
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
370
383
|
self._operator.run(
|
|
371
384
|
# User parameters
|
|
372
385
|
text=text,
|
|
@@ -398,6 +411,11 @@ class AsyncTheTool:
|
|
|
398
411
|
)
|
|
399
412
|
|
|
400
413
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
414
|
+
self.logger.error(str(e))
|
|
415
|
+
|
|
416
|
+
if self.raise_on_error:
|
|
417
|
+
raise
|
|
418
|
+
|
|
401
419
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
402
420
|
tool_output = ToolOutput(
|
|
403
421
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -405,10 +423,11 @@ class AsyncTheTool:
|
|
|
405
423
|
|
|
406
424
|
return tool_output
|
|
407
425
|
|
|
408
|
-
async def
|
|
426
|
+
async def to_question(
|
|
409
427
|
self,
|
|
410
428
|
text: str,
|
|
411
429
|
number_of_questions: int,
|
|
430
|
+
mode: Literal["from_text", "from_subject"],
|
|
412
431
|
with_analysis: bool = False,
|
|
413
432
|
output_lang: str | None = None,
|
|
414
433
|
user_prompt: str | None = None,
|
|
@@ -421,15 +440,16 @@ class AsyncTheTool:
|
|
|
421
440
|
timeout: float | None = None,
|
|
422
441
|
) -> ToolOutput:
|
|
423
442
|
"""
|
|
424
|
-
Generate
|
|
443
|
+
Generate questions from the given text / subject
|
|
425
444
|
|
|
426
445
|
Arguments:
|
|
427
446
|
text: The input text
|
|
447
|
+
mode: from_text -> generate questions from an answer, from_subject -> generate questions from a subject
|
|
428
448
|
number_of_questions: Number of questions to generate
|
|
429
|
-
with_analysis:
|
|
430
|
-
output_lang:
|
|
449
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
450
|
+
output_lang: Forces the model to respond in a specific language
|
|
431
451
|
user_prompt: Additional instructions
|
|
432
|
-
temperature: Controls randomness
|
|
452
|
+
temperature: Controls randomness
|
|
433
453
|
logprobs: Whether to return token probability information
|
|
434
454
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
435
455
|
validator: Custom validation function to validate the output
|
|
@@ -440,15 +460,16 @@ class AsyncTheTool:
|
|
|
440
460
|
Returns:
|
|
441
461
|
ToolOutput
|
|
442
462
|
"""
|
|
443
|
-
tool_name = "
|
|
463
|
+
tool_name = "to_question"
|
|
444
464
|
start = perf_counter()
|
|
445
465
|
|
|
446
466
|
try:
|
|
447
|
-
operator_output = await run_with_timeout(
|
|
467
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
448
468
|
self._operator.run(
|
|
449
469
|
# User parameters
|
|
450
470
|
text=text,
|
|
451
471
|
number_of_questions=number_of_questions,
|
|
472
|
+
mode=mode,
|
|
452
473
|
with_analysis=with_analysis,
|
|
453
474
|
output_lang=output_lang,
|
|
454
475
|
user_prompt=user_prompt,
|
|
@@ -461,7 +482,6 @@ class AsyncTheTool:
|
|
|
461
482
|
# Internal parameters
|
|
462
483
|
tool_name=tool_name,
|
|
463
484
|
output_model=ReasonListStr,
|
|
464
|
-
mode=None,
|
|
465
485
|
),
|
|
466
486
|
timeout=timeout,
|
|
467
487
|
)
|
|
@@ -477,6 +497,11 @@ class AsyncTheTool:
|
|
|
477
497
|
)
|
|
478
498
|
|
|
479
499
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
500
|
+
self.logger.error(str(e))
|
|
501
|
+
|
|
502
|
+
if self.raise_on_error:
|
|
503
|
+
raise
|
|
504
|
+
|
|
480
505
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
481
506
|
tool_output = ToolOutput(
|
|
482
507
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -487,27 +512,28 @@ class AsyncTheTool:
|
|
|
487
512
|
async def merge_questions(
|
|
488
513
|
self,
|
|
489
514
|
text: list[str],
|
|
515
|
+
mode: Literal["simple", "stepwise"],
|
|
490
516
|
with_analysis: bool = False,
|
|
491
517
|
output_lang: str | None = None,
|
|
492
518
|
user_prompt: str | None = None,
|
|
493
519
|
temperature: float | None = 0.0,
|
|
494
520
|
logprobs: bool = False,
|
|
495
521
|
top_logprobs: int = 3,
|
|
496
|
-
mode: Literal["default", "reason"] = "default",
|
|
497
522
|
validator: Callable[[Any], bool] | None = None,
|
|
498
523
|
max_validation_retries: int | None = None,
|
|
499
524
|
priority: int | None = None,
|
|
500
525
|
timeout: float | None = None,
|
|
501
526
|
) -> ToolOutput:
|
|
502
527
|
"""
|
|
503
|
-
Merge multiple questions into a single unified question
|
|
528
|
+
Merge multiple questions into a single unified question
|
|
504
529
|
|
|
505
530
|
Arguments:
|
|
506
531
|
text: List of questions to merge
|
|
507
|
-
|
|
508
|
-
|
|
532
|
+
mode: simple -> regular question merging, stepwise -> merge questions in two steps
|
|
533
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
534
|
+
output_lang: Forces the model to respond in a specific language
|
|
509
535
|
user_prompt: Additional instructions
|
|
510
|
-
temperature: Controls randomness
|
|
536
|
+
temperature: Controls randomness
|
|
511
537
|
logprobs: Whether to return token probability information
|
|
512
538
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
513
539
|
validator: Custom validation function to validate the output
|
|
@@ -523,10 +549,11 @@ class AsyncTheTool:
|
|
|
523
549
|
|
|
524
550
|
try:
|
|
525
551
|
text = ", ".join(text)
|
|
526
|
-
operator_output = await run_with_timeout(
|
|
552
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
527
553
|
self._operator.run(
|
|
528
554
|
# User parameters
|
|
529
555
|
text=text,
|
|
556
|
+
mode=mode,
|
|
530
557
|
with_analysis=with_analysis,
|
|
531
558
|
output_lang=output_lang,
|
|
532
559
|
user_prompt=user_prompt,
|
|
@@ -539,7 +566,6 @@ class AsyncTheTool:
|
|
|
539
566
|
# Internal parameters
|
|
540
567
|
tool_name=tool_name,
|
|
541
568
|
output_model=Str,
|
|
542
|
-
mode=mode,
|
|
543
569
|
),
|
|
544
570
|
timeout=timeout,
|
|
545
571
|
)
|
|
@@ -555,6 +581,11 @@ class AsyncTheTool:
|
|
|
555
581
|
)
|
|
556
582
|
|
|
557
583
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
584
|
+
self.logger.error(str(e))
|
|
585
|
+
|
|
586
|
+
if self.raise_on_error:
|
|
587
|
+
raise
|
|
588
|
+
|
|
558
589
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
559
590
|
tool_output = ToolOutput(
|
|
560
591
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -562,30 +593,31 @@ class AsyncTheTool:
|
|
|
562
593
|
|
|
563
594
|
return tool_output
|
|
564
595
|
|
|
565
|
-
async def
|
|
596
|
+
async def augment(
|
|
566
597
|
self,
|
|
567
598
|
text: str,
|
|
599
|
+
mode: Literal["positive", "negative", "hard_negative"],
|
|
568
600
|
with_analysis: bool = False,
|
|
569
601
|
output_lang: str | None = None,
|
|
570
602
|
user_prompt: str | None = None,
|
|
571
603
|
temperature: float | None = 0.0,
|
|
572
604
|
logprobs: bool = False,
|
|
573
605
|
top_logprobs: int = 3,
|
|
574
|
-
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
575
606
|
validator: Callable[[Any], bool] | None = None,
|
|
576
607
|
max_validation_retries: int | None = None,
|
|
577
608
|
priority: int | None = None,
|
|
578
609
|
timeout: float | None = None,
|
|
579
610
|
) -> ToolOutput:
|
|
580
611
|
"""
|
|
581
|
-
Rewrite
|
|
612
|
+
Rewrite the text using the selected augmentation mode
|
|
582
613
|
|
|
583
614
|
Arguments:
|
|
584
615
|
text: The input text
|
|
585
|
-
|
|
586
|
-
|
|
616
|
+
mode: positive -> positive augmentation, negative -> negative augmentation, hard_negative -> hard negative augmentation
|
|
617
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
618
|
+
output_lang: Forces the model to respond in a specific language
|
|
587
619
|
user_prompt: Additional instructions
|
|
588
|
-
temperature: Controls randomness
|
|
620
|
+
temperature: Controls randomness
|
|
589
621
|
logprobs: Whether to return token probability information
|
|
590
622
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
591
623
|
validator: Custom validation function to validate the output
|
|
@@ -596,14 +628,15 @@ class AsyncTheTool:
|
|
|
596
628
|
Returns:
|
|
597
629
|
ToolOutput
|
|
598
630
|
"""
|
|
599
|
-
tool_name = "
|
|
631
|
+
tool_name = "augment"
|
|
600
632
|
start = perf_counter()
|
|
601
633
|
|
|
602
634
|
try:
|
|
603
|
-
operator_output = await run_with_timeout(
|
|
635
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
604
636
|
self._operator.run(
|
|
605
637
|
# User parameters
|
|
606
638
|
text=text,
|
|
639
|
+
mode=mode,
|
|
607
640
|
with_analysis=with_analysis,
|
|
608
641
|
output_lang=output_lang,
|
|
609
642
|
user_prompt=user_prompt,
|
|
@@ -616,7 +649,6 @@ class AsyncTheTool:
|
|
|
616
649
|
# Internal parameters
|
|
617
650
|
tool_name=tool_name,
|
|
618
651
|
output_model=Str,
|
|
619
|
-
mode=mode,
|
|
620
652
|
),
|
|
621
653
|
timeout=timeout,
|
|
622
654
|
)
|
|
@@ -632,85 +664,11 @@ class AsyncTheTool:
|
|
|
632
664
|
)
|
|
633
665
|
|
|
634
666
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
635
|
-
|
|
636
|
-
tool_output = ToolOutput(
|
|
637
|
-
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
638
|
-
)
|
|
667
|
+
self.logger.error(str(e))
|
|
639
668
|
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
async def subject_to_question(
|
|
643
|
-
self,
|
|
644
|
-
text: str,
|
|
645
|
-
number_of_questions: int,
|
|
646
|
-
with_analysis: bool = False,
|
|
647
|
-
output_lang: str | None = None,
|
|
648
|
-
user_prompt: str | None = None,
|
|
649
|
-
temperature: float | None = 0.0,
|
|
650
|
-
logprobs: bool = False,
|
|
651
|
-
top_logprobs: int = 3,
|
|
652
|
-
validator: Callable[[Any], bool] | None = None,
|
|
653
|
-
max_validation_retries: int | None = None,
|
|
654
|
-
priority: int | None = None,
|
|
655
|
-
timeout: float | None = None,
|
|
656
|
-
) -> ToolOutput:
|
|
657
|
-
"""
|
|
658
|
-
Generate a list of questions about a subject.
|
|
669
|
+
if self.raise_on_error:
|
|
670
|
+
raise
|
|
659
671
|
|
|
660
|
-
Arguments:
|
|
661
|
-
text: The subject text to generate questions about
|
|
662
|
-
number_of_questions: Number of questions to generate
|
|
663
|
-
with_analysis: Whether to include detailed reasoning analysis
|
|
664
|
-
output_lang: Language for the output
|
|
665
|
-
user_prompt: Additional instructions
|
|
666
|
-
temperature: Controls randomness (0.0 - 2.0)
|
|
667
|
-
logprobs: Whether to return token probability information
|
|
668
|
-
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
669
|
-
validator: Custom validation function to validate the output
|
|
670
|
-
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
671
|
-
priority: Task execution priority (if enabled by vLLM and the model)
|
|
672
|
-
timeout: Maximum time in seconds to wait for the response before raising a timeout error
|
|
673
|
-
|
|
674
|
-
Returns:
|
|
675
|
-
ToolOutput
|
|
676
|
-
"""
|
|
677
|
-
tool_name = "subject_to_question"
|
|
678
|
-
start = perf_counter()
|
|
679
|
-
|
|
680
|
-
try:
|
|
681
|
-
operator_output = await run_with_timeout(
|
|
682
|
-
self._operator.run(
|
|
683
|
-
# User parameters
|
|
684
|
-
text=text,
|
|
685
|
-
number_of_questions=number_of_questions,
|
|
686
|
-
with_analysis=with_analysis,
|
|
687
|
-
output_lang=output_lang,
|
|
688
|
-
user_prompt=user_prompt,
|
|
689
|
-
temperature=temperature,
|
|
690
|
-
logprobs=logprobs,
|
|
691
|
-
top_logprobs=top_logprobs,
|
|
692
|
-
validator=validator,
|
|
693
|
-
max_validation_retries=max_validation_retries,
|
|
694
|
-
priority=priority,
|
|
695
|
-
# Internal parameters
|
|
696
|
-
tool_name=tool_name,
|
|
697
|
-
output_model=ReasonListStr,
|
|
698
|
-
mode=None,
|
|
699
|
-
),
|
|
700
|
-
timeout=timeout,
|
|
701
|
-
)
|
|
702
|
-
|
|
703
|
-
metadata = ToolOutputMetadata(
|
|
704
|
-
tool_name=tool_name, execution_time=perf_counter() - start
|
|
705
|
-
)
|
|
706
|
-
tool_output = ToolOutput(
|
|
707
|
-
result=operator_output.result,
|
|
708
|
-
logprobs=operator_output.logprobs,
|
|
709
|
-
analysis=operator_output.analysis,
|
|
710
|
-
metadata=metadata,
|
|
711
|
-
)
|
|
712
|
-
|
|
713
|
-
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
714
672
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
715
673
|
tool_output = ToolOutput(
|
|
716
674
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -733,14 +691,14 @@ class AsyncTheTool:
|
|
|
733
691
|
timeout: float | None = None,
|
|
734
692
|
) -> ToolOutput:
|
|
735
693
|
"""
|
|
736
|
-
Summarize the given
|
|
694
|
+
Summarize the given text
|
|
737
695
|
|
|
738
696
|
Arguments:
|
|
739
697
|
text: The input text
|
|
740
|
-
with_analysis:
|
|
741
|
-
output_lang:
|
|
698
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
699
|
+
output_lang: Forces the model to respond in a specific language
|
|
742
700
|
user_prompt: Additional instructions
|
|
743
|
-
temperature: Controls randomness
|
|
701
|
+
temperature: Controls randomness
|
|
744
702
|
logprobs: Whether to return token probability information
|
|
745
703
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
746
704
|
validator: Custom validation function to validate the output
|
|
@@ -755,7 +713,7 @@ class AsyncTheTool:
|
|
|
755
713
|
start = perf_counter()
|
|
756
714
|
|
|
757
715
|
try:
|
|
758
|
-
operator_output = await run_with_timeout(
|
|
716
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
759
717
|
self._operator.run(
|
|
760
718
|
# User parameters
|
|
761
719
|
text=text,
|
|
@@ -787,6 +745,11 @@ class AsyncTheTool:
|
|
|
787
745
|
)
|
|
788
746
|
|
|
789
747
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
748
|
+
self.logger.error(str(e))
|
|
749
|
+
|
|
750
|
+
if self.raise_on_error:
|
|
751
|
+
raise
|
|
752
|
+
|
|
790
753
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
791
754
|
tool_output = ToolOutput(
|
|
792
755
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -797,7 +760,7 @@ class AsyncTheTool:
|
|
|
797
760
|
async def translate(
|
|
798
761
|
self,
|
|
799
762
|
text: str,
|
|
800
|
-
|
|
763
|
+
target_lang: str,
|
|
801
764
|
use_chunker: bool = True,
|
|
802
765
|
with_analysis: bool = False,
|
|
803
766
|
user_prompt: str | None = None,
|
|
@@ -810,17 +773,17 @@ class AsyncTheTool:
|
|
|
810
773
|
timeout: float | None = None,
|
|
811
774
|
) -> ToolOutput:
|
|
812
775
|
"""
|
|
813
|
-
Translate text between languages
|
|
776
|
+
Translate text between languages
|
|
814
777
|
|
|
815
778
|
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
816
779
|
|
|
817
780
|
Arguments:
|
|
818
781
|
text: The input text
|
|
819
|
-
|
|
820
|
-
use_chunker: Whether to use text chunker for
|
|
821
|
-
with_analysis:
|
|
782
|
+
target_lang: The target language for translation
|
|
783
|
+
use_chunker: Whether to use text chunker for large texts
|
|
784
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
822
785
|
user_prompt: Additional instructions
|
|
823
|
-
temperature: Controls randomness
|
|
786
|
+
temperature: Controls randomness
|
|
824
787
|
logprobs: Whether to return token probability information
|
|
825
788
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
826
789
|
validator: Custom validation function to validate the output
|
|
@@ -836,17 +799,17 @@ class AsyncTheTool:
|
|
|
836
799
|
|
|
837
800
|
try:
|
|
838
801
|
if len(text.split(" ")) > 1500 and use_chunker:
|
|
839
|
-
chunks =
|
|
802
|
+
chunks = TheToolUtils.to_chunks(text, 1200, 0)
|
|
840
803
|
translation = ""
|
|
841
804
|
analysis = ""
|
|
842
805
|
logprobs_list = []
|
|
843
806
|
|
|
844
807
|
for chunk in chunks:
|
|
845
|
-
chunk_operator_output = await run_with_timeout(
|
|
808
|
+
chunk_operator_output = await TheToolUtils.run_with_timeout(
|
|
846
809
|
self._operator.run(
|
|
847
810
|
# User parameters
|
|
848
811
|
text=chunk,
|
|
849
|
-
|
|
812
|
+
target_lang=target_lang,
|
|
850
813
|
with_analysis=with_analysis,
|
|
851
814
|
user_prompt=user_prompt,
|
|
852
815
|
temperature=temperature,
|
|
@@ -882,11 +845,11 @@ class AsyncTheTool:
|
|
|
882
845
|
)
|
|
883
846
|
|
|
884
847
|
else:
|
|
885
|
-
operator_output = await run_with_timeout(
|
|
848
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
886
849
|
self._operator.run(
|
|
887
850
|
# User parameters
|
|
888
851
|
text=text,
|
|
889
|
-
|
|
852
|
+
target_lang=target_lang,
|
|
890
853
|
with_analysis=with_analysis,
|
|
891
854
|
user_prompt=user_prompt,
|
|
892
855
|
temperature=temperature,
|
|
@@ -915,6 +878,11 @@ class AsyncTheTool:
|
|
|
915
878
|
)
|
|
916
879
|
|
|
917
880
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
881
|
+
self.logger.error(str(e))
|
|
882
|
+
|
|
883
|
+
if self.raise_on_error:
|
|
884
|
+
raise
|
|
885
|
+
|
|
918
886
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
919
887
|
tool_output = ToolOutput(
|
|
920
888
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -937,16 +905,16 @@ class AsyncTheTool:
|
|
|
937
905
|
timeout: float | None = None,
|
|
938
906
|
) -> ToolOutput:
|
|
939
907
|
"""
|
|
940
|
-
|
|
908
|
+
Convert a text into atomic, independent, meaningful sentences
|
|
941
909
|
|
|
942
910
|
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
943
911
|
|
|
944
912
|
Arguments:
|
|
945
913
|
text: The input text
|
|
946
|
-
with_analysis:
|
|
947
|
-
output_lang:
|
|
914
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
915
|
+
output_lang: Forces the model to respond in a specific language
|
|
948
916
|
user_prompt: Additional instructions
|
|
949
|
-
temperature: Controls randomness
|
|
917
|
+
temperature: Controls randomness
|
|
950
918
|
logprobs: Whether to return token probability information
|
|
951
919
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
952
920
|
validator: Custom validation function to validate the output
|
|
@@ -961,7 +929,7 @@ class AsyncTheTool:
|
|
|
961
929
|
start = perf_counter()
|
|
962
930
|
|
|
963
931
|
try:
|
|
964
|
-
operator_output = await run_with_timeout(
|
|
932
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
965
933
|
self._operator.run(
|
|
966
934
|
# User parameters
|
|
967
935
|
text=text,
|
|
@@ -993,6 +961,11 @@ class AsyncTheTool:
|
|
|
993
961
|
)
|
|
994
962
|
|
|
995
963
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
964
|
+
self.logger.error(str(e))
|
|
965
|
+
|
|
966
|
+
if self.raise_on_error:
|
|
967
|
+
raise
|
|
968
|
+
|
|
996
969
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
997
970
|
tool_output = ToolOutput(
|
|
998
971
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -1000,7 +973,7 @@ class AsyncTheTool:
|
|
|
1000
973
|
|
|
1001
974
|
return tool_output
|
|
1002
975
|
|
|
1003
|
-
async def
|
|
976
|
+
async def is_fact(
|
|
1004
977
|
self,
|
|
1005
978
|
text: str,
|
|
1006
979
|
source_text: str,
|
|
@@ -1016,17 +989,17 @@ class AsyncTheTool:
|
|
|
1016
989
|
timeout: float | None = None,
|
|
1017
990
|
) -> ToolOutput:
|
|
1018
991
|
"""
|
|
1019
|
-
|
|
992
|
+
Check whether a statement is a fact based on the source text
|
|
1020
993
|
|
|
1021
994
|
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
1022
995
|
|
|
1023
996
|
Arguments:
|
|
1024
997
|
text: The input text
|
|
1025
|
-
source_text: The source text
|
|
1026
|
-
with_analysis:
|
|
1027
|
-
output_lang:
|
|
998
|
+
source_text: The source text
|
|
999
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
1000
|
+
output_lang: Forces the model to respond in a specific language
|
|
1028
1001
|
user_prompt: Additional instructions
|
|
1029
|
-
temperature: Controls randomness
|
|
1002
|
+
temperature: Controls randomness
|
|
1030
1003
|
logprobs: Whether to return token probability information
|
|
1031
1004
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
1032
1005
|
validator: Custom validation function to validate the output
|
|
@@ -1037,11 +1010,11 @@ class AsyncTheTool:
|
|
|
1037
1010
|
Returns:
|
|
1038
1011
|
ToolOutput
|
|
1039
1012
|
"""
|
|
1040
|
-
tool_name = "
|
|
1013
|
+
tool_name = "is_fact"
|
|
1041
1014
|
start = perf_counter()
|
|
1042
1015
|
|
|
1043
1016
|
try:
|
|
1044
|
-
operator_output = await run_with_timeout(
|
|
1017
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
1045
1018
|
self._operator.run(
|
|
1046
1019
|
# User parameters
|
|
1047
1020
|
text=text,
|
|
@@ -1074,6 +1047,11 @@ class AsyncTheTool:
|
|
|
1074
1047
|
)
|
|
1075
1048
|
|
|
1076
1049
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
1050
|
+
self.logger.error(str(e))
|
|
1051
|
+
|
|
1052
|
+
if self.raise_on_error:
|
|
1053
|
+
raise
|
|
1054
|
+
|
|
1077
1055
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
1078
1056
|
tool_output = ToolOutput(
|
|
1079
1057
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -1097,15 +1075,15 @@ class AsyncTheTool:
|
|
|
1097
1075
|
timeout: float | None = None,
|
|
1098
1076
|
) -> ToolOutput:
|
|
1099
1077
|
"""
|
|
1100
|
-
Custom tool that can do almost anything
|
|
1078
|
+
Custom tool that can do almost anything
|
|
1101
1079
|
|
|
1102
1080
|
Arguments:
|
|
1103
1081
|
prompt: The user prompt
|
|
1104
1082
|
output_model: Pydantic BaseModel used for structured output
|
|
1105
|
-
with_analysis:
|
|
1083
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
1106
1084
|
analyze_template: The analyze template used for reasoning analysis
|
|
1107
|
-
output_lang:
|
|
1108
|
-
temperature: Controls randomness
|
|
1085
|
+
output_lang: Forces the model to respond in a specific language
|
|
1086
|
+
temperature: Controls randomness
|
|
1109
1087
|
logprobs: Whether to return token probability information
|
|
1110
1088
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
1111
1089
|
validator: Custom validation function to validate the output
|
|
@@ -1120,7 +1098,7 @@ class AsyncTheTool:
|
|
|
1120
1098
|
start = perf_counter()
|
|
1121
1099
|
|
|
1122
1100
|
try:
|
|
1123
|
-
operator_output = await run_with_timeout(
|
|
1101
|
+
operator_output = await TheToolUtils.run_with_timeout(
|
|
1124
1102
|
self._operator.run(
|
|
1125
1103
|
# User parameters
|
|
1126
1104
|
text=prompt,
|
|
@@ -1154,6 +1132,11 @@ class AsyncTheTool:
|
|
|
1154
1132
|
)
|
|
1155
1133
|
|
|
1156
1134
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
1135
|
+
self.logger.error(str(e))
|
|
1136
|
+
|
|
1137
|
+
if self.raise_on_error:
|
|
1138
|
+
raise
|
|
1139
|
+
|
|
1157
1140
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
1158
1141
|
tool_output = ToolOutput(
|
|
1159
1142
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|