hamtaa-texttools 1.3.2__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/METADATA +40 -47
- hamtaa_texttools-2.1.0.dist-info/RECORD +30 -0
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/WHEEL +1 -1
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/licenses/LICENSE +1 -1
- texttools/__init__.py +1 -1
- texttools/core/internal_models.py +16 -7
- texttools/core/operators/async_operator.py +10 -16
- texttools/core/operators/sync_operator.py +10 -16
- texttools/core/utils.py +260 -0
- texttools/models.py +77 -22
- texttools/prompts/{rewrite.yaml → augment.yaml} +3 -3
- texttools/prompts/categorize.yaml +7 -8
- texttools/prompts/extract_entities.yaml +2 -2
- texttools/prompts/extract_keywords.yaml +4 -2
- texttools/prompts/{check_fact.yaml → is_fact.yaml} +5 -4
- texttools/prompts/is_question.yaml +1 -1
- texttools/prompts/merge_questions.yaml +8 -6
- texttools/prompts/propositionize.yaml +11 -7
- texttools/prompts/run_custom.yaml +3 -1
- texttools/prompts/summarize.yaml +3 -3
- texttools/prompts/to_question.yaml +60 -0
- texttools/prompts/translate.yaml +4 -4
- texttools/tools/async_tools.py +152 -169
- texttools/tools/sync_tools.py +138 -150
- hamtaa_texttools-1.3.2.dist-info/RECORD +0 -31
- texttools/core/engine.py +0 -262
- texttools/prompts/subject_to_question.yaml +0 -26
- texttools/prompts/text_to_question.yaml +0 -26
- {hamtaa_texttools-1.3.2.dist-info → hamtaa_texttools-2.1.0.dist-info}/top_level.txt +0 -0
texttools/tools/sync_tools.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from collections.abc import Callable
|
|
2
3
|
from time import perf_counter
|
|
3
4
|
from typing import Any, Literal
|
|
4
5
|
|
|
5
6
|
from openai import OpenAI
|
|
6
7
|
|
|
7
|
-
from ..core.engine import text_to_chunks
|
|
8
8
|
from ..core.exceptions import LLMError, PromptError, TextToolsError, ValidationError
|
|
9
9
|
from ..core.internal_models import (
|
|
10
10
|
Bool,
|
|
@@ -15,21 +15,20 @@ from ..core.internal_models import (
|
|
|
15
15
|
create_dynamic_model,
|
|
16
16
|
)
|
|
17
17
|
from ..core.operators.sync_operator import Operator
|
|
18
|
+
from ..core.utils import TheToolUtils
|
|
18
19
|
from ..models import CategoryTree, ToolOutput, ToolOutputMetadata
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class TheTool:
|
|
22
|
-
"""
|
|
23
|
-
Each method configures the operator with a specific YAML prompt,
|
|
24
|
-
output schema, and flags, then delegates execution to `operator.run()`.
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
23
|
def __init__(
|
|
28
24
|
self,
|
|
29
25
|
client: OpenAI,
|
|
30
26
|
model: str,
|
|
27
|
+
raise_on_error: bool = True,
|
|
31
28
|
):
|
|
32
29
|
self._operator = Operator(client=client, model=model)
|
|
30
|
+
self.logger = logging.getLogger(self.__class__.__name__)
|
|
31
|
+
self.raise_on_error = raise_on_error
|
|
33
32
|
|
|
34
33
|
def categorize(
|
|
35
34
|
self,
|
|
@@ -45,16 +44,14 @@ class TheTool:
|
|
|
45
44
|
priority: int | None = None,
|
|
46
45
|
) -> ToolOutput:
|
|
47
46
|
"""
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
Important Note: category_tree mode is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
47
|
+
Classify text into given categories
|
|
51
48
|
|
|
52
49
|
Arguments:
|
|
53
50
|
text: The input text
|
|
54
51
|
categories: The category list / category tree
|
|
55
|
-
with_analysis:
|
|
52
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
56
53
|
user_prompt: Additional instructions
|
|
57
|
-
temperature: Controls randomness
|
|
54
|
+
temperature: Controls randomness
|
|
58
55
|
logprobs: Whether to return token probability information
|
|
59
56
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
60
57
|
validator: Custom validation function to validate the output
|
|
@@ -157,6 +154,11 @@ class TheTool:
|
|
|
157
154
|
)
|
|
158
155
|
|
|
159
156
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
157
|
+
self.logger.error(str(e))
|
|
158
|
+
|
|
159
|
+
if self.raise_on_error:
|
|
160
|
+
raise
|
|
161
|
+
|
|
160
162
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
161
163
|
tool_output = ToolOutput(
|
|
162
164
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -167,27 +169,29 @@ class TheTool:
|
|
|
167
169
|
def extract_keywords(
|
|
168
170
|
self,
|
|
169
171
|
text: str,
|
|
172
|
+
mode: Literal["auto", "threshold", "count"],
|
|
173
|
+
number_of_keywords: int | None = None,
|
|
170
174
|
with_analysis: bool = False,
|
|
171
175
|
output_lang: str | None = None,
|
|
172
176
|
user_prompt: str | None = None,
|
|
173
177
|
temperature: float | None = 0.0,
|
|
174
178
|
logprobs: bool = False,
|
|
175
179
|
top_logprobs: int = 3,
|
|
176
|
-
mode: Literal["auto", "threshold", "count"] = "auto",
|
|
177
|
-
number_of_keywords: int | None = None,
|
|
178
180
|
validator: Callable[[Any], bool] | None = None,
|
|
179
181
|
max_validation_retries: int | None = None,
|
|
180
182
|
priority: int | None = None,
|
|
181
183
|
) -> ToolOutput:
|
|
182
184
|
"""
|
|
183
|
-
Extract
|
|
185
|
+
Extract keywords from the text
|
|
184
186
|
|
|
185
187
|
Arguments:
|
|
186
188
|
text: The input text
|
|
187
|
-
|
|
188
|
-
|
|
189
|
+
mode: auto -> decide n of keywords automatically, threshold -> decide n of keywords by a threshold, count -> takes number of keywords as the parameter
|
|
190
|
+
number_of_keywords: Must be set only when using "count" mode
|
|
191
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
192
|
+
output_lang: Forces the model to respond in a specific language
|
|
189
193
|
user_prompt: Additional instructions
|
|
190
|
-
temperature: Controls randomness
|
|
194
|
+
temperature: Controls randomness
|
|
191
195
|
logprobs: Whether to return token probability information
|
|
192
196
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
193
197
|
validator: Custom validation function to validate the output
|
|
@@ -204,20 +208,20 @@ class TheTool:
|
|
|
204
208
|
operator_output = self._operator.run(
|
|
205
209
|
# User parameters
|
|
206
210
|
text=text,
|
|
211
|
+
number_of_keywords=number_of_keywords,
|
|
212
|
+
mode=mode,
|
|
207
213
|
with_analysis=with_analysis,
|
|
208
214
|
output_lang=output_lang,
|
|
209
215
|
user_prompt=user_prompt,
|
|
210
216
|
temperature=temperature,
|
|
211
217
|
logprobs=logprobs,
|
|
212
218
|
top_logprobs=top_logprobs,
|
|
213
|
-
number_of_keywords=number_of_keywords,
|
|
214
219
|
validator=validator,
|
|
215
220
|
max_validation_retries=max_validation_retries,
|
|
216
221
|
priority=priority,
|
|
217
222
|
# Internal parameters
|
|
218
223
|
tool_name=tool_name,
|
|
219
224
|
output_model=ListStr,
|
|
220
|
-
mode=mode,
|
|
221
225
|
)
|
|
222
226
|
|
|
223
227
|
metadata = ToolOutputMetadata(
|
|
@@ -231,6 +235,11 @@ class TheTool:
|
|
|
231
235
|
)
|
|
232
236
|
|
|
233
237
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
238
|
+
self.logger.error(str(e))
|
|
239
|
+
|
|
240
|
+
if self.raise_on_error:
|
|
241
|
+
raise
|
|
242
|
+
|
|
234
243
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
235
244
|
tool_output = ToolOutput(
|
|
236
245
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -241,7 +250,7 @@ class TheTool:
|
|
|
241
250
|
def extract_entities(
|
|
242
251
|
self,
|
|
243
252
|
text: str,
|
|
244
|
-
entities: list[str]
|
|
253
|
+
entities: list[str] = ["all named entities"],
|
|
245
254
|
with_analysis: bool = False,
|
|
246
255
|
output_lang: str | None = None,
|
|
247
256
|
user_prompt: str | None = None,
|
|
@@ -253,15 +262,15 @@ class TheTool:
|
|
|
253
262
|
priority: int | None = None,
|
|
254
263
|
) -> ToolOutput:
|
|
255
264
|
"""
|
|
256
|
-
Perform Named Entity Recognition (NER)
|
|
265
|
+
Perform Named Entity Recognition (NER)
|
|
257
266
|
|
|
258
267
|
Arguments:
|
|
259
268
|
text: The input text
|
|
260
|
-
entities: List of entities
|
|
261
|
-
with_analysis:
|
|
262
|
-
output_lang:
|
|
269
|
+
entities: List of entities
|
|
270
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
271
|
+
output_lang: Forces the model to respond in a specific language
|
|
263
272
|
user_prompt: Additional instructions
|
|
264
|
-
temperature: Controls randomness
|
|
273
|
+
temperature: Controls randomness
|
|
265
274
|
logprobs: Whether to return token probability information
|
|
266
275
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
267
276
|
validator: Custom validation function to validate the output
|
|
@@ -278,8 +287,7 @@ class TheTool:
|
|
|
278
287
|
operator_output = self._operator.run(
|
|
279
288
|
# User parameters
|
|
280
289
|
text=text,
|
|
281
|
-
entities=entities
|
|
282
|
-
or "all named entities (e.g., PER, ORG, LOC, DAT, etc.)",
|
|
290
|
+
entities=entities,
|
|
283
291
|
with_analysis=with_analysis,
|
|
284
292
|
output_lang=output_lang,
|
|
285
293
|
user_prompt=user_prompt,
|
|
@@ -306,6 +314,11 @@ class TheTool:
|
|
|
306
314
|
)
|
|
307
315
|
|
|
308
316
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
317
|
+
self.logger.error(str(e))
|
|
318
|
+
|
|
319
|
+
if self.raise_on_error:
|
|
320
|
+
raise
|
|
321
|
+
|
|
309
322
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
310
323
|
tool_output = ToolOutput(
|
|
311
324
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -330,9 +343,9 @@ class TheTool:
|
|
|
330
343
|
|
|
331
344
|
Arguments:
|
|
332
345
|
text: The input text
|
|
333
|
-
with_analysis:
|
|
346
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
334
347
|
user_prompt: Additional instructions
|
|
335
|
-
temperature: Controls randomness
|
|
348
|
+
temperature: Controls randomness
|
|
336
349
|
logprobs: Whether to return token probability information
|
|
337
350
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
338
351
|
validator: Custom validation function to validate the output
|
|
@@ -375,6 +388,11 @@ class TheTool:
|
|
|
375
388
|
)
|
|
376
389
|
|
|
377
390
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
391
|
+
self.logger.error(str(e))
|
|
392
|
+
|
|
393
|
+
if self.raise_on_error:
|
|
394
|
+
raise
|
|
395
|
+
|
|
378
396
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
379
397
|
tool_output = ToolOutput(
|
|
380
398
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -382,10 +400,11 @@ class TheTool:
|
|
|
382
400
|
|
|
383
401
|
return tool_output
|
|
384
402
|
|
|
385
|
-
def
|
|
403
|
+
def to_question(
|
|
386
404
|
self,
|
|
387
405
|
text: str,
|
|
388
406
|
number_of_questions: int,
|
|
407
|
+
mode: Literal["from_text", "from_subject"],
|
|
389
408
|
with_analysis: bool = False,
|
|
390
409
|
output_lang: str | None = None,
|
|
391
410
|
user_prompt: str | None = None,
|
|
@@ -397,15 +416,16 @@ class TheTool:
|
|
|
397
416
|
priority: int | None = None,
|
|
398
417
|
) -> ToolOutput:
|
|
399
418
|
"""
|
|
400
|
-
Generate
|
|
419
|
+
Generate questions from the given text / subject
|
|
401
420
|
|
|
402
421
|
Arguments:
|
|
403
422
|
text: The input text
|
|
423
|
+
mode: from_text -> generate questions from an answer, from_subject -> generate questions from a subject
|
|
404
424
|
number_of_questions: Number of questions to generate
|
|
405
|
-
with_analysis:
|
|
406
|
-
output_lang:
|
|
425
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
426
|
+
output_lang: Forces the model to respond in a specific language
|
|
407
427
|
user_prompt: Additional instructions
|
|
408
|
-
temperature: Controls randomness
|
|
428
|
+
temperature: Controls randomness
|
|
409
429
|
logprobs: Whether to return token probability information
|
|
410
430
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
411
431
|
validator: Custom validation function to validate the output
|
|
@@ -415,7 +435,7 @@ class TheTool:
|
|
|
415
435
|
Returns:
|
|
416
436
|
ToolOutput
|
|
417
437
|
"""
|
|
418
|
-
tool_name = "
|
|
438
|
+
tool_name = "to_question"
|
|
419
439
|
start = perf_counter()
|
|
420
440
|
|
|
421
441
|
try:
|
|
@@ -423,6 +443,7 @@ class TheTool:
|
|
|
423
443
|
# User parameters
|
|
424
444
|
text=text,
|
|
425
445
|
number_of_questions=number_of_questions,
|
|
446
|
+
mode=mode,
|
|
426
447
|
with_analysis=with_analysis,
|
|
427
448
|
output_lang=output_lang,
|
|
428
449
|
user_prompt=user_prompt,
|
|
@@ -435,7 +456,6 @@ class TheTool:
|
|
|
435
456
|
# Internal parameters
|
|
436
457
|
tool_name=tool_name,
|
|
437
458
|
output_model=ReasonListStr,
|
|
438
|
-
mode=None,
|
|
439
459
|
)
|
|
440
460
|
|
|
441
461
|
metadata = ToolOutputMetadata(
|
|
@@ -449,6 +469,11 @@ class TheTool:
|
|
|
449
469
|
)
|
|
450
470
|
|
|
451
471
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
472
|
+
self.logger.error(str(e))
|
|
473
|
+
|
|
474
|
+
if self.raise_on_error:
|
|
475
|
+
raise
|
|
476
|
+
|
|
452
477
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
453
478
|
tool_output = ToolOutput(
|
|
454
479
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -459,26 +484,27 @@ class TheTool:
|
|
|
459
484
|
def merge_questions(
|
|
460
485
|
self,
|
|
461
486
|
text: list[str],
|
|
487
|
+
mode: Literal["simple", "stepwise"],
|
|
462
488
|
with_analysis: bool = False,
|
|
463
489
|
output_lang: str | None = None,
|
|
464
490
|
user_prompt: str | None = None,
|
|
465
491
|
temperature: float | None = 0.0,
|
|
466
492
|
logprobs: bool = False,
|
|
467
493
|
top_logprobs: int = 3,
|
|
468
|
-
mode: Literal["default", "reason"] = "default",
|
|
469
494
|
validator: Callable[[Any], bool] | None = None,
|
|
470
495
|
max_validation_retries: int | None = None,
|
|
471
496
|
priority: int | None = None,
|
|
472
497
|
) -> ToolOutput:
|
|
473
498
|
"""
|
|
474
|
-
Merge multiple questions into a single unified question
|
|
499
|
+
Merge multiple questions into a single unified question
|
|
475
500
|
|
|
476
501
|
Arguments:
|
|
477
502
|
text: List of questions to merge
|
|
478
|
-
|
|
479
|
-
|
|
503
|
+
mode: simple -> regular question merging, stepwise -> merge questions in two steps
|
|
504
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
505
|
+
output_lang: Forces the model to respond in a specific language
|
|
480
506
|
user_prompt: Additional instructions
|
|
481
|
-
temperature: Controls randomness
|
|
507
|
+
temperature: Controls randomness
|
|
482
508
|
logprobs: Whether to return token probability information
|
|
483
509
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
484
510
|
validator: Custom validation function to validate the output
|
|
@@ -496,6 +522,7 @@ class TheTool:
|
|
|
496
522
|
operator_output = self._operator.run(
|
|
497
523
|
# User parameters
|
|
498
524
|
text=text,
|
|
525
|
+
mode=mode,
|
|
499
526
|
with_analysis=with_analysis,
|
|
500
527
|
output_lang=output_lang,
|
|
501
528
|
user_prompt=user_prompt,
|
|
@@ -508,7 +535,6 @@ class TheTool:
|
|
|
508
535
|
# Internal parameters
|
|
509
536
|
tool_name=tool_name,
|
|
510
537
|
output_model=Str,
|
|
511
|
-
mode=mode,
|
|
512
538
|
)
|
|
513
539
|
|
|
514
540
|
metadata = ToolOutputMetadata(
|
|
@@ -522,6 +548,11 @@ class TheTool:
|
|
|
522
548
|
)
|
|
523
549
|
|
|
524
550
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
551
|
+
self.logger.error(str(e))
|
|
552
|
+
|
|
553
|
+
if self.raise_on_error:
|
|
554
|
+
raise
|
|
555
|
+
|
|
525
556
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
526
557
|
tool_output = ToolOutput(
|
|
527
558
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -529,29 +560,30 @@ class TheTool:
|
|
|
529
560
|
|
|
530
561
|
return tool_output
|
|
531
562
|
|
|
532
|
-
def
|
|
563
|
+
def augment(
|
|
533
564
|
self,
|
|
534
565
|
text: str,
|
|
566
|
+
mode: Literal["positive", "negative", "hard_negative"],
|
|
535
567
|
with_analysis: bool = False,
|
|
536
568
|
output_lang: str | None = None,
|
|
537
569
|
user_prompt: str | None = None,
|
|
538
570
|
temperature: float | None = 0.0,
|
|
539
571
|
logprobs: bool = False,
|
|
540
572
|
top_logprobs: int = 3,
|
|
541
|
-
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
542
573
|
validator: Callable[[Any], bool] | None = None,
|
|
543
574
|
max_validation_retries: int | None = None,
|
|
544
575
|
priority: int | None = None,
|
|
545
576
|
) -> ToolOutput:
|
|
546
577
|
"""
|
|
547
|
-
Rewrite
|
|
578
|
+
Rewrite text in different augmentations
|
|
548
579
|
|
|
549
580
|
Arguments:
|
|
550
581
|
text: The input text
|
|
551
|
-
|
|
552
|
-
|
|
582
|
+
mode: positive -> positive augmentation, negative -> negative augmentation, hard_negative -> hard negative augmentation
|
|
583
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
584
|
+
output_lang: Forces the model to respond in a specific language
|
|
553
585
|
user_prompt: Additional instructions
|
|
554
|
-
temperature: Controls randomness
|
|
586
|
+
temperature: Controls randomness
|
|
555
587
|
logprobs: Whether to return token probability information
|
|
556
588
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
557
589
|
validator: Custom validation function to validate the output
|
|
@@ -561,13 +593,14 @@ class TheTool:
|
|
|
561
593
|
Returns:
|
|
562
594
|
ToolOutput
|
|
563
595
|
"""
|
|
564
|
-
tool_name = "
|
|
596
|
+
tool_name = "augment"
|
|
565
597
|
start = perf_counter()
|
|
566
598
|
|
|
567
599
|
try:
|
|
568
600
|
operator_output = self._operator.run(
|
|
569
601
|
# User parameters
|
|
570
602
|
text=text,
|
|
603
|
+
mode=mode,
|
|
571
604
|
with_analysis=with_analysis,
|
|
572
605
|
output_lang=output_lang,
|
|
573
606
|
user_prompt=user_prompt,
|
|
@@ -580,7 +613,6 @@ class TheTool:
|
|
|
580
613
|
# Internal parameters
|
|
581
614
|
tool_name=tool_name,
|
|
582
615
|
output_model=Str,
|
|
583
|
-
mode=mode,
|
|
584
616
|
)
|
|
585
617
|
|
|
586
618
|
metadata = ToolOutputMetadata(
|
|
@@ -594,80 +626,11 @@ class TheTool:
|
|
|
594
626
|
)
|
|
595
627
|
|
|
596
628
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
597
|
-
|
|
598
|
-
tool_output = ToolOutput(
|
|
599
|
-
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
600
|
-
)
|
|
629
|
+
self.logger.error(str(e))
|
|
601
630
|
|
|
602
|
-
|
|
631
|
+
if self.raise_on_error:
|
|
632
|
+
raise
|
|
603
633
|
|
|
604
|
-
def subject_to_question(
|
|
605
|
-
self,
|
|
606
|
-
text: str,
|
|
607
|
-
number_of_questions: int,
|
|
608
|
-
with_analysis: bool = False,
|
|
609
|
-
output_lang: str | None = None,
|
|
610
|
-
user_prompt: str | None = None,
|
|
611
|
-
temperature: float | None = 0.0,
|
|
612
|
-
logprobs: bool = False,
|
|
613
|
-
top_logprobs: int = 3,
|
|
614
|
-
validator: Callable[[Any], bool] | None = None,
|
|
615
|
-
max_validation_retries: int | None = None,
|
|
616
|
-
priority: int | None = None,
|
|
617
|
-
) -> ToolOutput:
|
|
618
|
-
"""
|
|
619
|
-
Generate a list of questions about a subject.
|
|
620
|
-
|
|
621
|
-
Arguments:
|
|
622
|
-
text: The subject text to generate questions about
|
|
623
|
-
number_of_questions: Number of questions to generate
|
|
624
|
-
with_analysis: Whether to include detailed reasoning analysis
|
|
625
|
-
output_lang: Language for the output
|
|
626
|
-
user_prompt: Additional instructions
|
|
627
|
-
temperature: Controls randomness (0.0 - 2.0)
|
|
628
|
-
logprobs: Whether to return token probability information
|
|
629
|
-
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
630
|
-
validator: Custom validation function to validate the output
|
|
631
|
-
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
632
|
-
priority: Task execution priority (if enabled by vLLM and the model)
|
|
633
|
-
|
|
634
|
-
Returns:
|
|
635
|
-
ToolOutput
|
|
636
|
-
"""
|
|
637
|
-
tool_name = "subject_to_question"
|
|
638
|
-
start = perf_counter()
|
|
639
|
-
|
|
640
|
-
try:
|
|
641
|
-
operator_output = self._operator.run(
|
|
642
|
-
# User parameters
|
|
643
|
-
text=text,
|
|
644
|
-
number_of_questions=number_of_questions,
|
|
645
|
-
with_analysis=with_analysis,
|
|
646
|
-
output_lang=output_lang,
|
|
647
|
-
user_prompt=user_prompt,
|
|
648
|
-
temperature=temperature,
|
|
649
|
-
logprobs=logprobs,
|
|
650
|
-
top_logprobs=top_logprobs,
|
|
651
|
-
validator=validator,
|
|
652
|
-
max_validation_retries=max_validation_retries,
|
|
653
|
-
priority=priority,
|
|
654
|
-
# Internal parameters
|
|
655
|
-
tool_name=tool_name,
|
|
656
|
-
output_model=ReasonListStr,
|
|
657
|
-
mode=None,
|
|
658
|
-
)
|
|
659
|
-
|
|
660
|
-
metadata = ToolOutputMetadata(
|
|
661
|
-
tool_name=tool_name, execution_time=perf_counter() - start
|
|
662
|
-
)
|
|
663
|
-
tool_output = ToolOutput(
|
|
664
|
-
result=operator_output.result,
|
|
665
|
-
logprobs=operator_output.logprobs,
|
|
666
|
-
analysis=operator_output.analysis,
|
|
667
|
-
metadata=metadata,
|
|
668
|
-
)
|
|
669
|
-
|
|
670
|
-
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
671
634
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
672
635
|
tool_output = ToolOutput(
|
|
673
636
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -689,14 +652,14 @@ class TheTool:
|
|
|
689
652
|
priority: int | None = None,
|
|
690
653
|
) -> ToolOutput:
|
|
691
654
|
"""
|
|
692
|
-
Summarize the given
|
|
655
|
+
Summarize the given text
|
|
693
656
|
|
|
694
657
|
Arguments:
|
|
695
658
|
text: The input text
|
|
696
|
-
with_analysis:
|
|
697
|
-
output_lang:
|
|
659
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
660
|
+
output_lang: Forces the model to respond in a specific language
|
|
698
661
|
user_prompt: Additional instructions
|
|
699
|
-
temperature: Controls randomness
|
|
662
|
+
temperature: Controls randomness
|
|
700
663
|
logprobs: Whether to return token probability information
|
|
701
664
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
702
665
|
validator: Custom validation function to validate the output
|
|
@@ -739,6 +702,11 @@ class TheTool:
|
|
|
739
702
|
)
|
|
740
703
|
|
|
741
704
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
705
|
+
self.logger.error(str(e))
|
|
706
|
+
|
|
707
|
+
if self.raise_on_error:
|
|
708
|
+
raise
|
|
709
|
+
|
|
742
710
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
743
711
|
tool_output = ToolOutput(
|
|
744
712
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -749,7 +717,7 @@ class TheTool:
|
|
|
749
717
|
def translate(
|
|
750
718
|
self,
|
|
751
719
|
text: str,
|
|
752
|
-
|
|
720
|
+
target_lang: str,
|
|
753
721
|
use_chunker: bool = True,
|
|
754
722
|
with_analysis: bool = False,
|
|
755
723
|
user_prompt: str | None = None,
|
|
@@ -761,17 +729,17 @@ class TheTool:
|
|
|
761
729
|
priority: int | None = None,
|
|
762
730
|
) -> ToolOutput:
|
|
763
731
|
"""
|
|
764
|
-
Translate text between languages
|
|
732
|
+
Translate text between languages
|
|
765
733
|
|
|
766
734
|
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
767
735
|
|
|
768
736
|
Arguments:
|
|
769
737
|
text: The input text
|
|
770
|
-
|
|
771
|
-
use_chunker: Whether to use text chunker for
|
|
772
|
-
with_analysis:
|
|
738
|
+
target_lang: The target language for translation
|
|
739
|
+
use_chunker: Whether to use text chunker for large texts
|
|
740
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
773
741
|
user_prompt: Additional instructions
|
|
774
|
-
temperature: Controls randomness
|
|
742
|
+
temperature: Controls randomness
|
|
775
743
|
logprobs: Whether to return token probability information
|
|
776
744
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
777
745
|
validator: Custom validation function to validate the output
|
|
@@ -786,7 +754,7 @@ class TheTool:
|
|
|
786
754
|
|
|
787
755
|
try:
|
|
788
756
|
if len(text.split(" ")) > 1500 and use_chunker:
|
|
789
|
-
chunks =
|
|
757
|
+
chunks = TheToolUtils.to_chunks(text, 1200, 0)
|
|
790
758
|
translation = ""
|
|
791
759
|
analysis = ""
|
|
792
760
|
logprobs_list = []
|
|
@@ -795,7 +763,7 @@ class TheTool:
|
|
|
795
763
|
chunk_operator_output = self._operator.run(
|
|
796
764
|
# User parameters
|
|
797
765
|
text=chunk,
|
|
798
|
-
|
|
766
|
+
target_lang=target_lang,
|
|
799
767
|
with_analysis=with_analysis,
|
|
800
768
|
user_prompt=user_prompt,
|
|
801
769
|
temperature=temperature,
|
|
@@ -832,7 +800,7 @@ class TheTool:
|
|
|
832
800
|
operator_output = self._operator.run(
|
|
833
801
|
# User parameters
|
|
834
802
|
text=text,
|
|
835
|
-
|
|
803
|
+
target_lang=target_lang,
|
|
836
804
|
with_analysis=with_analysis,
|
|
837
805
|
user_prompt=user_prompt,
|
|
838
806
|
temperature=temperature,
|
|
@@ -859,6 +827,11 @@ class TheTool:
|
|
|
859
827
|
)
|
|
860
828
|
|
|
861
829
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
830
|
+
self.logger.error(str(e))
|
|
831
|
+
|
|
832
|
+
if self.raise_on_error:
|
|
833
|
+
raise
|
|
834
|
+
|
|
862
835
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
863
836
|
tool_output = ToolOutput(
|
|
864
837
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -880,16 +853,16 @@ class TheTool:
|
|
|
880
853
|
priority: int | None = None,
|
|
881
854
|
) -> ToolOutput:
|
|
882
855
|
"""
|
|
883
|
-
|
|
856
|
+
Convert a text into atomic, independent, meaningful sentences
|
|
884
857
|
|
|
885
858
|
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
886
859
|
|
|
887
860
|
Arguments:
|
|
888
861
|
text: The input text
|
|
889
|
-
with_analysis:
|
|
890
|
-
output_lang:
|
|
862
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
863
|
+
output_lang: Forces the model to respond in a specific language
|
|
891
864
|
user_prompt: Additional instructions
|
|
892
|
-
temperature: Controls randomness
|
|
865
|
+
temperature: Controls randomness
|
|
893
866
|
logprobs: Whether to return token probability information
|
|
894
867
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
895
868
|
validator: Custom validation function to validate the output
|
|
@@ -932,6 +905,11 @@ class TheTool:
|
|
|
932
905
|
)
|
|
933
906
|
|
|
934
907
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
908
|
+
self.logger.error(str(e))
|
|
909
|
+
|
|
910
|
+
if self.raise_on_error:
|
|
911
|
+
raise
|
|
912
|
+
|
|
935
913
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
936
914
|
tool_output = ToolOutput(
|
|
937
915
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -939,7 +917,7 @@ class TheTool:
|
|
|
939
917
|
|
|
940
918
|
return tool_output
|
|
941
919
|
|
|
942
|
-
def
|
|
920
|
+
def is_fact(
|
|
943
921
|
self,
|
|
944
922
|
text: str,
|
|
945
923
|
source_text: str,
|
|
@@ -954,17 +932,17 @@ class TheTool:
|
|
|
954
932
|
priority: int | None = None,
|
|
955
933
|
) -> ToolOutput:
|
|
956
934
|
"""
|
|
957
|
-
|
|
935
|
+
Check whether a statement is a fact based on the source text
|
|
958
936
|
|
|
959
937
|
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
960
938
|
|
|
961
939
|
Arguments:
|
|
962
940
|
text: The input text
|
|
963
|
-
source_text: The source text
|
|
964
|
-
with_analysis:
|
|
965
|
-
output_lang:
|
|
941
|
+
source_text: The source text
|
|
942
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
943
|
+
output_lang: Forces the model to respond in a specific language
|
|
966
944
|
user_prompt: Additional instructions
|
|
967
|
-
temperature: Controls randomness
|
|
945
|
+
temperature: Controls randomness
|
|
968
946
|
logprobs: Whether to return token probability information
|
|
969
947
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
970
948
|
validator: Custom validation function to validate the output
|
|
@@ -974,7 +952,7 @@ class TheTool:
|
|
|
974
952
|
Returns:
|
|
975
953
|
ToolOutput
|
|
976
954
|
"""
|
|
977
|
-
tool_name = "
|
|
955
|
+
tool_name = "is_fact"
|
|
978
956
|
start = perf_counter()
|
|
979
957
|
|
|
980
958
|
try:
|
|
@@ -1008,6 +986,11 @@ class TheTool:
|
|
|
1008
986
|
)
|
|
1009
987
|
|
|
1010
988
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
989
|
+
self.logger.error(str(e))
|
|
990
|
+
|
|
991
|
+
if self.raise_on_error:
|
|
992
|
+
raise
|
|
993
|
+
|
|
1011
994
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
1012
995
|
tool_output = ToolOutput(
|
|
1013
996
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
@@ -1030,15 +1013,15 @@ class TheTool:
|
|
|
1030
1013
|
priority: int | None = None,
|
|
1031
1014
|
) -> ToolOutput:
|
|
1032
1015
|
"""
|
|
1033
|
-
Custom tool that can do almost anything
|
|
1016
|
+
Custom tool that can do almost anything
|
|
1034
1017
|
|
|
1035
1018
|
Arguments:
|
|
1036
1019
|
prompt: The user prompt
|
|
1037
1020
|
output_model: Pydantic BaseModel used for structured output
|
|
1038
|
-
with_analysis:
|
|
1021
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
1039
1022
|
analyze_template: The analyze template used for reasoning analysis
|
|
1040
|
-
output_lang:
|
|
1041
|
-
temperature: Controls randomness
|
|
1023
|
+
output_lang: Forces the model to respond in a specific language
|
|
1024
|
+
temperature: Controls randomness
|
|
1042
1025
|
logprobs: Whether to return token probability information
|
|
1043
1026
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
1044
1027
|
validator: Custom validation function to validate the output
|
|
@@ -1083,6 +1066,11 @@ class TheTool:
|
|
|
1083
1066
|
)
|
|
1084
1067
|
|
|
1085
1068
|
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
1069
|
+
self.logger.error(str(e))
|
|
1070
|
+
|
|
1071
|
+
if self.raise_on_error:
|
|
1072
|
+
raise
|
|
1073
|
+
|
|
1086
1074
|
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
1087
1075
|
tool_output = ToolOutput(
|
|
1088
1076
|
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|