hamtaa-texttools 1.3.2__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,10 @@
+import logging
 from collections.abc import Callable
 from time import perf_counter
 from typing import Any, Literal
 
 from openai import AsyncOpenAI
 
-from ..core.engine import text_to_chunks, run_with_timeout
 from ..core.exceptions import LLMError, PromptError, TextToolsError, ValidationError
 from ..core.internal_models import (
     Bool,
@@ -15,21 +15,20 @@ from ..core.internal_models import (
     create_dynamic_model,
 )
 from ..core.operators.async_operator import AsyncOperator
+from ..core.utils import TheToolUtils
 from ..models import CategoryTree, ToolOutput, ToolOutputMetadata
 
 
 class AsyncTheTool:
-    """
-    Each method configures the operator with a specific YAML prompt,
-    output schema, and flags, then delegates execution to `operator.run()`.
-    """
-
     def __init__(
         self,
         client: AsyncOpenAI,
         model: str,
+        raise_on_error: bool = True,
    ):
        self._operator = AsyncOperator(client=client, model=model)
+        self.logger = logging.getLogger(self.__class__.__name__)
+        self.raise_on_error = raise_on_error
 
     async def categorize(
         self,
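
The constructor now takes a `raise_on_error` flag and creates a class-named logger. A minimal usage sketch, assuming the package exposes `AsyncTheTool` at its top level (this diff only shows relative imports, so the import path and model name below are placeholders) and any OpenAI-compatible endpoint:

```python
import asyncio
import logging

from openai import AsyncOpenAI

from texttools import AsyncTheTool  # import path assumed; not shown in this diff

# The logger created in __init__ is named after the class ("AsyncTheTool").
logging.basicConfig(level=logging.ERROR)

async def main() -> None:
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="-")  # placeholder endpoint
    tool = AsyncTheTool(
        client=client,
        model="my-model",      # placeholder model name
        raise_on_error=False,  # new in 2.x: log errors and report them via ToolOutput instead of raising
    )

asyncio.run(main())
```
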
@@ -46,16 +45,14 @@ class AsyncTheTool:
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Categorize a text into a category / category tree.
-
-        Important Note: category_tree mode is EXPERIMENTAL, you can use it but it isn't reliable.
+        Classify text into given categories
 
         Arguments:
             text: The input text
             categories: The category list / category tree
-            with_analysis: Whether to include detailed reasoning analysis
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
@@ -72,7 +69,7 @@
 
         try:
             if isinstance(categories, list):
-                operator_output = await run_with_timeout(
+                operator_output = await TheToolUtils.run_with_timeout(
                     self._operator.run(
                         # User parameters
                         text=text,
@@ -121,7 +118,7 @@
                 ]
                 category_names = list(parent_node.children.keys())
 
-                level_operator_output = await run_with_timeout(
+                level_operator_output = await TheToolUtils.run_with_timeout(
                     self._operator.run(
                         # User parameters
                         text=text,
@@ -165,6 +162,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
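
This is the new failure contract applied to every tool method in 2.x: the exception is logged, then either re-raised (`raise_on_error=True`, the default) or folded into `ToolOutput.errors`. A sketch of both paths, assuming `tool_strict` and `tool_lenient` are `AsyncTheTool` instances built with the respective flag values:

```python
async def classify(tool_strict, tool_lenient, text: str):
    # Default behavior: the logged exception propagates to the caller.
    try:
        return await tool_strict.categorize(text=text, categories=["tech", "sports"])
    except Exception:
        pass

    # raise_on_error=False: failures are reported in-band instead.
    out = await tool_lenient.categorize(text=text, categories=["tech", "sports"])
    if out.errors:
        print(out.errors)  # e.g. ["LLMError: ..."]
    return out
```
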
@@ -175,28 +177,30 @@
     async def extract_keywords(
         self,
         text: str,
+        mode: Literal["auto", "threshold", "count"],
+        number_of_keywords: int | None = None,
         with_analysis: bool = False,
         output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int = 3,
-        mode: Literal["auto", "threshold", "count"] = "auto",
-        number_of_keywords: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = None,
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Extract salient keywords from text.
+        Extract keywords from the text
 
         Arguments:
             text: The input text
-            with_analysis: Whether to include detailed reasoning analysis
-            output_lang: Language for the output
+            mode: auto -> decide n of keywords automatically, threshold -> decide n of keywords by a threshold, count -> takes number of keywords as the parameter
+            number_of_keywords: Must be set only when using "count" mode
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
+            output_lang: Forces the model to respond in a specific language
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
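
`mode` is now required and moved ahead of the optional parameters, so 1.x callers that relied on the old `"auto"` default must pass it explicitly. A sketch, assuming a `tool` instance built as in the constructor example above:

```python
async def keywords_demo(tool) -> None:
    # Let the model decide how many keywords to return.
    auto = await tool.extract_keywords(text="...", mode="auto")

    # Ask for an exact number; number_of_keywords applies only to mode="count".
    fixed = await tool.extract_keywords(
        text="...",
        mode="count",
        number_of_keywords=5,
    )
    print(auto.result, fixed.result)
```
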
@@ -211,24 +215,24 @@
         start = perf_counter()
 
         try:
-            operator_output = await run_with_timeout(
+            operator_output = await TheToolUtils.run_with_timeout(
                 self._operator.run(
                     # User parameters
                     text=text,
                     with_analysis=with_analysis,
+                    number_of_keywords=number_of_keywords,
+                    mode=mode,
                     output_lang=output_lang,
                     user_prompt=user_prompt,
                     temperature=temperature,
                     logprobs=logprobs,
                     top_logprobs=top_logprobs,
-                    number_of_keywords=number_of_keywords,
                     validator=validator,
                     max_validation_retries=max_validation_retries,
                     priority=priority,
                     # Internal parameters
                     tool_name=tool_name,
                     output_model=ListStr,
-                    mode=mode,
                 ),
                 timeout=timeout,
             )
@@ -244,6 +248,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
@@ -254,7 +263,7 @@
     async def extract_entities(
         self,
         text: str,
-        entities: list[str] | None = None,
+        entities: list[str] = ["all named entities"],
         with_analysis: bool = False,
         output_lang: str | None = None,
         user_prompt: str | None = None,
@@ -267,15 +276,15 @@
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Perform Named Entity Recognition (NER) over the input text.
+        Perform Named Entity Recognition (NER)
 
         Arguments:
             text: The input text
-            entities: List of entities provided by user (Optional)
-            with_analysis: Whether to include detailed reasoning analysis
-            output_lang: Language for the output
+            entities: List of entities
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
+            output_lang: Forces the model to respond in a specific language
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
@@ -290,12 +299,11 @@
         start = perf_counter()
 
         try:
-            operator_output = await run_with_timeout(
+            operator_output = await TheToolUtils.run_with_timeout(
                 self._operator.run(
                     # User parameters
                     text=text,
-                    entities=entities
-                    or "all named entities (e.g., PER, ORG, LOC, DAT, etc.)",
+                    entities=entities,
                     with_analysis=with_analysis,
                     output_lang=output_lang,
                     user_prompt=user_prompt,
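
The `None`-plus-fallback default moves into the signature as a list literal. One caveat worth noting: a mutable default is shared across calls, which is harmless only as long as nothing mutates it. A sketch, assuming a `tool` instance; the entity label below is illustrative, not a documented vocabulary:

```python
async def ner_demo(tool) -> None:
    # The default now asks for all named entities.
    everything = await tool.extract_entities(text="Ada Lovelace visited London in 1842.")

    # Pass a list to narrow extraction to specific entity types.
    people_only = await tool.extract_entities(
        text="Ada Lovelace visited London in 1842.",
        entities=["PERSON"],
    )
    print(everything.result, people_only.result)
```
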
@@ -324,6 +332,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
@@ -349,9 +362,9 @@
 
         Arguments:
             text: The input text
-            with_analysis: Whether to include detailed reasoning analysis
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
@@ -366,7 +379,7 @@
         start = perf_counter()
 
         try:
-            operator_output = await run_with_timeout(
+            operator_output = await TheToolUtils.run_with_timeout(
                 self._operator.run(
                     # User parameters
                     text=text,
@@ -398,6 +411,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
@@ -405,10 +423,11 @@
 
         return tool_output
 
-    async def text_to_question(
+    async def to_question(
         self,
         text: str,
         number_of_questions: int,
+        mode: Literal["from_text", "from_subject"],
         with_analysis: bool = False,
         output_lang: str | None = None,
         user_prompt: str | None = None,
@@ -421,15 +440,16 @@
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Generate a single question from the given text.
+        Generate questions from the given text / subject
 
         Arguments:
             text: The input text
+            mode: from_text -> generate questions from an answer, from_subject -> generate questions from a subject
             number_of_questions: Number of questions to generate
-            with_analysis: Whether to include detailed reasoning analysis
-            output_lang: Language for the output
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
+            output_lang: Forces the model to respond in a specific language
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
@@ -440,15 +460,16 @@
         Returns:
             ToolOutput
         """
-        tool_name = "text_to_question"
+        tool_name = "to_question"
         start = perf_counter()
 
         try:
-            operator_output = await run_with_timeout(
+            operator_output = await TheToolUtils.run_with_timeout(
                 self._operator.run(
                     # User parameters
                     text=text,
                     number_of_questions=number_of_questions,
+                    mode=mode,
                     with_analysis=with_analysis,
                     output_lang=output_lang,
                     user_prompt=user_prompt,
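
`text_to_question` and `subject_to_question` (removed further down in this diff) are merged into a single `to_question` with a required `mode`. A migration sketch, assuming a `tool` instance:

```python
async def questions_demo(tool) -> None:
    # 1.x text_to_question(...): questions whose answer is the given text.
    from_answer = await tool.to_question(
        text="Water boils at 100 degrees Celsius at sea level.",
        number_of_questions=3,
        mode="from_text",
    )

    # 1.x subject_to_question(...): questions about a subject.
    from_topic = await tool.to_question(
        text="thermodynamics",
        number_of_questions=3,
        mode="from_subject",
    )
    print(from_answer.result, from_topic.result)
```
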
@@ -461,7 +482,6 @@
                     # Internal parameters
                     tool_name=tool_name,
                     output_model=ReasonListStr,
-                    mode=None,
                 ),
                 timeout=timeout,
             )
@@ -477,6 +497,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
@@ -487,27 +512,28 @@
     async def merge_questions(
         self,
         text: list[str],
+        mode: Literal["simple", "stepwise"],
         with_analysis: bool = False,
         output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int = 3,
-        mode: Literal["default", "reason"] = "default",
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = None,
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Merge multiple questions into a single unified question.
+        Merge multiple questions into a single unified question
 
         Arguments:
             text: List of questions to merge
-            with_analysis: Whether to include detailed reasoning analysis
-            output_lang: Language for the output
+            mode: simple -> regular question merging, stepwise -> merge questions in two steps
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
+            output_lang: Forces the model to respond in a specific language
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
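
The `mode` values are renamed from `"default"`/`"reason"` to `"simple"`/`"stepwise"`, and the parameter is now required. A sketch, assuming a `tool` instance:

```python
async def merge_demo(tool) -> None:
    merged = await tool.merge_questions(
        text=["Who wrote Hamlet?", "When was Hamlet written?"],
        mode="simple",  # "stepwise" merges the questions in two steps instead
    )
    print(merged.result)  # a single unified question
```
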
@@ -523,10 +549,11 @@
 
         try:
             text = ", ".join(text)
-            operator_output = await run_with_timeout(
+            operator_output = await TheToolUtils.run_with_timeout(
                 self._operator.run(
                     # User parameters
                     text=text,
+                    mode=mode,
                     with_analysis=with_analysis,
                     output_lang=output_lang,
                     user_prompt=user_prompt,
@@ -539,7 +566,6 @@
                     # Internal parameters
                     tool_name=tool_name,
                     output_model=Str,
-                    mode=mode,
                 ),
                 timeout=timeout,
             )
@@ -555,6 +581,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
@@ -562,30 +593,31 @@
 
         return tool_output
 
-    async def rewrite(
+    async def augment(
         self,
         text: str,
+        mode: Literal["positive", "negative", "hard_negative"],
         with_analysis: bool = False,
         output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int = 3,
-        mode: Literal["positive", "negative", "hard_negative"] = "positive",
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = None,
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Rewrite a text with different modes.
+        Rewrite text in different augmentations
 
         Arguments:
             text: The input text
-            with_analysis: Whether to include detailed reasoning analysis
-            output_lang: Language for the output
+            mode: positive -> positive augmentation, negative -> negative augmentation, hard_negative -> hard negative augmentation
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
+            output_lang: Forces the model to respond in a specific language
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
@@ -596,14 +628,15 @@
         Returns:
             ToolOutput
         """
-        tool_name = "rewrite"
+        tool_name = "augment"
         start = perf_counter()
 
         try:
-            operator_output = await run_with_timeout(
+            operator_output = await TheToolUtils.run_with_timeout(
                 self._operator.run(
                     # User parameters
                     text=text,
+                    mode=mode,
                     with_analysis=with_analysis,
                     output_lang=output_lang,
                     user_prompt=user_prompt,
@@ -616,7 +649,6 @@
                     # Internal parameters
                     tool_name=tool_name,
                     output_model=Str,
-                    mode=mode,
                 ),
                 timeout=timeout,
             )
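
`rewrite` is renamed to `augment`, with the former default `mode="positive"` now a required argument. A sketch, assuming a `tool` instance; the comments gloss the modes with their usual data-augmentation meanings, which the docstring does not spell out:

```python
async def augment_demo(tool) -> None:
    text = "The service is fast."
    paraphrase = await tool.augment(text=text, mode="positive")      # meaning-preserving rewrite
    contradiction = await tool.augment(text=text, mode="negative")   # meaning-reversing rewrite
    near_miss = await tool.augment(text=text, mode="hard_negative")  # similar wording, different meaning
    print(paraphrase.result, contradiction.result, near_miss.result)
```
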
@@ -632,85 +664,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
-            metadata = ToolOutputMetadata(tool_name=tool_name)
-            tool_output = ToolOutput(
-                errors=[f"{type(e).__name__}: {e}"], metadata=metadata
-            )
+            self.logger.error(str(e))
 
-            return tool_output
-
-    async def subject_to_question(
-        self,
-        text: str,
-        number_of_questions: int,
-        with_analysis: bool = False,
-        output_lang: str | None = None,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int = 3,
-        validator: Callable[[Any], bool] | None = None,
-        max_validation_retries: int | None = None,
-        priority: int | None = None,
-        timeout: float | None = None,
-    ) -> ToolOutput:
-        """
-        Generate a list of questions about a subject.
+            if self.raise_on_error:
+                raise
 
-        Arguments:
-            text: The subject text to generate questions about
-            number_of_questions: Number of questions to generate
-            with_analysis: Whether to include detailed reasoning analysis
-            output_lang: Language for the output
-            user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
-            logprobs: Whether to return token probability information
-            top_logprobs: Number of top token alternatives to return if logprobs enabled
-            validator: Custom validation function to validate the output
-            max_validation_retries: Maximum number of retry attempts if validation fails
-            priority: Task execution priority (if enabled by vLLM and the model)
-            timeout: Maximum time in seconds to wait for the response before raising a timeout error
-
-        Returns:
-            ToolOutput
-        """
-        tool_name = "subject_to_question"
-        start = perf_counter()
-
-        try:
-            operator_output = await run_with_timeout(
-                self._operator.run(
-                    # User parameters
-                    text=text,
-                    number_of_questions=number_of_questions,
-                    with_analysis=with_analysis,
-                    output_lang=output_lang,
-                    user_prompt=user_prompt,
-                    temperature=temperature,
-                    logprobs=logprobs,
-                    top_logprobs=top_logprobs,
-                    validator=validator,
-                    max_validation_retries=max_validation_retries,
-                    priority=priority,
-                    # Internal parameters
-                    tool_name=tool_name,
-                    output_model=ReasonListStr,
-                    mode=None,
-                ),
-                timeout=timeout,
-            )
-
-            metadata = ToolOutputMetadata(
-                tool_name=tool_name, execution_time=perf_counter() - start
-            )
-            tool_output = ToolOutput(
-                result=operator_output.result,
-                logprobs=operator_output.logprobs,
-                analysis=operator_output.analysis,
-                metadata=metadata,
-            )
-
-        except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
@@ -733,14 +691,14 @@
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Summarize the given subject text.
+        Summarize the given text
 
         Arguments:
             text: The input text
-            with_analysis: Whether to include detailed reasoning analysis
-            output_lang: Language for the output
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
+            output_lang: Forces the model to respond in a specific language
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
@@ -755,7 +713,7 @@
         start = perf_counter()
 
         try:
-            operator_output = await run_with_timeout(
+            operator_output = await TheToolUtils.run_with_timeout(
                 self._operator.run(
                     # User parameters
                     text=text,
@@ -787,6 +745,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
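
A sketch for the summarization tool; the method name is not visible in these hunks, so `summarize` below is an assumption, as is the `tool` instance:

```python
async def summary_demo(tool) -> None:
    out = await tool.summarize(   # method name assumed; not shown in this diff
        text="...a long article...",
        output_lang="English",    # forces the response language
    )
    print(out.result, out.metadata.execution_time)
```
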
@@ -797,7 +760,7 @@
     async def translate(
         self,
         text: str,
-        target_language: str,
+        target_lang: str,
         use_chunker: bool = True,
         with_analysis: bool = False,
         user_prompt: str | None = None,
@@ -810,17 +773,17 @@
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Translate text between languages.
+        Translate text between languages
 
         Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
 
         Arguments:
             text: The input text
-            target_language: The target language for translation
-            use_chunker: Whether to use text chunker for text length bigger than 1500
-            with_analysis: Whether to include detailed reasoning analysis
+            target_lang: The target language for translation
+            use_chunker: Whether to use text chunker for large texts
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
@@ -836,17 +799,17 @@
 
         try:
             if len(text.split(" ")) > 1500 and use_chunker:
-                chunks = text_to_chunks(text, 1200, 0)
+                chunks = TheToolUtils.to_chunks(text, 1200, 0)
                 translation = ""
                 analysis = ""
                 logprobs_list = []
 
                 for chunk in chunks:
-                    chunk_operator_output = await run_with_timeout(
+                    chunk_operator_output = await TheToolUtils.run_with_timeout(
                         self._operator.run(
                             # User parameters
                             text=chunk,
-                            target_language=target_language,
+                            target_lang=target_lang,
                             with_analysis=with_analysis,
                             user_prompt=user_prompt,
                             temperature=temperature,
@@ -882,11 +845,11 @@
                 )
 
             else:
-                operator_output = await run_with_timeout(
+                operator_output = await TheToolUtils.run_with_timeout(
                     self._operator.run(
                         # User parameters
                         text=text,
-                        target_language=target_language,
+                        target_lang=target_lang,
                         with_analysis=with_analysis,
                         user_prompt=user_prompt,
                         temperature=temperature,
@@ -915,6 +878,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
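
With the rename from `target_language` to `target_lang`, 1.x call sites break at the keyword level. Per the chunking branch above, texts over 1500 words are split into 1200-word chunks when `use_chunker=True`, translated chunk by chunk, and accumulated. A sketch, assuming a `tool` instance:

```python
async def translate_demo(tool) -> None:
    out = await tool.translate(
        text="Hello, world!",
        target_lang="French",  # was target_language in 1.x
        use_chunker=True,      # chunking only kicks in past 1500 words
        timeout=60.0,
    )
    print(out.result)
```
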
@@ -937,16 +905,16 @@
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Proposition input text to meaningful sentences.
+        Convert a text into atomic, independent, meaningful sentences
 
         Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
 
         Arguments:
             text: The input text
-            with_analysis: Whether to include detailed reasoning analysis
-            output_lang: Language for the output
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
+            output_lang: Forces the model to respond in a specific language
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
@@ -961,7 +929,7 @@
         start = perf_counter()
 
         try:
-            operator_output = await run_with_timeout(
+            operator_output = await TheToolUtils.run_with_timeout(
                 self._operator.run(
                     # User parameters
                     text=text,
@@ -993,6 +961,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
@@ -1000,7 +973,7 @@
 
         return tool_output
 
-    async def check_fact(
+    async def is_fact(
         self,
         text: str,
         source_text: str,
@@ -1016,17 +989,17 @@
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Checks wheather a statement is relevant to the source text or not.
+        Check whether a statement is a fact based on the source text
 
         Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
 
         Arguments:
             text: The input text
-            source_text: The source text that we want to check relation of text to it
-            with_analysis: Whether to include detailed reasoning analysis
-            output_lang: Language for the output
+            source_text: The source text
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
+            output_lang: Forces the model to respond in a specific language
             user_prompt: Additional instructions
-            temperature: Controls randomness (0.0 - 2.0)
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
@@ -1037,11 +1010,11 @@
         Returns:
             ToolOutput
         """
-        tool_name = "check_fact"
+        tool_name = "is_fact"
         start = perf_counter()
 
         try:
-            operator_output = await run_with_timeout(
+            operator_output = await TheToolUtils.run_with_timeout(
                 self._operator.run(
                     # User parameters
                     text=text,
@@ -1074,6 +1047,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata
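
`check_fact` is renamed to `is_fact`; the tool remains flagged EXPERIMENTAL. A sketch, assuming a `tool` instance:

```python
async def fact_demo(tool) -> None:
    out = await tool.is_fact(
        text="The Eiffel Tower is in Paris.",
        source_text="The Eiffel Tower, built in 1889, stands in Paris, France.",
    )
    print(out.result)  # the model's judgment of whether the claim is supported
```
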
@@ -1097,15 +1075,15 @@
         timeout: float | None = None,
     ) -> ToolOutput:
         """
-        Custom tool that can do almost anything!
+        Custom tool that can do almost anything
 
         Arguments:
             prompt: The user prompt
             output_model: Pydantic BaseModel used for structured output
-            with_analysis: Whether to include detailed reasoning analysis
+            with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
             analyze_template: The analyze template used for reasoning analysis
-            output_lang: Language for the output
-            temperature: Controls randomness (0.0 - 2.0)
+            output_lang: Forces the model to respond in a specific language
+            temperature: Controls randomness
             logprobs: Whether to return token probability information
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
@@ -1120,7 +1098,7 @@
         start = perf_counter()
 
         try:
-            operator_output = await run_with_timeout(
+            operator_output = await TheToolUtils.run_with_timeout(
                 self._operator.run(
                     # User paramaeters
                     text=prompt,
@@ -1154,6 +1132,11 @@
             )
 
         except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
+            self.logger.error(str(e))
+
+            if self.raise_on_error:
+                raise
+
             metadata = ToolOutputMetadata(tool_name=tool_name)
             tool_output = ToolOutput(
                 errors=[f"{type(e).__name__}: {e}"], metadata=metadata