hamtaa-texttools 1.1.18__py3-none-any.whl → 1.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
@@ -1,5 +1,5 @@
  from datetime import datetime
- from typing import Literal, Any
+ from typing import Literal
  from collections.abc import Callable

  from openai import AsyncOpenAI
@@ -12,6 +12,7 @@ from texttools.internals.exceptions import (
  LLMError,
  ValidationError,
  )
+ from texttools.internals.text_to_chunks import text_to_chunks


  class AsyncTheTool:
@@ -35,15 +36,17 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
+ top_logprobs: int = 3,
  mode: Literal["category_list", "category_tree"] = "category_list",
- validator: Callable[[Any], bool] | None = None,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
  """
  Categorize a text into a category / category tree.

+ Important Note: category_tree mode is EXPERIMENTAL, you can use it but it isn't reliable.
+
  Arguments:
  text: The input text to categorize
  categories: The category / category_tree to give to LLM
@@ -73,11 +76,11 @@ class AsyncTheTool:
  start = datetime.now()

  if mode == "category_tree":
- # Initializations
- output = Models.ToolOutput()
  levels = categories.get_level_count()
  parent_id = 0
- final_output = []
+ final_categories = []
+ analysis = ""
+ logprobs = []

  for _ in range(levels):
  # Get child nodes for current parent
@@ -100,7 +103,7 @@ class AsyncTheTool:
  ]
  category_names = [node.name for node in children]

- # Run categorization for this level
+ # Run categorization for current level
  level_output = await self._operator.run(
  # User parameters
  text=text,
@@ -141,16 +144,22 @@ class AsyncTheTool:
  return output

  parent_id = parent_node.node_id
- final_output.append(parent_node.name)
+ final_categories.append(parent_node.name)

- # Copy analysis/logprobs/process from the last level's output
- output.analysis = level_output.analysis
- output.logprobs = level_output.logprobs
- output.process = level_output.process
+ if with_analysis:
+ analysis += level_output.analysis
+ if logprobs:
+ logprobs += level_output.logprobs

- output.result = final_output
  end = datetime.now()
- output.execution_time = (end - start).total_seconds()
+ output = Models.ToolOutput(
+ result=final_categories,
+ logprobs=logprobs,
+ analysis=analysis,
+ process="categorize",
+ execution_time=(end - start).total_seconds(),
+ )
+
  return output

  else:
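
For orientation, a minimal usage sketch of the reworked category_tree path; the tool and my_tree names are illustrative (constructing AsyncTheTool is outside this diff), and the mode is flagged EXPERIMENTAL in the new docstring:

# Hypothetical sketch; `tool` is an AsyncTheTool instance and `my_tree`
# a category tree exposing get_level_count(), as the code above assumes.
async def demo(tool, my_tree):
    output = await tool.categorize(
        text="The central bank raised interest rates again.",
        categories=my_tree,
        mode="category_tree",  # EXPERIMENTAL per the note added in 1.1.20
        with_analysis=True,    # per-level analyses are now concatenated
    )
    print(output.result)          # one chosen category name per tree level
    print(output.execution_time)  # seconds, set on the freshly built ToolOutput

Behavioral change worth noting: 1.1.18 kept only the last level's analysis/logprobs/process, while 1.1.20 accumulates analysis and logprobs across levels and constructs the ToolOutput once at the end.
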
@@ -197,10 +206,10 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
+ top_logprobs: int = 3,
  mode: Literal["auto", "threshold", "count"] = "auto",
  number_of_keywords: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -249,7 +258,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="extract_keywords.yaml",
- output_model=Models.ListStrOutput,
+ output_model=Models.ListStr,
  )
  end = datetime.now()
  output.execution_time = (end - start).total_seconds()
@@ -271,13 +280,14 @@ class AsyncTheTool:
  async def extract_entities(
  self,
  text: str,
+ entities: list[str] | None = None,
  with_analysis: bool = False,
  output_lang: str | None = None,
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -286,6 +296,7 @@ class AsyncTheTool:

  Arguments:
  text: The input text to extract entities from
+ entities: List of entities provided by user (Optional)
  with_analysis: Whether to include detailed reasoning analysis
  output_lang: Language for the output response
  user_prompt: Additional instructions for entity extraction
@@ -313,6 +324,8 @@ class AsyncTheTool:
  output = await self._operator.run(
  # User parameters
  text=text,
+ entities=entities
+ or "all named entities (e.g., PER, ORG, LOC, DAT, etc.)",
  with_analysis=with_analysis,
  output_lang=output_lang,
  user_prompt=user_prompt,
@@ -324,7 +337,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="extract_entities.yaml",
- output_model=Models.ListDictStrStrOutput,
+ output_model=Models.ListDictStrStr,
  mode=None,
  )
  end = datetime.now()
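
A minimal sketch of the new entities filter (tool remains illustrative); the fallback string is taken verbatim from the call site above:

# Hypothetical sketch of the optional `entities` parameter added in 1.1.20.
async def demo(tool):
    output = await tool.extract_entities(
        text="Ada Lovelace met Charles Babbage in London in 1833.",
        entities=["PER", "LOC"],  # restrict extraction to these entity types
    )
    # Omitting `entities` makes the prompt fall back to
    # "all named entities (e.g., PER, ORG, LOC, DAT, etc.)".
    print(output.result)  # str-to-str mappings per Models.ListDictStrStr
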
@@ -351,8 +364,8 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -397,7 +410,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="is_question.yaml",
- output_model=Models.BoolOutput,
+ output_model=Models.Bool,
  mode=None,
  output_lang=None,
  )
@@ -421,13 +434,14 @@ class AsyncTheTool:
  async def text_to_question(
  self,
  text: str,
+ number_of_questions: int,
  with_analysis: bool = False,
  output_lang: str | None = None,
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -436,6 +450,7 @@ class AsyncTheTool:

  Arguments:
  text: The input text to generate a question from
+ number_of_questions: Number of questions to generate
  with_analysis: Whether to include detailed reasoning analysis
  output_lang: Language for the output question
  user_prompt: Additional instructions for question generation
@@ -463,6 +478,7 @@ class AsyncTheTool:
  output = await self._operator.run(
  # User parameters
  text=text,
+ number_of_questions=number_of_questions,
  with_analysis=with_analysis,
  output_lang=output_lang,
  user_prompt=user_prompt,
@@ -474,7 +490,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="text_to_question.yaml",
- output_model=Models.StrOutput,
+ output_model=Models.ReasonListStr,
  mode=None,
  )
  end = datetime.now()
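
Since number_of_questions has no default, existing 1.1.18 call sites of text_to_question must be updated; a minimal sketch under the signatures above:

# Hypothetical sketch; `number_of_questions` is required as of 1.1.20.
async def demo(tool):
    output = await tool.text_to_question(
        text="Photosynthesis converts light energy into chemical energy.",
        number_of_questions=3,
    )
    # The output model changed from Models.StrOutput to Models.ReasonListStr,
    # so result is a list of questions rather than a single string.
    print(output.result)
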
@@ -502,9 +518,9 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
+ top_logprobs: int = 3,
  mode: Literal["default", "reason"] = "default",
- validator: Callable[[Any], bool] | None = None,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -553,7 +569,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="merge_questions.yaml",
- output_model=Models.StrOutput,
+ output_model=Models.Str,
  mode=mode,
  )
  end = datetime.now()
@@ -581,9 +597,9 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
+ top_logprobs: int = 3,
  mode: Literal["positive", "negative", "hard_negative"] = "positive",
- validator: Callable[[Any], bool] | None = None,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -631,7 +647,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="rewrite.yaml",
- output_model=Models.StrOutput,
+ output_model=Models.Str,
  mode=mode,
  )
  end = datetime.now()
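
The same two signature changes recur across every tool in this release: validator is now typed Callable[[object], bool] (hence the dropped typing.Any import), and top_logprobs defaults to 3 instead of None. A minimal validator sketch against rewrite, with illustrative names:

# Hypothetical sketch; a validator receives the parsed result, returns bool.
def non_empty(result: object) -> bool:
    return bool(result)

async def demo(tool):
    output = await tool.rewrite(
        text="The quick brown fox jumps over the lazy dog.",
        mode="hard_negative",
        validator=non_empty,       # retried up to max_validation_retries
        max_validation_retries=2,
        logprobs=True,             # top_logprobs now defaults to 3
    )
    return output
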
@@ -660,8 +676,8 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -710,7 +726,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="subject_to_question.yaml",
- output_model=Models.ReasonListStrOutput,
+ output_model=Models.ReasonListStr,
  mode=None,
  )
  end = datetime.now()
@@ -738,8 +754,8 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -786,7 +802,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="summarize.yaml",
- output_model=Models.StrOutput,
+ output_model=Models.Str,
  mode=None,
  )
  end = datetime.now()
@@ -810,21 +826,25 @@ class AsyncTheTool:
  self,
  text: str,
  target_language: str,
+ use_chunker: bool = True,
  with_analysis: bool = False,
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
  """
  Translate text between languages.

+ Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
+
  Arguments:
  text: The input text to translate
  target_language: The target language for translation
+ use_chunker: Whether to use text chunker for text length bigger than 1500
  with_analysis: Whether to include detailed reasoning analysis
  user_prompt: Additional instructions for translation
  temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
@@ -848,27 +868,81 @@ class AsyncTheTool:

  try:
  start = datetime.now()
- output = await self._operator.run(
- # User parameters
- text=text,
- target_language=target_language,
- with_analysis=with_analysis,
- user_prompt=user_prompt,
- temperature=temperature,
- logprobs=logprobs,
- top_logprobs=top_logprobs,
- validator=validator,
- max_validation_retries=max_validation_retries,
- priority=priority,
- # Internal parameters
- prompt_file="translate.yaml",
- output_model=Models.StrOutput,
- mode=None,
- output_lang=None,
- )
- end = datetime.now()
- output.execution_time = (end - start).total_seconds()
- return output
+
+ if len(text.split(" ")) > 1500 and use_chunker:
+ chunks = text_to_chunks(text, 1200, 0)
+
+ translation = ""
+ analysis = ""
+ logprobs = []
+
+ # Run translation for each chunk
+ for chunk in chunks:
+ chunk_output = await self._operator.run(
+ # User parameters
+ text=chunk,
+ target_language=target_language,
+ with_analysis=with_analysis,
+ user_prompt=user_prompt,
+ temperature=temperature,
+ logprobs=logprobs,
+ top_logprobs=top_logprobs,
+ validator=validator,
+ max_validation_retries=max_validation_retries,
+ priority=priority,
+ # Internal parameters
+ prompt_file="translate.yaml",
+ output_model=Models.Str,
+ mode=None,
+ output_lang=None,
+ )
+
+ # Check for errors from operator
+ if chunk_output.errors:
+ output.errors.extend(chunk_output.errors)
+ end = datetime.now()
+ output.execution_time = (end - start).total_seconds()
+ return output
+
+ # Concatenate the outputs
+ translation += chunk_output.result + "\n"
+ if with_analysis:
+ analysis += chunk_output.analysis
+ if logprobs:
+ logprobs += chunk_output.logprobs
+
+ end = datetime.now()
+ output = Models.ToolOutput(
+ result=translation,
+ logprobs=logprobs,
+ analysis=analysis,
+ process="translate",
+ execution_time=(end - start).total_seconds(),
+ )
+ return output
+
+ else:
+ output = await self._operator.run(
+ # User parameters
+ text=text,
+ target_language=target_language,
+ with_analysis=with_analysis,
+ user_prompt=user_prompt,
+ temperature=temperature,
+ logprobs=logprobs,
+ top_logprobs=top_logprobs,
+ validator=validator,
+ max_validation_retries=max_validation_retries,
+ priority=priority,
+ # Internal parameters
+ prompt_file="translate.yaml",
+ output_model=Models.Str,
+ mode=None,
+ output_lang=None,
+ )
+ end = datetime.now()
+ output.execution_time = (end - start).total_seconds()
+ return output

  except PromptError as e:
  output.errors.append(f"Prompt error: {e}")
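
A minimal sketch of the new chunked translation path: per the code above, inputs longer than 1500 whitespace-separated words are split via text_to_chunks(text, 1200, 0), each chunk is translated separately, and the pieces are joined with newlines. If any chunk fails, the loop returns early with the accumulated errors rather than a partial translation.

# Hypothetical sketch; `long_text` stands in for any document over 1500 words.
async def demo(tool):
    long_text = " ".join(["word"] * 2000)
    output = await tool.translate(
        text=long_text,
        target_language="German",
        use_chunker=True,  # default; False sends the full text in one call
    )
    print(output.result)   # per-chunk translations concatenated with "\n"

Note also that the chunked branch rebinds the boolean logprobs parameter to a list of the same name before the loop, so the subsequent `if logprobs:` accumulation tests that list rather than the caller's flag.
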
@@ -883,7 +957,7 @@ class AsyncTheTool:

  return output

- async def detect_entity(
+ async def propositionize(
  self,
  text: str,
  with_analysis: bool = False,
@@ -891,13 +965,15 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
  """
- Detects entities in a given text based on the entity_detector.yaml prompt.
+ Proposition input text to meaningful sentences.
+
+ Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.

  Arguments:
  text: The input text
@@ -913,7 +989,7 @@ class AsyncTheTool:

  Returns:
  ToolOutput: Object containing:
- - result (list[Entity]): The entities
+ - result (list[str]): The propositions
  - logprobs (list | None): Probability data if logprobs enabled
  - analysis (str | None): Detailed reasoning if with_analysis enabled
  - process (str | None): Description of the process used
@@ -938,8 +1014,8 @@ class AsyncTheTool:
  max_validation_retries=max_validation_retries,
  priority=priority,
  # Internal parameters
- prompt_file="detect_entity.yaml",
- output_model=Models.EntityDetectorOutput,
+ prompt_file="propositionize.yaml",
+ output_model=Models.ListStr,
  mode=None,
  )
  end = datetime.now()
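
A minimal sketch of propositionize, which takes over the slot of the removed detect_entity tool (illustrative names; flagged EXPERIMENTAL above):

# Hypothetical sketch; the result is Models.ListStr, replacing the old
# Models.EntityDetectorOutput that detect_entity returned.
async def demo(tool):
    output = await tool.propositionize(
        text="Marie Curie, who won two Nobel Prizes, was born in Warsaw.",
    )
    print(output.result)  # self-contained propositions, one per list entry
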
@@ -959,24 +1035,28 @@ class AsyncTheTool:

  return output

- async def propositionize(
+ async def check_fact(
  self,
  text: str,
+ source_text: str,
  with_analysis: bool = False,
  output_lang: str | None = None,
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
  """
- Proposition input text to meaningful sentences.
+ Checks wheather a statement is relevant to the source text or not.
+
+ Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.

  Arguments:
  text: The input text
+ source_text: the source text that we want to check relation of text to it
  with_analysis: Whether to include detailed reasoning analysis
  output_lang: Language for the output summary
  user_prompt: Additional instructions for summarization
@@ -989,7 +1069,7 @@ class AsyncTheTool:

  Returns:
  ToolOutput: Object containing:
- - result (list[str]): The propositions
+ - result (bool): statement is relevant to source text or not
  - logprobs (list | None): Probability data if logprobs enabled
  - analysis (str | None): Detailed reasoning if with_analysis enabled
  - process (str | None): Description of the process used
@@ -1014,14 +1094,14 @@ class AsyncTheTool:
  max_validation_retries=max_validation_retries,
  priority=priority,
  # Internal parameters
- prompt_file="propositionize.yaml",
- output_model=Models.ListStrOutput,
+ prompt_file="check_fact.yaml",
+ output_model=Models.Bool,
  mode=None,
+ source_text=source_text,
  )
  end = datetime.now()
  output.execution_time = (end - start).total_seconds()
  return output
-
  except PromptError as e:
  output.errors.append(f"Prompt error: {e}")
  except LLMError as e:
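
A minimal sketch of the new check_fact tool under the signature above (illustrative strings; flagged EXPERIMENTAL):

# Hypothetical sketch; `source_text` is forwarded to the check_fact.yaml prompt.
async def demo(tool):
    output = await tool.check_fact(
        text="The library supports asynchronous usage.",
        source_text="hamtaa-texttools exposes AsyncTheTool for async workflows.",
    )
    print(output.result)  # bool: is the statement relevant to source_text?
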
@@ -1038,20 +1118,27 @@ class AsyncTheTool:
  async def run_custom(
  self,
  prompt: str,
- output_model: Any,
+ output_model: object,
+ with_analysis: bool = False,
+ analyze_template: str | None = None,
  output_lang: str | None = None,
  temperature: float | None = None,
  logprobs: bool | None = None,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
  """
  Custom tool that can do almost anything!

+ Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
+
  Arguments:
- text: The user prompt
+ prompt: The user prompt
+ output_model: Pydantic BaseModel used for structured output
+ with_analysis: Whether to include detailed reasoning analysis
+ analyze_template: The analyze template used for reasoning analysis
  output_lang: Language for the output summary
  temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
  logprobs: Whether to return token probability information
@@ -1078,6 +1165,8 @@ class AsyncTheTool:
  # User paramaeters
  text=prompt,
  output_model=output_model,
+ with_analysis=with_analysis,
+ analyze_template=analyze_template,
  output_model_str=output_model.model_json_schema(),
  output_lang=output_lang,
  temperature=temperature,
@@ -1089,7 +1178,6 @@ class AsyncTheTool:
  # Internal parameters
  prompt_file="run_custom.yaml",
  user_prompt=None,
- with_analysis=False,
  mode=None,
  )
  end = datetime.now()
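
Finally, a minimal sketch of the extended run_custom; the updated docstring says output_model is a Pydantic BaseModel, which is consistent with the model_json_schema() call above (names below are illustrative):

# Hypothetical sketch; `Sentiment` is an illustrative Pydantic model.
from pydantic import BaseModel

class Sentiment(BaseModel):
    label: str
    score: float

async def demo(tool):
    output = await tool.run_custom(
        prompt="Classify the sentiment of: 'I love this library!'",
        output_model=Sentiment,
        with_analysis=True,     # no longer hard-coded to False internally
        analyze_template=None,  # optional reasoning template, new in 1.1.20
    )
    print(output.result)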