hamtaa-texttools 1.1.19__py3-none-any.whl → 1.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  from datetime import datetime
2
- from typing import Literal, Any
2
+ from typing import Literal
3
3
  from collections.abc import Callable
4
4
 
5
5
  from openai import OpenAI
@@ -12,6 +12,7 @@ from texttools.internals.exceptions import (
12
12
  LLMError,
13
13
  ValidationError,
14
14
  )
15
+ from texttools.internals.text_to_chunks import text_to_chunks
15
16
 
16
17
 
17
18
  class TheTool:
@@ -35,9 +36,9 @@ class TheTool:
35
36
  user_prompt: str | None = None,
36
37
  temperature: float | None = 0.0,
37
38
  logprobs: bool = False,
38
- top_logprobs: int | None = None,
39
+ top_logprobs: int = 3,
39
40
  mode: Literal["category_list", "category_tree"] = "category_list",
40
- validator: Callable[[Any], bool] | None = None,
41
+ validator: Callable[[object], bool] | None = None,
41
42
  max_validation_retries: int | None = None,
42
43
  priority: int | None = 0,
43
44
  ) -> Models.ToolOutput:
@@ -75,11 +76,11 @@ class TheTool:
75
76
  start = datetime.now()
76
77
 
77
78
  if mode == "category_tree":
78
- # Initializations
79
- output = Models.ToolOutput()
80
79
  levels = categories.get_level_count()
81
80
  parent_id = 0
82
- final_output = []
81
+ final_categories = []
82
+ analysis = ""
83
+ logprobs = []
83
84
 
84
85
  for _ in range(levels):
85
86
  # Get child nodes for current parent
@@ -102,7 +103,7 @@ class TheTool:
102
103
  ]
103
104
  category_names = [node.name for node in children]
104
105
 
105
- # Run categorization for this level
106
+ # Run categorization for current level
106
107
  level_output = self._operator.run(
107
108
  # User parameters
108
109
  text=text,
@@ -143,16 +144,22 @@ class TheTool:
143
144
  return output
144
145
 
145
146
  parent_id = parent_node.node_id
146
- final_output.append(parent_node.name)
147
+ final_categories.append(parent_node.name)
147
148
 
148
- # Copy analysis/logprobs/process from the last level's output
149
- output.analysis = level_output.analysis
150
- output.logprobs = level_output.logprobs
151
- output.process = level_output.process
149
+ if with_analysis:
150
+ analysis += level_output.analysis
151
+ if logprobs:
152
+ logprobs += level_output.logprobs
152
153
 
153
- output.result = final_output
154
154
  end = datetime.now()
155
- output.execution_time = (end - start).total_seconds()
155
+ output = Models.ToolOutput(
156
+ result=final_categories,
157
+ logprobs=logprobs,
158
+ analysis=analysis,
159
+ process="categorize",
160
+ execution_time=(end - start).total_seconds(),
161
+ )
162
+
156
163
  return output
157
164
 
158
165
  else:
@@ -199,10 +206,10 @@ class TheTool:
199
206
  user_prompt: str | None = None,
200
207
  temperature: float | None = 0.0,
201
208
  logprobs: bool = False,
202
- top_logprobs: int | None = None,
209
+ top_logprobs: int = 3,
203
210
  mode: Literal["auto", "threshold", "count"] = "auto",
204
211
  number_of_keywords: int | None = None,
205
- validator: Callable[[Any], bool] | None = None,
212
+ validator: Callable[[object], bool] | None = None,
206
213
  max_validation_retries: int | None = None,
207
214
  priority: int | None = 0,
208
215
  ) -> Models.ToolOutput:
@@ -251,7 +258,7 @@ class TheTool:
251
258
  priority=priority,
252
259
  # Internal parameters
253
260
  prompt_file="extract_keywords.yaml",
254
- output_model=Models.ListStrOutput,
261
+ output_model=Models.ListStr,
255
262
  )
256
263
  end = datetime.now()
257
264
  output.execution_time = (end - start).total_seconds()
@@ -273,13 +280,14 @@ class TheTool:
273
280
  def extract_entities(
274
281
  self,
275
282
  text: str,
283
+ entities: list[str] | None = None,
276
284
  with_analysis: bool = False,
277
285
  output_lang: str | None = None,
278
286
  user_prompt: str | None = None,
279
287
  temperature: float | None = 0.0,
280
288
  logprobs: bool = False,
281
- top_logprobs: int | None = None,
282
- validator: Callable[[Any], bool] | None = None,
289
+ top_logprobs: int = 3,
290
+ validator: Callable[[object], bool] | None = None,
283
291
  max_validation_retries: int | None = None,
284
292
  priority: int | None = 0,
285
293
  ) -> Models.ToolOutput:
@@ -288,6 +296,7 @@ class TheTool:
288
296
 
289
297
  Arguments:
290
298
  text: The input text to extract entities from
299
+ entities: List of entities provided by user (Optional)
291
300
  with_analysis: Whether to include detailed reasoning analysis
292
301
  output_lang: Language for the output response
293
302
  user_prompt: Additional instructions for entity extraction
@@ -315,6 +324,8 @@ class TheTool:
315
324
  output = self._operator.run(
316
325
  # User parameters
317
326
  text=text,
327
+ entities=entities
328
+ or "all named entities (e.g., PER, ORG, LOC, DAT, etc.)",
318
329
  with_analysis=with_analysis,
319
330
  output_lang=output_lang,
320
331
  user_prompt=user_prompt,
@@ -326,7 +337,7 @@ class TheTool:
326
337
  priority=priority,
327
338
  # Internal parameters
328
339
  prompt_file="extract_entities.yaml",
329
- output_model=Models.ListDictStrStrOutput,
340
+ output_model=Models.ListDictStrStr,
330
341
  mode=None,
331
342
  )
332
343
  end = datetime.now()
@@ -353,8 +364,8 @@ class TheTool:
353
364
  user_prompt: str | None = None,
354
365
  temperature: float | None = 0.0,
355
366
  logprobs: bool = False,
356
- top_logprobs: int | None = None,
357
- validator: Callable[[Any], bool] | None = None,
367
+ top_logprobs: int = 3,
368
+ validator: Callable[[object], bool] | None = None,
358
369
  max_validation_retries: int | None = None,
359
370
  priority: int | None = 0,
360
371
  ) -> Models.ToolOutput:
@@ -399,7 +410,7 @@ class TheTool:
399
410
  priority=priority,
400
411
  # Internal parameters
401
412
  prompt_file="is_question.yaml",
402
- output_model=Models.BoolOutput,
413
+ output_model=Models.Bool,
403
414
  mode=None,
404
415
  output_lang=None,
405
416
  )
@@ -423,13 +434,14 @@ class TheTool:
423
434
  def text_to_question(
424
435
  self,
425
436
  text: str,
437
+ number_of_questions: int,
426
438
  with_analysis: bool = False,
427
439
  output_lang: str | None = None,
428
440
  user_prompt: str | None = None,
429
441
  temperature: float | None = 0.0,
430
442
  logprobs: bool = False,
431
- top_logprobs: int | None = None,
432
- validator: Callable[[Any], bool] | None = None,
443
+ top_logprobs: int = 3,
444
+ validator: Callable[[object], bool] | None = None,
433
445
  max_validation_retries: int | None = None,
434
446
  priority: int | None = 0,
435
447
  ) -> Models.ToolOutput:
@@ -438,6 +450,7 @@ class TheTool:
438
450
 
439
451
  Arguments:
440
452
  text: The input text to generate a question from
453
+ number_of_questions: Number of questions to generate
441
454
  with_analysis: Whether to include detailed reasoning analysis
442
455
  output_lang: Language for the output question
443
456
  user_prompt: Additional instructions for question generation
@@ -465,6 +478,7 @@ class TheTool:
465
478
  output = self._operator.run(
466
479
  # User parameters
467
480
  text=text,
481
+ number_of_questions=number_of_questions,
468
482
  with_analysis=with_analysis,
469
483
  output_lang=output_lang,
470
484
  user_prompt=user_prompt,
@@ -476,7 +490,7 @@ class TheTool:
476
490
  priority=priority,
477
491
  # Internal parameters
478
492
  prompt_file="text_to_question.yaml",
479
- output_model=Models.StrOutput,
493
+ output_model=Models.ReasonListStr,
480
494
  mode=None,
481
495
  )
482
496
  end = datetime.now()
@@ -504,9 +518,9 @@ class TheTool:
504
518
  user_prompt: str | None = None,
505
519
  temperature: float | None = 0.0,
506
520
  logprobs: bool = False,
507
- top_logprobs: int | None = None,
521
+ top_logprobs: int = 3,
508
522
  mode: Literal["default", "reason"] = "default",
509
- validator: Callable[[Any], bool] | None = None,
523
+ validator: Callable[[object], bool] | None = None,
510
524
  max_validation_retries: int | None = None,
511
525
  priority: int | None = 0,
512
526
  ) -> Models.ToolOutput:
@@ -555,7 +569,7 @@ class TheTool:
555
569
  priority=priority,
556
570
  # Internal parameters
557
571
  prompt_file="merge_questions.yaml",
558
- output_model=Models.StrOutput,
572
+ output_model=Models.Str,
559
573
  mode=mode,
560
574
  )
561
575
  end = datetime.now()
@@ -583,9 +597,9 @@ class TheTool:
583
597
  user_prompt: str | None = None,
584
598
  temperature: float | None = 0.0,
585
599
  logprobs: bool = False,
586
- top_logprobs: int | None = None,
600
+ top_logprobs: int = 3,
587
601
  mode: Literal["positive", "negative", "hard_negative"] = "positive",
588
- validator: Callable[[Any], bool] | None = None,
602
+ validator: Callable[[object], bool] | None = None,
589
603
  max_validation_retries: int | None = None,
590
604
  priority: int | None = 0,
591
605
  ) -> Models.ToolOutput:
@@ -633,7 +647,7 @@ class TheTool:
633
647
  priority=priority,
634
648
  # Internal parameters
635
649
  prompt_file="rewrite.yaml",
636
- output_model=Models.StrOutput,
650
+ output_model=Models.Str,
637
651
  mode=mode,
638
652
  )
639
653
  end = datetime.now()
@@ -662,8 +676,8 @@ class TheTool:
662
676
  user_prompt: str | None = None,
663
677
  temperature: float | None = 0.0,
664
678
  logprobs: bool = False,
665
- top_logprobs: int | None = None,
666
- validator: Callable[[Any], bool] | None = None,
679
+ top_logprobs: int = 3,
680
+ validator: Callable[[object], bool] | None = None,
667
681
  max_validation_retries: int | None = None,
668
682
  priority: int | None = 0,
669
683
  ) -> Models.ToolOutput:
@@ -712,7 +726,7 @@ class TheTool:
712
726
  priority=priority,
713
727
  # Internal parameters
714
728
  prompt_file="subject_to_question.yaml",
715
- output_model=Models.ReasonListStrOutput,
729
+ output_model=Models.ReasonListStr,
716
730
  mode=None,
717
731
  )
718
732
  end = datetime.now()
@@ -740,8 +754,8 @@ class TheTool:
740
754
  user_prompt: str | None = None,
741
755
  temperature: float | None = 0.0,
742
756
  logprobs: bool = False,
743
- top_logprobs: int | None = None,
744
- validator: Callable[[Any], bool] | None = None,
757
+ top_logprobs: int = 3,
758
+ validator: Callable[[object], bool] | None = None,
745
759
  max_validation_retries: int | None = None,
746
760
  priority: int | None = 0,
747
761
  ) -> Models.ToolOutput:
@@ -788,7 +802,7 @@ class TheTool:
788
802
  priority=priority,
789
803
  # Internal parameters
790
804
  prompt_file="summarize.yaml",
791
- output_model=Models.StrOutput,
805
+ output_model=Models.Str,
792
806
  mode=None,
793
807
  )
794
808
  end = datetime.now()
@@ -812,12 +826,13 @@ class TheTool:
812
826
  self,
813
827
  text: str,
814
828
  target_language: str,
829
+ use_chunker: bool = True,
815
830
  with_analysis: bool = False,
816
831
  user_prompt: str | None = None,
817
832
  temperature: float | None = 0.0,
818
833
  logprobs: bool = False,
819
- top_logprobs: int | None = None,
820
- validator: Callable[[Any], bool] | None = None,
834
+ top_logprobs: int = 3,
835
+ validator: Callable[[object], bool] | None = None,
821
836
  max_validation_retries: int | None = None,
822
837
  priority: int | None = 0,
823
838
  ) -> Models.ToolOutput:
@@ -829,6 +844,7 @@ class TheTool:
829
844
  Arguments:
830
845
  text: The input text to translate
831
846
  target_language: The target language for translation
847
+ use_chunker: Whether to use text chunker for text length bigger than 1500
832
848
  with_analysis: Whether to include detailed reasoning analysis
833
849
  user_prompt: Additional instructions for translation
834
850
  temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
@@ -852,27 +868,81 @@ class TheTool:
852
868
 
853
869
  try:
854
870
  start = datetime.now()
855
- output = self._operator.run(
856
- # User parameters
857
- text=text,
858
- target_language=target_language,
859
- with_analysis=with_analysis,
860
- user_prompt=user_prompt,
861
- temperature=temperature,
862
- logprobs=logprobs,
863
- top_logprobs=top_logprobs,
864
- validator=validator,
865
- max_validation_retries=max_validation_retries,
866
- priority=priority,
867
- # Internal parameters
868
- prompt_file="translate.yaml",
869
- output_model=Models.StrOutput,
870
- mode=None,
871
- output_lang=None,
872
- )
873
- end = datetime.now()
874
- output.execution_time = (end - start).total_seconds()
875
- return output
871
+
872
+ if len(text.split(" ")) > 1500 and use_chunker:
873
+ chunks = text_to_chunks(text, 1200, 0)
874
+
875
+ translation = ""
876
+ analysis = ""
877
+ logprobs = []
878
+
879
+ # Run translation for each chunk
880
+ for chunk in chunks:
881
+ chunk_output = self._operator.run(
882
+ # User parameters
883
+ text=chunk,
884
+ target_language=target_language,
885
+ with_analysis=with_analysis,
886
+ user_prompt=user_prompt,
887
+ temperature=temperature,
888
+ logprobs=logprobs,
889
+ top_logprobs=top_logprobs,
890
+ validator=validator,
891
+ max_validation_retries=max_validation_retries,
892
+ priority=priority,
893
+ # Internal parameters
894
+ prompt_file="translate.yaml",
895
+ output_model=Models.Str,
896
+ mode=None,
897
+ output_lang=None,
898
+ )
899
+
900
+ # Check for errors from operator
901
+ if chunk_output.errors:
902
+ output.errors.extend(chunk_output.errors)
903
+ end = datetime.now()
904
+ output.execution_time = (end - start).total_seconds()
905
+ return output
906
+
907
+ # Concatenate the outputs
908
+ translation += chunk_output.result + "\n"
909
+ if with_analysis:
910
+ analysis += chunk_output.analysis
911
+ if logprobs:
912
+ logprobs += chunk_output.logprobs
913
+
914
+ end = datetime.now()
915
+ output = Models.ToolOutput(
916
+ result=translation,
917
+ logprobs=logprobs,
918
+ analysis=analysis,
919
+ process="translate",
920
+ execution_time=(end - start).total_seconds(),
921
+ )
922
+ return output
923
+
924
+ else:
925
+ output = self._operator.run(
926
+ # User parameters
927
+ text=text,
928
+ target_language=target_language,
929
+ with_analysis=with_analysis,
930
+ user_prompt=user_prompt,
931
+ temperature=temperature,
932
+ logprobs=logprobs,
933
+ top_logprobs=top_logprobs,
934
+ validator=validator,
935
+ max_validation_retries=max_validation_retries,
936
+ priority=priority,
937
+ # Internal parameters
938
+ prompt_file="translate.yaml",
939
+ output_model=Models.Str,
940
+ mode=None,
941
+ output_lang=None,
942
+ )
943
+ end = datetime.now()
944
+ output.execution_time = (end - start).total_seconds()
945
+ return output
876
946
 
877
947
  except PromptError as e:
878
948
  output.errors.append(f"Prompt error: {e}")
@@ -895,8 +965,8 @@ class TheTool:
895
965
  user_prompt: str | None = None,
896
966
  temperature: float | None = 0.0,
897
967
  logprobs: bool = False,
898
- top_logprobs: int | None = None,
899
- validator: Callable[[Any], bool] | None = None,
968
+ top_logprobs: int = 3,
969
+ validator: Callable[[object], bool] | None = None,
900
970
  max_validation_retries: int | None = None,
901
971
  priority: int | None = 0,
902
972
  ) -> Models.ToolOutput:
@@ -945,7 +1015,7 @@ class TheTool:
945
1015
  priority=priority,
946
1016
  # Internal parameters
947
1017
  prompt_file="propositionize.yaml",
948
- output_model=Models.ListStrOutput,
1018
+ output_model=Models.ListStr,
949
1019
  mode=None,
950
1020
  )
951
1021
  end = datetime.now()
@@ -974,8 +1044,8 @@ class TheTool:
974
1044
  user_prompt: str | None = None,
975
1045
  temperature: float | None = 0.0,
976
1046
  logprobs: bool = False,
977
- top_logprobs: int | None = None,
978
- validator: Callable[[Any], bool] | None = None,
1047
+ top_logprobs: int = 3,
1048
+ validator: Callable[[object], bool] | None = None,
979
1049
  max_validation_retries: int | None = None,
980
1050
  priority: int | None = 0,
981
1051
  ) -> Models.ToolOutput:
@@ -1024,7 +1094,7 @@ class TheTool:
1024
1094
  priority=priority,
1025
1095
  # Internal parameters
1026
1096
  prompt_file="check_fact.yaml",
1027
- output_model=Models.BoolOutput,
1097
+ output_model=Models.Bool,
1028
1098
  mode=None,
1029
1099
  source_text=source_text,
1030
1100
  )
@@ -1048,14 +1118,14 @@ class TheTool:
1048
1118
  def run_custom(
1049
1119
  self,
1050
1120
  prompt: str,
1051
- output_model: Any,
1121
+ output_model: object,
1052
1122
  with_analysis: bool = False,
1053
1123
  analyze_template: str | None = None,
1054
1124
  output_lang: str | None = None,
1055
1125
  temperature: float | None = None,
1056
1126
  logprobs: bool | None = None,
1057
- top_logprobs: int | None = None,
1058
- validator: Callable[[Any], bool] | None = None,
1127
+ top_logprobs: int = 3,
1128
+ validator: Callable[[object], bool] | None = None,
1059
1129
  max_validation_retries: int | None = None,
1060
1130
  priority: int | None = 0,
1061
1131
  ) -> Models.ToolOutput:
@@ -1,24 +0,0 @@
1
- class Formatter:
2
- @staticmethod
3
- def user_merge_format(messages: list[dict[str, str]]) -> list[dict[str, str]]:
4
- """
5
- Merges consecutive user messages into a single message, separated by newlines.
6
-
7
- This is useful for condensing a multi-turn user input into a single
8
- message for the LLM. Assistant and system messages are left unchanged and
9
- act as separators between user message groups.
10
- """
11
- merged: list[dict[str, str]] = []
12
-
13
- for message in messages:
14
- role, content = message["role"], message["content"].strip()
15
-
16
- # Merge with previous user turn
17
- if merged and role == "user" and merged[-1]["role"] == "user":
18
- merged[-1]["content"] += "\n" + content
19
-
20
- # Otherwise, start a new turn
21
- else:
22
- merged.append({"role": role, "content": content})
23
-
24
- return merged