hamtaa-texttools 1.1.18__py3-none-any.whl → 1.1.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.18.dist-info → hamtaa_texttools-1.1.20.dist-info}/METADATA +38 -8
- hamtaa_texttools-1.1.20.dist-info/RECORD +33 -0
- texttools/batch/batch_runner.py +6 -6
- texttools/batch/internals/batch_manager.py +6 -6
- texttools/batch/internals/utils.py +1 -4
- texttools/internals/async_operator.py +4 -6
- texttools/internals/models.py +8 -17
- texttools/internals/operator_utils.py +24 -0
- texttools/internals/prompt_loader.py +34 -6
- texttools/internals/sync_operator.py +4 -6
- texttools/internals/text_to_chunks.py +97 -0
- texttools/prompts/check_fact.yaml +19 -0
- texttools/prompts/extract_entities.yaml +1 -1
- texttools/prompts/propositionize.yaml +13 -6
- texttools/prompts/run_custom.yaml +1 -1
- texttools/prompts/text_to_question.yaml +6 -4
- texttools/tools/async_tools.py +169 -81
- texttools/tools/sync_tools.py +169 -81
- hamtaa_texttools-1.1.18.dist-info/RECORD +0 -33
- texttools/internals/formatters.py +0 -24
- texttools/prompts/detect_entity.yaml +0 -22
- {hamtaa_texttools-1.1.18.dist-info → hamtaa_texttools-1.1.20.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-1.1.18.dist-info → hamtaa_texttools-1.1.20.dist-info}/licenses/LICENSE +0 -0
- {hamtaa_texttools-1.1.18.dist-info → hamtaa_texttools-1.1.20.dist-info}/top_level.txt +0 -0
texttools/tools/sync_tools.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
|
-
from typing import Literal
|
|
2
|
+
from typing import Literal
|
|
3
3
|
from collections.abc import Callable
|
|
4
4
|
|
|
5
5
|
from openai import OpenAI
|
|
@@ -12,6 +12,7 @@ from texttools.internals.exceptions import (
|
|
|
12
12
|
LLMError,
|
|
13
13
|
ValidationError,
|
|
14
14
|
)
|
|
15
|
+
from texttools.internals.text_to_chunks import text_to_chunks
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class TheTool:
|
|
@@ -35,15 +36,17 @@ class TheTool:
|
|
|
35
36
|
user_prompt: str | None = None,
|
|
36
37
|
temperature: float | None = 0.0,
|
|
37
38
|
logprobs: bool = False,
|
|
38
|
-
top_logprobs: int
|
|
39
|
+
top_logprobs: int = 3,
|
|
39
40
|
mode: Literal["category_list", "category_tree"] = "category_list",
|
|
40
|
-
validator: Callable[[
|
|
41
|
+
validator: Callable[[object], bool] | None = None,
|
|
41
42
|
max_validation_retries: int | None = None,
|
|
42
43
|
priority: int | None = 0,
|
|
43
44
|
) -> Models.ToolOutput:
|
|
44
45
|
"""
|
|
45
46
|
Categorize a text into a category / category tree.
|
|
46
47
|
|
|
48
|
+
Important Note: category_tree mode is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
49
|
+
|
|
47
50
|
Arguments:
|
|
48
51
|
text: The input text to categorize
|
|
49
52
|
categories: The category / category_tree to give to LLM
|
|
@@ -73,11 +76,11 @@ class TheTool:
|
|
|
73
76
|
start = datetime.now()
|
|
74
77
|
|
|
75
78
|
if mode == "category_tree":
|
|
76
|
-
# Initializations
|
|
77
|
-
output = Models.ToolOutput()
|
|
78
79
|
levels = categories.get_level_count()
|
|
79
80
|
parent_id = 0
|
|
80
|
-
|
|
81
|
+
final_categories = []
|
|
82
|
+
analysis = ""
|
|
83
|
+
logprobs = []
|
|
81
84
|
|
|
82
85
|
for _ in range(levels):
|
|
83
86
|
# Get child nodes for current parent
|
|
@@ -100,7 +103,7 @@ class TheTool:
|
|
|
100
103
|
]
|
|
101
104
|
category_names = [node.name for node in children]
|
|
102
105
|
|
|
103
|
-
# Run categorization for
|
|
106
|
+
# Run categorization for current level
|
|
104
107
|
level_output = self._operator.run(
|
|
105
108
|
# User parameters
|
|
106
109
|
text=text,
|
|
@@ -141,16 +144,22 @@ class TheTool:
|
|
|
141
144
|
return output
|
|
142
145
|
|
|
143
146
|
parent_id = parent_node.node_id
|
|
144
|
-
|
|
147
|
+
final_categories.append(parent_node.name)
|
|
145
148
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
149
|
+
if with_analysis:
|
|
150
|
+
analysis += level_output.analysis
|
|
151
|
+
if logprobs:
|
|
152
|
+
logprobs += level_output.logprobs
|
|
150
153
|
|
|
151
|
-
output.result = final_output
|
|
152
154
|
end = datetime.now()
|
|
153
|
-
output
|
|
155
|
+
output = Models.ToolOutput(
|
|
156
|
+
result=final_categories,
|
|
157
|
+
logprobs=logprobs,
|
|
158
|
+
analysis=analysis,
|
|
159
|
+
process="categorize",
|
|
160
|
+
execution_time=(end - start).total_seconds(),
|
|
161
|
+
)
|
|
162
|
+
|
|
154
163
|
return output
|
|
155
164
|
|
|
156
165
|
else:
|
|
@@ -197,10 +206,10 @@ class TheTool:
|
|
|
197
206
|
user_prompt: str | None = None,
|
|
198
207
|
temperature: float | None = 0.0,
|
|
199
208
|
logprobs: bool = False,
|
|
200
|
-
top_logprobs: int
|
|
209
|
+
top_logprobs: int = 3,
|
|
201
210
|
mode: Literal["auto", "threshold", "count"] = "auto",
|
|
202
211
|
number_of_keywords: int | None = None,
|
|
203
|
-
validator: Callable[[
|
|
212
|
+
validator: Callable[[object], bool] | None = None,
|
|
204
213
|
max_validation_retries: int | None = None,
|
|
205
214
|
priority: int | None = 0,
|
|
206
215
|
) -> Models.ToolOutput:
|
|
@@ -249,7 +258,7 @@ class TheTool:
|
|
|
249
258
|
priority=priority,
|
|
250
259
|
# Internal parameters
|
|
251
260
|
prompt_file="extract_keywords.yaml",
|
|
252
|
-
output_model=Models.
|
|
261
|
+
output_model=Models.ListStr,
|
|
253
262
|
)
|
|
254
263
|
end = datetime.now()
|
|
255
264
|
output.execution_time = (end - start).total_seconds()
|
|
@@ -271,13 +280,14 @@ class TheTool:
|
|
|
271
280
|
def extract_entities(
|
|
272
281
|
self,
|
|
273
282
|
text: str,
|
|
283
|
+
entities: list[str] | None = None,
|
|
274
284
|
with_analysis: bool = False,
|
|
275
285
|
output_lang: str | None = None,
|
|
276
286
|
user_prompt: str | None = None,
|
|
277
287
|
temperature: float | None = 0.0,
|
|
278
288
|
logprobs: bool = False,
|
|
279
|
-
top_logprobs: int
|
|
280
|
-
validator: Callable[[
|
|
289
|
+
top_logprobs: int = 3,
|
|
290
|
+
validator: Callable[[object], bool] | None = None,
|
|
281
291
|
max_validation_retries: int | None = None,
|
|
282
292
|
priority: int | None = 0,
|
|
283
293
|
) -> Models.ToolOutput:
|
|
@@ -286,6 +296,7 @@ class TheTool:
|
|
|
286
296
|
|
|
287
297
|
Arguments:
|
|
288
298
|
text: The input text to extract entities from
|
|
299
|
+
entities: List of entities provided by user (Optional)
|
|
289
300
|
with_analysis: Whether to include detailed reasoning analysis
|
|
290
301
|
output_lang: Language for the output response
|
|
291
302
|
user_prompt: Additional instructions for entity extraction
|
|
@@ -313,6 +324,8 @@ class TheTool:
|
|
|
313
324
|
output = self._operator.run(
|
|
314
325
|
# User parameters
|
|
315
326
|
text=text,
|
|
327
|
+
entities=entities
|
|
328
|
+
or "all named entities (e.g., PER, ORG, LOC, DAT, etc.)",
|
|
316
329
|
with_analysis=with_analysis,
|
|
317
330
|
output_lang=output_lang,
|
|
318
331
|
user_prompt=user_prompt,
|
|
@@ -324,7 +337,7 @@ class TheTool:
|
|
|
324
337
|
priority=priority,
|
|
325
338
|
# Internal parameters
|
|
326
339
|
prompt_file="extract_entities.yaml",
|
|
327
|
-
output_model=Models.
|
|
340
|
+
output_model=Models.ListDictStrStr,
|
|
328
341
|
mode=None,
|
|
329
342
|
)
|
|
330
343
|
end = datetime.now()
|
|
@@ -351,8 +364,8 @@ class TheTool:
|
|
|
351
364
|
user_prompt: str | None = None,
|
|
352
365
|
temperature: float | None = 0.0,
|
|
353
366
|
logprobs: bool = False,
|
|
354
|
-
top_logprobs: int
|
|
355
|
-
validator: Callable[[
|
|
367
|
+
top_logprobs: int = 3,
|
|
368
|
+
validator: Callable[[object], bool] | None = None,
|
|
356
369
|
max_validation_retries: int | None = None,
|
|
357
370
|
priority: int | None = 0,
|
|
358
371
|
) -> Models.ToolOutput:
|
|
@@ -397,7 +410,7 @@ class TheTool:
|
|
|
397
410
|
priority=priority,
|
|
398
411
|
# Internal parameters
|
|
399
412
|
prompt_file="is_question.yaml",
|
|
400
|
-
output_model=Models.
|
|
413
|
+
output_model=Models.Bool,
|
|
401
414
|
mode=None,
|
|
402
415
|
output_lang=None,
|
|
403
416
|
)
|
|
@@ -421,13 +434,14 @@ class TheTool:
|
|
|
421
434
|
def text_to_question(
|
|
422
435
|
self,
|
|
423
436
|
text: str,
|
|
437
|
+
number_of_questions: int,
|
|
424
438
|
with_analysis: bool = False,
|
|
425
439
|
output_lang: str | None = None,
|
|
426
440
|
user_prompt: str | None = None,
|
|
427
441
|
temperature: float | None = 0.0,
|
|
428
442
|
logprobs: bool = False,
|
|
429
|
-
top_logprobs: int
|
|
430
|
-
validator: Callable[[
|
|
443
|
+
top_logprobs: int = 3,
|
|
444
|
+
validator: Callable[[object], bool] | None = None,
|
|
431
445
|
max_validation_retries: int | None = None,
|
|
432
446
|
priority: int | None = 0,
|
|
433
447
|
) -> Models.ToolOutput:
|
|
@@ -436,6 +450,7 @@ class TheTool:
|
|
|
436
450
|
|
|
437
451
|
Arguments:
|
|
438
452
|
text: The input text to generate a question from
|
|
453
|
+
number_of_questions: Number of questions to generate
|
|
439
454
|
with_analysis: Whether to include detailed reasoning analysis
|
|
440
455
|
output_lang: Language for the output question
|
|
441
456
|
user_prompt: Additional instructions for question generation
|
|
@@ -463,6 +478,7 @@ class TheTool:
|
|
|
463
478
|
output = self._operator.run(
|
|
464
479
|
# User parameters
|
|
465
480
|
text=text,
|
|
481
|
+
number_of_questions=number_of_questions,
|
|
466
482
|
with_analysis=with_analysis,
|
|
467
483
|
output_lang=output_lang,
|
|
468
484
|
user_prompt=user_prompt,
|
|
@@ -474,7 +490,7 @@ class TheTool:
|
|
|
474
490
|
priority=priority,
|
|
475
491
|
# Internal parameters
|
|
476
492
|
prompt_file="text_to_question.yaml",
|
|
477
|
-
output_model=Models.
|
|
493
|
+
output_model=Models.ReasonListStr,
|
|
478
494
|
mode=None,
|
|
479
495
|
)
|
|
480
496
|
end = datetime.now()
|
|
@@ -502,9 +518,9 @@ class TheTool:
|
|
|
502
518
|
user_prompt: str | None = None,
|
|
503
519
|
temperature: float | None = 0.0,
|
|
504
520
|
logprobs: bool = False,
|
|
505
|
-
top_logprobs: int
|
|
521
|
+
top_logprobs: int = 3,
|
|
506
522
|
mode: Literal["default", "reason"] = "default",
|
|
507
|
-
validator: Callable[[
|
|
523
|
+
validator: Callable[[object], bool] | None = None,
|
|
508
524
|
max_validation_retries: int | None = None,
|
|
509
525
|
priority: int | None = 0,
|
|
510
526
|
) -> Models.ToolOutput:
|
|
@@ -553,7 +569,7 @@ class TheTool:
|
|
|
553
569
|
priority=priority,
|
|
554
570
|
# Internal parameters
|
|
555
571
|
prompt_file="merge_questions.yaml",
|
|
556
|
-
output_model=Models.
|
|
572
|
+
output_model=Models.Str,
|
|
557
573
|
mode=mode,
|
|
558
574
|
)
|
|
559
575
|
end = datetime.now()
|
|
@@ -581,9 +597,9 @@ class TheTool:
|
|
|
581
597
|
user_prompt: str | None = None,
|
|
582
598
|
temperature: float | None = 0.0,
|
|
583
599
|
logprobs: bool = False,
|
|
584
|
-
top_logprobs: int
|
|
600
|
+
top_logprobs: int = 3,
|
|
585
601
|
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
586
|
-
validator: Callable[[
|
|
602
|
+
validator: Callable[[object], bool] | None = None,
|
|
587
603
|
max_validation_retries: int | None = None,
|
|
588
604
|
priority: int | None = 0,
|
|
589
605
|
) -> Models.ToolOutput:
|
|
@@ -631,7 +647,7 @@ class TheTool:
|
|
|
631
647
|
priority=priority,
|
|
632
648
|
# Internal parameters
|
|
633
649
|
prompt_file="rewrite.yaml",
|
|
634
|
-
output_model=Models.
|
|
650
|
+
output_model=Models.Str,
|
|
635
651
|
mode=mode,
|
|
636
652
|
)
|
|
637
653
|
end = datetime.now()
|
|
@@ -660,8 +676,8 @@ class TheTool:
|
|
|
660
676
|
user_prompt: str | None = None,
|
|
661
677
|
temperature: float | None = 0.0,
|
|
662
678
|
logprobs: bool = False,
|
|
663
|
-
top_logprobs: int
|
|
664
|
-
validator: Callable[[
|
|
679
|
+
top_logprobs: int = 3,
|
|
680
|
+
validator: Callable[[object], bool] | None = None,
|
|
665
681
|
max_validation_retries: int | None = None,
|
|
666
682
|
priority: int | None = 0,
|
|
667
683
|
) -> Models.ToolOutput:
|
|
@@ -710,7 +726,7 @@ class TheTool:
|
|
|
710
726
|
priority=priority,
|
|
711
727
|
# Internal parameters
|
|
712
728
|
prompt_file="subject_to_question.yaml",
|
|
713
|
-
output_model=Models.
|
|
729
|
+
output_model=Models.ReasonListStr,
|
|
714
730
|
mode=None,
|
|
715
731
|
)
|
|
716
732
|
end = datetime.now()
|
|
@@ -738,8 +754,8 @@ class TheTool:
|
|
|
738
754
|
user_prompt: str | None = None,
|
|
739
755
|
temperature: float | None = 0.0,
|
|
740
756
|
logprobs: bool = False,
|
|
741
|
-
top_logprobs: int
|
|
742
|
-
validator: Callable[[
|
|
757
|
+
top_logprobs: int = 3,
|
|
758
|
+
validator: Callable[[object], bool] | None = None,
|
|
743
759
|
max_validation_retries: int | None = None,
|
|
744
760
|
priority: int | None = 0,
|
|
745
761
|
) -> Models.ToolOutput:
|
|
@@ -786,7 +802,7 @@ class TheTool:
|
|
|
786
802
|
priority=priority,
|
|
787
803
|
# Internal parameters
|
|
788
804
|
prompt_file="summarize.yaml",
|
|
789
|
-
output_model=Models.
|
|
805
|
+
output_model=Models.Str,
|
|
790
806
|
mode=None,
|
|
791
807
|
)
|
|
792
808
|
end = datetime.now()
|
|
@@ -810,21 +826,25 @@ class TheTool:
|
|
|
810
826
|
self,
|
|
811
827
|
text: str,
|
|
812
828
|
target_language: str,
|
|
829
|
+
use_chunker: bool = True,
|
|
813
830
|
with_analysis: bool = False,
|
|
814
831
|
user_prompt: str | None = None,
|
|
815
832
|
temperature: float | None = 0.0,
|
|
816
833
|
logprobs: bool = False,
|
|
817
|
-
top_logprobs: int
|
|
818
|
-
validator: Callable[[
|
|
834
|
+
top_logprobs: int = 3,
|
|
835
|
+
validator: Callable[[object], bool] | None = None,
|
|
819
836
|
max_validation_retries: int | None = None,
|
|
820
837
|
priority: int | None = 0,
|
|
821
838
|
) -> Models.ToolOutput:
|
|
822
839
|
"""
|
|
823
840
|
Translate text between languages.
|
|
824
841
|
|
|
842
|
+
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
843
|
+
|
|
825
844
|
Arguments:
|
|
826
845
|
text: The input text to translate
|
|
827
846
|
target_language: The target language for translation
|
|
847
|
+
use_chunker: Whether to split texts longer than 1500 words into chunks and translate them chunk by chunk
|
|
828
848
|
with_analysis: Whether to include detailed reasoning analysis
|
|
829
849
|
user_prompt: Additional instructions for translation
|
|
830
850
|
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
@@ -848,27 +868,81 @@ class TheTool:
|
|
|
848
868
|
|
|
849
869
|
try:
|
|
850
870
|
start = datetime.now()
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
871
|
+
|
|
872
|
+
if len(text.split(" ")) > 1500 and use_chunker:
|
|
873
|
+
chunks = text_to_chunks(text, 1200, 0)
|
|
874
|
+
|
|
875
|
+
translation = ""
|
|
876
|
+
analysis = ""
|
|
877
|
+
logprobs = []
|
|
878
|
+
|
|
879
|
+
# Run translation for each chunk
|
|
880
|
+
for chunk in chunks:
|
|
881
|
+
chunk_output = self._operator.run(
|
|
882
|
+
# User parameters
|
|
883
|
+
text=chunk,
|
|
884
|
+
target_language=target_language,
|
|
885
|
+
with_analysis=with_analysis,
|
|
886
|
+
user_prompt=user_prompt,
|
|
887
|
+
temperature=temperature,
|
|
888
|
+
logprobs=logprobs,
|
|
889
|
+
top_logprobs=top_logprobs,
|
|
890
|
+
validator=validator,
|
|
891
|
+
max_validation_retries=max_validation_retries,
|
|
892
|
+
priority=priority,
|
|
893
|
+
# Internal parameters
|
|
894
|
+
prompt_file="translate.yaml",
|
|
895
|
+
output_model=Models.Str,
|
|
896
|
+
mode=None,
|
|
897
|
+
output_lang=None,
|
|
898
|
+
)
|
|
899
|
+
|
|
900
|
+
# Check for errors from operator
|
|
901
|
+
if chunk_output.errors:
|
|
902
|
+
output.errors.extend(chunk_output.errors)
|
|
903
|
+
end = datetime.now()
|
|
904
|
+
output.execution_time = (end - start).total_seconds()
|
|
905
|
+
return output
|
|
906
|
+
|
|
907
|
+
# Concatenate the outputs
|
|
908
|
+
translation += chunk_output.result + "\n"
|
|
909
|
+
if with_analysis:
|
|
910
|
+
analysis += chunk_output.analysis
|
|
911
|
+
if logprobs:
|
|
912
|
+
logprobs += chunk_output.logprobs
|
|
913
|
+
|
|
914
|
+
end = datetime.now()
|
|
915
|
+
output = Models.ToolOutput(
|
|
916
|
+
result=translation,
|
|
917
|
+
logprobs=logprobs,
|
|
918
|
+
analysis=analysis,
|
|
919
|
+
process="translate",
|
|
920
|
+
execution_time=(end - start).total_seconds(),
|
|
921
|
+
)
|
|
922
|
+
return output
|
|
923
|
+
|
|
924
|
+
else:
|
|
925
|
+
output = self._operator.run(
|
|
926
|
+
# User parameters
|
|
927
|
+
text=text,
|
|
928
|
+
target_language=target_language,
|
|
929
|
+
with_analysis=with_analysis,
|
|
930
|
+
user_prompt=user_prompt,
|
|
931
|
+
temperature=temperature,
|
|
932
|
+
logprobs=logprobs,
|
|
933
|
+
top_logprobs=top_logprobs,
|
|
934
|
+
validator=validator,
|
|
935
|
+
max_validation_retries=max_validation_retries,
|
|
936
|
+
priority=priority,
|
|
937
|
+
# Internal parameters
|
|
938
|
+
prompt_file="translate.yaml",
|
|
939
|
+
output_model=Models.Str,
|
|
940
|
+
mode=None,
|
|
941
|
+
output_lang=None,
|
|
942
|
+
)
|
|
943
|
+
end = datetime.now()
|
|
944
|
+
output.execution_time = (end - start).total_seconds()
|
|
945
|
+
return output
|
|
872
946
|
|
|
873
947
|
except PromptError as e:
|
|
874
948
|
output.errors.append(f"Prompt error: {e}")
|
|
@@ -883,7 +957,7 @@ class TheTool:
|
|
|
883
957
|
|
|
884
958
|
return output
|
|
885
959
|
|
|
886
|
-
def
|
|
960
|
+
def propositionize(
|
|
887
961
|
self,
|
|
888
962
|
text: str,
|
|
889
963
|
with_analysis: bool = False,
|
|
@@ -891,13 +965,15 @@ class TheTool:
|
|
|
891
965
|
user_prompt: str | None = None,
|
|
892
966
|
temperature: float | None = 0.0,
|
|
893
967
|
logprobs: bool = False,
|
|
894
|
-
top_logprobs: int
|
|
895
|
-
validator: Callable[[
|
|
968
|
+
top_logprobs: int = 3,
|
|
969
|
+
validator: Callable[[object], bool] | None = None,
|
|
896
970
|
max_validation_retries: int | None = None,
|
|
897
971
|
priority: int | None = 0,
|
|
898
972
|
) -> Models.ToolOutput:
|
|
899
973
|
"""
|
|
900
|
-
|
|
974
|
+
Break the input text down into meaningful sentences (propositions).
|
|
975
|
+
|
|
976
|
+
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
901
977
|
|
|
902
978
|
Arguments:
|
|
903
979
|
text: The input text
|
|
@@ -913,7 +989,7 @@ class TheTool:
|
|
|
913
989
|
|
|
914
990
|
Returns:
|
|
915
991
|
ToolOutput: Object containing:
|
|
916
|
-
- result (list[
|
|
992
|
+
- result (list[str]): The propositions
|
|
917
993
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
918
994
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
919
995
|
- process (str | None): Description of the process used
|
|
@@ -938,8 +1014,8 @@ class TheTool:
|
|
|
938
1014
|
max_validation_retries=max_validation_retries,
|
|
939
1015
|
priority=priority,
|
|
940
1016
|
# Internal parameters
|
|
941
|
-
prompt_file="
|
|
942
|
-
output_model=Models.
|
|
1017
|
+
prompt_file="propositionize.yaml",
|
|
1018
|
+
output_model=Models.ListStr,
|
|
943
1019
|
mode=None,
|
|
944
1020
|
)
|
|
945
1021
|
end = datetime.now()
|
|
@@ -959,24 +1035,28 @@ class TheTool:
|
|
|
959
1035
|
|
|
960
1036
|
return output
|
|
961
1037
|
|
|
962
|
-
def
|
|
1038
|
+
def check_fact(
|
|
963
1039
|
self,
|
|
964
1040
|
text: str,
|
|
1041
|
+
source_text: str,
|
|
965
1042
|
with_analysis: bool = False,
|
|
966
1043
|
output_lang: str | None = None,
|
|
967
1044
|
user_prompt: str | None = None,
|
|
968
1045
|
temperature: float | None = 0.0,
|
|
969
1046
|
logprobs: bool = False,
|
|
970
|
-
top_logprobs: int
|
|
971
|
-
validator: Callable[[
|
|
1047
|
+
top_logprobs: int = 3,
|
|
1048
|
+
validator: Callable[[object], bool] | None = None,
|
|
972
1049
|
max_validation_retries: int | None = None,
|
|
973
1050
|
priority: int | None = 0,
|
|
974
1051
|
) -> Models.ToolOutput:
|
|
975
1052
|
"""
|
|
976
|
-
|
|
1053
|
+
Checks whether a statement is relevant to the source text or not.
|
|
1054
|
+
|
|
1055
|
+
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
977
1056
|
|
|
978
1057
|
Arguments:
|
|
979
1058
|
text: The input text
|
|
1059
|
+
source_text: The source text against which the input text is checked
|
|
980
1060
|
with_analysis: Whether to include detailed reasoning analysis
|
|
981
1061
|
output_lang: Language for the output response
|
|
982
1062
|
user_prompt: Additional instructions for fact checking
|
|
@@ -989,7 +1069,7 @@ class TheTool:
|
|
|
989
1069
|
|
|
990
1070
|
Returns:
|
|
991
1071
|
ToolOutput: Object containing:
|
|
992
|
-
- result (
|
|
1072
|
+
- result (bool): statement is relevant to source text or not
|
|
993
1073
|
- logprobs (list | None): Probability data if logprobs enabled
|
|
994
1074
|
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
995
1075
|
- process (str | None): Description of the process used
|
|
@@ -998,7 +1078,6 @@ class TheTool:
|
|
|
998
1078
|
- errors (list(str) | None): Errors occurred during tool call
|
|
999
1079
|
"""
|
|
1000
1080
|
output = Models.ToolOutput()
|
|
1001
|
-
|
|
1002
1081
|
try:
|
|
1003
1082
|
start = datetime.now()
|
|
1004
1083
|
output = self._operator.run(
|
|
@@ -1014,9 +1093,10 @@ class TheTool:
|
|
|
1014
1093
|
max_validation_retries=max_validation_retries,
|
|
1015
1094
|
priority=priority,
|
|
1016
1095
|
# Internal parameters
|
|
1017
|
-
prompt_file="
|
|
1018
|
-
output_model=Models.
|
|
1096
|
+
prompt_file="check_fact.yaml",
|
|
1097
|
+
output_model=Models.Bool,
|
|
1019
1098
|
mode=None,
|
|
1099
|
+
source_text=source_text,
|
|
1020
1100
|
)
|
|
1021
1101
|
end = datetime.now()
|
|
1022
1102
|
output.execution_time = (end - start).total_seconds()
|
|
@@ -1038,20 +1118,27 @@ class TheTool:
|
|
|
1038
1118
|
def run_custom(
|
|
1039
1119
|
self,
|
|
1040
1120
|
prompt: str,
|
|
1041
|
-
output_model:
|
|
1121
|
+
output_model: object,
|
|
1122
|
+
with_analysis: bool = False,
|
|
1123
|
+
analyze_template: str | None = None,
|
|
1042
1124
|
output_lang: str | None = None,
|
|
1043
1125
|
temperature: float | None = None,
|
|
1044
1126
|
logprobs: bool | None = None,
|
|
1045
|
-
top_logprobs: int
|
|
1046
|
-
validator: Callable[[
|
|
1127
|
+
top_logprobs: int = 3,
|
|
1128
|
+
validator: Callable[[object], bool] | None = None,
|
|
1047
1129
|
max_validation_retries: int | None = None,
|
|
1048
1130
|
priority: int | None = 0,
|
|
1049
1131
|
) -> Models.ToolOutput:
|
|
1050
1132
|
"""
|
|
1051
1133
|
Custom tool that can do almost anything!
|
|
1052
1134
|
|
|
1135
|
+
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
1136
|
+
|
|
1053
1137
|
Arguments:
|
|
1054
|
-
|
|
1138
|
+
prompt: The user prompt
|
|
1139
|
+
output_model: Pydantic BaseModel used for structured output
|
|
1140
|
+
with_analysis: Whether to include detailed reasoning analysis
|
|
1141
|
+
analyze_template: The analyze template used for reasoning analysis
|
|
1055
1142
|
output_lang: Language for the output response
|
|
1056
1143
|
temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
|
|
1057
1144
|
logprobs: Whether to return token probability information
|
|
@@ -1078,6 +1165,8 @@ class TheTool:
|
|
|
1078
1165
|
# User parameters
|
|
1079
1166
|
text=prompt,
|
|
1080
1167
|
output_model=output_model,
|
|
1168
|
+
with_analysis=with_analysis,
|
|
1169
|
+
analyze_template=analyze_template,
|
|
1081
1170
|
output_model_str=output_model.model_json_schema(),
|
|
1082
1171
|
output_lang=output_lang,
|
|
1083
1172
|
temperature=temperature,
|
|
@@ -1089,7 +1178,6 @@ class TheTool:
|
|
|
1089
1178
|
# Internal parameters
|
|
1090
1179
|
prompt_file="run_custom.yaml",
|
|
1091
1180
|
user_prompt=None,
|
|
1092
|
-
with_analysis=False,
|
|
1093
1181
|
mode=None,
|
|
1094
1182
|
)
|
|
1095
1183
|
end = datetime.now()
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
hamtaa_texttools-1.1.18.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
|
|
2
|
-
texttools/__init__.py,sha256=CmCS9dEvO6061GiJ8A7gD3UAhCWHTkaID9q3Krlyq_o,311
|
|
3
|
-
texttools/batch/batch_config.py,sha256=m1UgILVKjNdWE6laNbfbG4vgi4o2fEegGZbeoam6pnY,749
|
|
4
|
-
texttools/batch/batch_runner.py,sha256=Tz-jec27UZBSZAXc0sxitc5XycDfzvOYl47Yqzq6Myw,10031
|
|
5
|
-
texttools/batch/internals/batch_manager.py,sha256=UoBe76vmFG72qrSaGKDZf4HzkykFBkkkbL9TLfV8TuQ,8730
|
|
6
|
-
texttools/batch/internals/utils.py,sha256=F1_7YlVFKhjUROAFX4m0SaP8KiZVZyHRMIIB87VUGQc,373
|
|
7
|
-
texttools/internals/async_operator.py,sha256=_RfYSm_66RJ6nppzorJ4r3BHdhr8xr404QjeVvsvX4Q,8485
|
|
8
|
-
texttools/internals/exceptions.py,sha256=h_yp_5i_5IfmqTBQ4S6ZOISrrliJBQ3HTEAjwJXrplk,495
|
|
9
|
-
texttools/internals/formatters.py,sha256=tACNLP6PeoqaRpNudVxBaHA25zyWqWYPZQuYysIu88g,941
|
|
10
|
-
texttools/internals/models.py,sha256=zmgdFhMCNyfc-5dtSE4jwulhltVgxYzITZRMDJBUF0A,5977
|
|
11
|
-
texttools/internals/operator_utils.py,sha256=w1k0RJ_W_CRbVc_J2w337VuL-opHpHiCxfhEOwtyuOo,1856
|
|
12
|
-
texttools/internals/prompt_loader.py,sha256=bL4F0pwVEjaBhjNRbf0kyl0YhSJ0G8NXa03Uz7XBzTE,3030
|
|
13
|
-
texttools/internals/sync_operator.py,sha256=7SdsNoFQxgmMrSZbUUw7SJVqyO5Xhu8dui9lm64RKsk,8382
|
|
14
|
-
texttools/prompts/README.md,sha256=-5YO93CN93QLifqZpUeUnCOCBbDiOTV-cFQeJ7Gg0I4,1377
|
|
15
|
-
texttools/prompts/categorize.yaml,sha256=F7VezB25B_sT5yoC25ezODBddkuDD5lUHKetSpx9FKI,2743
|
|
16
|
-
texttools/prompts/detect_entity.yaml,sha256=1rhMkJOjxSQcT4j_c5SRcIm77AUdeG-rUmeidb6VOFc,981
|
|
17
|
-
texttools/prompts/extract_entities.yaml,sha256=KiKjeDpHaeh3JVtZ6q1pa3k4DYucUIU9WnEcRTCA-SE,651
|
|
18
|
-
texttools/prompts/extract_keywords.yaml,sha256=Vj4Tt3vT6LtpOo_iBZPo9oWI50oVdPGXe5i8yDR8ex4,3177
|
|
19
|
-
texttools/prompts/is_question.yaml,sha256=d0-vKRbXWkxvO64ikvxRjEmpAXGpCYIPGhgexvPPjws,471
|
|
20
|
-
texttools/prompts/merge_questions.yaml,sha256=0J85GvTirZB4ELwH3sk8ub_WcqqpYf6PrMKr3djlZeo,1792
|
|
21
|
-
texttools/prompts/propositionize.yaml,sha256=ZEFkYy8qYpFK2oCmZ-i5M7nFtlQp_q8bKQZbFSuyE3c,792
|
|
22
|
-
texttools/prompts/rewrite.yaml,sha256=LO7He_IA3MZKz8a-LxH9DHJpOjpYwaYN1pbjp1Y0tFo,5392
|
|
23
|
-
texttools/prompts/run_custom.yaml,sha256=38OkCoVITbuuS9c08UZSP1jZW4WjSmRIi8fR0RAiPu4,108
|
|
24
|
-
texttools/prompts/subject_to_question.yaml,sha256=C7x7rNNm6U_ZG9HOn6zuzYOtvJUZ2skuWbL1-aYdd3E,1147
|
|
25
|
-
texttools/prompts/summarize.yaml,sha256=o6rxGPfWtZd61Duvm8NVvCJqfq73b-wAuMSKR6UYUqY,459
|
|
26
|
-
texttools/prompts/text_to_question.yaml,sha256=UheKYpDn6iyKI8NxunHZtFpNyfCLZZe5cvkuXpurUJY,783
|
|
27
|
-
texttools/prompts/translate.yaml,sha256=mGT2uBCei6uucWqVbs4silk-UV060v3G0jnt0P6sr50,634
|
|
28
|
-
texttools/tools/async_tools.py,sha256=uNy-18XBOGKPcXkvp5mtNYfBrd16qA2HGsoxxMjTpFM,48536
|
|
29
|
-
texttools/tools/sync_tools.py,sha256=UMos-6cWL9LKy8m0UwK8PBZEXqM5RVQgjjQ3k8LL4qA,48348
|
|
30
|
-
hamtaa_texttools-1.1.18.dist-info/METADATA,sha256=Ro8eqilfvMDymdL8N_wkkgshnMPy9nWLJDO_IRBA9J4,9680
|
|
31
|
-
hamtaa_texttools-1.1.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
32
|
-
hamtaa_texttools-1.1.18.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
|
|
33
|
-
hamtaa_texttools-1.1.18.dist-info/RECORD,,
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
class Formatter:
|
|
2
|
-
@staticmethod
|
|
3
|
-
def user_merge_format(messages: list[dict[str, str]]) -> list[dict[str, str]]:
|
|
4
|
-
"""
|
|
5
|
-
Merges consecutive user messages into a single message, separated by newlines.
|
|
6
|
-
|
|
7
|
-
This is useful for condensing a multi-turn user input into a single
|
|
8
|
-
message for the LLM. Assistant and system messages are left unchanged and
|
|
9
|
-
act as separators between user message groups.
|
|
10
|
-
"""
|
|
11
|
-
merged: list[dict[str, str]] = []
|
|
12
|
-
|
|
13
|
-
for message in messages:
|
|
14
|
-
role, content = message["role"], message["content"].strip()
|
|
15
|
-
|
|
16
|
-
# Merge with previous user turn
|
|
17
|
-
if merged and role == "user" and merged[-1]["role"] == "user":
|
|
18
|
-
merged[-1]["content"] += "\n" + content
|
|
19
|
-
|
|
20
|
-
# Otherwise, start a new turn
|
|
21
|
-
else:
|
|
22
|
-
merged.append({"role": role, "content": content})
|
|
23
|
-
|
|
24
|
-
return merged
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
main_template: |
|
|
2
|
-
You are an expert Named Entity Recognition (NER) system. Extract entities from the text.
|
|
3
|
-
The output must strictly follow the provided Pydantic schema.
|
|
4
|
-
|
|
5
|
-
Mapping Rule:
|
|
6
|
-
- Person: شخص
|
|
7
|
-
- Location: مکان
|
|
8
|
-
- Time: زمان
|
|
9
|
-
- Living Beings: موجود زنده
|
|
10
|
-
- Organization: سازمان
|
|
11
|
-
- Concept: مفهوم
|
|
12
|
-
|
|
13
|
-
CRITICAL:
|
|
14
|
-
1. The final output structure must be a complete JSON object matching the Pydantic schema (List[Entity]).
|
|
15
|
-
2. Both the extracted text and the type must be in Persian, using the exact mapping provided above.
|
|
16
|
-
|
|
17
|
-
Here is the text: {input}
|
|
18
|
-
|
|
19
|
-
analyze_template: |
|
|
20
|
-
Analyze the following text to identify all potential named entities and their categories (Person, Location, Time, Living Beings, Organization, Concept).
|
|
21
|
-
Provide a brief summary of the entities identified that will help the main process to extract them accurately and apply the correct Persian type label.
|
|
22
|
-
Here is the text: {input}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|