hamtaa-texttools 1.3.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.3.1.dist-info → hamtaa_texttools-2.0.0.dist-info}/METADATA +42 -48
- hamtaa_texttools-2.0.0.dist-info/RECORD +30 -0
- {hamtaa_texttools-1.3.1.dist-info → hamtaa_texttools-2.0.0.dist-info}/WHEEL +1 -1
- {hamtaa_texttools-1.3.1.dist-info → hamtaa_texttools-2.0.0.dist-info}/licenses/LICENSE +1 -1
- texttools/__init__.py +1 -1
- texttools/core/internal_models.py +21 -8
- texttools/core/operators/__init__.py +0 -0
- texttools/core/operators/async_operator.py +11 -19
- texttools/core/operators/sync_operator.py +11 -19
- texttools/core/utils.py +260 -0
- texttools/models.py +77 -22
- texttools/prompts/{rewrite.yaml → augment.yaml} +3 -3
- texttools/prompts/categorize.yaml +7 -8
- texttools/prompts/extract_entities.yaml +2 -2
- texttools/prompts/extract_keywords.yaml +4 -2
- texttools/prompts/{check_fact.yaml → is_fact.yaml} +5 -4
- texttools/prompts/is_question.yaml +1 -1
- texttools/prompts/merge_questions.yaml +8 -6
- texttools/prompts/propositionize.yaml +11 -7
- texttools/prompts/run_custom.yaml +3 -1
- texttools/prompts/summarize.yaml +3 -3
- texttools/prompts/to_question.yaml +60 -0
- texttools/prompts/translate.yaml +4 -4
- texttools/tools/async_tools.py +90 -169
- texttools/tools/sync_tools.py +76 -150
- hamtaa_texttools-1.3.1.dist-info/RECORD +0 -30
- texttools/core/engine.py +0 -264
- texttools/prompts/subject_to_question.yaml +0 -26
- texttools/prompts/text_to_question.yaml +0 -26
- {hamtaa_texttools-1.3.1.dist-info → hamtaa_texttools-2.0.0.dist-info}/top_level.txt +0 -0
texttools/tools/sync_tools.py
CHANGED
|
@@ -4,7 +4,6 @@ from typing import Any, Literal
|
|
|
4
4
|
|
|
5
5
|
from openai import OpenAI
|
|
6
6
|
|
|
7
|
-
from ..core.engine import text_to_chunks
|
|
8
7
|
from ..core.exceptions import LLMError, PromptError, TextToolsError, ValidationError
|
|
9
8
|
from ..core.internal_models import (
|
|
10
9
|
Bool,
|
|
@@ -15,15 +14,11 @@ from ..core.internal_models import (
|
|
|
15
14
|
create_dynamic_model,
|
|
16
15
|
)
|
|
17
16
|
from ..core.operators.sync_operator import Operator
|
|
17
|
+
from ..core.utils import TheToolUtils
|
|
18
18
|
from ..models import CategoryTree, ToolOutput, ToolOutputMetadata
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class TheTool:
|
|
22
|
-
"""
|
|
23
|
-
Each method configures the operator with a specific YAML prompt,
|
|
24
|
-
output schema, and flags, then delegates execution to `operator.run()`.
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
22
|
def __init__(
|
|
28
23
|
self,
|
|
29
24
|
client: OpenAI,
|
|
@@ -45,16 +40,16 @@ class TheTool:
|
|
|
45
40
|
priority: int | None = None,
|
|
46
41
|
) -> ToolOutput:
|
|
47
42
|
"""
|
|
48
|
-
|
|
43
|
+
Classify text into given categories
|
|
49
44
|
|
|
50
45
|
Important Note: category_tree mode is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
51
46
|
|
|
52
47
|
Arguments:
|
|
53
48
|
text: The input text
|
|
54
49
|
categories: The category list / category tree
|
|
55
|
-
with_analysis:
|
|
50
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
56
51
|
user_prompt: Additional instructions
|
|
57
|
-
temperature: Controls randomness
|
|
52
|
+
temperature: Controls randomness
|
|
58
53
|
logprobs: Whether to return token probability information
|
|
59
54
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
60
55
|
validator: Custom validation function to validate the output
|
|
@@ -167,27 +162,29 @@ class TheTool:
|
|
|
167
162
|
def extract_keywords(
|
|
168
163
|
self,
|
|
169
164
|
text: str,
|
|
165
|
+
mode: Literal["auto", "threshold", "count"],
|
|
166
|
+
number_of_keywords: int | None = None,
|
|
170
167
|
with_analysis: bool = False,
|
|
171
168
|
output_lang: str | None = None,
|
|
172
169
|
user_prompt: str | None = None,
|
|
173
170
|
temperature: float | None = 0.0,
|
|
174
171
|
logprobs: bool = False,
|
|
175
172
|
top_logprobs: int = 3,
|
|
176
|
-
mode: Literal["auto", "threshold", "count"] = "auto",
|
|
177
|
-
number_of_keywords: int | None = None,
|
|
178
173
|
validator: Callable[[Any], bool] | None = None,
|
|
179
174
|
max_validation_retries: int | None = None,
|
|
180
175
|
priority: int | None = None,
|
|
181
176
|
) -> ToolOutput:
|
|
182
177
|
"""
|
|
183
|
-
Extract
|
|
178
|
+
Extract keywords from the text
|
|
184
179
|
|
|
185
180
|
Arguments:
|
|
186
181
|
text: The input text
|
|
187
|
-
|
|
188
|
-
|
|
182
|
+
mode: auto -> decide n of keywords automatically, threshold -> decide n of keywords by a threshold, count -> takes number of keywords as the parameter
|
|
183
|
+
number_of_keywords: Must be set only when using "count" mode
|
|
184
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
185
|
+
output_lang: Forces the model to respond in a specific language
|
|
189
186
|
user_prompt: Additional instructions
|
|
190
|
-
temperature: Controls randomness
|
|
187
|
+
temperature: Controls randomness
|
|
191
188
|
logprobs: Whether to return token probability information
|
|
192
189
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
193
190
|
validator: Custom validation function to validate the output
|
|
@@ -204,20 +201,20 @@ class TheTool:
|
|
|
204
201
|
operator_output = self._operator.run(
|
|
205
202
|
# User parameters
|
|
206
203
|
text=text,
|
|
204
|
+
number_of_keywords=number_of_keywords,
|
|
205
|
+
mode=mode,
|
|
207
206
|
with_analysis=with_analysis,
|
|
208
207
|
output_lang=output_lang,
|
|
209
208
|
user_prompt=user_prompt,
|
|
210
209
|
temperature=temperature,
|
|
211
210
|
logprobs=logprobs,
|
|
212
211
|
top_logprobs=top_logprobs,
|
|
213
|
-
number_of_keywords=number_of_keywords,
|
|
214
212
|
validator=validator,
|
|
215
213
|
max_validation_retries=max_validation_retries,
|
|
216
214
|
priority=priority,
|
|
217
215
|
# Internal parameters
|
|
218
216
|
tool_name=tool_name,
|
|
219
217
|
output_model=ListStr,
|
|
220
|
-
mode=mode,
|
|
221
218
|
)
|
|
222
219
|
|
|
223
220
|
metadata = ToolOutputMetadata(
|
|
@@ -241,7 +238,7 @@ class TheTool:
|
|
|
241
238
|
def extract_entities(
|
|
242
239
|
self,
|
|
243
240
|
text: str,
|
|
244
|
-
entities: list[str]
|
|
241
|
+
entities: list[str] = ["all named entities"],
|
|
245
242
|
with_analysis: bool = False,
|
|
246
243
|
output_lang: str | None = None,
|
|
247
244
|
user_prompt: str | None = None,
|
|
@@ -253,15 +250,15 @@ class TheTool:
|
|
|
253
250
|
priority: int | None = None,
|
|
254
251
|
) -> ToolOutput:
|
|
255
252
|
"""
|
|
256
|
-
Perform Named Entity Recognition (NER)
|
|
253
|
+
Perform Named Entity Recognition (NER)
|
|
257
254
|
|
|
258
255
|
Arguments:
|
|
259
256
|
text: The input text
|
|
260
|
-
entities: List of entities
|
|
261
|
-
with_analysis:
|
|
262
|
-
output_lang:
|
|
257
|
+
entities: List of entities
|
|
258
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
259
|
+
output_lang: Forces the model to respond in a specific language
|
|
263
260
|
user_prompt: Additional instructions
|
|
264
|
-
temperature: Controls randomness
|
|
261
|
+
temperature: Controls randomness
|
|
265
262
|
logprobs: Whether to return token probability information
|
|
266
263
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
267
264
|
validator: Custom validation function to validate the output
|
|
@@ -278,8 +275,7 @@ class TheTool:
|
|
|
278
275
|
operator_output = self._operator.run(
|
|
279
276
|
# User parameters
|
|
280
277
|
text=text,
|
|
281
|
-
entities=entities
|
|
282
|
-
or "all named entities (e.g., PER, ORG, LOC, DAT, etc.)",
|
|
278
|
+
entities=entities,
|
|
283
279
|
with_analysis=with_analysis,
|
|
284
280
|
output_lang=output_lang,
|
|
285
281
|
user_prompt=user_prompt,
|
|
@@ -330,9 +326,9 @@ class TheTool:
|
|
|
330
326
|
|
|
331
327
|
Arguments:
|
|
332
328
|
text: The input text
|
|
333
|
-
with_analysis:
|
|
329
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
334
330
|
user_prompt: Additional instructions
|
|
335
|
-
temperature: Controls randomness
|
|
331
|
+
temperature: Controls randomness
|
|
336
332
|
logprobs: Whether to return token probability information
|
|
337
333
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
338
334
|
validator: Custom validation function to validate the output
|
|
@@ -382,10 +378,11 @@ class TheTool:
|
|
|
382
378
|
|
|
383
379
|
return tool_output
|
|
384
380
|
|
|
385
|
-
def
|
|
381
|
+
def to_question(
|
|
386
382
|
self,
|
|
387
383
|
text: str,
|
|
388
384
|
number_of_questions: int,
|
|
385
|
+
mode: Literal["from_text", "from_subject"],
|
|
389
386
|
with_analysis: bool = False,
|
|
390
387
|
output_lang: str | None = None,
|
|
391
388
|
user_prompt: str | None = None,
|
|
@@ -397,15 +394,16 @@ class TheTool:
|
|
|
397
394
|
priority: int | None = None,
|
|
398
395
|
) -> ToolOutput:
|
|
399
396
|
"""
|
|
400
|
-
Generate
|
|
397
|
+
Generate questions from the given text / subject
|
|
401
398
|
|
|
402
399
|
Arguments:
|
|
403
400
|
text: The input text
|
|
401
|
+
mode: from_text -> generate questions from an answer, from_subject -> generate questions from a subject
|
|
404
402
|
number_of_questions: Number of questions to generate
|
|
405
|
-
with_analysis:
|
|
406
|
-
output_lang:
|
|
403
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
404
|
+
output_lang: Forces the model to respond in a specific language
|
|
407
405
|
user_prompt: Additional instructions
|
|
408
|
-
temperature: Controls randomness
|
|
406
|
+
temperature: Controls randomness
|
|
409
407
|
logprobs: Whether to return token probability information
|
|
410
408
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
411
409
|
validator: Custom validation function to validate the output
|
|
@@ -415,7 +413,7 @@ class TheTool:
|
|
|
415
413
|
Returns:
|
|
416
414
|
ToolOutput
|
|
417
415
|
"""
|
|
418
|
-
tool_name = "
|
|
416
|
+
tool_name = "to_question"
|
|
419
417
|
start = perf_counter()
|
|
420
418
|
|
|
421
419
|
try:
|
|
@@ -423,6 +421,7 @@ class TheTool:
|
|
|
423
421
|
# User parameters
|
|
424
422
|
text=text,
|
|
425
423
|
number_of_questions=number_of_questions,
|
|
424
|
+
mode=mode,
|
|
426
425
|
with_analysis=with_analysis,
|
|
427
426
|
output_lang=output_lang,
|
|
428
427
|
user_prompt=user_prompt,
|
|
@@ -435,7 +434,6 @@ class TheTool:
|
|
|
435
434
|
# Internal parameters
|
|
436
435
|
tool_name=tool_name,
|
|
437
436
|
output_model=ReasonListStr,
|
|
438
|
-
mode=None,
|
|
439
437
|
)
|
|
440
438
|
|
|
441
439
|
metadata = ToolOutputMetadata(
|
|
@@ -459,26 +457,27 @@ class TheTool:
|
|
|
459
457
|
def merge_questions(
|
|
460
458
|
self,
|
|
461
459
|
text: list[str],
|
|
460
|
+
mode: Literal["simple", "stepwise"],
|
|
462
461
|
with_analysis: bool = False,
|
|
463
462
|
output_lang: str | None = None,
|
|
464
463
|
user_prompt: str | None = None,
|
|
465
464
|
temperature: float | None = 0.0,
|
|
466
465
|
logprobs: bool = False,
|
|
467
466
|
top_logprobs: int = 3,
|
|
468
|
-
mode: Literal["default", "reason"] = "default",
|
|
469
467
|
validator: Callable[[Any], bool] | None = None,
|
|
470
468
|
max_validation_retries: int | None = None,
|
|
471
469
|
priority: int | None = None,
|
|
472
470
|
) -> ToolOutput:
|
|
473
471
|
"""
|
|
474
|
-
Merge multiple questions into a single unified question
|
|
472
|
+
Merge multiple questions into a single unified question
|
|
475
473
|
|
|
476
474
|
Arguments:
|
|
477
475
|
text: List of questions to merge
|
|
478
|
-
|
|
479
|
-
|
|
476
|
+
mode: simple -> regular question merging, stepwise -> merge questions in two steps
|
|
477
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
478
|
+
output_lang: Forces the model to respond in a specific language
|
|
480
479
|
user_prompt: Additional instructions
|
|
481
|
-
temperature: Controls randomness
|
|
480
|
+
temperature: Controls randomness
|
|
482
481
|
logprobs: Whether to return token probability information
|
|
483
482
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
484
483
|
validator: Custom validation function to validate the output
|
|
@@ -496,6 +495,7 @@ class TheTool:
|
|
|
496
495
|
operator_output = self._operator.run(
|
|
497
496
|
# User parameters
|
|
498
497
|
text=text,
|
|
498
|
+
mode=mode,
|
|
499
499
|
with_analysis=with_analysis,
|
|
500
500
|
output_lang=output_lang,
|
|
501
501
|
user_prompt=user_prompt,
|
|
@@ -508,7 +508,6 @@ class TheTool:
|
|
|
508
508
|
# Internal parameters
|
|
509
509
|
tool_name=tool_name,
|
|
510
510
|
output_model=Str,
|
|
511
|
-
mode=mode,
|
|
512
511
|
)
|
|
513
512
|
|
|
514
513
|
metadata = ToolOutputMetadata(
|
|
@@ -529,29 +528,30 @@ class TheTool:
|
|
|
529
528
|
|
|
530
529
|
return tool_output
|
|
531
530
|
|
|
532
|
-
def
|
|
531
|
+
def augment(
|
|
533
532
|
self,
|
|
534
533
|
text: str,
|
|
534
|
+
mode: Literal["positive", "negative", "hard_negative"],
|
|
535
535
|
with_analysis: bool = False,
|
|
536
536
|
output_lang: str | None = None,
|
|
537
537
|
user_prompt: str | None = None,
|
|
538
538
|
temperature: float | None = 0.0,
|
|
539
539
|
logprobs: bool = False,
|
|
540
540
|
top_logprobs: int = 3,
|
|
541
|
-
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
542
541
|
validator: Callable[[Any], bool] | None = None,
|
|
543
542
|
max_validation_retries: int | None = None,
|
|
544
543
|
priority: int | None = None,
|
|
545
544
|
) -> ToolOutput:
|
|
546
545
|
"""
|
|
547
|
-
Rewrite
|
|
546
|
+
Rewrite text in different augmentations
|
|
548
547
|
|
|
549
548
|
Arguments:
|
|
550
549
|
text: The input text
|
|
551
|
-
|
|
552
|
-
|
|
550
|
+
mode: positive -> positive augmentation, negative -> negative augmentation, hard_negative -> hard negative augmentation
|
|
551
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
552
|
+
output_lang: Forces the model to respond in a specific language
|
|
553
553
|
user_prompt: Additional instructions
|
|
554
|
-
temperature: Controls randomness
|
|
554
|
+
temperature: Controls randomness
|
|
555
555
|
logprobs: Whether to return token probability information
|
|
556
556
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
557
557
|
validator: Custom validation function to validate the output
|
|
@@ -561,87 +561,14 @@ class TheTool:
|
|
|
561
561
|
Returns:
|
|
562
562
|
ToolOutput
|
|
563
563
|
"""
|
|
564
|
-
tool_name = "
|
|
564
|
+
tool_name = "augment"
|
|
565
565
|
start = perf_counter()
|
|
566
566
|
|
|
567
567
|
try:
|
|
568
568
|
operator_output = self._operator.run(
|
|
569
569
|
# User parameters
|
|
570
570
|
text=text,
|
|
571
|
-
with_analysis=with_analysis,
|
|
572
|
-
output_lang=output_lang,
|
|
573
|
-
user_prompt=user_prompt,
|
|
574
|
-
temperature=temperature,
|
|
575
|
-
logprobs=logprobs,
|
|
576
|
-
top_logprobs=top_logprobs,
|
|
577
|
-
validator=validator,
|
|
578
|
-
max_validation_retries=max_validation_retries,
|
|
579
|
-
priority=priority,
|
|
580
|
-
# Internal parameters
|
|
581
|
-
tool_name=tool_name,
|
|
582
|
-
output_model=Str,
|
|
583
571
|
mode=mode,
|
|
584
|
-
)
|
|
585
|
-
|
|
586
|
-
metadata = ToolOutputMetadata(
|
|
587
|
-
tool_name=tool_name, execution_time=perf_counter() - start
|
|
588
|
-
)
|
|
589
|
-
tool_output = ToolOutput(
|
|
590
|
-
result=operator_output.result,
|
|
591
|
-
logprobs=operator_output.logprobs,
|
|
592
|
-
analysis=operator_output.analysis,
|
|
593
|
-
metadata=metadata,
|
|
594
|
-
)
|
|
595
|
-
|
|
596
|
-
except (PromptError, LLMError, ValidationError, TextToolsError, Exception) as e:
|
|
597
|
-
metadata = ToolOutputMetadata(tool_name=tool_name)
|
|
598
|
-
tool_output = ToolOutput(
|
|
599
|
-
errors=[f"{type(e).__name__}: {e}"], metadata=metadata
|
|
600
|
-
)
|
|
601
|
-
|
|
602
|
-
return tool_output
|
|
603
|
-
|
|
604
|
-
def subject_to_question(
|
|
605
|
-
self,
|
|
606
|
-
text: str,
|
|
607
|
-
number_of_questions: int,
|
|
608
|
-
with_analysis: bool = False,
|
|
609
|
-
output_lang: str | None = None,
|
|
610
|
-
user_prompt: str | None = None,
|
|
611
|
-
temperature: float | None = 0.0,
|
|
612
|
-
logprobs: bool = False,
|
|
613
|
-
top_logprobs: int = 3,
|
|
614
|
-
validator: Callable[[Any], bool] | None = None,
|
|
615
|
-
max_validation_retries: int | None = None,
|
|
616
|
-
priority: int | None = None,
|
|
617
|
-
) -> ToolOutput:
|
|
618
|
-
"""
|
|
619
|
-
Generate a list of questions about a subject.
|
|
620
|
-
|
|
621
|
-
Arguments:
|
|
622
|
-
text: The subject text to generate questions about
|
|
623
|
-
number_of_questions: Number of questions to generate
|
|
624
|
-
with_analysis: Whether to include detailed reasoning analysis
|
|
625
|
-
output_lang: Language for the output
|
|
626
|
-
user_prompt: Additional instructions
|
|
627
|
-
temperature: Controls randomness (0.0 - 2.0)
|
|
628
|
-
logprobs: Whether to return token probability information
|
|
629
|
-
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
630
|
-
validator: Custom validation function to validate the output
|
|
631
|
-
max_validation_retries: Maximum number of retry attempts if validation fails
|
|
632
|
-
priority: Task execution priority (if enabled by vLLM and the model)
|
|
633
|
-
|
|
634
|
-
Returns:
|
|
635
|
-
ToolOutput
|
|
636
|
-
"""
|
|
637
|
-
tool_name = "subject_to_question"
|
|
638
|
-
start = perf_counter()
|
|
639
|
-
|
|
640
|
-
try:
|
|
641
|
-
operator_output = self._operator.run(
|
|
642
|
-
# User parameters
|
|
643
|
-
text=text,
|
|
644
|
-
number_of_questions=number_of_questions,
|
|
645
572
|
with_analysis=with_analysis,
|
|
646
573
|
output_lang=output_lang,
|
|
647
574
|
user_prompt=user_prompt,
|
|
@@ -653,8 +580,7 @@ class TheTool:
|
|
|
653
580
|
priority=priority,
|
|
654
581
|
# Internal parameters
|
|
655
582
|
tool_name=tool_name,
|
|
656
|
-
output_model=
|
|
657
|
-
mode=None,
|
|
583
|
+
output_model=Str,
|
|
658
584
|
)
|
|
659
585
|
|
|
660
586
|
metadata = ToolOutputMetadata(
|
|
@@ -689,14 +615,14 @@ class TheTool:
|
|
|
689
615
|
priority: int | None = None,
|
|
690
616
|
) -> ToolOutput:
|
|
691
617
|
"""
|
|
692
|
-
Summarize the given
|
|
618
|
+
Summarize the given text
|
|
693
619
|
|
|
694
620
|
Arguments:
|
|
695
621
|
text: The input text
|
|
696
|
-
with_analysis:
|
|
697
|
-
output_lang:
|
|
622
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
623
|
+
output_lang: Forces the model to respond in a specific language
|
|
698
624
|
user_prompt: Additional instructions
|
|
699
|
-
temperature: Controls randomness
|
|
625
|
+
temperature: Controls randomness
|
|
700
626
|
logprobs: Whether to return token probability information
|
|
701
627
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
702
628
|
validator: Custom validation function to validate the output
|
|
@@ -749,7 +675,7 @@ class TheTool:
|
|
|
749
675
|
def translate(
|
|
750
676
|
self,
|
|
751
677
|
text: str,
|
|
752
|
-
|
|
678
|
+
target_lang: str,
|
|
753
679
|
use_chunker: bool = True,
|
|
754
680
|
with_analysis: bool = False,
|
|
755
681
|
user_prompt: str | None = None,
|
|
@@ -761,17 +687,17 @@ class TheTool:
|
|
|
761
687
|
priority: int | None = None,
|
|
762
688
|
) -> ToolOutput:
|
|
763
689
|
"""
|
|
764
|
-
Translate text between languages
|
|
690
|
+
Translate text between languages
|
|
765
691
|
|
|
766
692
|
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
767
693
|
|
|
768
694
|
Arguments:
|
|
769
695
|
text: The input text
|
|
770
|
-
|
|
771
|
-
use_chunker: Whether to use text chunker for
|
|
772
|
-
with_analysis:
|
|
696
|
+
target_lang: The target language for translation
|
|
697
|
+
use_chunker: Whether to use text chunker for large texts
|
|
698
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
773
699
|
user_prompt: Additional instructions
|
|
774
|
-
temperature: Controls randomness
|
|
700
|
+
temperature: Controls randomness
|
|
775
701
|
logprobs: Whether to return token probability information
|
|
776
702
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
777
703
|
validator: Custom validation function to validate the output
|
|
@@ -786,7 +712,7 @@ class TheTool:
|
|
|
786
712
|
|
|
787
713
|
try:
|
|
788
714
|
if len(text.split(" ")) > 1500 and use_chunker:
|
|
789
|
-
chunks =
|
|
715
|
+
chunks = TheToolUtils.to_chunks(text, 1200, 0)
|
|
790
716
|
translation = ""
|
|
791
717
|
analysis = ""
|
|
792
718
|
logprobs_list = []
|
|
@@ -795,7 +721,7 @@ class TheTool:
|
|
|
795
721
|
chunk_operator_output = self._operator.run(
|
|
796
722
|
# User parameters
|
|
797
723
|
text=chunk,
|
|
798
|
-
|
|
724
|
+
target_lang=target_lang,
|
|
799
725
|
with_analysis=with_analysis,
|
|
800
726
|
user_prompt=user_prompt,
|
|
801
727
|
temperature=temperature,
|
|
@@ -832,7 +758,7 @@ class TheTool:
|
|
|
832
758
|
operator_output = self._operator.run(
|
|
833
759
|
# User parameters
|
|
834
760
|
text=text,
|
|
835
|
-
|
|
761
|
+
target_lang=target_lang,
|
|
836
762
|
with_analysis=with_analysis,
|
|
837
763
|
user_prompt=user_prompt,
|
|
838
764
|
temperature=temperature,
|
|
@@ -880,16 +806,16 @@ class TheTool:
|
|
|
880
806
|
priority: int | None = None,
|
|
881
807
|
) -> ToolOutput:
|
|
882
808
|
"""
|
|
883
|
-
|
|
809
|
+
Convert a text into atomic, independent, meaningful sentences
|
|
884
810
|
|
|
885
811
|
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
886
812
|
|
|
887
813
|
Arguments:
|
|
888
814
|
text: The input text
|
|
889
|
-
with_analysis:
|
|
890
|
-
output_lang:
|
|
815
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
816
|
+
output_lang: Forces the model to respond in a specific language
|
|
891
817
|
user_prompt: Additional instructions
|
|
892
|
-
temperature: Controls randomness
|
|
818
|
+
temperature: Controls randomness
|
|
893
819
|
logprobs: Whether to return token probability information
|
|
894
820
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
895
821
|
validator: Custom validation function to validate the output
|
|
@@ -939,7 +865,7 @@ class TheTool:
|
|
|
939
865
|
|
|
940
866
|
return tool_output
|
|
941
867
|
|
|
942
|
-
def
|
|
868
|
+
def is_fact(
|
|
943
869
|
self,
|
|
944
870
|
text: str,
|
|
945
871
|
source_text: str,
|
|
@@ -954,17 +880,17 @@ class TheTool:
|
|
|
954
880
|
priority: int | None = None,
|
|
955
881
|
) -> ToolOutput:
|
|
956
882
|
"""
|
|
957
|
-
|
|
883
|
+
Check whether a statement is a fact based on the source text
|
|
958
884
|
|
|
959
885
|
Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
|
|
960
886
|
|
|
961
887
|
Arguments:
|
|
962
888
|
text: The input text
|
|
963
|
-
source_text: The source text
|
|
964
|
-
with_analysis:
|
|
965
|
-
output_lang:
|
|
889
|
+
source_text: The source text
|
|
890
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
891
|
+
output_lang: Forces the model to respond in a specific language
|
|
966
892
|
user_prompt: Additional instructions
|
|
967
|
-
temperature: Controls randomness
|
|
893
|
+
temperature: Controls randomness
|
|
968
894
|
logprobs: Whether to return token probability information
|
|
969
895
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
970
896
|
validator: Custom validation function to validate the output
|
|
@@ -974,7 +900,7 @@ class TheTool:
|
|
|
974
900
|
Returns:
|
|
975
901
|
ToolOutput
|
|
976
902
|
"""
|
|
977
|
-
tool_name = "
|
|
903
|
+
tool_name = "is_fact"
|
|
978
904
|
start = perf_counter()
|
|
979
905
|
|
|
980
906
|
try:
|
|
@@ -1030,15 +956,15 @@ class TheTool:
|
|
|
1030
956
|
priority: int | None = None,
|
|
1031
957
|
) -> ToolOutput:
|
|
1032
958
|
"""
|
|
1033
|
-
Custom tool that can do almost anything
|
|
959
|
+
Custom tool that can do almost anything
|
|
1034
960
|
|
|
1035
961
|
Arguments:
|
|
1036
962
|
prompt: The user prompt
|
|
1037
963
|
output_model: Pydantic BaseModel used for structured output
|
|
1038
|
-
with_analysis:
|
|
964
|
+
with_analysis: Adds a reasoning step before generating the final output. Note: This doubles token usage per call
|
|
1039
965
|
analyze_template: The analyze template used for reasoning analysis
|
|
1040
|
-
output_lang:
|
|
1041
|
-
temperature: Controls randomness
|
|
966
|
+
output_lang: Forces the model to respond in a specific language
|
|
967
|
+
temperature: Controls randomness
|
|
1042
968
|
logprobs: Whether to return token probability information
|
|
1043
969
|
top_logprobs: Number of top token alternatives to return if logprobs enabled
|
|
1044
970
|
validator: Custom validation function to validate the output
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
hamtaa_texttools-1.3.1.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
|
|
2
|
-
texttools/__init__.py,sha256=RK1GAU6pq2lGwFtHdrCX5JkPRHmOLGcmGH67hd_7VAQ,175
|
|
3
|
-
texttools/models.py,sha256=5eT2cSrFq8Xa38kANznV7gbi7lwB2PoDxciLKTpsd6c,2516
|
|
4
|
-
texttools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
texttools/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
texttools/core/engine.py,sha256=iRHdlIOPuUwIN6_72HNyTQQE7h_7xUZhC-WO-fDA5k8,9597
|
|
7
|
-
texttools/core/exceptions.py,sha256=6SDjUL1rmd3ngzD3ytF4LyTRj3bQMSFR9ECrLoqXXHw,395
|
|
8
|
-
texttools/core/internal_models.py,sha256=aExdLvhXhSev8NY1kuAJckeXdFBEisQtKZPxybd3rW8,1703
|
|
9
|
-
texttools/core/operators/async_operator.py,sha256=wFs7eZ9QJrL0jBOu00YffgfPnIrCSavNjecSorXh-mE,6452
|
|
10
|
-
texttools/core/operators/sync_operator.py,sha256=NaUS-aLh3y0QNMiKut4qtcSZKYXbuPbw0o2jvPsYKdY,6357
|
|
11
|
-
texttools/prompts/categorize.yaml,sha256=42Rp3SgVHaDLKrJ27_uK788LiQud0pOXJthz4r0a40Y,1214
|
|
12
|
-
texttools/prompts/check_fact.yaml,sha256=zWFQDRhEE1ij9wSeeenS9YSTM-bY5zzUaG390zUgmcs,714
|
|
13
|
-
texttools/prompts/extract_entities.yaml,sha256=_zYKHNJDIzVDI_-TnwFCKyMs-XLM5igvmWhvSTc3INQ,637
|
|
14
|
-
texttools/prompts/extract_keywords.yaml,sha256=1o4u3uwzapNtB1BUpNIRL5qtrwjW0Yhvyq0TZJiafdg,3272
|
|
15
|
-
texttools/prompts/is_question.yaml,sha256=jnPARd2ZiulLzHW_r4WAsz3sOryfz6Gy5-yYXp-2hd0,496
|
|
16
|
-
texttools/prompts/merge_questions.yaml,sha256=l9Q2OEjPp3SDkxbq3zZCj2ZmXacWSnmYMpUr3l6r5yE,1816
|
|
17
|
-
texttools/prompts/propositionize.yaml,sha256=nbGAfbm1-2Hoc0JLtqZi-S7VHQfnMmuTKI7dZeBxQW0,1403
|
|
18
|
-
texttools/prompts/rewrite.yaml,sha256=klEm8MqXK-Bo8RsS5R9KLMT0zlD-BKo_G6tz9lpAcEY,5420
|
|
19
|
-
texttools/prompts/run_custom.yaml,sha256=IETY9H0wPGWIIzcnupfbwwKQblwZrbYAxB754W9MhgU,125
|
|
20
|
-
texttools/prompts/subject_to_question.yaml,sha256=AK16pZW9HUppIF8JBSEenbUNOU3aqeVV781_WUXnLqk,1160
|
|
21
|
-
texttools/prompts/summarize.yaml,sha256=rPh060Bx_yI1W2JNg-nr83LUk9itatYLKM8ciH2pOvg,486
|
|
22
|
-
texttools/prompts/text_to_question.yaml,sha256=pUwPgK9l5f8S4E5fCht9JY7PFVK2aY1InPfASr7R5o4,1017
|
|
23
|
-
texttools/prompts/translate.yaml,sha256=Dd5bs3O8SI-FlVSwHMYGeEjMmdOWeRlcfBHkhixCx7c,665
|
|
24
|
-
texttools/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
-
texttools/tools/async_tools.py,sha256=2suwx8N0aRnowaSOpV6C57AqPlmQe5Z0Yx4E5QIMkmU,46939
|
|
26
|
-
texttools/tools/sync_tools.py,sha256=mEuL-nlbxVW30dPE3hGkAUnYXbul-3gN2Le4CMVFCgU,42528
|
|
27
|
-
hamtaa_texttools-1.3.1.dist-info/METADATA,sha256=6wLYAaPVOFpzUz8tN7lfzbAGhEr10JBXgRHcZZvrt5s,7453
|
|
28
|
-
hamtaa_texttools-1.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
29
|
-
hamtaa_texttools-1.3.1.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
|
|
30
|
-
hamtaa_texttools-1.3.1.dist-info/RECORD,,
|