hamtaa-texttools 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hamtaa-texttools might be problematic. Click here for more details.
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.6.dist-info}/METADATA +192 -141
- hamtaa_texttools-1.0.6.dist-info/RECORD +30 -0
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.6.dist-info}/licenses/LICENSE +20 -20
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.6.dist-info}/top_level.txt +0 -0
- texttools/__init__.py +9 -9
- texttools/batch/__init__.py +4 -4
- texttools/batch/batch_manager.py +229 -240
- texttools/batch/batch_runner.py +263 -212
- texttools/formatters/base_formatter.py +33 -33
- texttools/formatters/{user_merge_formatter/user_merge_formatter.py → user_merge_formatter.py} +30 -30
- texttools/prompts/README.md +35 -31
- texttools/prompts/categorizer.yaml +28 -31
- texttools/prompts/{question_detector.yaml → is_question.yaml} +13 -14
- texttools/prompts/keyword_extractor.yaml +18 -14
- texttools/prompts/ner_extractor.yaml +20 -21
- texttools/prompts/question_merger.yaml +45 -48
- texttools/prompts/rewriter.yaml +111 -0
- texttools/prompts/run_custom.yaml +7 -0
- texttools/prompts/{subject_question_generator.yaml → subject_to_question.yaml} +22 -26
- texttools/prompts/summarizer.yaml +13 -11
- texttools/prompts/{question_generator.yaml → text_to_question.yaml} +19 -22
- texttools/prompts/translator.yaml +14 -14
- texttools/tools/__init__.py +4 -4
- texttools/tools/async_the_tool.py +277 -263
- texttools/tools/internals/async_operator.py +308 -288
- texttools/tools/internals/operator.py +295 -306
- texttools/tools/internals/output_models.py +52 -62
- texttools/tools/internals/prompt_loader.py +66 -82
- texttools/tools/the_tool.py +501 -400
- hamtaa_texttools-1.0.4.dist-info/RECORD +0 -29
- texttools/prompts/question_rewriter.yaml +0 -46
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.6.dist-info}/WHEEL +0 -0
|
@@ -1,62 +1,52 @@
|
|
|
1
|
-
from typing import Literal
|
|
2
|
-
|
|
3
|
-
from pydantic import BaseModel
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class StrOutput(BaseModel):
|
|
7
|
-
""
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
result:
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class
|
|
31
|
-
""
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
result: Literal[
|
|
54
|
-
"باورهای دینی",
|
|
55
|
-
"اخلاق اسلامی",
|
|
56
|
-
"احکام و فقه",
|
|
57
|
-
"تاریخ اسلام و شخصیت ها",
|
|
58
|
-
"منابع دینی",
|
|
59
|
-
"دین و جامعه/سیاست",
|
|
60
|
-
"عرفان و معنویت",
|
|
61
|
-
"هیچکدام",
|
|
62
|
-
]
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class StrOutput(BaseModel):
    # Model with a single required field, `result`, holding a string.
    # A `#` comment is used on purpose: a class docstring would be emitted
    # as the model's JSON-schema description.
    result: str = Field(default=..., description="The output string")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BoolOutput(BaseModel):
    # Model with a single required field, `result`, holding a boolean.
    result: bool = Field(
        default=...,
        description="Boolean indicating the output state",
        example=True,
    )
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ListStrOutput(BaseModel):
    # Model with a single required field, `result`, holding a list of strings.
    result: list[str] = Field(
        default=...,
        description="The output list of strings",
        example=["text_1", "text_2"],
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ListDictStrStrOutput(BaseModel):
    # Model with a single required field, `result`: a list of dictionaries
    # whose keys and values are both strings.
    result: list[dict[str, str]] = Field(
        default=...,
        description="List of dictionaries containing string key-value pairs",
        example=[{"text": "Mohammad", "type": "PER"}],
    )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ReasonListStrOutput(BaseModel):
    # Two required fields, declared in this order: a free-text `reason`
    # followed by the `result` list of strings.
    reason: str = Field(
        default=...,
        description="Thinking process that led to the output",
    )
    result: list[str] = Field(
        default=...,
        description="The output list of strings",
    )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CategorizerOutput(BaseModel):
    # Two required fields, declared in this order: a free-text `reason`
    # followed by `result`, which must be one of the fixed category labels
    # listed in the Literal below.
    reason: str = Field(
        default=...,
        description="Explanation of why the input belongs to the category",
    )
    result: Literal[
        "باورهای دینی",
        "اخلاق اسلامی",
        "احکام و فقه",
        "تاریخ اسلام و شخصیت ها",
        "منابع دینی",
        "دین و جامعه/سیاست",
        "عرفان و معنویت",
        "هیچکدام",
    ] = Field(
        default=...,
        description="Predicted category label",
        example="اخلاق اسلامی",
    )
|
|
@@ -1,82 +1,66 @@
|
|
|
1
|
-
from
|
|
2
|
-
import
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
MAIN_TEMPLATE: str = "main_template"
|
|
25
|
-
ANALYZE_TEMPLATE: str = "analyze_template"
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
data
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
use_modes: bool,
|
|
68
|
-
mode: str,
|
|
69
|
-
input_text: str,
|
|
70
|
-
prompts_dir: str = "prompts",
|
|
71
|
-
**extra_kwargs,
|
|
72
|
-
) -> dict[str, str]:
|
|
73
|
-
template_configs = self._load_templates(
|
|
74
|
-
prompts_dir, prompt_file, use_modes, mode
|
|
75
|
-
)
|
|
76
|
-
format_args = self._build_format_args(input_text, **extra_kwargs)
|
|
77
|
-
|
|
78
|
-
# Inject variables inside each template
|
|
79
|
-
for key in template_configs.keys():
|
|
80
|
-
template_configs[key] = template_configs[key].format(**format_args)
|
|
81
|
-
|
|
82
|
-
return template_configs
|
|
1
|
+
from functools import lru_cache
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import yaml
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class PromptLoader:
|
|
7
|
+
"""
|
|
8
|
+
Utility for loading and formatting YAML prompt templates.
|
|
9
|
+
|
|
10
|
+
Responsibilities:
|
|
11
|
+
- Load and parse YAML prompt definitions.
|
|
12
|
+
- Select the right template (by mode, if applicable).
|
|
13
|
+
- Inject variables (`{input}`, plus any extra kwargs) into the templates.
|
|
14
|
+
- Return a dict with:
|
|
15
|
+
{
|
|
16
|
+
"main_template": "...",
|
|
17
|
+
"analyze_template": "..." | None
|
|
18
|
+
}
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
def __init__(self):
|
|
22
|
+
self.base_dir = Path(__file__).parent.parent.parent / Path("prompts")
|
|
23
|
+
|
|
24
|
+
MAIN_TEMPLATE: str = "main_template"
|
|
25
|
+
ANALYZE_TEMPLATE: str = "analyze_template"
|
|
26
|
+
|
|
27
|
+
# Use lru_cache to load each file once
|
|
28
|
+
@lru_cache(maxsize=32)
|
|
29
|
+
def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
|
|
30
|
+
prompt_path = self.base_dir / prompt_file
|
|
31
|
+
|
|
32
|
+
if not prompt_path.exists():
|
|
33
|
+
raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
|
|
34
|
+
|
|
35
|
+
try:
|
|
36
|
+
data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
|
|
37
|
+
except yaml.YAMLError as e:
|
|
38
|
+
raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
|
|
39
|
+
|
|
40
|
+
return {
|
|
41
|
+
self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]
|
|
42
|
+
if mode
|
|
43
|
+
else data[self.MAIN_TEMPLATE],
|
|
44
|
+
self.ANALYZE_TEMPLATE: data.get(self.ANALYZE_TEMPLATE)[mode]
|
|
45
|
+
if mode
|
|
46
|
+
else data.get(self.ANALYZE_TEMPLATE),
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
def _build_format_args(self, text: str, **extra_kwargs) -> dict[str, str]:
|
|
50
|
+
# Base formatting args
|
|
51
|
+
format_args = {"input": text}
|
|
52
|
+
# Merge extras
|
|
53
|
+
format_args.update(extra_kwargs)
|
|
54
|
+
return format_args
|
|
55
|
+
|
|
56
|
+
def load(
|
|
57
|
+
self, prompt_file: str, text: str, mode: str, **extra_kwargs
|
|
58
|
+
) -> dict[str, str]:
|
|
59
|
+
template_configs = self._load_templates(prompt_file, mode)
|
|
60
|
+
format_args = self._build_format_args(text, **extra_kwargs)
|
|
61
|
+
|
|
62
|
+
# Inject variables inside each template
|
|
63
|
+
for key in template_configs.keys():
|
|
64
|
+
template_configs[key] = template_configs[key].format(**format_args)
|
|
65
|
+
|
|
66
|
+
return template_configs
|