hamtaa-texttools 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hamtaa-texttools might be problematic. Click here for more details.
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/METADATA +192 -141
- hamtaa_texttools-1.0.5.dist-info/RECORD +30 -0
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/licenses/LICENSE +20 -20
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/top_level.txt +0 -0
- texttools/__init__.py +9 -9
- texttools/batch/__init__.py +4 -4
- texttools/batch/batch_manager.py +240 -240
- texttools/batch/batch_runner.py +212 -212
- texttools/formatters/base_formatter.py +33 -33
- texttools/formatters/{user_merge_formatter/user_merge_formatter.py → user_merge_formatter.py} +30 -30
- texttools/prompts/README.md +31 -31
- texttools/prompts/categorizer.yaml +28 -31
- texttools/prompts/custom_tool.yaml +7 -0
- texttools/prompts/keyword_extractor.yaml +18 -14
- texttools/prompts/ner_extractor.yaml +20 -21
- texttools/prompts/question_detector.yaml +13 -14
- texttools/prompts/question_generator.yaml +19 -22
- texttools/prompts/question_merger.yaml +45 -48
- texttools/prompts/rewriter.yaml +111 -0
- texttools/prompts/subject_question_generator.yaml +22 -26
- texttools/prompts/summarizer.yaml +13 -11
- texttools/prompts/translator.yaml +14 -14
- texttools/tools/__init__.py +4 -4
- texttools/tools/async_the_tool.py +277 -263
- texttools/tools/internals/async_operator.py +297 -288
- texttools/tools/internals/operator.py +295 -306
- texttools/tools/internals/output_models.py +52 -62
- texttools/tools/internals/prompt_loader.py +76 -82
- texttools/tools/the_tool.py +501 -400
- hamtaa_texttools-1.0.4.dist-info/RECORD +0 -29
- texttools/prompts/question_rewriter.yaml +0 -46
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/WHEEL +0 -0
|
@@ -1,62 +1,52 @@
|
|
|
1
|
-
from typing import Literal
|
|
2
|
-
|
|
3
|
-
from pydantic import BaseModel
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class StrOutput(BaseModel):
|
|
7
|
-
""
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
result:
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class
|
|
31
|
-
""
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
result: Literal[
|
|
54
|
-
"باورهای دینی",
|
|
55
|
-
"اخلاق اسلامی",
|
|
56
|
-
"احکام و فقه",
|
|
57
|
-
"تاریخ اسلام و شخصیت ها",
|
|
58
|
-
"منابع دینی",
|
|
59
|
-
"دین و جامعه/سیاست",
|
|
60
|
-
"عرفان و معنویت",
|
|
61
|
-
"هیچکدام",
|
|
62
|
-
]
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Pydantic model with a single required string field named `result`.
# (Documented with comments rather than a class docstring so the generated
# schema is left untouched.)
class StrOutput(BaseModel):
    # `...` as the default marks the field as required.
    result: str = Field(..., description="The output string")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Pydantic model with a single required boolean field named `result`.
# (Documented with comments rather than a class docstring so the generated
# schema is left untouched.)
class BoolOutput(BaseModel):
    # `...` as the default marks the field as required.
    # NOTE(review): `example` is passed as an extra Field keyword; Pydantic v2
    # deprecates extra keywords in favour of `examples` / `json_schema_extra`
    # -- confirm which Pydantic version this package targets.
    result: bool = Field(
        ..., description="Boolean indicating the output state", example=True
    )
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Pydantic model with a single required `result` field holding a list of
# strings.
# (Documented with comments rather than a class docstring so the generated
# schema is left untouched.)
class ListStrOutput(BaseModel):
    # `...` as the default marks the field as required.
    # NOTE(review): `example` is passed as an extra Field keyword; Pydantic v2
    # deprecates extra keywords in favour of `examples` / `json_schema_extra`
    # -- confirm which Pydantic version this package targets.
    result: list[str] = Field(
        ..., description="The output list of strings", example=["text_1", "text_2"]
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Pydantic model with a single required `result` field holding a list of
# dictionaries that map string keys to string values.
# (Documented with comments rather than a class docstring so the generated
# schema is left untouched.)
class ListDictStrStrOutput(BaseModel):
    # `...` as the default marks the field as required.
    # The example shows one entry with "text" and "type" keys.
    # NOTE(review): `example` is passed as an extra Field keyword; Pydantic v2
    # deprecates extra keywords in favour of `examples` / `json_schema_extra`
    # -- confirm which Pydantic version this package targets.
    result: list[dict[str, str]] = Field(
        ...,
        description="List of dictionaries containing string key-value pairs",
        example=[{"text": "Mohammad", "type": "PER"}],
    )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Pydantic model pairing a required free-text `reason` with a required
# `result` list of strings.
# (Documented with comments rather than a class docstring so the generated
# schema is left untouched.)
class ReasonListStrOutput(BaseModel):
    # Declared before `result`, so it comes first in the model's field order.
    reason: str = Field(..., description="Thinking process that led to the output")
    # `...` as the default marks the field as required.
    result: list[str] = Field(..., description="The output list of strings")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Pydantic model for a categorization result: a required free-text `reason`
# plus a required `result` label restricted to a fixed set of Persian
# category names.
# (Documented with comments rather than a class docstring so the generated
# schema is left untouched.)
#
# English glosses of the allowed labels, in declaration order:
#   1. religious beliefs
#   2. Islamic ethics
#   3. rulings and jurisprudence
#   4. history of Islam and its figures
#   5. religious sources
#   6. religion and society/politics
#   7. mysticism and spirituality
#   8. none of them
class CategorizerOutput(BaseModel):
    # `...` as the default marks the field as required.
    reason: str = Field(
        ..., description="Explanation of why the input belongs to the category"
    )
    # The Literal type limits accepted values to exactly these strings; they
    # are runtime data and must not be altered or normalized.
    # NOTE(review): `example` is passed as an extra Field keyword; Pydantic v2
    # deprecates extra keywords in favour of `examples` / `json_schema_extra`
    # -- confirm which Pydantic version this package targets.
    result: Literal[
        "باورهای دینی",
        "اخلاق اسلامی",
        "احکام و فقه",
        "تاریخ اسلام و شخصیت ها",
        "منابع دینی",
        "دین و جامعه/سیاست",
        "عرفان و معنویت",
        "هیچکدام",
    ] = Field(
        ...,
        description="Predicted category label",
        example="اخلاق اسلامی",
    )
|
|
@@ -1,82 +1,76 @@
|
|
|
1
|
-
from
|
|
2
|
-
import
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
"
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
"
|
|
50
|
-
if
|
|
51
|
-
else data
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
format_args
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
# Inject variables inside each template
|
|
79
|
-
for key in template_configs.keys():
|
|
80
|
-
template_configs[key] = template_configs[key].format(**format_args)
|
|
81
|
-
|
|
82
|
-
return template_configs
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import yaml
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class PromptLoader:
|
|
7
|
+
"""
|
|
8
|
+
Utility for loading and formatting YAML prompt templates.
|
|
9
|
+
|
|
10
|
+
Each YAML file under `prompts/` must define at least a `main_template`,
|
|
11
|
+
and optionally an `analyze_template`. These can either be a single string
|
|
12
|
+
or a dictionary keyed by mode names (if `use_modes=True`).
|
|
13
|
+
|
|
14
|
+
Responsibilities:
|
|
15
|
+
- Load and parse YAML prompt definitions.
|
|
16
|
+
- Select the right template (by mode, if applicable).
|
|
17
|
+
- Inject variables (`{input}`, plus any extra kwargs) into the templates.
|
|
18
|
+
- Return a dict with:
|
|
19
|
+
{
|
|
20
|
+
"main_template": "...",
|
|
21
|
+
"analyze_template": "..." | None
|
|
22
|
+
}
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
MAIN_TEMPLATE: str = "main_template"
|
|
26
|
+
ANALYZE_TEMPLATE: str = "analyze_template"
|
|
27
|
+
|
|
28
|
+
def _load_templates(
|
|
29
|
+
self,
|
|
30
|
+
prompts_dir: str,
|
|
31
|
+
prompt_file: str,
|
|
32
|
+
mode: str | None,
|
|
33
|
+
) -> dict[str, str]:
|
|
34
|
+
prompt_path = Path(__file__).parent.parent.parent / prompts_dir / prompt_file
|
|
35
|
+
|
|
36
|
+
if not prompt_path.exists():
|
|
37
|
+
raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
|
|
38
|
+
|
|
39
|
+
try:
|
|
40
|
+
# Load the data
|
|
41
|
+
data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
|
|
42
|
+
except yaml.YAMLError as e:
|
|
43
|
+
raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
|
|
44
|
+
|
|
45
|
+
return {
|
|
46
|
+
"main_template": data[self.MAIN_TEMPLATE][mode]
|
|
47
|
+
if mode
|
|
48
|
+
else data[self.MAIN_TEMPLATE],
|
|
49
|
+
"analyze_template": data.get(self.ANALYZE_TEMPLATE)[mode]
|
|
50
|
+
if mode
|
|
51
|
+
else data.get(self.ANALYZE_TEMPLATE),
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
def _build_format_args(self, text: str, **extra_kwargs) -> dict[str, str]:
|
|
55
|
+
# Base formatting args
|
|
56
|
+
format_args = {"input": text}
|
|
57
|
+
# Merge extras
|
|
58
|
+
format_args.update(extra_kwargs)
|
|
59
|
+
return format_args
|
|
60
|
+
|
|
61
|
+
def load(
|
|
62
|
+
self,
|
|
63
|
+
prompt_file: str,
|
|
64
|
+
text: str,
|
|
65
|
+
mode: str,
|
|
66
|
+
prompts_dir: str = "prompts",
|
|
67
|
+
**extra_kwargs,
|
|
68
|
+
) -> dict[str, str]:
|
|
69
|
+
template_configs = self._load_templates(prompts_dir, prompt_file, mode)
|
|
70
|
+
format_args = self._build_format_args(text, **extra_kwargs)
|
|
71
|
+
|
|
72
|
+
# Inject variables inside each template
|
|
73
|
+
for key in template_configs.keys():
|
|
74
|
+
template_configs[key] = template_configs[key].format(**format_args)
|
|
75
|
+
|
|
76
|
+
return template_configs
|