hamtaa-texttools 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (32) hide show
  1. {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/METADATA +192 -141
  2. hamtaa_texttools-1.0.5.dist-info/RECORD +30 -0
  3. {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/licenses/LICENSE +20 -20
  4. {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/top_level.txt +0 -0
  5. texttools/__init__.py +9 -9
  6. texttools/batch/__init__.py +4 -4
  7. texttools/batch/batch_manager.py +240 -240
  8. texttools/batch/batch_runner.py +212 -212
  9. texttools/formatters/base_formatter.py +33 -33
  10. texttools/formatters/{user_merge_formatter/user_merge_formatter.py → user_merge_formatter.py} +30 -30
  11. texttools/prompts/README.md +31 -31
  12. texttools/prompts/categorizer.yaml +28 -31
  13. texttools/prompts/custom_tool.yaml +7 -0
  14. texttools/prompts/keyword_extractor.yaml +18 -14
  15. texttools/prompts/ner_extractor.yaml +20 -21
  16. texttools/prompts/question_detector.yaml +13 -14
  17. texttools/prompts/question_generator.yaml +19 -22
  18. texttools/prompts/question_merger.yaml +45 -48
  19. texttools/prompts/rewriter.yaml +111 -0
  20. texttools/prompts/subject_question_generator.yaml +22 -26
  21. texttools/prompts/summarizer.yaml +13 -11
  22. texttools/prompts/translator.yaml +14 -14
  23. texttools/tools/__init__.py +4 -4
  24. texttools/tools/async_the_tool.py +277 -263
  25. texttools/tools/internals/async_operator.py +297 -288
  26. texttools/tools/internals/operator.py +295 -306
  27. texttools/tools/internals/output_models.py +52 -62
  28. texttools/tools/internals/prompt_loader.py +76 -82
  29. texttools/tools/the_tool.py +501 -400
  30. hamtaa_texttools-1.0.4.dist-info/RECORD +0 -29
  31. texttools/prompts/question_rewriter.yaml +0 -46
  32. {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/WHEEL +0 -0
@@ -1,62 +1,52 @@
1
- from typing import Literal
2
-
3
- from pydantic import BaseModel
4
-
5
-
6
class StrOutput(BaseModel):
    """Output model for a single string result."""

    # Sole field of the model: one required string.
    result: str
12
-
13
-
14
class BoolOutput(BaseModel):
    """Output model for a single boolean result."""

    # Sole field of the model: one required boolean.
    result: bool
20
-
21
-
22
class ListStrOutput(BaseModel):
    """Output model for a list of strings result."""

    # Sole field of the model: a required list whose items are strings.
    result: list[str]
28
-
29
-
30
class ListDictStrStrOutput(BaseModel):
    """Output model for a list of dictionaries with string key-value pairs."""

    # Sole field of the model: a required list of str -> str dictionaries.
    result: list[dict[str, str]]
36
-
37
-
38
class ReasonListStrOutput(BaseModel):
    """Output model containing a reasoning string followed by a list of strings."""

    # Field order matters for the emitted schema: reasoning first, result second.
    reason: str
    # Required list whose items are strings.
    result: list[str]
45
-
46
-
47
class CategorizerOutput(BaseModel):
    """Output model for categorization with reasoning and a predefined category result."""

    # Required free-text reasoning, declared ahead of the label.
    reason: str
    # Required label; validation only admits one of the literal values below.
    result: Literal[
        "باورهای دینی",
        "اخلاق اسلامی",
        "احکام و فقه",
        "تاریخ اسلام و شخصیت ها",
        "منابع دینی",
        "دین و جامعه/سیاست",
        "عرفان و معنویت",
        "هیچکدام",
    ]
1
+ from typing import Literal
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
class StrOutput(BaseModel):
    # One required string field; the description is emitted in the model's schema.
    result: str = Field(default=..., description="The output string")
8
+
9
+
10
class BoolOutput(BaseModel):
    # One required boolean field.
    # `examples` (a list of sample values) is the keyword `Field` documents;
    # the singular `example=` is only accepted as an extra keyword, which
    # pydantic v2 reports as deprecated.
    result: bool = Field(
        ...,
        description="Boolean indicating the output state",
        examples=[True],
    )
14
+
15
+
16
class ListStrOutput(BaseModel):
    # One required field holding a list of strings.
    # `examples` takes a list of sample values, so the single sample list is
    # wrapped in an outer list. The singular `example=` is only accepted as an
    # extra keyword, which pydantic v2 reports as deprecated.
    result: list[str] = Field(
        ...,
        description="The output list of strings",
        examples=[["text_1", "text_2"]],
    )
20
+
21
+
22
class ListDictStrStrOutput(BaseModel):
    # One required field holding a list of str -> str dictionaries.
    # `examples` takes a list of sample values, so the single sample list is
    # wrapped in an outer list. The singular `example=` is only accepted as an
    # extra keyword, which pydantic v2 reports as deprecated.
    result: list[dict[str, str]] = Field(
        ...,
        description="List of dictionaries containing string key-value pairs",
        examples=[[{"text": "Mohammad", "type": "PER"}]],
    )
28
+
29
+
30
class ReasonListStrOutput(BaseModel):
    # Two required fields; `reason` is declared before `result`, so it comes
    # first in the model's field order.
    reason: str = Field(
        default=...,
        description="Thinking process that led to the output",
    )
    result: list[str] = Field(
        default=...,
        description="The output list of strings",
    )
33
+
34
+
35
class CategorizerOutput(BaseModel):
    # Required free-text explanation, declared ahead of the label.
    reason: str = Field(
        ..., description="Explanation of why the input belongs to the category"
    )
    # Required label; validation only admits one of the literal values below.
    # `examples` (a list of sample values) replaces the singular `example=`,
    # which is only accepted as an extra keyword that pydantic v2 reports as
    # deprecated.
    result: Literal[
        "باورهای دینی",
        "اخلاق اسلامی",
        "احکام و فقه",
        "تاریخ اسلام و شخصیت ها",
        "منابع دینی",
        "دین و جامعه/سیاست",
        "عرفان و معنویت",
        "هیچکدام",
    ] = Field(
        ...,
        description="Predicted category label",
        examples=["اخلاق اسلامی"],
    )
@@ -1,82 +1,76 @@
1
- from pathlib import Path
2
- import yaml
3
-
4
-
5
- class PromptLoader:
6
- """
7
- Utility for loading and formatting YAML prompt templates.
8
-
9
- Each YAML file under `prompts/` must define at least a `main_template`,
10
- and optionally an `analyze_template`. These can either be a single string
11
- or a dictionary keyed by mode names (if `use_modes=True`).
12
-
13
- Responsibilities:
14
- - Load and parse YAML prompt definitions.
15
- - Select the right template (by mode, if applicable).
16
- - Inject variables (`{input}`, plus any extra kwargs) into the templates.
17
- - Return a dict with:
18
- {
19
- "main_template": "...",
20
- "analyze_template": "..." | None
21
- }
22
- """
23
-
24
- MAIN_TEMPLATE: str = "main_template"
25
- ANALYZE_TEMPLATE: str = "analyze_template"
26
-
27
- def _get_prompt_path(self, prompt_file: str, prompts_dir: str) -> Path:
28
- return Path(__file__).parent.parent.parent / prompts_dir / prompt_file
29
-
30
- def _load_templates(
31
- self,
32
- prompts_dir: str,
33
- prompt_file: str,
34
- use_modes: bool,
35
- mode: str,
36
- ) -> dict[str, str]:
37
- prompt_path = self._get_prompt_path(prompt_file, prompts_dir)
38
-
39
- if not prompt_path.exists():
40
- raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
41
-
42
- try:
43
- # Load the data
44
- data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
45
- except yaml.YAMLError as e:
46
- raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
47
-
48
- return {
49
- "main_template": data["main_template"][mode]
50
- if use_modes
51
- else data["main_template"],
52
- "analyze_template": data.get("analyze_template")[mode]
53
- if use_modes
54
- else data.get("analyze_template"),
55
- }
56
-
57
- def _build_format_args(self, input_text: str, **extra_kwargs) -> dict[str, str]:
58
- # Base formatting args
59
- format_args = {"input": input_text}
60
- # Merge extras
61
- format_args.update(extra_kwargs)
62
- return format_args
63
-
64
- def load_prompts(
65
- self,
66
- prompt_file: str,
67
- use_modes: bool,
68
- mode: str,
69
- input_text: str,
70
- prompts_dir: str = "prompts",
71
- **extra_kwargs,
72
- ) -> dict[str, str]:
73
- template_configs = self._load_templates(
74
- prompts_dir, prompt_file, use_modes, mode
75
- )
76
- format_args = self._build_format_args(input_text, **extra_kwargs)
77
-
78
- # Inject variables inside each template
79
- for key in template_configs.keys():
80
- template_configs[key] = template_configs[key].format(**format_args)
81
-
82
- return template_configs
1
+ from typing import Optional
2
+ from pathlib import Path
3
+ import yaml
4
+
5
+
6
+ class PromptLoader:
7
+ """
8
+ Utility for loading and formatting YAML prompt templates.
9
+
10
+ Each YAML file under `prompts/` must define at least a `main_template`,
11
+ and optionally an `analyze_template`. These can either be a single string
12
+ or a dictionary keyed by mode names (if `use_modes=True`).
13
+
14
+ Responsibilities:
15
+ - Load and parse YAML prompt definitions.
16
+ - Select the right template (by mode, if applicable).
17
+ - Inject variables (`{input}`, plus any extra kwargs) into the templates.
18
+ - Return a dict with:
19
+ {
20
+ "main_template": "...",
21
+ "analyze_template": "..." | None
22
+ }
23
+ """
24
+
25
+ MAIN_TEMPLATE: str = "main_template"
26
+ ANALYZE_TEMPLATE: str = "analyze_template"
27
+
28
+ def _load_templates(
29
+ self,
30
+ prompts_dir: str,
31
+ prompt_file: str,
32
+ mode: str | None,
33
+ ) -> dict[str, str]:
34
+ prompt_path = Path(__file__).parent.parent.parent / prompts_dir / prompt_file
35
+
36
+ if not prompt_path.exists():
37
+ raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
38
+
39
+ try:
40
+ # Load the data
41
+ data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
42
+ except yaml.YAMLError as e:
43
+ raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
44
+
45
+ return {
46
+ "main_template": data[self.MAIN_TEMPLATE][mode]
47
+ if mode
48
+ else data[self.MAIN_TEMPLATE],
49
+ "analyze_template": data.get(self.ANALYZE_TEMPLATE)[mode]
50
+ if mode
51
+ else data.get(self.ANALYZE_TEMPLATE),
52
+ }
53
+
54
+ def _build_format_args(self, text: str, **extra_kwargs) -> dict[str, str]:
55
+ # Base formatting args
56
+ format_args = {"input": text}
57
+ # Merge extras
58
+ format_args.update(extra_kwargs)
59
+ return format_args
60
+
61
+ def load(
62
+ self,
63
+ prompt_file: str,
64
+ text: str,
65
+ mode: str,
66
+ prompts_dir: str = "prompts",
67
+ **extra_kwargs,
68
+ ) -> dict[str, str]:
69
+ template_configs = self._load_templates(prompts_dir, prompt_file, mode)
70
+ format_args = self._build_format_args(text, **extra_kwargs)
71
+
72
+ # Inject variables inside each template
73
+ for key in template_configs.keys():
74
+ template_configs[key] = template_configs[key].format(**format_args)
75
+
76
+ return template_configs