PyPI - hamtaa-texttools - Versions diffs - 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl - Mend

hamtaa-texttools 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (32) hide show

{hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.6.dist-info}/METADATA +192 -141
hamtaa_texttools-1.0.6.dist-info/RECORD +30 -0
{hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.6.dist-info}/licenses/LICENSE +20 -20
{hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.6.dist-info}/top_level.txt +0 -0
texttools/__init__.py +9 -9
texttools/batch/__init__.py +4 -4
texttools/batch/batch_manager.py +229 -240
texttools/batch/batch_runner.py +263 -212
texttools/formatters/base_formatter.py +33 -33
texttools/formatters/{user_merge_formatter/user_merge_formatter.py → user_merge_formatter.py} +30 -30
texttools/prompts/README.md +35 -31
texttools/prompts/categorizer.yaml +28 -31
texttools/prompts/{question_detector.yaml → is_question.yaml} +13 -14
texttools/prompts/keyword_extractor.yaml +18 -14
texttools/prompts/ner_extractor.yaml +20 -21
texttools/prompts/question_merger.yaml +45 -48
texttools/prompts/rewriter.yaml +111 -0
texttools/prompts/run_custom.yaml +7 -0
texttools/prompts/{subject_question_generator.yaml → subject_to_question.yaml} +22 -26
texttools/prompts/summarizer.yaml +13 -11
texttools/prompts/{question_generator.yaml → text_to_question.yaml} +19 -22
texttools/prompts/translator.yaml +14 -14
texttools/tools/__init__.py +4 -4
texttools/tools/async_the_tool.py +277 -263
texttools/tools/internals/async_operator.py +308 -288
texttools/tools/internals/operator.py +295 -306
texttools/tools/internals/output_models.py +52 -62
texttools/tools/internals/prompt_loader.py +66 -82
texttools/tools/the_tool.py +501 -400
hamtaa_texttools-1.0.4.dist-info/RECORD +0 -29
texttools/prompts/question_rewriter.yaml +0 -46
{hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.6.dist-info}/WHEEL +0 -0

texttools/tools/internals/output_models.py CHANGED Viewed

@@ -1,62 +1,52 @@
-from typing import Literal
-from pydantic import BaseModel
-class StrOutput(BaseModel):
-    """
-    Output model for a single string result.
-    """
-    result: str
-class BoolOutput(BaseModel):
-    """
-    Output model for a single boolean result.
-    """
-    result: bool
-class ListStrOutput(BaseModel):
-    """
-    Output model for a list of strings result.
-    """
-    result: list[str]
-class ListDictStrStrOutput(BaseModel):
-    """
-    Output model for a list of dictionaries with string key-value pairs.
-    """
-    result: list[dict[str, str]]
-class ReasonListStrOutput(BaseModel):
-    """
-    Output model containing a reasoning string followed by a list of strings.
-    """
-    reason: str
-    result: list[str]
-class CategorizerOutput(BaseModel):
-    """
-    Output model for categorization with reasoning and a predefined category result.
-    """
-    reason: str
-    result: Literal[
-        "باورهای دینی",
-        "اخلاق اسلامی",
-        "احکام و فقه",
-        "تاریخ اسلام و شخصیت ها",
-        "منابع دینی",
-        "دین و جامعه/سیاست",
-        "عرفان و معنویت",
-        "هیچکدام",
-    ]
+from typing import Literal
+from pydantic import BaseModel, Field
+class StrOutput(BaseModel):
+    result: str = Field(..., description="The output string")
+class BoolOutput(BaseModel):
+    result: bool = Field(
+        ..., description="Boolean indicating the output state", example=True
+    )
+class ListStrOutput(BaseModel):
+    result: list[str] = Field(
+        ..., description="The output list of strings", example=["text_1", "text_2"]
+    )
+class ListDictStrStrOutput(BaseModel):
+    result: list[dict[str, str]] = Field(
+        ...,
+        description="List of dictionaries containing string key-value pairs",
+        example=[{"text": "Mohammad", "type": "PER"}],
+    )
+class ReasonListStrOutput(BaseModel):
+    reason: str = Field(..., description="Thinking process that led to the output")
+    result: list[str] = Field(..., description="The output list of strings")
+class CategorizerOutput(BaseModel):
+    reason: str = Field(
+        ..., description="Explanation of why the input belongs to the category"
+    )
+    result: Literal[
+        "باورهای دینی",
+        "اخلاق اسلامی",
+        "احکام و فقه",
+        "تاریخ اسلام و شخصیت ها",
+        "منابع دینی",
+        "دین و جامعه/سیاست",
+        "عرفان و معنویت",
+        "هیچکدام",
+    ] = Field(
+        ...,
+        description="Predicted category label",
+        example="اخلاق اسلامی",
+    )

texttools/tools/internals/prompt_loader.py CHANGED Viewed

@@ -1,82 +1,66 @@
-from pathlib import Path
-import yaml
-class PromptLoader:
-    """
-    Utility for loading and formatting YAML prompt templates.
-    Each YAML file under `prompts/` must define at least a `main_template`,
-    and optionally an `analyze_template`. These can either be a single string
-    or a dictionary keyed by mode names (if `use_modes=True`).
-    Responsibilities:
-    - Load and parse YAML prompt definitions.
-    - Select the right template (by mode, if applicable).
-    - Inject variables (`{input}`, plus any extra kwargs) into the templates.
-    - Return a dict with:
-        {
-            "main_template": "...",
-            "analyze_template": "..." | None
-        }
-    """
-    MAIN_TEMPLATE: str = "main_template"
-    ANALYZE_TEMPLATE: str = "analyze_template"
-    def _get_prompt_path(self, prompt_file: str, prompts_dir: str) -> Path:
-        return Path(__file__).parent.parent.parent / prompts_dir / prompt_file
-    def _load_templates(
-        self,
-        prompts_dir: str,
-        prompt_file: str,
-        use_modes: bool,
-        mode: str,
-    ) -> dict[str, str]:
-        prompt_path = self._get_prompt_path(prompt_file, prompts_dir)
-        if not prompt_path.exists():
-            raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
-        try:
-            # Load the data
-            data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
-        except yaml.YAMLError as e:
-            raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
-        return {
-            "main_template": data["main_template"][mode]
-            if use_modes
-            else data["main_template"],
-            "analyze_template": data.get("analyze_template")[mode]
-            if use_modes
-            else data.get("analyze_template"),
-        }
-    def _build_format_args(self, input_text: str, **extra_kwargs) -> dict[str, str]:
-        # Base formatting args
-        format_args = {"input": input_text}
-        # Merge extras
-        format_args.update(extra_kwargs)
-        return format_args
-    def load_prompts(
-        self,
-        prompt_file: str,
-        use_modes: bool,
-        mode: str,
-        input_text: str,
-        prompts_dir: str = "prompts",
-        **extra_kwargs,
-    ) -> dict[str, str]:
-        template_configs = self._load_templates(
-            prompts_dir, prompt_file, use_modes, mode
-        )
-        format_args = self._build_format_args(input_text, **extra_kwargs)
-        # Inject variables inside each template
-        for key in template_configs.keys():
-            template_configs[key] = template_configs[key].format(**format_args)
-        return template_configs
+from functools import lru_cache
+from pathlib import Path
+import yaml
+class PromptLoader:
+    """
+    Utility for loading and formatting YAML prompt templates.
+    Responsibilities:
+    - Load and parse YAML prompt definitions.
+    - Select the right template (by mode, if applicable).
+    - Inject variables (`{input}`, plus any extra kwargs) into the templates.
+    - Return a dict with:
+        {
+            "main_template": "...",
+            "analyze_template": "..." | None
+        }
+    """
+    def __init__(self):
+        self.base_dir = Path(__file__).parent.parent.parent / Path("prompts")
+    MAIN_TEMPLATE: str = "main_template"
+    ANALYZE_TEMPLATE: str = "analyze_template"
+    # Use lru_cache to load each file once
+    @lru_cache(maxsize=32)
+    def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
+        prompt_path = self.base_dir / prompt_file
+        if not prompt_path.exists():
+            raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
+        try:
+            data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
+        except yaml.YAMLError as e:
+            raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
+        return {
+            self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]
+            if mode
+            else data[self.MAIN_TEMPLATE],
+            self.ANALYZE_TEMPLATE: data.get(self.ANALYZE_TEMPLATE)[mode]
+            if mode
+            else data.get(self.ANALYZE_TEMPLATE),
+        }
+    def _build_format_args(self, text: str, **extra_kwargs) -> dict[str, str]:
+        # Base formatting args
+        format_args = {"input": text}
+        # Merge extras
+        format_args.update(extra_kwargs)
+        return format_args
+    def load(
+        self, prompt_file: str, text: str, mode: str, **extra_kwargs
+    ) -> dict[str, str]:
+        template_configs = self._load_templates(prompt_file, mode)
+        format_args = self._build_format_args(text, **extra_kwargs)
+        # Inject variables inside each template
+        for key in template_configs.keys():
+            template_configs[key] = template_configs[key].format(**format_args)
+        return template_configs

hamtaa-texttools 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl

Potentially problematic release.

hamtaa-texttools 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl