PyPI - hamtaa-texttools - Versions diffs - 1.1.13__py3-none-any.whl → 1.1.14__py3-none-any.whl - Mend

hamtaa-texttools 1.1.13__py3-none-any.whl → 1.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/METADATA +8 -6
{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/RECORD +16 -15
texttools/__init__.py +2 -1
texttools/batch/batch_config.py +1 -1
texttools/batch/batch_runner.py +1 -1
texttools/prompts/categorize.yaml +77 -0
texttools/prompts/detect_entity.yaml +22 -0
texttools/prompts/extract_keywords.yaml +68 -18
texttools/tools/async_tools.py +206 -41
texttools/tools/internals/async_operator.py +8 -4
texttools/tools/internals/models.py +181 -0
texttools/tools/internals/sync_operator.py +9 -4
texttools/tools/sync_tools.py +206 -41
texttools/prompts/categorizer.yaml +0 -28
texttools/tools/internals/output_models.py +0 -62
{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/WHEEL +0 -0
{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/licenses/LICENSE +0 -0
{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/top_level.txt +0 -0

texttools/tools/sync_tools.py CHANGED Viewed

@@ -1,9 +1,10 @@
-from typing import Literal, Any, Callable
+from typing import Literal, Any
+from collections.abc import Callable
 from openai import OpenAI
 from texttools.tools.internals.sync_operator import Operator
-import texttools.tools.internals.output_models as OM
+import texttools.tools.internals.models as Models
 class TheTool:
@@ -27,19 +28,23 @@ class TheTool:
     def categorize(
         self,
         text: str,
+        categories: list[str] | Models.CategoryTree,
         with_analysis: bool = False,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
+        mode: Literal["category_list", "category_tree"] = "category_list",
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
-        Categorize a text into a single Islamic studies domain category.
+        Categorize a text into a category / category tree.
         Arguments:
             text: The input text to categorize
+            categories: The category / category_tree to give to LLM
             with_analysis: Whether to include detailed reasoning analysis
             user_prompt: Additional instructions for the categorization
             temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
@@ -47,30 +52,104 @@ class TheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
-                - result (str): The assigned Islamic studies category
+                - result (str): The assigned category
                 - logprobs (list | None): Probability data if logprobs enabled
                 - analysis (str | None): Detailed reasoning if with_analysis enabled
                 - errors (list(str) | None): Errors occured during tool call
         """
-        return self._operator.run(
-            # User parameters
-            text=text,
-            with_analysis=with_analysis,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            validator=validator,
-            max_validation_retries=max_validation_retries,
-            # Internal parameters
-            prompt_file="categorizer.yaml",
-            output_model=OM.CategorizerOutput,
-            mode=None,
-            output_lang=None,
-        )
+        if mode == "category_tree":
+            # Initializations
+            output = Models.ToolOutput()
+            levels = categories.level_count()
+            parent_id = 0
+            final_output = []
+            for _ in range(levels):
+                # Get child nodes for current parent
+                parent_node = categories.find_node(parent_id)
+                children = categories.find_children(parent_node)
+                # Check if child nodes exist
+                if not children:
+                    output.errors.append(
+                        f"No categories found for parent_id {parent_id} in the tree"
+                    )
+                    return output
+                # Extract category names and descriptions
+                category_list = [
+                    f"Category Name: {node.name}, Description: {node.description}"
+                    for node in children
+                ]
+                category_names = [node.name for node in children]
+                # Run categorization for this level
+                level_output = self._operator.run(
+                    # User parameters
+                    text=text,
+                    category_list=category_list,
+                    with_analysis=with_analysis,
+                    user_prompt=user_prompt,
+                    temperature=temperature,
+                    logprobs=logprobs,
+                    top_logprobs=top_logprobs,
+                    mode=mode,
+                    validator=validator,
+                    max_validation_retries=max_validation_retries,
+                    # Internal parameters
+                    prompt_file="categorize.yaml",
+                    output_model=Models.create_dynamic_model(category_names),
+                    output_lang=None,
+                )
+                # Check for errors from operator
+                if level_output.errors:
+                    output.errors.extend(level_output.errors)
+                    return output
+                # Get the chosen category
+                chosen_category = level_output.result
+                # Find the corresponding node
+                parent_node = categories.find_node(chosen_category)
+                if parent_node is None:
+                    output.errors.append(
+                        f"Category '{chosen_category}' not found in tree after selection"
+                    )
+                    return output
+                parent_id = parent_node.node_id
+                final_output.append(parent_node.name)
+                # Copy analysis/logprobs from the last level's output
+                output.analysis = level_output.analysis
+                output.logprobs = level_output.logprobs
+            output.result = final_output
+            return output
+        else:
+            return self._operator.run(
+                # User parameters
+                text=text,
+                category_list=categories,
+                with_analysis=with_analysis,
+                user_prompt=user_prompt,
+                temperature=temperature,
+                logprobs=logprobs,
+                top_logprobs=top_logprobs,
+                mode=mode,
+                validator=validator,
+                max_validation_retries=max_validation_retries,
+                # Internal parameters
+                prompt_file="categorize.yaml",
+                output_model=Models.create_dynamic_model(categories),
+                output_lang=None,
+            )
     def extract_keywords(
         self,
@@ -81,9 +160,12 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
+        mode: Literal["auto", "threshold", "count"] = "auto",
+        number_of_keywords: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Extract salient keywords from text.
@@ -97,6 +179,7 @@ class TheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -114,12 +197,14 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            mode=mode,
+            number_of_keywords=number_of_keywords,
             validator=validator,
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="extract_keywords.yaml",
-            output_model=OM.ListStrOutput,
-            mode=None,
+            output_model=Models.ListStrOutput,
+            priority=priority,
         )
     def extract_entities(
@@ -133,7 +218,8 @@ class TheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Perform Named Entity Recognition (NER) over the input text.
@@ -147,6 +233,7 @@ class TheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -168,8 +255,9 @@ class TheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="extract_entities.yaml",
-            output_model=OM.ListDictStrStrOutput,
+            output_model=Models.ListDictStrStrOutput,
             mode=None,
+            priority=priority,
         )
     def is_question(
@@ -182,7 +270,8 @@ class TheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Detect if the input is phrased as a question.
@@ -195,6 +284,7 @@ class TheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -215,9 +305,10 @@ class TheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="is_question.yaml",
-            output_model=OM.BoolOutput,
+            output_model=Models.BoolOutput,
             mode=None,
             output_lang=None,
+            priority=priority,
         )
     def text_to_question(
@@ -231,7 +322,8 @@ class TheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Generate a single question from the given text.
@@ -245,6 +337,7 @@ class TheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -266,8 +359,9 @@ class TheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="text_to_question.yaml",
-            output_model=OM.StrOutput,
+            output_model=Models.StrOutput,
             mode=None,
+            priority=priority,
         )
     def merge_questions(
@@ -282,7 +376,8 @@ class TheTool:
         mode: Literal["default", "reason"] = "default",
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Merge multiple questions into a single unified question.
@@ -297,6 +392,7 @@ class TheTool:
             mode: Merging strategy - 'default' for direct merge, 'reason' for reasoned merge
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -319,8 +415,9 @@ class TheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="merge_questions.yaml",
-            output_model=OM.StrOutput,
+            output_model=Models.StrOutput,
             mode=mode,
+            priority=priority,
         )
     def rewrite(
@@ -335,7 +432,8 @@ class TheTool:
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Rewrite a text with different modes.
@@ -350,6 +448,7 @@ class TheTool:
             mode: Rewriting mode - 'positive', 'negative', or 'hard_negative'
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -371,8 +470,9 @@ class TheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="rewrite.yaml",
-            output_model=OM.StrOutput,
+            output_model=Models.StrOutput,
             mode=mode,
+            priority=priority,
         )
     def subject_to_question(
@@ -387,7 +487,8 @@ class TheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Generate a list of questions about a subject.
@@ -402,6 +503,7 @@ class TheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -424,8 +526,9 @@ class TheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="subject_to_question.yaml",
-            output_model=OM.ReasonListStrOutput,
+            output_model=Models.ReasonListStrOutput,
             mode=None,
+            priority=priority,
         )
     def summarize(
@@ -439,7 +542,8 @@ class TheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Summarize the given subject text.
@@ -453,6 +557,7 @@ class TheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -474,8 +579,9 @@ class TheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="summarize.yaml",
-            output_model=OM.StrOutput,
+            output_model=Models.StrOutput,
             mode=None,
+            priority=priority,
         )
     def translate(
@@ -489,7 +595,8 @@ class TheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Translate text between languages.
@@ -503,6 +610,7 @@ class TheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -524,9 +632,63 @@ class TheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="translate.yaml",
-            output_model=OM.StrOutput,
+            output_model=Models.StrOutput,
             mode=None,
             output_lang=None,
+            priority=priority,
+        )
+    def detect_entity(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+        validator: Callable[[Any], bool] | None = None,
+        max_validation_retries: int | None = None,
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
+        """
+        Detects entities in a given text based on the entity_detector.yaml prompt.
+        Arguments:
+            text: The input text
+            with_analysis: Whether to include detailed reasoning analysis
+            output_lang: Language for the output summary
+            user_prompt: Additional instructions for summarization
+            temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
+            logprobs: Whether to return token probability information
+            top_logprobs: Number of top token alternatives to return if logprobs enabled
+            validator: Custom validation function to validate the output
+            max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
+        Returns:
+            ToolOutput: Object containing:
+                - result (list[Entity]): The entities
+                - logprobs (list | None): Probability data if logprobs enabled
+                - analysis (str | None): Detailed reasoning if with_analysis enabled
+                - errors (list(str) | None): Errors occured during tool call
+        """
+        return self._operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            validator=validator,
+            max_validation_retries=max_validation_retries,
+            # Internal parameters
+            prompt_file="detect_entity.yaml",
+            output_model=Models.EntityDetectorOutput,
+            mode=None,
+            priority=priority,
         )
     def run_custom(
@@ -539,7 +701,8 @@ class TheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Custom tool that can do almost anything!
@@ -551,6 +714,7 @@ class TheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -575,4 +739,5 @@ class TheTool:
             user_prompt=None,
             with_analysis=False,
             mode=None,
+            priority=priority,
         )

texttools/prompts/categorizer.yaml DELETED Viewed

@@ -1,28 +0,0 @@
-main_template: |
-  تو یک متخصص علوم دینی هستی
-  من یک متن به تو میدهم و تو باید
-  آن متن را در یکی از دسته بندی های زیر طبقه بندی کنی
-  دسته بندی ها:
-  "باورهای دینی",
-  "اخلاق اسلامی",
-  "احکام و فقه",
-  "تاریخ اسلام و شخصیت ها",
-  "منابع دینی",
-  "دین و جامعه/سیاست",
-  "عرفان و معنویت",
-  "هیچکدام",
-  فقط با این فرمت json پاسخ بده:
-  {{
-  	  "reason": "<دلیل انتخابت رو به صورت خلاصه بگو>",
-      "result": "<یکی از دسته بندی ها>"
-  }}
-  متنی که باید طبقه بندی کنی:
-  {input}
-analyze_template: |
-  ما میخواهیم متنی که داده می شود را طبقه بندی کنیم.
-  برای بهبود طبقه بندی، نیاز به آنالیز متن داریم.
-  متنی که داده می شود را آنالیز کن و ایده اصلی و آنالیزی کوتاه از آن را بنویس.
-  آنالیز باید بسیار خلاصه باشد
-  نهایتا 20 کلمه
-  {input}

texttools/tools/internals/output_models.py DELETED Viewed

@@ -1,62 +0,0 @@
-from typing import Literal, Any
-from pydantic import BaseModel, Field
-class ToolOutput(BaseModel):
-    result: Any = None
-    analysis: str = ""
-    logprobs: list[dict[str, Any]] = []
-    errors: list[str] = []
-    def __repr__(self) -> str:
-        return f"ToolOutput(result_type='{type(self.result)}', result='{self.result}', analysis='{self.analysis}', logprobs='{self.logprobs}', errors='{self.errors}'"
-class StrOutput(BaseModel):
-    result: str = Field(..., description="The output string")
-class BoolOutput(BaseModel):
-    result: bool = Field(
-        ..., description="Boolean indicating the output state", example=True
-    )
-class ListStrOutput(BaseModel):
-    result: list[str] = Field(
-        ..., description="The output list of strings", example=["text_1", "text_2"]
-    )
-class ListDictStrStrOutput(BaseModel):
-    result: list[dict[str, str]] = Field(
-        ...,
-        description="List of dictionaries containing string key-value pairs",
-        example=[{"text": "Mohammad", "type": "PER"}],
-    )
-class ReasonListStrOutput(BaseModel):
-    reason: str = Field(..., description="Thinking process that led to the output")
-    result: list[str] = Field(..., description="The output list of strings")
-class CategorizerOutput(BaseModel):
-    reason: str = Field(
-        ..., description="Explanation of why the input belongs to the category"
-    )
-    result: Literal[
-        "باورهای دینی",
-        "اخلاق اسلامی",
-        "احکام و فقه",
-        "تاریخ اسلام و شخصیت ها",
-        "منابع دینی",
-        "دین و جامعه/سیاست",
-        "عرفان و معنویت",
-        "هیچکدام",
-    ] = Field(
-        ...,
-        description="Predicted category label",
-        example="اخلاق اسلامی",
-    )

{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/WHEEL RENAMED Viewed

File without changes

{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/top_level.txt RENAMED Viewed

File without changes

hamtaa-texttools 1.1.13__py3-none-any.whl → 1.1.14__py3-none-any.whl

hamtaa-texttools 1.1.13__py3-none-any.whl → 1.1.14__py3-none-any.whl