PyPI - hamtaa-texttools - Versions diffs - 1.1.13__py3-none-any.whl → 1.1.14__py3-none-any.whl - Mend

hamtaa-texttools 1.1.13__py3-none-any.whl → 1.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/METADATA +8 -6
{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/RECORD +16 -15
texttools/__init__.py +2 -1
texttools/batch/batch_config.py +1 -1
texttools/batch/batch_runner.py +1 -1
texttools/prompts/categorize.yaml +77 -0
texttools/prompts/detect_entity.yaml +22 -0
texttools/prompts/extract_keywords.yaml +68 -18
texttools/tools/async_tools.py +206 -41
texttools/tools/internals/async_operator.py +8 -4
texttools/tools/internals/models.py +181 -0
texttools/tools/internals/sync_operator.py +9 -4
texttools/tools/sync_tools.py +206 -41
texttools/prompts/categorizer.yaml +0 -28
texttools/tools/internals/output_models.py +0 -62
{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/WHEEL +0 -0
{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/licenses/LICENSE +0 -0
{hamtaa_texttools-1.1.13.dist-info → hamtaa_texttools-1.1.14.dist-info}/top_level.txt +0 -0

texttools/tools/async_tools.py CHANGED Viewed

@@ -1,9 +1,10 @@
-from typing import Literal, Any, Callable
+from typing import Literal, Any
+from collections.abc import Callable
 from openai import AsyncOpenAI
 from texttools.tools.internals.async_operator import AsyncOperator
-import texttools.tools.internals.output_models as OM
+import texttools.tools.internals.models as Models
 class AsyncTheTool:
@@ -29,19 +30,23 @@ class AsyncTheTool:
     async def categorize(
         self,
         text: str,
+        categories: list[str] | Models.CategoryTree,
         with_analysis: bool = False,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
+        mode: Literal["category_list", "category_tree"] = "category_list",
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
-        Categorize a text into a single Islamic studies domain category.
+        Categorize a text into a category / category tree.
         Arguments:
             text: The input text to categorize
+            categories: The list of category names, or a CategoryTree, for the LLM to choose from
             with_analysis: Whether to include detailed reasoning analysis
             user_prompt: Additional instructions for the categorization
             temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
@@ -49,30 +54,104 @@ class AsyncTheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
-                - result (str): The assigned Islamic studies category
+                - result (str): The assigned category
                 - logprobs (list | None): Probability data if logprobs enabled
                 - analysis (str | None): Detailed reasoning if with_analysis enabled
                 - errors (list(str) | None): Errors occurred during tool call
         """
-        return await self._operator.run(
-            # User parameters
-            text=text,
-            with_analysis=with_analysis,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            validator=validator,
-            max_validation_retries=max_validation_retries,
-            # Internal parameters
-            prompt_file="categorizer.yaml",
-            output_model=OM.CategorizerOutput,
-            mode=None,
-            output_lang=None,
-        )
+        if mode == "category_tree":
+            # Initializations
+            output = Models.ToolOutput()
+            levels = categories.level_count()
+            parent_id = 0
+            final_output = []
+            for _ in range(levels):
+                # Get child nodes for current parent
+                parent_node = categories.find_node(parent_id)
+                children = categories.find_children(parent_node)
+                # Check if child nodes exist
+                if not children:
+                    output.errors.append(
+                        f"No categories found for parent_id {parent_id} in the tree"
+                    )
+                    return output
+                # Extract category names and descriptions
+                category_list = [
+                    f"Category Name: {node.name}, Description: {node.description}"
+                    for node in children
+                ]
+                category_names = [node.name for node in children]
+                # Run categorization for this level
+                level_output = await self._operator.run(
+                    # User parameters
+                    text=text,
+                    category_list=category_list,
+                    with_analysis=with_analysis,
+                    user_prompt=user_prompt,
+                    temperature=temperature,
+                    logprobs=logprobs,
+                    top_logprobs=top_logprobs,
+                    mode=mode,
+                    validator=validator,
+                    max_validation_retries=max_validation_retries,
+                    # Internal parameters
+                    prompt_file="categorize.yaml",
+                    output_model=Models.create_dynamic_model(category_names),
+                    output_lang=None,
+                )
+                # Check for errors from operator
+                if level_output.errors:
+                    output.errors.extend(level_output.errors)
+                    return output
+                # Get the chosen category
+                chosen_category = level_output.result
+                # Find the corresponding node
+                parent_node = categories.find_node(chosen_category)
+                if parent_node is None:
+                    output.errors.append(
+                        f"Category '{chosen_category}' not found in tree after selection"
+                    )
+                    return output
+                parent_id = parent_node.node_id
+                final_output.append(parent_node.name)
+                # Copy analysis/logprobs from the last level's output
+                output.analysis = level_output.analysis
+                output.logprobs = level_output.logprobs
+            output.result = final_output
+            return output
+        else:
+            return await self._operator.run(
+                # User parameters
+                text=text,
+                category_list=categories,
+                with_analysis=with_analysis,
+                user_prompt=user_prompt,
+                temperature=temperature,
+                logprobs=logprobs,
+                top_logprobs=top_logprobs,
+                mode=mode,
+                validator=validator,
+                max_validation_retries=max_validation_retries,
+                # Internal parameters
+                prompt_file="categorize.yaml",
+                output_model=Models.create_dynamic_model(categories),
+                output_lang=None,
+            )
     async def extract_keywords(
         self,
@@ -83,9 +162,12 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
+        mode: Literal["auto", "threshold", "count"] = "auto",
+        number_of_keywords: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Extract salient keywords from text.
@@ -99,6 +181,7 @@ class AsyncTheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -116,12 +199,14 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            mode=mode,
+            number_of_keywords=number_of_keywords,
             validator=validator,
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="extract_keywords.yaml",
-            output_model=OM.ListStrOutput,
-            mode=None,
+            output_model=Models.ListStrOutput,
+            priority=priority,
         )
     async def extract_entities(
@@ -135,7 +220,8 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Perform Named Entity Recognition (NER) over the input text.
@@ -149,6 +235,7 @@ class AsyncTheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -170,8 +257,9 @@ class AsyncTheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="extract_entities.yaml",
-            output_model=OM.ListDictStrStrOutput,
+            output_model=Models.ListDictStrStrOutput,
             mode=None,
+            priority=priority,
         )
     async def is_question(
@@ -184,7 +272,8 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Detect if the input is phrased as a question.
@@ -197,6 +286,7 @@ class AsyncTheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -217,9 +307,10 @@ class AsyncTheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="is_question.yaml",
-            output_model=OM.BoolOutput,
+            output_model=Models.BoolOutput,
             mode=None,
             output_lang=None,
+            priority=priority,
         )
     async def text_to_question(
@@ -233,7 +324,8 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Generate a single question from the given text.
@@ -247,6 +339,7 @@ class AsyncTheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -268,8 +361,9 @@ class AsyncTheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="text_to_question.yaml",
-            output_model=OM.StrOutput,
+            output_model=Models.StrOutput,
             mode=None,
+            priority=priority,
         )
     async def merge_questions(
@@ -284,7 +378,8 @@ class AsyncTheTool:
         mode: Literal["default", "reason"] = "default",
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Merge multiple questions into a single unified question.
@@ -299,6 +394,7 @@ class AsyncTheTool:
             mode: Merging strategy - 'default' for direct merge, 'reason' for reasoned merge
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -321,8 +417,9 @@ class AsyncTheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="merge_questions.yaml",
-            output_model=OM.StrOutput,
+            output_model=Models.StrOutput,
             mode=mode,
+            priority=priority,
         )
     async def rewrite(
@@ -337,7 +434,8 @@ class AsyncTheTool:
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Rewrite a text with different modes.
@@ -352,6 +450,7 @@ class AsyncTheTool:
             mode: Rewriting mode - 'positive', 'negative', or 'hard_negative'
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -373,8 +472,9 @@ class AsyncTheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="rewrite.yaml",
-            output_model=OM.StrOutput,
+            output_model=Models.StrOutput,
             mode=mode,
+            priority=priority,
         )
     async def subject_to_question(
@@ -389,7 +489,8 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Generate a list of questions about a subject.
@@ -404,6 +505,7 @@ class AsyncTheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -426,8 +528,9 @@ class AsyncTheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="subject_to_question.yaml",
-            output_model=OM.ReasonListStrOutput,
+            output_model=Models.ReasonListStrOutput,
             mode=None,
+            priority=priority,
         )
     async def summarize(
@@ -441,7 +544,8 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Summarize the given subject text.
@@ -455,6 +559,7 @@ class AsyncTheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -476,8 +581,9 @@ class AsyncTheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="summarize.yaml",
-            output_model=OM.StrOutput,
+            output_model=Models.StrOutput,
             mode=None,
+            priority=priority,
         )
     async def translate(
@@ -491,7 +597,8 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Translate text between languages.
@@ -505,6 +612,7 @@ class AsyncTheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -526,9 +634,63 @@ class AsyncTheTool:
             max_validation_retries=max_validation_retries,
             # Internal parameters
             prompt_file="translate.yaml",
-            output_model=OM.StrOutput,
+            output_model=Models.StrOutput,
             mode=None,
             output_lang=None,
+            priority=priority,
+        )
+    async def detect_entity(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+        validator: Callable[[Any], bool] | None = None,
+        max_validation_retries: int | None = None,
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
+        """
+        Detects entities in a given text based on the detect_entity.yaml prompt.
+        Arguments:
+            text: The input text
+            with_analysis: Whether to include detailed reasoning analysis
+            output_lang: Language for the output
+            user_prompt: Additional instructions for entity detection
+            temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
+            logprobs: Whether to return token probability information
+            top_logprobs: Number of top token alternatives to return if logprobs enabled
+            validator: Custom validation function to validate the output
+            max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
+        Returns:
+            ToolOutput: Object containing:
+                - result (list[Entity]): The entities
+                - logprobs (list | None): Probability data if logprobs enabled
+                - analysis (str | None): Detailed reasoning if with_analysis enabled
+                - errors (list(str) | None): Errors occurred during tool call
+        """
+        return await self._operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            validator=validator,
+            max_validation_retries=max_validation_retries,
+            # Internal parameters
+            prompt_file="detect_entity.yaml",
+            output_model=Models.EntityDetectorOutput,
+            mode=None,
+            priority=priority,
         )
     async def run_custom(
@@ -541,7 +703,8 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         validator: Callable[[Any], bool] | None = None,
         max_validation_retries: int | None = None,
-    ) -> OM.ToolOutput:
+        priority: int | None = 0,
+    ) -> Models.ToolOutput:
         """
         Custom tool that can do almost anything!
@@ -553,6 +716,7 @@ class AsyncTheTool:
             top_logprobs: Number of top token alternatives to return if logprobs enabled
             validator: Custom validation function to validate the output
             max_validation_retries: Maximum number of retry attempts if validation fails
+            priority: Task execution priority (if enabled by vLLM and model)
         Returns:
             ToolOutput: Object containing:
@@ -577,4 +741,5 @@ class AsyncTheTool:
             user_prompt=None,
             with_analysis=False,
             mode=None,
+            priority=priority,
         )

texttools/tools/internals/async_operator.py CHANGED Viewed

@@ -1,10 +1,11 @@
-from typing import Any, TypeVar, Type, Callable
+from typing import Any, TypeVar, Type
+from collections.abc import Callable
 import logging
 from openai import AsyncOpenAI
 from pydantic import BaseModel
-from texttools.tools.internals.output_models import ToolOutput
+from texttools.tools.internals.models import ToolOutput
 from texttools.tools.internals.operator_utils import OperatorUtils
 from texttools.tools.internals.formatters import Formatter
 from texttools.tools.internals.prompt_loader import PromptLoader
@@ -51,6 +52,7 @@ class AsyncOperator:
         temperature: float,
         logprobs: bool = False,
         top_logprobs: int = 3,
+        priority: int | None = 0,
     ) -> tuple[T, Any]:
         """
         Parses a chat completion using OpenAI's structured output format.
@@ -66,7 +68,8 @@ class AsyncOperator:
         if logprobs:
             request_kwargs["logprobs"] = True
             request_kwargs["top_logprobs"] = top_logprobs
+        if priority:
+            request_kwargs["extra_body"] = {"priority": priority}
         completion = await self._client.beta.chat.completions.parse(**request_kwargs)
         parsed = completion.choices[0].message.parsed
         return parsed, completion
@@ -87,6 +90,7 @@ class AsyncOperator:
         prompt_file: str,
         output_model: Type[T],
         mode: str | None,
+        priority: int | None = 0,
         **extra_kwargs,
     ) -> ToolOutput:
         """
@@ -136,7 +140,7 @@ class AsyncOperator:
             messages = formatter.user_merge_format(messages)
             parsed, completion = await self._parse_completion(
-                messages, output_model, temperature, logprobs, top_logprobs
+                messages, output_model, temperature, logprobs, top_logprobs, priority
             )
             output.result = parsed.result

hamtaa-texttools 1.1.13__py3-none-any.whl → 1.1.14__py3-none-any.whl

hamtaa-texttools 1.1.13__py3-none-any.whl → 1.1.14__py3-none-any.whl