hamtaa-texttools 1.1.12__py3-none-any.whl → 1.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,181 @@
1
+ from typing import Type, Any, Literal
2
+
3
+ from pydantic import BaseModel, Field, create_model
4
+
5
+
6
class ToolOutput(BaseModel):
    """Generic result envelope returned by every tool.

    Attributes:
        result: Tool-specific payload; its type depends on the tool.
        analysis: Free-text analysis produced alongside the result, if any.
        logprobs: Token log-probability records from the LLM, if requested.
        errors: Error messages accumulated while running the tool.
    """

    result: Any = None
    analysis: str = ""
    # default_factory is the pydantic idiom for mutable defaults — each
    # instance gets its own fresh list.
    logprobs: list[dict[str, Any]] = Field(default_factory=list)
    errors: list[str] = Field(default_factory=list)

    def __repr__(self) -> str:
        # BUG FIX: the original f-string was missing the closing ')'.
        return (
            f"ToolOutput(result_type='{type(self.result)}', result='{self.result}', "
            f"analysis='{self.analysis}', logprobs='{self.logprobs}', errors='{self.errors}')"
        )
14
+
15
+
16
class StrOutput(BaseModel):
    """Output model carrying a single string result."""

    result: str = Field(
        ...,
        description="The output string",
    )
18
+
19
+
20
class BoolOutput(BaseModel):
    """Output model carrying a single boolean result."""

    result: bool = Field(..., description="Boolean indicating the output state", example=True)
24
+
25
+
26
class ListStrOutput(BaseModel):
    """Output model carrying a list of strings."""

    result: list[str] = Field(..., description="The output list of strings", example=["text_1", "text_2"])
30
+
31
+
32
class ListDictStrStrOutput(BaseModel):
    """Output model carrying a list of flat str→str dictionaries."""

    result: list[dict[str, str]] = Field(
        ...,
        description="List of dictionaries containing string key-value pairs",
        example=[{"text": "Mohammad", "type": "PER"}],
    )
38
+
39
+
40
class ReasonListStrOutput(BaseModel):
    """Output model pairing a reasoning trace with a list-of-strings result."""

    # The model's chain of reasoning comes first so it is generated before
    # the final answer in structured-output decoding.
    reason: str = Field(..., description="Thinking process that led to the output")
    result: list[str] = Field(..., description="The output list of strings")
43
+
44
+
45
class Node(BaseModel):
    """A single category node inside a CategoryTree.

    node_id 0 is reserved for the synthetic root; parent_id is None only
    for that root node.
    """

    node_id: int
    name: str
    level: int
    parent_id: int | None
    description: str = "No description provided"
51
+
52
+
53
class CategoryTree:
    """A mutable tree of named category nodes.

    A synthetic root node (id 0, level 0) is created on construction;
    user categories hang below it starting at level 1. Node names are
    unique across the whole tree and double as lookup keys.
    """

    def __init__(self, tree_name):
        self.root = Node(node_id=0, name=tree_name, level=0, parent_id=None)
        self.node_list: list[Node] = [self.root]
        self.new_id = 1

    def add_node(
        self,
        node_name: str,
        parent_name: str | None = None,
        description: str | None = None,
    ) -> None:
        """Add a category under *parent_name*, or under the root when omitted.

        Raises ValueError when the name is already taken or the parent
        does not exist.
        """
        if self.find_node(node_name):
            raise ValueError(f"{node_name} has been chosen for another category before")

        if parent_name:
            parent = self.find_node(parent_name)
            if parent is None:
                raise ValueError(f"Parent category '{parent_name}' not found")
            parent_id, level = parent.node_id, parent.level + 1
        else:
            # No parent given: attach directly beneath the root.
            parent_id, level = 0, 1

        fields = {
            "node_id": self.new_id,
            "name": node_name,
            "level": level,
            "parent_id": parent_id,
        }
        # Omit the key entirely so Node's class-level default applies.
        if description is not None:
            fields["description"] = description

        self.node_list.append(Node(**fields))
        self.new_id += 1

    def get_nodes(self) -> list[Node]:
        """Return the flat list of all nodes, root included."""
        return self.node_list

    def find_node(self, identifier: int | str) -> Node | None:
        """Look a node up by name (str) or by id (int); None when absent."""
        if isinstance(identifier, str):
            return next((n for n in self.get_nodes() if n.name == identifier), None)
        if isinstance(identifier, int):
            return next((n for n in self.get_nodes() if n.node_id == identifier), None)
        return None

    def find_children(self, parent_node: Node) -> list[Node] | None:
        """Return the direct children of *parent_node*, or None when it has none."""
        children = [n for n in self.get_nodes() if n.parent_id == parent_node.node_id]
        return children or None

    def remove_node(self, identifier: int | str) -> None:
        """Remove a node and, recursively, its entire subtree.

        Raises ValueError when no node matches *identifier*.
        """
        node = self.find_node(identifier)
        if node is None:
            raise ValueError(f"Node with identifier: '{identifier}' not found.")

        # Depth-first: delete the subtree before the node itself.
        children = self.find_children(node)
        if children is not None:
            for child in children:
                self.remove_node(child.name)
        self.node_list.remove(node)

    def dump_tree(self) -> dict:
        """Serialize the tree to nested dicts; the synthetic root is omitted."""

        def serialize(node: Node) -> dict:
            return {
                "node_id": node.node_id,
                "name": node.name,
                "level": node.level,
                "parent_id": node.parent_id,
                "children": [
                    serialize(child)
                    for child in self.node_list
                    if child.parent_id == node.node_id
                ],
            }

        return {"category_tree": serialize(self.root)["children"]}

    def level_count(self) -> int:
        """Return the depth of the deepest node (the root sits at level 0)."""
        return max(node.level for node in self.node_list)
155
+
156
+
157
# This function is needed to create CategorizerOutput with dynamic categories
def create_dynamic_model(allowed_values: list[str]) -> Type[BaseModel]:
    """Build a CategorizerOutput model whose `result` field is restricted
    to *allowed_values*.

    Args:
        allowed_values: Non-empty list of category labels the classifier
            may predict.

    Returns:
        A dynamically created pydantic model class named "CategorizerOutput"
        with a `reason` (str) field and a `result` field typed as a Literal
        over the given labels.

    Raises:
        ValueError: If *allowed_values* is empty — Literal requires at
            least one parameter, and an empty category set is meaningless.
    """
    if not allowed_values:
        raise ValueError("allowed_values must contain at least one category label")

    # Subscripting Literal with a tuple is equivalent to Literal[v1, v2, ...]
    # and, unlike the starred form `Literal[*allowed_values]`, also works on
    # Python 3.10 (the rest of this module only needs 3.10's `X | None`).
    literal_type = Literal[tuple(allowed_values)]

    return create_model(
        "CategorizerOutput",
        reason=(
            str,
            Field(
                ..., description="Explanation of why the input belongs to the category"
            ),
        ),
        result=(literal_type, Field(..., description="Predicted category label")),
    )
173
+
174
+
175
class Entity(BaseModel):
    """One extracted entity: its surface text and its type label."""

    text: str = Field(description="The exact text of the entity")
    type: str = Field(description="The type of the entity")
178
+
179
+
180
class EntityDetectorOutput(BaseModel):
    """Output model for the entity detector: every entity found in the input."""

    result: list[Entity] = Field(
        description="List of all extracted entities",
    )
@@ -1,11 +1,13 @@
1
- from typing import Any, TypeVar, Type, Callable
1
+ from typing import Any, TypeVar, Type
2
+ from collections.abc import Callable
2
3
  import logging
3
4
 
4
5
  from openai import OpenAI
5
6
  from pydantic import BaseModel
6
7
 
7
- from texttools.tools.internals.output_models import ToolOutput
8
+ from texttools.tools.internals.models import ToolOutput
8
9
  from texttools.tools.internals.operator_utils import OperatorUtils
10
+ from texttools.tools.internals.formatters import Formatter
9
11
  from texttools.tools.internals.prompt_loader import PromptLoader
10
12
 
11
13
  # Base Model type for output models
@@ -50,6 +52,7 @@ class Operator:
50
52
  temperature: float,
51
53
  logprobs: bool = False,
52
54
  top_logprobs: int = 3,
55
+ priority: int | None = 0,
53
56
  ) -> tuple[T, Any]:
54
57
  """
55
58
  Parses a chat completion using OpenAI's structured output format.
@@ -66,6 +69,9 @@ class Operator:
66
69
  request_kwargs["logprobs"] = True
67
70
  request_kwargs["top_logprobs"] = top_logprobs
68
71
 
72
+ if priority:
73
+ request_kwargs["extra_body"] = {"priority": priority}
74
+
69
75
  completion = self._client.beta.chat.completions.parse(**request_kwargs)
70
76
  parsed = completion.choices[0].message.parsed
71
77
  return parsed, completion
@@ -86,14 +92,15 @@ class Operator:
86
92
  prompt_file: str,
87
93
  output_model: Type[T],
88
94
  mode: str | None,
95
+ priority: int | None = 0,
89
96
  **extra_kwargs,
90
97
  ) -> ToolOutput:
91
98
  """
92
99
  Execute the LLM pipeline with the given input text.
93
100
  """
94
101
  prompt_loader = PromptLoader()
102
+ formatter = Formatter()
95
103
  output = ToolOutput()
96
-
97
104
  try:
98
105
  # Prompt configs contain two keys: main_template and analyze template, both are string
99
106
  prompt_configs = prompt_loader.load(
@@ -131,8 +138,10 @@ class Operator:
131
138
  OperatorUtils.build_user_message(prompt_configs["main_template"])
132
139
  )
133
140
 
141
+ messages = formatter.user_merge_format(messages)
142
+
134
143
  parsed, completion = self._parse_completion(
135
- messages, output_model, temperature, logprobs, top_logprobs
144
+ messages, output_model, temperature, logprobs, top_logprobs, priority
136
145
  )
137
146
 
138
147
  output.result = parsed.result