PyPI - hamtaa-texttools - Versions diffs - 1.1.19__py3-none-any.whl → 1.1.20__py3-none-any.whl - Mend

hamtaa-texttools 1.1.19__py3-none-any.whl → 1.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

{hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.20.dist-info}/METADATA +8 -8
{hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.20.dist-info}/RECORD +17 -17
texttools/batch/batch_runner.py +6 -6
texttools/batch/internals/batch_manager.py +6 -6
texttools/batch/internals/utils.py +1 -4
texttools/internals/async_operator.py +4 -6
texttools/internals/models.py +8 -17
texttools/internals/operator_utils.py +24 -0
texttools/internals/sync_operator.py +4 -6
texttools/internals/text_to_chunks.py +97 -0
texttools/prompts/extract_entities.yaml +1 -1
texttools/prompts/text_to_question.yaml +6 -4
texttools/tools/async_tools.py +140 -70
texttools/tools/sync_tools.py +140 -70
texttools/internals/formatters.py +0 -24
{hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.20.dist-info}/WHEEL +0 -0
{hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.20.dist-info}/licenses/LICENSE +0 -0
{hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.20.dist-info}/top_level.txt +0 -0

texttools/tools/sync_tools.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import Literal, Any
+from typing import Literal
 from collections.abc import Callable
 from openai import OpenAI
@@ -12,6 +12,7 @@ from texttools.internals.exceptions import (
     LLMError,
     ValidationError,
 )
+from texttools.internals.text_to_chunks import text_to_chunks
 class TheTool:
@@ -35,9 +36,9 @@ class TheTool:
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
+        top_logprobs: int = 3,
         mode: Literal["category_list", "category_tree"] = "category_list",
-        validator: Callable[[Any], bool] | None = None,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -75,11 +76,11 @@ class TheTool:
             start = datetime.now()
             if mode == "category_tree":
-                # Initializations
-                output = Models.ToolOutput()
                 levels = categories.get_level_count()
                 parent_id = 0
-                final_output = []
+                final_categories = []
+                analysis = ""
+                logprobs = []
                 for _ in range(levels):
                     # Get child nodes for current parent
@@ -102,7 +103,7 @@ class TheTool:
                     ]
                     category_names = [node.name for node in children]
-                    # Run categorization for this level
+                    # Run categorization for current level
                     level_output = self._operator.run(
                         # User parameters
                         text=text,
@@ -143,16 +144,22 @@ class TheTool:
                         return output
                     parent_id = parent_node.node_id
-                    final_output.append(parent_node.name)
+                    final_categories.append(parent_node.name)
-                    # Copy analysis/logprobs/process from the last level's output
-                    output.analysis = level_output.analysis
-                    output.logprobs = level_output.logprobs
-                    output.process = level_output.process
+                    if with_analysis:
+                        analysis += level_output.analysis
+                    if logprobs:
+                        logprobs += level_output.logprobs
-                output.result = final_output
                 end = datetime.now()
-                output.execution_time = (end - start).total_seconds()
+                output = Models.ToolOutput(
+                    result=final_categories,
+                    logprobs=logprobs,
+                    analysis=analysis,
+                    process="categorize",
+                    execution_time=(end - start).total_seconds(),
+                )
                 return output
             else:
@@ -199,10 +206,10 @@ class TheTool:
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
+        top_logprobs: int = 3,
         mode: Literal["auto", "threshold", "count"] = "auto",
         number_of_keywords: int | None = None,
-        validator: Callable[[Any], bool] | None = None,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -251,7 +258,7 @@ class TheTool:
                 priority=priority,
                 # Internal parameters
                 prompt_file="extract_keywords.yaml",
-                output_model=Models.ListStrOutput,
+                output_model=Models.ListStr,
             )
             end = datetime.now()
             output.execution_time = (end - start).total_seconds()
@@ -273,13 +280,14 @@ class TheTool:
     def extract_entities(
         self,
         text: str,
+        entities: list[str] | None = None,
         with_analysis: bool = False,
         output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
-        validator: Callable[[Any], bool] | None = None,
+        top_logprobs: int = 3,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -288,6 +296,7 @@ class TheTool:
         Arguments:
             text: The input text to extract entities from
+            entities: List of entities provided by user (Optional)
             with_analysis: Whether to include detailed reasoning analysis
             output_lang: Language for the output response
             user_prompt: Additional instructions for entity extraction
@@ -315,6 +324,8 @@ class TheTool:
             output = self._operator.run(
                 # User parameters
                 text=text,
+                entities=entities
+                or "all named entities (e.g., PER, ORG, LOC, DAT, etc.)",
                 with_analysis=with_analysis,
                 output_lang=output_lang,
                 user_prompt=user_prompt,
@@ -326,7 +337,7 @@ class TheTool:
                 priority=priority,
                 # Internal parameters
                 prompt_file="extract_entities.yaml",
-                output_model=Models.ListDictStrStrOutput,
+                output_model=Models.ListDictStrStr,
                 mode=None,
             )
             end = datetime.now()
@@ -353,8 +364,8 @@ class TheTool:
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
-        validator: Callable[[Any], bool] | None = None,
+        top_logprobs: int = 3,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -399,7 +410,7 @@ class TheTool:
                 priority=priority,
                 # Internal parameters
                 prompt_file="is_question.yaml",
-                output_model=Models.BoolOutput,
+                output_model=Models.Bool,
                 mode=None,
                 output_lang=None,
             )
@@ -423,13 +434,14 @@ class TheTool:
     def text_to_question(
         self,
         text: str,
+        number_of_questions: int,
         with_analysis: bool = False,
         output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
-        validator: Callable[[Any], bool] | None = None,
+        top_logprobs: int = 3,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -438,6 +450,7 @@ class TheTool:
         Arguments:
             text: The input text to generate a question from
+            number_of_questions: Number of questions to generate
             with_analysis: Whether to include detailed reasoning analysis
             output_lang: Language for the output question
             user_prompt: Additional instructions for question generation
@@ -465,6 +478,7 @@ class TheTool:
             output = self._operator.run(
                 # User parameters
                 text=text,
+                number_of_questions=number_of_questions,
                 with_analysis=with_analysis,
                 output_lang=output_lang,
                 user_prompt=user_prompt,
@@ -476,7 +490,7 @@ class TheTool:
                 priority=priority,
                 # Internal parameters
                 prompt_file="text_to_question.yaml",
-                output_model=Models.StrOutput,
+                output_model=Models.ReasonListStr,
                 mode=None,
             )
             end = datetime.now()
@@ -504,9 +518,9 @@ class TheTool:
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
+        top_logprobs: int = 3,
         mode: Literal["default", "reason"] = "default",
-        validator: Callable[[Any], bool] | None = None,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -555,7 +569,7 @@ class TheTool:
                 priority=priority,
                 # Internal parameters
                 prompt_file="merge_questions.yaml",
-                output_model=Models.StrOutput,
+                output_model=Models.Str,
                 mode=mode,
             )
             end = datetime.now()
@@ -583,9 +597,9 @@ class TheTool:
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
+        top_logprobs: int = 3,
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
-        validator: Callable[[Any], bool] | None = None,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -633,7 +647,7 @@ class TheTool:
                 priority=priority,
                 # Internal parameters
                 prompt_file="rewrite.yaml",
-                output_model=Models.StrOutput,
+                output_model=Models.Str,
                 mode=mode,
             )
             end = datetime.now()
@@ -662,8 +676,8 @@ class TheTool:
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
-        validator: Callable[[Any], bool] | None = None,
+        top_logprobs: int = 3,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -712,7 +726,7 @@ class TheTool:
                 priority=priority,
                 # Internal parameters
                 prompt_file="subject_to_question.yaml",
-                output_model=Models.ReasonListStrOutput,
+                output_model=Models.ReasonListStr,
                 mode=None,
             )
             end = datetime.now()
@@ -740,8 +754,8 @@ class TheTool:
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
-        validator: Callable[[Any], bool] | None = None,
+        top_logprobs: int = 3,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -788,7 +802,7 @@ class TheTool:
                 priority=priority,
                 # Internal parameters
                 prompt_file="summarize.yaml",
-                output_model=Models.StrOutput,
+                output_model=Models.Str,
                 mode=None,
             )
             end = datetime.now()
@@ -812,12 +826,13 @@ class TheTool:
         self,
         text: str,
         target_language: str,
+        use_chunker: bool = True,
         with_analysis: bool = False,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
-        validator: Callable[[Any], bool] | None = None,
+        top_logprobs: int = 3,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -829,6 +844,7 @@ class TheTool:
         Arguments:
             text: The input text to translate
             target_language: The target language for translation
+            use_chunker: Whether to use text chunker for text length bigger than 1500
             with_analysis: Whether to include detailed reasoning analysis
             user_prompt: Additional instructions for translation
             temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
@@ -852,27 +868,81 @@ class TheTool:
         try:
             start = datetime.now()
-            output = self._operator.run(
-                # User parameters
-                text=text,
-                target_language=target_language,
-                with_analysis=with_analysis,
-                user_prompt=user_prompt,
-                temperature=temperature,
-                logprobs=logprobs,
-                top_logprobs=top_logprobs,
-                validator=validator,
-                max_validation_retries=max_validation_retries,
-                priority=priority,
-                # Internal parameters
-                prompt_file="translate.yaml",
-                output_model=Models.StrOutput,
-                mode=None,
-                output_lang=None,
-            )
-            end = datetime.now()
-            output.execution_time = (end - start).total_seconds()
-            return output
+            if len(text.split(" ")) > 1500 and use_chunker:
+                chunks = text_to_chunks(text, 1200, 0)
+                translation = ""
+                analysis = ""
+                logprobs = []
+                # Run translation for each chunk
+                for chunk in chunks:
+                    chunk_output = self._operator.run(
+                        # User parameters
+                        text=chunk,
+                        target_language=target_language,
+                        with_analysis=with_analysis,
+                        user_prompt=user_prompt,
+                        temperature=temperature,
+                        logprobs=logprobs,
+                        top_logprobs=top_logprobs,
+                        validator=validator,
+                        max_validation_retries=max_validation_retries,
+                        priority=priority,
+                        # Internal parameters
+                        prompt_file="translate.yaml",
+                        output_model=Models.Str,
+                        mode=None,
+                        output_lang=None,
+                    )
+                    # Check for errors from operator
+                    if chunk_output.errors:
+                        output.errors.extend(chunk_output.errors)
+                        end = datetime.now()
+                        output.execution_time = (end - start).total_seconds()
+                        return output
+                    # Concatenate the outputs
+                    translation += chunk_output.result + "\n"
+                    if with_analysis:
+                        analysis += chunk_output.analysis
+                    if logprobs:
+                        logprobs += chunk_output.logprobs
+                end = datetime.now()
+                output = Models.ToolOutput(
+                    result=translation,
+                    logprobs=logprobs,
+                    analysis=analysis,
+                    process="translate",
+                    execution_time=(end - start).total_seconds(),
+                )
+                return output
+            else:
+                output = self._operator.run(
+                    # User parameters
+                    text=text,
+                    target_language=target_language,
+                    with_analysis=with_analysis,
+                    user_prompt=user_prompt,
+                    temperature=temperature,
+                    logprobs=logprobs,
+                    top_logprobs=top_logprobs,
+                    validator=validator,
+                    max_validation_retries=max_validation_retries,
+                    priority=priority,
+                    # Internal parameters
+                    prompt_file="translate.yaml",
+                    output_model=Models.Str,
+                    mode=None,
+                    output_lang=None,
+                )
+                end = datetime.now()
+                output.execution_time = (end - start).total_seconds()
+                return output
         except PromptError as e:
             output.errors.append(f"Prompt error: {e}")
@@ -895,8 +965,8 @@ class TheTool:
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
-        validator: Callable[[Any], bool] | None = None,
+        top_logprobs: int = 3,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -945,7 +1015,7 @@ class TheTool:
                 priority=priority,
                 # Internal parameters
                 prompt_file="propositionize.yaml",
-                output_model=Models.ListStrOutput,
+                output_model=Models.ListStr,
                 mode=None,
             )
             end = datetime.now()
@@ -974,8 +1044,8 @@ class TheTool:
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
-        top_logprobs: int | None = None,
-        validator: Callable[[Any], bool] | None = None,
+        top_logprobs: int = 3,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:
@@ -1024,7 +1094,7 @@ class TheTool:
                 priority=priority,
                 # Internal parameters
                 prompt_file="check_fact.yaml",
-                output_model=Models.BoolOutput,
+                output_model=Models.Bool,
                 mode=None,
                 source_text=source_text,
             )
@@ -1048,14 +1118,14 @@ class TheTool:
     def run_custom(
         self,
         prompt: str,
-        output_model: Any,
+        output_model: object,
         with_analysis: bool = False,
         analyze_template: str | None = None,
         output_lang: str | None = None,
         temperature: float | None = None,
         logprobs: bool | None = None,
-        top_logprobs: int | None = None,
-        validator: Callable[[Any], bool] | None = None,
+        top_logprobs: int = 3,
+        validator: Callable[[object], bool] | None = None,
         max_validation_retries: int | None = None,
         priority: int | None = 0,
     ) -> Models.ToolOutput:

texttools/internals/formatters.py DELETED Viewed

@@ -1,24 +0,0 @@
-class Formatter:
-    @staticmethod
-    def user_merge_format(messages: list[dict[str, str]]) -> list[dict[str, str]]:
-        """
-        Merges consecutive user messages into a single message, separated by newlines.
-        This is useful for condensing a multi-turn user input into a single
-        message for the LLM. Assistant and system messages are left unchanged and
-        act as separators between user message groups.
-        """
-        merged: list[dict[str, str]] = []
-        for message in messages:
-            role, content = message["role"], message["content"].strip()
-            # Merge with previous user turn
-            if merged and role == "user" and merged[-1]["role"] == "user":
-                merged[-1]["content"] += "\n" + content
-            # Otherwise, start a new turn
-            else:
-                merged.append({"role": role, "content": content})
-        return merged

{hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.20.dist-info}/WHEEL RENAMED Viewed

File without changes

{hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.20.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.20.dist-info}/top_level.txt RENAMED Viewed

File without changes

hamtaa-texttools 1.1.19__py3-none-any.whl → 1.1.20__py3-none-any.whl

hamtaa-texttools 1.1.19__py3-none-any.whl → 1.1.20__py3-none-any.whl