hamtaa-texttools 1.1.18__py3-none-any.whl → 1.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
@@ -1,5 +1,5 @@
  from datetime import datetime
- from typing import Literal, Any
+ from typing import Literal
  from collections.abc import Callable

  from openai import AsyncOpenAI
@@ -12,6 +12,7 @@ from texttools.internals.exceptions import (
  LLMError,
  ValidationError,
  )
+ from texttools.internals.text_to_chunks import text_to_chunks


  class AsyncTheTool:
@@ -35,15 +36,17 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
+ top_logprobs: int = 3,
  mode: Literal["category_list", "category_tree"] = "category_list",
- validator: Callable[[Any], bool] | None = None,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
  """
  Categorize a text into a category / category tree.

+ Important Note: category_tree mode is EXPERIMENTAL, you can use it but it isn't reliable.
+
  Arguments:
  text: The input text to categorize
  categories: The category / category_tree to give to LLM
@@ -73,11 +76,11 @@ class AsyncTheTool:
  start = datetime.now()

  if mode == "category_tree":
- # Initializations
- output = Models.ToolOutput()
  levels = categories.get_level_count()
  parent_id = 0
- final_output = []
+ final_categories = []
+ analysis = ""
+ logprobs = []

  for _ in range(levels):
  # Get child nodes for current parent
@@ -100,7 +103,7 @@ class AsyncTheTool:
  ]
  category_names = [node.name for node in children]

- # Run categorization for this level
+ # Run categorization for current level
  level_output = await self._operator.run(
  # User parameters
  text=text,
@@ -141,16 +144,22 @@ class AsyncTheTool:
  return output

  parent_id = parent_node.node_id
- final_output.append(parent_node.name)
+ final_categories.append(parent_node.name)

- # Copy analysis/logprobs/process from the last level's output
- output.analysis = level_output.analysis
- output.logprobs = level_output.logprobs
- output.process = level_output.process
+ if with_analysis:
+ analysis += level_output.analysis
+ if logprobs:
+ logprobs += level_output.logprobs

- output.result = final_output
  end = datetime.now()
- output.execution_time = (end - start).total_seconds()
+ output = Models.ToolOutput(
+ result=final_categories,
+ logprobs=logprobs,
+ analysis=analysis,
+ process="categorize",
+ execution_time=(end - start).total_seconds(),
+ )
+
  return output

  else:
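
For orientation, a minimal usage sketch of the reworked category_tree path; the tool and my_tree names are illustrative (constructing AsyncTheTool is outside this diff), and the mode is flagged EXPERIMENTAL in the new docstring:

# Hypothetical sketch; `tool` is an AsyncTheTool instance and `my_tree`
# a category tree exposing get_level_count(), as the code above assumes.
async def demo(tool, my_tree):
    output = await tool.categorize(
        text="The central bank raised interest rates again.",
        categories=my_tree,
        mode="category_tree",  # EXPERIMENTAL per the note added in 1.1.20
        with_analysis=True,    # per-level analyses are now concatenated
    )
    print(output.result)          # one chosen category name per tree level
    print(output.execution_time)  # seconds, set on the freshly built ToolOutput

Behavioral change worth noting: 1.1.18 kept only the last level's analysis/logprobs/process, while 1.1.20 accumulates analysis and logprobs across levels and constructs the ToolOutput once at the end.
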
@@ -197,10 +206,10 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
+ top_logprobs: int = 3,
  mode: Literal["auto", "threshold", "count"] = "auto",
  number_of_keywords: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -249,7 +258,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="extract_keywords.yaml",
- output_model=Models.ListStrOutput,
+ output_model=Models.ListStr,
  )
  end = datetime.now()
  output.execution_time = (end - start).total_seconds()
@@ -271,13 +280,14 @@ class AsyncTheTool:
  async def extract_entities(
  self,
  text: str,
+ entities: list[str] | None = None,
  with_analysis: bool = False,
  output_lang: str | None = None,
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -286,6 +296,7 @@ class AsyncTheTool:

  Arguments:
  text: The input text to extract entities from
+ entities: List of entities provided by user (Optional)
  with_analysis: Whether to include detailed reasoning analysis
  output_lang: Language for the output response
  user_prompt: Additional instructions for entity extraction
@@ -313,6 +324,8 @@ class AsyncTheTool:
  output = await self._operator.run(
  # User parameters
  text=text,
+ entities=entities
+ or "all named entities (e.g., PER, ORG, LOC, DAT, etc.)",
  with_analysis=with_analysis,
  output_lang=output_lang,
  user_prompt=user_prompt,
@@ -324,7 +337,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="extract_entities.yaml",
- output_model=Models.ListDictStrStrOutput,
+ output_model=Models.ListDictStrStr,
  mode=None,
  )
  end = datetime.now()
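
A minimal sketch of the new entities filter (tool remains illustrative); the fallback string is taken verbatim from the call site above:

# Hypothetical sketch of the optional `entities` parameter added in 1.1.20.
async def demo(tool):
    output = await tool.extract_entities(
        text="Ada Lovelace met Charles Babbage in London in 1833.",
        entities=["PER", "LOC"],  # restrict extraction to these entity types
    )
    # Omitting `entities` makes the prompt fall back to
    # "all named entities (e.g., PER, ORG, LOC, DAT, etc.)".
    print(output.result)  # str-to-str mappings per Models.ListDictStrStr
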
@@ -351,8 +364,8 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -397,7 +410,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="is_question.yaml",
- output_model=Models.BoolOutput,
+ output_model=Models.Bool,
  mode=None,
  output_lang=None,
  )
@@ -421,13 +434,14 @@ class AsyncTheTool:
  async def text_to_question(
  self,
  text: str,
+ number_of_questions: int,
  with_analysis: bool = False,
  output_lang: str | None = None,
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -436,6 +450,7 @@ class AsyncTheTool:

  Arguments:
  text: The input text to generate a question from
+ number_of_questions: Number of questions to generate
  with_analysis: Whether to include detailed reasoning analysis
  output_lang: Language for the output question
  user_prompt: Additional instructions for question generation
@@ -463,6 +478,7 @@ class AsyncTheTool:
  output = await self._operator.run(
  # User parameters
  text=text,
+ number_of_questions=number_of_questions,
  with_analysis=with_analysis,
  output_lang=output_lang,
  user_prompt=user_prompt,
@@ -474,7 +490,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="text_to_question.yaml",
- output_model=Models.StrOutput,
+ output_model=Models.ReasonListStr,
  mode=None,
  )
  end = datetime.now()
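
Since number_of_questions has no default, existing 1.1.18 call sites of text_to_question must be updated; a minimal sketch under the signatures above:

# Hypothetical sketch; `number_of_questions` is required as of 1.1.20.
async def demo(tool):
    output = await tool.text_to_question(
        text="Photosynthesis converts light energy into chemical energy.",
        number_of_questions=3,
    )
    # The output model changed from Models.StrOutput to Models.ReasonListStr,
    # so result is a list of questions rather than a single string.
    print(output.result)
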
@@ -502,9 +518,9 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
+ top_logprobs: int = 3,
  mode: Literal["default", "reason"] = "default",
- validator: Callable[[Any], bool] | None = None,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -553,7 +569,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="merge_questions.yaml",
- output_model=Models.StrOutput,
+ output_model=Models.Str,
  mode=mode,
  )
  end = datetime.now()
@@ -581,9 +597,9 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
+ top_logprobs: int = 3,
  mode: Literal["positive", "negative", "hard_negative"] = "positive",
- validator: Callable[[Any], bool] | None = None,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -631,7 +647,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="rewrite.yaml",
- output_model=Models.StrOutput,
+ output_model=Models.Str,
  mode=mode,
  )
  end = datetime.now()
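
The same two signature changes recur across every tool in this release: validator is now typed Callable[[object], bool] (hence the dropped typing.Any import), and top_logprobs defaults to 3 instead of None. A minimal validator sketch against rewrite, with illustrative names:

# Hypothetical sketch; a validator receives the parsed result, returns bool.
def non_empty(result: object) -> bool:
    return bool(result)

async def demo(tool):
    output = await tool.rewrite(
        text="The quick brown fox jumps over the lazy dog.",
        mode="hard_negative",
        validator=non_empty,       # retried up to max_validation_retries
        max_validation_retries=2,
        logprobs=True,             # top_logprobs now defaults to 3
    )
    return output
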
@@ -660,8 +676,8 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -710,7 +726,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="subject_to_question.yaml",
- output_model=Models.ReasonListStrOutput,
+ output_model=Models.ReasonListStr,
  mode=None,
  )
  end = datetime.now()
@@ -738,8 +754,8 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
@@ -786,7 +802,7 @@ class AsyncTheTool:
  priority=priority,
  # Internal parameters
  prompt_file="summarize.yaml",
- output_model=Models.StrOutput,
+ output_model=Models.Str,
  mode=None,
  )
  end = datetime.now()
@@ -810,21 +826,25 @@ class AsyncTheTool:
  self,
  text: str,
  target_language: str,
+ use_chunker: bool = True,
  with_analysis: bool = False,
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
  """
  Translate text between languages.

+ Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
+
  Arguments:
  text: The input text to translate
  target_language: The target language for translation
+ use_chunker: Whether to use text chunker for text length bigger than 1500
  with_analysis: Whether to include detailed reasoning analysis
  user_prompt: Additional instructions for translation
  temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
@@ -848,27 +868,81 @@ class AsyncTheTool:

  try:
  start = datetime.now()
- output = await self._operator.run(
- # User parameters
- text=text,
- target_language=target_language,
- with_analysis=with_analysis,
- user_prompt=user_prompt,
- temperature=temperature,
- logprobs=logprobs,
- top_logprobs=top_logprobs,
- validator=validator,
- max_validation_retries=max_validation_retries,
- priority=priority,
- # Internal parameters
- prompt_file="translate.yaml",
- output_model=Models.StrOutput,
- mode=None,
- output_lang=None,
- )
- end = datetime.now()
- output.execution_time = (end - start).total_seconds()
- return output
+
+ if len(text.split(" ")) > 1500 and use_chunker:
+ chunks = text_to_chunks(text, 1200, 0)
+
+ translation = ""
+ analysis = ""
+ logprobs = []
+
+ # Run translation for each chunk
+ for chunk in chunks:
+ chunk_output = await self._operator.run(
+ # User parameters
+ text=chunk,
+ target_language=target_language,
+ with_analysis=with_analysis,
+ user_prompt=user_prompt,
+ temperature=temperature,
+ logprobs=logprobs,
+ top_logprobs=top_logprobs,
+ validator=validator,
+ max_validation_retries=max_validation_retries,
+ priority=priority,
+ # Internal parameters
+ prompt_file="translate.yaml",
+ output_model=Models.Str,
+ mode=None,
+ output_lang=None,
+ )
+
+ # Check for errors from operator
+ if chunk_output.errors:
+ output.errors.extend(chunk_output.errors)
+ end = datetime.now()
+ output.execution_time = (end - start).total_seconds()
+ return output
+
+ # Concatenate the outputs
+ translation += chunk_output.result + "\n"
+ if with_analysis:
+ analysis += chunk_output.analysis
+ if logprobs:
+ logprobs += chunk_output.logprobs
+
+ end = datetime.now()
+ output = Models.ToolOutput(
+ result=translation,
+ logprobs=logprobs,
+ analysis=analysis,
+ process="translate",
+ execution_time=(end - start).total_seconds(),
+ )
+ return output
+
+ else:
+ output = await self._operator.run(
+ # User parameters
+ text=text,
+ target_language=target_language,
+ with_analysis=with_analysis,
+ user_prompt=user_prompt,
+ temperature=temperature,
+ logprobs=logprobs,
+ top_logprobs=top_logprobs,
+ validator=validator,
+ max_validation_retries=max_validation_retries,
+ priority=priority,
+ # Internal parameters
+ prompt_file="translate.yaml",
+ output_model=Models.Str,
+ mode=None,
+ output_lang=None,
+ )
+ end = datetime.now()
+ output.execution_time = (end - start).total_seconds()
+ return output

  except PromptError as e:
  output.errors.append(f"Prompt error: {e}")
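
A minimal sketch of the new chunked translation path: per the code above, inputs longer than 1500 whitespace-separated words are split via text_to_chunks(text, 1200, 0), each chunk is translated separately, and the pieces are joined with newlines. If any chunk fails, the loop returns early with the accumulated errors rather than a partial translation.

# Hypothetical sketch; `long_text` stands in for any document over 1500 words.
async def demo(tool):
    long_text = " ".join(["word"] * 2000)
    output = await tool.translate(
        text=long_text,
        target_language="German",
        use_chunker=True,  # default; False sends the full text in one call
    )
    print(output.result)   # per-chunk translations concatenated with "\n"

Note also that the chunked branch rebinds the boolean logprobs parameter to a list of the same name before the loop, so the subsequent `if logprobs:` accumulation tests that list rather than the caller's flag.
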
@@ -883,7 +957,7 @@ class AsyncTheTool:

  return output

- async def detect_entity(
+ async def propositionize(
  self,
  text: str,
  with_analysis: bool = False,
@@ -891,13 +965,15 @@ class AsyncTheTool:
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
  """
- Detects entities in a given text based on the entity_detector.yaml prompt.
+ Proposition input text to meaningful sentences.
+
+ Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.

  Arguments:
  text: The input text
@@ -913,7 +989,7 @@ class AsyncTheTool:

  Returns:
  ToolOutput: Object containing:
- - result (list[Entity]): The entities
+ - result (list[str]): The propositions
  - logprobs (list | None): Probability data if logprobs enabled
  - analysis (str | None): Detailed reasoning if with_analysis enabled
  - process (str | None): Description of the process used
@@ -938,8 +1014,8 @@ class AsyncTheTool:
  max_validation_retries=max_validation_retries,
  priority=priority,
  # Internal parameters
- prompt_file="detect_entity.yaml",
- output_model=Models.EntityDetectorOutput,
+ prompt_file="propositionize.yaml",
+ output_model=Models.ListStr,
  mode=None,
  )
  end = datetime.now()
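
A minimal sketch of propositionize, which takes over the slot of the removed detect_entity tool (illustrative names; flagged EXPERIMENTAL above):

# Hypothetical sketch; the result is Models.ListStr, replacing the old
# Models.EntityDetectorOutput that detect_entity returned.
async def demo(tool):
    output = await tool.propositionize(
        text="Marie Curie, who won two Nobel Prizes, was born in Warsaw.",
    )
    print(output.result)  # self-contained propositions, one per list entry
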
@@ -959,24 +1035,28 @@ class AsyncTheTool:

  return output

- async def propositionize(
+ async def check_fact(
  self,
  text: str,
+ source_text: str,
  with_analysis: bool = False,
  output_lang: str | None = None,
  user_prompt: str | None = None,
  temperature: float | None = 0.0,
  logprobs: bool = False,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
  """
- Proposition input text to meaningful sentences.
+ Checks wheather a statement is relevant to the source text or not.
+
+ Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.

  Arguments:
  text: The input text
+ source_text: the source text that we want to check relation of text to it
  with_analysis: Whether to include detailed reasoning analysis
  output_lang: Language for the output summary
  user_prompt: Additional instructions for summarization
@@ -989,7 +1069,7 @@ class AsyncTheTool:

  Returns:
  ToolOutput: Object containing:
- - result (list[str]): The propositions
+ - result (bool): statement is relevant to source text or not
  - logprobs (list | None): Probability data if logprobs enabled
  - analysis (str | None): Detailed reasoning if with_analysis enabled
  - process (str | None): Description of the process used
@@ -1014,14 +1094,14 @@ class AsyncTheTool:
  max_validation_retries=max_validation_retries,
  priority=priority,
  # Internal parameters
- prompt_file="propositionize.yaml",
- output_model=Models.ListStrOutput,
+ prompt_file="check_fact.yaml",
+ output_model=Models.Bool,
  mode=None,
+ source_text=source_text,
  )
  end = datetime.now()
  output.execution_time = (end - start).total_seconds()
  return output
-
  except PromptError as e:
  output.errors.append(f"Prompt error: {e}")
  except LLMError as e:
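
A minimal sketch of the new check_fact tool under the signature above (illustrative strings; flagged EXPERIMENTAL):

# Hypothetical sketch; `source_text` is forwarded to the check_fact.yaml prompt.
async def demo(tool):
    output = await tool.check_fact(
        text="The library supports asynchronous usage.",
        source_text="hamtaa-texttools exposes AsyncTheTool for async workflows.",
    )
    print(output.result)  # bool: is the statement relevant to source_text?
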
@@ -1038,20 +1118,27 @@ class AsyncTheTool:
  async def run_custom(
  self,
  prompt: str,
- output_model: Any,
+ output_model: object,
+ with_analysis: bool = False,
+ analyze_template: str | None = None,
  output_lang: str | None = None,
  temperature: float | None = None,
  logprobs: bool | None = None,
- top_logprobs: int | None = None,
- validator: Callable[[Any], bool] | None = None,
+ top_logprobs: int = 3,
+ validator: Callable[[object], bool] | None = None,
  max_validation_retries: int | None = None,
  priority: int | None = 0,
  ) -> Models.ToolOutput:
  """
  Custom tool that can do almost anything!

+ Important Note: This tool is EXPERIMENTAL, you can use it but it isn't reliable.
+
  Arguments:
- text: The user prompt
+ prompt: The user prompt
+ output_model: Pydantic BaseModel used for structured output
+ with_analysis: Whether to include detailed reasoning analysis
+ analyze_template: The analyze template used for reasoning analysis
  output_lang: Language for the output summary
  temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
  logprobs: Whether to return token probability information
@@ -1078,6 +1165,8 @@ class AsyncTheTool:
  # User paramaeters
  text=prompt,
  output_model=output_model,
+ with_analysis=with_analysis,
+ analyze_template=analyze_template,
  output_model_str=output_model.model_json_schema(),
  output_lang=output_lang,
  temperature=temperature,
@@ -1089,7 +1178,6 @@ class AsyncTheTool:
  # Internal parameters
  prompt_file="run_custom.yaml",
  user_prompt=None,
- with_analysis=False,
  mode=None,
  )
  end = datetime.now()
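
Finally, a minimal sketch of the extended run_custom; the updated docstring says output_model is a Pydantic BaseModel, which is consistent with the model_json_schema() call above (names below are illustrative):

# Hypothetical sketch; `Sentiment` is an illustrative Pydantic model.
from pydantic import BaseModel

class Sentiment(BaseModel):
    label: str
    score: float

async def demo(tool):
    output = await tool.run_custom(
        prompt="Classify the sentiment of: 'I love this library!'",
        output_model=Sentiment,
        with_analysis=True,     # no longer hard-coded to False internally
        analyze_template=None,  # optional reasoning template, new in 1.1.20
    )
    print(output.result)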