hamtaa-texttools 1.0.7__tar.gz → 1.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of hamtaa-texttools might be problematic.
- {hamtaa_texttools-1.0.7/hamtaa_texttools.egg-info → hamtaa_texttools-1.0.8}/PKG-INFO +13 -22
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/README.md +12 -21
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8/hamtaa_texttools.egg-info}/PKG-INFO +13 -22
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/SOURCES.txt +1 -2
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/pyproject.toml +1 -1
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/async_the_tool.py +60 -15
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/async_operator.py +9 -23
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/base_operator.py +2 -2
- hamtaa_texttools-1.0.8/texttools/tools/internals/formatters.py +24 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/operator.py +10 -25
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/prompt_loader.py +3 -12
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/the_tool.py +13 -70
- hamtaa_texttools-1.0.7/texttools/formatters/base_formatter.py +0 -33
- hamtaa_texttools-1.0.7/texttools/formatters/user_merge_formatter.py +0 -30
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/LICENSE +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/setup.cfg +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/__init__.py +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/__init__.py +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/batch_manager.py +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/batch_runner.py +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/README.md +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/categorizer.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/extract_entities.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/extract_keywords.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/is_question.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/merge_questions.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/rewrite.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/run_custom.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/subject_to_question.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/summarize.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/text_to_question.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/translate.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/__init__.py +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/output_models.py +0 -0
{hamtaa_texttools-1.0.7/hamtaa_texttools.egg-info → hamtaa_texttools-1.0.8}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.0.7
+Version: 1.0.8
 Summary: TextTools is a high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
@@ -42,8 +42,6 @@ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for ma
 
 It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extractor, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
 
-**Thread Safety:** All methods in AsyncTheTool are thread-safe, allowing concurrent usage across multiple threads without conflicts.
-
 ---
 
 ## ✨ Features
@@ -78,7 +76,11 @@ Note: This doubles token usage per call because it triggers an additional LLM re
 
 - **`user_prompt="..."`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
 
-
+- **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
+
+All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
+
+**Note:** There might be some tools that don't support some of the parameters above.
 
 ---
 
@@ -104,7 +106,6 @@ pip install -U hamtaa-texttools
 
 ```python
 from openai import OpenAI
-from pydantic import BaseModel
 from texttools import TheTool
 
 # Create your OpenAI client
@@ -114,29 +115,19 @@ client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
 model = "gpt-4o-mini"
 
 # Create an instance of TheTool
-
-the_tool = TheTool(client=client, model=model, with_analysis=True, output_lang="English")
+the_tool = TheTool(client=client, model=model)
 
 # Example: Question Detection
 detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection["result"])
 print(detection["logprobs"])
-# Output: True
+# Output: True \n --logprobs
 
 # Example: Translation
-
-print(
-
-
-# Example: Custom Tool
-# Note: Output model should only contain result key
-# Everything else will be ignored
-class Custom(BaseModel):
-    result: list[list[dict[str, int]]]
-
-custom_prompt = "Something"
-custom_result = the_tool.run_custom(custom_prompt, Custom)
-print(custom_result)
+translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
+print(translation["result"])
+print(translation["analysis"])
+# Output: "Hi! How are you?" \n --analysis
 ```
 
 ---
@@ -149,7 +140,7 @@ from openai import AsyncOpenAI
 from texttools import AsyncTheTool
 
 async def main():
-    # Create your
+    # Create your AsyncOpenAI client
     async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
 
     # Specify the model
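Note that the new quickstart's translation example, as released, is missing a comma between the positional text argument and `target_language`, so the line is a Python syntax error as written. A corrected sketch of the intended call (the keyword names match the `translate` signature in the `the_tool.py` diff further down this page):

```python
# Corrected form of the README example: comma added after the text argument.
translation = the_tool.translate(
    "سلام، حالت چطوره؟",         # input text (Persian: "Hi, how are you?")
    target_language="English",
    with_analysis=True,           # extra LLM analysis pass; doubles token usage per call
)
print(translation["result"])    # e.g. "Hi! How are you?"
print(translation["analysis"])  # present because with_analysis=True
```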
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/README.md RENAMED

@@ -8,8 +8,6 @@ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for ma
 
 It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extractor, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
 
-**Thread Safety:** All methods in AsyncTheTool are thread-safe, allowing concurrent usage across multiple threads without conflicts.
-
 ---
 
 ## ✨ Features
@@ -44,7 +42,11 @@ Note: This doubles token usage per call because it triggers an additional LLM re
 
 - **`user_prompt="..."`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
 
-
+- **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
+
+All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
+
+**Note:** There might be some tools that don't support some of the parameters above.
 
 ---
 
@@ -70,7 +72,6 @@ pip install -U hamtaa-texttools
 
 ```python
 from openai import OpenAI
-from pydantic import BaseModel
 from texttools import TheTool
 
 # Create your OpenAI client
@@ -80,29 +81,19 @@ client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
 model = "gpt-4o-mini"
 
 # Create an instance of TheTool
-
-the_tool = TheTool(client=client, model=model, with_analysis=True, output_lang="English")
+the_tool = TheTool(client=client, model=model)
 
 # Example: Question Detection
 detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection["result"])
 print(detection["logprobs"])
-# Output: True
+# Output: True \n --logprobs
 
 # Example: Translation
-
-print(
-
-
-# Example: Custom Tool
-# Note: Output model should only contain result key
-# Everything else will be ignored
-class Custom(BaseModel):
-    result: list[list[dict[str, int]]]
-
-custom_prompt = "Something"
-custom_result = the_tool.run_custom(custom_prompt, Custom)
-print(custom_result)
+translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
+print(translation["result"])
+print(translation["analysis"])
+# Output: "Hi! How are you?" \n --analysis
 ```
 
 ---
@@ -115,7 +106,7 @@ from openai import AsyncOpenAI
 from texttools import AsyncTheTool
 
 async def main():
-    # Create your
+    # Create your AsyncOpenAI client
    async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
 
     # Specify the model
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8/hamtaa_texttools.egg-info}/PKG-INFO RENAMED

(Identical to the PKG-INFO diff at the top of this page: the 1.0.7 → 1.0.8 version bump, the removed thread-safety note, the new parameter docs, and the rewritten quickstart examples.)
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/SOURCES.txt RENAMED

@@ -11,8 +11,6 @@ texttools/__init__.py
 texttools/batch/__init__.py
 texttools/batch/batch_manager.py
 texttools/batch/batch_runner.py
-texttools/formatters/base_formatter.py
-texttools/formatters/user_merge_formatter.py
 texttools/prompts/README.md
 texttools/prompts/categorizer.yaml
 texttools/prompts/extract_entities.yaml
@@ -30,6 +28,7 @@ texttools/tools/async_the_tool.py
 texttools/tools/the_tool.py
 texttools/tools/internals/async_operator.py
 texttools/tools/internals/base_operator.py
+texttools/tools/internals/formatters.py
 texttools/tools/internals/operator.py
 texttools/tools/internals/output_models.py
 texttools/tools/internals/prompt_loader.py
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/async_the_tool.py RENAMED

@@ -10,6 +10,9 @@ class AsyncTheTool:
     """
     Async counterpart to TheTool.
 
+    Each method configures the async operator with a specific YAML prompt,
+    output schema, and flags, then delegates execution to `operator.run()`.
+
     Usage:
         async_client = AsyncOpenAI(...)
         tool = TheToolAsync(async_client, model="model-name")
@@ -27,7 +30,6 @@ class AsyncTheTool:
         self,
         text: str,
         with_analysis: bool = False,
-        output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
@@ -36,20 +38,13 @@ class AsyncTheTool:
         """
         Categorize a text into a single Islamic studies domain category.
 
-        Args:
-            text: Input string to categorize.
-            with_analysis: If True, first runs an LLM "analysis" step and
-                conditions the main prompt on that analysis.
-
         Returns:
-            {"result": <category string>}
-            Example: {"result": "باورهای دینی"}
+            {"result": <category string>} + ("logprobs" and "analysis" if enabled)
         """
         return await self.operator.run(
             # User parameters
             text=text,
             with_analysis=with_analysis,
-            output_lang=output_lang,
             user_prompt=user_prompt,
             temperature=temperature,
             logprobs=logprobs,
@@ -59,6 +54,7 @@ class AsyncTheTool:
             output_model=OutputModels.CategorizerOutput,
             resp_format="parse",
             mode=None,
+            output_lang=None,
         )
 
     async def extract_keywords(
@@ -71,6 +67,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, list[str]]:
+        """
+        Extract salient keywords from text.
+
+        Returns:
+            {"result": [<keyword1>, <keyword2>, ...]} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -97,6 +99,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, list[dict[str, str]]]:
+        """
+        Perform Named Entity Recognition (NER) over the input text.
+
+        Returns:
+            {"result": [{"text": <entity>, "type": <entity_type>}, ...]} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -122,6 +130,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, bool]:
+        """
+        Detect if the input is phrased as a question.
+
+        Returns:
+            {"result": True} or {"result": False} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -148,6 +162,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, str]:
+        """
+        Generate a single question from the given text.
+
+        Returns:
+            {"result": <generated_question>} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -175,6 +195,12 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         mode: Literal["default", "reason"] = "default",
     ) -> dict[str, str]:
+        """
+        Merge multiple questions into a single unified question.
+
+        Returns:
+            {"result": <merged_question>} + ("logprobs" and "analysis" if enabled)
+        """
         text = ", ".join(text)
         return await self.operator.run(
             # User parameters
@@ -203,6 +229,12 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
     ) -> dict[str, str]:
+        """
+        Rewrite a text with different modes.
+
+        Returns:
+            {"result": <rewritten_text>} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -230,6 +262,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, list[str]]:
+        """
+        Generate a list of questions about a subject.
+
+        Returns:
+            {"result": [<question1>, <question2>, ...]} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -257,6 +295,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, str]:
+        """
+        Summarize the given subject text.
+
+        Returns:
+            {"result": <summary>} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -278,18 +322,22 @@ class AsyncTheTool:
         text: str,
         target_language: str,
         with_analysis: bool = False,
-        output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, str]:
+        """
+        Translate text between languages.
+
+        Returns:
+            {"result": <translated_text>} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
             target_language=target_language,
             with_analysis=with_analysis,
-            output_lang=output_lang,
             user_prompt=user_prompt,
             temperature=temperature,
             logprobs=logprobs,
@@ -299,6 +347,7 @@ class AsyncTheTool:
             output_model=OutputModels.StrOutput,
             resp_format="parse",
             mode=None,
+            output_lang=None,
         )
 
     async def run_custom(
@@ -313,10 +362,6 @@ class AsyncTheTool:
         """
         Custom tool that can do almost anything!
 
-        Args:
-            prompt: Custom prompt.
-            output_model: Custom BaseModel output model.
-
         Returns:
             {"result": <Any>}
         """
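The new per-method docstrings make the return contract uniform: every tool resolves to {"result": ...}, with "logprobs" and "analysis" keys added only when those flags are set. A minimal usage sketch of the async API under that contract; the `client=`/`model=` keyword form is assumed from the sync quickstart, and the URL, key, and model name are placeholders:

```python
import asyncio

from openai import AsyncOpenAI
from texttools import AsyncTheTool

async def main():
    async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
    tool = AsyncTheTool(client=async_client, model="gpt-4o-mini")  # kwargs assumed from the sync API

    detection = await tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
    print(detection["result"])    # True or False
    print(detection["logprobs"])  # present only because logprobs=True

    keywords = await tool.extract_keywords("TextTools is a high-level NLP toolkit.", with_analysis=True)
    print(keywords["result"])     # list of keyword strings
    print(keywords["analysis"])   # present only because with_analysis=True

asyncio.run(main())
```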
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/async_operator.py RENAMED

@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from typing import Any, TypeVar, Type, Literal
 import logging
 
@@ -7,9 +5,7 @@ from openai import AsyncOpenAI
 from pydantic import BaseModel
 
 from texttools.tools.internals.base_operator import BaseOperator
-from texttools.formatters import (
-    UserMergeFormatter,
-)
+from texttools.tools.internals.formatters import Formatter
 from texttools.tools.internals.prompt_loader import PromptLoader
 
 # Base Model type for output models
@@ -31,14 +27,12 @@ class AsyncOperator(BaseOperator):
     """
 
     def __init__(self, client: AsyncOpenAI, model: str):
-        self.client
+        self.client = client
         self.model = model
 
-    async def _analysis_completion(
-        self,
-        analyze_message
-        temperature: float,
-    ) -> str:
+    async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        analyze_prompt = prompt_configs["analyze_template"]
+        analyze_message = [self._build_user_message(analyze_prompt)]
         completion = await self.client.chat.completions.create(
             model=self.model,
             messages=analyze_message,
@@ -47,12 +41,6 @@ class AsyncOperator(BaseOperator):
         analysis = completion.choices[0].message.content.strip()
         return analysis
 
-    async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
-        analyze_prompt = prompt_configs["analyze_template"]
-        analyze_message = [self._build_user_message(analyze_prompt)]
-        analysis = await self._analysis_completion(analyze_message, temperature)
-        return analysis
-
     async def _parse_completion(
         self,
         message: list[dict[str, str]],
@@ -126,14 +114,12 @@ class AsyncOperator(BaseOperator):
         Execute the async LLM pipeline with the given input text. (Async)
         """
         prompt_loader = PromptLoader()
-        formatter = UserMergeFormatter()
+        formatter = Formatter()
 
         try:
-            cleaned_text = text.strip()
-
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
-                text=cleaned_text,
+                text=text.strip(),
                 mode=mode,
                 **extra_kwargs,
             )
@@ -159,7 +145,7 @@ class AsyncOperator(BaseOperator):
         )
 
         messages.append(self._build_user_message(prompt_configs["main_template"]))
-        messages = formatter.format(messages)
+        messages = formatter.user_merge_format(messages)
 
         if resp_format == "vllm":
             parsed, completion = await self._vllm_completion(
@@ -188,4 +174,4 @@ class AsyncOperator(BaseOperator):
 
         except Exception as e:
             logger.error(f"Async TheTool failed: {e}")
-            return {"
+            return {"error": str(e), "result": ""}
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/base_operator.py RENAMED

@@ -1,4 +1,4 @@
-from typing import TypeVar, Type
+from typing import TypeVar, Type, Any
 import json
 import re
 import math
@@ -55,7 +55,7 @@ class BaseOperator:
         # Convert dictionary to output model
         return output_model(**response_dict)
 
-    def _extract_logprobs(self, completion: dict):
+    def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
         logprobs_data = []
         ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
 
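In `_extract_logprobs`, the `ignore_pattern` regex decides which tokens contribute logprob entries: it skips the literal key `result` and any token consisting only of whitespace or JSON punctuation, keeping just the content-bearing answer tokens. A small standalone check of the pattern; the token split below is illustrative, not taken from a real completion:

```python
import re

ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')

# Tokens roughly as an LLM might emit them for the JSON payload {"result": "True"}
tokens = ['{"', 'result', '":', ' "', 'True', '"}']
kept = [t for t in tokens if not ignore_pattern.match(t)]
print(kept)  # ['True']: only the answer token is kept for logprob extraction
```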
hamtaa_texttools-1.0.8/texttools/tools/internals/formatters.py ADDED

@@ -0,0 +1,24 @@
+class Formatter:
+    @staticmethod
+    def user_merge_format(messages: list[dict[str, str]]) -> list[dict[str, str]]:
+        """
+        Merges consecutive user messages into a single message, separated by newlines.
+
+        This is useful for condensing a multi-turn user input into a single
+        message for the LLM. Assistant and system messages are left unchanged and
+        act as separators between user message groups.
+        """
+        merged: list[dict[str, str]] = []
+
+        for message in messages:
+            role, content = message["role"], message["content"].strip()
+
+            # Merge with previous user turn
+            if merged and role == "user" and merged[-1]["role"] == "user":
+                merged[-1]["content"] += "\n" + content
+
+            # Otherwise, start a new turn
+            else:
+                merged.append({"role": role, "content": content})
+
+        return merged
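Because the new `formatters.py` is added in full above, its behavior is easy to demonstrate: consecutive `user` messages collapse into one newline-joined message, matching how the operators append the main template and then call `formatter.user_merge_format(messages)`. A quick example:

```python
from texttools.tools.internals.formatters import Formatter

messages = [
    {"role": "user", "content": "analysis of the input..."},
    {"role": "user", "content": "main prompt template"},
]
print(Formatter.user_merge_format(messages))
# [{'role': 'user', 'content': 'analysis of the input...\nmain prompt template'}]
```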
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/operator.py RENAMED

@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from typing import Any, TypeVar, Type, Literal
 import logging
 
@@ -7,9 +5,7 @@ from openai import OpenAI
 from pydantic import BaseModel
 
 from texttools.tools.internals.base_operator import BaseOperator
-from texttools.formatters import (
-    UserMergeFormatter,
-)
+from texttools.tools.internals.formatters import Formatter
 from texttools.tools.internals.prompt_loader import PromptLoader
 
 # Base Model type for output models
@@ -22,7 +18,7 @@ logger.setLevel(logging.INFO)
 
 class Operator(BaseOperator):
     """
-    Core engine for running text-processing operations with an LLM.
+    Core engine for running text-processing operations with an LLM (Sync).
 
     It wires together:
     - `PromptLoader` → loads YAML prompt templates.
@@ -31,14 +27,12 @@ class Operator(BaseOperator):
     """
 
    def __init__(self, client: OpenAI, model: str):
-        self.client
+        self.client = client
         self.model = model
 
-    def _analysis_completion(
-        self,
-        analyze_message
-        temperature: float,
-    ) -> str:
+    def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        analyze_prompt = prompt_configs["analyze_template"]
+        analyze_message = [self._build_user_message(analyze_prompt)]
         completion = self.client.chat.completions.create(
             model=self.model,
             messages=analyze_message,
@@ -47,12 +41,6 @@ class Operator(BaseOperator):
         analysis = completion.choices[0].message.content.strip()
         return analysis
 
-    def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
-        analyze_prompt = prompt_configs["analyze_template"]
-        analyze_message = [self._build_user_message(analyze_prompt)]
-        analysis = self._analysis_completion(analyze_message, temperature)
-        return analysis
-
     def _parse_completion(
         self,
         message: list[dict[str, str]],
@@ -83,7 +71,6 @@ class Operator(BaseOperator):
         temperature: float,
         logprobs: bool = False,
         top_logprobs: int = 3,
-        max_tokens: int | None = None,
     ) -> tuple[Type[T], Any]:
         json_schema = output_model.model_json_schema()
 
@@ -127,14 +114,12 @@ class Operator(BaseOperator):
         Execute the LLM pipeline with the given input text.
         """
         prompt_loader = PromptLoader()
-        formatter = UserMergeFormatter()
+        formatter = Formatter()
 
         try:
-            cleaned_text = text.strip()
-
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
-                text=cleaned_text,
+                text=text.strip(),
                 mode=mode,
                 **extra_kwargs,
             )
@@ -160,7 +145,7 @@ class Operator(BaseOperator):
         )
 
         messages.append(self._build_user_message(prompt_configs["main_template"]))
-        messages = formatter.format(messages)
+        messages = formatter.user_merge_format(messages)
 
         if resp_format == "vllm":
             parsed, completion = self._vllm_completion(
@@ -189,4 +174,4 @@ class Operator(BaseOperator):
 
         except Exception as e:
             logger.error(f"TheTool failed: {e}")
-            return {"
+            return {"error": str(e), "result": ""}
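The rewritten exception handler in both `Operator` and `AsyncOperator` returns `{"error": str(e), "result": ""}` on failure, so every tool call resolves to a well-formed dict and callers can branch on the `error` key. A caller-side sketch; the tool and input are placeholders:

```python
result = the_tool.summarize("some long text")
if result.get("error"):
    # run() caught an exception, logged it, and returned the fallback dict
    print("tool failed:", result["error"])
else:
    print(result["result"])
```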
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/prompt_loader.py RENAMED

@@ -18,24 +18,15 @@ class PromptLoader:
     }
     """
 
-    def __init__(self):
-        self.base_dir = Path(__file__).parent.parent.parent / Path("prompts")
-
     MAIN_TEMPLATE: str = "main_template"
     ANALYZE_TEMPLATE: str = "analyze_template"
 
     # Use lru_cache to load each file once
     @lru_cache(maxsize=32)
     def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
-        prompt_path = self.base_dir / prompt_file
-
-        if not prompt_path.exists():
-            raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
-
-        try:
-            data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
-        except yaml.YAMLError as e:
-            raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
+        base_dir = Path(__file__).parent.parent.parent / Path("prompts")
+        prompt_path = base_dir / prompt_file
+        data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
 
         return {
             self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]
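The consolidated `_load_templates` drops the old `FileNotFoundError` guard and the `yaml.YAMLError` wrapping, so a missing or malformed prompt file now surfaces as the raw exception from `Path.read_text` or `yaml.safe_load`, which the operators' broad `except Exception` then converts into the `{"error": ..., "result": ""}` fallback. A sketch of the changed failure mode when loading directly; the filename is hypothetical:

```python
loader = PromptLoader()
try:
    # Bypassing the operators: call the cached loader with a nonexistent file
    loader._load_templates("does_not_exist.yaml", mode=None)
except FileNotFoundError as e:
    # Still a FileNotFoundError, but now raised by Path.read_text with its
    # default message rather than the old "Prompt file not found: ..." text
    print(e)
```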
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/the_tool.py RENAMED

@@ -28,7 +28,6 @@ class TheTool:
         self,
         text: str,
         with_analysis: bool = False,
-        output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
@@ -37,20 +36,13 @@ class TheTool:
         """
         Categorize a text into a single Islamic studies domain category.
 
-        Args:
-            text: Input string to categorize.
-            with_analysis: If True, first runs an LLM "analysis" step and
-                conditions the main prompt on that analysis.
-
         Returns:
-            {"result": <category string>}
-            Example: {"result": "باورهای دینی"}
+            {"result": <category string>} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
             text=text,
             with_analysis=with_analysis,
-            output_lang=output_lang,
             user_prompt=user_prompt,
             temperature=temperature,
             logprobs=logprobs,
@@ -60,6 +52,7 @@ class TheTool:
             output_model=OutputModels.CategorizerOutput,
             resp_format="parse",
             mode=None,
+            output_lang=None,
         )
 
     def extract_keywords(
@@ -75,12 +68,8 @@ class TheTool:
         """
         Extract salient keywords from text.
 
-        Args:
-            text: Input string to analyze.
-            with_analysis: Whether to run an extra LLM reasoning step.
-
         Returns:
-            {"result": [<keyword1>, <keyword2>, ...]}
+            {"result": [<keyword1>, <keyword2>, ...]} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -111,12 +100,8 @@ class TheTool:
         """
         Perform Named Entity Recognition (NER) over the input text.
 
-        Args:
-            text: Input string.
-            with_analysis: Whether to run an extra LLM reasoning step.
-
         Returns:
-            {"result": [{"text": <entity>, "type": <entity_type>}, ...]}
+            {"result": [{"text": <entity>, "type": <entity_type>}, ...]} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -146,12 +131,8 @@ class TheTool:
         """
         Detect if the input is phrased as a question.
 
-        Args:
-            question: Input string to evaluate.
-            with_analysis: Whether to include an analysis step.
-
         Returns:
-            {"result":
+            {"result": True} or {"result": False} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -182,12 +163,8 @@ class TheTool:
         """
         Generate a single question from the given text.
 
-        Args:
-            text: Source text to derive a question from.
-            with_analysis: Whether to use analysis before generation.
-
         Returns:
-            {"result": <generated_question>}
+            {"result": <generated_question>} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -219,15 +196,8 @@ class TheTool:
         """
         Merge multiple questions into a single unified question.
 
-        Args:
-            questions: List of question strings.
-            mode: Merge strategy:
-                - "default": simple merging.
-                - "reason": merging with reasoning explanation.
-            with_analysis: Whether to use an analysis step.
-
         Returns:
-            {"result": <merged_question>}
+            {"result": <merged_question>} + ("logprobs" and "analysis" if enabled)
         """
         text = ", ".join(text)
         return self.operator.run(
@@ -258,17 +228,10 @@ class TheTool:
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
     ) -> dict[str, str]:
         """
-        Rewrite a
-
-        Args:
-            question: Input question to rewrite.
-            mode: Rewrite strategy:
-                - "positive": keep meaning, change words.
-                - "negative": alter meaning, preserve wording style.
-            with_analysis: Whether to include an analysis step.
+        Rewrite a text with different modes.
 
         Returns:
-            {"result": <
+            {"result": <rewritten_text>} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -300,14 +263,8 @@ class TheTool:
         """
         Generate a list of questions about a subject.
 
-        Args:
-            subject: Topic of interest.
-            number_of_questions: Number of questions to produce.
-            language: Target language for generated questions.
-            with_analysis: Whether to include an analysis step.
-
         Returns:
-            {"result": [<question1>, <question2>, ...]}
+            {"result": [<question1>, <question2>, ...]} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -339,12 +296,8 @@ class TheTool:
         """
         Summarize the given subject text.
 
-        Args:
-            subject: Input text to summarize.
-            with_analysis: Whether to include an analysis step.
-
         Returns:
-            {"result": <summary>}
+            {"result": <summary>} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -367,7 +320,6 @@ class TheTool:
         text: str,
         target_language: str,
         with_analysis: bool = False,
-        output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
@@ -376,20 +328,14 @@ class TheTool:
         """
         Translate text between languages.
 
-        Args:
-            text: Input string to translate.
-            target_language: Language code or name to translate into.
-            with_analysis: Whether to include an analysis step.
-
         Returns:
-            {"result": <translated_text>}
+            {"result": <translated_text>} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
             text=text,
             target_language=target_language,
             with_analysis=with_analysis,
-            output_lang=output_lang,
             user_prompt=user_prompt,
             temperature=temperature,
             logprobs=logprobs,
@@ -399,6 +345,7 @@ class TheTool:
             output_model=OutputModels.StrOutput,
             resp_format="parse",
             mode=None,
+            output_lang=None,
         )
 
     def run_custom(
@@ -413,10 +360,6 @@ class TheTool:
         """
         Custom tool that can do almost anything!
 
-        Args:
-            prompt: Custom prompt.
-            output_model: Custom BaseModel output model.
-
         Returns:
             {"result": <Any>}
         """
hamtaa_texttools-1.0.7/texttools/formatters/base_formatter.py DELETED

@@ -1,33 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any
-
-
-class BaseFormatter(ABC):
-    """
-    Adapter to convert a conversation into a specific LLM API's input format.
-
-    Concrete implementations transform standardized messages (e.g., list[dict]) into the
-    exact payload required by a provider (e.g., OpenAI's message list, a single string, etc.).
-    """
-
-    @abstractmethod
-    def format(
-        self,
-        messages: Any,
-    ) -> Any:
-        """
-        Transform the input messages into a provider-specific payload.
-
-        Args:
-            messages: The input conversation. While often a list of dicts with
-                'role' and 'content' keys, the exact type and structure may vary
-                by implementation.
-
-        Returns:
-            A payload in the format expected by the target LLM API. This could be:
-            - A list of role-content dictionaries (e.g., for OpenAI)
-            - A single formatted string (e.g., for completion-style APIs)
-            - A complex dictionary with additional parameters
-            - Any other provider-specific data structure
-        """
-        pass
hamtaa_texttools-1.0.7/texttools/formatters/user_merge_formatter.py DELETED

@@ -1,30 +0,0 @@
-from texttools.formatters.base_formatter import BaseFormatter
-
-
-class UserMergeFormatter(BaseFormatter):
-    """
-    Merges consecutive user messages into a single message, separated by newlines.
-
-    This is useful for condensing a multi-turn user input into a single coherent
-    message for the LLM. Assistant and system messages are left unchanged and
-    act as separators between user message groups.
-
-    Raises:
-        ValueError: If the input messages have invalid structure or roles.
-    """
-
-    def format(self, messages: list[dict[str, str]]) -> list[dict[str, str]]:
-        merged: list[dict[str, str]] = []
-
-        for message in messages:
-            role, content = message["role"], message["content"].strip()
-
-            # Merge with previous user turn
-            if merged and role == "user" and merged[-1]["role"] == "user":
-                merged[-1]["content"] += "\n" + content
-
-            # Otherwise, start a new turn
-            else:
-                merged.append({"role": role, "content": content})
-
-        return merged
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/LICENSE RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/MANIFEST.in RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/dependency_links.txt RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/requires.txt RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/top_level.txt RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/setup.cfg RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/__init__.py RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/__init__.py RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/batch_manager.py RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/batch_runner.py RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/README.md RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/categorizer.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/extract_entities.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/extract_keywords.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/is_question.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/merge_questions.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/rewrite.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/run_custom.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/subject_to_question.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/summarize.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/text_to_question.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/translate.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/__init__.py RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/output_models.py RENAMED
File without changes