hamtaa-texttools 1.1.3__tar.gz → 1.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {hamtaa_texttools-1.1.3/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.7}/PKG-INFO +52 -9
  2. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/README.md +51 -8
  3. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7/hamtaa_texttools.egg-info}/PKG-INFO +52 -9
  4. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/pyproject.toml +1 -1
  5. hamtaa_texttools-1.1.7/texttools/__init__.py +4 -0
  6. hamtaa_texttools-1.1.7/texttools/batch/__init__.py +3 -0
  7. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/batch/batch_manager.py +9 -11
  8. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/batch/batch_runner.py +53 -61
  9. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/async_the_tool.py +11 -11
  10. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/async_operator.py +6 -6
  11. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/base_operator.py +1 -2
  12. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/operator.py +6 -6
  13. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/output_models.py +7 -4
  14. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/the_tool.py +11 -11
  15. hamtaa_texttools-1.1.3/texttools/__init__.py +0 -9
  16. hamtaa_texttools-1.1.3/texttools/batch/__init__.py +0 -4
  17. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/LICENSE +0 -0
  18. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/MANIFEST.in +0 -0
  19. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
  20. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  21. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/requires.txt +0 -0
  22. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  23. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/setup.cfg +0 -0
  24. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/README.md +0 -0
  25. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/categorizer.yaml +0 -0
  26. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/extract_entities.yaml +0 -0
  27. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/extract_keywords.yaml +0 -0
  28. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/is_question.yaml +0 -0
  29. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/merge_questions.yaml +0 -0
  30. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/rewrite.yaml +0 -0
  31. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/run_custom.yaml +0 -0
  32. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/subject_to_question.yaml +0 -0
  33. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/summarize.yaml +0 -0
  34. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/text_to_question.yaml +0 -0
  35. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/translate.yaml +0 -0
  36. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/__init__.py +0 -0
  37. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/formatters.py +0 -0
  38. {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/prompt_loader.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hamtaa-texttools
- Version: 1.1.3
+ Version: 1.1.7
  Summary: A high-level NLP toolkit built on top of modern LLMs.
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
  License: MIT License
@@ -86,6 +86,18 @@ All these parameters can be used individually or together to tailor the behavior
  
  ---
  
+ ## 🧩 ToolOutput
+
+ Every tool in `TextTools` returns a `ToolOutput` object, a Pydantic `BaseModel` with the following attributes:
+ - **`result`** → The output of the LLM (`type=Any`)
+ - **`analysis`** → The reasoning step before generating the final output (`type=str`)
+ - **`logprobs`** → Token-level probabilities for the generated output (`type=list`)
+ - **`errors`** → Any errors that occurred while calling the LLM (`type=list`)
+
+ **Note:** You can use `repr(ToolOutput)` to see the details of an output.
+
+ ---
+
  ## 🚀 Installation
  
  Install the latest release via PyPI:
@@ -123,13 +135,13 @@ the_tool = TheTool(client=client, model=model)
  detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
  print(detection.result)
  print(detection.logprobs)
- # Output: True \n --logprobs
+ # Output: True + logprobs
  
  # Example: Translation
  translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
  print(translation.result)
  print(translation.analysis)
- # Output: "Hi! How are you?" \n --analysis
+ # Output: "Hi! How are you?" + analysis
  ```
  
  ---
@@ -149,19 +161,22 @@ async def main():
      model = "gpt-4o-mini"
  
      # Create an instance of AsyncTheTool
-     the_tool = AsyncTheTool(client=async_client, model=model)
+     async_the_tool = AsyncTheTool(client=async_client, model=model)
+
+     # Example: Async Translation and Keyword Extraction
+     translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+     keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
  
-     # Example: Async Translation
-     translation = await the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+     (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
      print(translation.result)
-     # Output: "Hi! How are you?"
+     print(keywords.result)
  
  asyncio.run(main())
  ```
  
  ---
  
- ## 📚 Use Cases
+ ## 👍 Use Cases
  
  Use **TextTools** when you need to:
  
@@ -169,7 +184,35 @@ Use **TextTools** when you need to:
  - 🌍 **Translate** and process multilingual corpora with ease
  - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
  - 📊 **Analyze** large text collections using embeddings and categorization
- - 👍 **Automate** common text-processing tasks without reinventing the wheel
+
+ ---
+
+ ## 📚 Batch Processing
+
+ Process large datasets efficiently using OpenAI's batch API.
+
+ ### Quick Start
+
+ ```python
+ from pydantic import BaseModel
+
+ from texttools import BatchJobRunner, BatchConfig
+
+ # Configure your batch job
+ config = BatchConfig(
+     system_prompt="Extract entities from the text",
+     job_name="entity_extraction",
+     input_data_path="data.json",
+     output_data_filename="results.json",
+     model="gpt-4o-mini"
+ )
+
+ # Define your output schema
+ class Output(BaseModel):
+     entities: list[str]
+
+ # Run the batch job
+ runner = BatchJobRunner(config, output_model=Output)
+ runner.run()
+ ```
  
  ---
  
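A minimal sketch of consuming the new `ToolOutput` return type documented above. This assumes `OPENAI_API_KEY` is set in the environment and reuses the README's `gpt-4o-mini` model name; it is an illustration, not part of the diff:

```python
from openai import OpenAI

from texttools import TheTool

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
the_tool = TheTool(client=client, model="gpt-4o-mini")

out = the_tool.is_question("Is this project open source?")
print(repr(out))   # the new __repr__ shows result, analysis, logprobs, and errors
if out.errors:     # errors is a list[str]; empty when the call succeeded
    print("call failed:", out.errors)
else:
    print(out.result)  # e.g. True
```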
@@ -52,6 +52,18 @@ All these parameters can be used individually or together to tailor the behavior
  
  ---
  
+ ## 🧩 ToolOutput
+
+ Every tool in `TextTools` returns a `ToolOutput` object, a Pydantic `BaseModel` with the following attributes:
+ - **`result`** → The output of the LLM (`type=Any`)
+ - **`analysis`** → The reasoning step before generating the final output (`type=str`)
+ - **`logprobs`** → Token-level probabilities for the generated output (`type=list`)
+ - **`errors`** → Any errors that occurred while calling the LLM (`type=list`)
+
+ **Note:** You can use `repr(ToolOutput)` to see the details of an output.
+
+ ---
+
  ## 🚀 Installation
  
  Install the latest release via PyPI:
@@ -89,13 +101,13 @@ the_tool = TheTool(client=client, model=model)
  detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
  print(detection.result)
  print(detection.logprobs)
- # Output: True \n --logprobs
+ # Output: True + logprobs
  
  # Example: Translation
  translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
  print(translation.result)
  print(translation.analysis)
- # Output: "Hi! How are you?" \n --analysis
+ # Output: "Hi! How are you?" + analysis
  ```
  
  ---
@@ -115,19 +127,22 @@ async def main():
      model = "gpt-4o-mini"
  
      # Create an instance of AsyncTheTool
-     the_tool = AsyncTheTool(client=async_client, model=model)
+     async_the_tool = AsyncTheTool(client=async_client, model=model)
+
+     # Example: Async Translation and Keyword Extraction
+     translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+     keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
  
-     # Example: Async Translation
-     translation = await the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+     (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
      print(translation.result)
-     # Output: "Hi! How are you?"
+     print(keywords.result)
  
  asyncio.run(main())
  ```
  
  ---
  
- ## 📚 Use Cases
+ ## 👍 Use Cases
  
  Use **TextTools** when you need to:
  
@@ -135,7 +150,35 @@ Use **TextTools** when you need to:
  - 🌍 **Translate** and process multilingual corpora with ease
  - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
  - 📊 **Analyze** large text collections using embeddings and categorization
- - 👍 **Automate** common text-processing tasks without reinventing the wheel
+
+ ---
+
+ ## 📚 Batch Processing
+
+ Process large datasets efficiently using OpenAI's batch API.
+
+ ### Quick Start
+
+ ```python
+ from pydantic import BaseModel
+
+ from texttools import BatchJobRunner, BatchConfig
+
+ # Configure your batch job
+ config = BatchConfig(
+     system_prompt="Extract entities from the text",
+     job_name="entity_extraction",
+     input_data_path="data.json",
+     output_data_filename="results.json",
+     model="gpt-4o-mini"
+ )
+
+ # Define your output schema
+ class Output(BaseModel):
+     entities: list[str]
+
+ # Run the batch job
+ runner = BatchJobRunner(config, output_model=Output)
+ runner.run()
+ ```
  
  ---
  
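The updated async example above fires two tool calls concurrently instead of awaiting them one at a time. A minimal sketch of the same pattern with basic error isolation, assuming only what the README shows (`AsyncTheTool`, `translate`, `extract_keywords`, and an `OPENAI_API_KEY` in the environment):

```python
import asyncio

from openai import AsyncOpenAI

from texttools import AsyncTheTool


async def main():
    tool = AsyncTheTool(client=AsyncOpenAI(), model="gpt-4o-mini")
    # Create both coroutines first, then await them together so the two
    # requests are in flight at the same time.
    translation, keywords = await asyncio.gather(
        tool.translate("سلام، حالت چطوره؟", target_language="English"),
        tool.extract_keywords("LLMs make batch text processing cheap"),
        return_exceptions=True,  # one failed call won't cancel the other
    )
    for output in (translation, keywords):
        print(output if isinstance(output, Exception) else output.result)


asyncio.run(main())
```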
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hamtaa-texttools
- Version: 1.1.3
+ Version: 1.1.7
  Summary: A high-level NLP toolkit built on top of modern LLMs.
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
  License: MIT License
@@ -86,6 +86,18 @@ All these parameters can be used individually or together to tailor the behavior
  
  ---
  
+ ## 🧩 ToolOutput
+
+ Every tool in `TextTools` returns a `ToolOutput` object, a Pydantic `BaseModel` with the following attributes:
+ - **`result`** → The output of the LLM (`type=Any`)
+ - **`analysis`** → The reasoning step before generating the final output (`type=str`)
+ - **`logprobs`** → Token-level probabilities for the generated output (`type=list`)
+ - **`errors`** → Any errors that occurred while calling the LLM (`type=list`)
+
+ **Note:** You can use `repr(ToolOutput)` to see the details of an output.
+
+ ---
+
  ## 🚀 Installation
  
  Install the latest release via PyPI:
@@ -123,13 +135,13 @@ the_tool = TheTool(client=client, model=model)
  detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
  print(detection.result)
  print(detection.logprobs)
- # Output: True \n --logprobs
+ # Output: True + logprobs
  
  # Example: Translation
  translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
  print(translation.result)
  print(translation.analysis)
- # Output: "Hi! How are you?" \n --analysis
+ # Output: "Hi! How are you?" + analysis
  ```
  
  ---
@@ -149,19 +161,22 @@ async def main():
      model = "gpt-4o-mini"
  
      # Create an instance of AsyncTheTool
-     the_tool = AsyncTheTool(client=async_client, model=model)
+     async_the_tool = AsyncTheTool(client=async_client, model=model)
+
+     # Example: Async Translation and Keyword Extraction
+     translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+     keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
  
-     # Example: Async Translation
-     translation = await the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+     (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
      print(translation.result)
-     # Output: "Hi! How are you?"
+     print(keywords.result)
  
  asyncio.run(main())
  ```
  
  ---
  
- ## 📚 Use Cases
+ ## 👍 Use Cases
  
  Use **TextTools** when you need to:
  
@@ -169,7 +184,35 @@ Use **TextTools** when you need to:
  - 🌍 **Translate** and process multilingual corpora with ease
  - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
  - 📊 **Analyze** large text collections using embeddings and categorization
- - 👍 **Automate** common text-processing tasks without reinventing the wheel
+
+ ---
+
+ ## 📚 Batch Processing
+
+ Process large datasets efficiently using OpenAI's batch API.
+
+ ### Quick Start
+
+ ```python
+ from pydantic import BaseModel
+
+ from texttools import BatchJobRunner, BatchConfig
+
+ # Configure your batch job
+ config = BatchConfig(
+     system_prompt="Extract entities from the text",
+     job_name="entity_extraction",
+     input_data_path="data.json",
+     output_data_filename="results.json",
+     model="gpt-4o-mini"
+ )
+
+ # Define your output schema
+ class Output(BaseModel):
+     entities: list[str]
+
+ # Run the batch job
+ runner = BatchJobRunner(config, output_model=Output)
+ runner.run()
+ ```
  
  ---
  
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
  
  [project]
  name = "hamtaa-texttools"
- version = "1.1.3"
+ version = "1.1.7"
  authors = [
      { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
      { name = "Montazer", email = "montazerh82@gmail.com" },
@@ -0,0 +1,4 @@
+ from .batch import BatchJobRunner, BatchConfig
+ from .tools import AsyncTheTool, TheTool
+
+ __all__ = ["TheTool", "AsyncTheTool", "BatchJobRunner", "BatchConfig"]
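A short sketch of what the rewritten package root means for callers: the four public names now import flatly, while the renamed `BatchManager` (formerly `SimpleBatchManager`) is no longer re-exported and is only reachable through its full module path:

```python
# Public API after 1.1.7: flat imports from the package root.
from texttools import TheTool, AsyncTheTool, BatchJobRunner, BatchConfig

# The renamed manager is not re-exported from texttools.batch's __init__,
# so it must be imported from its defining module.
from texttools.batch.batch_manager import BatchManager
```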
@@ -0,0 +1,3 @@
+ from .batch_runner import BatchJobRunner, BatchConfig
+
+ __all__ = ["BatchJobRunner", "BatchConfig"]
@@ -1,18 +1,20 @@
  import json
  import uuid
  from pathlib import Path
- from typing import Any, Type
+ from typing import Any, Type, TypeVar
  import logging
  
  from pydantic import BaseModel
  from openai import OpenAI
  from openai.lib._pydantic import to_strict_json_schema
  
- logger = logging.getLogger("batch_runner")
- logger.setLevel(logging.INFO)
+ # Base Model type for output models
+ T = TypeVar("T", bound=BaseModel)
  
+ logger = logging.getLogger("texttools.batch_runner")
  
- class SimpleBatchManager:
+
+ class BatchManager:
      """
      Manages batch processing jobs for OpenAI's chat completions with structured outputs.
  
@@ -25,9 +27,8 @@ class SimpleBatchManager:
          self,
          client: OpenAI,
          model: str,
-         output_model: Type[BaseModel],
+         output_model: Type[T],
          prompt_template: str,
-         handlers: list[Any] | None = None,
          state_dir: Path = Path(".batch_jobs"),
          custom_json_schema_obj_str: dict | None = None,
          **client_kwargs: Any,
@@ -36,7 +37,6 @@ class SimpleBatchManager:
          self.model = model
          self.output_model = output_model
          self.prompt_template = prompt_template
-         self.handlers = handlers or []
          self.state_dir = state_dir
          self.state_dir.mkdir(parents=True, exist_ok=True)
          self.custom_json_schema_obj_str = custom_json_schema_obj_str
@@ -45,7 +45,7 @@
  
          if self.custom_json_schema_obj_str:
              if self.custom_json_schema_obj_str is not dict:
-                 raise ValueError("schema should be a dict")
+                 raise ValueError("Schema should be a dict")
  
      def _state_file(self, job_name: str) -> Path:
          return self.state_dir / f"{job_name}.json"
@@ -126,7 +126,7 @@ class SimpleBatchManager:
  
          else:
              raise TypeError(
-                 "The input must be either a list of texts or a dictionary in the form {'id': str, 'text': str}."
+                 "The input must be either a list of texts or a dictionary in the form {'id': str, 'text': str}"
              )
  
          file_path = self.state_dir / f"batch_{uuid.uuid4().hex}.jsonl"
@@ -220,8 +220,6 @@
              error_d = {custom_id: results[custom_id]}
              log.append(error_d)
  
-         for handler in self.handlers:
-             handler.handle(results)
          if remove_cache:
              self._clear_state(job_name)
  
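One context line retained above, `if self.custom_json_schema_obj_str is not dict:`, compares the value to the `dict` class by identity, so the `ValueError` only fires when a caller passes the type `dict` itself. A hedged sketch of the isinstance-based guard the error message seems to intend (`validate_schema` is a hypothetical helper, not part of the package):

```python
def validate_schema(custom_json_schema_obj_str: object) -> dict:
    # `x is not dict` is an identity test against the dict class; it does not
    # check the value's type. isinstance does what the message describes.
    if not isinstance(custom_json_schema_obj_str, dict):
        raise ValueError("Schema should be a dict")
    return custom_json_schema_obj_str
```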
@@ -3,24 +3,23 @@ import os
  import time
  from dataclasses import dataclass
  from pathlib import Path
- from typing import Any, Callable
+ from typing import Any, Callable, Type, TypeVar
  import logging
  
  from dotenv import load_dotenv
  from openai import OpenAI
  from pydantic import BaseModel
  
- from texttools.batch import SimpleBatchManager
+ from texttools.batch.batch_manager import BatchManager
+ from texttools.tools.internals.output_models import StrOutput
  
- logger = logging.getLogger("batch_runner")
- logger.setLevel(logging.INFO)
+ # Base Model type for output models
+ T = TypeVar("T", bound=BaseModel)
  
+ logger = logging.getLogger("texttools.batch_runner")
  
- class OutputModel(BaseModel):
-     desired_output: str
  
-
- def export_data(data):
+ def export_data(data) -> list[dict[str, str]]:
      """
      Produces a structure of the following form from an initial data structure:
      [{"id": str, "text": str},...]
@@ -28,7 +27,7 @@ def export_data(data):
      return data
  
  
- def import_data(data):
+ def import_data(data) -> Any:
      """
      Takes the output and adds and aggregates it to the original structure.
      """
@@ -47,9 +46,9 @@ class BatchConfig:
      output_data_filename: str = ""
      model: str = "gpt-4.1-mini"
      MAX_BATCH_SIZE: int = 100
-     MAX_TOTAL_TOKENS: int = 2000000
+     MAX_TOTAL_TOKENS: int = 2_000_000
      CHARS_PER_TOKEN: float = 2.7
-     PROMPT_TOKEN_MULTIPLIER: int = 1000
+     PROMPT_TOKEN_MULTIPLIER: int = 1_000
      BASE_OUTPUT_DIR: str = "Data/batch_entity_result"
      import_function: Callable = import_data
      export_function: Callable = export_data
@@ -63,7 +62,7 @@ class BatchJobRunner:
      """
  
      def __init__(
-         self, config: BatchConfig = BatchConfig(), output_model: type = OutputModel
+         self, config: BatchConfig = BatchConfig(), output_model: Type[T] = StrOutput
      ):
          self.config = config
          self.system_prompt = config.system_prompt
@@ -82,11 +81,11 @@
          # Track retry attempts per part
          self.part_attempts: dict[int, int] = {}
  
-     def _init_manager(self) -> SimpleBatchManager:
+     def _init_manager(self) -> BatchManager:
          load_dotenv()
          api_key = os.getenv("OPENAI_API_KEY")
          client = OpenAI(api_key=api_key)
-         return SimpleBatchManager(
+         return BatchManager(
              client=client,
              model=self.model,
              prompt_template=self.system_prompt,
@@ -101,12 +100,12 @@
          # Ensure data is a list of dicts with 'id' and 'content' as strings
          if not isinstance(data, list):
              raise ValueError(
-                 'Exported data must be a list in this form: [ {"id": str, "content": str},...]'
+                 "Exported data must be a list of dicts with 'id' and 'content' keys"
              )
          for item in data:
              if not (isinstance(item, dict) and "id" in item and "content" in item):
                  raise ValueError(
-                     "Each item must be a dict with 'id' and 'content' keys."
+                     f"Item must be a dict with 'id' and 'content' keys. Got: {type(item)}"
                  )
              if not (isinstance(item["id"], str) and isinstance(item["content"], str)):
                  raise ValueError("'id' and 'content' must be strings.")
@@ -161,7 +160,45 @@
          logger.info("Uploading...")
          time.sleep(30)
  
+     def _save_results(
+         self,
+         output_data: list[dict[str, Any]] | dict[str, Any],
+         log: list[Any],
+         part_idx: int,
+     ):
+         part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
+         result_path = (
+             Path(self.config.BASE_OUTPUT_DIR)
+             / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
+         )
+         if not output_data:
+             logger.info("No output data to save. Skipping this part.")
+             return
+         else:
+             with open(result_path, "w", encoding="utf-8") as f:
+                 json.dump(output_data, f, ensure_ascii=False, indent=4)
+         if log:
+             log_path = (
+                 Path(self.config.BASE_OUTPUT_DIR)
+                 / f"{Path(self.output_data_filename).stem}{part_suffix}_log.json"
+             )
+             with open(log_path, "w", encoding="utf-8") as f:
+                 json.dump(log, f, ensure_ascii=False, indent=4)
+
+     def _result_exists(self, part_idx: int) -> bool:
+         part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
+         result_path = (
+             Path(self.config.BASE_OUTPUT_DIR)
+             / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
+         )
+         return result_path.exists()
+
      def run(self):
+         """
+         Execute the batch job processing pipeline.
+
+         Submits jobs, monitors progress, handles retries, and saves results.
+         """
          # Submit all jobs up-front for concurrent execution
          self._submit_all_jobs()
          pending_parts: set[int] = set(self.part_idx_to_job_name.keys())
@@ -215,48 +252,3 @@
              f"Waiting {self.config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
          )
          time.sleep(self.config.poll_interval_seconds)
-
-     def _save_results(
-         self,
-         output_data: list[dict[str, Any]] | dict[str, Any],
-         log: list[Any],
-         part_idx: int,
-     ):
-         part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
-         result_path = (
-             Path(self.config.BASE_OUTPUT_DIR)
-             / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
-         )
-         if not output_data:
-             logger.info("No output data to save. Skipping this part.")
-             return
-         else:
-             with open(result_path, "w", encoding="utf-8") as f:
-                 json.dump(output_data, f, ensure_ascii=False, indent=4)
-         if log:
-             log_path = (
-                 Path(self.config.BASE_OUTPUT_DIR)
-                 / f"{Path(self.output_data_filename).stem}{part_suffix}_log.json"
-             )
-             with open(log_path, "w", encoding="utf-8") as f:
-                 json.dump(log, f, ensure_ascii=False, indent=4)
-
-     def _result_exists(self, part_idx: int) -> bool:
-         part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
-         result_path = (
-             Path(self.config.BASE_OUTPUT_DIR)
-             / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
-         )
-         return result_path.exists()
-
-
- if __name__ == "__main__":
-     logger.info("=== Batch Job Runner ===")
-     config = BatchConfig(
-         system_prompt="",
-         job_name="job_name",
-         input_data_path="Data.json",
-         output_data_filename="output",
-     )
-     runner = BatchJobRunner(config)
-     runner.run()
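The config constants above (`MAX_BATCH_SIZE`, `MAX_TOTAL_TOKENS`, `CHARS_PER_TOKEN`) suggest how inputs get split into parts before submission. The splitting logic itself is outside this diff, so the sketch below is an assumption about how such a budget could be applied, not the package's actual algorithm:

```python
def estimate_tokens(text: str, chars_per_token: float = 2.7) -> int:
    # Character-count heuristic mirroring BatchConfig.CHARS_PER_TOKEN.
    return int(len(text) / chars_per_token)


def split_into_parts(
    texts: list[str],
    max_batch_size: int = 100,
    max_total_tokens: int = 2_000_000,
) -> list[list[str]]:
    # Greedily pack texts into parts without exceeding either limit.
    parts: list[list[str]] = []
    current: list[str] = []
    budget = 0
    for text in texts:
        cost = estimate_tokens(text)
        if current and (budget + cost > max_total_tokens or len(current) >= max_batch_size):
            parts.append(current)
            current, budget = [], 0
        current.append(text)
        budget += cost
    if current:
        parts.append(current)
    return parts
```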
@@ -1,4 +1,4 @@
- from typing import Literal, Any
+ from typing import Literal, Any, Callable
  
  from openai import AsyncOpenAI
  
@@ -34,7 +34,7 @@ class AsyncTheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Categorize a text into a single Islamic studies domain category.
@@ -71,7 +71,7 @@ class AsyncTheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Extract salient keywords from text.
@@ -108,7 +108,7 @@ class AsyncTheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Perform Named Entity Recognition (NER) over the input text.
@@ -144,7 +144,7 @@ class AsyncTheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Detect if the input is phrased as a question.
@@ -181,7 +181,7 @@ class AsyncTheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Generate a single question from the given text.
@@ -219,7 +219,7 @@ class AsyncTheTool:
          logprobs: bool = False,
          top_logprobs: int | None = None,
          mode: Literal["default", "reason"] = "default",
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Merge multiple questions into a single unified question.
@@ -258,7 +258,7 @@ class AsyncTheTool:
          logprobs: bool = False,
          top_logprobs: int | None = None,
          mode: Literal["positive", "negative", "hard_negative"] = "positive",
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Rewrite a text with different modes.
@@ -296,7 +296,7 @@ class AsyncTheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Generate a list of questions about a subject.
@@ -334,7 +334,7 @@ class AsyncTheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Summarize the given subject text.
@@ -371,7 +371,7 @@ class AsyncTheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Translate text between languages.
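The signature change repeated across every tool above tightens `validator` from `Any` to `Callable[[Any], bool]`: any callable that receives the parsed result and returns whether it is acceptable. A minimal sketch (the predicate itself is hypothetical, not shipped with the package):

```python
def non_empty_list(result: object) -> bool:
    # Accept the LLM output only if it parsed into a non-empty list.
    return isinstance(result, list) and len(result) > 0

# keywords = await async_the_tool.extract_keywords(
#     "LLMs make batch text processing cheap",
#     validator=non_empty_list,
# )
```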
@@ -1,4 +1,4 @@
- from typing import Any, TypeVar, Type, Literal
+ from typing import Any, TypeVar, Type, Literal, Callable
  import logging
  
  from openai import AsyncOpenAI
@@ -12,8 +12,7 @@ from texttools.tools.internals.prompt_loader import PromptLoader
  # Base Model type for output models
  T = TypeVar("T", bound=BaseModel)
  
- logger = logging.getLogger("async_operator")
- logger.setLevel(logging.INFO)
+ logger = logging.getLogger("texttools.async_operator")
  
  
  class AsyncOperator(BaseOperator):
@@ -115,7 +114,7 @@ class AsyncOperator(BaseOperator):
          temperature: float,
          logprobs: bool,
          top_logprobs: int | None,
-         validator: Any | None,
+         validator: Callable[[Any], bool] | None,
          # Internal parameters
          prompt_file: str,
          output_model: Type[T],
@@ -128,7 +127,7 @@ class AsyncOperator(BaseOperator):
          """
          prompt_loader = PromptLoader()
          formatter = Formatter()
-         output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
+         output = ToolOutput()
  
          try:
              # Prompt configs contain two keys: main_template and analyze template; both are strings
@@ -239,4 +238,5 @@ class AsyncOperator(BaseOperator):
  
          except Exception as e:
              logger.error(f"AsyncTheTool failed: {e}")
-             return output.errors.append(str(e))
+             output.errors.append(str(e))
+             return output
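The error-path change above fixes a subtle bug: `list.append` returns `None`, so the old `return output.errors.append(str(e))` returned `None` instead of a `ToolOutput`. A two-line demonstration of why the fixed version appends first and then returns the object:

```python
errors: list[str] = []
assert errors.append("boom") is None  # append mutates in place and returns None
assert errors == ["boom"]             # the mutation happened; the return value didn't carry it
```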
@@ -11,8 +11,7 @@ from openai import OpenAI, AsyncOpenAI
  # Base Model type for output models
  T = TypeVar("T", bound=BaseModel)
  
- logger = logging.getLogger("base_operator")
- logger.setLevel(logging.INFO)
+ logger = logging.getLogger("texttools.base_operator")
  
  
  class BaseOperator:
@@ -1,4 +1,4 @@
- from typing import Any, TypeVar, Type, Literal
+ from typing import Any, TypeVar, Type, Literal, Callable
  import logging
  
  from openai import OpenAI
@@ -12,8 +12,7 @@ from texttools.tools.internals.prompt_loader import PromptLoader
  # Base Model type for output models
  T = TypeVar("T", bound=BaseModel)
  
- logger = logging.getLogger("operator")
- logger.setLevel(logging.INFO)
+ logger = logging.getLogger("texttools.operator")
  
  
  class Operator(BaseOperator):
@@ -115,7 +114,7 @@ class Operator(BaseOperator):
          temperature: float,
          logprobs: bool,
          top_logprobs: int | None,
-         validator: Any | None,
+         validator: Callable[[Any], bool] | None,
          # Internal parameters
          prompt_file: str,
          output_model: Type[T],
@@ -128,7 +127,7 @@ class Operator(BaseOperator):
          """
          prompt_loader = PromptLoader()
          formatter = Formatter()
-         output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
+         output = ToolOutput()
  
          try:
              # Prompt configs contain two keys: main_template and analyze template; both are strings
@@ -239,4 +238,5 @@ class Operator(BaseOperator):
  
          except Exception as e:
              logger.error(f"TheTool failed: {e}")
-             return output.errors.append(str(e))
+             output.errors.append(str(e))
+             return output
@@ -4,10 +4,13 @@ from pydantic import BaseModel, Field
  
  
  class ToolOutput(BaseModel):
-     result: str
-     analysis: str
-     logprobs: list[dict[str, Any]]
-     errors: list[str]
+     result: Any = None
+     analysis: str = ""
+     logprobs: list[dict[str, Any]] = []
+     errors: list[str] = []
+
+     def __repr__(self) -> str:
+         return f"ToolOutput(result_type='{type(self.result)}', result='{self.result}', analysis='{self.analysis}', logprobs='{self.logprobs}', errors='{self.errors}'"
  
  
  class StrOutput(BaseModel):
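The new field defaults above rely on Pydantic's handling of mutable defaults: unlike plain class attributes, each model instance gets its own copy of `[]`, so two outputs never share an error list. A small sketch of that behavior (the trimmed model mirrors the diff):

```python
from typing import Any

from pydantic import BaseModel


class ToolOutput(BaseModel):
    result: Any = None
    analysis: str = ""
    logprobs: list[dict[str, Any]] = []
    errors: list[str] = []


a, b = ToolOutput(), ToolOutput()
a.errors.append("boom")
assert b.errors == []  # pydantic copies the default list per instance
```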
@@ -1,4 +1,4 @@
- from typing import Literal, Any
+ from typing import Literal, Any, Callable
  
  from openai import OpenAI
  
@@ -32,7 +32,7 @@ class TheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Categorize a text into a single Islamic studies domain category.
@@ -69,7 +69,7 @@ class TheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Extract salient keywords from text.
@@ -106,7 +106,7 @@ class TheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Perform Named Entity Recognition (NER) over the input text.
@@ -142,7 +142,7 @@ class TheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Detect if the input is phrased as a question.
@@ -179,7 +179,7 @@ class TheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Generate a single question from the given text.
@@ -217,7 +217,7 @@ class TheTool:
          logprobs: bool = False,
          top_logprobs: int | None = None,
          mode: Literal["default", "reason"] = "default",
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Merge multiple questions into a single unified question.
@@ -256,7 +256,7 @@ class TheTool:
          logprobs: bool = False,
          top_logprobs: int | None = None,
          mode: Literal["positive", "negative", "hard_negative"] = "positive",
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Rewrite a text with different modes.
@@ -294,7 +294,7 @@ class TheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Generate a list of questions about a subject.
@@ -332,7 +332,7 @@ class TheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Summarize the given subject text.
@@ -369,7 +369,7 @@ class TheTool:
          temperature: float | None = 0.0,
          logprobs: bool = False,
          top_logprobs: int | None = None,
-         validator: Any | None = None,
+         validator: Callable[[Any], bool] | None = None,
      ) -> OutputModels.ToolOutput:
          """
          Translate text between languages.
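Since every sync tool accepts `logprobs`/`top_logprobs` alongside the tightened `validator`, here is a hedged sketch of turning the `logprobs` field into readable probabilities. The entry shape (`token`/`logprob` keys) is assumed from OpenAI's chat-completions logprobs format and is not confirmed by this diff; `the_tool` is the instance from the README example:

```python
import math

detection = the_tool.is_question(
    "Is this project open source?", logprobs=True, top_logprobs=2
)
for entry in detection.logprobs:  # list[dict[str, Any]]
    token = entry.get("token")
    logprob = entry.get("logprob")
    if token is not None and logprob is not None:
        print(token, f"{math.exp(logprob):.2%}")  # convert logprob to a probability
```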
@@ -1,9 +0,0 @@
- from .batch import BatchJobRunner, SimpleBatchManager
- from .tools import AsyncTheTool, TheTool
-
- __all__ = [
-     "TheTool",
-     "AsyncTheTool",
-     "SimpleBatchManager",
-     "BatchJobRunner",
- ]
@@ -1,4 +0,0 @@
- from .batch_manager import SimpleBatchManager
- from .batch_runner import BatchJobRunner
-
- __all__ = ["SimpleBatchManager", "BatchJobRunner"]