PyPI - hamtaa-texttools - Versions diffs - 1.1.16__tar.gz → 1.1.18__tar.gz - Mend

hamtaa-texttools 1.1.16__tar.gz → 1.1.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

{hamtaa_texttools-1.1.16/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.18}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.16
+Version: 1.1.18
 Summary: A high-level NLP toolkit built on top of modern LLMs.
-Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
+Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 License: MIT License
         Copyright (c) 2025 Hamtaa
@@ -60,6 +60,7 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
 - **`subject_to_question()`** - Generates questions about a specific subject
 - **`summarize()`** - Text summarization
 - **`translate()`** - Text translation between languages
+- **`propositionize()`** - Converts text into atomic, independent, meaningful sentences
 - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
 ---

{hamtaa_texttools-1.1.16 → hamtaa_texttools-1.1.18}/README.md RENAMED Viewed

@@ -25,6 +25,7 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
 - **`subject_to_question()`** - Generates questions about a specific subject
 - **`summarize()`** - Text summarization
 - **`translate()`** - Text translation between languages
+- **`propositionize()`** - Converts text into atomic, independent, meaningful sentences
 - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
 ---

{hamtaa_texttools-1.1.16 → hamtaa_texttools-1.1.18/hamtaa_texttools.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.16
+Version: 1.1.18
 Summary: A high-level NLP toolkit built on top of modern LLMs.
-Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
+Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 License: MIT License
         Copyright (c) 2025 Hamtaa
@@ -60,6 +60,7 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
 - **`subject_to_question()`** - Generates questions about a specific subject
 - **`summarize()`** - Text summarization
 - **`translate()`** - Text translation between languages
+- **`propositionize()`** - Converts text into atomic, independent, meaningful sentences
 - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
 ---

{hamtaa_texttools-1.1.16 → hamtaa_texttools-1.1.18}/hamtaa_texttools.egg-info/SOURCES.txt RENAMED Viewed

@@ -15,6 +15,13 @@ texttools/batch/batch_config.py
 texttools/batch/batch_runner.py
 texttools/batch/internals/batch_manager.py
 texttools/batch/internals/utils.py
+texttools/internals/async_operator.py
+texttools/internals/exceptions.py
+texttools/internals/formatters.py
+texttools/internals/models.py
+texttools/internals/operator_utils.py
+texttools/internals/prompt_loader.py
+texttools/internals/sync_operator.py
 texttools/prompts/README.md
 texttools/prompts/categorize.yaml
 texttools/prompts/detect_entity.yaml
@@ -22,6 +29,7 @@ texttools/prompts/extract_entities.yaml
 texttools/prompts/extract_keywords.yaml
 texttools/prompts/is_question.yaml
 texttools/prompts/merge_questions.yaml
+texttools/prompts/propositionize.yaml
 texttools/prompts/rewrite.yaml
 texttools/prompts/run_custom.yaml
 texttools/prompts/subject_to_question.yaml
@@ -29,10 +37,4 @@ texttools/prompts/summarize.yaml
 texttools/prompts/text_to_question.yaml
 texttools/prompts/translate.yaml
 texttools/tools/async_tools.py
-texttools/tools/sync_tools.py
-texttools/tools/internals/async_operator.py
-texttools/tools/internals/formatters.py
-texttools/tools/internals/models.py
-texttools/tools/internals/operator_utils.py
-texttools/tools/internals/prompt_loader.py
-texttools/tools/internals/sync_operator.py
+texttools/tools/sync_tools.py

{hamtaa_texttools-1.1.16 → hamtaa_texttools-1.1.18}/pyproject.toml RENAMED Viewed

@@ -4,12 +4,13 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "hamtaa-texttools"
-version = "1.1.16"
+version = "1.1.18"
 authors = [
   { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
   { name = "Montazer", email = "montazerh82@gmail.com" },
   { name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
   { name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
+  { name = "Zareshahi", email = "a.zareshahi1377@gmail.com" },
 ]
 description = "A high-level NLP toolkit built on top of modern LLMs."
 readme = "README.md"

{hamtaa_texttools-1.1.16 → hamtaa_texttools-1.1.18}/tests/test_all_async_tools.py RENAMED Viewed

@@ -21,7 +21,10 @@ t = AsyncTheTool(client=client, model=MODEL)
 async def main():
     category_task = t.categorize(
-        "سلام حالت چطوره؟", categories=["هیچکدام", "دینی", "فلسفه"]
+        "سلام حالت چطوره؟",
+        categories=["هیچکدام", "دینی", "فلسفه"],
+        logprobs=True,
+        top_logprobs=-1,
     )
     keywords_task = t.extract_keywords("Tomorrow, we will be dead by the car crash")
     entities_task = t.extract_entities("We will be dead by the car crash")
@@ -40,6 +43,10 @@ async def main():
     questions_task = t.subject_to_question("Friendship", 3)
     summary_task = t.summarize("Tomorrow, we will be dead by the car crash")
     translation_task = t.translate("سلام حالت چطوره؟", target_language="English")
+    propositionize_task = t.propositionize(
+        "جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
+        output_lang="Persian",
+    )
     (
         category,
         keywords,
@@ -51,6 +58,7 @@ async def main():
         questions,
         summary,
         translation,
+        propositionize,
     ) = await asyncio.gather(
         category_task,
         keywords_task,
@@ -62,6 +70,7 @@ async def main():
         questions_task,
         summary_task,
         translation_task,
+        propositionize_task,
     )
     for tool_output in (
@@ -75,6 +84,7 @@ async def main():
         questions,
         summary,
         translation,
+        propositionize,
     ):
         print(repr(tool_output))

{hamtaa_texttools-1.1.16 → hamtaa_texttools-1.1.18}/tests/test_all_tools.py RENAMED Viewed

@@ -19,7 +19,12 @@ client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
 t = TheTool(client=client, model=MODEL)
 # Categorizer: list mode
-category = t.categorize("سلام حالت چطوره؟", categories=["هیچکدام", "دینی", "فلسفه"])
+category = t.categorize(
+    "سلام حالت چطوره؟",
+    categories=["هیچکدام", "دینی", "فلسفه"],
+    logprobs=True,
+    top_logprobs=-1,
+)
 print(repr(category))
 # Categorizer: tree mode
@@ -46,7 +51,7 @@ keywords = t.extract_keywords(
 print(repr(keywords))
 # NER Extractor
-entities = t.extract_entities("We will be dead by the car crash")
+entities = t.extract_entities("We will be dead by the car crash", with_analysis=True)
 print(repr(entities))
@@ -84,6 +89,13 @@ print(repr(summary))
 translation = t.translate("سلام حالت چطوره؟", target_language="English")
 print(repr(translation))
+# propositionize
+propositionize = t.propositionize(
+    "جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
+    output_lang="Persian",
+)
+print(repr(propositionize))
 # Custom tool
 class Student(BaseModel):

{hamtaa_texttools-1.1.16 → hamtaa_texttools-1.1.18}/tests/test_output_validation.py RENAMED Viewed

@@ -29,7 +29,7 @@ question = t.text_to_question(
     "زندگی",
     output_lang="Persian",
     validator=validate,
-    max_validation_retries=5,
+    max_validation_retries=0,
     temperature=1.0,
 )
-print(question)
+print(repr(question))

{hamtaa_texttools-1.1.16 → hamtaa_texttools-1.1.18}/texttools/__init__.py RENAMED Viewed

@@ -2,6 +2,6 @@ from .batch.batch_runner import BatchJobRunner
 from .batch.batch_config import BatchConfig
 from .tools.sync_tools import TheTool
 from .tools.async_tools import AsyncTheTool
-from .tools.internals.models import CategoryTree
+from .internals.models import CategoryTree
 __all__ = ["TheTool", "AsyncTheTool", "BatchJobRunner", "BatchConfig", "CategoryTree"]

{hamtaa_texttools-1.1.16 → hamtaa_texttools-1.1.18}/texttools/batch/batch_runner.py RENAMED Viewed

@@ -11,7 +11,8 @@ from pydantic import BaseModel
 from texttools.batch.internals.batch_manager import BatchManager
 from texttools.batch.batch_config import BatchConfig
-from texttools.tools.internals.models import StrOutput
+from texttools.internals.models import StrOutput
+from texttools.internals.exceptions import TextToolsError, ConfigurationError
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
@@ -27,22 +28,26 @@ class BatchJobRunner:
     def __init__(
         self, config: BatchConfig = BatchConfig(), output_model: Type[T] = StrOutput
     ):
-        self._config = config
-        self._system_prompt = config.system_prompt
-        self._job_name = config.job_name
-        self._input_data_path = config.input_data_path
-        self._output_data_filename = config.output_data_filename
-        self._model = config.model
-        self._output_model = output_model
-        self._manager = self._init_manager()
-        self._data = self._load_data()
-        self._parts: list[list[dict[str, Any]]] = []
-        # Map part index to job name
-        self._part_idx_to_job_name: dict[int, str] = {}
-        # Track retry attempts per part
-        self._part_attempts: dict[int, int] = {}
-        self._partition_data()
-        Path(self._config.BASE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
+        try:
+            self._config = config
+            self._system_prompt = config.system_prompt
+            self._job_name = config.job_name
+            self._input_data_path = config.input_data_path
+            self._output_data_filename = config.output_data_filename
+            self._model = config.model
+            self._output_model = output_model
+            self._manager = self._init_manager()
+            self._data = self._load_data()
+            self._parts: list[list[dict[str, Any]]] = []
+            # Map part index to job name
+            self._part_idx_to_job_name: dict[int, str] = {}
+            # Track retry attempts per part
+            self._part_attempts: dict[int, int] = {}
+            self._partition_data()
+            Path(self._config.BASE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
+        except Exception as e:
+            raise ConfigurationError(f"Batch runner initialization failed: {e}")
     def _init_manager(self) -> BatchManager:
         load_dotenv()
@@ -162,56 +167,62 @@ class BatchJobRunner:
         Submits jobs, monitors progress, handles retries, and saves results.
         """
-        # Submit all jobs up-front for concurrent execution
-        self._submit_all_jobs()
-        pending_parts: set[int] = set(self._part_idx_to_job_name.keys())
-        logger.info(f"Pending parts: {sorted(pending_parts)}")
-        # Polling loop
-        while pending_parts:
-            finished_this_round: list[int] = []
-            for part_idx in list(pending_parts):
-                job_name = self._part_idx_to_job_name[part_idx]
-                status = self._manager.check_status(job_name=job_name)
-                logger.info(f"Status for {job_name}: {status}")
-                if status == "completed":
-                    logger.info(
-                        f"Job completed. Fetching results for part {part_idx + 1}..."
-                    )
-                    output_data, log = self._manager.fetch_results(
-                        job_name=job_name, remove_cache=False
-                    )
-                    output_data = self._config.import_function(output_data)
-                    self._save_results(output_data, log, part_idx)
-                    logger.info(f"Fetched and saved results for part {part_idx + 1}.")
-                    finished_this_round.append(part_idx)
-                elif status == "failed":
-                    attempt = self._part_attempts.get(part_idx, 0) + 1
-                    self._part_attempts[part_idx] = attempt
-                    if attempt <= self._config.max_retries:
+        try:
+            # Submit all jobs up-front for concurrent execution
+            self._submit_all_jobs()
+            pending_parts: set[int] = set(self._part_idx_to_job_name.keys())
+            logger.info(f"Pending parts: {sorted(pending_parts)}")
+            # Polling loop
+            while pending_parts:
+                finished_this_round: list[int] = []
+                for part_idx in list(pending_parts):
+                    job_name = self._part_idx_to_job_name[part_idx]
+                    status = self._manager.check_status(job_name=job_name)
+                    logger.info(f"Status for {job_name}: {status}")
+                    if status == "completed":
                         logger.info(
-                            f"Job {job_name} failed (attempt {attempt}). Retrying after short backoff..."
+                            f"Job completed. Fetching results for part {part_idx + 1}..."
                         )
-                        self._manager._clear_state(job_name)
-                        time.sleep(10)
-                        payload = self._to_manager_payload(self._parts[part_idx])
-                        new_job_name = (
-                            f"{self._job_name}_part_{part_idx + 1}_retry_{attempt}"
+                        output_data, log = self._manager.fetch_results(
+                            job_name=job_name, remove_cache=False
                         )
-                        self._manager.start(payload, job_name=new_job_name)
-                        self._part_idx_to_job_name[part_idx] = new_job_name
-                    else:
+                        output_data = self._config.import_function(output_data)
+                        self._save_results(output_data, log, part_idx)
                         logger.info(
-                            f"Job {job_name} failed after {attempt - 1} retries. Marking as failed."
+                            f"Fetched and saved results for part {part_idx + 1}."
                         )
                         finished_this_round.append(part_idx)
-                else:
-                    # Still running or queued
-                    continue
-            # Remove finished parts
-            for part_idx in finished_this_round:
-                pending_parts.discard(part_idx)
-            if pending_parts:
-                logger.info(
-                    f"Waiting {self._config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
-                )
-                time.sleep(self._config.poll_interval_seconds)
+                    elif status == "failed":
+                        attempt = self._part_attempts.get(part_idx, 0) + 1
+                        self._part_attempts[part_idx] = attempt
+                        if attempt <= self._config.max_retries:
+                            logger.info(
+                                f"Job {job_name} failed (attempt {attempt}). Retrying after short backoff..."
+                            )
+                            self._manager._clear_state(job_name)
+                            time.sleep(10)
+                            payload = self._to_manager_payload(self._parts[part_idx])
+                            new_job_name = (
+                                f"{self._job_name}_part_{part_idx + 1}_retry_{attempt}"
+                            )
+                            self._manager.start(payload, job_name=new_job_name)
+                            self._part_idx_to_job_name[part_idx] = new_job_name
+                        else:
+                            logger.info(
+                                f"Job {job_name} failed after {attempt - 1} retries. Marking as failed."
+                            )
+                            finished_this_round.append(part_idx)
+                    else:
+                        # Still running or queued
+                        continue
+                # Remove finished parts
+                for part_idx in finished_this_round:
+                    pending_parts.discard(part_idx)
+                if pending_parts:
+                    logger.info(
+                        f"Waiting {self._config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
+                    )
+                    time.sleep(self._config.poll_interval_seconds)
+        except Exception as e:
+            raise TextToolsError(f"Batch job execution failed: {e}")

{hamtaa_texttools-1.1.16/texttools/tools → hamtaa_texttools-1.1.18/texttools}/internals/async_operator.py RENAMED Viewed

@@ -5,10 +5,16 @@ import logging
 from openai import AsyncOpenAI
 from pydantic import BaseModel
-from texttools.tools.internals.models import ToolOutput
-from texttools.tools.internals.operator_utils import OperatorUtils
-from texttools.tools.internals.formatters import Formatter
-from texttools.tools.internals.prompt_loader import PromptLoader
+from texttools.internals.models import ToolOutput
+from texttools.internals.operator_utils import OperatorUtils
+from texttools.internals.formatters import Formatter
+from texttools.internals.prompt_loader import PromptLoader
+from texttools.internals.exceptions import (
+    TextToolsError,
+    LLMError,
+    ValidationError,
+    PromptError,
+)
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
@@ -35,15 +41,33 @@ class AsyncOperator:
         Calls OpenAI API for analysis using the configured prompt template.
         Returns the analyzed content as a string.
         """
-        analyze_prompt = prompt_configs["analyze_template"]
-        analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
-        completion = await self._client.chat.completions.create(
-            model=self._model,
-            messages=analyze_message,
-            temperature=temperature,
-        )
-        analysis = completion.choices[0].message.content.strip()
-        return analysis
+        try:
+            analyze_prompt = prompt_configs["analyze_template"]
+            if not analyze_prompt:
+                raise PromptError("Analyze template is empty")
+            analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
+            completion = await self._client.chat.completions.create(
+                model=self._model,
+                messages=analyze_message,
+                temperature=temperature,
+            )
+            if not completion.choices:
+                raise LLMError("No choices returned from LLM")
+            analysis = completion.choices[0].message.content.strip()
+            if not analysis:
+                raise LLMError("Empty analysis response")
+            return analysis.strip()
+        except Exception as e:
+            if isinstance(e, (PromptError, LLMError)):
+                raise
+            raise LLMError(f"Analysis failed: {e}")
     async def _parse_completion(
         self,
@@ -58,21 +82,37 @@ class AsyncOperator:
         Parses a chat completion using OpenAI's structured output format.
         Returns both the parsed object and the raw completion for logprobs.
         """
-        request_kwargs = {
-            "model": self._model,
-            "messages": message,
-            "response_format": output_model,
-            "temperature": temperature,
-        }
-        if logprobs:
-            request_kwargs["logprobs"] = True
-            request_kwargs["top_logprobs"] = top_logprobs
-        if priority:
-            request_kwargs["extra_body"] = {"priority": priority}
-        completion = await self._client.beta.chat.completions.parse(**request_kwargs)
-        parsed = completion.choices[0].message.parsed
-        return parsed, completion
+        try:
+            request_kwargs = {
+                "model": self._model,
+                "messages": message,
+                "response_format": output_model,
+                "temperature": temperature,
+            }
+            if logprobs:
+                request_kwargs["logprobs"] = True
+                request_kwargs["top_logprobs"] = top_logprobs
+            if priority:
+                request_kwargs["extra_body"] = {"priority": priority}
+            completion = await self._client.beta.chat.completions.parse(
+                **request_kwargs
+            )
+            if not completion.choices:
+                raise LLMError("No choices returned from LLM")
+            parsed = completion.choices[0].message.parsed
+            if not parsed:
+                raise LLMError("Failed to parse LLM response")
+            return parsed, completion
+        except Exception as e:
+            if isinstance(e, LLMError):
+                raise
+            raise LLMError(f"Completion failed: {e}")
     async def run(
         self,
@@ -94,13 +134,13 @@ class AsyncOperator:
         **extra_kwargs,
     ) -> ToolOutput:
         """
-        Execute the async LLM pipeline with the given input text. (Async)
+        Execute the LLM pipeline with the given input text. (Async)
         """
-        prompt_loader = PromptLoader()
-        formatter = Formatter()
-        output = ToolOutput()
         try:
+            prompt_loader = PromptLoader()
+            formatter = Formatter()
+            output = ToolOutput()
             # Prompt configs contain two keys: main_template and analyze template, both are string
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
@@ -139,6 +179,9 @@ class AsyncOperator:
             messages = formatter.user_merge_format(messages)
+            if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
+                raise ValueError("top_logprobs should be an integer greater than 1")
             parsed, completion = await self._parse_completion(
                 messages, output_model, temperature, logprobs, top_logprobs, priority
             )
@@ -147,6 +190,15 @@ class AsyncOperator:
             # Retry logic if validation fails
             if validator and not validator(output.result):
+                if (
+                    not isinstance(max_validation_retries, int)
+                    or max_validation_retries < 1
+                ):
+                    raise ValueError(
+                        "max_validation_retries should be a positive integer"
+                    )
+                succeeded = False
                 for attempt in range(max_validation_retries):
                     logger.warning(
                         f"Validation failed, retrying for the {attempt + 1} time."
@@ -154,6 +206,7 @@ class AsyncOperator:
                     # Generate new temperature for retry
                     retry_temperature = OperatorUtils.get_retry_temp(temperature)
                     try:
                         parsed, completion = await self._parse_completion(
                             messages,
@@ -161,28 +214,23 @@ class AsyncOperator:
                             retry_temperature,
                             logprobs,
                             top_logprobs,
+                            priority=priority,
                         )
                         output.result = parsed.result
                         # Check if retry was successful
                         if validator(output.result):
-                            logger.info(
-                                f"Validation passed on retry attempt {attempt + 1}"
-                            )
+                            succeeded = True
                             break
-                        else:
-                            logger.warning(
-                                f"Validation still failing after retry attempt {attempt + 1}"
-                            )
-                    except Exception as e:
+                    except LLMError as e:
                         logger.error(f"Retry attempt {attempt + 1} failed: {e}")
-                        # Continue to next retry attempt if this one fails
-            # Final check after all retries
-            if validator and not validator(output.result):
-                output.errors.append("Validation failed after all retry attempts")
+                if not succeeded:
+                    raise ValidationError(
+                        f"Validation failed after {max_validation_retries} retries"
+                    )
             if logprobs:
                 output.logprobs = OperatorUtils.extract_logprobs(completion)
@@ -194,7 +242,7 @@ class AsyncOperator:
             return output
+        except (PromptError, LLMError, ValidationError):
+            raise
         except Exception as e:
-            logger.error(f"AsyncTheTool failed: {e}")
-            output.errors.append(str(e))
-            return output
+            raise TextToolsError(f"Unexpected error in operator: {e}")

hamtaa_texttools-1.1.18/texttools/internals/exceptions.py ADDED Viewed

@@ -0,0 +1,28 @@
+class TextToolsError(Exception):
+    """Base exception for all TextTools errors."""
+    pass
+class PromptError(TextToolsError):
+    """Errors related to prompt loading and formatting."""
+    pass
+class LLMError(TextToolsError):
+    """Errors from LLM API calls."""
+    pass
+class ValidationError(TextToolsError):
+    """Errors from output validation."""
+    pass
+class ConfigurationError(TextToolsError):
+    """Errors from misconfiguration."""
+    pass

hamtaa-texttools 1.1.16__tar.gz → 1.1.18__tar.gz

hamtaa-texttools 1.1.16__tar.gz → 1.1.18__tar.gz