hamtaa-texttools 1.1.17__py3-none-any.whl → 1.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,15 +5,21 @@ import logging
 from openai import OpenAI
 from pydantic import BaseModel
 
-from texttools.tools.internals.models import ToolOutput
-from texttools.tools.internals.operator_utils import OperatorUtils
-from texttools.tools.internals.formatters import Formatter
-from texttools.tools.internals.prompt_loader import PromptLoader
+from texttools.internals.models import ToolOutput
+from texttools.internals.operator_utils import OperatorUtils
+from texttools.internals.formatters import Formatter
+from texttools.internals.prompt_loader import PromptLoader
+from texttools.internals.exceptions import (
+    TextToolsError,
+    LLMError,
+    ValidationError,
+    PromptError,
+)
 
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-logger = logging.getLogger("texttools.operator")
+logger = logging.getLogger("texttools.sync_operator")
 
 
 class Operator:
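
The new texttools.internals.exceptions module is imported above, but its contents are not part of this diff. Below is a minimal sketch of what it plausibly contains, assuming a flat hierarchy rooted at TextToolsError; the catch-and-reraise logic later in this file is consistent with the three specific errors subclassing it, but the module body itself is an assumption.

# Hypothetical sketch of texttools/internals/exceptions.py; everything here is
# inferred from the import list and re-raise logic, not shown in this diff.
class TextToolsError(Exception):
    """Base class for all texttools errors."""

class PromptError(TextToolsError):
    """A prompt template is missing, empty, or malformed."""

class LLMError(TextToolsError):
    """The LLM call failed or returned an unusable response."""

class ValidationError(TextToolsError):
    """Output validation still failed after all retry attempts."""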
@@ -35,15 +41,33 @@ class Operator:
         Calls OpenAI API for analysis using the configured prompt template.
         Returns the analyzed content as a string.
         """
-        analyze_prompt = prompt_configs["analyze_template"]
-        analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
-        completion = self._client.chat.completions.create(
-            model=self._model,
-            messages=analyze_message,
-            temperature=temperature,
-        )
-        analysis = completion.choices[0].message.content.strip()
-        return analysis
+        try:
+            analyze_prompt = prompt_configs["analyze_template"]
+
+            if not analyze_prompt:
+                raise PromptError("Analyze template is empty")
+
+            analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
+            completion = self._client.chat.completions.create(
+                model=self._model,
+                messages=analyze_message,
+                temperature=temperature,
+            )
+
+            if not completion.choices:
+                raise LLMError("No choices returned from LLM")
+
+            analysis = completion.choices[0].message.content.strip()
+
+            if not analysis:
+                raise LLMError("Empty analysis response")
+
+            return analysis.strip()
+
+        except Exception as e:
+            if isinstance(e, (PromptError, LLMError)):
+                raise
+            raise LLMError(f"Analysis failed: {e}")
 
     def _parse_completion(
         self,
@@ -58,23 +82,35 @@ class Operator:
         Parses a chat completion using OpenAI's structured output format.
         Returns both the parsed object and the raw completion for logprobs.
         """
-        request_kwargs = {
-            "model": self._model,
-            "messages": message,
-            "response_format": output_model,
-            "temperature": temperature,
-        }
+        try:
+            request_kwargs = {
+                "model": self._model,
+                "messages": message,
+                "response_format": output_model,
+                "temperature": temperature,
+            }
+
+            if logprobs:
+                request_kwargs["logprobs"] = True
+                request_kwargs["top_logprobs"] = top_logprobs
+            if priority:
+                request_kwargs["extra_body"] = {"priority": priority}
+            completion = self._client.beta.chat.completions.parse(**request_kwargs)
+
+            if not completion.choices:
+                raise LLMError("No choices returned from LLM")
+
+            parsed = completion.choices[0].message.parsed
 
-        if logprobs:
-            request_kwargs["logprobs"] = True
-            request_kwargs["top_logprobs"] = top_logprobs
+            if not parsed:
+                raise LLMError("Failed to parse LLM response")
 
-        if priority:
-            request_kwargs["extra_body"] = {"priority": priority}
+            return parsed, completion
 
-        completion = self._client.beta.chat.completions.parse(**request_kwargs)
-        parsed = completion.choices[0].message.parsed
-        return parsed, completion
+        except Exception as e:
+            if isinstance(e, LLMError):
+                raise
+            raise LLMError(f"Completion failed: {e}")
 
     def run(
         self,
@@ -96,12 +132,13 @@ class Operator:
         **extra_kwargs,
     ) -> ToolOutput:
         """
-        Execute the LLM pipeline with the given input text.
+        Execute the LLM pipeline with the given input text. (Sync)
         """
-        prompt_loader = PromptLoader()
-        formatter = Formatter()
-        output = ToolOutput()
         try:
+            prompt_loader = PromptLoader()
+            formatter = Formatter()
+            output = ToolOutput()
+
             # Prompt configs contain two keys: main_template and analyze template, both are string
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
@@ -140,6 +177,9 @@ class Operator:
 
             messages = formatter.user_merge_format(messages)
 
+            if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
+                raise ValueError("top_logprobs should be an integer greater than 1")
+
             parsed, completion = self._parse_completion(
                 messages, output_model, temperature, logprobs, top_logprobs, priority
             )
@@ -148,6 +188,15 @@ class Operator:
 
             # Retry logic if validation fails
             if validator and not validator(output.result):
+                if (
+                    not isinstance(max_validation_retries, int)
+                    or max_validation_retries < 1
+                ):
+                    raise ValueError(
+                        "max_validation_retries should be a positive integer"
+                    )
+
+                succeeded = False
                 for attempt in range(max_validation_retries):
                     logger.warning(
                         f"Validation failed, retrying for the {attempt + 1} time."
@@ -155,6 +204,7 @@ class Operator:
 
                     # Generate new temperature for retry
                     retry_temperature = OperatorUtils.get_retry_temp(temperature)
+
                     try:
                         parsed, completion = self._parse_completion(
                             messages,
@@ -162,28 +212,23 @@ class Operator:
                             retry_temperature,
                             logprobs,
                             top_logprobs,
+                            priority=priority,
                         )
 
                        output.result = parsed.result
 
                         # Check if retry was successful
                         if validator(output.result):
-                            logger.info(
-                                f"Validation passed on retry attempt {attempt + 1}"
-                            )
+                            succeeded = True
                             break
-                        else:
-                            logger.warning(
-                                f"Validation still failing after retry attempt {attempt + 1}"
-                            )
 
-                    except Exception as e:
+                    except LLMError as e:
                         logger.error(f"Retry attempt {attempt + 1} failed: {e}")
-                        # Continue to next retry attempt if this one fails
 
-                # Final check after all retries
-                if validator and not validator(output.result):
-                    output.errors.append("Validation failed after all retry attempts")
+                if not succeeded:
+                    raise ValidationError(
+                        f"Validation failed after {max_validation_retries} retries"
+                    )
 
             if logprobs:
                 output.logprobs = OperatorUtils.extract_logprobs(completion)
@@ -195,7 +240,7 @@ class Operator:
 
             return output
 
+        except (PromptError, LLMError, ValidationError):
+            raise
         except Exception as e:
-            logger.error(f"TheTool failed: {e}")
-            output.errors.append(str(e))
-            return output
+            raise TextToolsError(f"Unexpected error in operator: {e}")
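
Taken together, these hunks change run()'s failure contract: in 1.1.17 exceptions were logged and surfaced through output.errors on a returned ToolOutput, while in 1.1.19 they propagate as typed exceptions. A hedged sketch of how a caller might restore the old tolerant behavior; only Operator.run's visible contract from this diff is assumed, and constructing the operator is out of scope here.

import logging

from texttools.internals.exceptions import TextToolsError

logger = logging.getLogger(__name__)

def run_tolerant(operator, **kwargs):
    # Approximates the pre-1.1.19 behavior: log the failure and return None
    # instead of letting PromptError/LLMError/ValidationError propagate.
    try:
        return operator.run(**kwargs)
    except TextToolsError as e:
        logger.error("texttools operation failed: %s", e)
        return None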
@@ -0,0 +1,19 @@
+main_template: |
+  You are an expert in determining whether a statement can be concluded from the source text or not.
+  You must return a boolean value: True or False.
+  Return True if the statement can be concluded from the source, and False otherwise.
+  Respond only in JSON format (Output should be a boolean):
+  {{"result": True/False}}
+  The statement is:
+  {input}
+  The source text is:
+  {source_text}
+
+analyze_template: |
+  You should analyze a statement and a source text and provide a brief,
+  summarized analysis that could help in determining that can the statement
+  be concluded from the source or not.
+  The statement is:
+  {input}
+  The source text is:
+  {source_text}
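
One detail worth noting in this new template is the doubled braces around the JSON example. Assuming PromptLoader fills templates with Python str.format-style substitution (suggested by the bare {input} and {source_text} placeholders), {{...}} renders as literal braces while the placeholders are replaced. Note also that True/False is Python literal style rather than strict JSON true/false; presumably the structured-output parsing tolerates this.

# Demonstrates the brace escaping used in the template above; format-style
# substitution by PromptLoader is an assumption, not confirmed by this diff.
line = 'Respond only in JSON format:\n{{"result": True/False}}\nThe statement is:\n{input}'
print(line.format(input="Water boils at 100 C at sea level."))
# Respond only in JSON format:
# {"result": True/False}
# The statement is:
# Water boils at 100 C at sea level.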
@@ -1,10 +1,17 @@
 main_template: |
-  You are an expert in breaking down text into atomic propositions in that language.
-  An atomic proposition is a single, self-contained fact that is concise, verifiable,
-  and does not rely on external context.
-  Each proposition must stand alone.
-  Rewrite sentences if needed to keep the context saved in each sentence.
-  Extract the atomic propositions of this text:
+  You are an expert data analyst specializing in Information Extraction.
+  Your task is to extract a list of "Atomic Propositions" from the provided text.
+
+  Definition of Atomic Proposition:
+  A single, self-contained statement of fact that is concise and verifiable.
+
+  Strict Guidelines:
+  1. Remove Meta-Data: STRICTLY EXCLUDE all citations, references, URLs, source attributions (e.g., "Source: makarem.ir"), and conversational fillers (e.g., "Based on the documents...", "In conclusion...").
+  2. Resolve Context: Replace pronouns ("it", "this", "they") with the specific nouns they refer to. Each proposition must make sense in isolation.
+  3. Preserve Logic: Keep conditions attached to their facts. Do not split a rule from its condition (e.g., "If X, then Y" should be one proposition).
+  4. No Redundancy: Do not extract summary statements that merely repeat facts already listed.
+
+  Extract the atomic propositions from the following text:
   {input}
 
 analyze_template: |
@@ -4,4 +4,4 @@ main_template: |
   {output_model_str}
 
 analyze_template: |
-
+  {analyze_template}
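
The final hunk fills a previously empty analyze_template block with an {analyze_template} placeholder, suggesting PromptLoader can substitute a shared analysis prompt into per-tool templates. A rough sketch of how these YAML prompt files would be consumed; the filename and the substitution step are assumptions, since only the keys main_template and analyze_template appear in this diff.

# Hedged sketch: loading one of the prompt YAML files above and filling its
# placeholders; PromptLoader's actual behavior is not shown in this diff.
import yaml

with open("check_conclusion.yaml") as f:  # hypothetical filename
    config = yaml.safe_load(f)

analysis_prompt = config["analyze_template"].format(
    input="The treaty was signed in 1848.",
    source_text="The Treaty of Guadalupe Hidalgo was signed on February 2, 1848.",
)
# The resulting analysis would then feed the main template's structured call,
# mirroring the _analyze -> _parse_completion flow in the operator diff above.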