hamtaa-texttools 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hamtaa-texttools might be problematic.
- {hamtaa_texttools-1.0.5.dist-info → hamtaa_texttools-1.0.7.dist-info}/METADATA +15 -15
- hamtaa_texttools-1.0.7.dist-info/RECORD +31 -0
- texttools/batch/batch_manager.py +7 -18
- texttools/batch/batch_runner.py +96 -45
- texttools/prompts/README.md +4 -0
- texttools/prompts/{keyword_extractor.yaml → extract_keywords.yaml} +6 -6
- texttools/prompts/{question_merger.yaml → merge_questions.yaml} +5 -5
- texttools/tools/async_the_tool.py +204 -143
- texttools/tools/internals/async_operator.py +98 -204
- texttools/tools/internals/base_operator.py +85 -0
- texttools/tools/internals/operator.py +27 -130
- texttools/tools/internals/prompt_loader.py +12 -22
- texttools/tools/the_tool.py +162 -225
- hamtaa_texttools-1.0.5.dist-info/RECORD +0 -30
- {hamtaa_texttools-1.0.5.dist-info → hamtaa_texttools-1.0.7.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-1.0.5.dist-info → hamtaa_texttools-1.0.7.dist-info}/licenses/LICENSE +0 -0
- {hamtaa_texttools-1.0.5.dist-info → hamtaa_texttools-1.0.7.dist-info}/top_level.txt +0 -0
- /texttools/prompts/{ner_extractor.yaml → extract_entities.yaml} +0 -0
- /texttools/prompts/{question_detector.yaml → is_question.yaml} +0 -0
- /texttools/prompts/{rewriter.yaml → rewrite.yaml} +0 -0
- /texttools/prompts/{custom_tool.yaml → run_custom.yaml} +0 -0
- /texttools/prompts/{subject_question_generator.yaml → subject_to_question.yaml} +0 -0
- /texttools/prompts/{summarizer.yaml → summarize.yaml} +0 -0
- /texttools/prompts/{question_generator.yaml → text_to_question.yaml} +0 -0
- /texttools/prompts/{translator.yaml → translate.yaml} +0 -0
--- texttools/tools/internals/operator.py (1.0.5)
+++ texttools/tools/internals/operator.py (1.0.7)
@@ -1,14 +1,12 @@
 from __future__ import annotations
 
-import math
-import re
-from typing import Any, TypeVar, Type, Literal, Optional
-import json
+from typing import Any, TypeVar, Type, Literal
 import logging
 
 from openai import OpenAI
 from pydantic import BaseModel
 
+from texttools.tools.internals.base_operator import BaseOperator
 from texttools.formatters.user_merge_formatter import (
     UserMergeFormatter,
 )
@@ -22,7 +20,7 @@ logger = logging.getLogger("operator")
 logger.setLevel(logging.INFO)
 
 
-class Operator:
+class Operator(BaseOperator):
     """
     Core engine for running text-processing operations with an LLM.
 
@@ -30,71 +28,46 @@ class Operator:
     - `PromptLoader` → loads YAML prompt templates.
     - `UserMergeFormatter` → applies formatting to messages (e.g., merging).
     - OpenAI client → executes completions/parsed completions.
-
-    Workflow inside `run()`:
-    1. Load prompt templates (`main_template` [+ `analyze_template` if enabled]).
-    2. Optionally generate an "analysis" step via `_analyze()`.
-    3. Build messages for the LLM.
-    4. Call `.beta.chat.completions.parse()` to parse the result into the
-       configured `OUTPUT_MODEL` (a Pydantic schema).
-    5. Return results as a dict (always `{"result": ...}`, plus `analysis`
-       if analysis was enabled).
-
-    Attributes configured dynamically by `TheTool`:
-    - PROMPT_FILE: str → YAML filename
-    - OUTPUT_MODEL: Pydantic model class
-    - WITH_ANALYSIS: bool → whether to run an analysis phase first
-    - USE_MODES: bool → whether to select prompts by mode
-    - MODE: str → which mode to use if modes are enabled
-    - RESP_FORMAT: str → "vllm" or "parse"
     """
 
-    def __init__(self, client: OpenAI):
+    def __init__(self, client: OpenAI, model: str):
         self.client: OpenAI = client
-
-    def _build_user_message(self, prompt: str) -> dict[str, str]:
-        return {"role": "user", "content": prompt}
+        self.model = model
 
     def _analysis_completion(
         self,
         analyze_message: list[dict[str, str]],
-        model: str,
         temperature: float,
     ) -> str:
         completion = self.client.chat.completions.create(
-            model=model,
+            model=self.model,
             messages=analyze_message,
             temperature=temperature,
         )
         analysis = completion.choices[0].message.content.strip()
         return analysis
 
-    def _analyze(
-        self,
-        prompt_configs: dict[str, str],
-        model: str,
-        temperature: float,
-    ) -> str:
+    def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
         analyze_prompt = prompt_configs["analyze_template"]
         analyze_message = [self._build_user_message(analyze_prompt)]
-        analysis = self._analysis_completion(analyze_message, model, temperature)
+        analysis = self._analysis_completion(analyze_message, temperature)
         return analysis
 
     def _parse_completion(
         self,
         message: list[dict[str, str]],
         output_model: Type[T],
-        model: str,
         temperature: float,
         logprobs: bool = False,
         top_logprobs: int = 3,
     ) -> tuple[Type[T], Any]:
         request_kwargs = {
-            "model": model,
+            "model": self.model,
             "messages": message,
             "response_format": output_model,
             "temperature": temperature,
         }
+
         if logprobs:
             request_kwargs["logprobs"] = True
             request_kwargs["top_logprobs"] = top_logprobs
@@ -103,57 +76,20 @@ class Operator:
         parsed = completion.choices[0].message.parsed
         return parsed, completion
 
-    def _clean_json_response(self, response: str) -> str:
-        """
-        Clean JSON response by removing code block markers and whitespace.
-        Handles cases like:
-        - ```json{"result": "value"}```
-        """
-        stripped = response.strip()
-        cleaned = re.sub(r"^```(?:json)?\s*", "", stripped)
-        cleaned = re.sub(r"\s*```$", "", cleaned)
-
-        return cleaned.strip()
-
-    def _convert_to_output_model(
-        self, response_string: str, output_model: Type[T]
-    ) -> Type[T]:
-        """
-        Convert a JSON response string to output model.
-
-        Args:
-            response_string: The JSON string (may contain code block markers)
-            output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
-
-        Returns:
-            Instance of your output model
-        """
-        # Clean the response string
-        cleaned_json = self._clean_json_response(response_string)
-
-        # Fix Python-style booleans
-        cleaned_json = cleaned_json.replace("False", "false").replace("True", "true")
-
-        # Convert string to Python dictionary
-        response_dict = json.loads(cleaned_json)
-
-        # Convert dictionary to output model
-        return output_model(**response_dict)
-
     def _vllm_completion(
         self,
         message: list[dict[str, str]],
         output_model: Type[T],
-        model: str,
         temperature: float,
         logprobs: bool = False,
         top_logprobs: int = 3,
+        max_tokens: int | None = None,
     ) -> tuple[Type[T], Any]:
         json_schema = output_model.model_json_schema()
 
         # Build kwargs dynamically
         request_kwargs = {
-            "model": model,
+            "model": self.model,
             "messages": message,
             "extra_body": {"guided_json": json_schema},
             "temperature": temperature,
@@ -170,63 +106,25 @@
         parsed = self._convert_to_output_model(response, output_model)
         return parsed, completion
 
-    def _extract_logprobs(self, completion: dict):
-        logprobs_data = []
-        ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
-
-        for choice in completion.choices:
-            if not getattr(choice, "logprobs", None):
-                logger.info("No logprobs found.")
-                continue
-
-            for logprob_item in choice.logprobs.content:
-                if ignore_pattern.match(logprob_item.token):
-                    continue
-                token_entry = {
-                    "token": logprob_item.token,
-                    "prob": round(math.exp(logprob_item.logprob), 8),
-                    "top_alternatives": [],
-                }
-                for alt in logprob_item.top_logprobs:
-                    if ignore_pattern.match(alt.token):
-                        continue
-                    token_entry["top_alternatives"].append(
-                        {
-                            "token": alt.token,
-                            "prob": round(math.exp(alt.logprob), 8),
-                        }
-                    )
-                logprobs_data.append(token_entry)
-
-        return logprobs_data
-
     def run(
         self,
-        text: str,
         # User parameters
-
+        text: str,
         with_analysis: bool,
+        output_lang: str | None,
+        user_prompt: str | None,
         temperature: float,
         logprobs: bool,
-        top_logprobs: int,
-
-        output_lang: str | None,
-        # Each tool's parameters
+        top_logprobs: int | None,
+        # Internal parameters
         prompt_file: str,
         output_model: Type[T],
-        resp_format: Literal["vllm", "parse"]
-        mode: str | None
+        resp_format: Literal["vllm", "parse"],
+        mode: str | None,
        **extra_kwargs,
     ) -> dict[str, Any]:
         """
         Execute the LLM pipeline with the given input text.
-
-        Args:
-            text: The text to process (will be stripped of whitespace)
-            **extra_kwargs: Additional variables to inject into prompt templates
-
-        Returns:
-            Dictionary containing the parsed result and optional analysis
         """
         prompt_loader = PromptLoader()
         formatter = UserMergeFormatter()
@@ -244,7 +142,7 @@
         messages: list[dict[str, str]] = []
 
         if with_analysis:
-            analysis = self._analyze(prompt_configs, model, temperature)
+            analysis = self._analyze(prompt_configs, temperature)
             messages.append(
                 self._build_user_message(f"Based on this analysis: {analysis}")
             )
@@ -262,16 +160,15 @@
             )
 
         messages.append(self._build_user_message(prompt_configs["main_template"]))
-
         messages = formatter.format(messages)
 
         if resp_format == "vllm":
             parsed, completion = self._vllm_completion(
-                messages, output_model, model, temperature, logprobs, top_logprobs
+                messages, output_model, temperature, logprobs, top_logprobs
             )
         elif resp_format == "parse":
             parsed, completion = self._parse_completion(
-                messages, output_model, model, temperature, logprobs, top_logprobs
+                messages, output_model, temperature, logprobs, top_logprobs
             )
 
         # Ensure output_model has a `result` field
@@ -280,16 +177,16 @@
                 "The provided output_model must define a field named 'result'"
             )
 
-
+            result = {"result": parsed.result}
 
             if logprobs:
-
+                result["logprobs"] = self._extract_logprobs(completion)
 
             if with_analysis:
-
+                result["analysis"] = analysis
 
-            return
+            return result
 
         except Exception as e:
-            logger.error(f"
+            logger.error(f"TheTool failed: {e}")
             return {"Error": str(e), "result": ""}
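Read together, the operator.py changes bind the model name to the Operator instance instead of threading it through every call, and the user-message, JSON-cleanup, and logprob helpers removed here appear to move into the new base_operator.py added in this release. The following is a minimal usage sketch of the 1.0.7 call shape, not code from the package: the endpoint URL, model name, StrOutput schema, and the exact prompt_file value are illustrative assumptions, and in practice this wiring is presumably done by TheTool rather than by hand.

from openai import OpenAI
from pydantic import BaseModel

from texttools.tools.internals.operator import Operator


class StrOutput(BaseModel):
    # run() requires the output model to expose a `result` field
    result: str


# Hypothetical OpenAI-compatible endpoint; resp_format="vllm" assumes a vLLM backend
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
op = Operator(client=client, model="my-model")  # model is now fixed at construction

out = op.run(
    text="Is the Eiffel Tower in Paris?",
    with_analysis=False,
    output_lang=None,
    user_prompt=None,
    temperature=0.0,
    logprobs=False,
    top_logprobs=None,
    prompt_file="is_question.yaml",  # renamed from question_detector.yaml in 1.0.7
    output_model=StrOutput,
    resp_format="vllm",              # "parse" would use client.beta.chat.completions.parse
    mode=None,
)
print(out["result"])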
--- texttools/tools/internals/prompt_loader.py (1.0.5)
+++ texttools/tools/internals/prompt_loader.py (1.0.7)
@@ -1,4 +1,4 @@
-from
+from functools import lru_cache
 from pathlib import Path
 import yaml
 
@@ -7,10 +7,6 @@ class PromptLoader:
     """
     Utility for loading and formatting YAML prompt templates.
 
-    Each YAML file under `prompts/` must define at least a `main_template`,
-    and optionally an `analyze_template`. These can either be a single string
-    or a dictionary keyed by mode names (if `use_modes=True`).
-
     Responsibilities:
     - Load and parse YAML prompt definitions.
     - Select the right template (by mode, if applicable).
@@ -22,31 +18,30 @@ class PromptLoader:
     }
     """
 
+    def __init__(self):
+        self.base_dir = Path(__file__).parent.parent.parent / Path("prompts")
+
     MAIN_TEMPLATE: str = "main_template"
     ANALYZE_TEMPLATE: str = "analyze_template"
 
-
-
-
-        prompt_file
-        mode: str | None,
-    ) -> dict[str, str]:
-        prompt_path = Path(__file__).parent.parent.parent / prompts_dir / prompt_file
+    # Use lru_cache to load each file once
+    @lru_cache(maxsize=32)
+    def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
+        prompt_path = self.base_dir / prompt_file
 
         if not prompt_path.exists():
             raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
 
         try:
-            # Load the data
             data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
         except yaml.YAMLError as e:
             raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
 
         return {
-
+            self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]
             if mode
             else data[self.MAIN_TEMPLATE],
-
+            self.ANALYZE_TEMPLATE: data.get(self.ANALYZE_TEMPLATE)[mode]
             if mode
             else data.get(self.ANALYZE_TEMPLATE),
         }
@@ -59,14 +54,9 @@ class PromptLoader:
         return format_args
 
     def load(
-        self,
-        prompt_file: str,
-        text: str,
-        mode: str,
-        prompts_dir: str = "prompts",
-        **extra_kwargs,
+        self, prompt_file: str, text: str, mode: str, **extra_kwargs
     ) -> dict[str, str]:
-        template_configs = self._load_templates(
+        template_configs = self._load_templates(prompt_file, mode)
         format_args = self._build_format_args(text, **extra_kwargs)
 
         # Inject variables inside each template
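For prompt_loader.py, the prompts directory is now resolved once in __init__ and parsed files are cached with lru_cache. Below is a rough sketch of a load() call under these changes; the file name, the YAML template text, and the assumption that the returned templates already have {text} injected are illustrative and not taken from the diff.

from texttools.tools.internals.prompt_loader import PromptLoader

# Hypothetical prompts/summarize.yaml (renamed from summarizer.yaml in 1.0.7):
#   main_template: "Summarize the following text:\n{text}"
#   analyze_template: "List the key points of:\n{text}"

loader = PromptLoader()
configs = loader.load(
    prompt_file="summarize.yaml",  # resolved against loader.base_dir
    text="Long article body ...",
    mode=None,                     # a mode key would select from per-mode template dicts
)
# Repeated load() calls for the same file/mode reuse the lru_cache'd _load_templates result
print(configs["main_template"])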
|