hamtaa-texttools 1.0.1__py3-none-any.whl → 1.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of hamtaa-texttools might be problematic.
- hamtaa_texttools-1.1.7.dist-info/METADATA +228 -0
- hamtaa_texttools-1.1.7.dist-info/RECORD +30 -0
- {hamtaa_texttools-1.0.1.dist-info → hamtaa_texttools-1.1.7.dist-info}/licenses/LICENSE +20 -20
- {hamtaa_texttools-1.0.1.dist-info → hamtaa_texttools-1.1.7.dist-info}/top_level.txt +0 -0
- texttools/__init__.py +4 -9
- texttools/batch/__init__.py +3 -0
- texttools/{utils/batch_manager → batch}/batch_manager.py +226 -240
- texttools/batch/batch_runner.py +254 -0
- texttools/prompts/README.md +35 -0
- texttools/prompts/categorizer.yaml +28 -0
- texttools/prompts/extract_entities.yaml +20 -0
- texttools/prompts/extract_keywords.yaml +18 -0
- texttools/prompts/is_question.yaml +14 -0
- texttools/prompts/merge_questions.yaml +46 -0
- texttools/prompts/rewrite.yaml +111 -0
- texttools/prompts/run_custom.yaml +7 -0
- texttools/prompts/subject_to_question.yaml +22 -0
- texttools/prompts/summarize.yaml +14 -0
- texttools/prompts/text_to_question.yaml +20 -0
- texttools/prompts/translate.yaml +15 -0
- texttools/tools/__init__.py +4 -3
- texttools/tools/async_the_tool.py +435 -0
- texttools/tools/internals/async_operator.py +242 -0
- texttools/tools/internals/base_operator.py +100 -0
- texttools/tools/internals/formatters.py +24 -0
- texttools/tools/internals/operator.py +242 -0
- texttools/tools/internals/output_models.py +62 -0
- texttools/tools/internals/prompt_loader.py +60 -0
- texttools/tools/the_tool.py +433 -291
- hamtaa_texttools-1.0.1.dist-info/METADATA +0 -129
- hamtaa_texttools-1.0.1.dist-info/RECORD +0 -18
- texttools/formatters/base_formatter.py +0 -33
- texttools/formatters/user_merge_formatter/user_merge_formatter.py +0 -47
- texttools/prompts/__init__.py +0 -0
- texttools/tools/operator.py +0 -236
- texttools/tools/output_models.py +0 -54
- texttools/tools/prompt_loader.py +0 -84
- texttools/utils/__init__.py +0 -4
- texttools/utils/batch_manager/__init__.py +0 -4
- texttools/utils/batch_manager/batch_runner.py +0 -212
- {hamtaa_texttools-1.0.1.dist-info → hamtaa_texttools-1.1.7.dist-info}/WHEEL +0 -0
texttools/tools/async_the_tool.py
@@ -0,0 +1,435 @@
from typing import Literal, Any, Callable

from openai import AsyncOpenAI

from texttools.tools.internals.async_operator import AsyncOperator
import texttools.tools.internals.output_models as OutputModels


class AsyncTheTool:
    """
    Async counterpart to TheTool.

    Each method configures the async operator with a specific YAML prompt,
    output schema, and flags, then delegates execution to `operator.run()`.

    Usage:
        async_client = AsyncOpenAI(...)
        tool = AsyncTheTool(async_client, model="model-name")
        result = await tool.categorize("text ...", with_analysis=True)
    """

    def __init__(
        self,
        client: AsyncOpenAI,
        model: str,
    ):
        self.operator = AsyncOperator(client=client, model=model)

    async def categorize(
        self,
        text: str,
        with_analysis: bool = False,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Categorize a text into a single Islamic studies domain category.

        Returns:
            ToolOutput: Object containing:
                - result (str): The assigned Islamic studies category
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return await self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="categorizer.yaml",
            output_model=OutputModels.CategorizerOutput,
            resp_format="parse",
            mode=None,
            output_lang=None,
        )

    async def extract_keywords(
        self,
        text: str,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Extract salient keywords from text.

        Returns:
            ToolOutput: Object containing:
                - result (list[str]): List of extracted keywords
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return await self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="extract_keywords.yaml",
            output_model=OutputModels.ListStrOutput,
            resp_format="parse",
            mode=None,
        )

    async def extract_entities(
        self,
        text: str,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Perform Named Entity Recognition (NER) over the input text.

        Returns:
            ToolOutput: Object containing:
                - result (list[dict]): List of entities with 'text' and 'type' keys
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return await self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="extract_entities.yaml",
            output_model=OutputModels.ListDictStrStrOutput,
            resp_format="parse",
            mode=None,
        )

    async def is_question(
        self,
        text: str,
        with_analysis: bool = False,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Detect if the input is phrased as a question.

        Returns:
            ToolOutput: Object containing:
                - result (bool): True if text is a question, False otherwise
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return await self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="is_question.yaml",
            output_model=OutputModels.BoolOutput,
            resp_format="parse",
            mode=None,
            output_lang=None,
        )

    async def text_to_question(
        self,
        text: str,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Generate a single question from the given text.

        Returns:
            ToolOutput: Object containing:
                - result (str): The generated question
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return await self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="text_to_question.yaml",
            output_model=OutputModels.StrOutput,
            resp_format="parse",
            mode=None,
        )

    async def merge_questions(
        self,
        text: list[str],
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        mode: Literal["default", "reason"] = "default",
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Merge multiple questions into a single unified question.

        Returns:
            ToolOutput: Object containing:
                - result (str): The merged question
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        text = ", ".join(text)
        return await self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="merge_questions.yaml",
            output_model=OutputModels.StrOutput,
            resp_format="parse",
            mode=mode,
        )

    async def rewrite(
        self,
        text: str,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        mode: Literal["positive", "negative", "hard_negative"] = "positive",
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Rewrite a text with different modes.

        Returns:
            ToolOutput: Object containing:
                - result (str): The rewritten text
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return await self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="rewrite.yaml",
            output_model=OutputModels.StrOutput,
            resp_format="parse",
            mode=mode,
        )

    async def subject_to_question(
        self,
        text: str,
        number_of_questions: int,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Generate a list of questions about a subject.

        Returns:
            ToolOutput: Object containing:
                - result (list[str]): List of generated questions
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return await self.operator.run(
            # User parameters
            text=text,
            number_of_questions=number_of_questions,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="subject_to_question.yaml",
            output_model=OutputModels.ReasonListStrOutput,
            resp_format="parse",
            mode=None,
        )

    async def summarize(
        self,
        text: str,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Summarize the given subject text.

        Returns:
            ToolOutput: Object containing:
                - result (str): The summary text
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return await self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="summarize.yaml",
            output_model=OutputModels.StrOutput,
            resp_format="parse",
            mode=None,
        )

    async def translate(
        self,
        text: str,
        target_language: str,
        with_analysis: bool = False,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Translate text between languages.

        Returns:
            ToolOutput: Object containing:
                - result (str): The translated text
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return await self.operator.run(
            # User parameters
            text=text,
            target_language=target_language,
            with_analysis=with_analysis,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="translate.yaml",
            output_model=OutputModels.StrOutput,
            resp_format="parse",
            mode=None,
            output_lang=None,
        )

    async def run_custom(
        self,
        prompt: str,
        output_model: Any,
        output_lang: str | None = None,
        temperature: float | None = None,
        logprobs: bool | None = None,
        top_logprobs: int | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Custom tool that can do almost anything!

        Returns:
            ToolOutput: Object with fields:
                - result (str): The output result
        """
        return await self.operator.run(
            # User parameters
            text=prompt,
            output_model=output_model,
            output_model_str=output_model.model_json_schema(),
            output_lang=output_lang,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            # Internal parameters
            prompt_file="run_custom.yaml",
            resp_format="parse",
            user_prompt=None,
            with_analysis=False,
            mode=None,
            validator=None,
        )
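The file above is the new async entry point: each method forwards to AsyncOperator.run with a fixed prompt file and output model. A minimal usage sketch under stated assumptions (the API key, base URL, model name, and sample texts are placeholders, and running independent calls through asyncio.gather is an illustration, not something the package prescribes):

import asyncio

from openai import AsyncOpenAI

from texttools.tools.async_the_tool import AsyncTheTool


async def main() -> None:
    # Placeholder endpoint and credentials; substitute your own deployment.
    client = AsyncOpenAI(api_key="sk-...", base_url="http://localhost:8000/v1")
    tool = AsyncTheTool(client, model="model-name")

    # Independent calls can be awaited concurrently; each returns a ToolOutput.
    keywords, is_q = await asyncio.gather(
        tool.extract_keywords("Zakat purifies wealth and supports the poor.", output_lang="English"),
        tool.is_question("Is fasting required while travelling?"),
    )
    print(keywords.result, is_q.result)


asyncio.run(main())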
texttools/tools/internals/async_operator.py
@@ -0,0 +1,242 @@
from typing import Any, TypeVar, Type, Literal, Callable
import logging

from openai import AsyncOpenAI
from pydantic import BaseModel

from texttools.tools.internals.output_models import ToolOutput
from texttools.tools.internals.base_operator import BaseOperator
from texttools.tools.internals.formatters import Formatter
from texttools.tools.internals.prompt_loader import PromptLoader

# Base Model type for output models
T = TypeVar("T", bound=BaseModel)

logger = logging.getLogger("texttools.async_operator")


class AsyncOperator(BaseOperator):
    """
    Core engine for running text-processing operations with an LLM (Async).

    It wires together:
    - `PromptLoader` → loads YAML prompt templates.
    - `UserMergeFormatter` → applies formatting to messages (e.g., merging).
    - AsyncOpenAI client → executes completions/parsed completions.
    """

    def __init__(self, client: AsyncOpenAI, model: str):
        self.client = client
        self.model = model

    async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
        """
        Calls OpenAI API for analysis using the configured prompt template.
        Returns the analyzed content as a string.
        """
        analyze_prompt = prompt_configs["analyze_template"]
        analyze_message = [self._build_user_message(analyze_prompt)]
        completion = await self.client.chat.completions.create(
            model=self.model,
            messages=analyze_message,
            temperature=temperature,
        )
        analysis = completion.choices[0].message.content.strip()
        return analysis

    async def _parse_completion(
        self,
        message: list[dict[str, str]],
        output_model: Type[T],
        temperature: float,
        logprobs: bool = False,
        top_logprobs: int = 3,
    ) -> tuple[Type[T], Any]:
        """
        Parses a chat completion using OpenAI's structured output format.
        Returns both the parsed object and the raw completion for logging.
        """
        request_kwargs = {
            "model": self.model,
            "messages": message,
            "response_format": output_model,
            "temperature": temperature,
        }

        if logprobs:
            request_kwargs["logprobs"] = True
            request_kwargs["top_logprobs"] = top_logprobs

        completion = await self.client.beta.chat.completions.parse(**request_kwargs)
        parsed = completion.choices[0].message.parsed
        return parsed, completion

    async def _vllm_completion(
        self,
        message: list[dict[str, str]],
        output_model: Type[T],
        temperature: float,
        logprobs: bool = False,
        top_logprobs: int = 3,
    ) -> tuple[Type[T], Any]:
        """
        Generates a completion using vLLM with JSON schema guidance.
        Returns the parsed output model and raw completion.
        """
        json_schema = output_model.model_json_schema()

        # Build kwargs dynamically
        request_kwargs = {
            "model": self.model,
            "messages": message,
            "extra_body": {"guided_json": json_schema},
            "temperature": temperature,
        }

        if logprobs:
            request_kwargs["logprobs"] = True
            request_kwargs["top_logprobs"] = top_logprobs

        completion = await self.client.chat.completions.create(**request_kwargs)
        response = completion.choices[0].message.content

        # Convert the string response to output model
        parsed = self._convert_to_output_model(response, output_model)
        return parsed, completion

    async def run(
        self,
        # User parameters
        text: str,
        with_analysis: bool,
        output_lang: str | None,
        user_prompt: str | None,
        temperature: float,
        logprobs: bool,
        top_logprobs: int | None,
        validator: Callable[[Any], bool] | None,
        # Internal parameters
        prompt_file: str,
        output_model: Type[T],
        resp_format: Literal["vllm", "parse"],
        mode: str | None,
        **extra_kwargs,
    ) -> ToolOutput:
        """
        Execute the async LLM pipeline with the given input text. (Async)
        """
        prompt_loader = PromptLoader()
        formatter = Formatter()
        output = ToolOutput()

        try:
            # Prompt configs contain two keys: main_template and analyze_template, both strings
            prompt_configs = prompt_loader.load(
                prompt_file=prompt_file,
                text=text.strip(),
                mode=mode,
                **extra_kwargs,
            )

            messages: list[dict[str, str]] = []

            if with_analysis:
                analysis = await self._analyze(prompt_configs, temperature)
                messages.append(
                    self._build_user_message(f"Based on this analysis: {analysis}")
                )

            if output_lang:
                messages.append(
                    self._build_user_message(
                        f"Respond only in the {output_lang} language."
                    )
                )

            if user_prompt:
                messages.append(
                    self._build_user_message(f"Consider this instruction {user_prompt}")
                )

            messages.append(self._build_user_message(prompt_configs["main_template"]))
            messages = formatter.user_merge_format(messages)

            if resp_format == "vllm":
                parsed, completion = await self._vllm_completion(
                    messages, output_model, temperature, logprobs, top_logprobs
                )
            elif resp_format == "parse":
                parsed, completion = await self._parse_completion(
                    messages, output_model, temperature, logprobs, top_logprobs
                )

            # Ensure output_model has a `result` field
            if not hasattr(parsed, "result"):
                error = "The provided output_model must define a field named 'result'"
                logger.error(error)
                output.errors.append(error)
                return output

            output.result = parsed.result

            # Retry logic if validation fails
            if validator and not validator(output.result):
                max_retries = 3
                for attempt in range(max_retries):
                    logger.warning(
                        f"Validation failed, retrying for the {attempt + 1} time."
                    )

                    # Generate new temperature for retry
                    retry_temperature = self._get_retry_temp(temperature)
                    try:
                        if resp_format == "vllm":
                            parsed, completion = await self._vllm_completion(
                                messages,
                                output_model,
                                retry_temperature,
                                logprobs,
                                top_logprobs,
                            )
                        elif resp_format == "parse":
                            parsed, completion = await self._parse_completion(
                                messages,
                                output_model,
                                retry_temperature,
                                logprobs,
                                top_logprobs,
                            )

                        output.result = parsed.result

                        # Check if retry was successful
                        if validator(output.result):
                            logger.info(
                                f"Validation passed on retry attempt {attempt + 1}"
                            )
                            break
                        else:
                            logger.warning(
                                f"Validation still failing after retry attempt {attempt + 1}"
                            )

                    except Exception as e:
                        logger.error(f"Retry attempt {attempt + 1} failed: {e}")
                        # Continue to next retry attempt if this one fails

            # Final check after all retries
            if validator and not validator(output.result):
                output.errors.append("Validation failed after all retry attempts")

            if logprobs:
                output.logprobs = self._extract_logprobs(completion)

            if with_analysis:
                output.analysis = analysis

            return output

        except Exception as e:
            logger.error(f"AsyncTheTool failed: {e}")
            output.errors.append(str(e))
            return output
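AsyncOperator.run only requires that the parsed output model expose a result field, and when a supplied validator rejects the result it retries up to three times at an adjusted temperature before recording "Validation failed after all retry attempts". A minimal sketch of a caller-defined schema and validator, assuming the hypothetical names TopicListOutput and non_empty (neither is part of the package):

from pydantic import BaseModel


class TopicListOutput(BaseModel):
    # The operator rejects any output model that lacks a `result` field.
    result: list[str]


def non_empty(result) -> bool:
    # Validator contract is Callable[[Any], bool]; returning False triggers the retry loop.
    return bool(result)


# Inside an async context, e.g.:
#   custom = await tool.run_custom(prompt="List the topics in ...", output_model=TopicListOutput)
#   keywords = await tool.extract_keywords("some text", validator=non_empty)
#   if keywords.errors:
#       ...  # e.g. "Validation failed after all retry attempts"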