hamtaa-texttools 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of hamtaa-texttools might be problematic.
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/METADATA +192 -141
- hamtaa_texttools-1.0.5.dist-info/RECORD +30 -0
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/licenses/LICENSE +20 -20
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/top_level.txt +0 -0
- texttools/__init__.py +9 -9
- texttools/batch/__init__.py +4 -4
- texttools/batch/batch_manager.py +240 -240
- texttools/batch/batch_runner.py +212 -212
- texttools/formatters/base_formatter.py +33 -33
- texttools/formatters/{user_merge_formatter/user_merge_formatter.py → user_merge_formatter.py} +30 -30
- texttools/prompts/README.md +31 -31
- texttools/prompts/categorizer.yaml +28 -31
- texttools/prompts/custom_tool.yaml +7 -0
- texttools/prompts/keyword_extractor.yaml +18 -14
- texttools/prompts/ner_extractor.yaml +20 -21
- texttools/prompts/question_detector.yaml +13 -14
- texttools/prompts/question_generator.yaml +19 -22
- texttools/prompts/question_merger.yaml +45 -48
- texttools/prompts/rewriter.yaml +111 -0
- texttools/prompts/subject_question_generator.yaml +22 -26
- texttools/prompts/summarizer.yaml +13 -11
- texttools/prompts/translator.yaml +14 -14
- texttools/tools/__init__.py +4 -4
- texttools/tools/async_the_tool.py +277 -263
- texttools/tools/internals/async_operator.py +297 -288
- texttools/tools/internals/operator.py +295 -306
- texttools/tools/internals/output_models.py +52 -62
- texttools/tools/internals/prompt_loader.py +76 -82
- texttools/tools/the_tool.py +501 -400
- hamtaa_texttools-1.0.4.dist-info/RECORD +0 -29
- texttools/prompts/question_rewriter.yaml +0 -46
- {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/WHEEL +0 -0
texttools/tools/async_the_tool.py
@@ -1,263 +1,277 @@
-from typing import
 … (removed lines 2–62 of the old file did not survive extraction)
-    ) -> dict[str, list[str]]:
-        results = await self.operator.run(
-            text,
-            prompt_file="keyword_extractor.yaml",
-            output_model=OutputModels.ListStrOutput,
-            with_analysis=with_analysis,
-            resp_format="parse",
-            user_prompt=user_prompt,
-            output_lang=output_lang,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
 … (removed lines 74–263 of the old file did not survive extraction)
+from typing import Literal
+
+from openai import AsyncOpenAI
+
+import texttools.tools.internals.output_models as OutputModels
+from texttools.tools.internals.async_operator import AsyncOperator
+
+
+class AsyncTheTool:
+    """
+    Async counterpart to TheTool.
+
+    Usage:
+        async_client = AsyncOpenAI(...)
+        tool = AsyncTheTool(async_client, model="gemma-3")
+        result = await tool.categorize("متن ...", with_analysis=True)
+    """
+
+    def __init__(
+        self,
+        client: AsyncOpenAI,
+        *,
+        model: str,
+        temperature: float = 0.0,
+    ):
+        self.operator = AsyncOperator(
+            client=client,
+            model=model,
+            temperature=temperature,
+        )
+
+    async def categorize(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        user_prompt: str = "",
+        logprobs: bool = False,
+        top_logprobs: int = 8,
+        max_tokens: int | None = None,
+    ) -> dict[str, str]:
+        results = await self.operator.run(
+            text,
+            prompt_file="categorizer.yaml",
+            output_model=OutputModels.CategorizerOutput,
+            with_analysis=with_analysis,
+            resp_format="parse",
+            user_prompt=user_prompt,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+        )
+        return results
+
+    async def extract_keywords(
+        self,
+        text: str,
+        output_lang: str | None = None,
+        with_analysis: bool = False,
+        user_prompt: str = "",
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+        max_tokens: int | None = None,
+    ) -> dict[str, list[str]]:
+        results = await self.operator.run(
+            text,
+            prompt_file="keyword_extractor.yaml",
+            output_model=OutputModels.ListStrOutput,
+            with_analysis=with_analysis,
+            resp_format="parse",
+            user_prompt=user_prompt,
+            output_lang=output_lang,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+        )
+        return results
+
+    async def extract_entities(
+        self,
+        text: str,
+        output_lang: str | None = None,
+        with_analysis: bool = False,
+        user_prompt: str = "",
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+        max_tokens: int | None = None,
+    ) -> dict[str, list[dict[str, str]]]:
+        results = await self.operator.run(
+            text,
+            prompt_file="ner_extractor.yaml",
+            output_model=OutputModels.ListDictStrStrOutput,
+            with_analysis=with_analysis,
+            resp_format="parse",
+            user_prompt=user_prompt,
+            output_lang=output_lang,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+        )
+        return results
+
+    async def detect_question(
+        self,
+        question: str,
+        output_lang: str | None = None,
+        with_analysis: bool = False,
+        user_prompt: str = "",
+        logprobs: bool = False,
+        top_logprobs: int = 2,
+        max_tokens: int | None = None,
+    ) -> dict[str, bool]:
+        results = await self.operator.run(
+            question,
+            prompt_file="question_detector.yaml",
+            output_model=OutputModels.BoolOutput,
+            with_analysis=with_analysis,
+            resp_format="parse",
+            user_prompt=user_prompt,
+            output_lang=output_lang,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+        )
+        return results
+
+    async def generate_question_from_text(
+        self,
+        text: str,
+        output_lang: str | None = None,
+        with_analysis: bool = False,
+        user_prompt: str = "",
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+        max_tokens: int | None = None,
+    ) -> dict[str, str]:
+        results = await self.operator.run(
+            text,
+            prompt_file="question_generator.yaml",
+            output_model=OutputModels.StrOutput,
+            with_analysis=with_analysis,
+            resp_format="parse",
+            user_prompt=user_prompt,
+            output_lang=output_lang,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+        )
+        return results
+
+    async def merge_questions(
+        self,
+        questions: list[str],
+        output_lang: str | None = None,
+        mode: Literal["default", "reason"] = "default",
+        with_analysis: bool = False,
+        user_prompt: str = "",
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+        max_tokens: int | None = None,
+    ) -> dict[str, str]:
+        question_str = ", ".join(questions)
+        results = await self.operator.run(
+            question_str,
+            prompt_file="question_merger.yaml",
+            output_model=OutputModels.StrOutput,
+            with_analysis=with_analysis,
+            use_modes=True,
+            mode=mode,
+            resp_format="parse",
+            user_prompt=user_prompt,
+            output_lang=output_lang,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+        )
+        return results
+
+    async def rewrite(
+        self,
+        question: str,
+        output_lang: str | None = None,
+        mode: Literal["positive", "negative", "hard_negative"] = "positive",
+        with_analysis: bool = False,
+        user_prompt: str = "",
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+        max_tokens: int | None = None,
+    ) -> dict[str, str]:
+        results = await self.operator.run(
+            question,
+            prompt_file="rewriter.yaml",
+            output_model=OutputModels.StrOutput,
+            with_analysis=with_analysis,
+            use_modes=True,
+            mode=mode,
+            resp_format="parse",
+            user_prompt=user_prompt,
+            output_lang=output_lang,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+        )
+        return results
+
+    async def generate_questions_from_subject(
+        self,
+        subject: str,
+        number_of_questions: int,
+        output_lang: str | None = None,
+        with_analysis: bool = False,
+        user_prompt: str = "",
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+        max_tokens: int | None = None,
+    ) -> dict[str, list[str]]:
+        results = await self.operator.run(
+            subject,
+            prompt_file="subject_question_generator.yaml",
+            output_model=OutputModels.ReasonListStrOutput,
+            with_analysis=with_analysis,
+            resp_format="parse",
+            user_prompt=user_prompt,
+            number_of_questions=number_of_questions,
+            output_lang=output_lang,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+        )
+        return results
+
+    async def summarize(
+        self,
+        text: str,
+        output_lang: str | None = None,
+        with_analysis: bool = False,
+        user_prompt: str = "",
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+        max_tokens: int | None = None,
+    ) -> dict[str, str]:
+        results = await self.operator.run(
+            text,
+            prompt_file="summarizer.yaml",
+            output_model=OutputModels.StrOutput,
+            with_analysis=with_analysis,
+            resp_format="parse",
+            user_prompt=user_prompt,
+            output_lang=output_lang,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+        )
+        return results
+
+    async def translate(
+        self,
+        text: str,
+        target_language: str,
+        with_analysis: bool = False,
+        user_prompt: str = "",
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+        max_tokens: int | None = None,
+    ) -> dict[str, str]:
+        results = await self.operator.run(
+            text,
+            prompt_file="translator.yaml",
+            output_model=OutputModels.StrOutput,
+            with_analysis=with_analysis,
+            resp_format="parse",
+            user_prompt=user_prompt,
+            target_language=target_language,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+        )
+        return results
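
To make the new surface concrete, here is a minimal usage sketch of the 1.0.5 async API, assuming an OpenAI-compatible endpoint; the base_url, api_key, model name, and sample inputs below are placeholders, and the return values follow the dict annotations in the diff above.

import asyncio

from openai import AsyncOpenAI

from texttools.tools.async_the_tool import AsyncTheTool


async def main() -> None:
    # Placeholder endpoint and credentials; point these at your own deployment.
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="sk-placeholder")
    tool = AsyncTheTool(client, model="gemma-3")

    # Each call resolves to a dict, per the return annotations above.
    category = await tool.categorize("Text to categorize", with_analysis=True)
    keywords = await tool.extract_keywords("Text to mine for keywords", output_lang="en")

    # rewrite() drives the newly added rewriter.yaml prompt (question_rewriter.yaml
    # was removed in this release) and takes a mode switch; "positive" is the default.
    rewritten = await tool.rewrite("Which products support async batch runs?", mode="positive")

    print(category, keywords, rewritten)


if __name__ == "__main__":
    asyncio.run(main())

Whether these calls succeed end to end depends on the AsyncOperator wiring and the prompt files shipped in the wheel; the sketch only exercises the signatures visible in this diff.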