hamtaa-texttools 1.1.1 → 1.1.16 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.16.dist-info}/METADATA +98 -26
- hamtaa_texttools-1.1.16.dist-info/RECORD +31 -0
- texttools/__init__.py +6 -8
- texttools/batch/batch_config.py +26 -0
- texttools/batch/batch_runner.py +105 -151
- texttools/batch/{batch_manager.py → internals/batch_manager.py} +39 -40
- texttools/batch/internals/utils.py +16 -0
- texttools/prompts/README.md +4 -4
- texttools/prompts/categorize.yaml +77 -0
- texttools/prompts/detect_entity.yaml +22 -0
- texttools/prompts/extract_keywords.yaml +68 -18
- texttools/tools/async_tools.py +804 -0
- texttools/tools/internals/async_operator.py +90 -69
- texttools/tools/internals/models.py +183 -0
- texttools/tools/internals/operator_utils.py +54 -0
- texttools/tools/internals/prompt_loader.py +13 -14
- texttools/tools/internals/sync_operator.py +201 -0
- texttools/tools/sync_tools.py +804 -0
- hamtaa_texttools-1.1.1.dist-info/RECORD +0 -30
- texttools/batch/__init__.py +0 -4
- texttools/prompts/categorizer.yaml +0 -28
- texttools/tools/__init__.py +0 -4
- texttools/tools/async_the_tool.py +0 -414
- texttools/tools/internals/base_operator.py +0 -91
- texttools/tools/internals/operator.py +0 -179
- texttools/tools/internals/output_models.py +0 -59
- texttools/tools/the_tool.py +0 -412
- {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.16.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.16.dist-info}/licenses/LICENSE +0 -0
- {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.16.dist-info}/top_level.txt +0 -0
hamtaa_texttools-1.1.1.dist-info/RECORD DELETED
@@ -1,30 +0,0 @@
-hamtaa_texttools-1.1.1.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
-texttools/__init__.py,sha256=v3tQCH_Cjj47fCpuhK6sKSVAqEjNkc-cZbY4OJa4IZw,202
-texttools/batch/__init__.py,sha256=q50JsQsmQGp_8RW0KNasYeYWVV0R4FUNZ-ujXwEJemY,143
-texttools/batch/batch_manager.py,sha256=leVIFkR-3HpDkQi_MK3TgFNnHYsCN-wbS4mTWoPmO3c,8828
-texttools/batch/batch_runner.py,sha256=cgiCYLIBQQC0dBWM8_lVP9c5QLJoAmS2ijMtp0p3U2o,10313
-texttools/prompts/README.md,sha256=rclMaCV1N8gT1KcpZu0-ka0dKGNg2f1CEcRMdQkgQOc,1379
-texttools/prompts/categorizer.yaml,sha256=GMqIIzQFhgnlpkgU1qi3FAD3mD4A2jiWD5TilQ2XnnE,1204
-texttools/prompts/extract_entities.yaml,sha256=KiKjeDpHaeh3JVtZ6q1pa3k4DYucUIU9WnEcRTCA-SE,651
-texttools/prompts/extract_keywords.yaml,sha256=0O7ypL_OsEOxtvlQ2CZjnsv9637DJwAKprZsf9Vo2_s,769
-texttools/prompts/is_question.yaml,sha256=d0-vKRbXWkxvO64ikvxRjEmpAXGpCYIPGhgexvPPjws,471
-texttools/prompts/merge_questions.yaml,sha256=0J85GvTirZB4ELwH3sk8ub_WcqqpYf6PrMKr3djlZeo,1792
-texttools/prompts/rewrite.yaml,sha256=LO7He_IA3MZKz8a-LxH9DHJpOjpYwaYN1pbjp1Y0tFo,5392
-texttools/prompts/run_custom.yaml,sha256=38OkCoVITbuuS9c08UZSP1jZW4WjSmRIi8fR0RAiPu4,108
-texttools/prompts/subject_to_question.yaml,sha256=C7x7rNNm6U_ZG9HOn6zuzYOtvJUZ2skuWbL1-aYdd3E,1147
-texttools/prompts/summarize.yaml,sha256=o6rxGPfWtZd61Duvm8NVvCJqfq73b-wAuMSKR6UYUqY,459
-texttools/prompts/text_to_question.yaml,sha256=UheKYpDn6iyKI8NxunHZtFpNyfCLZZe5cvkuXpurUJY,783
-texttools/prompts/translate.yaml,sha256=mGT2uBCei6uucWqVbs4silk-UV060v3G0jnt0P6sr50,634
-texttools/tools/__init__.py,sha256=hG1I28Q7BJ1Dbs95x6QMKXdsAlC5Eh_tqC-EbAibwiU,114
-texttools/tools/async_the_tool.py,sha256=h6-Zkedet-eRUrkV5fANNoh4WmoqhXU5wJEHpd8nyNU,14377
-texttools/tools/the_tool.py,sha256=lKy3_CKcWo2cBLQ7dDgvh7-oos7UOx1NYM26tcMhwaI,14143
-texttools/tools/internals/async_operator.py,sha256=Kj-DLBKcKbZPCJYn4lVo4Iiei11M04pwgWpIl8L69aM,6169
-texttools/tools/internals/base_operator.py,sha256=OWJe8ybA6qmmoc7ysYeB8ccHPneDlEtmFGH1jLWQCeY,3135
-texttools/tools/internals/formatters.py,sha256=tACNLP6PeoqaRpNudVxBaHA25zyWqWYPZQuYysIu88g,941
-texttools/tools/internals/operator.py,sha256=g1E1WkgnKRDgOs6fEFu0-gPCw1Bniwb4VI9Er3Op_gk,6063
-texttools/tools/internals/output_models.py,sha256=gbVbzBWeyHUVNsCBuawdgz9ZEzsC7wfygGgZJsAaexY,1662
-texttools/tools/internals/prompt_loader.py,sha256=rbitJD3e8vAdcooP1Yx6KnSI83g28ho-FegfZ1cJ4j4,1979
-hamtaa_texttools-1.1.1.dist-info/METADATA,sha256=Cc1Rq94QyXgJ8SNhsBgyUfhho3oywzGpx6y16s50b-Q,7144
-hamtaa_texttools-1.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-hamtaa_texttools-1.1.1.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
-hamtaa_texttools-1.1.1.dist-info/RECORD,,
texttools/batch/__init__.py DELETED
texttools/prompts/categorizer.yaml DELETED
@@ -1,28 +0,0 @@
-main_template: |
-  تو یک متخصص علوم دینی هستی
-  من یک متن به تو میدهم و تو باید
-  آن متن را در یکی از دسته بندی های زیر طبقه بندی کنی
-  دسته بندی ها:
-  "باورهای دینی",
-  "اخلاق اسلامی",
-  "احکام و فقه",
-  "تاریخ اسلام و شخصیت ها",
-  "منابع دینی",
-  "دین و جامعه/سیاست",
-  "عرفان و معنویت",
-  "هیچکدام",
-  فقط با این فرمت json پاسخ بده:
-  {{
-  "reason": "<دلیل انتخابت رو به صورت خلاصه بگو>",
-  "result": "<یکی از دسته بندی ها>"
-  }}
-  متنی که باید طبقه بندی کنی:
-  {input}
-
-analyze_template: |
-  ما میخواهیم متنی که داده می شود را طبقه بندی کنیم.
-  برای بهبود طبقه بندی، نیاز به آنالیز متن داریم.
-  متنی که داده می شود را آنالیز کن و ایده اصلی و آنالیزی کوتاه از آن را بنویس.
-  آنالیز باید بسیار خلاصه باشد
-  نهایتا 20 کلمه
-  {input}
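The removed main_template doubles the braces around its JSON skeleton ({{ and }}) while leaving {input} bare, which is the escaping convention of Python's str.format. A minimal sketch of how a template in this style could be loaded and rendered; PyYAML and the literal file path are illustrative assumptions, not the package's own prompt_loader logic:

# Illustrative rendering of the old categorizer.yaml (pre-1.1.16).
# yaml.safe_load and str.format are assumptions about how the template is used.
import yaml

with open("texttools/prompts/categorizer.yaml", encoding="utf-8") as f:
    prompts = yaml.safe_load(f)

# The doubled {{ }} in the YAML come out as literal { } here; {input} is substituted.
main_prompt = prompts["main_template"].format(input="متن نمونه برای طبقه بندی")
print(main_prompt)

Per the file list above, 1.1.16 removes categorizer.yaml and adds a new categorize.yaml.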
texttools/tools/__init__.py DELETED
texttools/tools/async_the_tool.py DELETED
@@ -1,414 +0,0 @@
-from typing import Literal, Any
-
-from openai import AsyncOpenAI
-
-from texttools.tools.internals.async_operator import AsyncOperator
-import texttools.tools.internals.output_models as OutputModels
-
-
-class AsyncTheTool:
-    """
-    Async counterpart to TheTool.
-
-    Each method configures the async operator with a specific YAML prompt,
-    output schema, and flags, then delegates execution to `operator.run()`.
-
-    Usage:
-        async_client = AsyncOpenAI(...)
-        tool = TheToolAsync(async_client, model="model-name")
-        result = await tool.categorize("text ...", with_analysis=True)
-    """
-
-    def __init__(
-        self,
-        client: AsyncOpenAI,
-        model: str,
-    ):
-        self.operator = AsyncOperator(client=client, model=model)
-
-    async def categorize(
-        self,
-        text: str,
-        with_analysis: bool = False,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-    ) -> dict[str, str]:
-        """
-        Categorize a text into a single Islamic studies domain category.
-
-        Returns:
-            ToolOutput: Object containing:
-                - result (str): The assigned Islamic studies category
-                - logprobs (list | None): Probability data if logprobs enabled
-                - analysis (str | None): Detailed reasoning if with_analysis enabled
-        """
-        return await self.operator.run(
-            # User parameters
-            text=text,
-            with_analysis=with_analysis,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="categorizer.yaml",
-            output_model=OutputModels.CategorizerOutput,
-            resp_format="parse",
-            mode=None,
-            output_lang=None,
-        )
-
-    async def extract_keywords(
-        self,
-        text: str,
-        with_analysis: bool = False,
-        output_lang: str | None = None,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-    ) -> dict[str, list[str]]:
-        """
-        Extract salient keywords from text.
-
-        Returns:
-            ToolOutput: Object containing:
-                - result (list[str]): List of extracted keywords
-                - logprobs (list | None): Probability data if logprobs enabled
-                - analysis (str | None): Detailed reasoning if with_analysis enabled
-        """
-        return await self.operator.run(
-            # User parameters
-            text=text,
-            with_analysis=with_analysis,
-            output_lang=output_lang,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="extract_keywords.yaml",
-            output_model=OutputModels.ListStrOutput,
-            resp_format="parse",
-            mode=None,
-        )
-
-    async def extract_entities(
-        self,
-        text: str,
-        with_analysis: bool = False,
-        output_lang: str | None = None,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-    ) -> dict[str, list[dict[str, str]]]:
-        """
-        Perform Named Entity Recognition (NER) over the input text.
-
-        Returns:
-            ToolOutput: Object containing:
-                - result (list[dict]): List of entities with 'text' and 'type' keys
-                - logprobs (list | None): Probability data if logprobs enabled
-                - analysis (str | None): Detailed reasoning if with_analysis enabled
-        """
-        return await self.operator.run(
-            # User parameters
-            text=text,
-            with_analysis=with_analysis,
-            output_lang=output_lang,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="extract_entities.yaml",
-            output_model=OutputModels.ListDictStrStrOutput,
-            resp_format="parse",
-            mode=None,
-        )
-
-    async def is_question(
-        self,
-        text: str,
-        with_analysis: bool = False,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-    ) -> dict[str, bool]:
-        """
-        Detect if the input is phrased as a question.
-
-        Returns:
-            ToolOutput: Object containing:
-                - result (bool): True if text is a question, False otherwise
-                - logprobs (list | None): Probability data if logprobs enabled
-                - analysis (str | None): Detailed reasoning if with_analysis enabled
-        """
-        return await self.operator.run(
-            # User parameters
-            text=text,
-            with_analysis=with_analysis,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="is_question.yaml",
-            output_model=OutputModels.BoolOutput,
-            resp_format="parse",
-            mode=None,
-            output_lang=None,
-        )
-
-    async def text_to_question(
-        self,
-        text: str,
-        with_analysis: bool = False,
-        output_lang: str | None = None,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-    ) -> dict[str, str]:
-        """
-        Generate a single question from the given text.
-
-        Returns:
-            ToolOutput: Object containing:
-                - result (str): The generated question
-                - logprobs (list | None): Probability data if logprobs enabled
-                - analysis (str | None): Detailed reasoning if with_analysis enabled
-        """
-        return await self.operator.run(
-            # User parameters
-            text=text,
-            with_analysis=with_analysis,
-            output_lang=output_lang,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="text_to_question.yaml",
-            output_model=OutputModels.StrOutput,
-            resp_format="parse",
-            mode=None,
-        )
-
-    async def merge_questions(
-        self,
-        text: list[str],
-        with_analysis: bool = False,
-        output_lang: str | None = None,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-        mode: Literal["default", "reason"] = "default",
-    ) -> dict[str, str]:
-        """
-        Merge multiple questions into a single unified question.
-
-        Returns:
-            ToolOutput: Object containing:
-                - result (str): The merged question
-                - logprobs (list | None): Probability data if logprobs enabled
-                - analysis (str | None): Detailed reasoning if with_analysis enabled
-        """
-        text = ", ".join(text)
-        return await self.operator.run(
-            # User parameters
-            text=text,
-            with_analysis=with_analysis,
-            output_lang=output_lang,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="merge_questions.yaml",
-            output_model=OutputModels.StrOutput,
-            resp_format="parse",
-            mode=mode,
-        )
-
-    async def rewrite(
-        self,
-        text: str,
-        with_analysis: bool = False,
-        output_lang: str | None = None,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-        mode: Literal["positive", "negative", "hard_negative"] = "positive",
-    ) -> dict[str, str]:
-        """
-        Rewrite a text with different modes.
-
-        Returns:
-            ToolOutput: Object containing:
-                - result (str): The rewritten text
-                - logprobs (list | None): Probability data if logprobs enabled
-                - analysis (str | None): Detailed reasoning if with_analysis enabled
-        """
-        return await self.operator.run(
-            # User parameters
-            text=text,
-            with_analysis=with_analysis,
-            output_lang=output_lang,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="rewrite.yaml",
-            output_model=OutputModels.StrOutput,
-            resp_format="parse",
-            mode=mode,
-        )
-
-    async def subject_to_question(
-        self,
-        text: str,
-        number_of_questions: int,
-        with_analysis: bool = False,
-        output_lang: str | None = None,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-    ) -> dict[str, list[str]]:
-        """
-        Generate a list of questions about a subject.
-
-        Returns:
-            ToolOutput: Object containing:
-                - result (list[str]): List of generated questions
-                - logprobs (list | None): Probability data if logprobs enabled
-                - analysis (str | None): Detailed reasoning if with_analysis enabled
-        """
-        return await self.operator.run(
-            # User parameters
-            text=text,
-            number_of_questions=number_of_questions,
-            with_analysis=with_analysis,
-            output_lang=output_lang,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="subject_to_question.yaml",
-            output_model=OutputModels.ReasonListStrOutput,
-            resp_format="parse",
-            mode=None,
-        )
-
-    async def summarize(
-        self,
-        text: str,
-        with_analysis: bool = False,
-        output_lang: str | None = None,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-    ) -> dict[str, str]:
-        """
-        Summarize the given subject text.
-
-        Returns:
-            ToolOutput: Object containing:
-                - result (str): The summary text
-                - logprobs (list | None): Probability data if logprobs enabled
-                - analysis (str | None): Detailed reasoning if with_analysis enabled
-        """
-        return await self.operator.run(
-            # User parameters
-            text=text,
-            with_analysis=with_analysis,
-            output_lang=output_lang,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="summarize.yaml",
-            output_model=OutputModels.StrOutput,
-            resp_format="parse",
-            mode=None,
-        )
-
-    async def translate(
-        self,
-        text: str,
-        target_language: str,
-        with_analysis: bool = False,
-        user_prompt: str | None = None,
-        temperature: float | None = 0.0,
-        logprobs: bool = False,
-        top_logprobs: int | None = None,
-    ) -> dict[str, str]:
-        """
-        Translate text between languages.
-
-        Returns:
-            ToolOutput: Object containing:
-                - result (str): The translated text
-                - logprobs (list | None): Probability data if logprobs enabled
-                - analysis (str | None): Detailed reasoning if with_analysis enabled
-        """
-        return await self.operator.run(
-            # User parameters
-            text=text,
-            target_language=target_language,
-            with_analysis=with_analysis,
-            user_prompt=user_prompt,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="translate.yaml",
-            output_model=OutputModels.StrOutput,
-            resp_format="parse",
-            mode=None,
-            output_lang=None,
-        )
-
-    async def run_custom(
-        self,
-        prompt: str,
-        output_model: Any,
-        output_lang: str | None = None,
-        temperature: float | None = None,
-        logprobs: bool | None = None,
-        top_logprobs: int | None = None,
-    ) -> dict[str, Any]:
-        """
-        Custom tool that can do almost anything!
-
-        Returns:
-            ToolOutput: Object with fields:
-                - result (str): The output result
-        """
-        return await self.operator.run(
-            # User paramaeters
-            text=prompt,
-            output_model=output_model,
-            output_model_str=output_model.model_json_schema(),
-            output_lang=output_lang,
-            temperature=temperature,
-            logprobs=logprobs,
-            top_logprobs=top_logprobs,
-            # Internal parameters
-            prompt_file="run_custom.yaml",
-            resp_format="parse",
-            user_prompt=None,
-            with_analysis=False,
-            mode=None,
-        )
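For reference, a usage sketch of the removed 1.1.1 async API, assembled from the class above (note that the docstring's own example names the class TheToolAsync, while it is defined as AsyncTheTool). The AsyncOpenAI arguments and the model name are placeholders, not values taken from the package:

# Sketch only: exercises the deleted texttools/tools/async_the_tool.py module.
import asyncio

from openai import AsyncOpenAI
from texttools.tools.async_the_tool import AsyncTheTool


async def main() -> None:
    # base_url / api_key / model are placeholders for whatever endpoint is used.
    async_client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    tool = AsyncTheTool(async_client, model="model-name")
    # Each method binds one YAML prompt; categorize() uses categorizer.yaml.
    result = await tool.categorize("یک متن نمونه", with_analysis=True)
    print(result)


asyncio.run(main())

Per the file list above, 1.1.16 drops this module in favor of texttools/tools/async_tools.py (with a sync counterpart in sync_tools.py), so the snippet applies to 1.1.1 only.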
texttools/tools/internals/base_operator.py DELETED
@@ -1,91 +0,0 @@
-from typing import TypeVar, Type, Any
-import json
-import re
-import math
-import logging
-
-from pydantic import BaseModel
-from openai import OpenAI, AsyncOpenAI
-
-# Base Model type for output models
-T = TypeVar("T", bound=BaseModel)
-
-# Configure logger
-logger = logging.getLogger("base_operator")
-logger.setLevel(logging.INFO)
-
-
-class BaseOperator:
-    def __init__(self, client: OpenAI | AsyncOpenAI, model: str):
-        self.client = client
-        self.model = model
-
-    def _build_user_message(self, prompt: str) -> dict[str, str]:
-        return {"role": "user", "content": prompt}
-
-    def _clean_json_response(self, response: str) -> str:
-        """
-        Clean JSON response by removing code block markers and whitespace.
-        Handles cases like:
-        - ```json{"result": "value"}```
-        """
-        stripped = response.strip()
-        cleaned = re.sub(r"^```(?:json)?\s*", "", stripped)
-        cleaned = re.sub(r"\s*```$", "", cleaned)
-
-        return cleaned.strip()
-
-    def _convert_to_output_model(
-        self, response_string: str, output_model: Type[T]
-    ) -> Type[T]:
-        """
-        Convert a JSON response string to output model.
-
-        Args:
-            response_string: The JSON string (may contain code block markers)
-            output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
-
-        Returns:
-            Instance of your output model
-        """
-        # Clean the response string
-        cleaned_json = self._clean_json_response(response_string)
-
-        # Fix Python-style booleans
-        cleaned_json = cleaned_json.replace("False", "false").replace("True", "true")
-
-        # Convert string to Python dictionary
-        response_dict = json.loads(cleaned_json)
-
-        # Convert dictionary to output model
-        return output_model(**response_dict)
-
-    def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
-        logprobs_data = []
-        ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
-
-        for choice in completion.choices:
-            if not getattr(choice, "logprobs", None):
-                logger.error("logprobs is not avalible in the chosen model.")
-                return []
-
-            for logprob_item in choice.logprobs.content:
-                if ignore_pattern.match(logprob_item.token):
-                    continue
-                token_entry = {
-                    "token": logprob_item.token,
-                    "prob": round(math.exp(logprob_item.logprob), 8),
-                    "top_alternatives": [],
-                }
-                for alt in logprob_item.top_logprobs:
-                    if ignore_pattern.match(alt.token):
-                        continue
-                    token_entry["top_alternatives"].append(
-                        {
-                            "token": alt.token,
-                            "prob": round(math.exp(alt.logprob), 8),
-                        }
-                    )
-                logprobs_data.append(token_entry)
-
-        return logprobs_data