PyPI - hamtaa-texttools - Versions diffs - 1.1.21__py3-none-any.whl → 1.1.23__py3-none-any.whl - Mend

hamtaa-texttools 1.1.21__py3-none-any.whl → 1.1.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

{hamtaa_texttools-1.1.21.dist-info → hamtaa_texttools-1.1.23.dist-info}/METADATA +46 -87
hamtaa_texttools-1.1.23.dist-info/RECORD +32 -0
texttools/__init__.py +3 -3
texttools/batch/batch_config.py +2 -1
texttools/batch/batch_manager.py +6 -6
texttools/batch/batch_runner.py +7 -7
texttools/internals/async_operator.py +29 -41
texttools/internals/exceptions.py +0 -6
texttools/internals/operator_utils.py +24 -5
texttools/internals/prompt_loader.py +0 -5
texttools/internals/sync_operator.py +29 -41
texttools/prompts/categorize.yaml +3 -2
texttools/prompts/check_fact.yaml +5 -0
texttools/prompts/extract_entities.yaml +4 -0
texttools/prompts/extract_keywords.yaml +15 -3
texttools/prompts/is_question.yaml +4 -0
texttools/prompts/merge_questions.yaml +8 -1
texttools/prompts/propositionize.yaml +2 -0
texttools/prompts/rewrite.yaml +3 -4
texttools/prompts/subject_to_question.yaml +5 -1
texttools/prompts/summarize.yaml +4 -0
texttools/prompts/text_to_question.yaml +4 -0
texttools/prompts/translate.yaml +5 -0
texttools/tools/async_tools.py +87 -103
texttools/tools/sync_tools.py +87 -104
hamtaa_texttools-1.1.21.dist-info/RECORD +0 -32
{hamtaa_texttools-1.1.21.dist-info → hamtaa_texttools-1.1.23.dist-info}/WHEEL +0 -0
{hamtaa_texttools-1.1.21.dist-info → hamtaa_texttools-1.1.23.dist-info}/licenses/LICENSE +0 -0
{hamtaa_texttools-1.1.21.dist-info → hamtaa_texttools-1.1.23.dist-info}/top_level.txt +0 -0

texttools/internals/sync_operator.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import TypeVar, Type
+from typing import TypeVar, Type, Any
 from collections.abc import Callable
 from openai import OpenAI
@@ -27,17 +27,11 @@ class Operator:
         self._client = client
         self._model = model
-    def _analyze_completion(self, analyze_prompt: str, temperature: float) -> str:
+    def _analyze_completion(self, analyze_message: list[dict[str, str]]) -> str:
         try:
-            if not analyze_prompt:
-                raise PromptError("Analyze template is empty")
-            analyze_message = OperatorUtils.build_user_message(analyze_prompt)
             completion = self._client.chat.completions.create(
                 model=self._model,
                 messages=analyze_message,
-                temperature=temperature,
             )
             if not completion.choices:
@@ -57,20 +51,18 @@ class Operator:
     def _parse_completion(
         self,
-        main_prompt: str,
+        main_message: list[dict[str, str]],
         output_model: Type[T],
         temperature: float,
         logprobs: bool,
         top_logprobs: int,
-        priority: int,
-    ) -> tuple[T, object]:
+        priority: int | None,
+    ) -> tuple[T, Any]:
         """
         Parses a chat completion using OpenAI's structured output format.
-        Returns both the parsed object and the raw completion for logprobs.
+        Returns both the parsed Any and the raw completion for logprobs.
         """
         try:
-            main_message = OperatorUtils.build_user_message(main_prompt)
             request_kwargs = {
                 "model": self._model,
                 "messages": main_message,
@@ -82,7 +74,7 @@ class Operator:
                 request_kwargs["logprobs"] = True
                 request_kwargs["top_logprobs"] = top_logprobs
-            if priority:
+            if priority is not None:
                 request_kwargs["extra_body"] = {"priority": priority}
             completion = self._client.beta.chat.completions.parse(**request_kwargs)
@@ -112,50 +104,48 @@ class Operator:
         temperature: float,
         logprobs: bool,
         top_logprobs: int,
-        validator: Callable[[object], bool] | None,
+        validator: Callable[[Any], bool] | None,
         max_validation_retries: int | None,
-        priority: int,
+        priority: int | None,
         # Internal parameters
-        prompt_file: str,
+        tool_name: str,
         output_model: Type[T],
         mode: str | None,
         **extra_kwargs,
     ) -> OperatorOutput:
         """
-        Execute the LLM pipeline with the given input text. (Sync)
+        Execute the LLM pipeline with the given input text.
         """
         try:
             prompt_loader = PromptLoader()
             prompt_configs = prompt_loader.load(
-                prompt_file=prompt_file,
+                prompt_file=tool_name + ".yaml",
                 text=text.strip(),
                 mode=mode,
                 **extra_kwargs,
             )
-            main_prompt = ""
-            analysis = ""
+            analysis: str | None = None
             if with_analysis:
-                analysis = self._analyze_completion(
-                    prompt_configs["analyze_template"], temperature
+                analyze_message = OperatorUtils.build_message(
+                    prompt_configs["analyze_template"]
                 )
-                main_prompt += f"Based on this analysis:\n{analysis}\n"
-            if output_lang:
-                main_prompt += f"Respond only in the {output_lang} language.\n"
+                analysis = self._analyze_completion(analyze_message)
-            if user_prompt:
-                main_prompt += f"Consider this instruction {user_prompt}\n"
-            main_prompt += prompt_configs["main_template"]
-            if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
-                raise ValueError("top_logprobs should be an integer greater than 1")
+            main_message = OperatorUtils.build_message(
+                OperatorUtils.build_main_prompt(
+                    prompt_configs["main_template"], analysis, output_lang, user_prompt
+                )
+            )
             parsed, completion = self._parse_completion(
-                main_prompt, output_model, temperature, logprobs, top_logprobs, priority
+                main_message,
+                output_model,
+                temperature,
+                logprobs,
+                top_logprobs,
+                priority,
             )
             # Retry logic if validation fails
@@ -164,9 +154,7 @@ class Operator:
                     not isinstance(max_validation_retries, int)
                     or max_validation_retries < 1
                 ):
-                    raise ValueError(
-                        "max_validation_retries should be a positive integer"
-                    )
+                    raise ValueError("max_validation_retries should be a positive int")
                 succeeded = False
                 for _ in range(max_validation_retries):
@@ -175,7 +163,7 @@ class Operator:
                     try:
                         parsed, completion = self._parse_completion(
-                            main_prompt,
+                            main_message,
                             output_model,
                             retry_temperature,
                             logprobs,

texttools/prompts/categorize.yaml CHANGED Viewed

@@ -23,7 +23,7 @@ main_template: |
   Available categories with their descriptions:
   {category_list}
-  The text that has to be categorized:
+  Here is the text:
   {text}
 analyze_template: |
@@ -31,5 +31,6 @@ analyze_template: |
   To improve categorization, we need an analysis of the text.
   Analyze the given text and write its main idea and a short analysis of that.
   Analysis should be very short.
-  Text:
+  Here is the text:
   {text}

texttools/prompts/check_fact.yaml CHANGED Viewed

@@ -2,10 +2,13 @@ main_template: |
   You are an expert in determining whether a statement can be concluded from the source text or not.
   You must return a boolean value: True or False.
   Return True if the statement can be concluded from the source, and False otherwise.
   Respond only in JSON format (Output should be a boolean):
   {{"result": True/False}}
   The statement is:
   {text}
   The source text is:
   {source_text}
@@ -13,7 +16,9 @@ analyze_template: |
   You should analyze a statement and a source text and provide a brief,
   summarized analysis that could help in determining that can the statement
   be concluded from the source or not.
   The statement is:
   {text}
   The source text is:
   {source_text}

texttools/prompts/extract_entities.yaml CHANGED Viewed

@@ -2,6 +2,7 @@ main_template: |
   You are a Named Entity Recognition (NER) extractor.
   Identify and extract {entities} from the given text.
   For each entity, provide its text and a clear type.
   Respond only in JSON format:
   {{
     "result": [
@@ -11,10 +12,13 @@ main_template: |
       }}
     ]
   }}
   Here is the text:
   {text}
 analyze_template: |
   Read the following text and identify any proper nouns, key concepts, or specific mentions that might represent named entities.
   Provide a brief, summarized analysis that could help in categorizing these entities.
+  Here is the text:
   {text}

texttools/prompts/extract_keywords.yaml CHANGED Viewed

@@ -3,14 +3,17 @@ main_template:
   auto: |
     You are an expert keyword extractor.
     Extract the most relevant keywords from the given text.
     Guidelines:
     - Keywords must represent the main concepts of the text.
     - If two words have overlapping meanings, choose only one.
     - Do not include generic or unrelated words.
     - Keywords must be single, self-contained words (no phrases).
     - Output between 3 and 7 keywords based on the input length.
-    - Respond only in JSON format:
+    Respond only in JSON format:
     {{"result": ["keyword1", "keyword2", etc.]}}
     Here is the text:
     {text}
@@ -29,8 +32,10 @@ main_template:
       - Short texts (a few sentences): 3 keywords
       - Medium texts (1–4 paragraphs): 4–5 keywords
       - Long texts (more than 4 paragraphs): 6–7 keywords
-    - Respond only in JSON format:
+    Respond only in JSON format:
     {{"result": ["keyword1", "keyword2", etc.]}}
     Here is the text:
     {text}
@@ -45,7 +50,8 @@ main_template:
     - If the text doesn't contain enough distinct keywords, include the most relevant ones even if some are less specific.
     - Keywords must be single words (no multi-word expressions).
     - Order keywords by relevance (most relevant first).
-    - Respond only in JSON format:
+    Respond only in JSON format:
     {{"result": ["keyword1", "keyword2", "keyword3", ...]}}
     Here is the text:
@@ -55,14 +61,20 @@ analyze_template:
   auto: |
     Analyze the following text to identify its main topics, concepts, and important terms.
     Provide a concise summary of your findings that will help in extracting relevant keywords.
+    Here is the text:
     {text}
   threshold: |
     Analyze the following text to identify its main topics, concepts, and important terms.
     Provide a concise summary of your findings that will help in extracting relevant keywords.
+    Here is the text:
     {text}
   count: |
     Analyze the following text to identify its main topics, concepts, and important terms.
     Provide a concise summary of your findings that will help in extracting relevant keywords.
+    Here is the text:
     {text}

texttools/prompts/is_question.yaml CHANGED Viewed

@@ -1,8 +1,10 @@
 main_template: |
   You are a question detector.
   Determine that if the given text contains any question or not.
   Respond only in JSON format (Output should be a boolean):
   {{"result": True/False}}
   Here is the text:
   {text}
@@ -10,5 +12,7 @@ analyze_template: |
   We want to analyze this text snippet to see if it contains any question or request of some kind or not.
   Read the text, and reason about it being a request or not.
   Summerized, short answer.
+  Here is the text:
   {text}

texttools/prompts/merge_questions.yaml CHANGED Viewed

@@ -4,13 +4,16 @@ main_template:
     You are a language expert.
     I will give you a list of questions that are semantically similar.
     Your task is to merge them into one unified question.
     Guidelines:
     - Preserves all the information and intent from the original questions.
     - Sounds natural, fluent, and concise.
     - Avoids redundancy or unnecessary repetition.
     - Does not omit any unique idea from the originals.
-    - Respond only in JSON format:
+    Respond only in JSON format:
     {{"result": "string"}}
     Here is the questions:
     {text}
@@ -20,8 +23,10 @@ main_template:
     Then, write one merged question that combines all their content clearly and naturally, without redundancy.
     Step 1: Extract key ideas.
     Step 2: Write the final merged question.
     Respond only in JSON format:
     {{"result": "string"}}
     Here is the questions:
     {text}
@@ -33,6 +38,7 @@ analyze_template:
     and the specific information they are seeking.
     Provide a brief, summarized understanding of the questions' meaning that
     will help in merging and rephrasing it accurately without changing its intent.
     Here is the question:
     {text}
@@ -41,6 +47,7 @@ analyze_template:
     and the literal meaning it conveys.
     Provide a brief, summarized analysis of their linguistic structure and current meaning,
     which will then be used to create a new question containing all of their contents.
     Here is the question:
     {text}

texttools/prompts/propositionize.yaml CHANGED Viewed

@@ -19,4 +19,6 @@ analyze_template: |
   An atomic proposition is a single, self-contained fact that is concise,
   verifiable, and does not rely on external context.
   You just have to think around the possible propositions in the text and how a proposition can be made.
+  Here is the text:
   {text}

texttools/prompts/rewrite.yaml CHANGED Viewed

@@ -52,7 +52,6 @@ main_template:
       - Make it Challenging: The difference should be subtle enough that it requires a deep understanding of the text to identify, not just a simple keyword mismatch.
       - Maintain Similar Length: The generated sentence should be of roughly the same length and level of detail as the Anchor.
       Respond only in JSON format:
       {{"result": "str"}}
@@ -73,7 +72,7 @@ analyze_template:
     Your analysis should capture the ESSENTIAL MEANING that must be preserved in any paraphrase.
-    Text:
+    Here is the text:
     {text}
   negative: |
@@ -87,7 +86,7 @@ analyze_template:
     The goal is to find topics that are in the same domain but semantically unrelated to this specific text.
-    Text:
+    Here is the text:
     {text}
   hard_negative: |
@@ -106,6 +105,6 @@ analyze_template:
     - Sentence structure
     - 80-90% of the vocabulary
-    Text:
+    Here is the text:
     {text}

texttools/prompts/subject_to_question.yaml CHANGED Viewed

@@ -3,12 +3,15 @@ main_template: |
   Given the following subject, generate {number_of_questions} appropriate questions that this subject would directly respond to.
   The generated subject should be independently meaningful,
   and it must not mention any verbs like, this, that, he or she and etc. in the question.
   There is a `reason` key, fill that up with a summerized version of your thoughts.
   The `reason` must be less than 20 words.
   Don't forget to fill the reason.
   Respond only in JSON format:
   {{"result": ["question1", "question2", ...], "reason": "string"}}
-  Here is the text:
+  Here is the subject:
   {text}
 analyze_template: |
@@ -18,5 +21,6 @@ analyze_template: |
   We need a summerized analysis of the subject.
   What is the subject about?
   What point of views can we see and generate questoins from it? (Questions that real users might have.)
   Here is the subject:
   {text}

texttools/prompts/summarize.yaml CHANGED Viewed

@@ -1,8 +1,10 @@
 main_template: |
   You are a summarizer.
   You must summarize the given text, preserving its meaning.
   Respond only in JSON format:
   {{"result": "string"}}
   Provide a concise summary of the following text:
   {text}
@@ -10,5 +12,7 @@ main_template: |
 analyze_template: |
   Read the following text and identify its main points, key arguments, and overall purpose.
   Provide a brief, summarized analysis that will help in generating an accurate and concise summary.
+  Here is the text:
   {text}

texttools/prompts/text_to_question.yaml CHANGED Viewed

@@ -3,11 +3,14 @@ main_template: |
   Given the following answer, generate {number_of_questions} appropriate questions that this answer would directly respond to.
   The generated answer should be independently meaningful,
   and not mentioning any verbs like, this, that, he or she on the question.
   There is a `reason` key, fill that up with a summerized version of your thoughts.
   The `reason` must be less than 20 words.
   Don't forget to fill the reason.
   Respond only in JSON format:
   {{"result": ["question1", "question2", ...], "reason": "string"}}
   Here is the answer:
   {text}
@@ -17,6 +20,7 @@ analyze_template: |
   Provide a brief, summarized understanding of the answer's content that will
   help in formulating relevant and direct questions.
   Just mention the keypoints that was provided in the answer
   Here is the answer:
   {text}

texttools/prompts/translate.yaml CHANGED Viewed

@@ -1,9 +1,12 @@
 main_template: |
   You are a {target_language} translator.
   Output only the translated text.
   Respond only in JSON format:
   {{"result": "string"}}
   Don't translate proper name, only transliterate them to {target_language}
   Translate the following text to {target_language}:
   {text}
@@ -11,5 +14,7 @@ analyze_template: |
   Analyze the following text and identify important linguistic considerations for translation.
   Point out any idioms, cultural references, or complex structures that need special attention.
   Also, list all proper nouns that should not be translated. Write your analysis in the {target_language}.
+  Here is the text:
   {text}

hamtaa-texttools 1.1.21__py3-none-any.whl → 1.1.23__py3-none-any.whl

hamtaa-texttools 1.1.21__py3-none-any.whl → 1.1.23__py3-none-any.whl