hamtaa-texttools 0.1.53__py3-none-any.whl → 0.1.55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hamtaa-texttools might be problematic.
- {hamtaa_texttools-0.1.53.dist-info → hamtaa_texttools-0.1.55.dist-info}/METADATA +1 -1
- {hamtaa_texttools-0.1.53.dist-info → hamtaa_texttools-0.1.55.dist-info}/RECORD +5 -5
- texttools/tools/translator/gemma_translator.py +81 -77
- {hamtaa_texttools-0.1.53.dist-info → hamtaa_texttools-0.1.55.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-0.1.53.dist-info → hamtaa_texttools-0.1.55.dist-info}/top_level.txt +0 -0
{hamtaa_texttools-0.1.53.dist-info → hamtaa_texttools-0.1.55.dist-info}/RECORD

@@ -50,12 +50,12 @@ texttools/tools/summarizer/__init__.py,sha256=phrR7qO20CNhO3hjXQBzhTRVumdVdGSufm
 texttools/tools/summarizer/gemma_summarizer.py,sha256=ikhsBv7AiZD1dT_d12AyjXxojzSW92e2y5WjchI_3bE,4474
 texttools/tools/summarizer/llm_summerizer.py,sha256=-0rUKbSnl1aDeBfJ5DCSbIlwd2k-9qIaCKgoQJa0hWc,3412
 texttools/tools/translator/__init__.py,sha256=KO1m08J2BZwRqBGO9ICB4l4cnH1jfHLHL5HbgYFUWM8,72
-texttools/tools/translator/gemma_translator.py,sha256=
+texttools/tools/translator/gemma_translator.py,sha256=KsKbD_hzbOmFt-K0pciZ1IXz66JGm480FdBqWL2mYL0,7272
 texttools/utils/flex_processor.py,sha256=C-lMwMjpIM6uAPFxXdgajxcFV1ccngEfJqq6xe5S1J8,3123
 texttools/utils/batch_manager/__init__.py,sha256=3ZkxA395lRD4gNxJ1vp0fNuz_XuBr50GoP51rrwQ0Ks,87
 texttools/utils/batch_manager/batch_manager.py,sha256=jAmKskL3OTYwwsO1mWsWAB3VxMlOF07c2GW1Ev83ZhY,9283
 texttools/utils/batch_manager/batch_runner.py,sha256=DE6TFz3i_jR-ZiUYbgIdLgjqr3aitw-JM_tKnSvzGL0,7424
-hamtaa_texttools-0.1.
-hamtaa_texttools-0.1.
-hamtaa_texttools-0.1.
-hamtaa_texttools-0.1.
+hamtaa_texttools-0.1.55.dist-info/METADATA,sha256=-WVDAY_TTcDZwiM8YkCsrA_qy8dlO669LM2oEPtYiA4,1481
+hamtaa_texttools-0.1.55.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hamtaa_texttools-0.1.55.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
+hamtaa_texttools-0.1.55.dist-info/RECORD,,
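Each RECORD entry has the form path,sha256=<digest>,<size-in-bytes>, where the digest is the URL-safe base64 encoding of the file's SHA-256 hash with trailing '=' padding stripped. As a minimal sketch (not part of this package), such an entry can be recomputed from an unpacked wheel to verify, for example, the new gemma_translator.py line:

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Recompute a wheel RECORD-style line: path,sha256=<urlsafe-b64 digest>,<size>.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode('ascii')},{len(data)}"

# Example call; the path assumes the 0.1.55 wheel has been unpacked into the current directory.
print(record_entry("texttools/tools/translator/gemma_translator.py"))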
texttools/tools/translator/gemma_translator.py

@@ -1,3 +1,4 @@
+import json
 from typing import Any, List, Optional
 
 from openai import OpenAI
@@ -19,7 +20,7 @@ class PreprocessorOutput(BaseModel):
 
 class GemmaTranslator(BaseTranslator):
     """
-    Translator for Gemma-style models
+    Translator for Gemma-style models using structured JSON prompts.
     Outputs only the translated text, without any additional structure.
     """
 
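The hunk header above references class PreprocessorOutput(BaseModel), whose definition is outside this diff. Judging from the preprocess() changes further down (the {"entities": ["string"]} schema and the extracted_data.entities access), its shape is presumably close to the sketch below; the exact definition in the package may differ:

from pydantic import BaseModel

class PreprocessorOutput(BaseModel):
    # Field inferred from the JSON schema and attribute access shown in the diff; not confirmed by it.
    entities: list[str]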
@@ -52,57 +53,61 @@ class GemmaTranslator(BaseTranslator):
         reason: Optional[str] = None,
         proper_names: Optional[list[str]] = None,
     ) -> list[dict[str, str]]:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        """Constructs a single, comprehensive JSON prompt for the translation task."""
+
+        prompt_data = {
+            "role": "Expert Translator",
+            "task": f"Translate the following text from {source_language or 'the original language'} to {target_language}.",
+            "input_text": text,
+            "rules": {
+                "proper_names": {
+                    "instruction": "These names MUST NOT be translated. Only transliterate them to preserve their phonetic form.",
+                    "list": proper_names if proper_names else "None",
+                }
+            },
+            "output_instructions": [
+                "Provide ONLY the translated text.",
+                "Do not include any explanations, comments, or markdown formatting.",
+            ],
+        }
+
         if reason:
-
-
-
-
-
-
-
-
-
-
-
-        # Optional additional template
+            prompt_data["context"] = {
+                "preliminary_analysis": reason,
+                "instruction": "Use this analysis to inform the translation.",
+            }
+
+        # The entire set of instructions is formatted into a single JSON string
+        content = json.dumps(prompt_data, indent=2)
+        messages = [{"role": "user", "content": content}]
+
+        # Optional additional JSON template for more complex rules
         if self.prompt_template:
             messages.append({"role": "user", "content": self.prompt_template})
 
-
-
-        return restructured
+        return self.chat_formatter.format(messages=messages)
 
     def _reason(self, text: str, target_language: str) -> str:
-        """
-
-
-
-
-
-
-
-
-
-
-
-
+        """Internal reasoning step using a JSON prompt to analyze text before translation."""
+
+        prompt_data = {
+            "task": "Analyze the provided text to identify potential translation challenges.",
+            "analysis_points": [
+                "Identify idioms or colloquialisms.",
+                "Note any cultural references.",
+                "Point out complex grammatical structures.",
+                "List all proper nouns that should be transliterated, not translated.",
+            ],
+            "input_text": text,
+            "output_instructions": {
+                "language": target_language,
+                "format": "A concise, bulleted list.",
+                "important_rule": "DO NOT TRANSLATE the original text.",
+                "length": "must be less than 200 words.",
+            },
+        }
+
+        messages = [{"role": "user", "content": json.dumps(prompt_data, indent=2)}]
 
         restructured = self.chat_formatter.format(messages=messages)
         completion = self.client.chat.completions.create(
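To make the new prompt format concrete, the following standalone sketch mirrors the _build_messages logic added above; the sample text, languages, and proper names are invented for illustration and are not taken from the package:

import json

# Invented sample inputs for illustration only.
text = "Saadi traveled to Baghdad."
source_language = "English"
target_language = "Persian"
proper_names = ["Saadi", "Baghdad"]

prompt_data = {
    "role": "Expert Translator",
    "task": f"Translate the following text from {source_language or 'the original language'} to {target_language}.",
    "input_text": text,
    "rules": {
        "proper_names": {
            "instruction": "These names MUST NOT be translated. Only transliterate them to preserve their phonetic form.",
            "list": proper_names if proper_names else "None",
        }
    },
    "output_instructions": [
        "Provide ONLY the translated text.",
        "Do not include any explanations, comments, or markdown formatting.",
    ],
}

# The single user message sent to the model is the pretty-printed JSON string.
print(json.dumps(prompt_data, indent=2))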
@@ -114,67 +119,66 @@ class GemmaTranslator(BaseTranslator):
 
         return completion.choices[0].message.content.strip()
 
-    def preprocess(self, text: str) ->
-        """
-        Preprocessor that finds proper names of Islamic figures. The extractions will be given to the
-        LLm in order to know that it shouldn't translate them, but transliterate them.
-        """
-
-        messages: list[dict[str, str]] = []
+    def preprocess(self, text: str) -> PreprocessorOutput:
+        """Preprocessor that finds proper names using a structured JSON prompt."""
 
-
-
-
-
-
-
-
-
+        prompt_data = {
+            "task_description": "Extract all proper names of people from the provided text.",
+            "input_text": text,
+            "output_format": {
+                "schema": {"entities": ["string"]},
+                "instruction": "Return a JSON object matching this schema. If no names are found, the 'entities' list must be empty.",
+            },
+        }
 
-
-        messages.append({"role": "user", "content": text_prompt})
+        messages = [{"role": "user", "content": json.dumps(prompt_data, indent=2)}]
 
         restructured = self.chat_formatter.format(messages=messages)
+
         completion = self.client.chat.completions.parse(
             model=self.model,
             messages=restructured,
-
+            response_model=PreprocessorOutput,
             temperature=self.temperature,
             extra_body={
                 "guided_decoding_backend": "auto",
             },
             **self.client_kwargs,
         )
-        message = completion.choices[0].message
 
-
-        return entities
+        return completion.choices[0].message.parsed
 
     def translate(
         self, text: str, target_language: str, source_language: Optional[str] = None
     ) -> str:
-        """
-        Translates text and returns only the translated string.
-        """
+        """Translates text using a structured JSON-based workflow."""
 
-        # Extract proper names
-
-        proper_names =
+        # 1. Preprocess: Extract proper names
+        extracted_data = self.preprocess(text)
+        proper_names = extracted_data.entities
 
+        # 2. Reason (optional): Analyze the text for challenges
         reason_summary = None
         if self.use_reason:
-            reason_summary = self._reason(text, target_language
+            reason_summary = self._reason(text, target_language)
 
+        # 3. Translate: Build the final prompt and get the translation
         messages = self._build_messages(
             text, target_language, source_language, reason_summary, proper_names
         )
+
+        # For debugging purposes, let's see the final prompt
+        print("--- Translation Request ---")
         print(f"Original: {text}")
         print(
             f"Translating to {target_language} from {source_language or 'original'}..."
         )
-
-            f"Reasoning
-        )
+        if reason_summary:
+            print(f"Reasoning Analysis:\n{reason_summary}")
+        print("--- Final JSON Prompt Sent to Model ---")
+        # Pretty-print the JSON content from the message
+        print(json.dumps(json.loads(messages[0]["content"]), indent=2))
+        print("---------------------------")
 
         completion = self.client.chat.completions.create(
             model=self.model,
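The public entry point after this change is still translate(text, target_language, source_language=None) -> str, as shown in the hunk above. A minimal usage sketch, assuming a GemmaTranslator instance has already been constructed (its constructor is not part of this diff, so configuration is omitted); the sample text and languages are invented for illustration:

# `translator` is assumed to be an already-configured GemmaTranslator instance;
# its constructor arguments are not visible in this diff.
extracted = translator.preprocess("Saadi traveled to Baghdad.")  # PreprocessorOutput with .entities
translated = translator.translate(
    text="Saadi traveled to Baghdad.",
    target_language="Persian",
    source_language="English",
)
print(translated)

Note that the new translate() also prints the original text, the optional reasoning analysis, and the final JSON prompt for debugging before calling the model.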
{hamtaa_texttools-0.1.53.dist-info → hamtaa_texttools-0.1.55.dist-info}/WHEEL: file without changes
{hamtaa_texttools-0.1.53.dist-info → hamtaa_texttools-0.1.55.dist-info}/top_level.txt: file without changes