hamtaa-texttools 0.1.43__py3-none-any.whl → 0.1.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools has been flagged as potentially problematic; consult the advisory on the package's registry page for details before installing.

@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 0.1.43
3
+ Version: 0.1.44
4
4
  Summary: A set of high-level NLP tools
5
5
  Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
6
  Requires-Python: >=3.8
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: openai==1.97.1
9
- Requires-Dist: numpy==1.26.4
8
+ Requires-Dist: openai>=1.97.0
9
+ Requires-Dist: numpy>=1.26.4
10
10
 
11
11
  # Text Tools
12
12
 
@@ -20,7 +20,7 @@ Requires-Dist: numpy==1.26.4
20
20
  </p>
21
21
 
22
22
 
23
- ### How to Install
23
+ ## How to Install
24
24
 
25
25
  Install the package using:
26
26
 
@@ -16,7 +16,7 @@ texttools/batch_manager/batch_manager.py,sha256=jAmKskL3OTYwwsO1mWsWAB3VxMlOF07c
16
16
  texttools/batch_manager/batch_runner.py,sha256=kW0IPauI11xpssApMA7b4XI19FePImywym3V7tBaa-o,7404
17
17
  texttools/formatter/__init__.py,sha256=KHz2tFZctbit_HVbQNCTMi46JzmKlg-uB6Ost63IpVU,46
18
18
  texttools/formatter/base.py,sha256=0fiM6E7NdJevAVpL6yyPaUZVJGKWxE3fr-Ay1oqgJqQ,879
19
- texttools/formatter/gemma3_formatter.py,sha256=c7YRj6fIPqhs_nvnSbWRTuguRoNQJvuIvk_bcaVDioM,1634
19
+ texttools/formatter/gemma3_formatter.py,sha256=AmdKBYLj6HMsI2DDX4KHNEEVYJmz_VVNUBOv8ScGjsY,1865
20
20
  texttools/handlers/__init__.py,sha256=sv0JloipQ57AI0xo-3w9k6cK5rYjZP3ltR2EbBhkHTA,121
21
21
  texttools/handlers/handlers.py,sha256=LtC4FBuzRUDy3Jw-Fp21WR-QS1jOcDhsGaMPFQGjfTw,2381
22
22
  texttools/handlers/categorizer/__init__.py,sha256=mE05vt_ma6vcP8pQ37BZ85WVQ8jhcjDS0iZV81_LFCY,127
@@ -35,7 +35,7 @@ texttools/tools/merger/gemma_question_merger.py,sha256=JAC-52kBbabIzEWp0MFi9viiu
35
35
  texttools/tools/ner/__init__.py,sha256=BW84BcItel6Mc2JlaDL6qvAktVMkti67VXceeCnOB1g,70
36
36
  texttools/tools/ner/gemma_ner_extractor.py,sha256=YhyIwX_8bdwkFb4gY8g9mZdYHW_r1jCvbmjjNCK9Wfo,5384
37
37
  texttools/tools/question_detector/__init__.py,sha256=ulArGttooSoxEe0vUDQSxUQrnsxr7gH9l-LjSER2dVI,162
38
- texttools/tools/question_detector/gemma_detector.py,sha256=dHWHcthjMArW42CNPGmk3Xbj1AxjM33A34dOmLUA64U,4141
38
+ texttools/tools/question_detector/gemma_detector.py,sha256=DhlCAA6Hws_OTuYil6UY4sYlbjdQQU6EqHdoTl3a--w,3772
39
39
  texttools/tools/question_detector/llm_detector.py,sha256=zo89eh359hqQGGf83-6M22AaiH7q-m0m91SjTyxZaYs,3862
40
40
  texttools/tools/question_generator/__init__.py,sha256=EAElpB_YeyMoBqvFNjbW2a_j18SLtiKQ7sRmdS58Fww,61
41
41
  texttools/tools/question_generator/gemma_question_generator.py,sha256=V5QcXmHZ5shTvrThOxUrKJ4FqP0P58NIJbsPdyyy5IM,6744
@@ -54,7 +54,7 @@ texttools/tools/summarizer/gemma_summarizer.py,sha256=ikhsBv7AiZD1dT_d12AyjXxojz
54
54
  texttools/tools/summarizer/llm_summerizer.py,sha256=-0rUKbSnl1aDeBfJ5DCSbIlwd2k-9qIaCKgoQJa0hWc,3412
55
55
  texttools/tools/translator/__init__.py,sha256=KO1m08J2BZwRqBGO9ICB4l4cnH1jfHLHL5HbgYFUWM8,72
56
56
  texttools/tools/translator/gemma_translator.py,sha256=57NMfJAZHQjZSr_eCBePE_Pnag8pu3O00Jicxhzn6Jc,7572
57
- hamtaa_texttools-0.1.43.dist-info/METADATA,sha256=GjVLyZZclY4hp29Yd1DpRtqvFDmTAGOoYEOI-FFvbA0,1482
58
- hamtaa_texttools-0.1.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
59
- hamtaa_texttools-0.1.43.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
60
- hamtaa_texttools-0.1.43.dist-info/RECORD,,
57
+ hamtaa_texttools-0.1.44.dist-info/METADATA,sha256=OImC1zmuJh7p8SY3s3mhm8poOzYOuuqx6vjOeDy5O3k,1481
58
+ hamtaa_texttools-0.1.44.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
59
+ hamtaa_texttools-0.1.44.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
60
+ hamtaa_texttools-0.1.44.dist-info/RECORD,,
@@ -1,5 +1,3 @@
1
- from typing import Literal
2
-
3
1
  from texttools.formatter.base import ChatFormatter
4
2
 
5
3
 
@@ -10,14 +8,13 @@ class Gemma3Formatter(ChatFormatter):
10
8
  """
11
9
 
12
10
  ROLE = "role"
11
+ CONTENT = "content"
13
12
  USER_ROLE = "user"
14
13
  ASSISTANT_ROLE = "assistant"
15
- CONTENT = "content"
16
14
  VALID_ROLES = {USER_ROLE, ASSISTANT_ROLE}
15
+ VALID_KEYS = {ROLE, CONTENT}
17
16
 
18
- def format(
19
- self, messages: list[dict[Literal["role", "content"], str]]
20
- ) -> list[dict[str, str]]:
17
+ def format(self, messages: list[dict[str, str]]) -> list[dict[str, str]]:
21
18
  """
22
19
  :param messages: list of {"role": ..., "content": ...}, where role is "user", "assistant", or "system"
23
20
  :return: a new list where consecutive "user" messages are merged into single entries
@@ -25,14 +22,20 @@ class Gemma3Formatter(ChatFormatter):
25
22
 
26
23
  merged: list[dict[str, str]] = []
27
24
 
28
- for msg in messages:
29
- role, content = msg[self.ROLE], msg[self.CONTENT].strip()
25
+ for message in messages:
26
+ # Validate keys strictly
27
+ if set(message.keys()) != self.VALID_KEYS:
28
+ raise ValueError(
29
+ f"Message dict keys must be exactly {self.VALID_KEYS}, got {set(message.keys())}"
30
+ )
31
+
32
+ role, content = message[self.ROLE], message[self.CONTENT].strip()
30
33
 
31
34
  # Replace "system" role with "user" role
32
35
  if role == "system":
33
36
  role = self.USER_ROLE
34
37
 
35
- # Raise value error if msg["role"] wan't a valid role
38
+ # Raise value error if message["role"] wan't a valid role
36
39
  if role not in self.VALID_ROLES:
37
40
  raise ValueError(f"Unexpected role: {role}")
38
41
 
@@ -36,64 +36,48 @@ class GemmaQuestionDetector(BaseQuestionDetector):
36
36
  self.model = model
37
37
  self.temperature = temperature
38
38
  self.client_kwargs = client_kwargs
39
-
40
39
  self.chat_formatter = chat_formatter or Gemma3Formatter()
41
-
42
40
  self.use_reason = use_reason
43
41
  self.prompt_template = prompt_template
44
42
 
45
- self.json_schema = {"is_question": bool}
46
-
47
43
  def _build_messages(self, text: str, reason: str = None) -> list[dict[str, str]]:
48
- clean = self.preprocess(text)
49
- schema_instr = f"respond only in JSON format: {self.json_schema}"
44
+ clean_text = self.preprocess(text)
50
45
  messages: list[dict[str, str]] = []
51
46
 
52
47
  if reason:
53
48
  messages.append({"role": "user", "content": reason})
54
49
 
55
- messages.append({"role": "user", "content": schema_instr})
56
50
  if self.prompt_template:
57
51
  messages.append({"role": "user", "content": self.prompt_template})
58
- messages.append({"role": "user", "content": clean})
52
+ messages.append({"role": "user", "content": clean_text})
59
53
 
60
- # this line will restructure the messages
61
- # based on the formatter that we provided
62
- # some models will require custom settings
54
+ # Restructure the messages based on the formatter; some models will require custom settings
63
55
  restructured = self.chat_formatter.format(messages=messages)
64
56
 
65
57
  return restructured
66
58
 
67
59
  def _reason(self, text: str) -> list:
68
- messages = [
69
- {
70
- "role": "user",
71
- "content": """
72
- we want to analyze this text snippet to see if it contains any question
73
- or request of some kind or not
74
- read the text, and reason about it being a request or not
75
- summerized
76
- short answer
77
- """,
78
- },
79
- {
80
- "role": "user",
81
- "content": f"""
60
+ reason_prompt = f"""
61
+ We want to analyze this text snippet to see if it contains any question
62
+ or request of some kind or not.
63
+ Read the text, and reason about it being a request or not.
64
+ Summerized, Short answer
82
65
  {text}
83
- """,
84
- },
66
+ """
67
+ messages = [
68
+ {"role": "user", "content": reason_prompt},
85
69
  ]
86
70
 
87
71
  restructured = self.chat_formatter.format(messages=messages)
88
72
 
89
- resp = self.client.chat.completions.create(
73
+ response = self.client.chat.completions.create(
90
74
  model=self.model,
91
75
  messages=restructured,
92
76
  temperature=self.temperature,
93
77
  **self.client_kwargs,
94
78
  )
95
79
 
96
- reason = resp.choices[0].message.content.strip()
80
+ reason = response.choices[0].message.content.strip()
97
81
  return reason
98
82
 
99
83
  def detect(self, text: str) -> bool:
@@ -125,6 +109,6 @@ class GemmaQuestionDetector(BaseQuestionDetector):
125
109
  f"Failed to parse the response. Raw content: {message.content}"
126
110
  )
127
111
 
128
- # dispatch and return
112
+ # Dispatch and return
129
113
  self._dispatch({"question": text, "result": result})
130
114
  return result