hamtaa-texttools 0.1.43__tar.gz → 0.1.44__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (68)
  1. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/PKG-INFO +4 -4
  2. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/README.md +1 -1
  3. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/hamtaa_texttools.egg-info/PKG-INFO +4 -4
  4. hamtaa_texttools-0.1.44/hamtaa_texttools.egg-info/requires.txt +2 -0
  5. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/pyproject.toml +3 -3
  6. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/formatter/gemma3_formatter.py +12 -9
  7. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/question_detector/gemma_detector.py +14 -30
  8. hamtaa_texttools-0.1.43/hamtaa_texttools.egg-info/requires.txt +0 -2
  9. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
  10. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  11. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  12. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/setup.cfg +0 -0
  13. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/setup.py +0 -0
  14. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/tests/test_vllm_output.py +0 -0
  15. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/__init__.py +0 -0
  16. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/__init__.py +0 -0
  17. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_categorizer.py +0 -0
  18. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_keyword_extractor.py +0 -0
  19. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_ner_extractor.py +0 -0
  20. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_question_detector.py +0 -0
  21. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_question_generator.py +0 -0
  22. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_question_merger.py +0 -0
  23. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_question_rewriter.py +0 -0
  24. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_router.py +0 -0
  25. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_summarizer.py +0 -0
  26. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_task_performer.py +0 -0
  27. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/base/base_translator.py +0 -0
  28. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/batch_manager/__init__.py +0 -0
  29. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/batch_manager/batch_manager.py +0 -0
  30. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/batch_manager/batch_runner.py +0 -0
  31. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/formatter/__init__.py +0 -0
  32. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/formatter/base.py +0 -0
  33. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/handlers/__init__.py +0 -0
  34. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/handlers/categorizer/__init__.py +0 -0
  35. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/handlers/categorizer/categorizer.py +0 -0
  36. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/handlers/handlers.py +0 -0
  37. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/__init__.py +0 -0
  38. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/categorizer/__init__.py +0 -0
  39. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/categorizer/encoder_model/__init__.py +0 -0
  40. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/categorizer/encoder_model/encoder_vectorizer.py +0 -0
  41. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/categorizer/llm/__init__.py +0 -0
  42. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/categorizer/llm/gemma_categorizer.py +0 -0
  43. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/categorizer/llm/openai_categorizer.py +0 -0
  44. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/keyword_extractor/__init__.py +0 -0
  45. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/keyword_extractor/gemma_extractor.py +0 -0
  46. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/merger/__init__.py +0 -0
  47. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/merger/gemma_question_merger.py +0 -0
  48. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/ner/__init__.py +0 -0
  49. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/ner/gemma_ner_extractor.py +0 -0
  50. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/question_detector/__init__.py +0 -0
  51. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/question_detector/llm_detector.py +0 -0
  52. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/question_generator/__init__.py +0 -0
  53. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/question_generator/gemma_question_generator.py +0 -0
  54. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/reranker/__init__.py +0 -0
  55. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/reranker/reranker.py +0 -0
  56. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/reranker/scorer.py +0 -0
  57. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/reranker/sorter.py +0 -0
  58. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/rewriter/__init__.py +0 -0
  59. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/rewriter/gemma_question_rewriter.py +0 -0
  60. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/router/__init__.py +0 -0
  61. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/router/gemma_router.py +0 -0
  62. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/subject_to_question/__init__.py +0 -0
  63. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/subject_to_question/gemma_question_generator.py +0 -0
  64. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/summarizer/__init__.py +0 -0
  65. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/summarizer/gemma_summarizer.py +0 -0
  66. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/summarizer/llm_summerizer.py +0 -0
  67. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/translator/__init__.py +0 -0
  68. {hamtaa_texttools-0.1.43 → hamtaa_texttools-0.1.44}/texttools/tools/translator/gemma_translator.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 0.1.43
3
+ Version: 0.1.44
4
4
  Summary: A set of high-level NLP tools
5
5
  Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
6
  Requires-Python: >=3.8
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: openai==1.97.1
9
- Requires-Dist: numpy==1.26.4
8
+ Requires-Dist: openai>=1.97.0
9
+ Requires-Dist: numpy>=1.26.4
10
10
 
11
11
  # Text Tools
12
12
 
@@ -20,7 +20,7 @@ Requires-Dist: numpy==1.26.4
20
20
  </p>
21
21
 
22
22
 
23
- ### How to Install
23
+ ## How to Install
24
24
 
25
25
  Install the package using:
26
26
 
@@ -10,7 +10,7 @@
10
10
  </p>
11
11
 
12
12
 
13
- ### How to Install
13
+ ## How to Install
14
14
 
15
15
  Install the package using:
16
16
 
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 0.1.43
3
+ Version: 0.1.44
4
4
  Summary: A set of high-level NLP tools
5
5
  Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
6
  Requires-Python: >=3.8
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: openai==1.97.1
9
- Requires-Dist: numpy==1.26.4
8
+ Requires-Dist: openai>=1.97.0
9
+ Requires-Dist: numpy>=1.26.4
10
10
 
11
11
  # Text Tools
12
12
 
@@ -20,7 +20,7 @@ Requires-Dist: numpy==1.26.4
20
20
  </p>
21
21
 
22
22
 
23
- ### How to Install
23
+ ## How to Install
24
24
 
25
25
  Install the package using:
26
26
 
@@ -0,0 +1,2 @@
1
+ openai>=1.97.0
2
+ numpy>=1.26.4
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hamtaa-texttools"
7
- version = "0.1.43"
7
+ version = "0.1.44"
8
8
  description = "A set of high-level NLP tools"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -15,8 +15,8 @@ authors = [
15
15
  { name = "Mousavinezhad" }
16
16
  ]
17
17
  dependencies = [
18
- "openai==1.97.1",
19
- "numpy==1.26.4",
18
+ "openai>=1.97.0",
19
+ "numpy>=1.26.4",
20
20
  ]
21
21
 
22
22
  [tool.setuptools.packages.find]
@@ -1,5 +1,3 @@
1
- from typing import Literal
2
-
3
1
  from texttools.formatter.base import ChatFormatter
4
2
 
5
3
 
@@ -10,14 +8,13 @@ class Gemma3Formatter(ChatFormatter):
10
8
  """
11
9
 
12
10
  ROLE = "role"
11
+ CONTENT = "content"
13
12
  USER_ROLE = "user"
14
13
  ASSISTANT_ROLE = "assistant"
15
- CONTENT = "content"
16
14
  VALID_ROLES = {USER_ROLE, ASSISTANT_ROLE}
15
+ VALID_KEYS = {ROLE, CONTENT}
17
16
 
18
- def format(
19
- self, messages: list[dict[Literal["role", "content"], str]]
20
- ) -> list[dict[str, str]]:
17
+ def format(self, messages: list[dict[str, str]]) -> list[dict[str, str]]:
21
18
  """
22
19
  :param messages: list of {"role": ..., "content": ...}, where role is "user", "assistant", or "system"
23
20
  :return: a new list where consecutive "user" messages are merged into single entries
@@ -25,14 +22,20 @@ class Gemma3Formatter(ChatFormatter):
25
22
 
26
23
  merged: list[dict[str, str]] = []
27
24
 
28
- for msg in messages:
29
- role, content = msg[self.ROLE], msg[self.CONTENT].strip()
25
+ for message in messages:
26
+ # Validate keys strictly
27
+ if set(message.keys()) != self.VALID_KEYS:
28
+ raise ValueError(
29
+ f"Message dict keys must be exactly {self.VALID_KEYS}, got {set(message.keys())}"
30
+ )
31
+
32
+ role, content = message[self.ROLE], message[self.CONTENT].strip()
30
33
 
31
34
  # Replace "system" role with "user" role
32
35
  if role == "system":
33
36
  role = self.USER_ROLE
34
37
 
35
- # Raise value error if msg["role"] wan't a valid role
38
+ # Raise value error if message["role"] wan't a valid role
36
39
  if role not in self.VALID_ROLES:
37
40
  raise ValueError(f"Unexpected role: {role}")
38
41
 
@@ -36,64 +36,48 @@ class GemmaQuestionDetector(BaseQuestionDetector):
36
36
  self.model = model
37
37
  self.temperature = temperature
38
38
  self.client_kwargs = client_kwargs
39
-
40
39
  self.chat_formatter = chat_formatter or Gemma3Formatter()
41
-
42
40
  self.use_reason = use_reason
43
41
  self.prompt_template = prompt_template
44
42
 
45
- self.json_schema = {"is_question": bool}
46
-
47
43
  def _build_messages(self, text: str, reason: str = None) -> list[dict[str, str]]:
48
- clean = self.preprocess(text)
49
- schema_instr = f"respond only in JSON format: {self.json_schema}"
44
+ clean_text = self.preprocess(text)
50
45
  messages: list[dict[str, str]] = []
51
46
 
52
47
  if reason:
53
48
  messages.append({"role": "user", "content": reason})
54
49
 
55
- messages.append({"role": "user", "content": schema_instr})
56
50
  if self.prompt_template:
57
51
  messages.append({"role": "user", "content": self.prompt_template})
58
- messages.append({"role": "user", "content": clean})
52
+ messages.append({"role": "user", "content": clean_text})
59
53
 
60
- # this line will restructure the messages
61
- # based on the formatter that we provided
62
- # some models will require custom settings
54
+ # Restructure the messages based on the formatter; some models will require custom settings
63
55
  restructured = self.chat_formatter.format(messages=messages)
64
56
 
65
57
  return restructured
66
58
 
67
59
  def _reason(self, text: str) -> list:
68
- messages = [
69
- {
70
- "role": "user",
71
- "content": """
72
- we want to analyze this text snippet to see if it contains any question
73
- or request of some kind or not
74
- read the text, and reason about it being a request or not
75
- summerized
76
- short answer
77
- """,
78
- },
79
- {
80
- "role": "user",
81
- "content": f"""
60
+ reason_prompt = f"""
61
+ We want to analyze this text snippet to see if it contains any question
62
+ or request of some kind or not.
63
+ Read the text, and reason about it being a request or not.
64
+ Summerized, Short answer
82
65
  {text}
83
- """,
84
- },
66
+ """
67
+ messages = [
68
+ {"role": "user", "content": reason_prompt},
85
69
  ]
86
70
 
87
71
  restructured = self.chat_formatter.format(messages=messages)
88
72
 
89
- resp = self.client.chat.completions.create(
73
+ response = self.client.chat.completions.create(
90
74
  model=self.model,
91
75
  messages=restructured,
92
76
  temperature=self.temperature,
93
77
  **self.client_kwargs,
94
78
  )
95
79
 
96
- reason = resp.choices[0].message.content.strip()
80
+ reason = response.choices[0].message.content.strip()
97
81
  return reason
98
82
 
99
83
  def detect(self, text: str) -> bool:
@@ -125,6 +109,6 @@ class GemmaQuestionDetector(BaseQuestionDetector):
125
109
  f"Failed to parse the response. Raw content: {message.content}"
126
110
  )
127
111
 
128
- # dispatch and return
112
+ # Dispatch and return
129
113
  self._dispatch({"question": text, "result": result})
130
114
  return result
@@ -1,2 +0,0 @@
1
- openai==1.97.1
2
- numpy==1.26.4