hamtaa-texttools 0.1.52__py3-none-any.whl → 0.1.54__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 0.1.52
3
+ Version: 0.1.54
4
4
  Summary: A set of high-level NLP tools
5
5
  Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
6
  Requires-Python: >=3.8
@@ -50,12 +50,12 @@ texttools/tools/summarizer/__init__.py,sha256=phrR7qO20CNhO3hjXQBzhTRVumdVdGSufm
50
50
  texttools/tools/summarizer/gemma_summarizer.py,sha256=ikhsBv7AiZD1dT_d12AyjXxojzSW92e2y5WjchI_3bE,4474
51
51
  texttools/tools/summarizer/llm_summerizer.py,sha256=-0rUKbSnl1aDeBfJ5DCSbIlwd2k-9qIaCKgoQJa0hWc,3412
52
52
  texttools/tools/translator/__init__.py,sha256=KO1m08J2BZwRqBGO9ICB4l4cnH1jfHLHL5HbgYFUWM8,72
53
- texttools/tools/translator/gemma_translator.py,sha256=gtvSpz19aGlbAk98M4xX61F4CqyI0QeAxLuw7N8cAoI,7551
53
+ texttools/tools/translator/gemma_translator.py,sha256=4bW9wVIkrlYDhWaOWB2sN7oC0xzeWJ-rfKRnp_lGrp4,7259
54
54
  texttools/utils/flex_processor.py,sha256=C-lMwMjpIM6uAPFxXdgajxcFV1ccngEfJqq6xe5S1J8,3123
55
55
  texttools/utils/batch_manager/__init__.py,sha256=3ZkxA395lRD4gNxJ1vp0fNuz_XuBr50GoP51rrwQ0Ks,87
56
56
  texttools/utils/batch_manager/batch_manager.py,sha256=jAmKskL3OTYwwsO1mWsWAB3VxMlOF07c2GW1Ev83ZhY,9283
57
57
  texttools/utils/batch_manager/batch_runner.py,sha256=DE6TFz3i_jR-ZiUYbgIdLgjqr3aitw-JM_tKnSvzGL0,7424
58
- hamtaa_texttools-0.1.52.dist-info/METADATA,sha256=tRdADD1IP3at6Bp043-KsnFBr1tAcguGvdIBCjAaFuo,1481
59
- hamtaa_texttools-0.1.52.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
- hamtaa_texttools-0.1.52.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
61
- hamtaa_texttools-0.1.52.dist-info/RECORD,,
58
+ hamtaa_texttools-0.1.54.dist-info/METADATA,sha256=ad_jTTDOoADppaC7jik-hrxEuWc5aOwtz5_XFW1dTp0,1481
59
+ hamtaa_texttools-0.1.54.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
+ hamtaa_texttools-0.1.54.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
61
+ hamtaa_texttools-0.1.54.dist-info/RECORD,,
@@ -7,18 +7,13 @@ from texttools.base.base_translator import BaseTranslator
7
7
  from texttools.formatter.gemma3_formatter import Gemma3Formatter
8
8
 
9
9
 
10
- # Pydantic BaseModel to specify the output format of preprocessor
11
- # Preprocessor's job is to extract proper names
12
10
  class PreprocessorOutput(BaseModel):
13
11
  """
14
- A single proper-name entity extracted from the source text.
12
+ List of proper-name strings extracted from the source text.
15
13
  """
16
14
 
17
- text: str = Field(
18
- description="The exact substring from the original text that represents a proper name."
19
- )
20
- text_type: str = Field(
21
- description='Always use the literal value "Proper Name" when this entity is a real persons name.'
15
+ entities: List[str] = Field(
16
+ description="All proper names found in the text; return an empty list if none."
22
17
  )
23
18
 
24
19
 
@@ -73,7 +68,7 @@ class GemmaTranslator(BaseTranslator):
73
68
  """
74
69
  messages.append({"role": "user", "content": enforce_prompt})
75
70
 
76
- clean_text = text.strip()
71
+ clean_text = text
77
72
  if reason:
78
73
  reason_prompt = f"""
79
74
  Based on the analysis conducted, translate the following text {"from" + source_language if source_language else ""} to {target_language}.
@@ -143,7 +138,7 @@ class GemmaTranslator(BaseTranslator):
143
138
  completion = self.client.chat.completions.parse(
144
139
  model=self.model,
145
140
  messages=restructured,
146
- response_format=List[PreprocessorOutput],
141
+ response_format=PreprocessorOutput,
147
142
  temperature=self.temperature,
148
143
  extra_body={
149
144
  "guided_decoding_backend": "auto",
@@ -164,11 +159,11 @@ class GemmaTranslator(BaseTranslator):
164
159
 
165
160
  # Extract proper names to tell the LLM what names not to translate, but to transliterate
166
161
  extracted = self.preprocess(text)
167
- proper_names = [e.text for e in extracted]
162
+ proper_names = extracted.entities
168
163
 
169
164
  reason_summary = None
170
165
  if self.use_reason:
171
- reason_summary = self._reason(text, target_language, source_language)
166
+ reason_summary = self._reason(text, target_language)
172
167
 
173
168
  messages = self._build_messages(
174
169
  text, target_language, source_language, reason_summary, proper_names