hamtaa-texttools 0.1.51__py3-none-any.whl → 0.1.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 0.1.51
3
+ Version: 0.1.53
4
4
  Summary: A set of high-level NLP tools
5
5
  Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
6
  Requires-Python: >=3.8
@@ -50,12 +50,12 @@ texttools/tools/summarizer/__init__.py,sha256=phrR7qO20CNhO3hjXQBzhTRVumdVdGSufm
50
50
  texttools/tools/summarizer/gemma_summarizer.py,sha256=ikhsBv7AiZD1dT_d12AyjXxojzSW92e2y5WjchI_3bE,4474
51
51
  texttools/tools/summarizer/llm_summerizer.py,sha256=-0rUKbSnl1aDeBfJ5DCSbIlwd2k-9qIaCKgoQJa0hWc,3412
52
52
  texttools/tools/translator/__init__.py,sha256=KO1m08J2BZwRqBGO9ICB4l4cnH1jfHLHL5HbgYFUWM8,72
53
- texttools/tools/translator/gemma_translator.py,sha256=yIlgxBjGAup7vMHLPJ5Q8LmFl8udrBh6j0smAH0UFwY,7509
53
+ texttools/tools/translator/gemma_translator.py,sha256=k7xBzdqDH8KJIgtzN4TpZ0baBGwChHcunxFknFmauuQ,7284
54
54
  texttools/utils/flex_processor.py,sha256=C-lMwMjpIM6uAPFxXdgajxcFV1ccngEfJqq6xe5S1J8,3123
55
55
  texttools/utils/batch_manager/__init__.py,sha256=3ZkxA395lRD4gNxJ1vp0fNuz_XuBr50GoP51rrwQ0Ks,87
56
56
  texttools/utils/batch_manager/batch_manager.py,sha256=jAmKskL3OTYwwsO1mWsWAB3VxMlOF07c2GW1Ev83ZhY,9283
57
57
  texttools/utils/batch_manager/batch_runner.py,sha256=DE6TFz3i_jR-ZiUYbgIdLgjqr3aitw-JM_tKnSvzGL0,7424
58
- hamtaa_texttools-0.1.51.dist-info/METADATA,sha256=CBQyDE8L5VrKZVciOVG5LbxOyc1df9hBnEYz0dwauLc,1481
59
- hamtaa_texttools-0.1.51.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
- hamtaa_texttools-0.1.51.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
61
- hamtaa_texttools-0.1.51.dist-info/RECORD,,
58
+ hamtaa_texttools-0.1.53.dist-info/METADATA,sha256=CDKUwD_N6p_1tIbMDvVXpz8jZAXHKJWcApEWUeO-73g,1481
59
+ hamtaa_texttools-0.1.53.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
+ hamtaa_texttools-0.1.53.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
61
+ hamtaa_texttools-0.1.53.dist-info/RECORD,,
@@ -7,18 +7,13 @@ from texttools.base.base_translator import BaseTranslator
7
7
  from texttools.formatter.gemma3_formatter import Gemma3Formatter
8
8
 
9
9
 
10
- # Pydantic BaseModel to specify the output format of preprocessor
11
- # Preprocessor's job is to extract proper names
12
10
  class PreprocessorOutput(BaseModel):
13
11
  """
14
- A single proper-name entity extracted from the source text.
12
+ List of proper-name strings extracted from the source text.
15
13
  """
16
14
 
17
- text: str = Field(
18
- description="The exact substring from the original text that represents a proper name."
19
- )
20
- text_type: str = Field(
21
- description='Always use the literal value "Proper Name" when this entity is a real persons name.'
15
+ entities: List[str] = Field(
16
+ description="All proper names found in the text; return an empty list if none."
22
17
  )
23
18
 
24
19
 
@@ -143,9 +138,12 @@ class GemmaTranslator(BaseTranslator):
143
138
  completion = self.client.chat.completions.parse(
144
139
  model=self.model,
145
140
  messages=restructured,
146
- response_format=List[PreprocessorOutput],
141
+ response_format=PreprocessorOutput,
147
142
  temperature=self.temperature,
148
- extra_body=dict(guided_decoding_backend="auto") ** self.client_kwargs,
143
+ extra_body={
144
+ "guided_decoding_backend": "auto",
145
+ },
146
+ **self.client_kwargs,
149
147
  )
150
148
  message = completion.choices[0].message
151
149
 
@@ -161,7 +159,7 @@ class GemmaTranslator(BaseTranslator):
161
159
 
162
160
  # Extract proper names to tell the LLM what names not to translate, but to transliterate
163
161
  extracted = self.preprocess(text)
164
- proper_names = [e.text for e in extracted]
162
+ proper_names = extracted.entities
165
163
 
166
164
  reason_summary = None
167
165
  if self.use_reason: