hamtaa-texttools 0.1.52__tar.gz → 0.1.54__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (68)
  1. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/PKG-INFO +1 -1
  2. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/hamtaa_texttools.egg-info/PKG-INFO +1 -1
  3. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/pyproject.toml +1 -1
  4. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/translator/gemma_translator.py +7 -12
  5. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/README.md +0 -0
  6. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
  7. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  8. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/hamtaa_texttools.egg-info/requires.txt +0 -0
  9. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  10. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/setup.cfg +0 -0
  11. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/setup.py +0 -0
  12. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/tests/test_vllm_output.py +0 -0
  13. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/__init__.py +0 -0
  14. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/__init__.py +0 -0
  15. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_categorizer.py +0 -0
  16. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_keyword_extractor.py +0 -0
  17. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_ner_extractor.py +0 -0
  18. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_question_detector.py +0 -0
  19. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_question_generator.py +0 -0
  20. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_question_merger.py +0 -0
  21. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_question_rewriter.py +0 -0
  22. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_router.py +0 -0
  23. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_summarizer.py +0 -0
  24. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_task_performer.py +0 -0
  25. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/base/base_translator.py +0 -0
  26. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/formatter/__init__.py +0 -0
  27. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/formatter/base.py +0 -0
  28. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/formatter/gemma3_formatter.py +0 -0
  29. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/handlers/__init__.py +0 -0
  30. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/handlers/categorizer/__init__.py +0 -0
  31. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/handlers/categorizer/categorizer.py +0 -0
  32. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/handlers/handlers.py +0 -0
  33. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/__init__.py +0 -0
  34. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/categorizer/__init__.py +0 -0
  35. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/categorizer/encoder_model/__init__.py +0 -0
  36. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/categorizer/encoder_model/encoder_vectorizer.py +0 -0
  37. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/categorizer/llm/__init__.py +0 -0
  38. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/categorizer/llm/gemma_categorizer.py +0 -0
  39. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/categorizer/llm/openai_categorizer.py +0 -0
  40. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/keyword_extractor/__init__.py +0 -0
  41. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/keyword_extractor/gemma_extractor.py +0 -0
  42. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/merger/__init__.py +0 -0
  43. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/merger/gemma_question_merger.py +0 -0
  44. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/ner/__init__.py +0 -0
  45. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/ner/gemma_ner_extractor.py +0 -0
  46. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/question_detector/__init__.py +0 -0
  47. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/question_detector/gemma_detector.py +0 -0
  48. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/question_detector/llm_detector.py +0 -0
  49. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/question_generator/__init__.py +0 -0
  50. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/question_generator/gemma_question_generator.py +0 -0
  51. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/reranker/__init__.py +0 -0
  52. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/reranker/reranker.py +0 -0
  53. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/reranker/scorer.py +0 -0
  54. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/reranker/sorter.py +0 -0
  55. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/rewriter/__init__.py +0 -0
  56. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/rewriter/gemma_question_rewriter.py +0 -0
  57. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/router/__init__.py +0 -0
  58. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/router/gemma_router.py +0 -0
  59. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/subject_to_question/__init__.py +0 -0
  60. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/subject_to_question/gemma_question_generator.py +0 -0
  61. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/summarizer/__init__.py +0 -0
  62. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/summarizer/gemma_summarizer.py +0 -0
  63. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/summarizer/llm_summerizer.py +0 -0
  64. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/tools/translator/__init__.py +0 -0
  65. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/utils/batch_manager/__init__.py +0 -0
  66. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/utils/batch_manager/batch_manager.py +0 -0
  67. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/utils/batch_manager/batch_runner.py +0 -0
  68. {hamtaa_texttools-0.1.52 → hamtaa_texttools-0.1.54}/texttools/utils/flex_processor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 0.1.52
3
+ Version: 0.1.54
4
4
  Summary: A set of high-level NLP tools
5
5
  Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
6
  Requires-Python: >=3.8
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 0.1.52
3
+ Version: 0.1.54
4
4
  Summary: A set of high-level NLP tools
5
5
  Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
6
  Requires-Python: >=3.8
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hamtaa-texttools"
7
- version = "0.1.52"
7
+ version = "0.1.54"
8
8
  description = "A set of high-level NLP tools"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -7,18 +7,13 @@ from texttools.base.base_translator import BaseTranslator
7
7
  from texttools.formatter.gemma3_formatter import Gemma3Formatter
8
8
 
9
9
 
10
- # Pydantic BaseModel to specify the output format of preprocessor
11
- # Preprocessor's job is to extract proper names
12
10
  class PreprocessorOutput(BaseModel):
13
11
  """
14
- A single proper-name entity extracted from the source text.
12
+ List of proper-name strings extracted from the source text.
15
13
  """
16
14
 
17
- text: str = Field(
18
- description="The exact substring from the original text that represents a proper name."
19
- )
20
- text_type: str = Field(
21
- description='Always use the literal value "Proper Name" when this entity is a real persons name.'
15
+ entities: List[str] = Field(
16
+ description="All proper names found in the text; return an empty list if none."
22
17
  )
23
18
 
24
19
 
@@ -73,7 +68,7 @@ class GemmaTranslator(BaseTranslator):
73
68
  """
74
69
  messages.append({"role": "user", "content": enforce_prompt})
75
70
 
76
- clean_text = text.strip()
71
+ clean_text = text
77
72
  if reason:
78
73
  reason_prompt = f"""
79
74
  Based on the analysis conducted, translate the following text {"from" + source_language if source_language else ""} to {target_language}.
@@ -143,7 +138,7 @@ class GemmaTranslator(BaseTranslator):
143
138
  completion = self.client.chat.completions.parse(
144
139
  model=self.model,
145
140
  messages=restructured,
146
- response_format=List[PreprocessorOutput],
141
+ response_format=PreprocessorOutput,
147
142
  temperature=self.temperature,
148
143
  extra_body={
149
144
  "guided_decoding_backend": "auto",
@@ -164,11 +159,11 @@ class GemmaTranslator(BaseTranslator):
164
159
 
165
160
  # Extract proper names to tell the LLM what names not to translate, but to transliterate
166
161
  extracted = self.preprocess(text)
167
- proper_names = [e.text for e in extracted]
162
+ proper_names = extracted.entities
168
163
 
169
164
  reason_summary = None
170
165
  if self.use_reason:
171
- reason_summary = self._reason(text, target_language, source_language)
166
+ reason_summary = self._reason(text, target_language)
172
167
 
173
168
  messages = self._build_messages(
174
169
  text, target_language, source_language, reason_summary, proper_names