hamtaa-texttools 0.1.44__tar.gz → 0.1.45__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (69)
  1. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/PKG-INFO +3 -3
  2. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/hamtaa_texttools.egg-info/PKG-INFO +3 -3
  3. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/hamtaa_texttools.egg-info/SOURCES.txt +5 -4
  4. hamtaa_texttools-0.1.45/hamtaa_texttools.egg-info/requires.txt +2 -0
  5. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/pyproject.toml +3 -3
  6. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/__init__.py +1 -1
  7. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/translator/gemma_translator.py +7 -20
  8. hamtaa_texttools-0.1.45/texttools/utils/flex_processor.py +78 -0
  9. hamtaa_texttools-0.1.44/hamtaa_texttools.egg-info/requires.txt +0 -2
  10. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/README.md +0 -0
  11. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  12. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  13. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/setup.cfg +0 -0
  14. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/setup.py +0 -0
  15. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/tests/test_vllm_output.py +0 -0
  16. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/__init__.py +0 -0
  17. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_categorizer.py +0 -0
  18. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_keyword_extractor.py +0 -0
  19. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_ner_extractor.py +0 -0
  20. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_question_detector.py +0 -0
  21. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_question_generator.py +0 -0
  22. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_question_merger.py +0 -0
  23. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_question_rewriter.py +0 -0
  24. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_router.py +0 -0
  25. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_summarizer.py +0 -0
  26. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_task_performer.py +0 -0
  27. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/base/base_translator.py +0 -0
  28. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/formatter/__init__.py +0 -0
  29. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/formatter/base.py +0 -0
  30. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/formatter/gemma3_formatter.py +0 -0
  31. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/handlers/__init__.py +0 -0
  32. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/handlers/categorizer/__init__.py +0 -0
  33. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/handlers/categorizer/categorizer.py +0 -0
  34. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/handlers/handlers.py +0 -0
  35. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/__init__.py +0 -0
  36. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/categorizer/__init__.py +0 -0
  37. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/categorizer/encoder_model/__init__.py +0 -0
  38. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/categorizer/encoder_model/encoder_vectorizer.py +0 -0
  39. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/categorizer/llm/__init__.py +0 -0
  40. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/categorizer/llm/gemma_categorizer.py +0 -0
  41. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/categorizer/llm/openai_categorizer.py +0 -0
  42. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/keyword_extractor/__init__.py +0 -0
  43. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/keyword_extractor/gemma_extractor.py +0 -0
  44. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/merger/__init__.py +0 -0
  45. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/merger/gemma_question_merger.py +0 -0
  46. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/ner/__init__.py +0 -0
  47. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/ner/gemma_ner_extractor.py +0 -0
  48. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/question_detector/__init__.py +0 -0
  49. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/question_detector/gemma_detector.py +0 -0
  50. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/question_detector/llm_detector.py +0 -0
  51. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/question_generator/__init__.py +0 -0
  52. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/question_generator/gemma_question_generator.py +0 -0
  53. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/reranker/__init__.py +0 -0
  54. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/reranker/reranker.py +0 -0
  55. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/reranker/scorer.py +0 -0
  56. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/reranker/sorter.py +0 -0
  57. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/rewriter/__init__.py +0 -0
  58. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/rewriter/gemma_question_rewriter.py +0 -0
  59. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/router/__init__.py +0 -0
  60. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/router/gemma_router.py +0 -0
  61. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/subject_to_question/__init__.py +0 -0
  62. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/subject_to_question/gemma_question_generator.py +0 -0
  63. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/summarizer/__init__.py +0 -0
  64. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/summarizer/gemma_summarizer.py +0 -0
  65. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/summarizer/llm_summerizer.py +0 -0
  66. {hamtaa_texttools-0.1.44 → hamtaa_texttools-0.1.45}/texttools/tools/translator/__init__.py +0 -0
  67. {hamtaa_texttools-0.1.44/texttools → hamtaa_texttools-0.1.45/texttools/utils}/batch_manager/__init__.py +0 -0
  68. {hamtaa_texttools-0.1.44/texttools → hamtaa_texttools-0.1.45/texttools/utils}/batch_manager/batch_manager.py +0 -0
  69. {hamtaa_texttools-0.1.44/texttools → hamtaa_texttools-0.1.45/texttools/utils}/batch_manager/batch_runner.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 0.1.44
3
+ Version: 0.1.45
4
4
  Summary: A set of high-level NLP tools
5
5
  Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
6
  Requires-Python: >=3.8
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: openai>=1.97.0
9
- Requires-Dist: numpy>=1.26.4
8
+ Requires-Dist: openai==1.97.1
9
+ Requires-Dist: numpy==1.26.4
10
10
 
11
11
  # Text Tools
12
12
 
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 0.1.44
3
+ Version: 0.1.45
4
4
  Summary: A set of high-level NLP tools
5
5
  Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
6
  Requires-Python: >=3.8
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: openai>=1.97.0
9
- Requires-Dist: numpy>=1.26.4
8
+ Requires-Dist: openai==1.97.1
9
+ Requires-Dist: numpy==1.26.4
10
10
 
11
11
  # Text Tools
12
12
 
@@ -20,9 +20,6 @@ texttools/base/base_router.py
20
20
  texttools/base/base_summarizer.py
21
21
  texttools/base/base_task_performer.py
22
22
  texttools/base/base_translator.py
23
- texttools/batch_manager/__init__.py
24
- texttools/batch_manager/batch_manager.py
25
- texttools/batch_manager/batch_runner.py
26
23
  texttools/formatter/__init__.py
27
24
  texttools/formatter/base.py
28
25
  texttools/formatter/gemma3_formatter.py
@@ -62,4 +59,8 @@ texttools/tools/summarizer/__init__.py
62
59
  texttools/tools/summarizer/gemma_summarizer.py
63
60
  texttools/tools/summarizer/llm_summerizer.py
64
61
  texttools/tools/translator/__init__.py
65
- texttools/tools/translator/gemma_translator.py
62
+ texttools/tools/translator/gemma_translator.py
63
+ texttools/utils/flex_processor.py
64
+ texttools/utils/batch_manager/__init__.py
65
+ texttools/utils/batch_manager/batch_manager.py
66
+ texttools/utils/batch_manager/batch_runner.py
@@ -0,0 +1,2 @@
1
+ openai==1.97.1
2
+ numpy==1.26.4
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hamtaa-texttools"
7
- version = "0.1.44"
7
+ version = "0.1.45"
8
8
  description = "A set of high-level NLP tools"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -15,8 +15,8 @@ authors = [
15
15
  { name = "Mousavinezhad" }
16
16
  ]
17
17
  dependencies = [
18
- "openai>=1.97.0",
19
- "numpy>=1.26.4",
18
+ "openai==1.97.1",
19
+ "numpy==1.26.4",
20
20
  ]
21
21
 
22
22
  [tool.setuptools.packages.find]
@@ -1,4 +1,4 @@
1
- from texttools.batch_manager import BatchJobRunner, SimpleBatchManager
1
+ from texttools.utils.batch_manager import BatchJobRunner, SimpleBatchManager
2
2
  from texttools.handlers import (
3
3
  NoOpResultHandler,
4
4
  PrintResultHandler,
@@ -1,5 +1,3 @@
1
- import json
2
- import re
3
1
  from typing import Any, Optional
4
2
 
5
3
  from openai import OpenAI
@@ -35,7 +33,7 @@ class GemmaTranslator(BaseTranslator):
35
33
  **client_kwargs: Any,
36
34
  ):
37
35
  super().__init__(handlers)
38
- self.client = client
36
+ self.client: OpenAI = client
39
37
  self.model = model
40
38
  self.temperature = temperature
41
39
  self.client_kwargs = client_kwargs
@@ -134,27 +132,16 @@ class GemmaTranslator(BaseTranslator):
134
132
  messages.append({"role": "user", "content": text_prompt})
135
133
 
136
134
  restructured = self.chat_formatter.format(messages=messages)
137
- completion = self.client.chat.completions.create(
135
+ completion = self.client.chat.completions.parse(
138
136
  model=self.model,
139
137
  messages=restructured,
140
- response_format={
141
- "type": "json_schema",
142
- "json_schema": {
143
- "name": "NER",
144
- "schema": PreprocessorOutput.model_json_schema(),
145
- },
146
- },
138
+ response_format=PreprocessorOutput,
147
139
  temperature=self.temperature,
148
- **self.client_kwargs,
140
+ extra_body=dict(guided_decoding_backend="auto") ** self.client_kwargs,
149
141
  )
150
- response = completion.choices[0].message.content
151
-
152
- # Remove Markdown-style triple backticks and any optional language tag like "json"
153
- if response.startswith("```"):
154
- response = re.sub(r"^```(?:json)?\s*|```$", "", response.strip())
155
-
156
- entities = json.loads(response)
142
+ message = completion.choices[0].message
157
143
 
144
+ entities = message.parsed
158
145
  return entities
159
146
 
160
147
  def translate(
@@ -189,7 +176,7 @@ class GemmaTranslator(BaseTranslator):
189
176
  temperature=self.temperature,
190
177
  **self.client_kwargs,
191
178
  )
192
- response = completion.choices[0].message.content.strip()
179
+ response = completion.choices[0].message.content
193
180
 
194
181
  self._dispatch(
195
182
  {
@@ -0,0 +1,78 @@
1
+ import random
2
+ import asyncio
3
+ from openai import OpenAI, RateLimitError, APIError
4
+ from typing import Optional
5
+ from pydantic import BaseModel, ValidationError
6
+ import httpx
7
+
8
+ # http_client = httpx()
9
+ # test_client = OpenAI(http_client=http_client)
10
+
11
+ async def flex_processing(
12
+ LLM_client: OpenAI,
13
+ system_prompt: str,
14
+ user_prompt: str,
15
+ output_model: Optional[BaseModel]=None,
16
+ prompt_cache_key: Optional[str]=None,
17
+ max_retries: int = 10,
18
+ base_delay: float = 2.0,
19
+ model_name: Optional[str] ="gpt-5-mini",
20
+ **client_kwargs):
21
+ """
22
+ Wrapper for flex processing with retry and exponential backoff.
23
+ Handles 429 'Resource Unavailable' errors gracefully.
24
+ """
25
+ for attempt in range(max_retries):
26
+ try:
27
+ request_kwargs = {
28
+ "model": model_name,
29
+ "messages": [
30
+ {"role": "system", "content": system_prompt},
31
+ {"role": "user", "content": user_prompt},
32
+ ],
33
+ "service_tier": "flex",
34
+ "timeout": 900.0,
35
+ **client_kwargs
36
+ }
37
+ if output_model:
38
+ request_kwargs["response_format"] = output_model
39
+ if prompt_cache_key:
40
+ request_kwargs["prompt_cache_key"] = prompt_cache_key
41
+
42
+ response = LLM_client.chat.completions.parse(**request_kwargs)
43
+ # response = self.client.chat.completions.parse(output_model)
44
+ content = response.choices[0].message.content
45
+ # ✅ Validate structured output if a model is provided
46
+ if output_model is not None:
47
+ try:
48
+ output_model.model_validate_json(content)
49
+ base_content = response.choices[0].message.parsed
50
+ # base_content = output_model(**content)
51
+ return base_content
52
+ except ValidationError as ve:
53
+ # Treat invalid output as retryable
54
+ wait_time = base_delay * (2 ** attempt) + random.uniform(0, 1)
55
+ print(
56
+ f"[Flex Retry] Attempt {attempt+1}/{max_retries} produced invalid structured output. "
57
+ f"Retrying in {wait_time:.2f}s... (ValidationError: {ve})"
58
+ )
59
+ await asyncio.sleep(wait_time)
60
+ continue
61
+ except (RateLimitError, APIError) as e:
62
+ wait_time = base_delay * (2 ** attempt) + random.uniform(0, 1)
63
+ print(
64
+ f"[Flex Retry] Attempt {attempt+1}/{max_retries} failed "
65
+ f"with error: {type(e).__name__} - {e}. "
66
+ f"Retrying in {wait_time:.2f}s..."
67
+ )
68
+ await asyncio.sleep(wait_time)
69
+
70
+ except Exception as e:
71
+ # Non-recoverable error: break out immediately
72
+ raise RuntimeError(
73
+ f"[Flex Processing] Unrecoverable error for prompt_key={prompt_cache_key}: {e}"
74
+ )
75
+
76
+ raise RuntimeError(
77
+ f"[Flex Processing] Exhausted {max_retries} retries for prompt_key={prompt_cache_key}"
78
+ )
@@ -1,2 +0,0 @@
1
- openai>=1.97.0
2
- numpy>=1.26.4