hamtaa-texttools 1.1.8__py3-none-any.whl → 1.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.8.dist-info → hamtaa_texttools-1.1.9.dist-info}/METADATA +21 -1
- {hamtaa_texttools-1.1.8.dist-info → hamtaa_texttools-1.1.9.dist-info}/RECORD +9 -9
- texttools/tools/internals/async_operator.py +4 -5
- texttools/tools/internals/base_operator.py +8 -12
- texttools/tools/internals/operator.py +5 -8
- texttools/tools/internals/prompt_loader.py +2 -7
- {hamtaa_texttools-1.1.8.dist-info → hamtaa_texttools-1.1.9.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-1.1.8.dist-info → hamtaa_texttools-1.1.9.dist-info}/licenses/LICENSE +0 -0
- {hamtaa_texttools-1.1.8.dist-info → hamtaa_texttools-1.1.9.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.9
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -187,6 +187,26 @@ Use **TextTools** when you need to:
|
|
|
187
187
|
|
|
188
188
|
---
|
|
189
189
|
|
|
190
|
+
## 🔍 Logging
|
|
191
|
+
|
|
192
|
+
TextTools uses Python's standard `logging` module. The library's default logger level is `WARNING`; to change it, configure logging in your application as shown below:
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
import logging
|
|
197
|
+
|
|
198
|
+
# Default: warnings and errors only
|
|
199
|
+
logging.basicConfig(level=logging.WARNING)
|
|
200
|
+
|
|
201
|
+
# Debug everything (verbose)
|
|
202
|
+
logging.basicConfig(level=logging.DEBUG)
|
|
203
|
+
|
|
204
|
+
# Near silence (only critical errors are shown)
|
|
205
|
+
logging.basicConfig(level=logging.CRITICAL)
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
190
210
|
## 📚 Batch Processing
|
|
191
211
|
|
|
192
212
|
Process large datasets efficiently using OpenAI's batch API.
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
hamtaa_texttools-1.1.
|
|
1
|
+
hamtaa_texttools-1.1.9.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
|
|
2
2
|
texttools/__init__.py,sha256=lFYe1jdssHC1h8qcPpV3whANxiDi8aiiFdY-7L0Ck10,164
|
|
3
3
|
texttools/batch/__init__.py,sha256=DJGJTfR6F3Yv4_alsj9g1tesGzdcSV27Zw74DonhW_s,102
|
|
4
4
|
texttools/batch/batch_manager.py,sha256=ZgLiO9maCHnx2cJbUjsYXFnlUsMLI2TP3Vc9uKU0BLg,8706
|
|
@@ -18,13 +18,13 @@ texttools/prompts/translate.yaml,sha256=mGT2uBCei6uucWqVbs4silk-UV060v3G0jnt0P6s
|
|
|
18
18
|
texttools/tools/__init__.py,sha256=3fPoeB-E5wGxWgv7axztHkeolR7ZDUJudd0xmpPFjao,113
|
|
19
19
|
texttools/tools/async_tools.py,sha256=2ZY7Lo6Jj9xoTF8bfdh_g8VOXZ7ljMMesd1_QHXyf4s,15395
|
|
20
20
|
texttools/tools/sync_tools.py,sha256=XKgZuzriFnk8B-YihJfs6BKivxjGCgOFfe7hnCpEiXs,15161
|
|
21
|
-
texttools/tools/internals/async_operator.py,sha256=
|
|
22
|
-
texttools/tools/internals/base_operator.py,sha256=
|
|
21
|
+
texttools/tools/internals/async_operator.py,sha256=egBsrcpGBmkDY5YzUvGHh1TjPmsH9IOVXDGmYMWjzMs,8960
|
|
22
|
+
texttools/tools/internals/base_operator.py,sha256=qV9LlVo_DzSCzQnjYTFi-6mlHN4gE0edPE2y_9WwQFw,3292
|
|
23
23
|
texttools/tools/internals/formatters.py,sha256=tACNLP6PeoqaRpNudVxBaHA25zyWqWYPZQuYysIu88g,941
|
|
24
|
-
texttools/tools/internals/operator.py,sha256=
|
|
24
|
+
texttools/tools/internals/operator.py,sha256=xgbt1Mm67SEC-KD9jwXjXGTCcaCsaVLhG6iCYOqLDcc,8709
|
|
25
25
|
texttools/tools/internals/output_models.py,sha256=ekpbyocmXj_dee7ieOT1zOkMo9cPHT7xcUFCZoUaXA0,1886
|
|
26
|
-
texttools/tools/internals/prompt_loader.py,sha256=
|
|
27
|
-
hamtaa_texttools-1.1.
|
|
28
|
-
hamtaa_texttools-1.1.
|
|
29
|
-
hamtaa_texttools-1.1.
|
|
30
|
-
hamtaa_texttools-1.1.
|
|
26
|
+
texttools/tools/internals/prompt_loader.py,sha256=8uD7JUatKXSLXhGwWs46iQpcjWdhF9p32SFDLMndy1o,1940
|
|
27
|
+
hamtaa_texttools-1.1.9.dist-info/METADATA,sha256=nQFuGr_7aVHlO7nRsTbubEtO0QVUofcdUKwMATzHhUU,9129
|
|
28
|
+
hamtaa_texttools-1.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
29
|
+
hamtaa_texttools-1.1.9.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
|
|
30
|
+
hamtaa_texttools-1.1.9.dist-info/RECORD,,
|
|
@@ -51,7 +51,7 @@ class AsyncOperator(BaseOperator):
|
|
|
51
51
|
temperature: float,
|
|
52
52
|
logprobs: bool = False,
|
|
53
53
|
top_logprobs: int = 3,
|
|
54
|
-
) -> tuple[
|
|
54
|
+
) -> tuple[T, Any]:
|
|
55
55
|
"""
|
|
56
56
|
Parses a chat completion using OpenAI's structured output format.
|
|
57
57
|
Returns both the parsed object and the raw completion for logging.
|
|
@@ -78,7 +78,7 @@ class AsyncOperator(BaseOperator):
|
|
|
78
78
|
temperature: float,
|
|
79
79
|
logprobs: bool = False,
|
|
80
80
|
top_logprobs: int = 3,
|
|
81
|
-
) -> tuple[
|
|
81
|
+
) -> tuple[T, Any]:
|
|
82
82
|
"""
|
|
83
83
|
Generates a completion using vLLM with JSON schema guidance.
|
|
84
84
|
Returns the parsed output model and raw completion.
|
|
@@ -138,7 +138,7 @@ class AsyncOperator(BaseOperator):
|
|
|
138
138
|
**extra_kwargs,
|
|
139
139
|
)
|
|
140
140
|
|
|
141
|
-
messages
|
|
141
|
+
messages = []
|
|
142
142
|
|
|
143
143
|
if with_analysis:
|
|
144
144
|
analysis = await self._analyze(prompt_configs, temperature)
|
|
@@ -181,8 +181,7 @@ class AsyncOperator(BaseOperator):
|
|
|
181
181
|
|
|
182
182
|
# Retry logic if validation fails
|
|
183
183
|
if validator and not validator(output.result):
|
|
184
|
-
|
|
185
|
-
for attempt in range(max_retries):
|
|
184
|
+
for attempt in range(self.MAX_RETRIES):
|
|
186
185
|
logger.warning(
|
|
187
186
|
f"Validation failed, retrying for the {attempt + 1} time."
|
|
188
187
|
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import TypeVar, Type, Any
|
|
1
|
+
from typing import TypeVar, Type, Any, Union
|
|
2
2
|
import json
|
|
3
3
|
import re
|
|
4
4
|
import math
|
|
@@ -11,11 +11,16 @@ from openai import OpenAI, AsyncOpenAI
|
|
|
11
11
|
# Base Model type for output models
|
|
12
12
|
T = TypeVar("T", bound=BaseModel)
|
|
13
13
|
|
|
14
|
+
ClientType = Union[OpenAI, AsyncOpenAI]
|
|
15
|
+
|
|
14
16
|
logger = logging.getLogger("texttools.base_operator")
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
class BaseOperator:
|
|
18
|
-
|
|
20
|
+
# Max retry in case of failed output validation
|
|
21
|
+
MAX_RETRIES = 3
|
|
22
|
+
|
|
23
|
+
def __init__(self, client: ClientType, model: str):
|
|
19
24
|
self.client = client
|
|
20
25
|
self.model = model
|
|
21
26
|
|
|
@@ -40,16 +45,10 @@ class BaseOperator:
|
|
|
40
45
|
"""
|
|
41
46
|
Convert a JSON response string to output model.
|
|
42
47
|
"""
|
|
43
|
-
# Clean the response string
|
|
44
48
|
cleaned_json = self._clean_json_response(response_string)
|
|
45
|
-
|
|
46
|
-
# Fix Python-style booleans
|
|
47
49
|
cleaned_json = cleaned_json.replace("False", "false").replace("True", "true")
|
|
48
|
-
|
|
49
|
-
# Convert string to Python dictionary
|
|
50
50
|
response_dict = json.loads(cleaned_json)
|
|
51
51
|
|
|
52
|
-
# Convert dictionary to output model
|
|
53
52
|
return output_model(**response_dict)
|
|
54
53
|
|
|
55
54
|
def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
|
|
@@ -63,7 +62,7 @@ class BaseOperator:
|
|
|
63
62
|
|
|
64
63
|
for choice in completion.choices:
|
|
65
64
|
if not getattr(choice, "logprobs", None):
|
|
66
|
-
logger.error("logprobs is not
|
|
65
|
+
logger.error("logprobs is not available for the chosen model.")
|
|
67
66
|
return []
|
|
68
67
|
|
|
69
68
|
for logprob_item in choice.logprobs.content:
|
|
@@ -93,8 +92,5 @@ class BaseOperator:
|
|
|
93
92
|
"""
|
|
94
93
|
delta_temp = random.choice([-1, 1]) * random.uniform(0.1, 0.9)
|
|
95
94
|
new_temp = base_temp + delta_temp
|
|
96
|
-
print(f"Base Temp: {base_temp}")
|
|
97
|
-
print(f"Delta Temp: {delta_temp}")
|
|
98
|
-
print(f"New Temp: {new_temp}")
|
|
99
95
|
|
|
100
96
|
return max(0.0, min(new_temp, 1.5))
|
|
@@ -6,7 +6,6 @@ from pydantic import BaseModel
|
|
|
6
6
|
|
|
7
7
|
from texttools.tools.internals.output_models import ToolOutput
|
|
8
8
|
from texttools.tools.internals.base_operator import BaseOperator
|
|
9
|
-
from texttools.tools.internals.formatters import Formatter
|
|
10
9
|
from texttools.tools.internals.prompt_loader import PromptLoader
|
|
11
10
|
|
|
12
11
|
# Base Model type for output models
|
|
@@ -51,7 +50,7 @@ class Operator(BaseOperator):
|
|
|
51
50
|
temperature: float,
|
|
52
51
|
logprobs: bool = False,
|
|
53
52
|
top_logprobs: int = 3,
|
|
54
|
-
) -> tuple[
|
|
53
|
+
) -> tuple[T, Any]:
|
|
55
54
|
"""
|
|
56
55
|
Parses a chat completion using OpenAI's structured output format.
|
|
57
56
|
Returns both the parsed object and the raw completion for logging.
|
|
@@ -78,7 +77,7 @@ class Operator(BaseOperator):
|
|
|
78
77
|
temperature: float,
|
|
79
78
|
logprobs: bool = False,
|
|
80
79
|
top_logprobs: int = 3,
|
|
81
|
-
) -> tuple[
|
|
80
|
+
) -> tuple[T, Any]:
|
|
82
81
|
"""
|
|
83
82
|
Generates a completion using vLLM with JSON schema guidance.
|
|
84
83
|
Returns the parsed output model and raw completion.
|
|
@@ -126,7 +125,6 @@ class Operator(BaseOperator):
|
|
|
126
125
|
Execute the LLM pipeline with the given input text.
|
|
127
126
|
"""
|
|
128
127
|
prompt_loader = PromptLoader()
|
|
129
|
-
formatter = Formatter()
|
|
130
128
|
output = ToolOutput()
|
|
131
129
|
|
|
132
130
|
try:
|
|
@@ -138,7 +136,7 @@ class Operator(BaseOperator):
|
|
|
138
136
|
**extra_kwargs,
|
|
139
137
|
)
|
|
140
138
|
|
|
141
|
-
messages
|
|
139
|
+
messages = []
|
|
142
140
|
|
|
143
141
|
if with_analysis:
|
|
144
142
|
analysis = self._analyze(prompt_configs, temperature)
|
|
@@ -159,7 +157,7 @@ class Operator(BaseOperator):
|
|
|
159
157
|
)
|
|
160
158
|
|
|
161
159
|
messages.append(self._build_user_message(prompt_configs["main_template"]))
|
|
162
|
-
messages
|
|
160
|
+
messages
|
|
163
161
|
|
|
164
162
|
if resp_format == "vllm":
|
|
165
163
|
parsed, completion = self._vllm_completion(
|
|
@@ -181,8 +179,7 @@ class Operator(BaseOperator):
|
|
|
181
179
|
|
|
182
180
|
# Retry logic if validation fails
|
|
183
181
|
if validator and not validator(output.result):
|
|
184
|
-
|
|
185
|
-
for attempt in range(max_retries):
|
|
182
|
+
for attempt in range(self.MAX_RETRIES):
|
|
186
183
|
logger.warning(
|
|
187
184
|
f"Validation failed, retrying for the {attempt + 1} time."
|
|
188
185
|
)
|
|
@@ -11,15 +11,10 @@ class PromptLoader:
|
|
|
11
11
|
- Load and parse YAML prompt definitions.
|
|
12
12
|
- Select the right template (by mode, if applicable).
|
|
13
13
|
- Inject variables (`{input}`, plus any extra kwargs) into the templates.
|
|
14
|
-
- Return a dict with:
|
|
15
|
-
{
|
|
16
|
-
"main_template": "...",
|
|
17
|
-
"analyze_template": "..." | None
|
|
18
|
-
}
|
|
19
14
|
"""
|
|
20
15
|
|
|
21
|
-
MAIN_TEMPLATE
|
|
22
|
-
ANALYZE_TEMPLATE
|
|
16
|
+
MAIN_TEMPLATE = "main_template"
|
|
17
|
+
ANALYZE_TEMPLATE = "analyze_template"
|
|
23
18
|
|
|
24
19
|
# Use lru_cache to load each file once
|
|
25
20
|
@lru_cache(maxsize=32)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|