hamtaa-texttools 1.1.8__py3-none-any.whl → 1.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.8
3
+ Version: 1.1.9
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
6
6
  License: MIT License
@@ -187,6 +187,26 @@ Use **TextTools** when you need to:
187
187
 
188
188
  ---
189
189
 
190
+ ## 🔍 Logging
191
+
192
+ TextTools uses Python's standard `logging` module. The library's default logger level is `WARNING`; to change it, follow these instructions:
193
+
194
+
195
+ ```python
196
+ import logging
197
+
198
+ # Default: warnings and errors only
199
+ logging.basicConfig(level=logging.WARNING)
200
+
201
+ # Debug everything (verbose)
202
+ logging.basicConfig(level=logging.DEBUG)
203
+
204
+ # Complete silence
205
+ logging.basicConfig(level=logging.CRITICAL)
206
+ ```
207
+
208
+ ---
209
+
190
210
  ## 📚 Batch Processing
191
211
 
192
212
  Process large datasets efficiently using OpenAI's batch API.
@@ -1,4 +1,4 @@
1
- hamtaa_texttools-1.1.8.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
1
+ hamtaa_texttools-1.1.9.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
2
2
  texttools/__init__.py,sha256=lFYe1jdssHC1h8qcPpV3whANxiDi8aiiFdY-7L0Ck10,164
3
3
  texttools/batch/__init__.py,sha256=DJGJTfR6F3Yv4_alsj9g1tesGzdcSV27Zw74DonhW_s,102
4
4
  texttools/batch/batch_manager.py,sha256=ZgLiO9maCHnx2cJbUjsYXFnlUsMLI2TP3Vc9uKU0BLg,8706
@@ -18,13 +18,13 @@ texttools/prompts/translate.yaml,sha256=mGT2uBCei6uucWqVbs4silk-UV060v3G0jnt0P6s
18
18
  texttools/tools/__init__.py,sha256=3fPoeB-E5wGxWgv7axztHkeolR7ZDUJudd0xmpPFjao,113
19
19
  texttools/tools/async_tools.py,sha256=2ZY7Lo6Jj9xoTF8bfdh_g8VOXZ7ljMMesd1_QHXyf4s,15395
20
20
  texttools/tools/sync_tools.py,sha256=XKgZuzriFnk8B-YihJfs6BKivxjGCgOFfe7hnCpEiXs,15161
21
- texttools/tools/internals/async_operator.py,sha256=fCi70LXasC_2G9iz8uVFptnZEvVeb9TXopMBLi-cFuE,9022
22
- texttools/tools/internals/base_operator.py,sha256=rV2WqGdiHK4ezYz1f1EWcdbKFSFJhBJpORnJzPICFvk,3471
21
+ texttools/tools/internals/async_operator.py,sha256=egBsrcpGBmkDY5YzUvGHh1TjPmsH9IOVXDGmYMWjzMs,8960
22
+ texttools/tools/internals/base_operator.py,sha256=qV9LlVo_DzSCzQnjYTFi-6mlHN4gE0edPE2y_9WwQFw,3292
23
23
  texttools/tools/internals/formatters.py,sha256=tACNLP6PeoqaRpNudVxBaHA25zyWqWYPZQuYysIu88g,941
24
- texttools/tools/internals/operator.py,sha256=UBDScStTUXf8CIhwXb-6e_YOWTLggoiBV71vXRzr0P0,8904
24
+ texttools/tools/internals/operator.py,sha256=xgbt1Mm67SEC-KD9jwXjXGTCcaCsaVLhG6iCYOqLDcc,8709
25
25
  texttools/tools/internals/output_models.py,sha256=ekpbyocmXj_dee7ieOT1zOkMo9cPHT7xcUFCZoUaXA0,1886
26
- texttools/tools/internals/prompt_loader.py,sha256=1khayXcRC5w0Vf2SufpNaN1IUIhbKzS5ATiKheoBcGE,2082
27
- hamtaa_texttools-1.1.8.dist-info/METADATA,sha256=Cfb4VkcUELzRN6TrKdWK5jr4YsGbh_VlAtYVny86cb4,8690
28
- hamtaa_texttools-1.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
- hamtaa_texttools-1.1.8.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
30
- hamtaa_texttools-1.1.8.dist-info/RECORD,,
26
+ texttools/tools/internals/prompt_loader.py,sha256=8uD7JUatKXSLXhGwWs46iQpcjWdhF9p32SFDLMndy1o,1940
27
+ hamtaa_texttools-1.1.9.dist-info/METADATA,sha256=nQFuGr_7aVHlO7nRsTbubEtO0QVUofcdUKwMATzHhUU,9129
28
+ hamtaa_texttools-1.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ hamtaa_texttools-1.1.9.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
30
+ hamtaa_texttools-1.1.9.dist-info/RECORD,,
@@ -51,7 +51,7 @@ class AsyncOperator(BaseOperator):
51
51
  temperature: float,
52
52
  logprobs: bool = False,
53
53
  top_logprobs: int = 3,
54
- ) -> tuple[Type[T], Any]:
54
+ ) -> tuple[T, Any]:
55
55
  """
56
56
  Parses a chat completion using OpenAI's structured output format.
57
57
  Returns both the parsed object and the raw completion for logging.
@@ -78,7 +78,7 @@ class AsyncOperator(BaseOperator):
78
78
  temperature: float,
79
79
  logprobs: bool = False,
80
80
  top_logprobs: int = 3,
81
- ) -> tuple[Type[T], Any]:
81
+ ) -> tuple[T, Any]:
82
82
  """
83
83
  Generates a completion using vLLM with JSON schema guidance.
84
84
  Returns the parsed output model and raw completion.
@@ -138,7 +138,7 @@ class AsyncOperator(BaseOperator):
138
138
  **extra_kwargs,
139
139
  )
140
140
 
141
- messages: list[dict[str, str]] = []
141
+ messages = []
142
142
 
143
143
  if with_analysis:
144
144
  analysis = await self._analyze(prompt_configs, temperature)
@@ -181,8 +181,7 @@ class AsyncOperator(BaseOperator):
181
181
 
182
182
  # Retry logic if validation fails
183
183
  if validator and not validator(output.result):
184
- max_retries = 3
185
- for attempt in range(max_retries):
184
+ for attempt in range(self.MAX_RETRIES):
186
185
  logger.warning(
187
186
  f"Validation failed, retrying for the {attempt + 1} time."
188
187
  )
@@ -1,4 +1,4 @@
1
- from typing import TypeVar, Type, Any
1
+ from typing import TypeVar, Type, Any, Union
2
2
  import json
3
3
  import re
4
4
  import math
@@ -11,11 +11,16 @@ from openai import OpenAI, AsyncOpenAI
11
11
  # Base Model type for output models
12
12
  T = TypeVar("T", bound=BaseModel)
13
13
 
14
+ ClientType = Union[OpenAI, AsyncOpenAI]
15
+
14
16
  logger = logging.getLogger("texttools.base_operator")
15
17
 
16
18
 
17
19
  class BaseOperator:
18
- def __init__(self, client: OpenAI | AsyncOpenAI, model: str):
20
+ # Max retry in case of failed output validation
21
+ MAX_RETRIES = 3
22
+
23
+ def __init__(self, client: ClientType, model: str):
19
24
  self.client = client
20
25
  self.model = model
21
26
 
@@ -40,16 +45,10 @@ class BaseOperator:
40
45
  """
41
46
  Convert a JSON response string to output model.
42
47
  """
43
- # Clean the response string
44
48
  cleaned_json = self._clean_json_response(response_string)
45
-
46
- # Fix Python-style booleans
47
49
  cleaned_json = cleaned_json.replace("False", "false").replace("True", "true")
48
-
49
- # Convert string to Python dictionary
50
50
  response_dict = json.loads(cleaned_json)
51
51
 
52
- # Convert dictionary to output model
53
52
  return output_model(**response_dict)
54
53
 
55
54
  def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
@@ -63,7 +62,7 @@ class BaseOperator:
63
62
 
64
63
  for choice in completion.choices:
65
64
  if not getattr(choice, "logprobs", None):
66
- logger.error("logprobs is not avalible in the chosen model.")
65
+ logger.error("logprobs is not available for the chosen model.")
67
66
  return []
68
67
 
69
68
  for logprob_item in choice.logprobs.content:
@@ -93,8 +92,5 @@ class BaseOperator:
93
92
  """
94
93
  delta_temp = random.choice([-1, 1]) * random.uniform(0.1, 0.9)
95
94
  new_temp = base_temp + delta_temp
96
- print(f"Base Temp: {base_temp}")
97
- print(f"Delta Temp: {delta_temp}")
98
- print(f"New Temp: {new_temp}")
99
95
 
100
96
  return max(0.0, min(new_temp, 1.5))
@@ -6,7 +6,6 @@ from pydantic import BaseModel
6
6
 
7
7
  from texttools.tools.internals.output_models import ToolOutput
8
8
  from texttools.tools.internals.base_operator import BaseOperator
9
- from texttools.tools.internals.formatters import Formatter
10
9
  from texttools.tools.internals.prompt_loader import PromptLoader
11
10
 
12
11
  # Base Model type for output models
@@ -51,7 +50,7 @@ class Operator(BaseOperator):
51
50
  temperature: float,
52
51
  logprobs: bool = False,
53
52
  top_logprobs: int = 3,
54
- ) -> tuple[Type[T], Any]:
53
+ ) -> tuple[T, Any]:
55
54
  """
56
55
  Parses a chat completion using OpenAI's structured output format.
57
56
  Returns both the parsed object and the raw completion for logging.
@@ -78,7 +77,7 @@ class Operator(BaseOperator):
78
77
  temperature: float,
79
78
  logprobs: bool = False,
80
79
  top_logprobs: int = 3,
81
- ) -> tuple[Type[T], Any]:
80
+ ) -> tuple[T, Any]:
82
81
  """
83
82
  Generates a completion using vLLM with JSON schema guidance.
84
83
  Returns the parsed output model and raw completion.
@@ -126,7 +125,6 @@ class Operator(BaseOperator):
126
125
  Execute the LLM pipeline with the given input text.
127
126
  """
128
127
  prompt_loader = PromptLoader()
129
- formatter = Formatter()
130
128
  output = ToolOutput()
131
129
 
132
130
  try:
@@ -138,7 +136,7 @@ class Operator(BaseOperator):
138
136
  **extra_kwargs,
139
137
  )
140
138
 
141
- messages: list[dict[str, str]] = []
139
+ messages = []
142
140
 
143
141
  if with_analysis:
144
142
  analysis = self._analyze(prompt_configs, temperature)
@@ -159,7 +157,7 @@ class Operator(BaseOperator):
159
157
  )
160
158
 
161
159
  messages.append(self._build_user_message(prompt_configs["main_template"]))
162
- messages = formatter.user_merge_format(messages)
160
+ messages
163
161
 
164
162
  if resp_format == "vllm":
165
163
  parsed, completion = self._vllm_completion(
@@ -181,8 +179,7 @@ class Operator(BaseOperator):
181
179
 
182
180
  # Retry logic if validation fails
183
181
  if validator and not validator(output.result):
184
- max_retries = 3
185
- for attempt in range(max_retries):
182
+ for attempt in range(self.MAX_RETRIES):
186
183
  logger.warning(
187
184
  f"Validation failed, retrying for the {attempt + 1} time."
188
185
  )
@@ -11,15 +11,10 @@ class PromptLoader:
11
11
  - Load and parse YAML prompt definitions.
12
12
  - Select the right template (by mode, if applicable).
13
13
  - Inject variables (`{input}`, plus any extra kwargs) into the templates.
14
- - Return a dict with:
15
- {
16
- "main_template": "...",
17
- "analyze_template": "..." | None
18
- }
19
14
  """
20
15
 
21
- MAIN_TEMPLATE: str = "main_template"
22
- ANALYZE_TEMPLATE: str = "analyze_template"
16
+ MAIN_TEMPLATE = "main_template"
17
+ ANALYZE_TEMPLATE = "analyze_template"
23
18
 
24
19
  # Use lru_cache to load each file once
25
20
  @lru_cache(maxsize=32)