hamtaa-texttools 1.1.0__tar.gz → 1.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hamtaa-texttools might be problematic. Click here for more details.
- {hamtaa_texttools-1.1.0/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.2}/PKG-INFO +2 -2
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2/hamtaa_texttools.egg-info}/PKG-INFO +2 -2
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/hamtaa_texttools.egg-info/requires.txt +1 -1
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/pyproject.toml +32 -32
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/batch/batch_manager.py +0 -1
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/batch/batch_runner.py +0 -1
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/async_the_tool.py +53 -22
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/async_operator.py +21 -8
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/base_operator.py +5 -8
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/operator.py +22 -9
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/prompt_loader.py +3 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/the_tool.py +53 -22
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/LICENSE +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/README.md +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/setup.cfg +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/__init__.py +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/batch/__init__.py +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/README.md +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/categorizer.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/extract_entities.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/extract_keywords.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/is_question.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/merge_questions.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/rewrite.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/run_custom.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/subject_to_question.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/summarize.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/text_to_question.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/translate.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/__init__.py +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/formatters.py +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/output_models.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.2
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -29,7 +29,7 @@ Requires-Python: >=3.8
|
|
|
29
29
|
Description-Content-Type: text/markdown
|
|
30
30
|
License-File: LICENSE
|
|
31
31
|
Requires-Dist: openai==1.97.1
|
|
32
|
-
Requires-Dist:
|
|
32
|
+
Requires-Dist: pyyaml>=6.0
|
|
33
33
|
Dynamic: license-file
|
|
34
34
|
|
|
35
35
|
# TextTools
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.2
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -29,7 +29,7 @@ Requires-Python: >=3.8
|
|
|
29
29
|
Description-Content-Type: text/markdown
|
|
30
30
|
License-File: LICENSE
|
|
31
31
|
Requires-Dist: openai==1.97.1
|
|
32
|
-
Requires-Dist:
|
|
32
|
+
Requires-Dist: pyyaml>=6.0
|
|
33
33
|
Dynamic: license-file
|
|
34
34
|
|
|
35
35
|
# TextTools
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
openai==1.97.1
|
|
2
|
-
|
|
2
|
+
pyyaml>=6.0
|
|
@@ -1,32 +1,32 @@
|
|
|
1
|
-
[build-system]
|
|
2
|
-
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
-
build-backend = "setuptools.build_meta"
|
|
4
|
-
|
|
5
|
-
[project]
|
|
6
|
-
name = "hamtaa-texttools"
|
|
7
|
-
version = "1.1.0"
|
|
8
|
-
authors = [
|
|
9
|
-
{ name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
|
|
10
|
-
{ name = "Montazer", email = "montazerh82@gmail.com" },
|
|
11
|
-
{ name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
|
|
12
|
-
{ name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
|
|
13
|
-
]
|
|
14
|
-
description = "A high-level NLP toolkit built on top of modern LLMs."
|
|
15
|
-
readme = "README.md"
|
|
16
|
-
license = {file = "LICENSE"}
|
|
17
|
-
requires-python = ">=3.8"
|
|
18
|
-
dependencies = [
|
|
19
|
-
"openai==1.97.1",
|
|
20
|
-
"
|
|
21
|
-
]
|
|
22
|
-
keywords = ["nlp", "llm", "text-processing", "openai"]
|
|
23
|
-
|
|
24
|
-
[tool.setuptools.packages.find]
|
|
25
|
-
where = ["."]
|
|
26
|
-
include = ["texttools*"]
|
|
27
|
-
|
|
28
|
-
[tool.setuptools]
|
|
29
|
-
include-package-data = true
|
|
30
|
-
|
|
31
|
-
[tool.setuptools.package-data]
|
|
32
|
-
"texttools" = ["prompts/*.yaml", "prompts/*.yml"]
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "hamtaa-texttools"
|
|
7
|
+
version = "1.1.2"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
|
|
10
|
+
{ name = "Montazer", email = "montazerh82@gmail.com" },
|
|
11
|
+
{ name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
|
|
12
|
+
{ name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
|
|
13
|
+
]
|
|
14
|
+
description = "A high-level NLP toolkit built on top of modern LLMs."
|
|
15
|
+
readme = "README.md"
|
|
16
|
+
license = {file = "LICENSE"}
|
|
17
|
+
requires-python = ">=3.8"
|
|
18
|
+
dependencies = [
|
|
19
|
+
"openai==1.97.1",
|
|
20
|
+
"pyyaml>=6.0",
|
|
21
|
+
]
|
|
22
|
+
keywords = ["nlp", "llm", "text-processing", "openai"]
|
|
23
|
+
|
|
24
|
+
[tool.setuptools.packages.find]
|
|
25
|
+
where = ["."]
|
|
26
|
+
include = ["texttools*"]
|
|
27
|
+
|
|
28
|
+
[tool.setuptools]
|
|
29
|
+
include-package-data = true
|
|
30
|
+
|
|
31
|
+
[tool.setuptools.package-data]
|
|
32
|
+
"texttools" = ["prompts/*.yaml", "prompts/*.yml"]
|
|
@@ -34,12 +34,15 @@ class AsyncTheTool:
|
|
|
34
34
|
temperature: float | None = 0.0,
|
|
35
35
|
logprobs: bool = False,
|
|
36
36
|
top_logprobs: int | None = None,
|
|
37
|
-
) ->
|
|
37
|
+
) -> OutputModels.ToolOutput:
|
|
38
38
|
"""
|
|
39
39
|
Categorize a text into a single Islamic studies domain category.
|
|
40
40
|
|
|
41
41
|
Returns:
|
|
42
|
-
|
|
42
|
+
ToolOutput: Object containing:
|
|
43
|
+
- result (str): The assigned Islamic studies category
|
|
44
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
45
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
43
46
|
"""
|
|
44
47
|
return await self.operator.run(
|
|
45
48
|
# User parameters
|
|
@@ -66,12 +69,15 @@ class AsyncTheTool:
|
|
|
66
69
|
temperature: float | None = 0.0,
|
|
67
70
|
logprobs: bool = False,
|
|
68
71
|
top_logprobs: int | None = None,
|
|
69
|
-
) ->
|
|
72
|
+
) -> OutputModels.ToolOutput:
|
|
70
73
|
"""
|
|
71
74
|
Extract salient keywords from text.
|
|
72
75
|
|
|
73
76
|
Returns:
|
|
74
|
-
|
|
77
|
+
ToolOutput: Object containing:
|
|
78
|
+
- result (list[str]): List of extracted keywords
|
|
79
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
80
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
75
81
|
"""
|
|
76
82
|
return await self.operator.run(
|
|
77
83
|
# User parameters
|
|
@@ -98,12 +104,15 @@ class AsyncTheTool:
|
|
|
98
104
|
temperature: float | None = 0.0,
|
|
99
105
|
logprobs: bool = False,
|
|
100
106
|
top_logprobs: int | None = None,
|
|
101
|
-
) ->
|
|
107
|
+
) -> OutputModels.ToolOutput:
|
|
102
108
|
"""
|
|
103
109
|
Perform Named Entity Recognition (NER) over the input text.
|
|
104
110
|
|
|
105
111
|
Returns:
|
|
106
|
-
|
|
112
|
+
ToolOutput: Object containing:
|
|
113
|
+
- result (list[dict]): List of entities with 'text' and 'type' keys
|
|
114
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
115
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
107
116
|
"""
|
|
108
117
|
return await self.operator.run(
|
|
109
118
|
# User parameters
|
|
@@ -129,12 +138,15 @@ class AsyncTheTool:
|
|
|
129
138
|
temperature: float | None = 0.0,
|
|
130
139
|
logprobs: bool = False,
|
|
131
140
|
top_logprobs: int | None = None,
|
|
132
|
-
) ->
|
|
141
|
+
) -> OutputModels.ToolOutput:
|
|
133
142
|
"""
|
|
134
143
|
Detect if the input is phrased as a question.
|
|
135
144
|
|
|
136
145
|
Returns:
|
|
137
|
-
|
|
146
|
+
ToolOutput: Object containing:
|
|
147
|
+
- result (bool): True if text is a question, False otherwise
|
|
148
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
149
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
138
150
|
"""
|
|
139
151
|
return await self.operator.run(
|
|
140
152
|
# User parameters
|
|
@@ -161,12 +173,15 @@ class AsyncTheTool:
|
|
|
161
173
|
temperature: float | None = 0.0,
|
|
162
174
|
logprobs: bool = False,
|
|
163
175
|
top_logprobs: int | None = None,
|
|
164
|
-
) ->
|
|
176
|
+
) -> OutputModels.ToolOutput:
|
|
165
177
|
"""
|
|
166
178
|
Generate a single question from the given text.
|
|
167
179
|
|
|
168
180
|
Returns:
|
|
169
|
-
|
|
181
|
+
ToolOutput: Object containing:
|
|
182
|
+
- result (str): The generated question
|
|
183
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
184
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
170
185
|
"""
|
|
171
186
|
return await self.operator.run(
|
|
172
187
|
# User parameters
|
|
@@ -194,12 +209,15 @@ class AsyncTheTool:
|
|
|
194
209
|
logprobs: bool = False,
|
|
195
210
|
top_logprobs: int | None = None,
|
|
196
211
|
mode: Literal["default", "reason"] = "default",
|
|
197
|
-
) ->
|
|
212
|
+
) -> OutputModels.ToolOutput:
|
|
198
213
|
"""
|
|
199
214
|
Merge multiple questions into a single unified question.
|
|
200
215
|
|
|
201
216
|
Returns:
|
|
202
|
-
|
|
217
|
+
ToolOutput: Object containing:
|
|
218
|
+
- result (str): The merged question
|
|
219
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
220
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
203
221
|
"""
|
|
204
222
|
text = ", ".join(text)
|
|
205
223
|
return await self.operator.run(
|
|
@@ -228,12 +246,15 @@ class AsyncTheTool:
|
|
|
228
246
|
logprobs: bool = False,
|
|
229
247
|
top_logprobs: int | None = None,
|
|
230
248
|
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
231
|
-
) ->
|
|
249
|
+
) -> OutputModels.ToolOutput:
|
|
232
250
|
"""
|
|
233
251
|
Rewrite a text with different modes.
|
|
234
252
|
|
|
235
253
|
Returns:
|
|
236
|
-
|
|
254
|
+
ToolOutput: Object containing:
|
|
255
|
+
- result (str): The rewritten text
|
|
256
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
257
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
237
258
|
"""
|
|
238
259
|
return await self.operator.run(
|
|
239
260
|
# User parameters
|
|
@@ -261,12 +282,15 @@ class AsyncTheTool:
|
|
|
261
282
|
temperature: float | None = 0.0,
|
|
262
283
|
logprobs: bool = False,
|
|
263
284
|
top_logprobs: int | None = None,
|
|
264
|
-
) ->
|
|
285
|
+
) -> OutputModels.ToolOutput:
|
|
265
286
|
"""
|
|
266
287
|
Generate a list of questions about a subject.
|
|
267
288
|
|
|
268
289
|
Returns:
|
|
269
|
-
|
|
290
|
+
ToolOutput: Object containing:
|
|
291
|
+
- result (list[str]): List of generated questions
|
|
292
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
293
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
270
294
|
"""
|
|
271
295
|
return await self.operator.run(
|
|
272
296
|
# User parameters
|
|
@@ -294,12 +318,15 @@ class AsyncTheTool:
|
|
|
294
318
|
temperature: float | None = 0.0,
|
|
295
319
|
logprobs: bool = False,
|
|
296
320
|
top_logprobs: int | None = None,
|
|
297
|
-
) ->
|
|
321
|
+
) -> OutputModels.ToolOutput:
|
|
298
322
|
"""
|
|
299
323
|
Summarize the given subject text.
|
|
300
324
|
|
|
301
325
|
Returns:
|
|
302
|
-
|
|
326
|
+
ToolOutput: Object containing:
|
|
327
|
+
- result (str): The summary text
|
|
328
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
329
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
303
330
|
"""
|
|
304
331
|
return await self.operator.run(
|
|
305
332
|
# User parameters
|
|
@@ -326,12 +353,15 @@ class AsyncTheTool:
|
|
|
326
353
|
temperature: float | None = 0.0,
|
|
327
354
|
logprobs: bool = False,
|
|
328
355
|
top_logprobs: int | None = None,
|
|
329
|
-
) ->
|
|
356
|
+
) -> OutputModels.ToolOutput:
|
|
330
357
|
"""
|
|
331
358
|
Translate text between languages.
|
|
332
359
|
|
|
333
360
|
Returns:
|
|
334
|
-
|
|
361
|
+
ToolOutput: Object containing:
|
|
362
|
+
- result (str): The translated text
|
|
363
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
364
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
335
365
|
"""
|
|
336
366
|
return await self.operator.run(
|
|
337
367
|
# User parameters
|
|
@@ -358,12 +388,13 @@ class AsyncTheTool:
|
|
|
358
388
|
temperature: float | None = None,
|
|
359
389
|
logprobs: bool | None = None,
|
|
360
390
|
top_logprobs: int | None = None,
|
|
361
|
-
) ->
|
|
391
|
+
) -> OutputModels.ToolOutput:
|
|
362
392
|
"""
|
|
363
393
|
Custom tool that can do almost anything!
|
|
364
394
|
|
|
365
395
|
Returns:
|
|
366
|
-
|
|
396
|
+
ToolOutput: Object with fields:
|
|
397
|
+
- result (str): The output result
|
|
367
398
|
"""
|
|
368
399
|
return await self.operator.run(
|
|
369
400
|
# User paramaeters
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/async_operator.py
RENAMED
|
@@ -12,7 +12,6 @@ from texttools.tools.internals.prompt_loader import PromptLoader
|
|
|
12
12
|
# Base Model type for output models
|
|
13
13
|
T = TypeVar("T", bound=BaseModel)
|
|
14
14
|
|
|
15
|
-
# Configure logger
|
|
16
15
|
logger = logging.getLogger("async_operator")
|
|
17
16
|
logger.setLevel(logging.INFO)
|
|
18
17
|
|
|
@@ -32,6 +31,10 @@ class AsyncOperator(BaseOperator):
|
|
|
32
31
|
self.model = model
|
|
33
32
|
|
|
34
33
|
async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
|
|
34
|
+
"""
|
|
35
|
+
Calls OpenAI API for analysis using the configured prompt template.
|
|
36
|
+
Returns the analyzed content as a string.
|
|
37
|
+
"""
|
|
35
38
|
analyze_prompt = prompt_configs["analyze_template"]
|
|
36
39
|
analyze_message = [self._build_user_message(analyze_prompt)]
|
|
37
40
|
completion = await self.client.chat.completions.create(
|
|
@@ -50,6 +53,10 @@ class AsyncOperator(BaseOperator):
|
|
|
50
53
|
logprobs: bool = False,
|
|
51
54
|
top_logprobs: int = 3,
|
|
52
55
|
) -> tuple[Type[T], Any]:
|
|
56
|
+
"""
|
|
57
|
+
Parses a chat completion using OpenAI's structured output format.
|
|
58
|
+
Returns both the parsed object and the raw completion for logging.
|
|
59
|
+
"""
|
|
53
60
|
request_kwargs = {
|
|
54
61
|
"model": self.model,
|
|
55
62
|
"messages": message,
|
|
@@ -73,6 +80,10 @@ class AsyncOperator(BaseOperator):
|
|
|
73
80
|
logprobs: bool = False,
|
|
74
81
|
top_logprobs: int = 3,
|
|
75
82
|
) -> tuple[Type[T], Any]:
|
|
83
|
+
"""
|
|
84
|
+
Generates a completion using vLLM with JSON schema guidance.
|
|
85
|
+
Returns the parsed output model and raw completion.
|
|
86
|
+
"""
|
|
76
87
|
json_schema = output_model.model_json_schema()
|
|
77
88
|
|
|
78
89
|
# Build kwargs dynamically
|
|
@@ -110,14 +121,16 @@ class AsyncOperator(BaseOperator):
|
|
|
110
121
|
resp_format: Literal["vllm", "parse"],
|
|
111
122
|
mode: str | None,
|
|
112
123
|
**extra_kwargs,
|
|
113
|
-
) ->
|
|
124
|
+
) -> ToolOutput:
|
|
114
125
|
"""
|
|
115
126
|
Execute the async LLM pipeline with the given input text. (Async)
|
|
116
127
|
"""
|
|
117
128
|
prompt_loader = PromptLoader()
|
|
118
129
|
formatter = Formatter()
|
|
130
|
+
output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
|
|
119
131
|
|
|
120
132
|
try:
|
|
133
|
+
# Prompt configs contain two keys: main_template and analyze template, both are string
|
|
121
134
|
prompt_configs = prompt_loader.load(
|
|
122
135
|
prompt_file=prompt_file,
|
|
123
136
|
text=text.strip(),
|
|
@@ -159,11 +172,10 @@ class AsyncOperator(BaseOperator):
|
|
|
159
172
|
|
|
160
173
|
# Ensure output_model has a `result` field
|
|
161
174
|
if not hasattr(parsed, "result"):
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
)
|
|
165
|
-
|
|
166
|
-
output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
|
|
175
|
+
error = "The provided output_model must define a field named 'result'"
|
|
176
|
+
logger.error(error)
|
|
177
|
+
output.errors.append(error)
|
|
178
|
+
return output
|
|
167
179
|
|
|
168
180
|
output.result = parsed.result
|
|
169
181
|
|
|
@@ -174,6 +186,7 @@ class AsyncOperator(BaseOperator):
|
|
|
174
186
|
output.analysis = analysis
|
|
175
187
|
|
|
176
188
|
return output
|
|
189
|
+
|
|
177
190
|
except Exception as e:
|
|
178
191
|
logger.error(f"AsyncTheTool failed: {e}")
|
|
179
|
-
return
|
|
192
|
+
return output.errors.append(str(e))
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/base_operator.py
RENAMED
|
@@ -10,7 +10,6 @@ from openai import OpenAI, AsyncOpenAI
|
|
|
10
10
|
# Base Model type for output models
|
|
11
11
|
T = TypeVar("T", bound=BaseModel)
|
|
12
12
|
|
|
13
|
-
# Configure logger
|
|
14
13
|
logger = logging.getLogger("base_operator")
|
|
15
14
|
logger.setLevel(logging.INFO)
|
|
16
15
|
|
|
@@ -40,13 +39,6 @@ class BaseOperator:
|
|
|
40
39
|
) -> Type[T]:
|
|
41
40
|
"""
|
|
42
41
|
Convert a JSON response string to output model.
|
|
43
|
-
|
|
44
|
-
Args:
|
|
45
|
-
response_string: The JSON string (may contain code block markers)
|
|
46
|
-
output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
|
|
47
|
-
|
|
48
|
-
Returns:
|
|
49
|
-
Instance of your output model
|
|
50
42
|
"""
|
|
51
43
|
# Clean the response string
|
|
52
44
|
cleaned_json = self._clean_json_response(response_string)
|
|
@@ -61,7 +53,12 @@ class BaseOperator:
|
|
|
61
53
|
return output_model(**response_dict)
|
|
62
54
|
|
|
63
55
|
def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
|
|
56
|
+
"""
|
|
57
|
+
Extracts and filters token probabilities from completion logprobs.
|
|
58
|
+
Skips punctuation and structural tokens, returns cleaned probability data.
|
|
59
|
+
"""
|
|
64
60
|
logprobs_data = []
|
|
61
|
+
|
|
65
62
|
ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
|
|
66
63
|
|
|
67
64
|
for choice in completion.choices:
|
|
@@ -12,7 +12,6 @@ from texttools.tools.internals.prompt_loader import PromptLoader
|
|
|
12
12
|
# Base Model type for output models
|
|
13
13
|
T = TypeVar("T", bound=BaseModel)
|
|
14
14
|
|
|
15
|
-
# Configure logger
|
|
16
15
|
logger = logging.getLogger("operator")
|
|
17
16
|
logger.setLevel(logging.INFO)
|
|
18
17
|
|
|
@@ -32,6 +31,10 @@ class Operator(BaseOperator):
|
|
|
32
31
|
self.model = model
|
|
33
32
|
|
|
34
33
|
def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
|
|
34
|
+
"""
|
|
35
|
+
Calls OpenAI API for analysis using the configured prompt template.
|
|
36
|
+
Returns the analyzed content as a string.
|
|
37
|
+
"""
|
|
35
38
|
analyze_prompt = prompt_configs["analyze_template"]
|
|
36
39
|
analyze_message = [self._build_user_message(analyze_prompt)]
|
|
37
40
|
completion = self.client.chat.completions.create(
|
|
@@ -50,6 +53,10 @@ class Operator(BaseOperator):
|
|
|
50
53
|
logprobs: bool = False,
|
|
51
54
|
top_logprobs: int = 3,
|
|
52
55
|
) -> tuple[Type[T], Any]:
|
|
56
|
+
"""
|
|
57
|
+
Parses a chat completion using OpenAI's structured output format.
|
|
58
|
+
Returns both the parsed object and the raw completion for logging.
|
|
59
|
+
"""
|
|
53
60
|
request_kwargs = {
|
|
54
61
|
"model": self.model,
|
|
55
62
|
"messages": message,
|
|
@@ -73,6 +80,10 @@ class Operator(BaseOperator):
|
|
|
73
80
|
logprobs: bool = False,
|
|
74
81
|
top_logprobs: int = 3,
|
|
75
82
|
) -> tuple[Type[T], Any]:
|
|
83
|
+
"""
|
|
84
|
+
Generates a completion using vLLM with JSON schema guidance.
|
|
85
|
+
Returns the parsed output model and raw completion.
|
|
86
|
+
"""
|
|
76
87
|
json_schema = output_model.model_json_schema()
|
|
77
88
|
|
|
78
89
|
# Build kwargs dynamically
|
|
@@ -110,14 +121,16 @@ class Operator(BaseOperator):
|
|
|
110
121
|
resp_format: Literal["vllm", "parse"],
|
|
111
122
|
mode: str | None,
|
|
112
123
|
**extra_kwargs,
|
|
113
|
-
) ->
|
|
124
|
+
) -> ToolOutput:
|
|
114
125
|
"""
|
|
115
126
|
Execute the LLM pipeline with the given input text.
|
|
116
127
|
"""
|
|
117
128
|
prompt_loader = PromptLoader()
|
|
118
129
|
formatter = Formatter()
|
|
130
|
+
output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
|
|
119
131
|
|
|
120
132
|
try:
|
|
133
|
+
# Prompt configs contain two keys: main_template and analyze template, both are string
|
|
121
134
|
prompt_configs = prompt_loader.load(
|
|
122
135
|
prompt_file=prompt_file,
|
|
123
136
|
text=text.strip(),
|
|
@@ -159,11 +172,10 @@ class Operator(BaseOperator):
|
|
|
159
172
|
|
|
160
173
|
# Ensure output_model has a `result` field
|
|
161
174
|
if not hasattr(parsed, "result"):
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
)
|
|
165
|
-
|
|
166
|
-
output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
|
|
175
|
+
error = "The provided output_model must define a field named 'result'"
|
|
176
|
+
logger.error(error)
|
|
177
|
+
output.errors.append(error)
|
|
178
|
+
return output
|
|
167
179
|
|
|
168
180
|
output.result = parsed.result
|
|
169
181
|
|
|
@@ -174,6 +186,7 @@ class Operator(BaseOperator):
|
|
|
174
186
|
output.analysis = analysis
|
|
175
187
|
|
|
176
188
|
return output
|
|
189
|
+
|
|
177
190
|
except Exception as e:
|
|
178
|
-
logger.error(f"
|
|
179
|
-
return
|
|
191
|
+
logger.error(f"AsyncTheTool failed: {e}")
|
|
192
|
+
return output.errors.append(str(e))
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/prompt_loader.py
RENAMED
|
@@ -24,6 +24,9 @@ class PromptLoader:
|
|
|
24
24
|
# Use lru_cache to load each file once
|
|
25
25
|
@lru_cache(maxsize=32)
|
|
26
26
|
def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
|
|
27
|
+
"""
|
|
28
|
+
Loads prompt templates from YAML file with optional mode selection.
|
|
29
|
+
"""
|
|
27
30
|
base_dir = Path(__file__).parent.parent.parent / Path("prompts")
|
|
28
31
|
prompt_path = base_dir / prompt_file
|
|
29
32
|
data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
|
|
@@ -32,12 +32,15 @@ class TheTool:
|
|
|
32
32
|
temperature: float | None = 0.0,
|
|
33
33
|
logprobs: bool = False,
|
|
34
34
|
top_logprobs: int | None = None,
|
|
35
|
-
) ->
|
|
35
|
+
) -> OutputModels.ToolOutput:
|
|
36
36
|
"""
|
|
37
37
|
Categorize a text into a single Islamic studies domain category.
|
|
38
38
|
|
|
39
39
|
Returns:
|
|
40
|
-
|
|
40
|
+
ToolOutput: Object containing:
|
|
41
|
+
- result (str): The assigned Islamic studies category
|
|
42
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
43
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
41
44
|
"""
|
|
42
45
|
return self.operator.run(
|
|
43
46
|
# User parameters
|
|
@@ -64,12 +67,15 @@ class TheTool:
|
|
|
64
67
|
temperature: float | None = 0.0,
|
|
65
68
|
logprobs: bool = False,
|
|
66
69
|
top_logprobs: int | None = None,
|
|
67
|
-
) ->
|
|
70
|
+
) -> OutputModels.ToolOutput:
|
|
68
71
|
"""
|
|
69
72
|
Extract salient keywords from text.
|
|
70
73
|
|
|
71
74
|
Returns:
|
|
72
|
-
|
|
75
|
+
ToolOutput: Object containing:
|
|
76
|
+
- result (list[str]): List of extracted keywords
|
|
77
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
78
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
73
79
|
"""
|
|
74
80
|
return self.operator.run(
|
|
75
81
|
# User parameters
|
|
@@ -96,12 +102,15 @@ class TheTool:
|
|
|
96
102
|
temperature: float | None = 0.0,
|
|
97
103
|
logprobs: bool = False,
|
|
98
104
|
top_logprobs: int | None = None,
|
|
99
|
-
) ->
|
|
105
|
+
) -> OutputModels.ToolOutput:
|
|
100
106
|
"""
|
|
101
107
|
Perform Named Entity Recognition (NER) over the input text.
|
|
102
108
|
|
|
103
109
|
Returns:
|
|
104
|
-
|
|
110
|
+
ToolOutput: Object containing:
|
|
111
|
+
- result (list[dict]): List of entities with 'text' and 'type' keys
|
|
112
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
113
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
105
114
|
"""
|
|
106
115
|
return self.operator.run(
|
|
107
116
|
# User parameters
|
|
@@ -127,12 +136,15 @@ class TheTool:
|
|
|
127
136
|
temperature: float | None = 0.0,
|
|
128
137
|
logprobs: bool = False,
|
|
129
138
|
top_logprobs: int | None = None,
|
|
130
|
-
) ->
|
|
139
|
+
) -> OutputModels.ToolOutput:
|
|
131
140
|
"""
|
|
132
141
|
Detect if the input is phrased as a question.
|
|
133
142
|
|
|
134
143
|
Returns:
|
|
135
|
-
|
|
144
|
+
ToolOutput: Object containing:
|
|
145
|
+
- result (bool): True if text is a question, False otherwise
|
|
146
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
147
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
136
148
|
"""
|
|
137
149
|
return self.operator.run(
|
|
138
150
|
# User parameters
|
|
@@ -159,12 +171,15 @@ class TheTool:
|
|
|
159
171
|
temperature: float | None = 0.0,
|
|
160
172
|
logprobs: bool = False,
|
|
161
173
|
top_logprobs: int | None = None,
|
|
162
|
-
) ->
|
|
174
|
+
) -> OutputModels.ToolOutput:
|
|
163
175
|
"""
|
|
164
176
|
Generate a single question from the given text.
|
|
165
177
|
|
|
166
178
|
Returns:
|
|
167
|
-
|
|
179
|
+
ToolOutput: Object containing:
|
|
180
|
+
- result (str): The generated question
|
|
181
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
182
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
168
183
|
"""
|
|
169
184
|
return self.operator.run(
|
|
170
185
|
# User parameters
|
|
@@ -192,12 +207,15 @@ class TheTool:
|
|
|
192
207
|
logprobs: bool = False,
|
|
193
208
|
top_logprobs: int | None = None,
|
|
194
209
|
mode: Literal["default", "reason"] = "default",
|
|
195
|
-
) ->
|
|
210
|
+
) -> OutputModels.ToolOutput:
|
|
196
211
|
"""
|
|
197
212
|
Merge multiple questions into a single unified question.
|
|
198
213
|
|
|
199
214
|
Returns:
|
|
200
|
-
|
|
215
|
+
ToolOutput: Object containing:
|
|
216
|
+
- result (str): The merged question
|
|
217
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
218
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
201
219
|
"""
|
|
202
220
|
text = ", ".join(text)
|
|
203
221
|
return self.operator.run(
|
|
@@ -226,12 +244,15 @@ class TheTool:
|
|
|
226
244
|
logprobs: bool = False,
|
|
227
245
|
top_logprobs: int | None = None,
|
|
228
246
|
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
229
|
-
) ->
|
|
247
|
+
) -> OutputModels.ToolOutput:
|
|
230
248
|
"""
|
|
231
249
|
Rewrite a text with different modes.
|
|
232
250
|
|
|
233
251
|
Returns:
|
|
234
|
-
|
|
252
|
+
ToolOutput: Object containing:
|
|
253
|
+
- result (str): The rewritten text
|
|
254
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
255
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
235
256
|
"""
|
|
236
257
|
return self.operator.run(
|
|
237
258
|
# User parameters
|
|
@@ -259,12 +280,15 @@ class TheTool:
|
|
|
259
280
|
temperature: float | None = 0.0,
|
|
260
281
|
logprobs: bool = False,
|
|
261
282
|
top_logprobs: int | None = None,
|
|
262
|
-
) ->
|
|
283
|
+
) -> OutputModels.ToolOutput:
|
|
263
284
|
"""
|
|
264
285
|
Generate a list of questions about a subject.
|
|
265
286
|
|
|
266
287
|
Returns:
|
|
267
|
-
|
|
288
|
+
ToolOutput: Object containing:
|
|
289
|
+
- result (list[str]): List of generated questions
|
|
290
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
291
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
268
292
|
"""
|
|
269
293
|
return self.operator.run(
|
|
270
294
|
# User parameters
|
|
@@ -292,12 +316,15 @@ class TheTool:
|
|
|
292
316
|
temperature: float | None = 0.0,
|
|
293
317
|
logprobs: bool = False,
|
|
294
318
|
top_logprobs: int | None = None,
|
|
295
|
-
) ->
|
|
319
|
+
) -> OutputModels.ToolOutput:
|
|
296
320
|
"""
|
|
297
321
|
Summarize the given subject text.
|
|
298
322
|
|
|
299
323
|
Returns:
|
|
300
|
-
|
|
324
|
+
ToolOutput: Object containing:
|
|
325
|
+
- result (str): The summary text
|
|
326
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
327
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
301
328
|
"""
|
|
302
329
|
return self.operator.run(
|
|
303
330
|
# User parameters
|
|
@@ -324,12 +351,15 @@ class TheTool:
|
|
|
324
351
|
temperature: float | None = 0.0,
|
|
325
352
|
logprobs: bool = False,
|
|
326
353
|
top_logprobs: int | None = None,
|
|
327
|
-
) ->
|
|
354
|
+
) -> OutputModels.ToolOutput:
|
|
328
355
|
"""
|
|
329
356
|
Translate text between languages.
|
|
330
357
|
|
|
331
358
|
Returns:
|
|
332
|
-
|
|
359
|
+
ToolOutput: Object containing:
|
|
360
|
+
- result (str): The translated text
|
|
361
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
362
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
333
363
|
"""
|
|
334
364
|
return self.operator.run(
|
|
335
365
|
# User parameters
|
|
@@ -356,12 +386,13 @@ class TheTool:
|
|
|
356
386
|
temperature: float | None = None,
|
|
357
387
|
logprobs: bool | None = None,
|
|
358
388
|
top_logprobs: int | None = None,
|
|
359
|
-
) ->
|
|
389
|
+
) -> OutputModels.ToolOutput:
|
|
360
390
|
"""
|
|
361
391
|
Custom tool that can do almost anything!
|
|
362
392
|
|
|
363
393
|
Returns:
|
|
364
|
-
|
|
394
|
+
ToolOutput: Object with fields:
|
|
395
|
+
- result (str): The output result
|
|
365
396
|
"""
|
|
366
397
|
return self.operator.run(
|
|
367
398
|
# User paramaeters
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/hamtaa_texttools.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/subject_to_question.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/output_models.py
RENAMED
|
File without changes
|