hamtaa-texttools 1.0.6__tar.gz → 1.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hamtaa-texttools might be problematic.
- {hamtaa_texttools-1.0.6/hamtaa_texttools.egg-info → hamtaa_texttools-1.0.7}/PKG-INFO +1 -1
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7/hamtaa_texttools.egg-info}/PKG-INFO +1 -1
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/hamtaa_texttools.egg-info/SOURCES.txt +7 -6
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/pyproject.toml +1 -1
- hamtaa_texttools-1.0.7/texttools/tools/async_the_tool.py +338 -0
- hamtaa_texttools-1.0.7/texttools/tools/internals/async_operator.py +191 -0
- hamtaa_texttools-1.0.7/texttools/tools/internals/base_operator.py +85 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/tools/internals/operator.py +25 -128
- hamtaa_texttools-1.0.7/texttools/tools/the_tool.py +438 -0
- hamtaa_texttools-1.0.6/texttools/tools/async_the_tool.py +0 -277
- hamtaa_texttools-1.0.6/texttools/tools/internals/async_operator.py +0 -308
- hamtaa_texttools-1.0.6/texttools/tools/the_tool.py +0 -501
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/LICENSE +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/README.md +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/setup.cfg +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/__init__.py +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/batch/__init__.py +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/batch/batch_manager.py +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/batch/batch_runner.py +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/formatters/base_formatter.py +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/formatters/user_merge_formatter.py +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/README.md +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/categorizer.yaml +0 -0
- /hamtaa_texttools-1.0.6/texttools/prompts/ner_extractor.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/extract_entities.yaml +0 -0
- /hamtaa_texttools-1.0.6/texttools/prompts/keyword_extractor.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/extract_keywords.yaml +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/is_question.yaml +0 -0
- /hamtaa_texttools-1.0.6/texttools/prompts/question_merger.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/merge_questions.yaml +0 -0
- /hamtaa_texttools-1.0.6/texttools/prompts/rewriter.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/rewrite.yaml +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/run_custom.yaml +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/subject_to_question.yaml +0 -0
- /hamtaa_texttools-1.0.6/texttools/prompts/summarizer.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/summarize.yaml +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/text_to_question.yaml +0 -0
- /hamtaa_texttools-1.0.6/texttools/prompts/translator.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/translate.yaml +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/tools/__init__.py +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/tools/internals/output_models.py +0 -0
- {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/tools/internals/prompt_loader.py +0 -0
{hamtaa_texttools-1.0.6/hamtaa_texttools.egg-info → hamtaa_texttools-1.0.7}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.0.6
+Version: 1.0.7
 Summary: TextTools is a high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
{hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7/hamtaa_texttools.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.0.6
+Version: 1.0.7
 Summary: TextTools is a high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
{hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/hamtaa_texttools.egg-info/SOURCES.txt
@@ -15,20 +15,21 @@ texttools/formatters/base_formatter.py
 texttools/formatters/user_merge_formatter.py
 texttools/prompts/README.md
 texttools/prompts/categorizer.yaml
+texttools/prompts/extract_entities.yaml
+texttools/prompts/extract_keywords.yaml
 texttools/prompts/is_question.yaml
-texttools/prompts/keyword_extractor.yaml
-texttools/prompts/ner_extractor.yaml
-texttools/prompts/question_merger.yaml
-texttools/prompts/rewriter.yaml
+texttools/prompts/merge_questions.yaml
+texttools/prompts/rewrite.yaml
 texttools/prompts/run_custom.yaml
 texttools/prompts/subject_to_question.yaml
-texttools/prompts/summarizer.yaml
+texttools/prompts/summarize.yaml
 texttools/prompts/text_to_question.yaml
-texttools/prompts/translator.yaml
+texttools/prompts/translate.yaml
 texttools/tools/__init__.py
 texttools/tools/async_the_tool.py
 texttools/tools/the_tool.py
 texttools/tools/internals/async_operator.py
+texttools/tools/internals/base_operator.py
 texttools/tools/internals/operator.py
 texttools/tools/internals/output_models.py
 texttools/tools/internals/prompt_loader.py
hamtaa_texttools-1.0.7/texttools/tools/async_the_tool.py
@@ -0,0 +1,338 @@
+from typing import Literal, Any
+
+from openai import AsyncOpenAI
+
+from texttools.tools.internals.async_operator import AsyncOperator
+import texttools.tools.internals.output_models as OutputModels
+
+
+class AsyncTheTool:
+    """
+    Async counterpart to TheTool.
+
+    Usage:
+        async_client = AsyncOpenAI(...)
+        tool = TheToolAsync(async_client, model="model-name")
+        result = await tool.categorize("text ...", with_analysis=True)
+    """
+
+    def __init__(
+        self,
+        client: AsyncOpenAI,
+        model: str,
+    ):
+        self.operator = AsyncOperator(client=client, model=model)
+
+    async def categorize(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, str]:
+        """
+        Categorize a text into a single Islamic studies domain category.
+
+        Args:
+            text: Input string to categorize.
+            with_analysis: If True, first runs an LLM "analysis" step and
+                conditions the main prompt on that analysis.
+
+        Returns:
+            {"result": <category string>}
+            Example: {"result": "باورهای دینی"}
+        """
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="categorizer.yaml",
+            output_model=OutputModels.CategorizerOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def extract_keywords(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, list[str]]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="extract_keywords.yaml",
+            output_model=OutputModels.ListStrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def extract_entities(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, list[dict[str, str]]]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="extract_entities.yaml",
+            output_model=OutputModels.ListDictStrStrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def is_question(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, bool]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="is_question.yaml",
+            output_model=OutputModels.BoolOutput,
+            resp_format="parse",
+            mode=None,
+            output_lang=None,
+        )
+
+    async def text_to_question(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, str]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="text_to_question.yaml",
+            output_model=OutputModels.StrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def merge_questions(
+        self,
+        text: list[str],
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+        mode: Literal["default", "reason"] = "default",
+    ) -> dict[str, str]:
+        text = ", ".join(text)
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="merge_questions.yaml",
+            output_model=OutputModels.StrOutput,
+            resp_format="parse",
+            mode=mode,
+        )
+
+    async def rewrite(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+        mode: Literal["positive", "negative", "hard_negative"] = "positive",
+    ) -> dict[str, str]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="rewrite.yaml",
+            output_model=OutputModels.StrOutput,
+            resp_format="parse",
+            mode=mode,
+        )
+
+    async def subject_to_question(
+        self,
+        text: str,
+        number_of_questions: int,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, list[str]]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            number_of_questions=number_of_questions,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="subject_to_question.yaml",
+            output_model=OutputModels.ReasonListStrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def summarize(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, str]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="summarize.yaml",
+            output_model=OutputModels.StrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def translate(
+        self,
+        text: str,
+        target_language: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, str]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            target_language=target_language,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="translate.yaml",
+            output_model=OutputModels.StrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def run_custom(
+        self,
+        prompt: str,
+        output_model: Any,
+        output_lang: str | None = None,
+        temperature: float | None = None,
+        logprobs: bool | None = None,
+        top_logprobs: int | None = None,
+    ) -> dict[str, Any]:
+        """
+        Custom tool that can do almost anything!
+
+        Args:
+            prompt: Custom prompt.
+            output_model: Custom BaseModel output model.
+
+        Returns:
+            {"result": <Any>}
+        """
+        return await self.operator.run(
+            # User parameters
+            text=prompt,
+            output_model=output_model,
+            output_model_str=output_model.model_json_schema(),
+            output_lang=output_lang,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="run_custom.yaml",
+            resp_format="parse",
+            user_prompt=None,
+            with_analysis=False,
+            mode=None,
+        )
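For orientation, here is a minimal usage sketch of the AsyncTheTool class added above, assembled only from its docstring and method signatures; the API key, base URL, and model name are placeholders, not values taken from the package:

import asyncio

from openai import AsyncOpenAI

from texttools.tools.async_the_tool import AsyncTheTool


async def main() -> None:
    # Placeholder credentials/endpoint; an OpenAI-compatible server is assumed.
    client = AsyncOpenAI(api_key="...", base_url="...")
    tool = AsyncTheTool(client, model="model-name")

    # categorize returns {"result": "<category>"}; with_analysis=True also adds an "analysis" key.
    categorized = await tool.categorize("text to categorize ...", with_analysis=True)

    # extract_keywords returns {"result": ["keyword", ...]}.
    keywords = await tool.extract_keywords("text to mine for keywords ...", output_lang="fa")

    print(categorized, keywords)


asyncio.run(main())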
hamtaa_texttools-1.0.7/texttools/tools/internals/async_operator.py
@@ -0,0 +1,191 @@
+from __future__ import annotations
+
+from typing import Any, TypeVar, Type, Literal
+import logging
+
+from openai import AsyncOpenAI
+from pydantic import BaseModel
+
+from texttools.tools.internals.base_operator import BaseOperator
+from texttools.formatters.user_merge_formatter import (
+    UserMergeFormatter,
+)
+from texttools.tools.internals.prompt_loader import PromptLoader
+
+# Base Model type for output models
+T = TypeVar("T", bound=BaseModel)
+
+# Configure logger
+logger = logging.getLogger("async_operator")
+logger.setLevel(logging.INFO)
+
+
+class AsyncOperator(BaseOperator):
+    """
+    Core engine for running text-processing operations with an LLM (Async).
+
+    It wires together:
+    - `PromptLoader` → loads YAML prompt templates.
+    - `UserMergeFormatter` → applies formatting to messages (e.g., merging).
+    - AsyncOpenAI client → executes completions/parsed completions.
+    """
+
+    def __init__(self, client: AsyncOpenAI, model: str):
+        self.client: AsyncOpenAI = client
+        self.model = model
+
+    async def _analysis_completion(
+        self,
+        analyze_message: list[dict[str, str]],
+        temperature: float,
+    ) -> str:
+        completion = await self.client.chat.completions.create(
+            model=self.model,
+            messages=analyze_message,
+            temperature=temperature,
+        )
+        analysis = completion.choices[0].message.content.strip()
+        return analysis
+
+    async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        analyze_prompt = prompt_configs["analyze_template"]
+        analyze_message = [self._build_user_message(analyze_prompt)]
+        analysis = await self._analysis_completion(analyze_message, temperature)
+        return analysis
+
+    async def _parse_completion(
+        self,
+        message: list[dict[str, str]],
+        output_model: Type[T],
+        temperature: float,
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+    ) -> tuple[Type[T], Any]:
+        request_kwargs = {
+            "model": self.model,
+            "messages": message,
+            "response_format": output_model,
+            "temperature": temperature,
+        }
+
+        if logprobs:
+            request_kwargs["logprobs"] = True
+            request_kwargs["top_logprobs"] = top_logprobs
+
+        completion = await self.client.beta.chat.completions.parse(**request_kwargs)
+        parsed = completion.choices[0].message.parsed
+        return parsed, completion
+
+    async def _vllm_completion(
+        self,
+        message: list[dict[str, str]],
+        output_model: Type[T],
+        temperature: float,
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+    ) -> tuple[Type[T], Any]:
+        json_schema = output_model.model_json_schema()
+
+        # Build kwargs dynamically
+        request_kwargs = {
+            "model": self.model,
+            "messages": message,
+            "extra_body": {"guided_json": json_schema},
+            "temperature": temperature,
+        }
+
+        if logprobs:
+            request_kwargs["logprobs"] = True
+            request_kwargs["top_logprobs"] = top_logprobs
+
+        completion = await self.client.chat.completions.create(**request_kwargs)
+        response = completion.choices[0].message.content
+
+        # Convert the string response to output model
+        parsed = self._convert_to_output_model(response, output_model)
+        return parsed, completion
+
+    async def run(
+        self,
+        # User parameters
+        text: str,
+        with_analysis: bool,
+        output_lang: str | None,
+        user_prompt: str | None,
+        temperature: float,
+        logprobs: bool,
+        top_logprobs: int | None,
+        # Internal parameters
+        prompt_file: str,
+        output_model: Type[T],
+        resp_format: Literal["vllm", "parse"],
+        mode: str | None,
+        **extra_kwargs,
+    ) -> dict[str, Any]:
+        """
+        Execute the async LLM pipeline with the given input text. (Async)
+        """
+        prompt_loader = PromptLoader()
+        formatter = UserMergeFormatter()
+
+        try:
+            cleaned_text = text.strip()
+
+            prompt_configs = prompt_loader.load(
+                prompt_file=prompt_file,
+                text=cleaned_text,
+                mode=mode,
+                **extra_kwargs,
+            )
+
+            messages: list[dict[str, str]] = []
+
+            if with_analysis:
+                analysis = await self._analyze(prompt_configs, temperature)
+                messages.append(
+                    self._build_user_message(f"Based on this analysis: {analysis}")
+                )
+
+            if output_lang:
+                messages.append(
+                    self._build_user_message(
+                        f"Respond only in the {output_lang} language."
+                    )
+                )
+
+            if user_prompt:
+                messages.append(
+                    self._build_user_message(f"Consider this instruction {user_prompt}")
+                )
+
+            messages.append(self._build_user_message(prompt_configs["main_template"]))
+            messages = formatter.format(messages)
+
+            if resp_format == "vllm":
+                parsed, completion = await self._vllm_completion(
+                    messages, output_model, temperature, logprobs, top_logprobs
+                )
+            elif resp_format == "parse":
+                parsed, completion = await self._vllm_completion(
+                    messages, output_model, temperature, logprobs, top_logprobs
+                )
+
+            # Ensure output_model has a `result` field
+            if not hasattr(parsed, "result"):
+                logger.error(
+                    "The provided output_model must define a field named 'result'"
+                )
+
+            results = {"result": parsed.result}
+
+            if logprobs:
+                results["logprobs"] = self._extract_logprobs(completion)
+
+            if with_analysis:
+                results["analysis"] = analysis
+
+            return results
+
+        except Exception as e:
+            logger.error(f"Async TheTool failed: {e}")
+            return {"Error": str(e), "result": ""}
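AsyncOperator.run expects the Pydantic output model to define a field named result (it logs an error otherwise), and run_custom forwards both the model and its model_json_schema() to the operator. A hedged sketch of a caller-defined model follows; TopicListOutput and the prompt string are invented for illustration and are not part of the package:

from pydantic import BaseModel

from texttools.tools.async_the_tool import AsyncTheTool


# Hypothetical caller-defined model; the only hard requirement visible in the
# diff is that it defines a field named "result".
class TopicListOutput(BaseModel):
    result: list[str]


async def extract_topics(tool: AsyncTheTool, paragraph: str) -> dict:
    # run_custom passes the model and its JSON schema through to the operator.
    return await tool.run_custom(
        prompt=f"List the main topics discussed in this paragraph: {paragraph}",
        output_model=TopicListOutput,
        temperature=0.0,
    )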
hamtaa_texttools-1.0.7/texttools/tools/internals/base_operator.py
@@ -0,0 +1,85 @@
+from typing import TypeVar, Type
+import json
+import re
+import math
+
+from pydantic import BaseModel
+from openai import OpenAI, AsyncOpenAI
+
+# Base Model type for output models
+T = TypeVar("T", bound=BaseModel)
+
+
+class BaseOperator:
+    def __init__(self, client: OpenAI | AsyncOpenAI, model: str):
+        self.client = client
+        self.model = model
+
+    def _build_user_message(self, prompt: str) -> dict[str, str]:
+        return {"role": "user", "content": prompt}
+
+    def _clean_json_response(self, response: str) -> str:
+        """
+        Clean JSON response by removing code block markers and whitespace.
+        Handles cases like:
+        - ```json{"result": "value"}```
+        """
+        stripped = response.strip()
+        cleaned = re.sub(r"^```(?:json)?\s*", "", stripped)
+        cleaned = re.sub(r"\s*```$", "", cleaned)
+
+        return cleaned.strip()
+
+    def _convert_to_output_model(
+        self, response_string: str, output_model: Type[T]
+    ) -> Type[T]:
+        """
+        Convert a JSON response string to output model.
+
+        Args:
+            response_string: The JSON string (may contain code block markers)
+            output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
+
+        Returns:
+            Instance of your output model
+        """
+        # Clean the response string
+        cleaned_json = self._clean_json_response(response_string)
+
+        # Fix Python-style booleans
+        cleaned_json = cleaned_json.replace("False", "false").replace("True", "true")
+
+        # Convert string to Python dictionary
+        response_dict = json.loads(cleaned_json)
+
+        # Convert dictionary to output model
+        return output_model(**response_dict)
+
+    def _extract_logprobs(self, completion: dict):
+        logprobs_data = []
+        ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
+
+        for choice in completion.choices:
+            if not getattr(choice, "logprobs", None):
+                continue
+
+            for logprob_item in choice.logprobs.content:
+                if ignore_pattern.match(logprob_item.token):
+                    continue
+                token_entry = {
+                    "token": logprob_item.token,
+                    "prob": round(math.exp(logprob_item.logprob), 8),
+                    "top_alternatives": [],
+                }
+                for alt in logprob_item.top_logprobs:
+                    if ignore_pattern.match(alt.token):
+                        continue
+                    token_entry["top_alternatives"].append(
+                        {
+                            "token": alt.token,
+                            "prob": round(math.exp(alt.logprob), 8),
+                        }
+                    )
+                logprobs_data.append(token_entry)
+
+        return logprobs_data
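The BaseOperator helpers above strip code-fence markers, normalize Python-style booleans, and validate the cleaned JSON against the caller's model. A small illustrative exercise of those (private) helpers, calling them directly only for demonstration; StrOutput here is a stand-in for the packaged model of the same name in output_models.py:

from pydantic import BaseModel

from texttools.tools.internals.base_operator import BaseOperator


class StrOutput(BaseModel):  # stand-in for texttools' own StrOutput
    result: str


op = BaseOperator(client=None, model="model-name")  # client is unused by these helpers
raw = '```json\n{"result": "a short summary"}\n```'

print(op._clean_json_response(raw))                        # {"result": "a short summary"}
print(op._convert_to_output_model(raw, StrOutput).result)  # a short summary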