hamtaa-texttools 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hamtaa-texttools might be problematic. Click here for more details.
- {hamtaa_texttools-1.0.5.dist-info → hamtaa_texttools-1.0.7.dist-info}/METADATA +15 -15
- hamtaa_texttools-1.0.7.dist-info/RECORD +31 -0
- texttools/batch/batch_manager.py +7 -18
- texttools/batch/batch_runner.py +96 -45
- texttools/prompts/README.md +4 -0
- texttools/prompts/{keyword_extractor.yaml → extract_keywords.yaml} +6 -6
- texttools/prompts/{question_merger.yaml → merge_questions.yaml} +5 -5
- texttools/tools/async_the_tool.py +204 -143
- texttools/tools/internals/async_operator.py +98 -204
- texttools/tools/internals/base_operator.py +85 -0
- texttools/tools/internals/operator.py +27 -130
- texttools/tools/internals/prompt_loader.py +12 -22
- texttools/tools/the_tool.py +162 -225
- hamtaa_texttools-1.0.5.dist-info/RECORD +0 -30
- {hamtaa_texttools-1.0.5.dist-info → hamtaa_texttools-1.0.7.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-1.0.5.dist-info → hamtaa_texttools-1.0.7.dist-info}/licenses/LICENSE +0 -0
- {hamtaa_texttools-1.0.5.dist-info → hamtaa_texttools-1.0.7.dist-info}/top_level.txt +0 -0
- /texttools/prompts/{ner_extractor.yaml → extract_entities.yaml} +0 -0
- /texttools/prompts/{question_detector.yaml → is_question.yaml} +0 -0
- /texttools/prompts/{rewriter.yaml → rewrite.yaml} +0 -0
- /texttools/prompts/{custom_tool.yaml → run_custom.yaml} +0 -0
- /texttools/prompts/{subject_question_generator.yaml → subject_to_question.yaml} +0 -0
- /texttools/prompts/{summarizer.yaml → summarize.yaml} +0 -0
- /texttools/prompts/{question_generator.yaml → text_to_question.yaml} +0 -0
- /texttools/prompts/{translator.yaml → translate.yaml} +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
from typing import Literal
|
|
1
|
+
from typing import Literal, Any
|
|
2
2
|
|
|
3
3
|
from openai import AsyncOpenAI
|
|
4
4
|
|
|
5
|
-
import texttools.tools.internals.output_models as OutputModels
|
|
6
5
|
from texttools.tools.internals.async_operator import AsyncOperator
|
|
6
|
+
import texttools.tools.internals.output_models as OutputModels
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class AsyncTheTool:
|
|
@@ -12,266 +12,327 @@ class AsyncTheTool:
|
|
|
12
12
|
|
|
13
13
|
Usage:
|
|
14
14
|
async_client = AsyncOpenAI(...)
|
|
15
|
-
tool = TheToolAsync(async_client, model="
|
|
16
|
-
result = await tool.categorize("
|
|
15
|
+
tool = AsyncTheTool(async_client, model="model-name")
|
|
16
|
+
result = await tool.categorize("text ...", with_analysis=True)
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
19
|
def __init__(
|
|
20
20
|
self,
|
|
21
21
|
client: AsyncOpenAI,
|
|
22
|
-
*,
|
|
23
22
|
model: str,
|
|
24
|
-
temperature: float = 0.0,
|
|
25
23
|
):
|
|
26
|
-
self.operator = AsyncOperator(
|
|
27
|
-
client=client,
|
|
28
|
-
model=model,
|
|
29
|
-
temperature=temperature,
|
|
30
|
-
)
|
|
24
|
+
self.operator = AsyncOperator(client=client, model=model)
|
|
31
25
|
|
|
32
26
|
async def categorize(
|
|
33
27
|
self,
|
|
34
28
|
text: str,
|
|
35
29
|
with_analysis: bool = False,
|
|
36
|
-
|
|
30
|
+
output_lang: str | None = None,
|
|
31
|
+
user_prompt: str | None = None,
|
|
32
|
+
temperature: float | None = 0.0,
|
|
37
33
|
logprobs: bool = False,
|
|
38
|
-
top_logprobs: int =
|
|
39
|
-
max_tokens: int | None = None,
|
|
34
|
+
top_logprobs: int | None = None,
|
|
40
35
|
) -> dict[str, str]:
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
36
|
+
"""
|
|
37
|
+
Categorize a text into a single Islamic studies domain category.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
text: Input string to categorize.
|
|
41
|
+
with_analysis: If True, first runs an LLM "analysis" step and
|
|
42
|
+
conditions the main prompt on that analysis.
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
{"result": <category string>}
|
|
46
|
+
Example: {"result": "باورهای دینی"}
|
|
47
|
+
"""
|
|
48
|
+
return await self.operator.run(
|
|
49
|
+
# User parameters
|
|
50
|
+
text=text,
|
|
45
51
|
with_analysis=with_analysis,
|
|
46
|
-
|
|
52
|
+
output_lang=output_lang,
|
|
47
53
|
user_prompt=user_prompt,
|
|
54
|
+
temperature=temperature,
|
|
48
55
|
logprobs=logprobs,
|
|
49
56
|
top_logprobs=top_logprobs,
|
|
50
|
-
|
|
57
|
+
# Internal parameters
|
|
58
|
+
prompt_file="categorizer.yaml",
|
|
59
|
+
output_model=OutputModels.CategorizerOutput,
|
|
60
|
+
resp_format="parse",
|
|
61
|
+
mode=None,
|
|
51
62
|
)
|
|
52
|
-
return results
|
|
53
63
|
|
|
54
64
|
async def extract_keywords(
|
|
55
65
|
self,
|
|
56
66
|
text: str,
|
|
57
|
-
output_lang: str | None = None,
|
|
58
67
|
with_analysis: bool = False,
|
|
59
|
-
|
|
68
|
+
output_lang: str | None = None,
|
|
69
|
+
user_prompt: str | None = None,
|
|
70
|
+
temperature: float | None = 0.0,
|
|
60
71
|
logprobs: bool = False,
|
|
61
|
-
top_logprobs: int =
|
|
62
|
-
max_tokens: int | None = None,
|
|
72
|
+
top_logprobs: int | None = None,
|
|
63
73
|
) -> dict[str, list[str]]:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
output_model=OutputModels.ListStrOutput,
|
|
74
|
+
return await self.operator.run(
|
|
75
|
+
# User parameters
|
|
76
|
+
text=text,
|
|
68
77
|
with_analysis=with_analysis,
|
|
69
|
-
resp_format="parse",
|
|
70
|
-
user_prompt=user_prompt,
|
|
71
78
|
output_lang=output_lang,
|
|
79
|
+
user_prompt=user_prompt,
|
|
80
|
+
temperature=temperature,
|
|
72
81
|
logprobs=logprobs,
|
|
73
82
|
top_logprobs=top_logprobs,
|
|
74
|
-
|
|
83
|
+
# Internal parameters
|
|
84
|
+
prompt_file="extract_keywords.yaml",
|
|
85
|
+
output_model=OutputModels.ListStrOutput,
|
|
86
|
+
resp_format="parse",
|
|
87
|
+
mode=None,
|
|
75
88
|
)
|
|
76
|
-
return results
|
|
77
89
|
|
|
78
90
|
async def extract_entities(
|
|
79
91
|
self,
|
|
80
92
|
text: str,
|
|
81
|
-
output_lang: str | None = None,
|
|
82
93
|
with_analysis: bool = False,
|
|
83
|
-
|
|
94
|
+
output_lang: str | None = None,
|
|
95
|
+
user_prompt: str | None = None,
|
|
96
|
+
temperature: float | None = 0.0,
|
|
84
97
|
logprobs: bool = False,
|
|
85
|
-
top_logprobs: int =
|
|
86
|
-
max_tokens: int | None = None,
|
|
98
|
+
top_logprobs: int | None = None,
|
|
87
99
|
) -> dict[str, list[dict[str, str]]]:
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
output_model=OutputModels.ListDictStrStrOutput,
|
|
100
|
+
return await self.operator.run(
|
|
101
|
+
# User parameters
|
|
102
|
+
text=text,
|
|
92
103
|
with_analysis=with_analysis,
|
|
93
|
-
resp_format="parse",
|
|
94
|
-
user_prompt=user_prompt,
|
|
95
104
|
output_lang=output_lang,
|
|
105
|
+
user_prompt=user_prompt,
|
|
106
|
+
temperature=temperature,
|
|
96
107
|
logprobs=logprobs,
|
|
97
108
|
top_logprobs=top_logprobs,
|
|
98
|
-
|
|
109
|
+
# Internal parameters
|
|
110
|
+
prompt_file="extract_entities.yaml",
|
|
111
|
+
output_model=OutputModels.ListDictStrStrOutput,
|
|
112
|
+
resp_format="parse",
|
|
113
|
+
mode=None,
|
|
99
114
|
)
|
|
100
|
-
return results
|
|
101
115
|
|
|
102
|
-
async def
|
|
116
|
+
async def is_question(
|
|
103
117
|
self,
|
|
104
|
-
|
|
105
|
-
output_lang: str | None = None,
|
|
118
|
+
text: str,
|
|
106
119
|
with_analysis: bool = False,
|
|
107
|
-
user_prompt: str =
|
|
120
|
+
user_prompt: str | None = None,
|
|
121
|
+
temperature: float | None = 0.0,
|
|
108
122
|
logprobs: bool = False,
|
|
109
|
-
top_logprobs: int =
|
|
110
|
-
max_tokens: int | None = None,
|
|
123
|
+
top_logprobs: int | None = None,
|
|
111
124
|
) -> dict[str, bool]:
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
output_model=OutputModels.BoolOutput,
|
|
125
|
+
return await self.operator.run(
|
|
126
|
+
# User parameters
|
|
127
|
+
text=text,
|
|
116
128
|
with_analysis=with_analysis,
|
|
117
|
-
resp_format="parse",
|
|
118
129
|
user_prompt=user_prompt,
|
|
119
|
-
|
|
130
|
+
temperature=temperature,
|
|
120
131
|
logprobs=logprobs,
|
|
121
132
|
top_logprobs=top_logprobs,
|
|
122
|
-
|
|
133
|
+
# Internal parameters
|
|
134
|
+
prompt_file="is_question.yaml",
|
|
135
|
+
output_model=OutputModels.BoolOutput,
|
|
136
|
+
resp_format="parse",
|
|
137
|
+
mode=None,
|
|
138
|
+
output_lang=None,
|
|
123
139
|
)
|
|
124
|
-
return results
|
|
125
140
|
|
|
126
|
-
async def
|
|
141
|
+
async def text_to_question(
|
|
127
142
|
self,
|
|
128
143
|
text: str,
|
|
129
|
-
output_lang: str | None = None,
|
|
130
144
|
with_analysis: bool = False,
|
|
131
|
-
|
|
145
|
+
output_lang: str | None = None,
|
|
146
|
+
user_prompt: str | None = None,
|
|
147
|
+
temperature: float | None = 0.0,
|
|
132
148
|
logprobs: bool = False,
|
|
133
|
-
top_logprobs: int =
|
|
134
|
-
max_tokens: int | None = None,
|
|
149
|
+
top_logprobs: int | None = None,
|
|
135
150
|
) -> dict[str, str]:
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
output_model=OutputModels.StrOutput,
|
|
151
|
+
return await self.operator.run(
|
|
152
|
+
# User parameters
|
|
153
|
+
text=text,
|
|
140
154
|
with_analysis=with_analysis,
|
|
141
|
-
resp_format="parse",
|
|
142
|
-
user_prompt=user_prompt,
|
|
143
155
|
output_lang=output_lang,
|
|
156
|
+
user_prompt=user_prompt,
|
|
157
|
+
temperature=temperature,
|
|
144
158
|
logprobs=logprobs,
|
|
145
159
|
top_logprobs=top_logprobs,
|
|
146
|
-
|
|
160
|
+
# Internal parameters
|
|
161
|
+
prompt_file="text_to_question.yaml",
|
|
162
|
+
output_model=OutputModels.StrOutput,
|
|
163
|
+
resp_format="parse",
|
|
164
|
+
mode=None,
|
|
147
165
|
)
|
|
148
|
-
return results
|
|
149
166
|
|
|
150
167
|
async def merge_questions(
|
|
151
168
|
self,
|
|
152
|
-
|
|
153
|
-
output_lang: str | None = None,
|
|
154
|
-
mode: Literal["default", "reason"] = "default",
|
|
169
|
+
text: list[str],
|
|
155
170
|
with_analysis: bool = False,
|
|
156
|
-
|
|
171
|
+
output_lang: str | None = None,
|
|
172
|
+
user_prompt: str | None = None,
|
|
173
|
+
temperature: float | None = 0.0,
|
|
157
174
|
logprobs: bool = False,
|
|
158
|
-
top_logprobs: int =
|
|
159
|
-
|
|
175
|
+
top_logprobs: int | None = None,
|
|
176
|
+
mode: Literal["default", "reason"] = "default",
|
|
160
177
|
) -> dict[str, str]:
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
output_model=OutputModels.StrOutput,
|
|
178
|
+
text = ", ".join(text)
|
|
179
|
+
return await self.operator.run(
|
|
180
|
+
# User parameters
|
|
181
|
+
text=text,
|
|
166
182
|
with_analysis=with_analysis,
|
|
167
|
-
use_modes=True,
|
|
168
|
-
mode=mode,
|
|
169
|
-
resp_format="parse",
|
|
170
|
-
user_prompt=user_prompt,
|
|
171
183
|
output_lang=output_lang,
|
|
184
|
+
user_prompt=user_prompt,
|
|
185
|
+
temperature=temperature,
|
|
172
186
|
logprobs=logprobs,
|
|
173
187
|
top_logprobs=top_logprobs,
|
|
174
|
-
|
|
188
|
+
# Internal parameters
|
|
189
|
+
prompt_file="merge_questions.yaml",
|
|
190
|
+
output_model=OutputModels.StrOutput,
|
|
191
|
+
resp_format="parse",
|
|
192
|
+
mode=mode,
|
|
175
193
|
)
|
|
176
|
-
return results
|
|
177
194
|
|
|
178
195
|
async def rewrite(
|
|
179
196
|
self,
|
|
180
|
-
|
|
181
|
-
output_lang: str | None = None,
|
|
182
|
-
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
197
|
+
text: str,
|
|
183
198
|
with_analysis: bool = False,
|
|
184
|
-
|
|
199
|
+
output_lang: str | None = None,
|
|
200
|
+
user_prompt: str | None = None,
|
|
201
|
+
temperature: float | None = 0.0,
|
|
185
202
|
logprobs: bool = False,
|
|
186
|
-
top_logprobs: int =
|
|
187
|
-
|
|
203
|
+
top_logprobs: int | None = None,
|
|
204
|
+
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
188
205
|
) -> dict[str, str]:
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
output_model=OutputModels.StrOutput,
|
|
206
|
+
return await self.operator.run(
|
|
207
|
+
# User parameters
|
|
208
|
+
text=text,
|
|
193
209
|
with_analysis=with_analysis,
|
|
194
|
-
use_modes=True,
|
|
195
|
-
mode=mode,
|
|
196
|
-
resp_format="parse",
|
|
197
|
-
user_prompt=user_prompt,
|
|
198
210
|
output_lang=output_lang,
|
|
211
|
+
user_prompt=user_prompt,
|
|
212
|
+
temperature=temperature,
|
|
199
213
|
logprobs=logprobs,
|
|
200
214
|
top_logprobs=top_logprobs,
|
|
201
|
-
|
|
215
|
+
# Internal parameters
|
|
216
|
+
prompt_file="rewrite.yaml",
|
|
217
|
+
output_model=OutputModels.StrOutput,
|
|
218
|
+
resp_format="parse",
|
|
219
|
+
mode=mode,
|
|
202
220
|
)
|
|
203
|
-
return results
|
|
204
221
|
|
|
205
|
-
async def
|
|
222
|
+
async def subject_to_question(
|
|
206
223
|
self,
|
|
207
|
-
|
|
224
|
+
text: str,
|
|
208
225
|
number_of_questions: int,
|
|
209
|
-
output_lang: str | None = None,
|
|
210
226
|
with_analysis: bool = False,
|
|
211
|
-
|
|
227
|
+
output_lang: str | None = None,
|
|
228
|
+
user_prompt: str | None = None,
|
|
229
|
+
temperature: float | None = 0.0,
|
|
212
230
|
logprobs: bool = False,
|
|
213
|
-
top_logprobs: int =
|
|
214
|
-
max_tokens: int | None = None,
|
|
231
|
+
top_logprobs: int | None = None,
|
|
215
232
|
) -> dict[str, list[str]]:
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
output_model=OutputModels.ReasonListStrOutput,
|
|
220
|
-
with_analysis=with_analysis,
|
|
221
|
-
resp_format="parse",
|
|
222
|
-
user_prompt=user_prompt,
|
|
233
|
+
return await self.operator.run(
|
|
234
|
+
# User parameters
|
|
235
|
+
text=text,
|
|
223
236
|
number_of_questions=number_of_questions,
|
|
237
|
+
with_analysis=with_analysis,
|
|
224
238
|
output_lang=output_lang,
|
|
239
|
+
user_prompt=user_prompt,
|
|
240
|
+
temperature=temperature,
|
|
225
241
|
logprobs=logprobs,
|
|
226
242
|
top_logprobs=top_logprobs,
|
|
227
|
-
|
|
243
|
+
# Internal parameters
|
|
244
|
+
prompt_file="subject_to_question.yaml",
|
|
245
|
+
output_model=OutputModels.ReasonListStrOutput,
|
|
246
|
+
resp_format="parse",
|
|
247
|
+
mode=None,
|
|
228
248
|
)
|
|
229
|
-
return results
|
|
230
249
|
|
|
231
250
|
async def summarize(
|
|
232
251
|
self,
|
|
233
252
|
text: str,
|
|
234
|
-
output_lang: str | None = None,
|
|
235
253
|
with_analysis: bool = False,
|
|
236
|
-
|
|
254
|
+
output_lang: str | None = None,
|
|
255
|
+
user_prompt: str | None = None,
|
|
256
|
+
temperature: float | None = 0.0,
|
|
237
257
|
logprobs: bool = False,
|
|
238
|
-
top_logprobs: int =
|
|
239
|
-
max_tokens: int | None = None,
|
|
258
|
+
top_logprobs: int | None = None,
|
|
240
259
|
) -> dict[str, str]:
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
output_model=OutputModels.StrOutput,
|
|
260
|
+
return await self.operator.run(
|
|
261
|
+
# User parameters
|
|
262
|
+
text=text,
|
|
245
263
|
with_analysis=with_analysis,
|
|
246
|
-
resp_format="parse",
|
|
247
|
-
user_prompt=user_prompt,
|
|
248
264
|
output_lang=output_lang,
|
|
265
|
+
user_prompt=user_prompt,
|
|
266
|
+
temperature=temperature,
|
|
249
267
|
logprobs=logprobs,
|
|
250
268
|
top_logprobs=top_logprobs,
|
|
251
|
-
|
|
269
|
+
# Internal parameters
|
|
270
|
+
prompt_file="summarize.yaml",
|
|
271
|
+
output_model=OutputModels.StrOutput,
|
|
272
|
+
resp_format="parse",
|
|
273
|
+
mode=None,
|
|
252
274
|
)
|
|
253
|
-
return results
|
|
254
275
|
|
|
255
276
|
async def translate(
|
|
256
277
|
self,
|
|
257
278
|
text: str,
|
|
258
279
|
target_language: str,
|
|
259
280
|
with_analysis: bool = False,
|
|
260
|
-
|
|
281
|
+
output_lang: str | None = None,
|
|
282
|
+
user_prompt: str | None = None,
|
|
283
|
+
temperature: float | None = 0.0,
|
|
261
284
|
logprobs: bool = False,
|
|
262
|
-
top_logprobs: int =
|
|
263
|
-
max_tokens: int | None = None,
|
|
285
|
+
top_logprobs: int | None = None,
|
|
264
286
|
) -> dict[str, str]:
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
287
|
+
return await self.operator.run(
|
|
288
|
+
# User parameters
|
|
289
|
+
text=text,
|
|
290
|
+
target_language=target_language,
|
|
269
291
|
with_analysis=with_analysis,
|
|
270
|
-
|
|
292
|
+
output_lang=output_lang,
|
|
271
293
|
user_prompt=user_prompt,
|
|
272
|
-
|
|
294
|
+
temperature=temperature,
|
|
295
|
+
logprobs=logprobs,
|
|
296
|
+
top_logprobs=top_logprobs,
|
|
297
|
+
# Internal parameters
|
|
298
|
+
prompt_file="translate.yaml",
|
|
299
|
+
output_model=OutputModels.StrOutput,
|
|
300
|
+
resp_format="parse",
|
|
301
|
+
mode=None,
|
|
302
|
+
)
|
|
303
|
+
|
|
304
|
+
async def run_custom(
|
|
305
|
+
self,
|
|
306
|
+
prompt: str,
|
|
307
|
+
output_model: Any,
|
|
308
|
+
output_lang: str | None = None,
|
|
309
|
+
temperature: float | None = None,
|
|
310
|
+
logprobs: bool | None = None,
|
|
311
|
+
top_logprobs: int | None = None,
|
|
312
|
+
) -> dict[str, Any]:
|
|
313
|
+
"""
|
|
314
|
+
Custom tool that can do almost anything!
|
|
315
|
+
|
|
316
|
+
Args:
|
|
317
|
+
prompt: Custom prompt.
|
|
318
|
+
output_model: Custom BaseModel output model.
|
|
319
|
+
|
|
320
|
+
Returns:
|
|
321
|
+
{"result": <Any>}
|
|
322
|
+
"""
|
|
323
|
+
return await self.operator.run(
|
|
324
|
+
# User parameters
|
|
325
|
+
text=prompt,
|
|
326
|
+
output_model=output_model,
|
|
327
|
+
output_model_str=output_model.model_json_schema(),
|
|
328
|
+
output_lang=output_lang,
|
|
329
|
+
temperature=temperature,
|
|
273
330
|
logprobs=logprobs,
|
|
274
331
|
top_logprobs=top_logprobs,
|
|
275
|
-
|
|
332
|
+
# Internal parameters
|
|
333
|
+
prompt_file="run_custom.yaml",
|
|
334
|
+
resp_format="parse",
|
|
335
|
+
user_prompt=None,
|
|
336
|
+
with_analysis=False,
|
|
337
|
+
mode=None,
|
|
276
338
|
)
|
|
277
|
-
return results
|