hamtaa-texttools 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (32)
  1. {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/METADATA +192 -141
  2. hamtaa_texttools-1.0.5.dist-info/RECORD +30 -0
  3. {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/licenses/LICENSE +20 -20
  4. {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/top_level.txt +0 -0
  5. texttools/__init__.py +9 -9
  6. texttools/batch/__init__.py +4 -4
  7. texttools/batch/batch_manager.py +240 -240
  8. texttools/batch/batch_runner.py +212 -212
  9. texttools/formatters/base_formatter.py +33 -33
  10. texttools/formatters/{user_merge_formatter/user_merge_formatter.py → user_merge_formatter.py} +30 -30
  11. texttools/prompts/README.md +31 -31
  12. texttools/prompts/categorizer.yaml +28 -31
  13. texttools/prompts/custom_tool.yaml +7 -0
  14. texttools/prompts/keyword_extractor.yaml +18 -14
  15. texttools/prompts/ner_extractor.yaml +20 -21
  16. texttools/prompts/question_detector.yaml +13 -14
  17. texttools/prompts/question_generator.yaml +19 -22
  18. texttools/prompts/question_merger.yaml +45 -48
  19. texttools/prompts/rewriter.yaml +111 -0
  20. texttools/prompts/subject_question_generator.yaml +22 -26
  21. texttools/prompts/summarizer.yaml +13 -11
  22. texttools/prompts/translator.yaml +14 -14
  23. texttools/tools/__init__.py +4 -4
  24. texttools/tools/async_the_tool.py +277 -263
  25. texttools/tools/internals/async_operator.py +297 -288
  26. texttools/tools/internals/operator.py +295 -306
  27. texttools/tools/internals/output_models.py +52 -62
  28. texttools/tools/internals/prompt_loader.py +76 -82
  29. texttools/tools/the_tool.py +501 -400
  30. hamtaa_texttools-1.0.4.dist-info/RECORD +0 -29
  31. texttools/prompts/question_rewriter.yaml +0 -46
  32. {hamtaa_texttools-1.0.4.dist-info → hamtaa_texttools-1.0.5.dist-info}/WHEEL +0 -0
@@ -1,263 +1,277 @@
1
- from typing import Any, Literal, Optional
2
-
3
- # async clients / operator
4
- from openai import AsyncOpenAI
5
-
6
- import texttools.tools.internals.output_models as OutputModels
7
- from texttools.tools.internals.async_operator import AsyncOperator
8
-
9
-
10
- class AsyncTheTool:
11
- """
12
- Async counterpart to TheTool.
13
-
14
- Usage:
15
- async_client = AsyncOpenAI(...)
16
- tool = TheToolAsync(async_client, model="gemma-3")
17
- result = await tool.categorize("متن ...", with_analysis=True)
18
- """
19
-
20
- def __init__(
21
- self,
22
- client: AsyncOpenAI,
23
- *,
24
- model: str,
25
- temperature: float = 0.0,
26
- **client_kwargs: Any,
27
- ):
28
- self.operator = AsyncOperator(
29
- client=client,
30
- model=model,
31
- temperature=temperature,
32
- **client_kwargs,
33
- )
34
-
35
- async def categorize(
36
- self,
37
- text: str,
38
- with_analysis: bool = False,
39
- user_prompt: str = "",
40
- logprobs: bool = False,
41
- top_logprobs: int = 8,
42
- ) -> dict[str, str]:
43
- results = await self.operator.run(
44
- text,
45
- prompt_file="categorizer.yaml",
46
- output_model=OutputModels.CategorizerOutput,
47
- with_analysis=with_analysis,
48
- resp_format="parse",
49
- user_prompt=user_prompt,
50
- logprobs=logprobs,
51
- top_logprobs=top_logprobs,
52
- )
53
- return results
54
-
55
- async def extract_keywords(
56
- self,
57
- text: str,
58
- output_lang: Optional[str] = None,
59
- with_analysis: bool = False,
60
- user_prompt: str = "",
61
- logprobs: bool = False,
62
- top_logprobs: int = 3,
63
- ) -> dict[str, list[str]]:
64
- results = await self.operator.run(
65
- text,
66
- prompt_file="keyword_extractor.yaml",
67
- output_model=OutputModels.ListStrOutput,
68
- with_analysis=with_analysis,
69
- resp_format="parse",
70
- user_prompt=user_prompt,
71
- output_lang=output_lang,
72
- logprobs=logprobs,
73
- top_logprobs=top_logprobs,
74
- )
75
- return results
76
-
77
- async def extract_entities(
78
- self,
79
- text: str,
80
- output_lang: Optional[str] = None,
81
- with_analysis: bool = False,
82
- user_prompt: str = "",
83
- logprobs: bool = False,
84
- top_logprobs: int = 3,
85
- ) -> dict[str, list[dict[str, str]]]:
86
- results = await self.operator.run(
87
- text,
88
- prompt_file="ner_extractor.yaml",
89
- output_model=OutputModels.ListDictStrStrOutput,
90
- with_analysis=with_analysis,
91
- resp_format="parse",
92
- user_prompt=user_prompt,
93
- output_lang=output_lang,
94
- logprobs=logprobs,
95
- top_logprobs=top_logprobs,
96
- )
97
- return results
98
-
99
- async def detect_question(
100
- self,
101
- question: str,
102
- output_lang: Optional[str] = None,
103
- with_analysis: bool = False,
104
- user_prompt: str = "",
105
- logprobs: bool = False,
106
- top_logprobs: int = 2,
107
- ) -> dict[str, bool]:
108
- results = await self.operator.run(
109
- question,
110
- prompt_file="question_detector.yaml",
111
- output_model=OutputModels.BoolOutput,
112
- with_analysis=with_analysis,
113
- resp_format="parse",
114
- user_prompt=user_prompt,
115
- output_lang=output_lang,
116
- logprobs=logprobs,
117
- top_logprobs=top_logprobs,
118
- )
119
- return results
120
-
121
- async def generate_question_from_text(
122
- self,
123
- text: str,
124
- output_lang: Optional[str] = None,
125
- with_analysis: bool = False,
126
- user_prompt: str = "",
127
- logprobs: bool = False,
128
- top_logprobs: int = 3,
129
- ) -> dict[str, str]:
130
- results = await self.operator.run(
131
- text,
132
- prompt_file="question_generator.yaml",
133
- output_model=OutputModels.StrOutput,
134
- with_analysis=with_analysis,
135
- resp_format="parse",
136
- user_prompt=user_prompt,
137
- output_lang=output_lang,
138
- logprobs=logprobs,
139
- top_logprobs=top_logprobs,
140
- )
141
- return results
142
-
143
- async def merge_questions(
144
- self,
145
- questions: list[str],
146
- output_lang: Optional[str] = None,
147
- mode: Literal["default", "reason"] = "default",
148
- with_analysis: bool = False,
149
- user_prompt: str = "",
150
- logprobs: bool = False,
151
- top_logprobs: int = 3,
152
- ) -> dict[str, str]:
153
- question_str = ", ".join(questions)
154
- results = await self.operator.run(
155
- question_str,
156
- prompt_file="question_merger.yaml",
157
- output_model=OutputModels.StrOutput,
158
- with_analysis=with_analysis,
159
- use_modes=True,
160
- mode=mode,
161
- resp_format="parse",
162
- user_prompt=user_prompt,
163
- output_lang=output_lang,
164
- logprobs=logprobs,
165
- top_logprobs=top_logprobs,
166
- )
167
- return results
168
-
169
- async def rewrite_question(
170
- self,
171
- question: str,
172
- output_lang: Optional[str] = None,
173
- mode: Literal[
174
- "same_meaning_different_wording",
175
- "different_meaning_similar_wording",
176
- ] = "same_meaning_different_wording",
177
- with_analysis: bool = False,
178
- user_prompt: str = "",
179
- logprobs: bool = False,
180
- top_logprobs: int = 3,
181
- ) -> dict[str, str]:
182
- results = await self.operator.run(
183
- question,
184
- prompt_file="question_rewriter.yaml",
185
- output_model=OutputModels.StrOutput,
186
- with_analysis=with_analysis,
187
- use_modes=True,
188
- mode=mode,
189
- resp_format="parse",
190
- user_prompt=user_prompt,
191
- output_lang=output_lang,
192
- logprobs=logprobs,
193
- top_logprobs=top_logprobs,
194
- )
195
- return results
196
-
197
- async def generate_questions_from_subject(
198
- self,
199
- subject: str,
200
- number_of_questions: int,
201
- output_lang: Optional[str] = None,
202
- with_analysis: bool = False,
203
- user_prompt: str = "",
204
- logprobs: bool = False,
205
- top_logprobs: int = 3,
206
- ) -> dict[str, list[str]]:
207
- results = await self.operator.run(
208
- subject,
209
- prompt_file="subject_question_generator.yaml",
210
- output_model=OutputModels.ReasonListStrOutput,
211
- with_analysis=with_analysis,
212
- resp_format="parse",
213
- user_prompt=user_prompt,
214
- number_of_questions=number_of_questions,
215
- output_lang=output_lang,
216
- logprobs=logprobs,
217
- top_logprobs=top_logprobs,
218
- )
219
- return results
220
-
221
- async def summarize(
222
- self,
223
- text: str,
224
- output_lang: Optional[str] = None,
225
- with_analysis: bool = False,
226
- user_prompt: str = "",
227
- logprobs: bool = False,
228
- top_logprobs: int = 3,
229
- ) -> dict[str, str]:
230
- results = await self.operator.run(
231
- text,
232
- prompt_file="summarizer.yaml",
233
- output_model=OutputModels.StrOutput,
234
- with_analysis=with_analysis,
235
- resp_format="parse",
236
- user_prompt=user_prompt,
237
- output_lang=output_lang,
238
- logprobs=logprobs,
239
- top_logprobs=top_logprobs,
240
- )
241
- return results
242
-
243
- async def translate(
244
- self,
245
- text: str,
246
- target_language: str,
247
- with_analysis: bool = False,
248
- user_prompt: str = "",
249
- logprobs: bool = False,
250
- top_logprobs: int = 3,
251
- ) -> dict[str, str]:
252
- results = await self.operator.run(
253
- text,
254
- prompt_file="translator.yaml",
255
- output_model=OutputModels.StrOutput,
256
- with_analysis=with_analysis,
257
- resp_format="parse",
258
- user_prompt=user_prompt,
259
- target_language=target_language,
260
- logprobs=logprobs,
261
- top_logprobs=top_logprobs,
262
- )
263
- return results
1
+ from typing import Literal
2
+
3
+ from openai import AsyncOpenAI
4
+
5
+ import texttools.tools.internals.output_models as OutputModels
6
+ from texttools.tools.internals.async_operator import AsyncOperator
7
+
8
+
9
+ class AsyncTheTool:
10
+ """
11
+ Async counterpart to TheTool.
12
+
13
+ Usage:
14
+ async_client = AsyncOpenAI(...)
15
+ tool = TheToolAsync(async_client, model="gemma-3")
16
+ result = await tool.categorize("متن ...", with_analysis=True)
17
+ """
18
+
19
+ def __init__(
20
+ self,
21
+ client: AsyncOpenAI,
22
+ *,
23
+ model: str,
24
+ temperature: float = 0.0,
25
+ ):
26
+ self.operator = AsyncOperator(
27
+ client=client,
28
+ model=model,
29
+ temperature=temperature,
30
+ )
31
+
32
+ async def categorize(
33
+ self,
34
+ text: str,
35
+ with_analysis: bool = False,
36
+ user_prompt: str = "",
37
+ logprobs: bool = False,
38
+ top_logprobs: int = 8,
39
+ max_tokens: int | None = None,
40
+ ) -> dict[str, str]:
41
+ results = await self.operator.run(
42
+ text,
43
+ prompt_file="categorizer.yaml",
44
+ output_model=OutputModels.CategorizerOutput,
45
+ with_analysis=with_analysis,
46
+ resp_format="parse",
47
+ user_prompt=user_prompt,
48
+ logprobs=logprobs,
49
+ top_logprobs=top_logprobs,
50
+ max_tokens=max_tokens,
51
+ )
52
+ return results
53
+
54
+ async def extract_keywords(
55
+ self,
56
+ text: str,
57
+ output_lang: str | None = None,
58
+ with_analysis: bool = False,
59
+ user_prompt: str = "",
60
+ logprobs: bool = False,
61
+ top_logprobs: int = 3,
62
+ max_tokens: int | None = None,
63
+ ) -> dict[str, list[str]]:
64
+ results = await self.operator.run(
65
+ text,
66
+ prompt_file="keyword_extractor.yaml",
67
+ output_model=OutputModels.ListStrOutput,
68
+ with_analysis=with_analysis,
69
+ resp_format="parse",
70
+ user_prompt=user_prompt,
71
+ output_lang=output_lang,
72
+ logprobs=logprobs,
73
+ top_logprobs=top_logprobs,
74
+ max_tokens=max_tokens,
75
+ )
76
+ return results
77
+
78
+ async def extract_entities(
79
+ self,
80
+ text: str,
81
+ output_lang: str | None = None,
82
+ with_analysis: bool = False,
83
+ user_prompt: str = "",
84
+ logprobs: bool = False,
85
+ top_logprobs: int = 3,
86
+ max_tokens: int | None = None,
87
+ ) -> dict[str, list[dict[str, str]]]:
88
+ results = await self.operator.run(
89
+ text,
90
+ prompt_file="ner_extractor.yaml",
91
+ output_model=OutputModels.ListDictStrStrOutput,
92
+ with_analysis=with_analysis,
93
+ resp_format="parse",
94
+ user_prompt=user_prompt,
95
+ output_lang=output_lang,
96
+ logprobs=logprobs,
97
+ top_logprobs=top_logprobs,
98
+ max_tokens=max_tokens,
99
+ )
100
+ return results
101
+
102
+ async def detect_question(
103
+ self,
104
+ question: str,
105
+ output_lang: str | None = None,
106
+ with_analysis: bool = False,
107
+ user_prompt: str = "",
108
+ logprobs: bool = False,
109
+ top_logprobs: int = 2,
110
+ max_tokens: int | None = None,
111
+ ) -> dict[str, bool]:
112
+ results = await self.operator.run(
113
+ question,
114
+ prompt_file="question_detector.yaml",
115
+ output_model=OutputModels.BoolOutput,
116
+ with_analysis=with_analysis,
117
+ resp_format="parse",
118
+ user_prompt=user_prompt,
119
+ output_lang=output_lang,
120
+ logprobs=logprobs,
121
+ top_logprobs=top_logprobs,
122
+ max_tokens=max_tokens,
123
+ )
124
+ return results
125
+
126
+ async def generate_question_from_text(
127
+ self,
128
+ text: str,
129
+ output_lang: str | None = None,
130
+ with_analysis: bool = False,
131
+ user_prompt: str = "",
132
+ logprobs: bool = False,
133
+ top_logprobs: int = 3,
134
+ max_tokens: int | None = None,
135
+ ) -> dict[str, str]:
136
+ results = await self.operator.run(
137
+ text,
138
+ prompt_file="question_generator.yaml",
139
+ output_model=OutputModels.StrOutput,
140
+ with_analysis=with_analysis,
141
+ resp_format="parse",
142
+ user_prompt=user_prompt,
143
+ output_lang=output_lang,
144
+ logprobs=logprobs,
145
+ top_logprobs=top_logprobs,
146
+ max_tokens=max_tokens,
147
+ )
148
+ return results
149
+
150
+ async def merge_questions(
151
+ self,
152
+ questions: list[str],
153
+ output_lang: str | None = None,
154
+ mode: Literal["default", "reason"] = "default",
155
+ with_analysis: bool = False,
156
+ user_prompt: str = "",
157
+ logprobs: bool = False,
158
+ top_logprobs: int = 3,
159
+ max_tokens: int | None = None,
160
+ ) -> dict[str, str]:
161
+ question_str = ", ".join(questions)
162
+ results = await self.operator.run(
163
+ question_str,
164
+ prompt_file="question_merger.yaml",
165
+ output_model=OutputModels.StrOutput,
166
+ with_analysis=with_analysis,
167
+ use_modes=True,
168
+ mode=mode,
169
+ resp_format="parse",
170
+ user_prompt=user_prompt,
171
+ output_lang=output_lang,
172
+ logprobs=logprobs,
173
+ top_logprobs=top_logprobs,
174
+ max_tokens=max_tokens,
175
+ )
176
+ return results
177
+
178
+ async def rewrite(
179
+ self,
180
+ question: str,
181
+ output_lang: str | None = None,
182
+ mode: Literal["positive", "negative", "hard_negative"] = "positive",
183
+ with_analysis: bool = False,
184
+ user_prompt: str = "",
185
+ logprobs: bool = False,
186
+ top_logprobs: int = 3,
187
+ max_tokens: int | None = None,
188
+ ) -> dict[str, str]:
189
+ results = await self.operator.run(
190
+ question,
191
+ prompt_file="rewriter.yaml",
192
+ output_model=OutputModels.StrOutput,
193
+ with_analysis=with_analysis,
194
+ use_modes=True,
195
+ mode=mode,
196
+ resp_format="parse",
197
+ user_prompt=user_prompt,
198
+ output_lang=output_lang,
199
+ logprobs=logprobs,
200
+ top_logprobs=top_logprobs,
201
+ max_tokens=max_tokens,
202
+ )
203
+ return results
204
+
205
+ async def generate_questions_from_subject(
206
+ self,
207
+ subject: str,
208
+ number_of_questions: int,
209
+ output_lang: str | None = None,
210
+ with_analysis: bool = False,
211
+ user_prompt: str = "",
212
+ logprobs: bool = False,
213
+ top_logprobs: int = 3,
214
+ max_tokens: int | None = None,
215
+ ) -> dict[str, list[str]]:
216
+ results = await self.operator.run(
217
+ subject,
218
+ prompt_file="subject_question_generator.yaml",
219
+ output_model=OutputModels.ReasonListStrOutput,
220
+ with_analysis=with_analysis,
221
+ resp_format="parse",
222
+ user_prompt=user_prompt,
223
+ number_of_questions=number_of_questions,
224
+ output_lang=output_lang,
225
+ logprobs=logprobs,
226
+ top_logprobs=top_logprobs,
227
+ max_tokens=max_tokens,
228
+ )
229
+ return results
230
+
231
+ async def summarize(
232
+ self,
233
+ text: str,
234
+ output_lang: str | None = None,
235
+ with_analysis: bool = False,
236
+ user_prompt: str = "",
237
+ logprobs: bool = False,
238
+ top_logprobs: int = 3,
239
+ max_tokens: int | None = None,
240
+ ) -> dict[str, str]:
241
+ results = await self.operator.run(
242
+ text,
243
+ prompt_file="summarizer.yaml",
244
+ output_model=OutputModels.StrOutput,
245
+ with_analysis=with_analysis,
246
+ resp_format="parse",
247
+ user_prompt=user_prompt,
248
+ output_lang=output_lang,
249
+ logprobs=logprobs,
250
+ top_logprobs=top_logprobs,
251
+ max_tokens=max_tokens,
252
+ )
253
+ return results
254
+
255
+ async def translate(
256
+ self,
257
+ text: str,
258
+ target_language: str,
259
+ with_analysis: bool = False,
260
+ user_prompt: str = "",
261
+ logprobs: bool = False,
262
+ top_logprobs: int = 3,
263
+ max_tokens: int | None = None,
264
+ ) -> dict[str, str]:
265
+ results = await self.operator.run(
266
+ text,
267
+ prompt_file="translator.yaml",
268
+ output_model=OutputModels.StrOutput,
269
+ with_analysis=with_analysis,
270
+ resp_format="parse",
271
+ user_prompt=user_prompt,
272
+ target_language=target_language,
273
+ logprobs=logprobs,
274
+ top_logprobs=top_logprobs,
275
+ max_tokens=max_tokens,
276
+ )
277
+ return results