hamtaa-texttools 1.0.6__tar.gz → 1.0.7__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of hamtaa-texttools might be problematic.

Files changed (40)
  1. {hamtaa_texttools-1.0.6/hamtaa_texttools.egg-info → hamtaa_texttools-1.0.7}/PKG-INFO +1 -1
  2. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7/hamtaa_texttools.egg-info}/PKG-INFO +1 -1
  3. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/hamtaa_texttools.egg-info/SOURCES.txt +7 -6
  4. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/pyproject.toml +1 -1
  5. hamtaa_texttools-1.0.7/texttools/tools/async_the_tool.py +338 -0
  6. hamtaa_texttools-1.0.7/texttools/tools/internals/async_operator.py +191 -0
  7. hamtaa_texttools-1.0.7/texttools/tools/internals/base_operator.py +85 -0
  8. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/tools/internals/operator.py +25 -128
  9. hamtaa_texttools-1.0.7/texttools/tools/the_tool.py +438 -0
  10. hamtaa_texttools-1.0.6/texttools/tools/async_the_tool.py +0 -277
  11. hamtaa_texttools-1.0.6/texttools/tools/internals/async_operator.py +0 -308
  12. hamtaa_texttools-1.0.6/texttools/tools/the_tool.py +0 -501
  13. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/LICENSE +0 -0
  14. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/MANIFEST.in +0 -0
  15. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/README.md +0 -0
  16. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  17. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/hamtaa_texttools.egg-info/requires.txt +0 -0
  18. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  19. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/setup.cfg +0 -0
  20. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/__init__.py +0 -0
  21. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/batch/__init__.py +0 -0
  22. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/batch/batch_manager.py +0 -0
  23. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/batch/batch_runner.py +0 -0
  24. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/formatters/base_formatter.py +0 -0
  25. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/formatters/user_merge_formatter.py +0 -0
  26. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/README.md +0 -0
  27. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/categorizer.yaml +0 -0
  28. /hamtaa_texttools-1.0.6/texttools/prompts/ner_extractor.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/extract_entities.yaml +0 -0
  29. /hamtaa_texttools-1.0.6/texttools/prompts/keyword_extractor.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/extract_keywords.yaml +0 -0
  30. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/is_question.yaml +0 -0
  31. /hamtaa_texttools-1.0.6/texttools/prompts/question_merger.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/merge_questions.yaml +0 -0
  32. /hamtaa_texttools-1.0.6/texttools/prompts/rewriter.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/rewrite.yaml +0 -0
  33. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/run_custom.yaml +0 -0
  34. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/subject_to_question.yaml +0 -0
  35. /hamtaa_texttools-1.0.6/texttools/prompts/summarizer.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/summarize.yaml +0 -0
  36. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/prompts/text_to_question.yaml +0 -0
  37. /hamtaa_texttools-1.0.6/texttools/prompts/translator.yaml → /hamtaa_texttools-1.0.7/texttools/prompts/translate.yaml +0 -0
  38. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/tools/__init__.py +0 -0
  39. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/tools/internals/output_models.py +0 -0
  40. {hamtaa_texttools-1.0.6 → hamtaa_texttools-1.0.7}/texttools/tools/internals/prompt_loader.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.0.6
+Version: 1.0.7
 Summary: TextTools is a high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.0.6
+Version: 1.0.7
 Summary: TextTools is a high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
@@ -15,20 +15,21 @@ texttools/formatters/base_formatter.py
 texttools/formatters/user_merge_formatter.py
 texttools/prompts/README.md
 texttools/prompts/categorizer.yaml
+texttools/prompts/extract_entities.yaml
+texttools/prompts/extract_keywords.yaml
 texttools/prompts/is_question.yaml
-texttools/prompts/keyword_extractor.yaml
-texttools/prompts/ner_extractor.yaml
-texttools/prompts/question_merger.yaml
-texttools/prompts/rewriter.yaml
+texttools/prompts/merge_questions.yaml
+texttools/prompts/rewrite.yaml
 texttools/prompts/run_custom.yaml
 texttools/prompts/subject_to_question.yaml
-texttools/prompts/summarizer.yaml
+texttools/prompts/summarize.yaml
 texttools/prompts/text_to_question.yaml
-texttools/prompts/translator.yaml
+texttools/prompts/translate.yaml
 texttools/tools/__init__.py
 texttools/tools/async_the_tool.py
 texttools/tools/the_tool.py
 texttools/tools/internals/async_operator.py
+texttools/tools/internals/base_operator.py
 texttools/tools/internals/operator.py
 texttools/tools/internals/output_models.py
 texttools/tools/internals/prompt_loader.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "hamtaa-texttools"
-version = "1.0.6"
+version = "1.0.7"
 authors = [
     { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
     { name = "Montazer", email = "montazerh82@gmail.com" },
@@ -0,0 +1,338 @@
+from typing import Literal, Any
+
+from openai import AsyncOpenAI
+
+from texttools.tools.internals.async_operator import AsyncOperator
+import texttools.tools.internals.output_models as OutputModels
+
+
+class AsyncTheTool:
+    """
+    Async counterpart to TheTool.
+
+    Usage:
+        async_client = AsyncOpenAI(...)
+        tool = AsyncTheTool(async_client, model="model-name")
+        result = await tool.categorize("text ...", with_analysis=True)
+    """
+
+    def __init__(
+        self,
+        client: AsyncOpenAI,
+        model: str,
+    ):
+        self.operator = AsyncOperator(client=client, model=model)
+
+    async def categorize(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, str]:
+        """
+        Categorize a text into a single Islamic studies domain category.
+
+        Args:
+            text: Input string to categorize.
+            with_analysis: If True, first runs an LLM "analysis" step and
+                conditions the main prompt on that analysis.
+
+        Returns:
+            {"result": <category string>}
+            Example: {"result": "باورهای دینی"}
+        """
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="categorizer.yaml",
+            output_model=OutputModels.CategorizerOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def extract_keywords(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, list[str]]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="extract_keywords.yaml",
+            output_model=OutputModels.ListStrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def extract_entities(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, list[dict[str, str]]]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="extract_entities.yaml",
+            output_model=OutputModels.ListDictStrStrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def is_question(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, bool]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="is_question.yaml",
+            output_model=OutputModels.BoolOutput,
+            resp_format="parse",
+            mode=None,
+            output_lang=None,
+        )
+
+    async def text_to_question(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, str]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="text_to_question.yaml",
+            output_model=OutputModels.StrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def merge_questions(
+        self,
+        text: list[str],
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+        mode: Literal["default", "reason"] = "default",
+    ) -> dict[str, str]:
+        text = ", ".join(text)
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="merge_questions.yaml",
+            output_model=OutputModels.StrOutput,
+            resp_format="parse",
+            mode=mode,
+        )
+
+    async def rewrite(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+        mode: Literal["positive", "negative", "hard_negative"] = "positive",
+    ) -> dict[str, str]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="rewrite.yaml",
+            output_model=OutputModels.StrOutput,
+            resp_format="parse",
+            mode=mode,
+        )
+
+    async def subject_to_question(
+        self,
+        text: str,
+        number_of_questions: int,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, list[str]]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            number_of_questions=number_of_questions,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="subject_to_question.yaml",
+            output_model=OutputModels.ReasonListStrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def summarize(
+        self,
+        text: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, str]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="summarize.yaml",
+            output_model=OutputModels.StrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def translate(
+        self,
+        text: str,
+        target_language: str,
+        with_analysis: bool = False,
+        output_lang: str | None = None,
+        user_prompt: str | None = None,
+        temperature: float | None = 0.0,
+        logprobs: bool = False,
+        top_logprobs: int | None = None,
+    ) -> dict[str, str]:
+        return await self.operator.run(
+            # User parameters
+            text=text,
+            target_language=target_language,
+            with_analysis=with_analysis,
+            output_lang=output_lang,
+            user_prompt=user_prompt,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="translate.yaml",
+            output_model=OutputModels.StrOutput,
+            resp_format="parse",
+            mode=None,
+        )
+
+    async def run_custom(
+        self,
+        prompt: str,
+        output_model: Any,
+        output_lang: str | None = None,
+        temperature: float | None = None,
+        logprobs: bool | None = None,
+        top_logprobs: int | None = None,
+    ) -> dict[str, Any]:
+        """
+        Custom tool that can do almost anything!
+
+        Args:
+            prompt: Custom prompt.
+            output_model: Custom BaseModel output model.
+
+        Returns:
+            {"result": <Any>}
+        """
+        return await self.operator.run(
+            # User parameters
+            text=prompt,
+            output_model=output_model,
+            output_model_str=output_model.model_json_schema(),
+            output_lang=output_lang,
+            temperature=temperature,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            # Internal parameters
+            prompt_file="run_custom.yaml",
+            resp_format="parse",
+            user_prompt=None,
+            with_analysis=False,
+            mode=None,
+        )
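
Taken together, the new `AsyncTheTool` exposes one awaitable method per prompt file, and every method resolves to a dict with at least a "result" key. A minimal usage sketch of the new API; the endpoint URL and model name below are placeholders, not part of the package:

import asyncio

from openai import AsyncOpenAI
from texttools.tools.async_the_tool import AsyncTheTool

async def main():
    # Placeholder endpoint and model name; substitute your own deployment.
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    tool = AsyncTheTool(client, model="my-model")

    categorized = await tool.categorize("Some input text", with_analysis=True)
    keywords = await tool.extract_keywords("Some input text")

    # Each call returns {"result": ...}; "analysis" is added when with_analysis=True.
    print(categorized["result"], categorized.get("analysis"))
    print(keywords["result"])

asyncio.run(main())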
@@ -0,0 +1,191 @@
+from __future__ import annotations
+
+from typing import Any, TypeVar, Type, Literal
+import logging
+
+from openai import AsyncOpenAI
+from pydantic import BaseModel
+
+from texttools.tools.internals.base_operator import BaseOperator
+from texttools.formatters.user_merge_formatter import (
+    UserMergeFormatter,
+)
+from texttools.tools.internals.prompt_loader import PromptLoader
+
+# Base Model type for output models
+T = TypeVar("T", bound=BaseModel)
+
+# Configure logger
+logger = logging.getLogger("async_operator")
+logger.setLevel(logging.INFO)
+
+
+class AsyncOperator(BaseOperator):
+    """
+    Core engine for running text-processing operations with an LLM (async).
+
+    It wires together:
+    - `PromptLoader` → loads YAML prompt templates.
+    - `UserMergeFormatter` → applies formatting to messages (e.g., merging).
+    - `AsyncOpenAI` client → executes completions/parsed completions.
+    """
+
+    def __init__(self, client: AsyncOpenAI, model: str):
+        self.client: AsyncOpenAI = client
+        self.model = model
+
+    async def _analysis_completion(
+        self,
+        analyze_message: list[dict[str, str]],
+        temperature: float,
+    ) -> str:
+        completion = await self.client.chat.completions.create(
+            model=self.model,
+            messages=analyze_message,
+            temperature=temperature,
+        )
+        analysis = completion.choices[0].message.content.strip()
+        return analysis
+
+    async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        analyze_prompt = prompt_configs["analyze_template"]
+        analyze_message = [self._build_user_message(analyze_prompt)]
+        analysis = await self._analysis_completion(analyze_message, temperature)
+        return analysis
+
+    async def _parse_completion(
+        self,
+        message: list[dict[str, str]],
+        output_model: Type[T],
+        temperature: float,
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+    ) -> tuple[T, Any]:
+        request_kwargs = {
+            "model": self.model,
+            "messages": message,
+            "response_format": output_model,
+            "temperature": temperature,
+        }
+
+        if logprobs:
+            request_kwargs["logprobs"] = True
+            request_kwargs["top_logprobs"] = top_logprobs
+
+        completion = await self.client.beta.chat.completions.parse(**request_kwargs)
+        parsed = completion.choices[0].message.parsed
+        return parsed, completion
+
+    async def _vllm_completion(
+        self,
+        message: list[dict[str, str]],
+        output_model: Type[T],
+        temperature: float,
+        logprobs: bool = False,
+        top_logprobs: int = 3,
+    ) -> tuple[T, Any]:
+        json_schema = output_model.model_json_schema()
+
+        # Build kwargs dynamically
+        request_kwargs = {
+            "model": self.model,
+            "messages": message,
+            "extra_body": {"guided_json": json_schema},
+            "temperature": temperature,
+        }
+
+        if logprobs:
+            request_kwargs["logprobs"] = True
+            request_kwargs["top_logprobs"] = top_logprobs
+
+        completion = await self.client.chat.completions.create(**request_kwargs)
+        response = completion.choices[0].message.content
+
+        # Convert the string response to the output model
+        parsed = self._convert_to_output_model(response, output_model)
+        return parsed, completion
+
+    async def run(
+        self,
+        # User parameters
+        text: str,
+        with_analysis: bool,
+        output_lang: str | None,
+        user_prompt: str | None,
+        temperature: float,
+        logprobs: bool,
+        top_logprobs: int | None,
+        # Internal parameters
+        prompt_file: str,
+        output_model: Type[T],
+        resp_format: Literal["vllm", "parse"],
+        mode: str | None,
+        **extra_kwargs,
+    ) -> dict[str, Any]:
+        """
+        Execute the async LLM pipeline with the given input text.
+        """
+        prompt_loader = PromptLoader()
+        formatter = UserMergeFormatter()
+
+        try:
+            cleaned_text = text.strip()
+
+            prompt_configs = prompt_loader.load(
+                prompt_file=prompt_file,
+                text=cleaned_text,
+                mode=mode,
+                **extra_kwargs,
+            )
+
+            messages: list[dict[str, str]] = []
+
+            if with_analysis:
+                analysis = await self._analyze(prompt_configs, temperature)
+                messages.append(
+                    self._build_user_message(f"Based on this analysis: {analysis}")
+                )
+
+            if output_lang:
+                messages.append(
+                    self._build_user_message(
+                        f"Respond only in the {output_lang} language."
+                    )
+                )
+
+            if user_prompt:
+                messages.append(
+                    self._build_user_message(f"Consider this instruction: {user_prompt}")
+                )
+
+            messages.append(self._build_user_message(prompt_configs["main_template"]))
+            messages = formatter.format(messages)
+
+            if resp_format == "vllm":
+                parsed, completion = await self._vllm_completion(
+                    messages, output_model, temperature, logprobs, top_logprobs
+                )
+            elif resp_format == "parse":
+                parsed, completion = await self._parse_completion(
+                    messages, output_model, temperature, logprobs, top_logprobs
+                )
+
+            # Ensure output_model has a `result` field
+            if not hasattr(parsed, "result"):
+                logger.error(
+                    "The provided output_model must define a field named 'result'"
+                )
+
+            results = {"result": parsed.result}
+
+            if logprobs:
+                results["logprobs"] = self._extract_logprobs(completion)
+
+            if with_analysis:
+                results["analysis"] = analysis
+
+            return results
+
+        except Exception as e:
+            logger.error(f"Async TheTool failed: {e}")
+            return {"Error": str(e), "result": ""}
@@ -0,0 +1,85 @@
+from typing import TypeVar, Type
+import json
+import re
+import math
+
+from pydantic import BaseModel
+from openai import OpenAI, AsyncOpenAI
+
+# Base Model type for output models
+T = TypeVar("T", bound=BaseModel)
+
+
+class BaseOperator:
+    def __init__(self, client: OpenAI | AsyncOpenAI, model: str):
+        self.client = client
+        self.model = model
+
+    def _build_user_message(self, prompt: str) -> dict[str, str]:
+        return {"role": "user", "content": prompt}
+
+    def _clean_json_response(self, response: str) -> str:
+        """
+        Clean a JSON response by removing code block markers and whitespace.
+        Handles cases like:
+        - ```json{"result": "value"}```
+        """
+        stripped = response.strip()
+        cleaned = re.sub(r"^```(?:json)?\s*", "", stripped)
+        cleaned = re.sub(r"\s*```$", "", cleaned)
+
+        return cleaned.strip()
+
+    def _convert_to_output_model(
+        self, response_string: str, output_model: Type[T]
+    ) -> T:
+        """
+        Convert a JSON response string to an output model instance.
+
+        Args:
+            response_string: The JSON string (may contain code block markers)
+            output_model: A Pydantic output model class (e.g., StrOutput, ListStrOutput)
+
+        Returns:
+            An instance of the output model
+        """
+        # Clean the response string
+        cleaned_json = self._clean_json_response(response_string)
+
+        # Fix Python-style booleans (a blunt global replace)
+        cleaned_json = cleaned_json.replace("False", "false").replace("True", "true")
+
+        # Convert string to Python dictionary
+        response_dict = json.loads(cleaned_json)
+
+        # Convert dictionary to output model
+        return output_model(**response_dict)
+
+    def _extract_logprobs(self, completion) -> list[dict]:
+        logprobs_data = []
+        ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
+
+        for choice in completion.choices:
+            if not getattr(choice, "logprobs", None):
+                continue
+
+            for logprob_item in choice.logprobs.content:
+                if ignore_pattern.match(logprob_item.token):
+                    continue
+                token_entry = {
+                    "token": logprob_item.token,
+                    "prob": round(math.exp(logprob_item.logprob), 8),
+                    "top_alternatives": [],
+                }
+                for alt in logprob_item.top_logprobs:
+                    if ignore_pattern.match(alt.token):
+                        continue
+                    token_entry["top_alternatives"].append(
+                        {
+                            "token": alt.token,
+                            "prob": round(math.exp(alt.logprob), 8),
+                        }
+                    )
+                logprobs_data.append(token_entry)
+
+        return logprobs_data
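
For reference, `_clean_json_response` plus `_convert_to_output_model` are what let the guided-JSON ("vllm") path tolerate fenced model output. A quick, self-contained illustration; `StrOutput` is redefined locally here to mirror the package's output models, and no API call is made:

from pydantic import BaseModel

from texttools.tools.internals.base_operator import BaseOperator

class StrOutput(BaseModel):
    result: str

op = BaseOperator(client=None, model="unused")  # no client needed for parsing
raw = '```json\n{"result": "a short summary"}\n```'
parsed = op._convert_to_output_model(raw, StrOutput)
print(parsed.result)  # -> a short summary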