hamtaa-texttools 1.1.0__tar.gz → 1.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of hamtaa-texttools might be problematic.

Files changed (36)
  1. {hamtaa_texttools-1.1.0/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.2}/PKG-INFO +2 -2
  2. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2/hamtaa_texttools.egg-info}/PKG-INFO +2 -2
  3. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/hamtaa_texttools.egg-info/requires.txt +1 -1
  4. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/pyproject.toml +32 -32
  5. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/batch/batch_manager.py +0 -1
  6. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/batch/batch_runner.py +0 -1
  7. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/async_the_tool.py +53 -22
  8. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/async_operator.py +21 -8
  9. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/base_operator.py +5 -8
  10. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/operator.py +22 -9
  11. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/prompt_loader.py +3 -0
  12. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/the_tool.py +53 -22
  13. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/LICENSE +0 -0
  14. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/MANIFEST.in +0 -0
  15. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/README.md +0 -0
  16. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
  17. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  18. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  19. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/setup.cfg +0 -0
  20. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/__init__.py +0 -0
  21. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/batch/__init__.py +0 -0
  22. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/README.md +0 -0
  23. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/categorizer.yaml +0 -0
  24. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/extract_entities.yaml +0 -0
  25. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/extract_keywords.yaml +0 -0
  26. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/is_question.yaml +0 -0
  27. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/merge_questions.yaml +0 -0
  28. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/rewrite.yaml +0 -0
  29. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/run_custom.yaml +0 -0
  30. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/subject_to_question.yaml +0 -0
  31. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/summarize.yaml +0 -0
  32. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/text_to_question.yaml +0 -0
  33. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/prompts/translate.yaml +0 -0
  34. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/__init__.py +0 -0
  35. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/formatters.py +0 -0
  36. {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/output_models.py +0 -0
{hamtaa_texttools-1.1.0/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.0
+Version: 1.1.2
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
@@ -29,7 +29,7 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: openai==1.97.1
-Requires-Dist: PyYAML>=6.0
+Requires-Dist: pyyaml>=6.0
 Dynamic: license-file
 
 # TextTools

{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2/hamtaa_texttools.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.0
+Version: 1.1.2
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
@@ -29,7 +29,7 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: openai==1.97.1
-Requires-Dist: PyYAML>=6.0
+Requires-Dist: pyyaml>=6.0
 Dynamic: license-file
 
 # TextTools

{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/hamtaa_texttools.egg-info/requires.txt
@@ -1,2 +1,2 @@
 openai==1.97.1
-PyYAML>=6.0
+pyyaml>=6.0

{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/pyproject.toml
@@ -1,32 +1,32 @@
-[build-system]
-requires = ["setuptools>=61.0", "wheel"]
-build-backend = "setuptools.build_meta"
-
-[project]
-name = "hamtaa-texttools"
-version = "1.1.0"
-authors = [
-    { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
-    { name = "Montazer", email = "montazerh82@gmail.com" },
-    { name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
-    { name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
-]
-description = "A high-level NLP toolkit built on top of modern LLMs."
-readme = "README.md"
-license = {file = "LICENSE"}
-requires-python = ">=3.8"
-dependencies = [
-    "openai==1.97.1",
-    "PyYAML>=6.0",
-]
-keywords = ["nlp", "llm", "text-processing", "openai"]
-
-[tool.setuptools.packages.find]
-where = ["."]
-include = ["texttools*"]
-
-[tool.setuptools]
-include-package-data = true
-
-[tool.setuptools.package-data]
-"texttools" = ["prompts/*.yaml", "prompts/*.yml"]
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "hamtaa-texttools"
+version = "1.1.2"
+authors = [
+    { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
+    { name = "Montazer", email = "montazerh82@gmail.com" },
+    { name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
+    { name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
+]
+description = "A high-level NLP toolkit built on top of modern LLMs."
+readme = "README.md"
+license = {file = "LICENSE"}
+requires-python = ">=3.8"
+dependencies = [
+    "openai==1.97.1",
+    "pyyaml>=6.0",
+]
+keywords = ["nlp", "llm", "text-processing", "openai"]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["texttools*"]
+
+[tool.setuptools]
+include-package-data = true
+
+[tool.setuptools.package-data]
+"texttools" = ["prompts/*.yaml", "prompts/*.yml"]

{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/batch/batch_manager.py
@@ -8,7 +8,6 @@ from pydantic import BaseModel
 from openai import OpenAI
 from openai.lib._pydantic import to_strict_json_schema
 
-# Configure logger
 logger = logging.getLogger("batch_runner")
 logger.setLevel(logging.INFO)
 
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/batch/batch_runner.py
@@ -12,7 +12,6 @@ from pydantic import BaseModel
 
 from texttools.batch import SimpleBatchManager
 
-# Configure logger
 logger = logging.getLogger("batch_runner")
 logger.setLevel(logging.INFO)
 
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/async_the_tool.py
@@ -34,12 +34,15 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Categorize a text into a single Islamic studies domain category.
 
         Returns:
-            {"result": <category string>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The assigned Islamic studies category
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return await self.operator.run(
             # User parameters
@@ -66,12 +69,15 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, list[str]]:
+    ) -> OutputModels.ToolOutput:
         """
         Extract salient keywords from text.
 
         Returns:
-            {"result": [<keyword1>, <keyword2>, ...]} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (list[str]): List of extracted keywords
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return await self.operator.run(
             # User parameters
@@ -98,12 +104,15 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, list[dict[str, str]]]:
+    ) -> OutputModels.ToolOutput:
         """
         Perform Named Entity Recognition (NER) over the input text.
 
         Returns:
-            {"result": [{"text": <entity>, "type": <entity_type>}, ...]} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (list[dict]): List of entities with 'text' and 'type' keys
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return await self.operator.run(
             # User parameters
@@ -129,12 +138,15 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, bool]:
+    ) -> OutputModels.ToolOutput:
         """
         Detect if the input is phrased as a question.
 
         Returns:
-            {"result": True} or {"result": False} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (bool): True if text is a question, False otherwise
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return await self.operator.run(
             # User parameters
@@ -161,12 +173,15 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Generate a single question from the given text.
 
         Returns:
-            {"result": <generated_question>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The generated question
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return await self.operator.run(
             # User parameters
@@ -194,12 +209,15 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["default", "reason"] = "default",
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Merge multiple questions into a single unified question.
 
         Returns:
-            {"result": <merged_question>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The merged question
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         text = ", ".join(text)
         return await self.operator.run(
@@ -228,12 +246,15 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Rewrite a text with different modes.
 
         Returns:
-            {"result": <rewritten_text>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The rewritten text
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return await self.operator.run(
             # User parameters
@@ -261,12 +282,15 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, list[str]]:
+    ) -> OutputModels.ToolOutput:
         """
         Generate a list of questions about a subject.
 
         Returns:
-            {"result": [<question1>, <question2>, ...]} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (list[str]): List of generated questions
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return await self.operator.run(
             # User parameters
@@ -294,12 +318,15 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Summarize the given subject text.
 
         Returns:
-            {"result": <summary>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The summary text
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return await self.operator.run(
             # User parameters
@@ -326,12 +353,15 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Translate text between languages.
 
         Returns:
-            {"result": <translated_text>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The translated text
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return await self.operator.run(
             # User parameters
@@ -358,12 +388,13 @@ class AsyncTheTool:
         temperature: float | None = None,
         logprobs: bool | None = None,
         top_logprobs: int | None = None,
-    ) -> dict[str, Any]:
+    ) -> OutputModels.ToolOutput:
         """
         Custom tool that can do almost anything!
 
         Returns:
-            {"result": <Any>}
+            ToolOutput: Object with fields:
+            - result (str): The output result
         """
         return await self.operator.run(
             # User paramaeters
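
Every AsyncTheTool method above now returns a structured OutputModels.ToolOutput instead of a plain dict. A minimal consumption sketch follows; the constructor wiring is not shown in this diff, so the client and model arguments below are assumptions:

import asyncio

from openai import AsyncOpenAI
from texttools.tools.async_the_tool import AsyncTheTool

async def main() -> None:
    # Hypothetical wiring; the real constructor signature is not in this diff.
    tool = AsyncTheTool(client=AsyncOpenAI(), model="gpt-4o-mini")
    output = await tool.is_question("Is this a question?")
    # As of 1.1.2 the return value is a ToolOutput object, not a dict:
    print(output.result)    # bool for is_question
    print(output.analysis)  # populated when with_analysis is enabled
    print(output.errors)    # error messages collected during the run

asyncio.run(main())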

{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/async_operator.py
@@ -12,7 +12,6 @@ from texttools.tools.internals.prompt_loader import PromptLoader
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-# Configure logger
 logger = logging.getLogger("async_operator")
 logger.setLevel(logging.INFO)
 
@@ -32,6 +31,10 @@ class AsyncOperator(BaseOperator):
         self.model = model
 
     async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        """
+        Calls OpenAI API for analysis using the configured prompt template.
+        Returns the analyzed content as a string.
+        """
         analyze_prompt = prompt_configs["analyze_template"]
         analyze_message = [self._build_user_message(analyze_prompt)]
         completion = await self.client.chat.completions.create(
@@ -50,6 +53,10 @@ class AsyncOperator(BaseOperator):
         logprobs: bool = False,
         top_logprobs: int = 3,
     ) -> tuple[Type[T], Any]:
+        """
+        Parses a chat completion using OpenAI's structured output format.
+        Returns both the parsed object and the raw completion for logging.
+        """
         request_kwargs = {
             "model": self.model,
             "messages": message,
@@ -73,6 +80,10 @@ class AsyncOperator(BaseOperator):
         logprobs: bool = False,
         top_logprobs: int = 3,
     ) -> tuple[Type[T], Any]:
+        """
+        Generates a completion using vLLM with JSON schema guidance.
+        Returns the parsed output model and raw completion.
+        """
         json_schema = output_model.model_json_schema()
 
         # Build kwargs dynamically
@@ -110,14 +121,16 @@ class AsyncOperator(BaseOperator):
         resp_format: Literal["vllm", "parse"],
         mode: str | None,
         **extra_kwargs,
-    ) -> dict[str, Any]:
+    ) -> ToolOutput:
         """
         Execute the async LLM pipeline with the given input text. (Async)
         """
         prompt_loader = PromptLoader()
         formatter = Formatter()
+        output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
 
         try:
+            # Prompt configs contain two keys: main_template and analyze template, both are string
            prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
                 text=text.strip(),
@@ -159,11 +172,10 @@ class AsyncOperator(BaseOperator):
 
             # Ensure output_model has a `result` field
             if not hasattr(parsed, "result"):
-                logger.error(
-                    "The provided output_model must define a field named 'result'"
-                )
-
-                output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
+                error = "The provided output_model must define a field named 'result'"
+                logger.error(error)
+                output.errors.append(error)
+                return output
 
             output.result = parsed.result
 
@@ -174,6 +186,7 @@
             output.analysis = analysis
 
             return output
+
         except Exception as e:
             logger.error(f"AsyncTheTool failed: {e}")
-            return ToolOutput(result="", analysis="", logprobs=[], errors=[str(e)])
+            return output.errors.append(str(e))
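
The run pipeline now builds a single ToolOutput up front and accumulates results and errors on it. The model itself is defined in texttools/tools/internals/output_models.py, which this diff does not touch; based on how it is constructed and mutated above, it is roughly shaped like the following sketch (field names come from the diff, field types are inferred, not taken from the source):

from typing import Any

from pydantic import BaseModel

class ToolOutput(BaseModel):
    # Inferred from ToolOutput(result="", analysis="", logprobs=[], errors=[]);
    # the real definition may differ.
    result: Any = ""                     # per-tool payload (str, bool, list, ...)
    analysis: str = ""                   # filled in when with_analysis is enabled
    logprobs: list[dict[str, Any]] = []  # token probabilities when logprobs is enabled
    errors: list[str] = []               # messages appended as failures occur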

{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/base_operator.py
@@ -10,7 +10,6 @@ from openai import OpenAI, AsyncOpenAI
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-# Configure logger
 logger = logging.getLogger("base_operator")
 logger.setLevel(logging.INFO)
 
@@ -40,13 +39,6 @@ class BaseOperator:
     ) -> Type[T]:
         """
         Convert a JSON response string to output model.
-
-        Args:
-            response_string: The JSON string (may contain code block markers)
-            output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
-
-        Returns:
-            Instance of your output model
         """
         # Clean the response string
         cleaned_json = self._clean_json_response(response_string)
@@ -61,7 +53,12 @@
         return output_model(**response_dict)
 
     def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
+        """
+        Extracts and filters token probabilities from completion logprobs.
+        Skips punctuation and structural tokens, returns cleaned probability data.
+        """
         logprobs_data = []
+
         ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
 
         for choice in completion.choices:
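
The docstring added to _extract_logprobs describes the filtering done by ignore_pattern, which drops the literal token "result" and tokens consisting purely of JSON punctuation or whitespace. A standalone illustration of what that regex keeps and skips (tokens chosen for illustration only):

import re

# The same pattern used in _extract_logprobs above.
ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')

for token in ['result', '{', '",', '   ', 'Tehran', 'question']:
    verdict = "skipped" if ignore_pattern.match(token) else "kept"
    print(f"{token!r}: {verdict}")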

{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/operator.py
@@ -12,7 +12,6 @@ from texttools.tools.internals.prompt_loader import PromptLoader
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-# Configure logger
 logger = logging.getLogger("operator")
 logger.setLevel(logging.INFO)
 
@@ -32,6 +31,10 @@ class Operator(BaseOperator):
         self.model = model
 
     def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        """
+        Calls OpenAI API for analysis using the configured prompt template.
+        Returns the analyzed content as a string.
+        """
         analyze_prompt = prompt_configs["analyze_template"]
         analyze_message = [self._build_user_message(analyze_prompt)]
         completion = self.client.chat.completions.create(
@@ -50,6 +53,10 @@ class Operator(BaseOperator):
         logprobs: bool = False,
         top_logprobs: int = 3,
     ) -> tuple[Type[T], Any]:
+        """
+        Parses a chat completion using OpenAI's structured output format.
+        Returns both the parsed object and the raw completion for logging.
+        """
         request_kwargs = {
             "model": self.model,
             "messages": message,
@@ -73,6 +80,10 @@ class Operator(BaseOperator):
         logprobs: bool = False,
         top_logprobs: int = 3,
     ) -> tuple[Type[T], Any]:
+        """
+        Generates a completion using vLLM with JSON schema guidance.
+        Returns the parsed output model and raw completion.
+        """
         json_schema = output_model.model_json_schema()
 
         # Build kwargs dynamically
@@ -110,14 +121,16 @@ class Operator(BaseOperator):
         resp_format: Literal["vllm", "parse"],
         mode: str | None,
         **extra_kwargs,
-    ) -> dict[str, Any]:
+    ) -> ToolOutput:
         """
         Execute the LLM pipeline with the given input text.
         """
         prompt_loader = PromptLoader()
         formatter = Formatter()
+        output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
 
         try:
+            # Prompt configs contain two keys: main_template and analyze template, both are string
            prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
                 text=text.strip(),
@@ -159,11 +172,10 @@ class Operator(BaseOperator):
 
             # Ensure output_model has a `result` field
             if not hasattr(parsed, "result"):
-                logger.error(
-                    "The provided output_model must define a field named 'result'"
-                )
-
-                output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
+                error = "The provided output_model must define a field named 'result'"
+                logger.error(error)
+                output.errors.append(error)
+                return output
 
             output.result = parsed.result
 
@@ -174,6 +186,7 @@
             output.analysis = analysis
 
             return output
+
         except Exception as e:
-            logger.error(f"TheTool failed: {e}")
-            return ToolOutput(result="", analysis="", logprobs=[], errors=[str(e)])
+            logger.error(f"AsyncTheTool failed: {e}")
+            return output.errors.append(str(e))

{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/internals/prompt_loader.py
@@ -24,6 +24,9 @@ class PromptLoader:
     # Use lru_cache to load each file once
     @lru_cache(maxsize=32)
    def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
+        """
+        Loads prompt templates from YAML file with optional mode selection.
+        """
         base_dir = Path(__file__).parent.parent.parent / Path("prompts")
         prompt_path = base_dir / prompt_file
         data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
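
The new docstring documents _load_templates; since the method is wrapped in @lru_cache(maxsize=32), each (prompt_file, mode) pair is read from disk at most once per loader instance. A standalone toy showing that caching behavior (not the package's code):

from functools import lru_cache

class Loader:
    @lru_cache(maxsize=32)
    def load(self, prompt_file: str, mode: str | None) -> str:
        print(f"reading {prompt_file} (mode={mode}) from disk")
        return f"templates for {prompt_file}"

loader = Loader()
loader.load("summarize.yaml", None)  # first call reads the file
loader.load("summarize.yaml", None)  # repeat call is served from the cache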

{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.2}/texttools/tools/the_tool.py
@@ -32,12 +32,15 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Categorize a text into a single Islamic studies domain category.
 
         Returns:
-            {"result": <category string>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The assigned Islamic studies category
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return self.operator.run(
             # User parameters
@@ -64,12 +67,15 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, list[str]]:
+    ) -> OutputModels.ToolOutput:
         """
         Extract salient keywords from text.
 
         Returns:
-            {"result": [<keyword1>, <keyword2>, ...]} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (list[str]): List of extracted keywords
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return self.operator.run(
             # User parameters
@@ -96,12 +102,15 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, list[dict[str, str]]]:
+    ) -> OutputModels.ToolOutput:
         """
         Perform Named Entity Recognition (NER) over the input text.
 
         Returns:
-            {"result": [{"text": <entity>, "type": <entity_type>}, ...]} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (list[dict]): List of entities with 'text' and 'type' keys
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return self.operator.run(
             # User parameters
@@ -127,12 +136,15 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, bool]:
+    ) -> OutputModels.ToolOutput:
         """
         Detect if the input is phrased as a question.
 
         Returns:
-            {"result": True} or {"result": False} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (bool): True if text is a question, False otherwise
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return self.operator.run(
             # User parameters
@@ -159,12 +171,15 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Generate a single question from the given text.
 
         Returns:
-            {"result": <generated_question>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The generated question
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return self.operator.run(
             # User parameters
@@ -192,12 +207,15 @@ class TheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["default", "reason"] = "default",
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Merge multiple questions into a single unified question.
 
         Returns:
-            {"result": <merged_question>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The merged question
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         text = ", ".join(text)
         return self.operator.run(
@@ -226,12 +244,15 @@ class TheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Rewrite a text with different modes.
 
         Returns:
-            {"result": <rewritten_text>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The rewritten text
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return self.operator.run(
             # User parameters
@@ -259,12 +280,15 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, list[str]]:
+    ) -> OutputModels.ToolOutput:
         """
         Generate a list of questions about a subject.
 
         Returns:
-            {"result": [<question1>, <question2>, ...]} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (list[str]): List of generated questions
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return self.operator.run(
             # User parameters
@@ -292,12 +316,15 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Summarize the given subject text.
 
         Returns:
-            {"result": <summary>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The summary text
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return self.operator.run(
             # User parameters
@@ -324,12 +351,15 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-    ) -> dict[str, str]:
+    ) -> OutputModels.ToolOutput:
         """
         Translate text between languages.
 
         Returns:
-            {"result": <translated_text>} + ("logprobs" and "analysis" if enabled)
+            ToolOutput: Object containing:
+            - result (str): The translated text
+            - logprobs (list | None): Probability data if logprobs enabled
+            - analysis (str | None): Detailed reasoning if with_analysis enabled
         """
         return self.operator.run(
             # User parameters
@@ -356,12 +386,13 @@ class TheTool:
         temperature: float | None = None,
         logprobs: bool | None = None,
         top_logprobs: int | None = None,
-    ) -> dict[str, Any]:
+    ) -> OutputModels.ToolOutput:
         """
         Custom tool that can do almost anything!
 
         Returns:
-            {"result": <Any>}
+            ToolOutput: Object with fields:
+            - result (str): The output result
         """
         return self.operator.run(
             # User paramaeters
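
TheTool mirrors the async changes hunk for hunk, so synchronous callers receive the same ToolOutput objects. Because the operator requires the parsed model to expose a result field (see the operator.py hunks above), any custom output model passed to run_custom must define one; a hedged sketch, since the exact run_custom signature is not shown in this diff:

from pydantic import BaseModel

class SentimentOutput(BaseModel):
    result: str  # mandatory: the operator copies parsed.result into ToolOutput

# Hypothetical call; constructor and parameter names are assumptions:
# tool = TheTool(client=OpenAI(), model="gpt-4o-mini")
# output = tool.run_custom("I love this!", prompt="Classify the sentiment.",
#                          output_model=SentimentOutput)
# print(output.result, output.errors)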