hamtaa-texttools 1.1.11__tar.gz → 1.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {hamtaa_texttools-1.1.11/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.12}/PKG-INFO +1 -1
  2. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12/hamtaa_texttools.egg-info}/PKG-INFO +1 -1
  3. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/hamtaa_texttools.egg-info/SOURCES.txt +1 -2
  4. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/pyproject.toml +1 -1
  5. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/tests/test_all_async_tools.py +4 -1
  6. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/tests/test_all_tools.py +1 -0
  7. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/tools/async_tools.py +139 -3
  8. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/tools/internals/async_operator.py +18 -21
  9. hamtaa_texttools-1.1.11/texttools/tools/internals/base_operator.py → hamtaa_texttools-1.1.12/texttools/tools/internals/operator_utils.py +7 -17
  10. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/tools/internals/sync_operator.py +18 -19
  11. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/tools/sync_tools.py +139 -3
  12. hamtaa_texttools-1.1.11/texttools/tools/internals/formatters.py +0 -24
  13. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/LICENSE +0 -0
  14. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/MANIFEST.in +0 -0
  15. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/README.md +0 -0
  16. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  17. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/hamtaa_texttools.egg-info/requires.txt +0 -0
  18. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  19. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/setup.cfg +0 -0
  20. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/tests/test_logprobs.py +0 -0
  21. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/tests/test_output_validation.py +0 -0
  22. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/__init__.py +0 -0
  23. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/batch/batch_config.py +0 -0
  24. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/batch/batch_runner.py +0 -0
  25. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/batch/internals/batch_manager.py +0 -0
  26. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/batch/internals/utils.py +0 -0
  27. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/README.md +0 -0
  28. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/categorizer.yaml +0 -0
  29. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/extract_entities.yaml +0 -0
  30. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/extract_keywords.yaml +0 -0
  31. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/is_question.yaml +0 -0
  32. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/merge_questions.yaml +0 -0
  33. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/rewrite.yaml +0 -0
  34. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/run_custom.yaml +0 -0
  35. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/subject_to_question.yaml +0 -0
  36. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/summarize.yaml +0 -0
  37. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/text_to_question.yaml +0 -0
  38. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/prompts/translate.yaml +0 -0
  39. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/tools/internals/output_models.py +0 -0
  40. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.12}/texttools/tools/internals/prompt_loader.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.11
3
+ Version: 1.1.12
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
6
6
  License: MIT License
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.11
3
+ Version: 1.1.12
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
6
6
  License: MIT License
@@ -31,8 +31,7 @@ texttools/prompts/translate.yaml
31
31
  texttools/tools/async_tools.py
32
32
  texttools/tools/sync_tools.py
33
33
  texttools/tools/internals/async_operator.py
34
- texttools/tools/internals/base_operator.py
35
- texttools/tools/internals/formatters.py
34
+ texttools/tools/internals/operator_utils.py
36
35
  texttools/tools/internals/output_models.py
37
36
  texttools/tools/internals/prompt_loader.py
38
37
  texttools/tools/internals/sync_operator.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hamtaa-texttools"
7
- version = "1.1.11"
7
+ version = "1.1.12"
8
8
  authors = [
9
9
  { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
10
10
  { name = "Montazer", email = "montazerh82@gmail.com" },
@@ -28,9 +28,12 @@ async def main():
28
28
  merged_task = t.merge_questions(
29
29
  ["چرا ما موجوداتی اجتماعی هستیم؟", "چرا باید در کنار هم زندگی کنیم؟"],
30
30
  mode="default",
31
+ with_analysis=True,
31
32
  )
32
33
  rewritten_task = t.rewrite(
33
- "چرا ما انسان ها، موجوداتی اجتماعی هستیم؟", mode="positive"
34
+ "چرا ما انسان ها، موجوداتی اجتماعی هستیم؟",
35
+ mode="positive",
36
+ user_prompt="Be carefull",
34
37
  )
35
38
  questions_task = t.subject_to_question("Friendship", 3)
36
39
  summary_task = t.summarize("Tomorrow, we will be dead by the car crash")
@@ -49,6 +49,7 @@ print(repr(merged))
49
49
  rewritten = t.rewrite(
50
50
  "چرا ما انسان ها، موجوداتی اجتماعی هستیم؟",
51
51
  mode="positive",
52
+ with_analysis=True,
52
53
  )
53
54
  print(repr(rewritten))
54
55
 
@@ -40,11 +40,22 @@ class AsyncTheTool:
40
40
  """
41
41
  Categorize a text into a single Islamic studies domain category.
42
42
 
43
+ Arguments:
44
+ text: The input text to categorize
45
+ with_analysis: Whether to include detailed reasoning analysis
46
+ user_prompt: Additional instructions for the categorization
47
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
48
+ logprobs: Whether to return token probability information
49
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
50
+ validator: Custom validation function to validate the output
51
+ max_validation_retries: Maximum number of retry attempts if validation fails
52
+
43
53
  Returns:
44
54
  ToolOutput: Object containing:
45
55
  - result (str): The assigned Islamic studies category
46
56
  - logprobs (list | None): Probability data if logprobs enabled
47
57
  - analysis (str | None): Detailed reasoning if with_analysis enabled
58
+ - errors (list[str] | None): Errors occurred during tool call
48
59
  """
49
60
  return await self._operator.run(
50
61
  # User parameters
@@ -78,11 +89,23 @@ class AsyncTheTool:
78
89
  """
79
90
  Extract salient keywords from text.
80
91
 
92
+ Arguments:
93
+ text: The input text to extract keywords from
94
+ with_analysis: Whether to include detailed reasoning analysis
95
+ output_lang: Language for the output response
96
+ user_prompt: Additional instructions for keyword extraction
97
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
98
+ logprobs: Whether to return token probability information
99
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
100
+ validator: Custom validation function to validate the output
101
+ max_validation_retries: Maximum number of retry attempts if validation fails
102
+
81
103
  Returns:
82
104
  ToolOutput: Object containing:
83
105
  - result (list[str]): List of extracted keywords
84
106
  - logprobs (list | None): Probability data if logprobs enabled
85
107
  - analysis (str | None): Detailed reasoning if with_analysis enabled
108
+ - errors (list[str] | None): Errors occurred during tool call
86
109
  """
87
110
  return await self._operator.run(
88
111
  # User parameters
@@ -116,11 +139,23 @@ class AsyncTheTool:
116
139
  """
117
140
  Perform Named Entity Recognition (NER) over the input text.
118
141
 
142
+ Arguments:
143
+ text: The input text to extract entities from
144
+ with_analysis: Whether to include detailed reasoning analysis
145
+ output_lang: Language for the output response
146
+ user_prompt: Additional instructions for entity extraction
147
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
148
+ logprobs: Whether to return token probability information
149
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
150
+ validator: Custom validation function to validate the output
151
+ max_validation_retries: Maximum number of retry attempts if validation fails
152
+
119
153
  Returns:
120
154
  ToolOutput: Object containing:
121
155
  - result (list[dict]): List of entities with 'text' and 'type' keys
122
156
  - logprobs (list | None): Probability data if logprobs enabled
123
157
  - analysis (str | None): Detailed reasoning if with_analysis enabled
158
+ - errors (list[str] | None): Errors occurred during tool call
124
159
  """
125
160
  return await self._operator.run(
126
161
  # User parameters
@@ -153,11 +188,22 @@ class AsyncTheTool:
153
188
  """
154
189
  Detect if the input is phrased as a question.
155
190
 
191
+ Arguments:
192
+ text: The input text to analyze
193
+ with_analysis: Whether to include detailed reasoning analysis
194
+ user_prompt: Additional instructions for question detection
195
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
196
+ logprobs: Whether to return token probability information
197
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
198
+ validator: Custom validation function to validate the output
199
+ max_validation_retries: Maximum number of retry attempts if validation fails
200
+
156
201
  Returns:
157
202
  ToolOutput: Object containing:
158
203
  - result (bool): True if text is a question, False otherwise
159
204
  - logprobs (list | None): Probability data if logprobs enabled
160
205
  - analysis (str | None): Detailed reasoning if with_analysis enabled
206
+ - errors (list[str] | None): Errors occurred during tool call
161
207
  """
162
208
  return await self._operator.run(
163
209
  # User parameters
@@ -191,11 +237,23 @@ class AsyncTheTool:
191
237
  """
192
238
  Generate a single question from the given text.
193
239
 
240
+ Arguments:
241
+ text: The input text to generate a question from
242
+ with_analysis: Whether to include detailed reasoning analysis
243
+ output_lang: Language for the output question
244
+ user_prompt: Additional instructions for question generation
245
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
246
+ logprobs: Whether to return token probability information
247
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
248
+ validator: Custom validation function to validate the output
249
+ max_validation_retries: Maximum number of retry attempts if validation fails
250
+
194
251
  Returns:
195
252
  ToolOutput: Object containing:
196
253
  - result (str): The generated question
197
254
  - logprobs (list | None): Probability data if logprobs enabled
198
255
  - analysis (str | None): Detailed reasoning if with_analysis enabled
256
+ - errors (list[str] | None): Errors occurred during tool call
199
257
  """
200
258
  return await self._operator.run(
201
259
  # User parameters
@@ -230,11 +288,24 @@ class AsyncTheTool:
230
288
  """
231
289
  Merge multiple questions into a single unified question.
232
290
 
291
+ Arguments:
292
+ text: List of questions to merge
293
+ with_analysis: Whether to include detailed reasoning analysis
294
+ output_lang: Language for the output merged question
295
+ user_prompt: Additional instructions for question merging
296
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
297
+ logprobs: Whether to return token probability information
298
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
299
+ mode: Merging strategy - 'default' for direct merge, 'reason' for reasoned merge
300
+ validator: Custom validation function to validate the output
301
+ max_validation_retries: Maximum number of retry attempts if validation fails
302
+
233
303
  Returns:
234
304
  ToolOutput: Object containing:
235
305
  - result (str): The merged question
236
306
  - logprobs (list | None): Probability data if logprobs enabled
237
307
  - analysis (str | None): Detailed reasoning if with_analysis enabled
308
+ - errors (list[str] | None): Errors occurred during tool call
238
309
  """
239
310
  text = ", ".join(text)
240
311
  return await self._operator.run(
@@ -270,11 +341,24 @@ class AsyncTheTool:
270
341
  """
271
342
  Rewrite a text with different modes.
272
343
 
344
+ Arguments:
345
+ text: The input text to rewrite
346
+ with_analysis: Whether to include detailed reasoning analysis
347
+ output_lang: Language for the output rewritten text
348
+ user_prompt: Additional instructions for rewriting
349
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
350
+ logprobs: Whether to return token probability information
351
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
352
+ mode: Rewriting mode - 'positive', 'negative', or 'hard_negative'
353
+ validator: Custom validation function to validate the output
354
+ max_validation_retries: Maximum number of retry attempts if validation fails
355
+
273
356
  Returns:
274
357
  ToolOutput: Object containing:
275
358
  - result (str): The rewritten text
276
359
  - logprobs (list | None): Probability data if logprobs enabled
277
360
  - analysis (str | None): Detailed reasoning if with_analysis enabled
361
+ - errors (list[str] | None): Errors occurred during tool call
278
362
  """
279
363
  return await self._operator.run(
280
364
  # User parameters
@@ -309,11 +393,24 @@ class AsyncTheTool:
309
393
  """
310
394
  Generate a list of questions about a subject.
311
395
 
396
+ Arguments:
397
+ text: The subject text to generate questions about
398
+ number_of_questions: Number of questions to generate
399
+ with_analysis: Whether to include detailed reasoning analysis
400
+ output_lang: Language for the output questions
401
+ user_prompt: Additional instructions for question generation
402
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
403
+ logprobs: Whether to return token probability information
404
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
405
+ validator: Custom validation function to validate the output
406
+ max_validation_retries: Maximum number of retry attempts if validation fails
407
+
312
408
  Returns:
313
409
  ToolOutput: Object containing:
314
410
  - result (list[str]): List of generated questions
315
411
  - logprobs (list | None): Probability data if logprobs enabled
316
412
  - analysis (str | None): Detailed reasoning if with_analysis enabled
413
+ - errors (list[str] | None): Errors occurred during tool call
317
414
  """
318
415
  return await self._operator.run(
319
416
  # User parameters
@@ -348,11 +445,23 @@ class AsyncTheTool:
348
445
  """
349
446
  Summarize the given subject text.
350
447
 
448
+ Arguments:
449
+ text: The input text to summarize
450
+ with_analysis: Whether to include detailed reasoning analysis
451
+ output_lang: Language for the output summary
452
+ user_prompt: Additional instructions for summarization
453
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
454
+ logprobs: Whether to return token probability information
455
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
456
+ validator: Custom validation function to validate the output
457
+ max_validation_retries: Maximum number of retry attempts if validation fails
458
+
351
459
  Returns:
352
460
  ToolOutput: Object containing:
353
461
  - result (str): The summary text
354
462
  - logprobs (list | None): Probability data if logprobs enabled
355
463
  - analysis (str | None): Detailed reasoning if with_analysis enabled
464
+ - errors (list[str] | None): Errors occurred during tool call
356
465
  """
357
466
  return await self._operator.run(
358
467
  # User parameters
@@ -386,11 +495,23 @@ class AsyncTheTool:
386
495
  """
387
496
  Translate text between languages.
388
497
 
498
+ Arguments:
499
+ text: The input text to translate
500
+ target_language: The target language for translation
501
+ with_analysis: Whether to include detailed reasoning analysis
502
+ user_prompt: Additional instructions for translation
503
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
504
+ logprobs: Whether to return token probability information
505
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
506
+ validator: Custom validation function to validate the output
507
+ max_validation_retries: Maximum number of retry attempts if validation fails
508
+
389
509
  Returns:
390
510
  ToolOutput: Object containing:
391
511
  - result (str): The translated text
392
512
  - logprobs (list | None): Probability data if logprobs enabled
393
513
  - analysis (str | None): Detailed reasoning if with_analysis enabled
514
+ - errors (list[str] | None): Errors occurred during tool call
394
515
  """
395
516
  return await self._operator.run(
396
517
  # User parameters
@@ -418,13 +539,27 @@ class AsyncTheTool:
418
539
  temperature: float | None = None,
419
540
  logprobs: bool | None = None,
420
541
  top_logprobs: int | None = None,
542
+ validator: Callable[[Any], bool] | None = None,
543
+ max_validation_retries: int | None = None,
421
544
  ) -> OM.ToolOutput:
422
545
  """
423
546
  Custom tool that can do almost anything!
424
547
 
548
+ Arguments:
549
+ text: The user prompt
550
+ output_lang: Language for the output response
551
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
552
+ logprobs: Whether to return token probability information
553
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
554
+ validator: Custom validation function to validate the output
555
+ max_validation_retries: Maximum number of retry attempts if validation fails
556
+
425
557
  Returns:
426
- ToolOutput: Object with fields:
427
- - result (str): The output result
558
+ ToolOutput: Object containing:
559
+ - result (str): The output result
560
+ - logprobs (list | None): Probability data if logprobs enabled
561
+ - analysis (str | None): Detailed reasoning if with_analysis enabled
562
+ - errors (list[str] | None): Errors occurred during tool call
428
563
  """
429
564
  return await self._operator.run(
430
565
  # User paramaeters
@@ -435,10 +570,11 @@ class AsyncTheTool:
435
570
  temperature=temperature,
436
571
  logprobs=logprobs,
437
572
  top_logprobs=top_logprobs,
573
+ validator=validator,
574
+ max_validation_retries=max_validation_retries,
438
575
  # Internal parameters
439
576
  prompt_file="run_custom.yaml",
440
577
  user_prompt=None,
441
578
  with_analysis=False,
442
579
  mode=None,
443
- validator=None,
444
580
  )
@@ -5,8 +5,7 @@ from openai import AsyncOpenAI
5
5
  from pydantic import BaseModel
6
6
 
7
7
  from texttools.tools.internals.output_models import ToolOutput
8
- from texttools.tools.internals.base_operator import BaseOperator
9
- from texttools.tools.internals.formatters import Formatter
8
+ from texttools.tools.internals.operator_utils import OperatorUtils
10
9
  from texttools.tools.internals.prompt_loader import PromptLoader
11
10
 
12
11
  # Base Model type for output models
@@ -15,7 +14,7 @@ T = TypeVar("T", bound=BaseModel)
15
14
  logger = logging.getLogger("texttools.async_operator")
16
15
 
17
16
 
18
- class AsyncOperator(BaseOperator):
17
+ class AsyncOperator:
19
18
  """
20
19
  Core engine for running text-processing operations with an LLM (Async).
21
20
 
@@ -26,7 +25,8 @@ class AsyncOperator(BaseOperator):
26
25
  """
27
26
 
28
27
  def __init__(self, client: AsyncOpenAI, model: str):
29
- super().__init__(client, model)
28
+ self._client = client
29
+ self._model = model
30
30
 
31
31
  async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
32
32
  """
@@ -34,7 +34,7 @@ class AsyncOperator(BaseOperator):
34
34
  Returns the analyzed content as a string.
35
35
  """
36
36
  analyze_prompt = prompt_configs["analyze_template"]
37
- analyze_message = [self._build_user_message(analyze_prompt)]
37
+ analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
38
38
  completion = await self._client.chat.completions.create(
39
39
  model=self._model,
40
40
  messages=analyze_message,
@@ -53,7 +53,7 @@ class AsyncOperator(BaseOperator):
53
53
  ) -> tuple[T, Any]:
54
54
  """
55
55
  Parses a chat completion using OpenAI's structured output format.
56
- Returns both the parsed object and the raw completion for logging.
56
+ Returns both the parsed object and the raw completion for logprobs.
57
57
  """
58
58
  request_kwargs = {
59
59
  "model": self._model,
@@ -92,7 +92,6 @@ class AsyncOperator(BaseOperator):
92
92
  Execute the async LLM pipeline with the given input text. (Async)
93
93
  """
94
94
  prompt_loader = PromptLoader()
95
- formatter = Formatter()
96
95
  output = ToolOutput()
97
96
 
98
97
  try:
@@ -109,35 +108,33 @@ class AsyncOperator(BaseOperator):
109
108
  if with_analysis:
110
109
  analysis = await self._analyze(prompt_configs, temperature)
111
110
  messages.append(
112
- self._build_user_message(f"Based on this analysis: {analysis}")
111
+ OperatorUtils.build_user_message(
112
+ f"Based on this analysis: {analysis}"
113
+ )
113
114
  )
114
115
 
115
116
  if output_lang:
116
117
  messages.append(
117
- self._build_user_message(
118
+ OperatorUtils.build_user_message(
118
119
  f"Respond only in the {output_lang} language."
119
120
  )
120
121
  )
121
122
 
122
123
  if user_prompt:
123
124
  messages.append(
124
- self._build_user_message(f"Consider this instruction {user_prompt}")
125
+ OperatorUtils.build_user_message(
126
+ f"Consider this instruction {user_prompt}"
127
+ )
125
128
  )
126
129
 
127
- messages.append(self._build_user_message(prompt_configs["main_template"]))
128
- messages = formatter.user_merge_format(messages)
130
+ messages.append(
131
+ OperatorUtils.build_user_message(prompt_configs["main_template"])
132
+ )
129
133
 
130
134
  parsed, completion = await self._parse_completion(
131
135
  messages, output_model, temperature, logprobs, top_logprobs
132
136
  )
133
137
 
134
- # Ensure output_model has a `result` field
135
- if not hasattr(parsed, "result"):
136
- error = "The provided output_model must define a field named 'result'"
137
- logger.error(error)
138
- output.errors.append(error)
139
- return output
140
-
141
138
  output.result = parsed.result
142
139
 
143
140
  # Retry logic if validation fails
@@ -148,7 +145,7 @@ class AsyncOperator(BaseOperator):
148
145
  )
149
146
 
150
147
  # Generate new temperature for retry
151
- retry_temperature = self._get_retry_temp(temperature)
148
+ retry_temperature = OperatorUtils.get_retry_temp(temperature)
152
149
  try:
153
150
  parsed, completion = await self._parse_completion(
154
151
  messages,
@@ -180,7 +177,7 @@ class AsyncOperator(BaseOperator):
180
177
  output.errors.append("Validation failed after all retry attempts")
181
178
 
182
179
  if logprobs:
183
- output.logprobs = self._extract_logprobs(completion)
180
+ output.logprobs = OperatorUtils.extract_logprobs(completion)
184
181
 
185
182
  if with_analysis:
186
183
  output.analysis = analysis
@@ -1,26 +1,15 @@
1
- from typing import TypeVar, Any, Union
2
1
  import re
3
2
  import math
4
3
  import random
5
4
 
6
- from pydantic import BaseModel
7
- from openai import OpenAI, AsyncOpenAI
8
5
 
9
- # Base Model type for output models
10
- T = TypeVar("T", bound=BaseModel)
11
-
12
- ClientType = Union[OpenAI, AsyncOpenAI]
13
-
14
-
15
- class BaseOperator:
16
- def __init__(self, client: ClientType, model: str):
17
- self._client = client
18
- self._model = model
19
-
20
- def _build_user_message(self, prompt: str) -> dict[str, str]:
6
+ class OperatorUtils:
7
+ @staticmethod
8
+ def build_user_message(prompt: str) -> dict[str, str]:
21
9
  return {"role": "user", "content": prompt}
22
10
 
23
- def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
11
+ @staticmethod
12
+ def extract_logprobs(completion: dict) -> list[dict]:
24
13
  """
25
14
  Extracts and filters token probabilities from completion logprobs.
26
15
  Skips punctuation and structural tokens, returns cleaned probability data.
@@ -54,7 +43,8 @@ class BaseOperator:
54
43
 
55
44
  return logprobs_data
56
45
 
57
- def _get_retry_temp(self, base_temp: float) -> float:
46
+ @staticmethod
47
+ def get_retry_temp(base_temp: float) -> float:
58
48
  """
59
49
  Calculate temperature for retry attempts.
60
50
  """
@@ -5,7 +5,7 @@ from openai import OpenAI
5
5
  from pydantic import BaseModel
6
6
 
7
7
  from texttools.tools.internals.output_models import ToolOutput
8
- from texttools.tools.internals.base_operator import BaseOperator
8
+ from texttools.tools.internals.operator_utils import OperatorUtils
9
9
  from texttools.tools.internals.prompt_loader import PromptLoader
10
10
 
11
11
  # Base Model type for output models
@@ -14,7 +14,7 @@ T = TypeVar("T", bound=BaseModel)
14
14
  logger = logging.getLogger("texttools.operator")
15
15
 
16
16
 
17
- class Operator(BaseOperator):
17
+ class Operator:
18
18
  """
19
19
  Core engine for running text-processing operations with an LLM (Sync).
20
20
 
@@ -25,7 +25,8 @@ class Operator(BaseOperator):
25
25
  """
26
26
 
27
27
  def __init__(self, client: OpenAI, model: str):
28
- super().__init__(client, model)
28
+ self._client = client
29
+ self._model = model
29
30
 
30
31
  def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
31
32
  """
@@ -33,7 +34,7 @@ class Operator(BaseOperator):
33
34
  Returns the analyzed content as a string.
34
35
  """
35
36
  analyze_prompt = prompt_configs["analyze_template"]
36
- analyze_message = [self._build_user_message(analyze_prompt)]
37
+ analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
37
38
  completion = self._client.chat.completions.create(
38
39
  model=self._model,
39
40
  messages=analyze_message,
@@ -52,7 +53,7 @@ class Operator(BaseOperator):
52
53
  ) -> tuple[T, Any]:
53
54
  """
54
55
  Parses a chat completion using OpenAI's structured output format.
55
- Returns both the parsed object and the raw completion for logging.
56
+ Returns both the parsed object and the raw completion for logprobs.
56
57
  """
57
58
  request_kwargs = {
58
59
  "model": self._model,
@@ -107,35 +108,33 @@ class Operator(BaseOperator):
107
108
  if with_analysis:
108
109
  analysis = self._analyze(prompt_configs, temperature)
109
110
  messages.append(
110
- self._build_user_message(f"Based on this analysis: {analysis}")
111
+ OperatorUtils.build_user_message(
112
+ f"Based on this analysis: {analysis}"
113
+ )
111
114
  )
112
115
 
113
116
  if output_lang:
114
117
  messages.append(
115
- self._build_user_message(
118
+ OperatorUtils.build_user_message(
116
119
  f"Respond only in the {output_lang} language."
117
120
  )
118
121
  )
119
122
 
120
123
  if user_prompt:
121
124
  messages.append(
122
- self._build_user_message(f"Consider this instruction {user_prompt}")
125
+ OperatorUtils.build_user_message(
126
+ f"Consider this instruction {user_prompt}"
127
+ )
123
128
  )
124
129
 
125
- messages.append(self._build_user_message(prompt_configs["main_template"]))
126
- messages
130
+ messages.append(
131
+ OperatorUtils.build_user_message(prompt_configs["main_template"])
132
+ )
127
133
 
128
134
  parsed, completion = self._parse_completion(
129
135
  messages, output_model, temperature, logprobs, top_logprobs
130
136
  )
131
137
 
132
- # Ensure output_model has a `result` field
133
- if not hasattr(parsed, "result"):
134
- error = "The provided output_model must define a field named 'result'"
135
- logger.error(error)
136
- output.errors.append(error)
137
- return output
138
-
139
138
  output.result = parsed.result
140
139
 
141
140
  # Retry logic if validation fails
@@ -146,7 +145,7 @@ class Operator(BaseOperator):
146
145
  )
147
146
 
148
147
  # Generate new temperature for retry
149
- retry_temperature = self._get_retry_temp(temperature)
148
+ retry_temperature = OperatorUtils.get_retry_temp(temperature)
150
149
  try:
151
150
  parsed, completion = self._parse_completion(
152
151
  messages,
@@ -178,7 +177,7 @@ class Operator(BaseOperator):
178
177
  output.errors.append("Validation failed after all retry attempts")
179
178
 
180
179
  if logprobs:
181
- output.logprobs = self._extract_logprobs(completion)
180
+ output.logprobs = OperatorUtils.extract_logprobs(completion)
182
181
 
183
182
  if with_analysis:
184
183
  output.analysis = analysis
@@ -38,11 +38,22 @@ class TheTool:
38
38
  """
39
39
  Categorize a text into a single Islamic studies domain category.
40
40
 
41
+ Arguments:
42
+ text: The input text to categorize
43
+ with_analysis: Whether to include detailed reasoning analysis
44
+ user_prompt: Additional instructions for the categorization
45
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
46
+ logprobs: Whether to return token probability information
47
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
48
+ validator: Custom validation function to validate the output
49
+ max_validation_retries: Maximum number of retry attempts if validation fails
50
+
41
51
  Returns:
42
52
  ToolOutput: Object containing:
43
53
  - result (str): The assigned Islamic studies category
44
54
  - logprobs (list | None): Probability data if logprobs enabled
45
55
  - analysis (str | None): Detailed reasoning if with_analysis enabled
56
+ - errors (list[str] | None): Errors occurred during tool call
46
57
  """
47
58
  return self._operator.run(
48
59
  # User parameters
@@ -76,11 +87,23 @@ class TheTool:
76
87
  """
77
88
  Extract salient keywords from text.
78
89
 
90
+ Arguments:
91
+ text: The input text to extract keywords from
92
+ with_analysis: Whether to include detailed reasoning analysis
93
+ output_lang: Language for the output response
94
+ user_prompt: Additional instructions for keyword extraction
95
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
96
+ logprobs: Whether to return token probability information
97
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
98
+ validator: Custom validation function to validate the output
99
+ max_validation_retries: Maximum number of retry attempts if validation fails
100
+
79
101
  Returns:
80
102
  ToolOutput: Object containing:
81
103
  - result (list[str]): List of extracted keywords
82
104
  - logprobs (list | None): Probability data if logprobs enabled
83
105
  - analysis (str | None): Detailed reasoning if with_analysis enabled
106
+ - errors (list[str] | None): Errors occurred during tool call
84
107
  """
85
108
  return self._operator.run(
86
109
  # User parameters
@@ -114,11 +137,23 @@ class TheTool:
114
137
  """
115
138
  Perform Named Entity Recognition (NER) over the input text.
116
139
 
140
+ Arguments:
141
+ text: The input text to extract entities from
142
+ with_analysis: Whether to include detailed reasoning analysis
143
+ output_lang: Language for the output response
144
+ user_prompt: Additional instructions for entity extraction
145
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
146
+ logprobs: Whether to return token probability information
147
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
148
+ validator: Custom validation function to validate the output
149
+ max_validation_retries: Maximum number of retry attempts if validation fails
150
+
117
151
  Returns:
118
152
  ToolOutput: Object containing:
119
153
  - result (list[dict]): List of entities with 'text' and 'type' keys
120
154
  - logprobs (list | None): Probability data if logprobs enabled
121
155
  - analysis (str | None): Detailed reasoning if with_analysis enabled
156
+ - errors (list[str] | None): Errors occurred during tool call
122
157
  """
123
158
  return self._operator.run(
124
159
  # User parameters
@@ -151,11 +186,22 @@ class TheTool:
151
186
  """
152
187
  Detect if the input is phrased as a question.
153
188
 
189
+ Arguments:
190
+ text: The input text to analyze
191
+ with_analysis: Whether to include detailed reasoning analysis
192
+ user_prompt: Additional instructions for question detection
193
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
194
+ logprobs: Whether to return token probability information
195
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
196
+ validator: Custom validation function to validate the output
197
+ max_validation_retries: Maximum number of retry attempts if validation fails
198
+
154
199
  Returns:
155
200
  ToolOutput: Object containing:
156
201
  - result (bool): True if text is a question, False otherwise
157
202
  - logprobs (list | None): Probability data if logprobs enabled
158
203
  - analysis (str | None): Detailed reasoning if with_analysis enabled
204
+ - errors (list[str] | None): Errors occurred during tool call
159
205
  """
160
206
  return self._operator.run(
161
207
  # User parameters
@@ -189,11 +235,23 @@ class TheTool:
189
235
  """
190
236
  Generate a single question from the given text.
191
237
 
238
+ Arguments:
239
+ text: The input text to generate a question from
240
+ with_analysis: Whether to include detailed reasoning analysis
241
+ output_lang: Language for the output question
242
+ user_prompt: Additional instructions for question generation
243
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
244
+ logprobs: Whether to return token probability information
245
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
246
+ validator: Custom validation function to validate the output
247
+ max_validation_retries: Maximum number of retry attempts if validation fails
248
+
192
249
  Returns:
193
250
  ToolOutput: Object containing:
194
251
  - result (str): The generated question
195
252
  - logprobs (list | None): Probability data if logprobs enabled
196
253
  - analysis (str | None): Detailed reasoning if with_analysis enabled
254
+ - errors (list[str] | None): Errors occurred during tool call
197
255
  """
198
256
  return self._operator.run(
199
257
  # User parameters
@@ -228,11 +286,24 @@ class TheTool:
228
286
  """
229
287
  Merge multiple questions into a single unified question.
230
288
 
289
+ Arguments:
290
+ text: List of questions to merge
291
+ with_analysis: Whether to include detailed reasoning analysis
292
+ output_lang: Language for the output merged question
293
+ user_prompt: Additional instructions for question merging
294
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
295
+ logprobs: Whether to return token probability information
296
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
297
+ mode: Merging strategy - 'default' for direct merge, 'reason' for reasoned merge
298
+ validator: Custom validation function to validate the output
299
+ max_validation_retries: Maximum number of retry attempts if validation fails
300
+
231
301
  Returns:
232
302
  ToolOutput: Object containing:
233
303
  - result (str): The merged question
234
304
  - logprobs (list | None): Probability data if logprobs enabled
235
305
  - analysis (str | None): Detailed reasoning if with_analysis enabled
306
+ - errors (list[str] | None): Errors occurred during tool call
236
307
  """
237
308
  text = ", ".join(text)
238
309
  return self._operator.run(
@@ -268,11 +339,24 @@ class TheTool:
268
339
  """
269
340
  Rewrite a text with different modes.
270
341
 
342
+ Arguments:
343
+ text: The input text to rewrite
344
+ with_analysis: Whether to include detailed reasoning analysis
345
+ output_lang: Language for the output rewritten text
346
+ user_prompt: Additional instructions for rewriting
347
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
348
+ logprobs: Whether to return token probability information
349
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
350
+ mode: Rewriting mode - 'positive', 'negative', or 'hard_negative'
351
+ validator: Custom validation function to validate the output
352
+ max_validation_retries: Maximum number of retry attempts if validation fails
353
+
271
354
  Returns:
272
355
  ToolOutput: Object containing:
273
356
  - result (str): The rewritten text
274
357
  - logprobs (list | None): Probability data if logprobs enabled
275
358
  - analysis (str | None): Detailed reasoning if with_analysis enabled
359
+ - errors (list[str] | None): Errors occurred during tool call
276
360
  """
277
361
  return self._operator.run(
278
362
  # User parameters
@@ -307,11 +391,24 @@ class TheTool:
307
391
  """
308
392
  Generate a list of questions about a subject.
309
393
 
394
+ Arguments:
395
+ text: The subject text to generate questions about
396
+ number_of_questions: Number of questions to generate
397
+ with_analysis: Whether to include detailed reasoning analysis
398
+ output_lang: Language for the output questions
399
+ user_prompt: Additional instructions for question generation
400
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
401
+ logprobs: Whether to return token probability information
402
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
403
+ validator: Custom validation function to validate the output
404
+ max_validation_retries: Maximum number of retry attempts if validation fails
405
+
310
406
  Returns:
311
407
  ToolOutput: Object containing:
312
408
  - result (list[str]): List of generated questions
313
409
  - logprobs (list | None): Probability data if logprobs enabled
314
410
  - analysis (str | None): Detailed reasoning if with_analysis enabled
411
+ - errors (list[str] | None): Errors occurred during tool call
315
412
  """
316
413
  return self._operator.run(
317
414
  # User parameters
@@ -346,11 +443,23 @@ class TheTool:
346
443
  """
347
444
  Summarize the given subject text.
348
445
 
446
+ Arguments:
447
+ text: The input text to summarize
448
+ with_analysis: Whether to include detailed reasoning analysis
449
+ output_lang: Language for the output summary
450
+ user_prompt: Additional instructions for summarization
451
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
452
+ logprobs: Whether to return token probability information
453
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
454
+ validator: Custom validation function to validate the output
455
+ max_validation_retries: Maximum number of retry attempts if validation fails
456
+
349
457
  Returns:
350
458
  ToolOutput: Object containing:
351
459
  - result (str): The summary text
352
460
  - logprobs (list | None): Probability data if logprobs enabled
353
461
  - analysis (str | None): Detailed reasoning if with_analysis enabled
462
+ - errors (list[str] | None): Errors occurred during tool call
354
463
  """
355
464
  return self._operator.run(
356
465
  # User parameters
@@ -384,11 +493,23 @@ class TheTool:
384
493
  """
385
494
  Translate text between languages.
386
495
 
496
+ Arguments:
497
+ text: The input text to translate
498
+ target_language: The target language for translation
499
+ with_analysis: Whether to include detailed reasoning analysis
500
+ user_prompt: Additional instructions for translation
501
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
502
+ logprobs: Whether to return token probability information
503
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
504
+ validator: Custom validation function to validate the output
505
+ max_validation_retries: Maximum number of retry attempts if validation fails
506
+
387
507
  Returns:
388
508
  ToolOutput: Object containing:
389
509
  - result (str): The translated text
390
510
  - logprobs (list | None): Probability data if logprobs enabled
391
511
  - analysis (str | None): Detailed reasoning if with_analysis enabled
512
+ - errors (list[str] | None): Errors occurred during tool call
392
513
  """
393
514
  return self._operator.run(
394
515
  # User parameters
@@ -416,13 +537,27 @@ class TheTool:
416
537
  temperature: float | None = None,
417
538
  logprobs: bool | None = None,
418
539
  top_logprobs: int | None = None,
540
+ validator: Callable[[Any], bool] | None = None,
541
+ max_validation_retries: int | None = None,
419
542
  ) -> OM.ToolOutput:
420
543
  """
421
544
  Custom tool that can do almost anything!
422
545
 
546
+ Arguments:
547
+ text: The user prompt
548
+ output_lang: Language for the output response
549
+ temperature: Controls randomness (0.0 = deterministic, 1.0 = creative)
550
+ logprobs: Whether to return token probability information
551
+ top_logprobs: Number of top token alternatives to return if logprobs enabled
552
+ validator: Custom validation function to validate the output
553
+ max_validation_retries: Maximum number of retry attempts if validation fails
554
+
423
555
  Returns:
424
- ToolOutput: Object with fields:
425
- - result (str): The output result
556
+ ToolOutput: Object containing:
557
+ - result (str): The output result
558
+ - logprobs (list | None): Probability data if logprobs enabled
559
+ - analysis (str | None): Detailed reasoning if with_analysis enabled
560
+ - errors (list[str] | None): Errors occurred during tool call
426
561
  """
427
562
  return self._operator.run(
428
563
  # User paramaeters
@@ -433,10 +568,11 @@ class TheTool:
433
568
  temperature=temperature,
434
569
  logprobs=logprobs,
435
570
  top_logprobs=top_logprobs,
571
+ validator=validator,
572
+ max_validation_retries=max_validation_retries,
436
573
  # Internal parameters
437
574
  prompt_file="run_custom.yaml",
438
575
  user_prompt=None,
439
576
  with_analysis=False,
440
577
  mode=None,
441
- validator=None,
442
578
  )
@@ -1,24 +0,0 @@
1
- class Formatter:
2
- @staticmethod
3
- def user_merge_format(messages: list[dict[str, str]]) -> list[dict[str, str]]:
4
- """
5
- Merges consecutive user messages into a single message, separated by newlines.
6
-
7
- This is useful for condensing a multi-turn user input into a single
8
- message for the LLM. Assistant and system messages are left unchanged and
9
- act as separators between user message groups.
10
- """
11
- merged: list[dict[str, str]] = []
12
-
13
- for message in messages:
14
- role, content = message["role"], message["content"].strip()
15
-
16
- # Merge with previous user turn
17
- if merged and role == "user" and merged[-1]["role"] == "user":
18
- merged[-1]["content"] += "\n" + content
19
-
20
- # Otherwise, start a new turn
21
- else:
22
- merged.append({"role": role, "content": content})
23
-
24
- return merged