hamtaa-texttools 1.1.17__tar.gz → 1.1.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {hamtaa_texttools-1.1.17/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.18}/PKG-INFO +1 -1
  2. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18/hamtaa_texttools.egg-info}/PKG-INFO +1 -1
  3. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/hamtaa_texttools.egg-info/SOURCES.txt +8 -7
  4. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/pyproject.toml +1 -1
  5. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/tests/test_all_async_tools.py +8 -2
  6. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/tests/test_all_tools.py +12 -3
  7. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/tests/test_output_validation.py +2 -2
  8. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/__init__.py +1 -1
  9. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/batch/batch_runner.py +75 -64
  10. {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.18/texttools}/internals/async_operator.py +96 -48
  11. hamtaa_texttools-1.1.18/texttools/internals/exceptions.py +28 -0
  12. {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.18/texttools}/internals/models.py +2 -2
  13. hamtaa_texttools-1.1.18/texttools/internals/prompt_loader.py +80 -0
  14. {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.18/texttools}/internals/sync_operator.py +92 -47
  15. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/tools/async_tools.py +551 -341
  16. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/tools/sync_tools.py +548 -339
  17. hamtaa_texttools-1.1.17/texttools/tools/internals/prompt_loader.py +0 -56
  18. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/LICENSE +0 -0
  19. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/MANIFEST.in +0 -0
  20. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/README.md +0 -0
  21. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  22. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/hamtaa_texttools.egg-info/requires.txt +0 -0
  23. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  24. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/setup.cfg +0 -0
  25. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/batch/batch_config.py +0 -0
  26. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/batch/internals/batch_manager.py +0 -0
  27. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/batch/internals/utils.py +0 -0
  28. {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.18/texttools}/internals/formatters.py +0 -0
  29. {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.18/texttools}/internals/operator_utils.py +0 -0
  30. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/README.md +0 -0
  31. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/categorize.yaml +0 -0
  32. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/detect_entity.yaml +0 -0
  33. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/extract_entities.yaml +0 -0
  34. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/extract_keywords.yaml +0 -0
  35. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/is_question.yaml +0 -0
  36. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/merge_questions.yaml +0 -0
  37. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/propositionize.yaml +0 -0
  38. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/rewrite.yaml +0 -0
  39. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/run_custom.yaml +0 -0
  40. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/subject_to_question.yaml +0 -0
  41. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/summarize.yaml +0 -0
  42. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/text_to_question.yaml +0 -0
  43. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.18}/texttools/prompts/translate.yaml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.17
3
+ Version: 1.1.18
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  License: MIT License
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.17
3
+ Version: 1.1.18
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  License: MIT License
@@ -15,6 +15,13 @@ texttools/batch/batch_config.py
15
15
  texttools/batch/batch_runner.py
16
16
  texttools/batch/internals/batch_manager.py
17
17
  texttools/batch/internals/utils.py
18
+ texttools/internals/async_operator.py
19
+ texttools/internals/exceptions.py
20
+ texttools/internals/formatters.py
21
+ texttools/internals/models.py
22
+ texttools/internals/operator_utils.py
23
+ texttools/internals/prompt_loader.py
24
+ texttools/internals/sync_operator.py
18
25
  texttools/prompts/README.md
19
26
  texttools/prompts/categorize.yaml
20
27
  texttools/prompts/detect_entity.yaml
@@ -30,10 +37,4 @@ texttools/prompts/summarize.yaml
30
37
  texttools/prompts/text_to_question.yaml
31
38
  texttools/prompts/translate.yaml
32
39
  texttools/tools/async_tools.py
33
- texttools/tools/sync_tools.py
34
- texttools/tools/internals/async_operator.py
35
- texttools/tools/internals/formatters.py
36
- texttools/tools/internals/models.py
37
- texttools/tools/internals/operator_utils.py
38
- texttools/tools/internals/prompt_loader.py
39
- texttools/tools/internals/sync_operator.py
40
+ texttools/tools/sync_tools.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hamtaa-texttools"
7
- version = "1.1.17"
7
+ version = "1.1.18"
8
8
  authors = [
9
9
  { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
10
10
  { name = "Montazer", email = "montazerh82@gmail.com" },
@@ -21,7 +21,10 @@ t = AsyncTheTool(client=client, model=MODEL)
21
21
 
22
22
  async def main():
23
23
  category_task = t.categorize(
24
- "سلام حالت چطوره؟", categories=["هیچکدام", "دینی", "فلسفه"]
24
+ "سلام حالت چطوره؟",
25
+ categories=["هیچکدام", "دینی", "فلسفه"],
26
+ logprobs=True,
27
+ top_logprobs=-1,
25
28
  )
26
29
  keywords_task = t.extract_keywords("Tomorrow, we will be dead by the car crash")
27
30
  entities_task = t.extract_entities("We will be dead by the car crash")
@@ -40,7 +43,10 @@ async def main():
40
43
  questions_task = t.subject_to_question("Friendship", 3)
41
44
  summary_task = t.summarize("Tomorrow, we will be dead by the car crash")
42
45
  translation_task = t.translate("سلام حالت چطوره؟", target_language="English")
43
- propositionize_task = t.propositionize("جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.", output_lang="Persian")
46
+ propositionize_task = t.propositionize(
47
+ "جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
48
+ output_lang="Persian",
49
+ )
44
50
  (
45
51
  category,
46
52
  keywords,
@@ -19,7 +19,12 @@ client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
19
19
  t = TheTool(client=client, model=MODEL)
20
20
 
21
21
  # Categorizer: list mode
22
- category = t.categorize("سلام حالت چطوره؟", categories=["هیچکدام", "دینی", "فلسفه"])
22
+ category = t.categorize(
23
+ "سلام حالت چطوره؟",
24
+ categories=["هیچکدام", "دینی", "فلسفه"],
25
+ logprobs=True,
26
+ top_logprobs=-1,
27
+ )
23
28
  print(repr(category))
24
29
 
25
30
  # Categorizer: tree mode
@@ -46,7 +51,7 @@ keywords = t.extract_keywords(
46
51
  print(repr(keywords))
47
52
 
48
53
  # NER Extractor
49
- entities = t.extract_entities("We will be dead by the car crash")
54
+ entities = t.extract_entities("We will be dead by the car crash", with_analysis=True)
50
55
  print(repr(entities))
51
56
 
52
57
 
@@ -85,9 +90,13 @@ translation = t.translate("سلام حالت چطوره؟", target_language="Eng
85
90
  print(repr(translation))
86
91
 
87
92
  # propositionize
88
- propositionize = t.propositionize("جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.", output_lang="Persian")
93
+ propositionize = t.propositionize(
94
+ "جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
95
+ output_lang="Persian",
96
+ )
89
97
  print(repr(propositionize))
90
98
 
99
+
91
100
  # Custom tool
92
101
  class Student(BaseModel):
93
102
  result: list[dict[str, str]]
@@ -29,7 +29,7 @@ question = t.text_to_question(
29
29
  "زندگی",
30
30
  output_lang="Persian",
31
31
  validator=validate,
32
- max_validation_retries=5,
32
+ max_validation_retries=0,
33
33
  temperature=1.0,
34
34
  )
35
- print(question)
35
+ print(repr(question))
@@ -2,6 +2,6 @@ from .batch.batch_runner import BatchJobRunner
2
2
  from .batch.batch_config import BatchConfig
3
3
  from .tools.sync_tools import TheTool
4
4
  from .tools.async_tools import AsyncTheTool
5
- from .tools.internals.models import CategoryTree
5
+ from .internals.models import CategoryTree
6
6
 
7
7
  __all__ = ["TheTool", "AsyncTheTool", "BatchJobRunner", "BatchConfig", "CategoryTree"]
@@ -11,7 +11,8 @@ from pydantic import BaseModel
11
11
 
12
12
  from texttools.batch.internals.batch_manager import BatchManager
13
13
  from texttools.batch.batch_config import BatchConfig
14
- from texttools.tools.internals.models import StrOutput
14
+ from texttools.internals.models import StrOutput
15
+ from texttools.internals.exceptions import TextToolsError, ConfigurationError
15
16
 
16
17
  # Base Model type for output models
17
18
  T = TypeVar("T", bound=BaseModel)
@@ -27,22 +28,26 @@ class BatchJobRunner:
27
28
  def __init__(
28
29
  self, config: BatchConfig = BatchConfig(), output_model: Type[T] = StrOutput
29
30
  ):
30
- self._config = config
31
- self._system_prompt = config.system_prompt
32
- self._job_name = config.job_name
33
- self._input_data_path = config.input_data_path
34
- self._output_data_filename = config.output_data_filename
35
- self._model = config.model
36
- self._output_model = output_model
37
- self._manager = self._init_manager()
38
- self._data = self._load_data()
39
- self._parts: list[list[dict[str, Any]]] = []
40
- # Map part index to job name
41
- self._part_idx_to_job_name: dict[int, str] = {}
42
- # Track retry attempts per part
43
- self._part_attempts: dict[int, int] = {}
44
- self._partition_data()
45
- Path(self._config.BASE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
31
+ try:
32
+ self._config = config
33
+ self._system_prompt = config.system_prompt
34
+ self._job_name = config.job_name
35
+ self._input_data_path = config.input_data_path
36
+ self._output_data_filename = config.output_data_filename
37
+ self._model = config.model
38
+ self._output_model = output_model
39
+ self._manager = self._init_manager()
40
+ self._data = self._load_data()
41
+ self._parts: list[list[dict[str, Any]]] = []
42
+ # Map part index to job name
43
+ self._part_idx_to_job_name: dict[int, str] = {}
44
+ # Track retry attempts per part
45
+ self._part_attempts: dict[int, int] = {}
46
+ self._partition_data()
47
+ Path(self._config.BASE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
48
+
49
+ except Exception as e:
50
+ raise ConfigurationError(f"Batch runner initialization failed: {e}")
46
51
 
47
52
  def _init_manager(self) -> BatchManager:
48
53
  load_dotenv()
@@ -162,56 +167,62 @@ class BatchJobRunner:
162
167
 
163
168
  Submits jobs, monitors progress, handles retries, and saves results.
164
169
  """
165
- # Submit all jobs up-front for concurrent execution
166
- self._submit_all_jobs()
167
- pending_parts: set[int] = set(self._part_idx_to_job_name.keys())
168
- logger.info(f"Pending parts: {sorted(pending_parts)}")
169
- # Polling loop
170
- while pending_parts:
171
- finished_this_round: list[int] = []
172
- for part_idx in list(pending_parts):
173
- job_name = self._part_idx_to_job_name[part_idx]
174
- status = self._manager.check_status(job_name=job_name)
175
- logger.info(f"Status for {job_name}: {status}")
176
- if status == "completed":
177
- logger.info(
178
- f"Job completed. Fetching results for part {part_idx + 1}..."
179
- )
180
- output_data, log = self._manager.fetch_results(
181
- job_name=job_name, remove_cache=False
182
- )
183
- output_data = self._config.import_function(output_data)
184
- self._save_results(output_data, log, part_idx)
185
- logger.info(f"Fetched and saved results for part {part_idx + 1}.")
186
- finished_this_round.append(part_idx)
187
- elif status == "failed":
188
- attempt = self._part_attempts.get(part_idx, 0) + 1
189
- self._part_attempts[part_idx] = attempt
190
- if attempt <= self._config.max_retries:
170
+ try:
171
+ # Submit all jobs up-front for concurrent execution
172
+ self._submit_all_jobs()
173
+ pending_parts: set[int] = set(self._part_idx_to_job_name.keys())
174
+ logger.info(f"Pending parts: {sorted(pending_parts)}")
175
+ # Polling loop
176
+ while pending_parts:
177
+ finished_this_round: list[int] = []
178
+ for part_idx in list(pending_parts):
179
+ job_name = self._part_idx_to_job_name[part_idx]
180
+ status = self._manager.check_status(job_name=job_name)
181
+ logger.info(f"Status for {job_name}: {status}")
182
+ if status == "completed":
191
183
  logger.info(
192
- f"Job {job_name} failed (attempt {attempt}). Retrying after short backoff..."
184
+ f"Job completed. Fetching results for part {part_idx + 1}..."
193
185
  )
194
- self._manager._clear_state(job_name)
195
- time.sleep(10)
196
- payload = self._to_manager_payload(self._parts[part_idx])
197
- new_job_name = (
198
- f"{self._job_name}_part_{part_idx + 1}_retry_{attempt}"
186
+ output_data, log = self._manager.fetch_results(
187
+ job_name=job_name, remove_cache=False
199
188
  )
200
- self._manager.start(payload, job_name=new_job_name)
201
- self._part_idx_to_job_name[part_idx] = new_job_name
202
- else:
189
+ output_data = self._config.import_function(output_data)
190
+ self._save_results(output_data, log, part_idx)
203
191
  logger.info(
204
- f"Job {job_name} failed after {attempt - 1} retries. Marking as failed."
192
+ f"Fetched and saved results for part {part_idx + 1}."
205
193
  )
206
194
  finished_this_round.append(part_idx)
207
- else:
208
- # Still running or queued
209
- continue
210
- # Remove finished parts
211
- for part_idx in finished_this_round:
212
- pending_parts.discard(part_idx)
213
- if pending_parts:
214
- logger.info(
215
- f"Waiting {self._config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
216
- )
217
- time.sleep(self._config.poll_interval_seconds)
195
+ elif status == "failed":
196
+ attempt = self._part_attempts.get(part_idx, 0) + 1
197
+ self._part_attempts[part_idx] = attempt
198
+ if attempt <= self._config.max_retries:
199
+ logger.info(
200
+ f"Job {job_name} failed (attempt {attempt}). Retrying after short backoff..."
201
+ )
202
+ self._manager._clear_state(job_name)
203
+ time.sleep(10)
204
+ payload = self._to_manager_payload(self._parts[part_idx])
205
+ new_job_name = (
206
+ f"{self._job_name}_part_{part_idx + 1}_retry_{attempt}"
207
+ )
208
+ self._manager.start(payload, job_name=new_job_name)
209
+ self._part_idx_to_job_name[part_idx] = new_job_name
210
+ else:
211
+ logger.info(
212
+ f"Job {job_name} failed after {attempt - 1} retries. Marking as failed."
213
+ )
214
+ finished_this_round.append(part_idx)
215
+ else:
216
+ # Still running or queued
217
+ continue
218
+ # Remove finished parts
219
+ for part_idx in finished_this_round:
220
+ pending_parts.discard(part_idx)
221
+ if pending_parts:
222
+ logger.info(
223
+ f"Waiting {self._config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
224
+ )
225
+ time.sleep(self._config.poll_interval_seconds)
226
+
227
+ except Exception as e:
228
+ raise TextToolsError(f"Batch job execution failed: {e}")
@@ -5,10 +5,16 @@ import logging
5
5
  from openai import AsyncOpenAI
6
6
  from pydantic import BaseModel
7
7
 
8
- from texttools.tools.internals.models import ToolOutput
9
- from texttools.tools.internals.operator_utils import OperatorUtils
10
- from texttools.tools.internals.formatters import Formatter
11
- from texttools.tools.internals.prompt_loader import PromptLoader
8
+ from texttools.internals.models import ToolOutput
9
+ from texttools.internals.operator_utils import OperatorUtils
10
+ from texttools.internals.formatters import Formatter
11
+ from texttools.internals.prompt_loader import PromptLoader
12
+ from texttools.internals.exceptions import (
13
+ TextToolsError,
14
+ LLMError,
15
+ ValidationError,
16
+ PromptError,
17
+ )
12
18
 
13
19
  # Base Model type for output models
14
20
  T = TypeVar("T", bound=BaseModel)
@@ -35,15 +41,33 @@ class AsyncOperator:
35
41
  Calls OpenAI API for analysis using the configured prompt template.
36
42
  Returns the analyzed content as a string.
37
43
  """
38
- analyze_prompt = prompt_configs["analyze_template"]
39
- analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
40
- completion = await self._client.chat.completions.create(
41
- model=self._model,
42
- messages=analyze_message,
43
- temperature=temperature,
44
- )
45
- analysis = completion.choices[0].message.content.strip()
46
- return analysis
44
+ try:
45
+ analyze_prompt = prompt_configs["analyze_template"]
46
+
47
+ if not analyze_prompt:
48
+ raise PromptError("Analyze template is empty")
49
+
50
+ analyze_message = [OperatorUtils.build_user_message(analyze_prompt)]
51
+ completion = await self._client.chat.completions.create(
52
+ model=self._model,
53
+ messages=analyze_message,
54
+ temperature=temperature,
55
+ )
56
+
57
+ if not completion.choices:
58
+ raise LLMError("No choices returned from LLM")
59
+
60
+ analysis = completion.choices[0].message.content.strip()
61
+
62
+ if not analysis:
63
+ raise LLMError("Empty analysis response")
64
+
65
+ return analysis.strip()
66
+
67
+ except Exception as e:
68
+ if isinstance(e, (PromptError, LLMError)):
69
+ raise
70
+ raise LLMError(f"Analysis failed: {e}")
47
71
 
48
72
  async def _parse_completion(
49
73
  self,
@@ -58,21 +82,37 @@ class AsyncOperator:
58
82
  Parses a chat completion using OpenAI's structured output format.
59
83
  Returns both the parsed object and the raw completion for logprobs.
60
84
  """
61
- request_kwargs = {
62
- "model": self._model,
63
- "messages": message,
64
- "response_format": output_model,
65
- "temperature": temperature,
66
- }
67
-
68
- if logprobs:
69
- request_kwargs["logprobs"] = True
70
- request_kwargs["top_logprobs"] = top_logprobs
71
- if priority:
72
- request_kwargs["extra_body"] = {"priority": priority}
73
- completion = await self._client.beta.chat.completions.parse(**request_kwargs)
74
- parsed = completion.choices[0].message.parsed
75
- return parsed, completion
85
+ try:
86
+ request_kwargs = {
87
+ "model": self._model,
88
+ "messages": message,
89
+ "response_format": output_model,
90
+ "temperature": temperature,
91
+ }
92
+
93
+ if logprobs:
94
+ request_kwargs["logprobs"] = True
95
+ request_kwargs["top_logprobs"] = top_logprobs
96
+ if priority:
97
+ request_kwargs["extra_body"] = {"priority": priority}
98
+ completion = await self._client.beta.chat.completions.parse(
99
+ **request_kwargs
100
+ )
101
+
102
+ if not completion.choices:
103
+ raise LLMError("No choices returned from LLM")
104
+
105
+ parsed = completion.choices[0].message.parsed
106
+
107
+ if not parsed:
108
+ raise LLMError("Failed to parse LLM response")
109
+
110
+ return parsed, completion
111
+
112
+ except Exception as e:
113
+ if isinstance(e, LLMError):
114
+ raise
115
+ raise LLMError(f"Completion failed: {e}")
76
116
 
77
117
  async def run(
78
118
  self,
@@ -94,13 +134,13 @@ class AsyncOperator:
94
134
  **extra_kwargs,
95
135
  ) -> ToolOutput:
96
136
  """
97
- Execute the async LLM pipeline with the given input text. (Async)
137
+ Execute the LLM pipeline with the given input text. (Async)
98
138
  """
99
- prompt_loader = PromptLoader()
100
- formatter = Formatter()
101
- output = ToolOutput()
102
-
103
139
  try:
140
+ prompt_loader = PromptLoader()
141
+ formatter = Formatter()
142
+ output = ToolOutput()
143
+
104
144
  # Prompt configs contain two keys: main_template and analyze template, both are string
105
145
  prompt_configs = prompt_loader.load(
106
146
  prompt_file=prompt_file,
@@ -139,6 +179,9 @@ class AsyncOperator:
139
179
 
140
180
  messages = formatter.user_merge_format(messages)
141
181
 
182
+ if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
183
+ raise ValueError("top_logprobs should be an integer greater than 1")
184
+
142
185
  parsed, completion = await self._parse_completion(
143
186
  messages, output_model, temperature, logprobs, top_logprobs, priority
144
187
  )
@@ -147,6 +190,15 @@ class AsyncOperator:
147
190
 
148
191
  # Retry logic if validation fails
149
192
  if validator and not validator(output.result):
193
+ if (
194
+ not isinstance(max_validation_retries, int)
195
+ or max_validation_retries < 1
196
+ ):
197
+ raise ValueError(
198
+ "max_validation_retries should be a positive integer"
199
+ )
200
+
201
+ succeeded = False
150
202
  for attempt in range(max_validation_retries):
151
203
  logger.warning(
152
204
  f"Validation failed, retrying for the {attempt + 1} time."
@@ -154,6 +206,7 @@ class AsyncOperator:
154
206
 
155
207
  # Generate new temperature for retry
156
208
  retry_temperature = OperatorUtils.get_retry_temp(temperature)
209
+
157
210
  try:
158
211
  parsed, completion = await self._parse_completion(
159
212
  messages,
@@ -161,28 +214,23 @@ class AsyncOperator:
161
214
  retry_temperature,
162
215
  logprobs,
163
216
  top_logprobs,
217
+ priority=priority,
164
218
  )
165
219
 
166
220
  output.result = parsed.result
167
221
 
168
222
  # Check if retry was successful
169
223
  if validator(output.result):
170
- logger.info(
171
- f"Validation passed on retry attempt {attempt + 1}"
172
- )
224
+ succeeded = True
173
225
  break
174
- else:
175
- logger.warning(
176
- f"Validation still failing after retry attempt {attempt + 1}"
177
- )
178
226
 
179
- except Exception as e:
227
+ except LLMError as e:
180
228
  logger.error(f"Retry attempt {attempt + 1} failed: {e}")
181
- # Continue to next retry attempt if this one fails
182
229
 
183
- # Final check after all retries
184
- if validator and not validator(output.result):
185
- output.errors.append("Validation failed after all retry attempts")
230
+ if not succeeded:
231
+ raise ValidationError(
232
+ f"Validation failed after {max_validation_retries} retries"
233
+ )
186
234
 
187
235
  if logprobs:
188
236
  output.logprobs = OperatorUtils.extract_logprobs(completion)
@@ -194,7 +242,7 @@ class AsyncOperator:
194
242
 
195
243
  return output
196
244
 
245
+ except (PromptError, LLMError, ValidationError):
246
+ raise
197
247
  except Exception as e:
198
- logger.error(f"AsyncTheTool failed: {e}")
199
- output.errors.append(str(e))
200
- return output
248
+ raise TextToolsError(f"Unexpected error in operator: {e}")
@@ -0,0 +1,28 @@
1
+ class TextToolsError(Exception):
2
+ """Base exception for all TextTools errors."""
3
+
4
+ pass
5
+
6
+
7
+ class PromptError(TextToolsError):
8
+ """Errors related to prompt loading and formatting."""
9
+
10
+ pass
11
+
12
+
13
+ class LLMError(TextToolsError):
14
+ """Errors from LLM API calls."""
15
+
16
+ pass
17
+
18
+
19
+ class ValidationError(TextToolsError):
20
+ """Errors from output validation."""
21
+
22
+ pass
23
+
24
+
25
+ class ConfigurationError(TextToolsError):
26
+ """Errors from misconfiguration."""
27
+
28
+ pass
@@ -8,9 +8,9 @@ class ToolOutput(BaseModel):
8
8
  result: Any = None
9
9
  logprobs: list[dict[str, Any]] = []
10
10
  analysis: str = ""
11
- process: str = ""
11
+ process: str | None = None
12
12
  processed_at: datetime = datetime.now()
13
- execution_time: float = -1.0
13
+ execution_time: float | None = None
14
14
  errors: list[str] = []
15
15
 
16
16
  def __repr__(self) -> str:
@@ -0,0 +1,80 @@
1
+ from functools import lru_cache
2
+ from pathlib import Path
3
+ import yaml
4
+
5
+ from texttools.internals.exceptions import PromptError
6
+
7
+
8
+ class PromptLoader:
9
+ """
10
+ Utility for loading and formatting YAML prompt templates.
11
+
12
+ Responsibilities:
13
+ - Load and parse YAML prompt definitions.
14
+ - Select the right template (by mode, if applicable).
15
+ - Inject variables (`{input}`, plus any extra kwargs) into the templates.
16
+ """
17
+
18
+ MAIN_TEMPLATE = "main_template"
19
+ ANALYZE_TEMPLATE = "analyze_template"
20
+
21
+ @staticmethod
22
+ def _build_format_args(text: str, **extra_kwargs) -> dict[str, str]:
23
+ # Base formatting args
24
+ format_args = {"input": text}
25
+ # Merge extras
26
+ format_args.update(extra_kwargs)
27
+ return format_args
28
+
29
+ # Use lru_cache to load each file once
30
+ @lru_cache(maxsize=32)
31
+ def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
32
+ """
33
+ Loads prompt templates from YAML file with optional mode selection.
34
+ """
35
+ try:
36
+ base_dir = Path(__file__).parent.parent / Path("prompts")
37
+ prompt_path = base_dir / prompt_file
38
+
39
+ if not prompt_path.exists():
40
+ raise PromptError(f"Prompt file not found: {prompt_file}")
41
+
42
+ data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
43
+
44
+ if self.MAIN_TEMPLATE not in data:
45
+ raise PromptError(f"Missing 'main_template' in {prompt_file}")
46
+
47
+ if mode and mode not in data.get(self.MAIN_TEMPLATE, {}):
48
+ raise PromptError(f"Mode '{mode}' not found in {prompt_file}")
49
+
50
+ return {
51
+ self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]
52
+ if mode and isinstance(data[self.MAIN_TEMPLATE], dict)
53
+ else data[self.MAIN_TEMPLATE],
54
+ self.ANALYZE_TEMPLATE: data.get(self.ANALYZE_TEMPLATE, {}).get(mode)
55
+ if mode and isinstance(data.get(self.ANALYZE_TEMPLATE), dict)
56
+ else data.get(self.ANALYZE_TEMPLATE, ""),
57
+ }
58
+
59
+ except yaml.YAMLError as e:
60
+ raise PromptError(f"Invalid YAML in {prompt_file}: {e}")
61
+ except Exception as e:
62
+ raise PromptError(f"Failed to load prompt {prompt_file}: {e}")
63
+
64
+ def load(
65
+ self, prompt_file: str, text: str, mode: str, **extra_kwargs
66
+ ) -> dict[str, str]:
67
+ try:
68
+ template_configs = self._load_templates(prompt_file, mode)
69
+ format_args = self._build_format_args(text, **extra_kwargs)
70
+
71
+ # Inject variables inside each template
72
+ for key in template_configs.keys():
73
+ template_configs[key] = template_configs[key].format(**format_args)
74
+
75
+ return template_configs
76
+
77
+ except KeyError as e:
78
+ raise PromptError(f"Missing template variable: {e}")
79
+ except Exception as e:
80
+ raise PromptError(f"Failed to format prompt: {e}")