hamtaa-texttools 1.1.17__tar.gz → 1.1.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {hamtaa_texttools-1.1.17/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.19}/PKG-INFO +31 -1
  2. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/README.md +30 -0
  3. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19/hamtaa_texttools.egg-info}/PKG-INFO +31 -1
  4. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/hamtaa_texttools.egg-info/SOURCES.txt +9 -8
  5. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/pyproject.toml +1 -1
  6. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/tests/test_all_async_tools.py +12 -2
  7. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/tests/test_all_tools.py +19 -3
  8. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/tests/test_output_validation.py +2 -2
  9. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/__init__.py +1 -1
  10. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/batch/batch_runner.py +75 -64
  11. {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.19/texttools}/internals/async_operator.py +96 -48
  12. hamtaa_texttools-1.1.19/texttools/internals/exceptions.py +28 -0
  13. {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.19/texttools}/internals/models.py +2 -2
  14. hamtaa_texttools-1.1.19/texttools/internals/prompt_loader.py +108 -0
  15. {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.19/texttools}/internals/sync_operator.py +92 -47
  16. hamtaa_texttools-1.1.19/texttools/prompts/check_fact.yaml +19 -0
  17. hamtaa_texttools-1.1.19/texttools/prompts/propositionize.yaml +22 -0
  18. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/run_custom.yaml +1 -1
  19. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/tools/async_tools.py +576 -348
  20. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/tools/sync_tools.py +573 -346
  21. hamtaa_texttools-1.1.17/texttools/prompts/detect_entity.yaml +0 -22
  22. hamtaa_texttools-1.1.17/texttools/prompts/propositionize.yaml +0 -15
  23. hamtaa_texttools-1.1.17/texttools/tools/internals/prompt_loader.py +0 -56
  24. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/LICENSE +0 -0
  25. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/MANIFEST.in +0 -0
  26. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  27. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/hamtaa_texttools.egg-info/requires.txt +0 -0
  28. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  29. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/setup.cfg +0 -0
  30. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/batch/batch_config.py +0 -0
  31. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/batch/internals/batch_manager.py +0 -0
  32. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/batch/internals/utils.py +0 -0
  33. {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.19/texttools}/internals/formatters.py +0 -0
  34. {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.19/texttools}/internals/operator_utils.py +0 -0
  35. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/README.md +0 -0
  36. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/categorize.yaml +0 -0
  37. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/extract_entities.yaml +0 -0
  38. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/extract_keywords.yaml +0 -0
  39. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/is_question.yaml +0 -0
  40. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/merge_questions.yaml +0 -0
  41. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/rewrite.yaml +0 -0
  42. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/subject_to_question.yaml +0 -0
  43. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/summarize.yaml +0 -0
  44. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/text_to_question.yaml +0 -0
  45. {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/translate.yaml +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hamtaa-texttools
- Version: 1.1.17
+ Version: 1.1.19
  Summary: A high-level NLP toolkit built on top of modern LLMs.
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
  License: MIT License
@@ -61,10 +61,40 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
  - **`summarize()`** - Text summarization
  - **`translate()`** - Text translation between languages
  - **`propositionize()`** - Convert text to atomic independence meaningful sentences
+ - **`check_fact()`** - Check a statement is relevant to source text or not
  - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel

  ---

+ ## 📊 Tool Quality Tiers
+
+ | Status | Meaning | Use in Production? |
+ |--------|---------|-------------------|
+ | **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
+
+ ### Current Status
+ **Production Tools:**
+ - `categorize()` (list mode)
+ - `extract_keywords()`
+ - `extract_entities()`
+ - `is_question()`
+ - `text_to_question()`
+ - `merge_questions()`
+ - `rewrite()`
+ - `subject_to_question()`
+ - `summarize()`
+ - `run_custom()` (fine in most cases)
+
+ **Experimental Tools:**
+ - `categorize()` (tree mode)
+ - `translate()`
+ - `propositionize()`
+ - `check_fact()`
+ - `run_custom()` (not evaluated in all scenarios)
+
+ ---
+
  ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters

  TextTools provides several optional flags to customize LLM behavior:
@@ -26,10 +26,40 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
  - **`summarize()`** - Text summarization
  - **`translate()`** - Text translation between languages
  - **`propositionize()`** - Convert text to atomic independence meaningful sentences
+ - **`check_fact()`** - Check a statement is relevant to source text or not
  - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel

  ---

+ ## 📊 Tool Quality Tiers
+
+ | Status | Meaning | Use in Production? |
+ |--------|---------|-------------------|
+ | **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
+
+ ### Current Status
+ **Production Tools:**
+ - `categorize()` (list mode)
+ - `extract_keywords()`
+ - `extract_entities()`
+ - `is_question()`
+ - `text_to_question()`
+ - `merge_questions()`
+ - `rewrite()`
+ - `subject_to_question()`
+ - `summarize()`
+ - `run_custom()` (fine in most cases)
+
+ **Experimental Tools:**
+ - `categorize()` (tree mode)
+ - `translate()`
+ - `propositionize()`
+ - `check_fact()`
+ - `run_custom()` (not evaluated in all scenarios)
+
+ ---
+
  ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters

  TextTools provides several optional flags to customize LLM behavior:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hamtaa-texttools
- Version: 1.1.17
+ Version: 1.1.19
  Summary: A high-level NLP toolkit built on top of modern LLMs.
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
  License: MIT License
@@ -61,10 +61,40 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
  - **`summarize()`** - Text summarization
  - **`translate()`** - Text translation between languages
  - **`propositionize()`** - Convert text to atomic independence meaningful sentences
+ - **`check_fact()`** - Check a statement is relevant to source text or not
  - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel

  ---

+ ## 📊 Tool Quality Tiers
+
+ | Status | Meaning | Use in Production? |
+ |--------|---------|-------------------|
+ | **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
+
+ ### Current Status
+ **Production Tools:**
+ - `categorize()` (list mode)
+ - `extract_keywords()`
+ - `extract_entities()`
+ - `is_question()`
+ - `text_to_question()`
+ - `merge_questions()`
+ - `rewrite()`
+ - `subject_to_question()`
+ - `summarize()`
+ - `run_custom()` (fine in most cases)
+
+ **Experimental Tools:**
+ - `categorize()` (tree mode)
+ - `translate()`
+ - `propositionize()`
+ - `check_fact()`
+ - `run_custom()` (not evaluated in all scenarios)
+
+ ---
+
  ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters

  TextTools provides several optional flags to customize LLM behavior:
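The newly documented `check_fact()` takes keyword arguments `text` (the claim) and `source_text` (the reference passage), as the test updates later in this diff show. A minimal sketch of a call, assuming an OpenAI-compatible endpoint; `BASE_URL`, `API_KEY`, and `MODEL` are placeholders and the example strings are purely illustrative:

```python
# Sketch only: mirrors the usage added to tests/test_all_tools.py in this release.
# BASE_URL, API_KEY, and MODEL are placeholders for your own endpoint and model.
from openai import OpenAI
from texttools import TheTool

BASE_URL = "http://localhost:8000/v1"   # placeholder
API_KEY = "sk-..."                      # placeholder
MODEL = "your-model-name"               # placeholder

client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
t = TheTool(client=client, model=MODEL)

# check_fact() is listed as Experimental in the quality tiers above.
result = t.check_fact(
    text="The ninth Imam is buried in Iran",                      # claim to check
    source_text="The shrine of Imam Reza is located in Mashhad",  # reference text
)
print(repr(result))
```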
@@ -15,9 +15,16 @@ texttools/batch/batch_config.py
  texttools/batch/batch_runner.py
  texttools/batch/internals/batch_manager.py
  texttools/batch/internals/utils.py
+ texttools/internals/async_operator.py
+ texttools/internals/exceptions.py
+ texttools/internals/formatters.py
+ texttools/internals/models.py
+ texttools/internals/operator_utils.py
+ texttools/internals/prompt_loader.py
+ texttools/internals/sync_operator.py
  texttools/prompts/README.md
  texttools/prompts/categorize.yaml
- texttools/prompts/detect_entity.yaml
+ texttools/prompts/check_fact.yaml
  texttools/prompts/extract_entities.yaml
  texttools/prompts/extract_keywords.yaml
  texttools/prompts/is_question.yaml
@@ -30,10 +37,4 @@ texttools/prompts/summarize.yaml
  texttools/prompts/text_to_question.yaml
  texttools/prompts/translate.yaml
  texttools/tools/async_tools.py
- texttools/tools/sync_tools.py
- texttools/tools/internals/async_operator.py
- texttools/tools/internals/formatters.py
- texttools/tools/internals/models.py
- texttools/tools/internals/operator_utils.py
- texttools/tools/internals/prompt_loader.py
- texttools/tools/internals/sync_operator.py
+ texttools/tools/sync_tools.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "hamtaa-texttools"
- version = "1.1.17"
+ version = "1.1.19"
  authors = [
  { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
  { name = "Montazer", email = "montazerh82@gmail.com" },
@@ -21,7 +21,10 @@ t = AsyncTheTool(client=client, model=MODEL)

  async def main():
  category_task = t.categorize(
- "سلام حالت چطوره؟", categories=["هیچکدام", "دینی", "فلسفه"]
+ "سلام حالت چطوره؟",
+ categories=["هیچکدام", "دینی", "فلسفه"],
+ logprobs=True,
+ top_logprobs=-1,
  )
  keywords_task = t.extract_keywords("Tomorrow, we will be dead by the car crash")
  entities_task = t.extract_entities("We will be dead by the car crash")
@@ -40,7 +43,11 @@ async def main():
  questions_task = t.subject_to_question("Friendship", 3)
  summary_task = t.summarize("Tomorrow, we will be dead by the car crash")
  translation_task = t.translate("سلام حالت چطوره؟", target_language="English")
- propositionize_task = t.propositionize("جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.", output_lang="Persian")
+ propositionize_task = t.propositionize(
+ "جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
+ output_lang="Persian",
+ )
+ check_fact_task = t.check_fact(text="امام نهم در ایران به خاک سپرده شد", source_text="حرم مطهر امام رضا علیه السلام در مشهد مقدس هست")
  (
  category,
  keywords,
@@ -53,6 +60,7 @@ async def main():
  summary,
  translation,
  propositionize,
+ check_fact,
  ) = await asyncio.gather(
  category_task,
  keywords_task,
@@ -65,6 +73,7 @@
  summary_task,
  translation_task,
  propositionize_task,
+ check_fact_task,
  )

  for tool_output in (
@@ -79,6 +88,7 @@
  summary,
  translation,
  propositionize,
+ check_fact,
  ):
  print(repr(tool_output))

@@ -19,7 +19,12 @@ client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
  t = TheTool(client=client, model=MODEL)

  # Categorizer: list mode
- category = t.categorize("سلام حالت چطوره؟", categories=["هیچکدام", "دینی", "فلسفه"])
+ category = t.categorize(
+ "سلام حالت چطوره؟",
+ categories=["هیچکدام", "دینی", "فلسفه"],
+ logprobs=True,
+ top_logprobs=-1,
+ )
  print(repr(category))

  # Categorizer: tree mode
@@ -46,7 +51,7 @@ keywords = t.extract_keywords(
  print(repr(keywords))

  # NER Extractor
- entities = t.extract_entities("We will be dead by the car crash")
+ entities = t.extract_entities("We will be dead by the car crash", with_analysis=True)
  print(repr(entities))


@@ -85,9 +90,20 @@ translation = t.translate("سلام حالت چطوره؟", target_language="Eng
  print(repr(translation))

  # propositionize
- propositionize = t.propositionize("جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.", output_lang="Persian")
+ propositionize = t.propositionize(
+ "جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
+ output_lang="Persian",
+ )
  print(repr(propositionize))

+ # check_fact
+ check_fact = t.check_fact(
+ text="امام نهم در ایران به خاک سپرده شد",
+ source_text="حرم مطهر امام رضا علیه السلام در مشهد مقدس هست",
+ )
+ print(repr(check_fact))
+
+
  # Custom tool
  class Student(BaseModel):
  result: list[dict[str, str]]
@@ -29,7 +29,7 @@ question = t.text_to_question(
  "زندگی",
  output_lang="Persian",
  validator=validate,
- max_validation_retries=5,
+ max_validation_retries=0,
  temperature=1.0,
  )
- print(question)
+ print(repr(question))
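This hunk drops `max_validation_retries` to 0 and switches the output to `repr()`. For context on the `validator` hook exercised here, a hypothetical sketch of the `validate` callable; its real contract is not visible in this diff, so a boolean check over the tool output is assumed:

```python
# Hypothetical validator: the actual signature expected by the `validator`
# parameter is not shown in this diff; a callable returning True to accept the
# output (and False to trigger a retry) is assumed.
def validate(output) -> bool:
    # Accept only non-empty text that ends with an Arabic/Persian question mark.
    text = str(output).strip()
    return bool(text) and text.endswith("؟")
```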
@@ -2,6 +2,6 @@ from .batch.batch_runner import BatchJobRunner
  from .batch.batch_config import BatchConfig
  from .tools.sync_tools import TheTool
  from .tools.async_tools import AsyncTheTool
- from .tools.internals.models import CategoryTree
+ from .internals.models import CategoryTree

  __all__ = ["TheTool", "AsyncTheTool", "BatchJobRunner", "BatchConfig", "CategoryTree"]
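This release moves the implementation modules from `texttools/tools/internals/` to `texttools/internals/`, while the names re-exported in `__all__` stay the same. A short sketch of the imports that keep working, with the private path shown only for comparison:

```python
# Public API re-exported by texttools/__init__.py (unchanged between 1.1.17 and 1.1.19).
from texttools import TheTool, AsyncTheTool, BatchJobRunner, BatchConfig, CategoryTree

# Only code that reached into the private module layout needs updating:
# from texttools.tools.internals.models import CategoryTree   # 1.1.17 layout
from texttools.internals.models import CategoryTree            # 1.1.19 layout
```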
@@ -11,7 +11,8 @@ from pydantic import BaseModel

  from texttools.batch.internals.batch_manager import BatchManager
  from texttools.batch.batch_config import BatchConfig
- from texttools.tools.internals.models import StrOutput
+ from texttools.internals.models import StrOutput
+ from texttools.internals.exceptions import TextToolsError, ConfigurationError

  # Base Model type for output models
  T = TypeVar("T", bound=BaseModel)
@@ -27,22 +28,26 @@ class BatchJobRunner:
  def __init__(
  self, config: BatchConfig = BatchConfig(), output_model: Type[T] = StrOutput
  ):
- self._config = config
- self._system_prompt = config.system_prompt
- self._job_name = config.job_name
- self._input_data_path = config.input_data_path
- self._output_data_filename = config.output_data_filename
- self._model = config.model
- self._output_model = output_model
- self._manager = self._init_manager()
- self._data = self._load_data()
- self._parts: list[list[dict[str, Any]]] = []
- # Map part index to job name
- self._part_idx_to_job_name: dict[int, str] = {}
- # Track retry attempts per part
- self._part_attempts: dict[int, int] = {}
- self._partition_data()
- Path(self._config.BASE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
+ try:
+ self._config = config
+ self._system_prompt = config.system_prompt
+ self._job_name = config.job_name
+ self._input_data_path = config.input_data_path
+ self._output_data_filename = config.output_data_filename
+ self._model = config.model
+ self._output_model = output_model
+ self._manager = self._init_manager()
+ self._data = self._load_data()
+ self._parts: list[list[dict[str, Any]]] = []
+ # Map part index to job name
+ self._part_idx_to_job_name: dict[int, str] = {}
+ # Track retry attempts per part
+ self._part_attempts: dict[int, int] = {}
+ self._partition_data()
+ Path(self._config.BASE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
+
+ except Exception as e:
+ raise ConfigurationError(f"Batch runner initialization failed: {e}")

  def _init_manager(self) -> BatchManager:
  load_dotenv()
@@ -162,56 +167,62 @@

  Submits jobs, monitors progress, handles retries, and saves results.
  """
- # Submit all jobs up-front for concurrent execution
- self._submit_all_jobs()
- pending_parts: set[int] = set(self._part_idx_to_job_name.keys())
- logger.info(f"Pending parts: {sorted(pending_parts)}")
- # Polling loop
- while pending_parts:
- finished_this_round: list[int] = []
- for part_idx in list(pending_parts):
- job_name = self._part_idx_to_job_name[part_idx]
- status = self._manager.check_status(job_name=job_name)
- logger.info(f"Status for {job_name}: {status}")
- if status == "completed":
- logger.info(
- f"Job completed. Fetching results for part {part_idx + 1}..."
- )
- output_data, log = self._manager.fetch_results(
- job_name=job_name, remove_cache=False
- )
- output_data = self._config.import_function(output_data)
- self._save_results(output_data, log, part_idx)
- logger.info(f"Fetched and saved results for part {part_idx + 1}.")
- finished_this_round.append(part_idx)
- elif status == "failed":
- attempt = self._part_attempts.get(part_idx, 0) + 1
- self._part_attempts[part_idx] = attempt
- if attempt <= self._config.max_retries:
+ try:
+ # Submit all jobs up-front for concurrent execution
+ self._submit_all_jobs()
+ pending_parts: set[int] = set(self._part_idx_to_job_name.keys())
+ logger.info(f"Pending parts: {sorted(pending_parts)}")
+ # Polling loop
+ while pending_parts:
+ finished_this_round: list[int] = []
+ for part_idx in list(pending_parts):
+ job_name = self._part_idx_to_job_name[part_idx]
+ status = self._manager.check_status(job_name=job_name)
+ logger.info(f"Status for {job_name}: {status}")
+ if status == "completed":
  logger.info(
- f"Job {job_name} failed (attempt {attempt}). Retrying after short backoff..."
+ f"Job completed. Fetching results for part {part_idx + 1}..."
  )
- self._manager._clear_state(job_name)
- time.sleep(10)
- payload = self._to_manager_payload(self._parts[part_idx])
- new_job_name = (
- f"{self._job_name}_part_{part_idx + 1}_retry_{attempt}"
+ output_data, log = self._manager.fetch_results(
+ job_name=job_name, remove_cache=False
  )
- self._manager.start(payload, job_name=new_job_name)
- self._part_idx_to_job_name[part_idx] = new_job_name
- else:
+ output_data = self._config.import_function(output_data)
+ self._save_results(output_data, log, part_idx)
  logger.info(
- f"Job {job_name} failed after {attempt - 1} retries. Marking as failed."
+ f"Fetched and saved results for part {part_idx + 1}."
  )
  finished_this_round.append(part_idx)
- else:
- # Still running or queued
- continue
- # Remove finished parts
- for part_idx in finished_this_round:
- pending_parts.discard(part_idx)
- if pending_parts:
- logger.info(
- f"Waiting {self._config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
- )
- time.sleep(self._config.poll_interval_seconds)
+ elif status == "failed":
+ attempt = self._part_attempts.get(part_idx, 0) + 1
+ self._part_attempts[part_idx] = attempt
+ if attempt <= self._config.max_retries:
+ logger.info(
+ f"Job {job_name} failed (attempt {attempt}). Retrying after short backoff..."
+ )
+ self._manager._clear_state(job_name)
+ time.sleep(10)
+ payload = self._to_manager_payload(self._parts[part_idx])
+ new_job_name = (
+ f"{self._job_name}_part_{part_idx + 1}_retry_{attempt}"
+ )
+ self._manager.start(payload, job_name=new_job_name)
+ self._part_idx_to_job_name[part_idx] = new_job_name
+ else:
+ logger.info(
+ f"Job {job_name} failed after {attempt - 1} retries. Marking as failed."
+ )
+ finished_this_round.append(part_idx)
+ else:
+ # Still running or queued
+ continue
+ # Remove finished parts
+ for part_idx in finished_this_round:
+ pending_parts.discard(part_idx)
+ if pending_parts:
+ logger.info(
+ f"Waiting {self._config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
+ )
+ time.sleep(self._config.poll_interval_seconds)
+
+ except Exception as e:
+ raise TextToolsError(f"Batch job execution failed: {e}")
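The batch runner now surfaces setup failures as `ConfigurationError` and execution failures as `TextToolsError`, both defined in the new `texttools/internals/exceptions.py`. A sketch of how a caller might handle them; the `run()` method name is an assumption, since the hunk above shows only the method body:

```python
# Sketch only: exception classes come from the new texttools/internals/exceptions.py.
# The run() method name is assumed -- only its body appears in the diff above.
from texttools import BatchConfig, BatchJobRunner
from texttools.internals.exceptions import ConfigurationError, TextToolsError

try:
    runner = BatchJobRunner(config=BatchConfig())  # setup errors raise ConfigurationError
    runner.run()                                   # execution errors raise TextToolsError
except ConfigurationError as exc:
    print(f"Batch runner configuration failed: {exc}")
except TextToolsError as exc:
    print(f"Batch job execution failed: {exc}")
```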