hamtaa-texttools 1.1.17__tar.gz → 1.1.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.17/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.19}/PKG-INFO +31 -1
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/README.md +30 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19/hamtaa_texttools.egg-info}/PKG-INFO +31 -1
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/hamtaa_texttools.egg-info/SOURCES.txt +9 -8
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/pyproject.toml +1 -1
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/tests/test_all_async_tools.py +12 -2
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/tests/test_all_tools.py +19 -3
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/tests/test_output_validation.py +2 -2
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/__init__.py +1 -1
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/batch/batch_runner.py +75 -64
- {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.19/texttools}/internals/async_operator.py +96 -48
- hamtaa_texttools-1.1.19/texttools/internals/exceptions.py +28 -0
- {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.19/texttools}/internals/models.py +2 -2
- hamtaa_texttools-1.1.19/texttools/internals/prompt_loader.py +108 -0
- {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.19/texttools}/internals/sync_operator.py +92 -47
- hamtaa_texttools-1.1.19/texttools/prompts/check_fact.yaml +19 -0
- hamtaa_texttools-1.1.19/texttools/prompts/propositionize.yaml +22 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/run_custom.yaml +1 -1
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/tools/async_tools.py +576 -348
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/tools/sync_tools.py +573 -346
- hamtaa_texttools-1.1.17/texttools/prompts/detect_entity.yaml +0 -22
- hamtaa_texttools-1.1.17/texttools/prompts/propositionize.yaml +0 -15
- hamtaa_texttools-1.1.17/texttools/tools/internals/prompt_loader.py +0 -56
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/LICENSE +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/setup.cfg +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/batch/batch_config.py +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/batch/internals/batch_manager.py +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/batch/internals/utils.py +0 -0
- {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.19/texttools}/internals/formatters.py +0 -0
- {hamtaa_texttools-1.1.17/texttools/tools → hamtaa_texttools-1.1.19/texttools}/internals/operator_utils.py +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/README.md +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/categorize.yaml +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/extract_entities.yaml +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/extract_keywords.yaml +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/is_question.yaml +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/merge_questions.yaml +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/rewrite.yaml +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/subject_to_question.yaml +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/summarize.yaml +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/text_to_question.yaml +0 -0
- {hamtaa_texttools-1.1.17 → hamtaa_texttools-1.1.19}/texttools/prompts/translate.yaml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.1.17
|
|
3
|
+
Version: 1.1.19
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -61,10 +61,40 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
|
|
|
61
61
|
- **`summarize()`** - Text summarization
|
|
62
62
|
- **`translate()`** - Text translation between languages
|
|
63
63
|
- **`propositionize()`** - Convert text to atomic, independent, meaningful sentences
|
|
64
|
+
- **`check_fact()`** - Check whether a statement is relevant to the source text
|
|
64
65
|
- **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
|
|
65
66
|
|
|
66
67
|
---
|
|
67
68
|
|
|
69
|
+
## 📊 Tool Quality Tiers
|
|
70
|
+
|
|
71
|
+
| Status | Meaning | Use in Production? |
|
|
72
|
+
|--------|---------|-------------------|
|
|
73
|
+
| **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
|
|
74
|
+
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
|
|
75
|
+
|
|
76
|
+
### Current Status
|
|
77
|
+
**Production Tools:**
|
|
78
|
+
- `categorize()` (list mode)
|
|
79
|
+
- `extract_keywords()`
|
|
80
|
+
- `extract_entities()`
|
|
81
|
+
- `is_question()`
|
|
82
|
+
- `text_to_question()`
|
|
83
|
+
- `merge_questions()`
|
|
84
|
+
- `rewrite()`
|
|
85
|
+
- `subject_to_question()`
|
|
86
|
+
- `summarize()`
|
|
87
|
+
- `run_custom()` (fine in most cases)
|
|
88
|
+
|
|
89
|
+
**Experimental Tools:**
|
|
90
|
+
- `categorize()` (tree mode)
|
|
91
|
+
- `translate()`
|
|
92
|
+
- `propositionize()`
|
|
93
|
+
- `check_fact()`
|
|
94
|
+
- `run_custom()` (not evaluated in all scenarios)
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
68
98
|
## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
|
|
69
99
|
|
|
70
100
|
TextTools provides several optional flags to customize LLM behavior:
|
|
@@ -26,10 +26,40 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
|
|
|
26
26
|
- **`summarize()`** - Text summarization
|
|
27
27
|
- **`translate()`** - Text translation between languages
|
|
28
28
|
- **`propositionize()`** - Convert text to atomic, independent, meaningful sentences
|
|
29
|
+
- **`check_fact()`** - Check whether a statement is relevant to the source text
|
|
29
30
|
- **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
|
|
30
31
|
|
|
31
32
|
---
|
|
32
33
|
|
|
34
|
+
## 📊 Tool Quality Tiers
|
|
35
|
+
|
|
36
|
+
| Status | Meaning | Use in Production? |
|
|
37
|
+
|--------|---------|-------------------|
|
|
38
|
+
| **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
|
|
39
|
+
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
|
|
40
|
+
|
|
41
|
+
### Current Status
|
|
42
|
+
**Production Tools:**
|
|
43
|
+
- `categorize()` (list mode)
|
|
44
|
+
- `extract_keywords()`
|
|
45
|
+
- `extract_entities()`
|
|
46
|
+
- `is_question()`
|
|
47
|
+
- `text_to_question()`
|
|
48
|
+
- `merge_questions()`
|
|
49
|
+
- `rewrite()`
|
|
50
|
+
- `subject_to_question()`
|
|
51
|
+
- `summarize()`
|
|
52
|
+
- `run_custom()` (fine in most cases)
|
|
53
|
+
|
|
54
|
+
**Experimental Tools:**
|
|
55
|
+
- `categorize()` (tree mode)
|
|
56
|
+
- `translate()`
|
|
57
|
+
- `propositionize()`
|
|
58
|
+
- `check_fact()`
|
|
59
|
+
- `run_custom()` (not evaluated in all scenarios)
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
33
63
|
## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
|
|
34
64
|
|
|
35
65
|
TextTools provides several optional flags to customize LLM behavior:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.1.17
|
|
3
|
+
Version: 1.1.19
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -61,10 +61,40 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
|
|
|
61
61
|
- **`summarize()`** - Text summarization
|
|
62
62
|
- **`translate()`** - Text translation between languages
|
|
63
63
|
- **`propositionize()`** - Convert text to atomic, independent, meaningful sentences
|
|
64
|
+
- **`check_fact()`** - Check whether a statement is relevant to the source text
|
|
64
65
|
- **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
|
|
65
66
|
|
|
66
67
|
---
|
|
67
68
|
|
|
69
|
+
## 📊 Tool Quality Tiers
|
|
70
|
+
|
|
71
|
+
| Status | Meaning | Use in Production? |
|
|
72
|
+
|--------|---------|-------------------|
|
|
73
|
+
| **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
|
|
74
|
+
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
|
|
75
|
+
|
|
76
|
+
### Current Status
|
|
77
|
+
**Production Tools:**
|
|
78
|
+
- `categorize()` (list mode)
|
|
79
|
+
- `extract_keywords()`
|
|
80
|
+
- `extract_entities()`
|
|
81
|
+
- `is_question()`
|
|
82
|
+
- `text_to_question()`
|
|
83
|
+
- `merge_questions()`
|
|
84
|
+
- `rewrite()`
|
|
85
|
+
- `subject_to_question()`
|
|
86
|
+
- `summarize()`
|
|
87
|
+
- `run_custom()` (fine in most cases)
|
|
88
|
+
|
|
89
|
+
**Experimental Tools:**
|
|
90
|
+
- `categorize()` (tree mode)
|
|
91
|
+
- `translate()`
|
|
92
|
+
- `propositionize()`
|
|
93
|
+
- `check_fact()`
|
|
94
|
+
- `run_custom()` (not evaluated in all scenarios)
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
68
98
|
## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
|
|
69
99
|
|
|
70
100
|
TextTools provides several optional flags to customize LLM behavior:
|
|
@@ -15,9 +15,16 @@ texttools/batch/batch_config.py
|
|
|
15
15
|
texttools/batch/batch_runner.py
|
|
16
16
|
texttools/batch/internals/batch_manager.py
|
|
17
17
|
texttools/batch/internals/utils.py
|
|
18
|
+
texttools/internals/async_operator.py
|
|
19
|
+
texttools/internals/exceptions.py
|
|
20
|
+
texttools/internals/formatters.py
|
|
21
|
+
texttools/internals/models.py
|
|
22
|
+
texttools/internals/operator_utils.py
|
|
23
|
+
texttools/internals/prompt_loader.py
|
|
24
|
+
texttools/internals/sync_operator.py
|
|
18
25
|
texttools/prompts/README.md
|
|
19
26
|
texttools/prompts/categorize.yaml
|
|
20
|
-
texttools/prompts/detect_entity.yaml
|
|
27
|
+
texttools/prompts/check_fact.yaml
|
|
21
28
|
texttools/prompts/extract_entities.yaml
|
|
22
29
|
texttools/prompts/extract_keywords.yaml
|
|
23
30
|
texttools/prompts/is_question.yaml
|
|
@@ -30,10 +37,4 @@ texttools/prompts/summarize.yaml
|
|
|
30
37
|
texttools/prompts/text_to_question.yaml
|
|
31
38
|
texttools/prompts/translate.yaml
|
|
32
39
|
texttools/tools/async_tools.py
|
|
33
|
-
texttools/tools/sync_tools.py
|
|
34
|
-
texttools/tools/internals/async_operator.py
|
|
35
|
-
texttools/tools/internals/formatters.py
|
|
36
|
-
texttools/tools/internals/models.py
|
|
37
|
-
texttools/tools/internals/operator_utils.py
|
|
38
|
-
texttools/tools/internals/prompt_loader.py
|
|
39
|
-
texttools/tools/internals/sync_operator.py
|
|
40
|
+
texttools/tools/sync_tools.py
|
|
@@ -21,7 +21,10 @@ t = AsyncTheTool(client=client, model=MODEL)
|
|
|
21
21
|
|
|
22
22
|
async def main():
|
|
23
23
|
category_task = t.categorize(
|
|
24
|
-
"سلام حالت چطوره؟",
|
|
24
|
+
"سلام حالت چطوره؟",
|
|
25
|
+
categories=["هیچکدام", "دینی", "فلسفه"],
|
|
26
|
+
logprobs=True,
|
|
27
|
+
top_logprobs=-1,
|
|
25
28
|
)
|
|
26
29
|
keywords_task = t.extract_keywords("Tomorrow, we will be dead by the car crash")
|
|
27
30
|
entities_task = t.extract_entities("We will be dead by the car crash")
|
|
@@ -40,7 +43,11 @@ async def main():
|
|
|
40
43
|
questions_task = t.subject_to_question("Friendship", 3)
|
|
41
44
|
summary_task = t.summarize("Tomorrow, we will be dead by the car crash")
|
|
42
45
|
translation_task = t.translate("سلام حالت چطوره؟", target_language="English")
|
|
43
|
-
propositionize_task = t.propositionize(
|
|
46
|
+
propositionize_task = t.propositionize(
|
|
47
|
+
"جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
|
|
48
|
+
output_lang="Persian",
|
|
49
|
+
)
|
|
50
|
+
check_fact_task = t.check_fact(text="امام نهم در ایران به خاک سپرده شد", source_text="حرم مطهر امام رضا علیه السلام در مشهد مقدس هست")
|
|
44
51
|
(
|
|
45
52
|
category,
|
|
46
53
|
keywords,
|
|
@@ -53,6 +60,7 @@ async def main():
|
|
|
53
60
|
summary,
|
|
54
61
|
translation,
|
|
55
62
|
propositionize,
|
|
63
|
+
check_fact,
|
|
56
64
|
) = await asyncio.gather(
|
|
57
65
|
category_task,
|
|
58
66
|
keywords_task,
|
|
@@ -65,6 +73,7 @@ async def main():
|
|
|
65
73
|
summary_task,
|
|
66
74
|
translation_task,
|
|
67
75
|
propositionize_task,
|
|
76
|
+
check_fact_task,
|
|
68
77
|
)
|
|
69
78
|
|
|
70
79
|
for tool_output in (
|
|
@@ -79,6 +88,7 @@ async def main():
|
|
|
79
88
|
summary,
|
|
80
89
|
translation,
|
|
81
90
|
propositionize,
|
|
91
|
+
check_fact,
|
|
82
92
|
):
|
|
83
93
|
print(repr(tool_output))
|
|
84
94
|
|
|
@@ -19,7 +19,12 @@ client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
|
|
|
19
19
|
t = TheTool(client=client, model=MODEL)
|
|
20
20
|
|
|
21
21
|
# Categorizer: list mode
|
|
22
|
-
category = t.categorize(
|
|
22
|
+
category = t.categorize(
|
|
23
|
+
"سلام حالت چطوره؟",
|
|
24
|
+
categories=["هیچکدام", "دینی", "فلسفه"],
|
|
25
|
+
logprobs=True,
|
|
26
|
+
top_logprobs=-1,
|
|
27
|
+
)
|
|
23
28
|
print(repr(category))
|
|
24
29
|
|
|
25
30
|
# Categorizer: tree mode
|
|
@@ -46,7 +51,7 @@ keywords = t.extract_keywords(
|
|
|
46
51
|
print(repr(keywords))
|
|
47
52
|
|
|
48
53
|
# NER Extractor
|
|
49
|
-
entities = t.extract_entities("We will be dead by the car crash")
|
|
54
|
+
entities = t.extract_entities("We will be dead by the car crash", with_analysis=True)
|
|
50
55
|
print(repr(entities))
|
|
51
56
|
|
|
52
57
|
|
|
@@ -85,9 +90,20 @@ translation = t.translate("سلام حالت چطوره؟", target_language="Eng
|
|
|
85
90
|
print(repr(translation))
|
|
86
91
|
|
|
87
92
|
# propositionize
|
|
88
|
-
propositionize = t.propositionize(
|
|
93
|
+
propositionize = t.propositionize(
|
|
94
|
+
"جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
|
|
95
|
+
output_lang="Persian",
|
|
96
|
+
)
|
|
89
97
|
print(repr(propositionize))
|
|
90
98
|
|
|
99
|
+
# check_fact
|
|
100
|
+
check_fact = t.check_fact(
|
|
101
|
+
text="امام نهم در ایران به خاک سپرده شد",
|
|
102
|
+
source_text="حرم مطهر امام رضا علیه السلام در مشهد مقدس هست",
|
|
103
|
+
)
|
|
104
|
+
print(repr(check_fact))
|
|
105
|
+
|
|
106
|
+
|
|
91
107
|
# Custom tool
|
|
92
108
|
class Student(BaseModel):
|
|
93
109
|
result: list[dict[str, str]]
|
|
@@ -2,6 +2,6 @@ from .batch.batch_runner import BatchJobRunner
|
|
|
2
2
|
from .batch.batch_config import BatchConfig
|
|
3
3
|
from .tools.sync_tools import TheTool
|
|
4
4
|
from .tools.async_tools import AsyncTheTool
|
|
5
|
-
from .tools.internals.models import CategoryTree
|
|
5
|
+
from .internals.models import CategoryTree
|
|
6
6
|
|
|
7
7
|
__all__ = ["TheTool", "AsyncTheTool", "BatchJobRunner", "BatchConfig", "CategoryTree"]
|
|
@@ -11,7 +11,8 @@ from pydantic import BaseModel
|
|
|
11
11
|
|
|
12
12
|
from texttools.batch.internals.batch_manager import BatchManager
|
|
13
13
|
from texttools.batch.batch_config import BatchConfig
|
|
14
|
-
from texttools.tools.internals.models import StrOutput
|
|
14
|
+
from texttools.internals.models import StrOutput
|
|
15
|
+
from texttools.internals.exceptions import TextToolsError, ConfigurationError
|
|
15
16
|
|
|
16
17
|
# Base Model type for output models
|
|
17
18
|
T = TypeVar("T", bound=BaseModel)
|
|
@@ -27,22 +28,26 @@ class BatchJobRunner:
|
|
|
27
28
|
def __init__(
|
|
28
29
|
self, config: BatchConfig = BatchConfig(), output_model: Type[T] = StrOutput
|
|
29
30
|
):
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
31
|
+
try:
|
|
32
|
+
self._config = config
|
|
33
|
+
self._system_prompt = config.system_prompt
|
|
34
|
+
self._job_name = config.job_name
|
|
35
|
+
self._input_data_path = config.input_data_path
|
|
36
|
+
self._output_data_filename = config.output_data_filename
|
|
37
|
+
self._model = config.model
|
|
38
|
+
self._output_model = output_model
|
|
39
|
+
self._manager = self._init_manager()
|
|
40
|
+
self._data = self._load_data()
|
|
41
|
+
self._parts: list[list[dict[str, Any]]] = []
|
|
42
|
+
# Map part index to job name
|
|
43
|
+
self._part_idx_to_job_name: dict[int, str] = {}
|
|
44
|
+
# Track retry attempts per part
|
|
45
|
+
self._part_attempts: dict[int, int] = {}
|
|
46
|
+
self._partition_data()
|
|
47
|
+
Path(self._config.BASE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
|
|
48
|
+
|
|
49
|
+
except Exception as e:
|
|
50
|
+
raise ConfigurationError(f"Batch runner initialization failed: {e}")
|
|
46
51
|
|
|
47
52
|
def _init_manager(self) -> BatchManager:
|
|
48
53
|
load_dotenv()
|
|
@@ -162,56 +167,62 @@ class BatchJobRunner:
|
|
|
162
167
|
|
|
163
168
|
Submits jobs, monitors progress, handles retries, and saves results.
|
|
164
169
|
"""
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
f"Job completed. Fetching results for part {part_idx + 1}..."
|
|
179
|
-
)
|
|
180
|
-
output_data, log = self._manager.fetch_results(
|
|
181
|
-
job_name=job_name, remove_cache=False
|
|
182
|
-
)
|
|
183
|
-
output_data = self._config.import_function(output_data)
|
|
184
|
-
self._save_results(output_data, log, part_idx)
|
|
185
|
-
logger.info(f"Fetched and saved results for part {part_idx + 1}.")
|
|
186
|
-
finished_this_round.append(part_idx)
|
|
187
|
-
elif status == "failed":
|
|
188
|
-
attempt = self._part_attempts.get(part_idx, 0) + 1
|
|
189
|
-
self._part_attempts[part_idx] = attempt
|
|
190
|
-
if attempt <= self._config.max_retries:
|
|
170
|
+
try:
|
|
171
|
+
# Submit all jobs up-front for concurrent execution
|
|
172
|
+
self._submit_all_jobs()
|
|
173
|
+
pending_parts: set[int] = set(self._part_idx_to_job_name.keys())
|
|
174
|
+
logger.info(f"Pending parts: {sorted(pending_parts)}")
|
|
175
|
+
# Polling loop
|
|
176
|
+
while pending_parts:
|
|
177
|
+
finished_this_round: list[int] = []
|
|
178
|
+
for part_idx in list(pending_parts):
|
|
179
|
+
job_name = self._part_idx_to_job_name[part_idx]
|
|
180
|
+
status = self._manager.check_status(job_name=job_name)
|
|
181
|
+
logger.info(f"Status for {job_name}: {status}")
|
|
182
|
+
if status == "completed":
|
|
191
183
|
logger.info(
|
|
192
|
-
f"Job
|
|
184
|
+
f"Job completed. Fetching results for part {part_idx + 1}..."
|
|
193
185
|
)
|
|
194
|
-
self._manager.
|
|
195
|
-
|
|
196
|
-
payload = self._to_manager_payload(self._parts[part_idx])
|
|
197
|
-
new_job_name = (
|
|
198
|
-
f"{self._job_name}_part_{part_idx + 1}_retry_{attempt}"
|
|
186
|
+
output_data, log = self._manager.fetch_results(
|
|
187
|
+
job_name=job_name, remove_cache=False
|
|
199
188
|
)
|
|
200
|
-
self.
|
|
201
|
-
self.
|
|
202
|
-
else:
|
|
189
|
+
output_data = self._config.import_function(output_data)
|
|
190
|
+
self._save_results(output_data, log, part_idx)
|
|
203
191
|
logger.info(
|
|
204
|
-
f"
|
|
192
|
+
f"Fetched and saved results for part {part_idx + 1}."
|
|
205
193
|
)
|
|
206
194
|
finished_this_round.append(part_idx)
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
195
|
+
elif status == "failed":
|
|
196
|
+
attempt = self._part_attempts.get(part_idx, 0) + 1
|
|
197
|
+
self._part_attempts[part_idx] = attempt
|
|
198
|
+
if attempt <= self._config.max_retries:
|
|
199
|
+
logger.info(
|
|
200
|
+
f"Job {job_name} failed (attempt {attempt}). Retrying after short backoff..."
|
|
201
|
+
)
|
|
202
|
+
self._manager._clear_state(job_name)
|
|
203
|
+
time.sleep(10)
|
|
204
|
+
payload = self._to_manager_payload(self._parts[part_idx])
|
|
205
|
+
new_job_name = (
|
|
206
|
+
f"{self._job_name}_part_{part_idx + 1}_retry_{attempt}"
|
|
207
|
+
)
|
|
208
|
+
self._manager.start(payload, job_name=new_job_name)
|
|
209
|
+
self._part_idx_to_job_name[part_idx] = new_job_name
|
|
210
|
+
else:
|
|
211
|
+
logger.info(
|
|
212
|
+
f"Job {job_name} failed after {attempt - 1} retries. Marking as failed."
|
|
213
|
+
)
|
|
214
|
+
finished_this_round.append(part_idx)
|
|
215
|
+
else:
|
|
216
|
+
# Still running or queued
|
|
217
|
+
continue
|
|
218
|
+
# Remove finished parts
|
|
219
|
+
for part_idx in finished_this_round:
|
|
220
|
+
pending_parts.discard(part_idx)
|
|
221
|
+
if pending_parts:
|
|
222
|
+
logger.info(
|
|
223
|
+
f"Waiting {self._config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
|
|
224
|
+
)
|
|
225
|
+
time.sleep(self._config.poll_interval_seconds)
|
|
226
|
+
|
|
227
|
+
except Exception as e:
|
|
228
|
+
raise TextToolsError(f"Batch job execution failed: {e}")
|