hamtaa-texttools 1.0.5__tar.gz → 1.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hamtaa-texttools might be problematic.
- {hamtaa_texttools-1.0.5/hamtaa_texttools.egg-info → hamtaa_texttools-1.0.6}/PKG-INFO +15 -15
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/README.md +14 -14
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6/hamtaa_texttools.egg-info}/PKG-INFO +15 -15
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/hamtaa_texttools.egg-info/SOURCES.txt +4 -4
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/pyproject.toml +2 -2
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/batch/batch_manager.py +7 -18
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/batch/batch_runner.py +96 -45
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/README.md +4 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/keyword_extractor.yaml +6 -6
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/question_merger.yaml +5 -5
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/async_the_tool.py +6 -6
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/internals/async_operator.py +21 -10
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/internals/operator.py +2 -2
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/internals/prompt_loader.py +12 -22
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/the_tool.py +12 -12
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/LICENSE +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/setup.cfg +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/__init__.py +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/batch/__init__.py +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/formatters/base_formatter.py +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/formatters/user_merge_formatter.py +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/categorizer.yaml +0 -0
- /hamtaa_texttools-1.0.5/texttools/prompts/question_detector.yaml → /hamtaa_texttools-1.0.6/texttools/prompts/is_question.yaml +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/ner_extractor.yaml +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/rewriter.yaml +0 -0
- /hamtaa_texttools-1.0.5/texttools/prompts/custom_tool.yaml → /hamtaa_texttools-1.0.6/texttools/prompts/run_custom.yaml +0 -0
- /hamtaa_texttools-1.0.5/texttools/prompts/subject_question_generator.yaml → /hamtaa_texttools-1.0.6/texttools/prompts/subject_to_question.yaml +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/summarizer.yaml +0 -0
- /hamtaa_texttools-1.0.5/texttools/prompts/question_generator.yaml → /hamtaa_texttools-1.0.6/texttools/prompts/text_to_question.yaml +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/translator.yaml +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/__init__.py +0 -0
- {hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/internals/output_models.py +0 -0
{hamtaa_texttools-1.0.5/hamtaa_texttools.egg-info → hamtaa_texttools-1.0.6}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.0.5
+Version: 1.0.6
 Summary: TextTools is a high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
@@ -51,17 +51,17 @@ It provides ready-to-use utilities for **translation, question detection, keywor
 TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
 Each tool is designed to work out-of-the-box with structured outputs (JSON / Pydantic).
 
--
--
--
--
--
--
--
--
--
--
--
+- **`categorize()`** - Classifies text into Islamic studies categories
+- **`is_question()`** - Binary detection of whether input is a question
+- **`extract_keywords()`** - Extracts keywords from text
+- **`extract_entities()`** - Named Entity Recognition (NER) system
+- **`summarize()`** - Text summarization
+- **`text_to_question()`** - Generates questions from text
+- **`merge_questions()`** - Merges multiple questions with different modes
+- **`rewrite()`** - Rewrites text with different wording/meaning
+- **`subject_to_question()`** - Generates questions about a specific subject
+- **`translate()`** - Text translation between languages
+- **`run_custom()`** - Allows users to define a custom tool with arbitrary BaseModel
 
 ---
 
@@ -87,7 +87,7 @@ All these flags can be used individually or together to tailor the behavior of a
 Install the latest release via PyPI:
 
 ```bash
-pip install -U
+pip install -U hamtaa-texttools
 ```
 
 ---
@@ -118,7 +118,7 @@ model = "gpt-4o-mini"
 the_tool = TheTool(client=client, model=model, with_analysis=True, output_lang="English")
 
 # Example: Question Detection
-detection = the_tool.
+detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection["result"])
 print(detection["logprobs"])
 # Output: True
@@ -135,7 +135,7 @@ class Custom(BaseModel):
     result: list[list[dict[str, int]]]
 
 custom_prompt = "Something"
-custom_result = the_tool.
+custom_result = the_tool.run_custom(custom_prompt, Custom)
 print(custom_result)
 ```
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/README.md
RENAMED
@@ -17,17 +17,17 @@ It provides ready-to-use utilities for **translation, question detection, keywor
 TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
 Each tool is designed to work out-of-the-box with structured outputs (JSON / Pydantic).
 
--
--
--
--
--
--
--
--
--
--
--
+- **`categorize()`** - Classifies text into Islamic studies categories
+- **`is_question()`** - Binary detection of whether input is a question
+- **`extract_keywords()`** - Extracts keywords from text
+- **`extract_entities()`** - Named Entity Recognition (NER) system
+- **`summarize()`** - Text summarization
+- **`text_to_question()`** - Generates questions from text
+- **`merge_questions()`** - Merges multiple questions with different modes
+- **`rewrite()`** - Rewrites text with different wording/meaning
+- **`subject_to_question()`** - Generates questions about a specific subject
+- **`translate()`** - Text translation between languages
+- **`run_custom()`** - Allows users to define a custom tool with arbitrary BaseModel
 
 ---
 
@@ -53,7 +53,7 @@ All these flags can be used individually or together to tailor the behavior of a
 Install the latest release via PyPI:
 
 ```bash
-pip install -U
+pip install -U hamtaa-texttools
 ```
 
 ---
@@ -84,7 +84,7 @@ model = "gpt-4o-mini"
 the_tool = TheTool(client=client, model=model, with_analysis=True, output_lang="English")
 
 # Example: Question Detection
-detection = the_tool.
+detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection["result"])
 print(detection["logprobs"])
 # Output: True
@@ -101,7 +101,7 @@ class Custom(BaseModel):
     result: list[list[dict[str, int]]]
 
 custom_prompt = "Something"
-custom_result = the_tool.
+custom_result = the_tool.run_custom(custom_prompt, Custom)
 print(custom_result)
 ```
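Taken together, the README changes above form a working quickstart for 1.0.6. A minimal sketch assembled from the snippets visible in this diff; the client construction and the `from texttools import TheTool` path are assumptions, since the surrounding README lines are not part of the diff:

```python
# Sketch assembled from the 1.0.6 README excerpts above.
# Assumptions: the import path and the OpenAI client setup.
from openai import OpenAI

from texttools import TheTool  # import path assumed

client = OpenAI(api_key="...")  # hypothetical key handling
model = "gpt-4o-mini"

the_tool = TheTool(client=client, model=model, with_analysis=True, output_lang="English")

# Question detection, renamed to is_question() in 1.0.6
detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
print(detection["result"])    # e.g. True
print(detection["logprobs"])  # token-level log probabilities, returned when requested
```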
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6/hamtaa_texttools.egg-info}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.0.5
+Version: 1.0.6
 Summary: TextTools is a high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
@@ -51,17 +51,17 @@ It provides ready-to-use utilities for **translation, question detection, keywor
 TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
 Each tool is designed to work out-of-the-box with structured outputs (JSON / Pydantic).
 
--
--
--
--
--
--
--
--
--
--
--
+- **`categorize()`** - Classifies text into Islamic studies categories
+- **`is_question()`** - Binary detection of whether input is a question
+- **`extract_keywords()`** - Extracts keywords from text
+- **`extract_entities()`** - Named Entity Recognition (NER) system
+- **`summarize()`** - Text summarization
+- **`text_to_question()`** - Generates questions from text
+- **`merge_questions()`** - Merges multiple questions with different modes
+- **`rewrite()`** - Rewrites text with different wording/meaning
+- **`subject_to_question()`** - Generates questions about a specific subject
+- **`translate()`** - Text translation between languages
+- **`run_custom()`** - Allows users to define a custom tool with arbitrary BaseModel
 
 ---
 
@@ -87,7 +87,7 @@ All these flags can be used individually or together to tailor the behavior of a
 Install the latest release via PyPI:
 
 ```bash
-pip install -U
+pip install -U hamtaa-texttools
 ```
 
 ---
@@ -118,7 +118,7 @@ model = "gpt-4o-mini"
 the_tool = TheTool(client=client, model=model, with_analysis=True, output_lang="English")
 
 # Example: Question Detection
-detection = the_tool.
+detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection["result"])
 print(detection["logprobs"])
 # Output: True
@@ -135,7 +135,7 @@ class Custom(BaseModel):
     result: list[list[dict[str, int]]]
 
 custom_prompt = "Something"
-custom_result = the_tool.
+custom_result = the_tool.run_custom(custom_prompt, Custom)
 print(custom_result)
 ```
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/hamtaa_texttools.egg-info/SOURCES.txt
RENAMED
@@ -15,15 +15,15 @@ texttools/formatters/base_formatter.py
 texttools/formatters/user_merge_formatter.py
 texttools/prompts/README.md
 texttools/prompts/categorizer.yaml
-texttools/prompts/custom_tool.yaml
+texttools/prompts/is_question.yaml
 texttools/prompts/keyword_extractor.yaml
 texttools/prompts/ner_extractor.yaml
-texttools/prompts/question_detector.yaml
-texttools/prompts/question_generator.yaml
 texttools/prompts/question_merger.yaml
 texttools/prompts/rewriter.yaml
-texttools/prompts/subject_question_generator.yaml
+texttools/prompts/run_custom.yaml
+texttools/prompts/subject_to_question.yaml
 texttools/prompts/summarizer.yaml
+texttools/prompts/text_to_question.yaml
 texttools/prompts/translator.yaml
 texttools/tools/__init__.py
 texttools/tools/async_the_tool.py
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/pyproject.toml
RENAMED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "hamtaa-texttools"
-version = "1.0.5"
+version = "1.0.6"
 authors = [
     { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
     { name = "Montazer", email = "montazerh82@gmail.com" },
@@ -17,7 +17,7 @@ license = {file = "LICENSE"}
 requires-python = ">=3.8"
 dependencies = [
     "openai==1.97.1",
-    "PyYAML>=6.0"
+    "PyYAML>=6.0",
 ]
 keywords = ["nlp", "llm", "text-processing", "openai"]
 
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/batch/batch_manager.py
RENAMED
@@ -2,11 +2,16 @@ import json
 import uuid
 from pathlib import Path
 from typing import Any, Type
+import logging
 
 from pydantic import BaseModel
 from openai import OpenAI
 from openai.lib._pydantic import to_strict_json_schema
 
+# Configure logger
+logger = logging.getLogger("batch_runner")
+logger.setLevel(logging.INFO)
+
 
 class SimpleBatchManager:
     """
@@ -159,25 +164,9 @@ class SimpleBatchManager:
         info = self.client.batches.retrieve(job["id"])
         job = info.to_dict()
         self._save_state(job_name, [job])
-
+        logger.info("Batch job status: %s", job)
         return job["status"]
 
-    def _parsed(self, result: dict) -> list:
-        """
-        Parses the result dictionary, extracting the desired output or error for each item.
-        Returns a list of dictionaries with 'id' and 'output' keys.
-        """
-        modified_result = []
-
-        for key, d in result.items():
-            if "desired_output" in d:
-                new_dict = {"id": key, "output": d["desired_output"]}
-                modified_result.append(new_dict)
-            else:
-                new_dict = {"id": key, "output": d["error"]}
-                modified_result.append(new_dict)
-        return modified_result
-
     def fetch_results(
         self, job_name: str, remove_cache: bool = True
     ) -> tuple[dict[str, str], list]:
@@ -198,7 +187,7 @@ class SimpleBatchManager:
             err_content = (
                 self.client.files.content(error_file_id).read().decode("utf-8")
             )
-
+            logger.info("Error file content:", err_content)
             return {}
 
         content = self.client.files.content(out_file_id).read().decode("utf-8")
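Both batch modules now create a module-level logger and set its level to INFO (note that batch_manager.py also registers its logger under the name "batch_runner"), but neither attaches a handler, so the new messages only become visible once the host application configures logging. A minimal sketch of that configuration, using only the standard library:

```python
import logging

# Attach a root handler so the library's INFO messages become visible;
# the logger names ("batch_runner", "async_operator") come from the diffs.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)

logging.getLogger("batch_runner").info("visible once a handler is configured")
```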
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/batch/batch_runner.py
RENAMED
@@ -4,23 +4,27 @@ import time
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Callable
+import logging
 
+from dotenv import load_dotenv
 from openai import OpenAI
 from pydantic import BaseModel
 
-from texttools.batch
+from texttools.batch import SimpleBatchManager
 
+# Configure logger
+logger = logging.getLogger("batch_runner")
+logger.setLevel(logging.INFO)
 
-
-
+
+class OutputModel(BaseModel):
+    desired_output: str
 
 
 def export_data(data):
     """
     Produces a structure of the following form from an initial data structure:
-    [
-        {"id": str, "content": str},...
-    ]
+    [{"id": str, "text": str},...]
     """
     return data
 
@@ -50,19 +54,17 @@ class BatchConfig:
     BASE_OUTPUT_DIR: str = "Data/batch_entity_result"
     import_function: Callable = import_data
     export_function: Callable = export_data
+    poll_interval_seconds: int = 30
+    max_retries: int = 3
 
 
 class BatchJobRunner:
     """
-
-
-    Handles data loading, partitioning, job execution via SimpleBatchManager,
-    and result saving. Manages the complete workflow from input data to processed outputs,
-    including retries and progress tracking across multiple batch parts.
+    Handles running batch jobs using a batch manager and configuration.
     """
 
     def __init__(
-        self, config: BatchConfig = BatchConfig(), output_model: type =
+        self, config: BatchConfig = BatchConfig(), output_model: type = OutputModel
     ):
         self.config = config
         self.system_prompt = config.system_prompt
@@ -76,8 +78,13 @@ class BatchJobRunner:
         self.parts: list[list[dict[str, Any]]] = []
         self._partition_data()
         Path(self.config.BASE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
+        # Map part index to job name
+        self.part_idx_to_job_name: dict[int, str] = {}
+        # Track retry attempts per part
+        self.part_attempts: dict[int, int] = {}
 
     def _init_manager(self) -> SimpleBatchManager:
+        load_dotenv()
         api_key = os.getenv("OPENAI_API_KEY")
         client = OpenAI(api_key=api_key)
         return SimpleBatchManager(
@@ -111,7 +118,7 @@ class BatchJobRunner:
         prompt_length = len(self.system_prompt)
         total = total_length + (prompt_length * len(self.data))
         calculation = total / self.config.CHARS_PER_TOKEN
-
+        logger.info(
             f"Total chars: {total_length}, Prompt chars: {prompt_length}, Total: {total}, Tokens: {calculation}"
         )
         if calculation < self.config.MAX_TOTAL_TOKENS:
@@ -122,55 +129,99 @@ class BatchJobRunner:
             self.data[i : i + self.config.MAX_BATCH_SIZE]
             for i in range(0, len(self.data), self.config.MAX_BATCH_SIZE)
         ]
-
+        logger.info(f"Data split into {len(self.parts)} part(s)")
 
-    def
+    def _submit_all_jobs(self) -> None:
         for idx, part in enumerate(self.parts):
             if self._result_exists(idx):
-
+                logger.info(f"Skipping part {idx + 1}: result already exists.")
                 continue
             part_job_name = (
                 f"{self.job_name}_part_{idx + 1}"
                 if len(self.parts) > 1
                 else self.job_name
             )
-
-
+            # If a job with this name already exists, register and skip submitting
+            existing_job = self.manager._load_state(part_job_name)
+            if existing_job:
+                logger.info(
+                    f"Skipping part {idx + 1}: job already exists ({part_job_name})."
+                )
+                self.part_idx_to_job_name[idx] = part_job_name
+                self.part_attempts.setdefault(idx, 0)
+                continue
+
+            payload = part
+            logger.info(
+                f"Submitting job for part {idx + 1}/{len(self.parts)}: {part_job_name}"
             )
-            self.
+            self.manager.start(payload, job_name=part_job_name)
+            self.part_idx_to_job_name[idx] = part_job_name
+            self.part_attempts.setdefault(idx, 0)
+            # This is added for letting file get uploaded, before starting the next part.
+            logger.info("Uploading...")
+            time.sleep(30)
 
-    def
-
-
-
-
-
-
-
-
-
+    def run(self):
+        # Submit all jobs up-front for concurrent execution
+        self._submit_all_jobs()
+        pending_parts: set[int] = set(self.part_idx_to_job_name.keys())
+        logger.info(f"Pending parts: {sorted(pending_parts)}")
+        # Polling loop
+        while pending_parts:
+            finished_this_round: list[int] = []
+            for part_idx in list(pending_parts):
+                job_name = self.part_idx_to_job_name[part_idx]
+                status = self.manager.check_status(job_name=job_name)
+                logger.info(f"Status for {job_name}: {status}")
                 if status == "completed":
-
+                    logger.info(
+                        f"Job completed. Fetching results for part {part_idx + 1}..."
+                    )
                    output_data, log = self.manager.fetch_results(
-                        job_name=
+                        job_name=job_name, remove_cache=False
                    )
                    output_data = self.config.import_function(output_data)
                    self._save_results(output_data, log, part_idx)
-
-
+                    logger.info(f"Fetched and saved results for part {part_idx + 1}.")
+                    finished_this_round.append(part_idx)
                 elif status == "failed":
-
-                    self.
-
-
-
-
+                    attempt = self.part_attempts.get(part_idx, 0) + 1
+                    self.part_attempts[part_idx] = attempt
+                    if attempt <= self.config.max_retries:
+                        logger.info(
+                            f"Job {job_name} failed (attempt {attempt}). Retrying after short backoff..."
+                        )
+                        self.manager._clear_state(job_name)
+                        time.sleep(10)
+                        payload = self._to_manager_payload(self.parts[part_idx])
+                        new_job_name = (
+                            f"{self.job_name}_part_{part_idx + 1}_retry_{attempt}"
+                        )
+                        self.manager.start(payload, job_name=new_job_name)
+                        self.part_idx_to_job_name[part_idx] = new_job_name
+                    else:
+                        logger.info(
+                            f"Job {job_name} failed after {attempt - 1} retries. Marking as failed."
+                        )
+                        finished_this_round.append(part_idx)
                 else:
-                    #
-
+                    # Still running or queued
+                    continue
+            # Remove finished parts
+            for part_idx in finished_this_round:
+                pending_parts.discard(part_idx)
+            if pending_parts:
+                logger.info(
+                    f"Waiting {self.config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
+                )
+                time.sleep(self.config.poll_interval_seconds)
 
     def _save_results(
-        self,
+        self,
+        output_data: list[dict[str, Any]] | dict[str, Any],
+        log: list[Any],
+        part_idx: int,
     ):
         part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
         result_path = (
@@ -178,7 +229,7 @@ class BatchJobRunner:
             / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
         )
         if not output_data:
-
+            logger.info("No output data to save. Skipping this part.")
             return
         else:
             with open(result_path, "w", encoding="utf-8") as f:
@@ -195,13 +246,13 @@ class BatchJobRunner:
         part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
         result_path = (
             Path(self.config.BASE_OUTPUT_DIR)
-            / f"{Path(self.
+            / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
         )
         return result_path.exists()
 
 
 if __name__ == "__main__":
-
+    logger.info("=== Batch Job Runner ===")
     config = BatchConfig(
         system_prompt="",
         job_name="job_name",
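The reworked runner now submits every part up-front and then polls, retrying failed parts up to `max_retries`. A usage sketch based on the `__main__` block above; the prompt, job name, and data hooks are hypothetical placeholders, and the remaining config fields keep their dataclass defaults:

```python
# Usage sketch for the 1.0.6 BatchJobRunner (values are placeholders).
from texttools.batch.batch_runner import BatchConfig, BatchJobRunner  # module path from the file listing

config = BatchConfig(
    system_prompt="Extract the named entities from the given text.",  # hypothetical prompt
    job_name="entity_extraction_demo",                                # hypothetical job name
    poll_interval_seconds=30,  # new in 1.0.6: seconds between status polls
    max_retries=3,             # new in 1.0.6: per-part retry budget on failure
)

runner = BatchJobRunner(config=config)  # output_model defaults to the new OutputModel
runner.run()  # submits all parts, then polls until each completes or exhausts its retries
```

Note that `_init_manager()` now calls `load_dotenv()` before reading `OPENAI_API_KEY`, so the key can live in a local `.env` file.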
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/README.md
RENAMED
@@ -3,6 +3,8 @@
 ## Overview
 This folder contains YAML files for all prompts used in the project. Each file represents a separate prompt template, which can be loaded by tools or scripts that require structured prompts for AI models.
 
+---
+
 ## Structure
 - **prompt_file.yaml**: Each YAML file represents a single prompt template.
 - **main_template**: The main instruction template for the model.
@@ -24,6 +26,8 @@ analyze_template:
   Optional detailed analysis template.
 ```
 
+---
+
 ## Guidelines
 1. **Naming**: Use descriptive names for each YAML file corresponding to the tool or task it serves.
 2. **Placeholders**: Use `{input}` or other relevant placeholders to dynamically inject data.
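The convention this README describes (a `main_template`, an optional `analyze_template`, and `{input}`-style placeholders) can be exercised with nothing but PyYAML and `str.format`. A sketch with a hypothetical inline template; the doubled braces are the same trick the real prompt files use, surviving `str.format` as literal JSON braces:

```python
import yaml

# Hypothetical template following the structure described above.
raw = """
main_template: |
  You are a helpful assistant.
  Respond only in JSON format:
  {{"result": "string"}}
  Here is the text:
  {input}
"""

templates = yaml.safe_load(raw)
# {{ and }} collapse to literal braces; {input} is substituted.
prompt = templates["main_template"].format(input="Some user text")
print(prompt)
```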
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/keyword_extractor.yaml
RENAMED
@@ -2,12 +2,12 @@ main_template: |
   You are an expert keyword extractor.
   Extract the most relevant keywords from the given text.
   Guidelines:
-
-
-
-
-
-
+  - Keywords must represent the main concepts of the text.
+  - If two words have overlapping meanings, choose only one.
+  - Do not include generic or unrelated words.
+  - Keywords must be single, self-contained words (no phrases).
+  - Output between 3 and 7 keywords based on the input length.
+  - Respond only in JSON format:
   {{"result": ["keyword1", "keyword2", etc.]}}
   Here is the text:
   {input}
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/prompts/question_merger.yaml
RENAMED
@@ -5,11 +5,11 @@ main_template:
   I will give you a list of questions that are semantically similar.
   Your task is to merge them into one unified question.
   Guidelines:
-
-
-
-
-
+  - Preserves all the information and intent from the original questions.
+  - Sounds natural, fluent, and concise.
+  - Avoids redundancy or unnecessary repetition.
+  - Does not omit any unique idea from the originals.
+  - Respond only in JSON format:
   {{"result": "string"}}
   Here is the questions:
   {input}
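Both guideline rewrites above pin the model to a single-key JSON object ({"result": [...]} for keywords, {"result": "string"} for merged questions), which surfaces in Python as the "result" key of the returned dict. A sketch; the import path and exact argument shapes are assumptions inferred from the tool list, not shown in this diff:

```python
from openai import OpenAI

from texttools import TheTool  # import path assumed

the_tool = TheTool(client=OpenAI(api_key="..."), model="gpt-4o-mini")

keywords = the_tool.extract_keywords("TextTools is a high-level NLP toolkit built on top of modern LLMs.")
print(keywords["result"])  # a list of 3 to 7 single-word keywords

merged = the_tool.merge_questions(
    ["Is TextTools open source?", "Does TextTools have an open-source license?"]  # argument shape assumed
)
print(merged["result"])  # one unified question as a single string
```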
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/async_the_tool.py
RENAMED
@@ -99,7 +99,7 @@ class AsyncTheTool:
         )
         return results
 
-    async def
+    async def is_question(
         self,
         question: str,
         output_lang: str | None = None,
@@ -111,7 +111,7 @@ class AsyncTheTool:
     ) -> dict[str, bool]:
         results = await self.operator.run(
             question,
-            prompt_file="
+            prompt_file="is_question.yaml",
             output_model=OutputModels.BoolOutput,
             with_analysis=with_analysis,
             resp_format="parse",
@@ -123,7 +123,7 @@ class AsyncTheTool:
         )
         return results
 
-    async def
+    async def text_to_question(
         self,
         text: str,
         output_lang: str | None = None,
@@ -135,7 +135,7 @@ class AsyncTheTool:
     ) -> dict[str, str]:
         results = await self.operator.run(
             text,
-            prompt_file="
+            prompt_file="text_to_question.yaml",
             output_model=OutputModels.StrOutput,
             with_analysis=with_analysis,
             resp_format="parse",
@@ -202,7 +202,7 @@ class AsyncTheTool:
         )
         return results
 
-    async def
+    async def subject_to_question(
         self,
         subject: str,
         number_of_questions: int,
@@ -215,7 +215,7 @@ class AsyncTheTool:
     ) -> dict[str, list[str]]:
         results = await self.operator.run(
             subject,
-            prompt_file="
+            prompt_file="subject_to_question.yaml",
             output_model=OutputModels.ReasonListStrOutput,
             with_analysis=with_analysis,
             resp_format="parse",
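A minimal sketch of the renamed coroutines; AsyncTheTool's constructor is assumed to mirror TheTool's, built on the AsyncOpenAI client that async_operator.py (next file) imports:

```python
import asyncio

from openai import AsyncOpenAI

from texttools import AsyncTheTool  # import path assumed


async def main() -> None:
    client = AsyncOpenAI(api_key="...")  # hypothetical key handling
    tool = AsyncTheTool(client=client, model="gpt-4o-mini")

    detection = await tool.is_question("Is this project open source?")
    question = await tool.text_to_question("TextTools ships several LLM-backed utilities.")
    print(detection["result"], question["result"])


asyncio.run(main())
```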
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/internals/async_operator.py
RENAMED
@@ -3,7 +3,8 @@ from __future__ import annotations
 import json
 import math
 import re
-from typing import Any, Literal,
+from typing import Any, Literal, TypeVar
+import logging
 
 from openai import AsyncOpenAI
 from pydantic import BaseModel
@@ -16,6 +17,10 @@ from texttools.tools.internals.prompt_loader import PromptLoader
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
+# Configure logger
+logger = logging.getLogger("async_operator")
+logger.setLevel(logging.INFO)
+
 
 class AsyncOperator:
     """
@@ -190,6 +195,7 @@ class AsyncOperator:
 
         for choice in completion.choices:
             if not getattr(choice, "logprobs", None):
+                logger.info("No logprobs found.")
                 continue
 
             for logprob_item in choice.logprobs.content:
@@ -237,11 +243,10 @@ class AsyncOperator:
         try:
             cleaned_text = input_text.strip()
 
-            # FIXED: Correct parameter order for load
             prompt_configs = prompt_loader.load(
-                prompt_file=prompt_file,
-                text=cleaned_text,
-                mode=mode if use_modes else "",
+                prompt_file=prompt_file,
+                text=cleaned_text,
+                mode=mode if use_modes else "",
                 **extra_kwargs,
             )
 
@@ -269,7 +274,7 @@ class AsyncOperator:
                     output_model,
                     logprobs,
                     top_logprobs,
-                    max_tokens,
+                    max_tokens,
                 )
             elif resp_format == "parse":
                 parsed, completion = await self._parse_completion(
@@ -277,10 +282,16 @@ class AsyncOperator:
                     output_model,
                     logprobs,
                     top_logprobs,
-                    max_tokens,
+                    max_tokens,
                 )
             else:
-
+                logger.error(f"Unknown resp_format: {resp_format}")
+
+            # Ensure output_model has a `result` field
+            if not hasattr(parsed, "result"):
+                logger.error(
+                    "The provided output_model must define a field named 'result'"
+                )
 
             results = {"result": parsed.result}
 
@@ -293,5 +304,5 @@ class AsyncOperator:
             return results
 
         except Exception as e:
-
-
+            logger.error(f"Async TheTool failed: {e}")
+            return {"Error": str(e), "result": ""}
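The new `hasattr` check formalizes a contract: whatever output model the caller supplies, the operator builds its return value from `parsed.result`, so the model must define a field named `result`. A conforming model is a one-liner (the class name here is hypothetical):

```python
from pydantic import BaseModel


class KeywordList(BaseModel):  # hypothetical model name
    result: list[str]  # the field must be named `result`; its type is up to the caller
```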
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/internals/operator.py
RENAMED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import math
 import re
-from typing import Any, TypeVar, Type, Literal
+from typing import Any, TypeVar, Type, Literal
 import json
 import logging
 
@@ -291,5 +291,5 @@ class Operator:
             return results
 
         except Exception as e:
-            logger.error(f"
+            logger.error(f"TheTool failed: {e}")
             return {"Error": str(e), "result": ""}
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/internals/prompt_loader.py
RENAMED
@@ -1,4 +1,4 @@
-from
+from functools import lru_cache
 from pathlib import Path
 import yaml
 
@@ -7,10 +7,6 @@ class PromptLoader:
     """
     Utility for loading and formatting YAML prompt templates.
 
-    Each YAML file under `prompts/` must define at least a `main_template`,
-    and optionally an `analyze_template`. These can either be a single string
-    or a dictionary keyed by mode names (if `use_modes=True`).
-
     Responsibilities:
     - Load and parse YAML prompt definitions.
     - Select the right template (by mode, if applicable).
@@ -22,31 +18,30 @@ class PromptLoader:
     }
     """
 
+    def __init__(self):
+        self.base_dir = Path(__file__).parent.parent.parent / Path("prompts")
+
     MAIN_TEMPLATE: str = "main_template"
     ANALYZE_TEMPLATE: str = "analyze_template"
 
-
-
-
-        prompt_file
-        mode: str | None,
-    ) -> dict[str, str]:
-        prompt_path = Path(__file__).parent.parent.parent / prompts_dir / prompt_file
+    # Use lru_cache to load each file once
+    @lru_cache(maxsize=32)
+    def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
+        prompt_path = self.base_dir / prompt_file
 
         if not prompt_path.exists():
             raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
 
         try:
-            # Load the data
             data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
         except yaml.YAMLError as e:
             raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
 
         return {
-
+            self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]
             if mode
             else data[self.MAIN_TEMPLATE],
-
+            self.ANALYZE_TEMPLATE: data.get(self.ANALYZE_TEMPLATE)[mode]
             if mode
             else data.get(self.ANALYZE_TEMPLATE),
         }
@@ -59,14 +54,9 @@ class PromptLoader:
         return format_args
 
     def load(
-        self,
-        prompt_file: str,
-        text: str,
-        mode: str,
-        prompts_dir: str = "prompts",
-        **extra_kwargs,
+        self, prompt_file: str, text: str, mode: str, **extra_kwargs
     ) -> dict[str, str]:
-        template_configs = self._load_templates(
+        template_configs = self._load_templates(prompt_file, mode)
        format_args = self._build_format_args(text, **extra_kwargs)
 
         # Inject variables inside each template
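One caveat worth flagging (an observation about the change, not part of the diff): `functools.lru_cache` on an instance method includes `self` in the cache key, so each PromptLoader instance gets its own entries and is kept alive by the cache. A common alternative caches a module-level helper keyed on the path string; a sketch of that pattern, not what the package does:

```python
from functools import lru_cache
from pathlib import Path

import yaml


@lru_cache(maxsize=32)
def load_yaml(path_str: str) -> dict:
    # Keyed on the hashable path string, independent of any loader instance.
    return yaml.safe_load(Path(path_str).read_text(encoding="utf-8"))
```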
{hamtaa_texttools-1.0.5 → hamtaa_texttools-1.0.6}/texttools/tools/the_tool.py
RENAMED
@@ -17,11 +17,11 @@ class TheTool:
     - categorize: assign a text to one of several Islamic categories.
     - extract_keywords: produce a keyword list from text.
     - extract_entities: simple NER (name/type pairs).
-    -
-    -
+    - is_question: binary check whether input is a question.
+    - text_to_question: produce a new question from a text.
     - merge_questions: combine multiple questions (default/reason modes).
     - rewrite: rephrase questions (same meaning/different wording, or vice versa).
-    -
+    - subject_to_question: generate multiple questions given a subject.
     - summarize: produce a concise summary of a subject.
     - translate: translate text between languages.
 
@@ -174,7 +174,7 @@ class TheTool:
             top_logprobs=self.top_logprobs if top_logprobs is None else top_logprobs,
         )
 
-    def
+    def is_question(
         self,
         text: str,
         model: str | None = None,
@@ -196,7 +196,7 @@ class TheTool:
         """
         return self.operator.run(
             # Internal parameters
-            prompt_file="
+            prompt_file="is_question.yaml",
             output_model=OutputModels.BoolOutput,
             resp_format="parse",
             output_lang=False,
@@ -212,7 +212,7 @@ class TheTool:
             top_logprobs=self.top_logprobs if top_logprobs is None else top_logprobs,
         )
 
-    def
+    def text_to_question(
         self,
         text: str,
         model: str | None = None,
@@ -235,7 +235,7 @@ class TheTool:
         """
         return self.operator.run(
             # Internal parameters
-            prompt_file="
+            prompt_file="text_to_question.yaml",
             output_model=OutputModels.StrOutput,
             resp_format="parse",
             # User parameters
@@ -340,7 +340,7 @@ class TheTool:
             top_logprobs=self.top_logprobs if top_logprobs is None else top_logprobs,
         )
 
-    def
+    def subject_to_question(
         self,
         text: str,
         number_of_questions: int,
@@ -366,7 +366,7 @@ class TheTool:
         """
         return self.operator.run(
             # Internal parameters
-            prompt_file="
+            prompt_file="subject_to_question.yaml",
             output_model=OutputModels.ReasonListStrOutput,
             resp_format="parse",
             # User parameters
@@ -463,14 +463,14 @@ class TheTool:
             top_logprobs=self.top_logprobs if top_logprobs is None else top_logprobs,
         )
 
-    def
+    def run_custom(
         self,
         prompt: str,
         output_model: Any,
         model: str | None = None,
         output_lang: str | None = None,
         temperature: float | None = None,
-        logprobs:
+        logprobs: bool | None = None,
         top_logprobs: int | None = None,
     ) -> dict[str, Any]:
         """
@@ -485,7 +485,7 @@ class TheTool:
         """
         return self.operator.run(
             # Internal parameters
-            prompt_file="
+            prompt_file="run_custom.yaml",
             resp_format="parse",
             user_prompt=False,
             with_analysis=False,
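`run_custom()` ties the pieces together: a free-form prompt plus a caller-defined Pydantic model which, per the operator change above, must expose a `result` field. A sketch mirroring the README example; client construction and import path are assumptions:

```python
from openai import OpenAI
from pydantic import BaseModel

from texttools import TheTool  # import path assumed


class Custom(BaseModel):
    result: list[list[dict[str, int]]]  # arbitrary shape, but the field must be named `result`


the_tool = TheTool(client=OpenAI(api_key="..."), model="gpt-4o-mini")
custom_result = the_tool.run_custom("Something", Custom)  # "Something" is the README's stand-in prompt
print(custom_result)
```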