hamtaa-texttools 1.1.1__py3-none-any.whl → 1.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.8.dist-info}/METADATA +57 -12
- hamtaa_texttools-1.1.8.dist-info/RECORD +30 -0
- texttools/__init__.py +2 -7
- texttools/batch/__init__.py +2 -3
- texttools/batch/batch_manager.py +14 -15
- texttools/batch/batch_runner.py +53 -62
- texttools/prompts/README.md +4 -4
- texttools/tools/__init__.py +2 -2
- texttools/tools/{async_the_tool.py → async_tools.py} +33 -12
- texttools/tools/internals/async_operator.py +74 -11
- texttools/tools/internals/base_operator.py +19 -10
- texttools/tools/internals/operator.py +74 -11
- texttools/tools/internals/output_models.py +7 -4
- texttools/tools/internals/prompt_loader.py +3 -0
- texttools/tools/{the_tool.py → sync_tools.py} +33 -12
- hamtaa_texttools-1.1.1.dist-info/RECORD +0 -30
- {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.8.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.8.dist-info}/licenses/LICENSE +0 -0
- {hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.8.dist-info}/top_level.txt +0 -0
{hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.8.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.1
+Version: 1.1.8
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License

@@ -40,14 +40,14 @@ Dynamic: license-file
 
 It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
 
-It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER
+It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
 
 ---
 
 ## ✨ Features
 
 TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
-Each tool is designed to work
+Each tool is designed to work with structured outputs (JSON / Pydantic).
 
 - **`categorize()`** - Classifies text into Islamic studies categories
 - **`is_question()`** - Binary detection of whether input is a question

@@ -63,7 +63,7 @@ Each tool is designed to work out-of-the-box with structured outputs (JSON / Pyd
 
 ---
 
-## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt` and `
+## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature` and `validator` parameters
 
 TextTools provides several optional flags to customize LLM behavior:
 

@@ -78,12 +78,26 @@ Note: This doubles token usage per call because it triggers an additional LLM re
 
 - **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
 
+- **`validator=validation_function`** → Forces TheTool to validate the output result with your custom validator. The validator should return a bool (True if there was no problem, False if validation failed). If the validator fails, TheTool retries to get another output by modifying `temperature`.
+
 All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
 
 **Note:** There might be some tools that don't support some of the parameters above.
 
 ---
 
+## 🧩 ToolOutput
+
+Every tool of `TextTools` returns a `ToolOutput` object, which is a BaseModel with attributes:
+- **`result`** → The output of the LLM (`type=Any`)
+- **`analysis`** → The reasoning step before generating the final output (`type=str`)
+- **`logprobs`** → Token-level probabilities for the generated output (`type=list`)
+- **`errors`** → Any errors that occurred while calling the LLM (`type=str`)
+
+**Note:** You can use `repr(ToolOutput)` to see details of an output.
+
+---
+
 ## 🚀 Installation
 
 Install the latest release via PyPI:

@@ -121,13 +135,13 @@ the_tool = TheTool(client=client, model=model)
 detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection.result)
 print(detection.logprobs)
-# Output: True
+# Output: True + logprobs
 
 # Example: Translation
 translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
 print(translation.result)
 print(translation.analysis)
-# Output: "Hi! How are you?"
+# Output: "Hi! How are you?" + analysis
 ```
 
 ---

@@ -147,19 +161,22 @@ async def main():
     model = "gpt-4o-mini"
 
     # Create an instance of AsyncTheTool
-
+    async_the_tool = AsyncTheTool(client=async_client, model=model)
+
+    # Example: Async Translation and Keyword Extraction
+    translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+    keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
 
-
-    translation = await the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+    (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
     print(translation.result)
-
+    print(keywords.result)
 
 asyncio.run(main())
 ```
 
 ---
 
-##
+## 👍 Use Cases
 
 Use **TextTools** when you need to:
 

@@ -167,7 +184,35 @@ Use **TextTools** when you need to:
 - 🌍 **Translate** and process multilingual corpora with ease
 - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
 - 📊 **Analyze** large text collections using embeddings and categorization
-
+
+---
+
+## 📚 Batch Processing
+
+Process large datasets efficiently using OpenAI's batch API.
+
+## Quick Start
+
+```python
+from texttools import BatchJobRunner, BatchConfig
+
+# Configure your batch job
+config = BatchConfig(
+    system_prompt="Extract entities from the text",
+    job_name="entity_extraction",
+    input_data_path="data.json",
+    output_data_filename="results.json",
+    model="gpt-4o-mini"
+)
+
+# Define your output schema
+class Output(BaseModel):
+    entities: list[str]
+
+# Run the batch job
+runner = BatchJobRunner(config, output_model=Output)
+runner.run()
+```
 
 ---
 
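Taken together, the `validator` and `ToolOutput` additions change the call pattern described in this README. A minimal sketch of the new flow, assuming a configured OpenAI client (the validator and input text here are illustrative, not from the package):

```python
from openai import OpenAI
from texttools import TheTool

the_tool = TheTool(client=OpenAI(), model="gpt-4o-mini")

# A validator returns True when the result is acceptable; on False,
# the tool retries with a perturbed temperature (see the operator diffs below).
def non_empty(result) -> bool:
    return bool(result and str(result).strip())

output = the_tool.summarize("TextTools is a high-level NLP toolkit.", validator=non_empty)
print(repr(output))   # full ToolOutput: result, analysis, logprobs, errors
print(output.errors)  # populated instead of raising when a call or validation fails
```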
hamtaa_texttools-1.1.8.dist-info/RECORD
ADDED

@@ -0,0 +1,30 @@
+hamtaa_texttools-1.1.8.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
+texttools/__init__.py,sha256=lFYe1jdssHC1h8qcPpV3whANxiDi8aiiFdY-7L0Ck10,164
+texttools/batch/__init__.py,sha256=DJGJTfR6F3Yv4_alsj9g1tesGzdcSV27Zw74DonhW_s,102
+texttools/batch/batch_manager.py,sha256=ZgLiO9maCHnx2cJbUjsYXFnlUsMLI2TP3Vc9uKU0BLg,8706
+texttools/batch/batch_runner.py,sha256=X0YQmaowO_jUSAFWBHdxOLoRrX_gvmrJDgp9qPlOSEw,10254
+texttools/prompts/README.md,sha256=-5YO93CN93QLifqZpUeUnCOCBbDiOTV-cFQeJ7Gg0I4,1377
+texttools/prompts/categorizer.yaml,sha256=GMqIIzQFhgnlpkgU1qi3FAD3mD4A2jiWD5TilQ2XnnE,1204
+texttools/prompts/extract_entities.yaml,sha256=KiKjeDpHaeh3JVtZ6q1pa3k4DYucUIU9WnEcRTCA-SE,651
+texttools/prompts/extract_keywords.yaml,sha256=0O7ypL_OsEOxtvlQ2CZjnsv9637DJwAKprZsf9Vo2_s,769
+texttools/prompts/is_question.yaml,sha256=d0-vKRbXWkxvO64ikvxRjEmpAXGpCYIPGhgexvPPjws,471
+texttools/prompts/merge_questions.yaml,sha256=0J85GvTirZB4ELwH3sk8ub_WcqqpYf6PrMKr3djlZeo,1792
+texttools/prompts/rewrite.yaml,sha256=LO7He_IA3MZKz8a-LxH9DHJpOjpYwaYN1pbjp1Y0tFo,5392
+texttools/prompts/run_custom.yaml,sha256=38OkCoVITbuuS9c08UZSP1jZW4WjSmRIi8fR0RAiPu4,108
+texttools/prompts/subject_to_question.yaml,sha256=C7x7rNNm6U_ZG9HOn6zuzYOtvJUZ2skuWbL1-aYdd3E,1147
+texttools/prompts/summarize.yaml,sha256=o6rxGPfWtZd61Duvm8NVvCJqfq73b-wAuMSKR6UYUqY,459
+texttools/prompts/text_to_question.yaml,sha256=UheKYpDn6iyKI8NxunHZtFpNyfCLZZe5cvkuXpurUJY,783
+texttools/prompts/translate.yaml,sha256=mGT2uBCei6uucWqVbs4silk-UV060v3G0jnt0P6sr50,634
+texttools/tools/__init__.py,sha256=3fPoeB-E5wGxWgv7axztHkeolR7ZDUJudd0xmpPFjao,113
+texttools/tools/async_tools.py,sha256=2ZY7Lo6Jj9xoTF8bfdh_g8VOXZ7ljMMesd1_QHXyf4s,15395
+texttools/tools/sync_tools.py,sha256=XKgZuzriFnk8B-YihJfs6BKivxjGCgOFfe7hnCpEiXs,15161
+texttools/tools/internals/async_operator.py,sha256=fCi70LXasC_2G9iz8uVFptnZEvVeb9TXopMBLi-cFuE,9022
+texttools/tools/internals/base_operator.py,sha256=rV2WqGdiHK4ezYz1f1EWcdbKFSFJhBJpORnJzPICFvk,3471
+texttools/tools/internals/formatters.py,sha256=tACNLP6PeoqaRpNudVxBaHA25zyWqWYPZQuYysIu88g,941
+texttools/tools/internals/operator.py,sha256=UBDScStTUXf8CIhwXb-6e_YOWTLggoiBV71vXRzr0P0,8904
+texttools/tools/internals/output_models.py,sha256=ekpbyocmXj_dee7ieOT1zOkMo9cPHT7xcUFCZoUaXA0,1886
+texttools/tools/internals/prompt_loader.py,sha256=1khayXcRC5w0Vf2SufpNaN1IUIhbKzS5ATiKheoBcGE,2082
+hamtaa_texttools-1.1.8.dist-info/METADATA,sha256=Cfb4VkcUELzRN6TrKdWK5jr4YsGbh_VlAtYVny86cb4,8690
+hamtaa_texttools-1.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hamtaa_texttools-1.1.8.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
+hamtaa_texttools-1.1.8.dist-info/RECORD,,
texttools/__init__.py
CHANGED

@@ -1,9 +1,4 @@
-from .batch import BatchJobRunner,
+from .batch import BatchJobRunner, BatchConfig
 from .tools import AsyncTheTool, TheTool
 
-__all__ = [
-    "TheTool",
-    "AsyncTheTool",
-    "SimpleBatchManager",
-    "BatchJobRunner",
-]
+__all__ = ["TheTool", "AsyncTheTool", "BatchJobRunner", "BatchConfig"]
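The rewritten `__all__` gives 1.1.8 the import surface sketched below; `SimpleBatchManager` is no longer exported from the package root, and the renamed `BatchManager` lives one level down (the batch_runner diff below imports it from there):

```python
# Public API after 1.1.8, per the new __all__:
from texttools import TheTool, AsyncTheTool, BatchJobRunner, BatchConfig

# The renamed manager is not re-exported from the root:
from texttools.batch.batch_manager import BatchManager
```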
texttools/batch/__init__.py
CHANGED
texttools/batch/batch_manager.py
CHANGED

@@ -1,19 +1,20 @@
 import json
 import uuid
 from pathlib import Path
-from typing import Any, Type
+from typing import Any, Type, TypeVar
 import logging
 
 from pydantic import BaseModel
 from openai import OpenAI
 from openai.lib._pydantic import to_strict_json_schema
 
-#
-
-logger.setLevel(logging.INFO)
+# Base Model type for output models
+T = TypeVar("T", bound=BaseModel)
 
+logger = logging.getLogger("texttools.batch_manager")
 
-
+
+class BatchManager:
     """
     Manages batch processing jobs for OpenAI's chat completions with structured outputs.
 

@@ -26,9 +27,8 @@ class SimpleBatchManager:
         self,
         client: OpenAI,
         model: str,
-        output_model: Type[
+        output_model: Type[T],
         prompt_template: str,
-        handlers: list[Any] | None = None,
         state_dir: Path = Path(".batch_jobs"),
         custom_json_schema_obj_str: dict | None = None,
         **client_kwargs: Any,

@@ -37,16 +37,16 @@ class SimpleBatchManager:
         self.model = model
         self.output_model = output_model
         self.prompt_template = prompt_template
-        self.handlers = handlers or []
         self.state_dir = state_dir
         self.state_dir.mkdir(parents=True, exist_ok=True)
         self.custom_json_schema_obj_str = custom_json_schema_obj_str
         self.client_kwargs = client_kwargs
         self.dict_input = False
 
-        if
-
-
+        if custom_json_schema_obj_str and not isinstance(
+            custom_json_schema_obj_str, dict
+        ):
+            raise ValueError("Schema should be a dict")
 
     def _state_file(self, job_name: str) -> Path:
         return self.state_dir / f"{job_name}.json"

@@ -127,7 +127,7 @@ class SimpleBatchManager:
 
         else:
             raise TypeError(
-                "The input must be either a list of texts or a dictionary in the form {'id': str, 'text': str}
+                "The input must be either a list of texts or a dictionary in the form {'id': str, 'text': str}"
             )
 
         file_path = self.state_dir / f"batch_{uuid.uuid4().hex}.jsonl"

@@ -143,6 +143,7 @@ class SimpleBatchManager:
         """
         if self._load_state(job_name):
             return
+
         path = self._prepare_file(payload)
         upload = self.client.files.create(file=open(path, "rb"), purpose="batch")
         job = self.client.batches.create(

@@ -187,7 +188,7 @@ class SimpleBatchManager:
             err_content = (
                 self.client.files.content(error_file_id).read().decode("utf-8")
             )
-            logger.
+            logger.error("Error file content:", err_content)
             return {}
 
         content = self.client.files.content(out_file_id).read().decode("utf-8")

@@ -221,8 +222,6 @@ class SimpleBatchManager:
                 error_d = {custom_id: results[custom_id]}
                 log.append(error_d)
 
-        for handler in self.handlers:
-            handler.handle(results)
         if remove_cache:
             self._clear_state(job_name)
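The corrected TypeError message documents the two payload shapes `BatchManager` accepts. A hedged illustration (field names follow the error message; the ids and texts are made up):

```python
# Shape 1: a plain list of texts.
payload_texts = ["first document", "second document"]

# Shape 2: dictionaries in the form {'id': str, 'text': str}.
payload_keyed = [
    {"id": "doc-1", "text": "first document"},
    {"id": "doc-2", "text": "second document"},
]
```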
|
texttools/batch/batch_runner.py
CHANGED

@@ -3,25 +3,23 @@ import os
 import time
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Callable
+from typing import Any, Callable, Type, TypeVar
 import logging
 
 from dotenv import load_dotenv
 from openai import OpenAI
 from pydantic import BaseModel
 
-from texttools.batch import
+from texttools.batch.batch_manager import BatchManager
+from texttools.tools.internals.output_models import StrOutput
 
-#
-
-logger.setLevel(logging.INFO)
+# Base Model type for output models
+T = TypeVar("T", bound=BaseModel)
 
+logger = logging.getLogger("texttools.batch_runner")
 
-class OutputModel(BaseModel):
-    desired_output: str
 
-
-def export_data(data):
+def export_data(data) -> list[dict[str, str]]:
     """
     Produces a structure of the following form from an initial data structure:
     [{"id": str, "text": str},...]

@@ -29,7 +27,7 @@ def export_data(data):
     return data
 
 
-def import_data(data):
+def import_data(data) -> Any:
     """
     Takes the output and adds and aggregates it to the original structure.
     """

@@ -48,9 +46,9 @@ class BatchConfig:
     output_data_filename: str = ""
     model: str = "gpt-4.1-mini"
     MAX_BATCH_SIZE: int = 100
-    MAX_TOTAL_TOKENS: int =
+    MAX_TOTAL_TOKENS: int = 2_000_000
     CHARS_PER_TOKEN: float = 2.7
-    PROMPT_TOKEN_MULTIPLIER: int =
+    PROMPT_TOKEN_MULTIPLIER: int = 1_000
     BASE_OUTPUT_DIR: str = "Data/batch_entity_result"
     import_function: Callable = import_data
     export_function: Callable = export_data

@@ -64,7 +62,7 @@ class BatchJobRunner:
     """
 
     def __init__(
-        self, config: BatchConfig = BatchConfig(), output_model:
+        self, config: BatchConfig = BatchConfig(), output_model: Type[T] = StrOutput
     ):
         self.config = config
         self.system_prompt = config.system_prompt

@@ -83,11 +81,11 @@ class BatchJobRunner:
         # Track retry attempts per part
         self.part_attempts: dict[int, int] = {}
 
-    def _init_manager(self) ->
+    def _init_manager(self) -> BatchManager:
         load_dotenv()
         api_key = os.getenv("OPENAI_API_KEY")
         client = OpenAI(api_key=api_key)
-        return
+        return BatchManager(
             client=client,
             model=self.model,
             prompt_template=self.system_prompt,

@@ -102,12 +100,12 @@ class BatchJobRunner:
         # Ensure data is a list of dicts with 'id' and 'content' as strings
         if not isinstance(data, list):
             raise ValueError(
-
+                "Exported data must be a list of dicts with 'id' and 'content' keys"
             )
         for item in data:
             if not (isinstance(item, dict) and "id" in item and "content" in item):
                 raise ValueError(
-                    "
+                    f"Item must be a dict with 'id' and 'content' keys. Got: {type(item)}"
                 )
             if not (isinstance(item["id"], str) and isinstance(item["content"], str)):
                 raise ValueError("'id' and 'content' must be strings.")

@@ -162,7 +160,45 @@ class BatchJobRunner:
         logger.info("Uploading...")
         time.sleep(30)
 
+    def _save_results(
+        self,
+        output_data: list[dict[str, Any]] | dict[str, Any],
+        log: list[Any],
+        part_idx: int,
+    ):
+        part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
+        result_path = (
+            Path(self.config.BASE_OUTPUT_DIR)
+            / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
+        )
+        if not output_data:
+            logger.info("No output data to save. Skipping this part.")
+            return
+        else:
+            with open(result_path, "w", encoding="utf-8") as f:
+                json.dump(output_data, f, ensure_ascii=False, indent=4)
+        if log:
+            log_path = (
+                Path(self.config.BASE_OUTPUT_DIR)
+                / f"{Path(self.output_data_filename).stem}{part_suffix}_log.json"
+            )
+            with open(log_path, "w", encoding="utf-8") as f:
+                json.dump(log, f, ensure_ascii=False, indent=4)
+
+    def _result_exists(self, part_idx: int) -> bool:
+        part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
+        result_path = (
+            Path(self.config.BASE_OUTPUT_DIR)
+            / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
+        )
+        return result_path.exists()
+
     def run(self):
+        """
+        Execute the batch job processing pipeline.
+
+        Submits jobs, monitors progress, handles retries, and saves results.
+        """
         # Submit all jobs up-front for concurrent execution
         self._submit_all_jobs()
         pending_parts: set[int] = set(self.part_idx_to_job_name.keys())

@@ -216,48 +252,3 @@ class BatchJobRunner:
                     f"Waiting {self.config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
                 )
                 time.sleep(self.config.poll_interval_seconds)
-
-    def _save_results(
-        self,
-        output_data: list[dict[str, Any]] | dict[str, Any],
-        log: list[Any],
-        part_idx: int,
-    ):
-        part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
-        result_path = (
-            Path(self.config.BASE_OUTPUT_DIR)
-            / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
-        )
-        if not output_data:
-            logger.info("No output data to save. Skipping this part.")
-            return
-        else:
-            with open(result_path, "w", encoding="utf-8") as f:
-                json.dump(output_data, f, ensure_ascii=False, indent=4)
-        if log:
-            log_path = (
-                Path(self.config.BASE_OUTPUT_DIR)
-                / f"{Path(self.output_data_filename).stem}{part_suffix}_log.json"
-            )
-            with open(log_path, "w", encoding="utf-8") as f:
-                json.dump(log, f, ensure_ascii=False, indent=4)
-
-    def _result_exists(self, part_idx: int) -> bool:
-        part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
-        result_path = (
-            Path(self.config.BASE_OUTPUT_DIR)
-            / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
-        )
-        return result_path.exists()
-
-
-if __name__ == "__main__":
-    logger.info("=== Batch Job Runner ===")
-    config = BatchConfig(
-        system_prompt="",
-        job_name="job_name",
-        input_data_path="Data.json",
-        output_data_filename="output",
-    )
-    runner = BatchJobRunner(config)
-    runner.run()
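The newly filled-in `BatchConfig` defaults imply a simple character-based token budget. A sketch of the arithmetic (the splitting logic itself is not shown in this diff, so this only illustrates the constants):

```python
MAX_TOTAL_TOKENS = 2_000_000   # per-part token budget from BatchConfig
CHARS_PER_TOKEN = 2.7          # rough chars-per-token heuristic

def estimated_tokens(text: str) -> float:
    # Estimate tokens from raw character count using the config heuristic.
    return len(text) / CHARS_PER_TOKEN

# Implied character budget per part: 2_000_000 * 2.7 = 5.4 million characters.
print(MAX_TOTAL_TOKENS * CHARS_PER_TOKEN)
```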
texttools/prompts/README.md
CHANGED

@@ -14,15 +14,15 @@ This folder contains YAML files for all prompts used in the project. Each file r
 ### Example YAML Structure
 ```yaml
 main_template:
-
+  mode_1: |
     Your main instructions here with placeholders like {input}.
-
+  mode_2: |
     Optional reasoning instructions here.
 
 analyze_template:
-
+  mode_1: |
     Analyze and summarize the input.
-
+  mode_2: |
     Optional detailed analysis template.
 ```
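With the corrected structure, each top-level template maps mode names to block scalars. A small sketch of how such a file could be consumed, mirroring the `yaml.safe_load` call in `prompt_loader.py` below (the file name here is hypothetical):

```python
import yaml
from pathlib import Path

data = yaml.safe_load(Path("prompts/example.yaml").read_text(encoding="utf-8"))
main = data["main_template"]["mode_1"].format(input="some text")  # fill {input}
analyze = data["analyze_template"]["mode_1"]
```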
|
texttools/tools/__init__.py
CHANGED

texttools/tools/{async_the_tool.py → async_tools.py}
RENAMED
@@ -1,4 +1,4 @@
-from typing import Literal, Any
+from typing import Literal, Any, Callable
 
 from openai import AsyncOpenAI
 

@@ -34,7 +34,8 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Categorize a text into a single Islamic studies domain category.
 

@@ -52,6 +53,7 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="categorizer.yaml",
             output_model=OutputModels.CategorizerOutput,

@@ -69,7 +71,8 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Extract salient keywords from text.
 

@@ -88,6 +91,7 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="extract_keywords.yaml",
             output_model=OutputModels.ListStrOutput,

@@ -104,7 +108,8 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Perform Named Entity Recognition (NER) over the input text.
 

@@ -123,6 +128,7 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="extract_entities.yaml",
             output_model=OutputModels.ListDictStrStrOutput,

@@ -138,7 +144,8 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Detect if the input is phrased as a question.
 

@@ -156,6 +163,7 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="is_question.yaml",
             output_model=OutputModels.BoolOutput,

@@ -173,7 +181,8 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Generate a single question from the given text.
 

@@ -192,6 +201,7 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="text_to_question.yaml",
             output_model=OutputModels.StrOutput,

@@ -209,7 +219,8 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["default", "reason"] = "default",
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Merge multiple questions into a single unified question.
 

@@ -229,6 +240,7 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="merge_questions.yaml",
             output_model=OutputModels.StrOutput,

@@ -246,7 +258,8 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Rewrite a text with different modes.
 

@@ -265,6 +278,7 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="rewrite.yaml",
             output_model=OutputModels.StrOutput,

@@ -282,7 +296,8 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Generate a list of questions about a subject.
 

@@ -302,6 +317,7 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="subject_to_question.yaml",
             output_model=OutputModels.ReasonListStrOutput,

@@ -318,7 +334,8 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Summarize the given subject text.
 

@@ -337,6 +354,7 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="summarize.yaml",
             output_model=OutputModels.StrOutput,

@@ -353,7 +371,8 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Translate text between languages.
 

@@ -372,6 +391,7 @@ class AsyncTheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="translate.yaml",
             output_model=OutputModels.StrOutput,

@@ -388,7 +408,7 @@ class AsyncTheTool:
         temperature: float | None = None,
         logprobs: bool | None = None,
         top_logprobs: int | None = None,
-    ) ->
+    ) -> OutputModels.ToolOutput:
         """
         Custom tool that can do almost anything!
 

@@ -411,4 +431,5 @@ class AsyncTheTool:
             user_prompt=None,
             with_analysis=False,
             mode=None,
+            validator=None,
         )
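Every public tool in `async_tools.py` gains the same `validator` keyword. A usage sketch on the async side, assuming a configured `AsyncOpenAI` client (the validator and input text are illustrative):

```python
import asyncio
from openai import AsyncOpenAI
from texttools import AsyncTheTool

async def main():
    tool = AsyncTheTool(client=AsyncOpenAI(), model="gpt-4o-mini")

    def at_most_five(keywords) -> bool:
        # Reject outputs with more than five keywords.
        return isinstance(keywords, list) and len(keywords) <= 5

    out = await tool.extract_keywords(
        "Large language models power modern NLP toolkits.",
        validator=at_most_five,
    )
    print(out.result, out.errors)

asyncio.run(main())
```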
texttools/tools/internals/async_operator.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Any, TypeVar, Type, Literal
+from typing import Any, TypeVar, Type, Literal, Callable
 import logging
 
 from openai import AsyncOpenAI

@@ -12,9 +12,7 @@ from texttools.tools.internals.prompt_loader import PromptLoader
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-
-logger = logging.getLogger("async_operator")
-logger.setLevel(logging.INFO)
+logger = logging.getLogger("texttools.async_operator")
 
 
 class AsyncOperator(BaseOperator):

@@ -32,6 +30,10 @@ class AsyncOperator(BaseOperator):
         self.model = model
 
     async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        """
+        Calls OpenAI API for analysis using the configured prompt template.
+        Returns the analyzed content as a string.
+        """
         analyze_prompt = prompt_configs["analyze_template"]
         analyze_message = [self._build_user_message(analyze_prompt)]
         completion = await self.client.chat.completions.create(

@@ -50,6 +52,10 @@ class AsyncOperator(BaseOperator):
         logprobs: bool = False,
         top_logprobs: int = 3,
     ) -> tuple[Type[T], Any]:
+        """
+        Parses a chat completion using OpenAI's structured output format.
+        Returns both the parsed object and the raw completion for logging.
+        """
         request_kwargs = {
             "model": self.model,
             "messages": message,

@@ -73,6 +79,10 @@ class AsyncOperator(BaseOperator):
         logprobs: bool = False,
         top_logprobs: int = 3,
     ) -> tuple[Type[T], Any]:
+        """
+        Generates a completion using vLLM with JSON schema guidance.
+        Returns the parsed output model and raw completion.
+        """
         json_schema = output_model.model_json_schema()
 
         # Build kwargs dynamically

@@ -104,20 +114,23 @@ class AsyncOperator(BaseOperator):
         temperature: float,
         logprobs: bool,
         top_logprobs: int | None,
+        validator: Callable[[Any], bool] | None,
         # Internal parameters
         prompt_file: str,
         output_model: Type[T],
         resp_format: Literal["vllm", "parse"],
         mode: str | None,
         **extra_kwargs,
-    ) ->
+    ) -> ToolOutput:
         """
         Execute the async LLM pipeline with the given input text. (Async)
         """
         prompt_loader = PromptLoader()
         formatter = Formatter()
+        output = ToolOutput()
 
         try:
+            # Prompt configs contain two keys: main_template and analyze template, both are string
            prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
                 text=text.strip(),

@@ -159,14 +172,62 @@ class AsyncOperator(BaseOperator):
 
             # Ensure output_model has a `result` field
             if not hasattr(parsed, "result"):
-
-
-                )
-
-                output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
+                error = "The provided output_model must define a field named 'result'"
+                logger.error(error)
+                output.errors.append(error)
+                return output
 
             output.result = parsed.result
 
+            # Retry logic if validation fails
+            if validator and not validator(output.result):
+                max_retries = 3
+                for attempt in range(max_retries):
+                    logger.warning(
+                        f"Validation failed, retrying for the {attempt + 1} time."
+                    )
+
+                    # Generate new temperature for retry
+                    retry_temperature = self._get_retry_temp(temperature)
+                    try:
+                        if resp_format == "vllm":
+                            parsed, completion = await self._vllm_completion(
+                                messages,
+                                output_model,
+                                retry_temperature,
+                                logprobs,
+                                top_logprobs,
+                            )
+                        elif resp_format == "parse":
+                            parsed, completion = await self._parse_completion(
+                                messages,
+                                output_model,
+                                retry_temperature,
+                                logprobs,
+                                top_logprobs,
+                            )
+
+                        output.result = parsed.result
+
+                        # Check if retry was successful
+                        if validator(output.result):
+                            logger.info(
+                                f"Validation passed on retry attempt {attempt + 1}"
+                            )
+                            break
+                        else:
+                            logger.warning(
+                                f"Validation still failing after retry attempt {attempt + 1}"
+                            )
+
+                    except Exception as e:
+                        logger.error(f"Retry attempt {attempt + 1} failed: {e}")
+                        # Continue to next retry attempt if this one fails
+
+                # Final check after all retries
+                if validator and not validator(output.result):
+                    output.errors.append("Validation failed after all retry attempts")
+
             if logprobs:
                 output.logprobs = self._extract_logprobs(completion)
 

@@ -174,6 +235,8 @@ class AsyncOperator(BaseOperator):
                 output.analysis = analysis
 
             return output
+
         except Exception as e:
             logger.error(f"AsyncTheTool failed: {e}")
-
+            output.errors.append(str(e))
+            return output
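The new retry block is easier to see stripped of the transport details: generate once, and if the validator rejects the result, regenerate up to three times at a jittered temperature. A condensed, self-contained sketch (`generate` stands in for the `_vllm_completion`/`_parse_completion` call):

```python
import random
from typing import Any, Callable

def run_with_validation(
    generate: Callable[[float], Any],
    validator: Callable[[Any], bool],
    temperature: float,
    max_retries: int = 3,
) -> tuple[Any, list[str]]:
    result = generate(temperature)
    errors: list[str] = []
    if not validator(result):
        for _ in range(max_retries):
            # Jittered retry temperature, clamped like _get_retry_temp below.
            delta = random.choice([-1, 1]) * random.uniform(0.1, 0.9)
            retry_temp = max(0.0, min(temperature + delta, 1.5))
            result = generate(retry_temp)
            if validator(result):
                break
        if not validator(result):
            errors.append("Validation failed after all retry attempts")
    return result, errors
```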
texttools/tools/internals/base_operator.py
CHANGED

@@ -3,6 +3,7 @@ import json
 import re
 import math
 import logging
+import random
 
 from pydantic import BaseModel
 from openai import OpenAI, AsyncOpenAI

@@ -10,9 +11,7 @@ from openai import OpenAI, AsyncOpenAI
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-
-logger = logging.getLogger("base_operator")
-logger.setLevel(logging.INFO)
+logger = logging.getLogger("texttools.base_operator")
 
 
 class BaseOperator:

@@ -40,13 +39,6 @@ class BaseOperator:
     ) -> Type[T]:
         """
         Convert a JSON response string to output model.
-
-        Args:
-            response_string: The JSON string (may contain code block markers)
-            output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
-
-        Returns:
-            Instance of your output model
         """
         # Clean the response string
         cleaned_json = self._clean_json_response(response_string)

@@ -61,7 +53,12 @@ class BaseOperator:
         return output_model(**response_dict)
 
     def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
+        """
+        Extracts and filters token probabilities from completion logprobs.
+        Skips punctuation and structural tokens, returns cleaned probability data.
+        """
         logprobs_data = []
+
         ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
 
         for choice in completion.choices:

@@ -89,3 +86,15 @@ class BaseOperator:
             logprobs_data.append(token_entry)
 
         return logprobs_data
+
+    def _get_retry_temp(self, base_temp: float) -> float:
+        """
+        Calculate temperature for retry attempts.
+        """
+        delta_temp = random.choice([-1, 1]) * random.uniform(0.1, 0.9)
+        new_temp = base_temp + delta_temp
+        print(f"Base Temp: {base_temp}")
+        print(f"Delta Temp: {delta_temp}")
+        print(f"New Temp: {new_temp}")
+
+        return max(0.0, min(new_temp, 1.5))
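For a feel of `_get_retry_temp`: the delta is drawn from ±[0.1, 0.9] and the sum clamped to [0.0, 1.5], so a base temperature of 0.0 yields retries anywhere in [0.0, 0.9]. A quick check:

```python
import random

def retry_temp(base: float) -> float:
    # Same arithmetic as _get_retry_temp, minus the debug prints.
    delta = random.choice([-1, 1]) * random.uniform(0.1, 0.9)
    return max(0.0, min(base + delta, 1.5))

random.seed(42)
print([round(retry_temp(0.0), 2) for _ in range(5)])  # each value in [0.0, 0.9]
```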
texttools/tools/internals/operator.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Any, TypeVar, Type, Literal
+from typing import Any, TypeVar, Type, Literal, Callable
 import logging
 
 from openai import OpenAI

@@ -12,9 +12,7 @@ from texttools.tools.internals.prompt_loader import PromptLoader
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-
-logger = logging.getLogger("operator")
-logger.setLevel(logging.INFO)
+logger = logging.getLogger("texttools.operator")
 
 
 class Operator(BaseOperator):

@@ -32,6 +30,10 @@ class Operator(BaseOperator):
         self.model = model
 
     def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        """
+        Calls OpenAI API for analysis using the configured prompt template.
+        Returns the analyzed content as a string.
+        """
         analyze_prompt = prompt_configs["analyze_template"]
         analyze_message = [self._build_user_message(analyze_prompt)]
         completion = self.client.chat.completions.create(

@@ -50,6 +52,10 @@ class Operator(BaseOperator):
         logprobs: bool = False,
         top_logprobs: int = 3,
     ) -> tuple[Type[T], Any]:
+        """
+        Parses a chat completion using OpenAI's structured output format.
+        Returns both the parsed object and the raw completion for logging.
+        """
         request_kwargs = {
             "model": self.model,
             "messages": message,

@@ -73,6 +79,10 @@ class Operator(BaseOperator):
         logprobs: bool = False,
         top_logprobs: int = 3,
     ) -> tuple[Type[T], Any]:
+        """
+        Generates a completion using vLLM with JSON schema guidance.
+        Returns the parsed output model and raw completion.
+        """
         json_schema = output_model.model_json_schema()
 
         # Build kwargs dynamically

@@ -104,20 +114,23 @@ class Operator(BaseOperator):
         temperature: float,
         logprobs: bool,
         top_logprobs: int | None,
+        validator: Callable[[Any], bool] | None,
         # Internal parameters
         prompt_file: str,
         output_model: Type[T],
         resp_format: Literal["vllm", "parse"],
         mode: str | None,
         **extra_kwargs,
-    ) ->
+    ) -> ToolOutput:
         """
         Execute the LLM pipeline with the given input text.
         """
         prompt_loader = PromptLoader()
         formatter = Formatter()
+        output = ToolOutput()
 
         try:
+            # Prompt configs contain two keys: main_template and analyze template, both are string
            prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
                 text=text.strip(),

@@ -159,14 +172,62 @@ class Operator(BaseOperator):
 
             # Ensure output_model has a `result` field
             if not hasattr(parsed, "result"):
-
-
-                )
-
-                output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
+                error = "The provided output_model must define a field named 'result'"
+                logger.error(error)
+                output.errors.append(error)
+                return output
 
             output.result = parsed.result
 
+            # Retry logic if validation fails
+            if validator and not validator(output.result):
+                max_retries = 3
+                for attempt in range(max_retries):
+                    logger.warning(
+                        f"Validation failed, retrying for the {attempt + 1} time."
+                    )
+
+                    # Generate new temperature for retry
+                    retry_temperature = self._get_retry_temp(temperature)
+                    try:
+                        if resp_format == "vllm":
+                            parsed, completion = self._vllm_completion(
+                                messages,
+                                output_model,
+                                retry_temperature,
+                                logprobs,
+                                top_logprobs,
+                            )
+                        elif resp_format == "parse":
+                            parsed, completion = self._parse_completion(
+                                messages,
+                                output_model,
+                                retry_temperature,
+                                logprobs,
+                                top_logprobs,
+                            )
+
+                        output.result = parsed.result
+
+                        # Check if retry was successful
+                        if validator(output.result):
+                            logger.info(
+                                f"Validation passed on retry attempt {attempt + 1}"
+                            )
+                            break
+                        else:
+                            logger.warning(
+                                f"Validation still failing after retry attempt {attempt + 1}"
+                            )
+
+                    except Exception as e:
+                        logger.error(f"Retry attempt {attempt + 1} failed: {e}")
+                        # Continue to next retry attempt if this one fails
+
+                # Final check after all retries
+                if validator and not validator(output.result):
+                    output.errors.append("Validation failed after all retry attempts")
+
             if logprobs:
                 output.logprobs = self._extract_logprobs(completion)
 

@@ -174,6 +235,8 @@ class Operator(BaseOperator):
                 output.analysis = analysis
 
             return output
+
         except Exception as e:
             logger.error(f"TheTool failed: {e}")
-
+            output.errors.append(str(e))
+            return output
texttools/tools/internals/output_models.py
CHANGED

@@ -4,10 +4,13 @@ from pydantic import BaseModel, Field
 
 
 class ToolOutput(BaseModel):
-    result:
-    analysis: str
-    logprobs: list[dict[str, Any]]
-    errors: list[str]
+    result: Any = None
+    analysis: str = ""
+    logprobs: list[dict[str, Any]] = []
+    errors: list[str] = []
+
+    def __repr__(self) -> str:
+        return f"ToolOutput(result_type='{type(self.result)}', result='{self.result}', analysis='{self.analysis}', logprobs='{self.logprobs}', errors='{self.errors}'"
 
 
 class StrOutput(BaseModel):
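With defaults on every field, `ToolOutput()` can now be constructed empty and filled incrementally, which is exactly what the operators above do. The `[]` defaults are safe here because Pydantic copies mutable defaults per instance; a quick check:

```python
from typing import Any
from pydantic import BaseModel

class ToolOutput(BaseModel):  # trimmed copy of the model above
    result: Any = None
    analysis: str = ""
    logprobs: list[dict[str, Any]] = []
    errors: list[str] = []

a, b = ToolOutput(), ToolOutput()
a.errors.append("boom")
print(b.errors)  # []; each instance gets its own copy of the default list
```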
texttools/tools/internals/prompt_loader.py
CHANGED

@@ -24,6 +24,9 @@ class PromptLoader:
     # Use lru_cache to load each file once
     @lru_cache(maxsize=32)
     def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
+        """
+        Loads prompt templates from YAML file with optional mode selection.
+        """
         base_dir = Path(__file__).parent.parent.parent / Path("prompts")
         prompt_path = base_dir / prompt_file
         data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
texttools/tools/{the_tool.py → sync_tools.py}
RENAMED

@@ -1,4 +1,4 @@
-from typing import Literal, Any
+from typing import Literal, Any, Callable
 
 from openai import OpenAI
 

@@ -32,7 +32,8 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Categorize a text into a single Islamic studies domain category.
 

@@ -50,6 +51,7 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="categorizer.yaml",
             output_model=OutputModels.CategorizerOutput,

@@ -67,7 +69,8 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Extract salient keywords from text.
 

@@ -86,6 +89,7 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="extract_keywords.yaml",
             output_model=OutputModels.ListStrOutput,

@@ -102,7 +106,8 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Perform Named Entity Recognition (NER) over the input text.
 

@@ -121,6 +126,7 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="extract_entities.yaml",
             output_model=OutputModels.ListDictStrStrOutput,

@@ -136,7 +142,8 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Detect if the input is phrased as a question.
 

@@ -154,6 +161,7 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="is_question.yaml",
             output_model=OutputModels.BoolOutput,

@@ -171,7 +179,8 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Generate a single question from the given text.
 

@@ -190,6 +199,7 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="text_to_question.yaml",
             output_model=OutputModels.StrOutput,

@@ -207,7 +217,8 @@ class TheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["default", "reason"] = "default",
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Merge multiple questions into a single unified question.
 

@@ -227,6 +238,7 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="merge_questions.yaml",
             output_model=OutputModels.StrOutput,

@@ -244,7 +256,8 @@ class TheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Rewrite a text with different modes.
 

@@ -263,6 +276,7 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="rewrite.yaml",
             output_model=OutputModels.StrOutput,

@@ -280,7 +294,8 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Generate a list of questions about a subject.
 

@@ -300,6 +315,7 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="subject_to_question.yaml",
             output_model=OutputModels.ReasonListStrOutput,

@@ -316,7 +332,8 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Summarize the given subject text.
 

@@ -335,6 +352,7 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="summarize.yaml",
             output_model=OutputModels.StrOutput,

@@ -351,7 +369,8 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-
+        validator: Callable[[Any], bool] | None = None,
+    ) -> OutputModels.ToolOutput:
         """
         Translate text between languages.
 

@@ -370,6 +389,7 @@ class TheTool:
             temperature=temperature,
             logprobs=logprobs,
             top_logprobs=top_logprobs,
+            validator=validator,
             # Internal parameters
             prompt_file="translate.yaml",
             output_model=OutputModels.StrOutput,

@@ -386,7 +406,7 @@ class TheTool:
         temperature: float | None = None,
         logprobs: bool | None = None,
         top_logprobs: int | None = None,
-    ) ->
+    ) -> OutputModels.ToolOutput:
         """
         Custom tool that can do almost anything!
 

@@ -409,4 +429,5 @@ class TheTool:
             user_prompt=None,
             with_analysis=False,
             mode=None,
+            validator=None,
         )
hamtaa_texttools-1.1.1.dist-info/RECORD
REMOVED

@@ -1,30 +0,0 @@
-hamtaa_texttools-1.1.1.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
-texttools/__init__.py,sha256=v3tQCH_Cjj47fCpuhK6sKSVAqEjNkc-cZbY4OJa4IZw,202
-texttools/batch/__init__.py,sha256=q50JsQsmQGp_8RW0KNasYeYWVV0R4FUNZ-ujXwEJemY,143
-texttools/batch/batch_manager.py,sha256=leVIFkR-3HpDkQi_MK3TgFNnHYsCN-wbS4mTWoPmO3c,8828
-texttools/batch/batch_runner.py,sha256=cgiCYLIBQQC0dBWM8_lVP9c5QLJoAmS2ijMtp0p3U2o,10313
-texttools/prompts/README.md,sha256=rclMaCV1N8gT1KcpZu0-ka0dKGNg2f1CEcRMdQkgQOc,1379
-texttools/prompts/categorizer.yaml,sha256=GMqIIzQFhgnlpkgU1qi3FAD3mD4A2jiWD5TilQ2XnnE,1204
-texttools/prompts/extract_entities.yaml,sha256=KiKjeDpHaeh3JVtZ6q1pa3k4DYucUIU9WnEcRTCA-SE,651
-texttools/prompts/extract_keywords.yaml,sha256=0O7ypL_OsEOxtvlQ2CZjnsv9637DJwAKprZsf9Vo2_s,769
-texttools/prompts/is_question.yaml,sha256=d0-vKRbXWkxvO64ikvxRjEmpAXGpCYIPGhgexvPPjws,471
-texttools/prompts/merge_questions.yaml,sha256=0J85GvTirZB4ELwH3sk8ub_WcqqpYf6PrMKr3djlZeo,1792
-texttools/prompts/rewrite.yaml,sha256=LO7He_IA3MZKz8a-LxH9DHJpOjpYwaYN1pbjp1Y0tFo,5392
-texttools/prompts/run_custom.yaml,sha256=38OkCoVITbuuS9c08UZSP1jZW4WjSmRIi8fR0RAiPu4,108
-texttools/prompts/subject_to_question.yaml,sha256=C7x7rNNm6U_ZG9HOn6zuzYOtvJUZ2skuWbL1-aYdd3E,1147
-texttools/prompts/summarize.yaml,sha256=o6rxGPfWtZd61Duvm8NVvCJqfq73b-wAuMSKR6UYUqY,459
-texttools/prompts/text_to_question.yaml,sha256=UheKYpDn6iyKI8NxunHZtFpNyfCLZZe5cvkuXpurUJY,783
-texttools/prompts/translate.yaml,sha256=mGT2uBCei6uucWqVbs4silk-UV060v3G0jnt0P6sr50,634
-texttools/tools/__init__.py,sha256=hG1I28Q7BJ1Dbs95x6QMKXdsAlC5Eh_tqC-EbAibwiU,114
-texttools/tools/async_the_tool.py,sha256=h6-Zkedet-eRUrkV5fANNoh4WmoqhXU5wJEHpd8nyNU,14377
-texttools/tools/the_tool.py,sha256=lKy3_CKcWo2cBLQ7dDgvh7-oos7UOx1NYM26tcMhwaI,14143
-texttools/tools/internals/async_operator.py,sha256=Kj-DLBKcKbZPCJYn4lVo4Iiei11M04pwgWpIl8L69aM,6169
-texttools/tools/internals/base_operator.py,sha256=OWJe8ybA6qmmoc7ysYeB8ccHPneDlEtmFGH1jLWQCeY,3135
-texttools/tools/internals/formatters.py,sha256=tACNLP6PeoqaRpNudVxBaHA25zyWqWYPZQuYysIu88g,941
-texttools/tools/internals/operator.py,sha256=g1E1WkgnKRDgOs6fEFu0-gPCw1Bniwb4VI9Er3Op_gk,6063
-texttools/tools/internals/output_models.py,sha256=gbVbzBWeyHUVNsCBuawdgz9ZEzsC7wfygGgZJsAaexY,1662
-texttools/tools/internals/prompt_loader.py,sha256=rbitJD3e8vAdcooP1Yx6KnSI83g28ho-FegfZ1cJ4j4,1979
-hamtaa_texttools-1.1.1.dist-info/METADATA,sha256=Cc1Rq94QyXgJ8SNhsBgyUfhho3oywzGpx6y16s50b-Q,7144
-hamtaa_texttools-1.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-hamtaa_texttools-1.1.1.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
-hamtaa_texttools-1.1.1.dist-info/RECORD,,
{hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.8.dist-info}/WHEEL
File without changes

{hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.8.dist-info}/licenses/LICENSE
File without changes

{hamtaa_texttools-1.1.1.dist-info → hamtaa_texttools-1.1.8.dist-info}/top_level.txt
File without changes