hamtaa-texttools 1.1.3__tar.gz → 1.1.7__tar.gz
This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- {hamtaa_texttools-1.1.3/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.7}/PKG-INFO +52 -9
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/README.md +51 -8
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7/hamtaa_texttools.egg-info}/PKG-INFO +52 -9
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/pyproject.toml +1 -1
- hamtaa_texttools-1.1.7/texttools/__init__.py +4 -0
- hamtaa_texttools-1.1.7/texttools/batch/__init__.py +3 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/batch/batch_manager.py +9 -11
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/batch/batch_runner.py +53 -61
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/async_the_tool.py +11 -11
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/async_operator.py +6 -6
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/base_operator.py +1 -2
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/operator.py +6 -6
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/output_models.py +7 -4
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/the_tool.py +11 -11
- hamtaa_texttools-1.1.3/texttools/__init__.py +0 -9
- hamtaa_texttools-1.1.3/texttools/batch/__init__.py +0 -4
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/LICENSE +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/setup.cfg +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/README.md +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/categorizer.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/extract_entities.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/extract_keywords.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/is_question.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/merge_questions.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/rewrite.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/run_custom.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/subject_to_question.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/summarize.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/text_to_question.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/translate.yaml +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/__init__.py +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/formatters.py +0 -0
- {hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/prompt_loader.py +0 -0
{hamtaa_texttools-1.1.3/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.7}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.3
+Version: 1.1.7
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
@@ -86,6 +86,18 @@ All these parameters can be used individually or together to tailor the behavior
 
 ---
 
+## 🧩 ToolOutput
+
+Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+- **`result`** → The output of LLM (`type=Any`)
+- **`analysis`** → The reasoning step before generating the final output (`type=str`)
+- **`logprobs`** → Token-level probabilities for the generated output (`type=list`)
+- **`errors`** → Any error that have occured during calling LLM (`type=str`)
+
+**None:** You can use `repr(ToolOutput)` to see details of an output.
+
+---
+
 ## 🚀 Installation
 
 Install the latest release via PyPI:
@@ -123,13 +135,13 @@ the_tool = TheTool(client=client, model=model)
 detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection.result)
 print(detection.logprobs)
-# Output: True
+# Output: True + logprobs
 
 # Example: Translation
 translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
 print(translation.result)
 print(translation.analysis)
-# Output: "Hi! How are you?"
+# Output: "Hi! How are you?" + analysis
 ```
 
 ---
@@ -149,19 +161,22 @@ async def main():
     model = "gpt-4o-mini"
 
     # Create an instance of AsyncTheTool
-
+    async_the_tool = AsyncTheTool(client=async_client, model=model)
+
+    # Example: Async Translation and Keyword Extraction
+    translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+    keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
 
-
-    translation = await the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+    (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
     print(translation.result)
-
+    print(keywords.result)
 
 asyncio.run(main())
 ```
 
 ---
 
-##
+## 👍 Use Cases
 
 Use **TextTools** when you need to:
 
@@ -169,7 +184,35 @@ Use **TextTools** when you need to:
 - 🌍 **Translate** and process multilingual corpora with ease
 - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
 - 📊 **Analyze** large text collections using embeddings and categorization
-
+
+---
+
+## 📚 Batch Processing
+
+Process large datasets efficiently using OpenAI's batch API.
+
+## Quick Start
+
+```python
+from texttools import BatchJobRunner, BatchConfig
+
+# Configure your batch job
+config = BatchConfig(
+    system_prompt="Extract entities from the text",
+    job_name="entity_extraction",
+    input_data_path="data.json",
+    output_data_filename="results.json",
+    model="gpt-4o-mini"
+)
+
+# Define your output schema
+class Output(BaseModel):
+    entities: list[str]
+
+# Run the batch job
+runner = BatchJobRunner(config, output_model=Output)
+runner.run()
+```
 
 ---
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/README.md
RENAMED
@@ -52,6 +52,18 @@ All these parameters can be used individually or together to tailor the behavior
 
 ---
 
+## 🧩 ToolOutput
+
+Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+- **`result`** → The output of LLM (`type=Any`)
+- **`analysis`** → The reasoning step before generating the final output (`type=str`)
+- **`logprobs`** → Token-level probabilities for the generated output (`type=list`)
+- **`errors`** → Any error that have occured during calling LLM (`type=str`)
+
+**None:** You can use `repr(ToolOutput)` to see details of an output.
+
+---
+
 ## 🚀 Installation
 
 Install the latest release via PyPI:
@@ -89,13 +101,13 @@ the_tool = TheTool(client=client, model=model)
 detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection.result)
 print(detection.logprobs)
-# Output: True
+# Output: True + logprobs
 
 # Example: Translation
 translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
 print(translation.result)
 print(translation.analysis)
-# Output: "Hi! How are you?"
+# Output: "Hi! How are you?" + analysis
 ```
 
 ---
@@ -115,19 +127,22 @@ async def main():
     model = "gpt-4o-mini"
 
     # Create an instance of AsyncTheTool
-
+    async_the_tool = AsyncTheTool(client=async_client, model=model)
+
+    # Example: Async Translation and Keyword Extraction
+    translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+    keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
 
-
-    translation = await the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+    (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
     print(translation.result)
-
+    print(keywords.result)
 
 asyncio.run(main())
 ```
 
 ---
 
-##
+## 👍 Use Cases
 
 Use **TextTools** when you need to:
 
@@ -135,7 +150,35 @@ Use **TextTools** when you need to:
 - 🌍 **Translate** and process multilingual corpora with ease
 - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
 - 📊 **Analyze** large text collections using embeddings and categorization
-
+
+---
+
+## 📚 Batch Processing
+
+Process large datasets efficiently using OpenAI's batch API.
+
+## Quick Start
+
+```python
+from texttools import BatchJobRunner, BatchConfig
+
+# Configure your batch job
+config = BatchConfig(
+    system_prompt="Extract entities from the text",
+    job_name="entity_extraction",
+    input_data_path="data.json",
+    output_data_filename="results.json",
+    model="gpt-4o-mini"
+)
+
+# Define your output schema
+class Output(BaseModel):
+    entities: list[str]
+
+# Run the batch job
+runner = BatchJobRunner(config, output_model=Output)
+runner.run()
+```
 
 ---
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7/hamtaa_texttools.egg-info}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.3
+Version: 1.1.7
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
@@ -86,6 +86,18 @@ All these parameters can be used individually or together to tailor the behavior
 
 ---
 
+## 🧩 ToolOutput
+
+Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+- **`result`** → The output of LLM (`type=Any`)
+- **`analysis`** → The reasoning step before generating the final output (`type=str`)
+- **`logprobs`** → Token-level probabilities for the generated output (`type=list`)
+- **`errors`** → Any error that have occured during calling LLM (`type=str`)
+
+**None:** You can use `repr(ToolOutput)` to see details of an output.
+
+---
+
 ## 🚀 Installation
 
 Install the latest release via PyPI:
@@ -123,13 +135,13 @@ the_tool = TheTool(client=client, model=model)
 detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection.result)
 print(detection.logprobs)
-# Output: True
+# Output: True + logprobs
 
 # Example: Translation
 translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
 print(translation.result)
 print(translation.analysis)
-# Output: "Hi! How are you?"
+# Output: "Hi! How are you?" + analysis
 ```
 
 ---
@@ -149,19 +161,22 @@ async def main():
     model = "gpt-4o-mini"
 
     # Create an instance of AsyncTheTool
-
+    async_the_tool = AsyncTheTool(client=async_client, model=model)
+
+    # Example: Async Translation and Keyword Extraction
+    translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+    keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
 
-
-    translation = await the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+    (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
     print(translation.result)
-
+    print(keywords.result)
 
 asyncio.run(main())
 ```
 
 ---
 
-##
+## 👍 Use Cases
 
 Use **TextTools** when you need to:
 
@@ -169,7 +184,35 @@ Use **TextTools** when you need to:
 - 🌍 **Translate** and process multilingual corpora with ease
 - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
 - 📊 **Analyze** large text collections using embeddings and categorization
-
+
+---
+
+## 📚 Batch Processing
+
+Process large datasets efficiently using OpenAI's batch API.
+
+## Quick Start
+
+```python
+from texttools import BatchJobRunner, BatchConfig
+
+# Configure your batch job
+config = BatchConfig(
+    system_prompt="Extract entities from the text",
+    job_name="entity_extraction",
+    input_data_path="data.json",
+    output_data_filename="results.json",
+    model="gpt-4o-mini"
+)
+
+# Define your output schema
+class Output(BaseModel):
+    entities: list[str]
+
+# Run the batch job
+runner = BatchJobRunner(config, output_model=Output)
+runner.run()
+```
 
 ---
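The ToolOutput section added to the README and both PKG-INFO copies is the main documentation change in this release. As a minimal sketch of how the documented fields read in practice (hypothetical usage, assuming the top-level `TheTool` export shown in the README examples and a valid OpenAI API key; printed values are illustrative):

```python
from openai import OpenAI
from texttools import TheTool  # assumed top-level export, per the README examples

client = OpenAI(api_key="...")  # placeholder key
the_tool = TheTool(client=client, model="gpt-4o-mini")

detection = the_tool.is_question(
    "Is this project open source?", logprobs=True, top_logprobs=2
)

# In 1.1.7 every ToolOutput field has a default (None / "" / []),
# so reads are safe even when the underlying LLM call failed.
if detection.errors:
    print("call failed:", detection.errors)
else:
    print(detection.result)    # e.g. True
    print(detection.logprobs)  # token-level probabilities (list of dicts)
print(repr(detection))         # full field dump via the new __repr__
```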
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/batch/batch_manager.py
RENAMED
@@ -1,18 +1,20 @@
 import json
 import uuid
 from pathlib import Path
-from typing import Any, Type
+from typing import Any, Type, TypeVar
 import logging
 
 from pydantic import BaseModel
 from openai import OpenAI
 from openai.lib._pydantic import to_strict_json_schema
 
-
-
+# Base Model type for output models
+T = TypeVar("T", bound=BaseModel)
 
+logger = logging.getLogger("texttools.batch_runner")
 
-
+
+class BatchManager:
     """
     Manages batch processing jobs for OpenAI's chat completions with structured outputs.
 
@@ -25,9 +27,8 @@ class SimpleBatchManager:
         self,
         client: OpenAI,
         model: str,
-        output_model: Type[
+        output_model: Type[T],
         prompt_template: str,
-        handlers: list[Any] | None = None,
         state_dir: Path = Path(".batch_jobs"),
         custom_json_schema_obj_str: dict | None = None,
         **client_kwargs: Any,
@@ -36,7 +37,6 @@ class SimpleBatchManager:
         self.model = model
         self.output_model = output_model
         self.prompt_template = prompt_template
-        self.handlers = handlers or []
         self.state_dir = state_dir
         self.state_dir.mkdir(parents=True, exist_ok=True)
         self.custom_json_schema_obj_str = custom_json_schema_obj_str
@@ -45,7 +45,7 @@ class SimpleBatchManager:
 
         if self.custom_json_schema_obj_str:
             if self.custom_json_schema_obj_str is not dict:
-                raise ValueError("
+                raise ValueError("Schema should be a dict")
 
     def _state_file(self, job_name: str) -> Path:
         return self.state_dir / f"{job_name}.json"
@@ -126,7 +126,7 @@ class SimpleBatchManager:
 
         else:
             raise TypeError(
-                "The input must be either a list of texts or a dictionary in the form {'id': str, 'text': str}
+                "The input must be either a list of texts or a dictionary in the form {'id': str, 'text': str}"
            )
 
        file_path = self.state_dir / f"batch_{uuid.uuid4().hex}.jsonl"
@@ -220,8 +220,6 @@ class SimpleBatchManager:
                error_d = {custom_id: results[custom_id]}
                log.append(error_d)
 
-        for handler in self.handlers:
-            handler.handle(results)
        if remove_cache:
            self._clear_state(job_name)
 
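One detail of the retained `BatchManager` guard deserves a note: `if self.custom_json_schema_obj_str is not dict:` compares the value against the `dict` type object by identity, which is true for every real dict instance as well, so the new `ValueError("Schema should be a dict")` would fire even for valid schemas. A small sketch of the distinction (the `isinstance` form is the usual intent, not what 1.1.7 ships):

```python
schema = {"type": "object", "properties": {}}

# Identity check, as in the diff: a dict *instance* is never the `dict`
# type object itself, so this is True and the guard fires on valid input.
print(schema is not dict)            # True

# Conventional type check that accepts real dicts:
print(not isinstance(schema, dict))  # False
```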
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/batch/batch_runner.py
RENAMED
@@ -3,24 +3,23 @@ import os
 import time
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Callable
+from typing import Any, Callable, Type, TypeVar
 import logging
 
 from dotenv import load_dotenv
 from openai import OpenAI
 from pydantic import BaseModel
 
-from texttools.batch import
+from texttools.batch.batch_manager import BatchManager
+from texttools.tools.internals.output_models import StrOutput
 
-
-
+# Base Model type for output models
+T = TypeVar("T", bound=BaseModel)
 
+logger = logging.getLogger("texttools.batch_runner")
 
-class OutputModel(BaseModel):
-    desired_output: str
 
-
-def export_data(data):
+def export_data(data) -> list[dict[str, str]]:
     """
     Produces a structure of the following form from an initial data structure:
     [{"id": str, "text": str},...]
@@ -28,7 +27,7 @@ def export_data(data):
     return data
 
 
-def import_data(data):
+def import_data(data) -> Any:
     """
     Takes the output and adds and aggregates it to the original structure.
     """
@@ -47,9 +46,9 @@ class BatchConfig:
     output_data_filename: str = ""
     model: str = "gpt-4.1-mini"
     MAX_BATCH_SIZE: int = 100
-    MAX_TOTAL_TOKENS: int =
+    MAX_TOTAL_TOKENS: int = 2_000_000
     CHARS_PER_TOKEN: float = 2.7
-    PROMPT_TOKEN_MULTIPLIER: int =
+    PROMPT_TOKEN_MULTIPLIER: int = 1_000
     BASE_OUTPUT_DIR: str = "Data/batch_entity_result"
     import_function: Callable = import_data
     export_function: Callable = export_data
@@ -63,7 +62,7 @@ class BatchJobRunner:
     """
 
     def __init__(
-        self, config: BatchConfig = BatchConfig(), output_model:
+        self, config: BatchConfig = BatchConfig(), output_model: Type[T] = StrOutput
     ):
         self.config = config
         self.system_prompt = config.system_prompt
@@ -82,11 +81,11 @@ class BatchJobRunner:
         # Track retry attempts per part
         self.part_attempts: dict[int, int] = {}
 
-    def _init_manager(self) ->
+    def _init_manager(self) -> BatchManager:
         load_dotenv()
         api_key = os.getenv("OPENAI_API_KEY")
         client = OpenAI(api_key=api_key)
-        return
+        return BatchManager(
             client=client,
             model=self.model,
             prompt_template=self.system_prompt,
@@ -101,12 +100,12 @@ class BatchJobRunner:
         # Ensure data is a list of dicts with 'id' and 'content' as strings
         if not isinstance(data, list):
             raise ValueError(
-
+                "Exported data must be a list of dicts with 'id' and 'content' keys"
             )
         for item in data:
             if not (isinstance(item, dict) and "id" in item and "content" in item):
                 raise ValueError(
-                    "
+                    f"Item must be a dict with 'id' and 'content' keys. Got: {type(item)}"
                 )
             if not (isinstance(item["id"], str) and isinstance(item["content"], str)):
                 raise ValueError("'id' and 'content' must be strings.")
@@ -161,7 +160,45 @@ class BatchJobRunner:
             logger.info("Uploading...")
             time.sleep(30)
 
+    def _save_results(
+        self,
+        output_data: list[dict[str, Any]] | dict[str, Any],
+        log: list[Any],
+        part_idx: int,
+    ):
+        part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
+        result_path = (
+            Path(self.config.BASE_OUTPUT_DIR)
+            / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
+        )
+        if not output_data:
+            logger.info("No output data to save. Skipping this part.")
+            return
+        else:
+            with open(result_path, "w", encoding="utf-8") as f:
+                json.dump(output_data, f, ensure_ascii=False, indent=4)
+        if log:
+            log_path = (
+                Path(self.config.BASE_OUTPUT_DIR)
+                / f"{Path(self.output_data_filename).stem}{part_suffix}_log.json"
+            )
+            with open(log_path, "w", encoding="utf-8") as f:
+                json.dump(log, f, ensure_ascii=False, indent=4)
+
+    def _result_exists(self, part_idx: int) -> bool:
+        part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
+        result_path = (
+            Path(self.config.BASE_OUTPUT_DIR)
+            / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
+        )
+        return result_path.exists()
+
     def run(self):
+        """
+        Execute the batch job processing pipeline.
+
+        Submits jobs, monitors progress, handles retries, and saves results.
+        """
         # Submit all jobs up-front for concurrent execution
         self._submit_all_jobs()
         pending_parts: set[int] = set(self.part_idx_to_job_name.keys())
@@ -215,48 +252,3 @@ class BatchJobRunner:
                 f"Waiting {self.config.poll_interval_seconds}s before next status check for parts: {sorted(pending_parts)}"
             )
             time.sleep(self.config.poll_interval_seconds)
-
-    def _save_results(
-        self,
-        output_data: list[dict[str, Any]] | dict[str, Any],
-        log: list[Any],
-        part_idx: int,
-    ):
-        part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
-        result_path = (
-            Path(self.config.BASE_OUTPUT_DIR)
-            / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
-        )
-        if not output_data:
-            logger.info("No output data to save. Skipping this part.")
-            return
-        else:
-            with open(result_path, "w", encoding="utf-8") as f:
-                json.dump(output_data, f, ensure_ascii=False, indent=4)
-        if log:
-            log_path = (
-                Path(self.config.BASE_OUTPUT_DIR)
-                / f"{Path(self.output_data_filename).stem}{part_suffix}_log.json"
-            )
-            with open(log_path, "w", encoding="utf-8") as f:
-                json.dump(log, f, ensure_ascii=False, indent=4)
-
-    def _result_exists(self, part_idx: int) -> bool:
-        part_suffix = f"_part_{part_idx + 1}" if len(self.parts) > 1 else ""
-        result_path = (
-            Path(self.config.BASE_OUTPUT_DIR)
-            / f"{Path(self.output_data_filename).stem}{part_suffix}.json"
-        )
-        return result_path.exists()
-
-
-if __name__ == "__main__":
-    logger.info("=== Batch Job Runner ===")
-    config = BatchConfig(
-        system_prompt="",
-        job_name="job_name",
-        input_data_path="Data.json",
-        output_data_filename="output",
-    )
-    runner = BatchJobRunner(config)
-    runner.run()
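The `BatchConfig` defaults filled in above (`MAX_TOTAL_TOKENS = 2_000_000`, `CHARS_PER_TOKEN = 2.7`, `PROMPT_TOKEN_MULTIPLIER = 1_000`) suggest a character-based token budget for splitting work into parts. The splitting code itself is outside the hunks shown here, so the following estimate is only a plausible reading of those constants, not the shipped logic:

```python
MAX_TOTAL_TOKENS = 2_000_000
CHARS_PER_TOKEN = 2.7
PROMPT_TOKEN_MULTIPLIER = 1_000

def estimated_tokens(texts: list[str], prompt_allowance: int = PROMPT_TOKEN_MULTIPLIER) -> int:
    # Characters scaled to approximate tokens, plus a flat allowance
    # for the system prompt repeated across requests (assumed role of
    # PROMPT_TOKEN_MULTIPLIER).
    return int(sum(len(t) for t in texts) / CHARS_PER_TOKEN) + prompt_allowance

batch = ["a short document"] * 1_000
print(estimated_tokens(batch) <= MAX_TOTAL_TOKENS)  # True: this batch fits one part
```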
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/async_the_tool.py
RENAMED
@@ -1,4 +1,4 @@
-from typing import Literal, Any
+from typing import Literal, Any, Callable
 
 from openai import AsyncOpenAI
 
@@ -34,7 +34,7 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Categorize a text into a single Islamic studies domain category.
@@ -71,7 +71,7 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Extract salient keywords from text.
@@ -108,7 +108,7 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Perform Named Entity Recognition (NER) over the input text.
@@ -144,7 +144,7 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Detect if the input is phrased as a question.
@@ -181,7 +181,7 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Generate a single question from the given text.
@@ -219,7 +219,7 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["default", "reason"] = "default",
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Merge multiple questions into a single unified question.
@@ -258,7 +258,7 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Rewrite a text with different modes.
@@ -296,7 +296,7 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Generate a list of questions about a subject.
@@ -334,7 +334,7 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Summarize the given subject text.
@@ -371,7 +371,7 @@ class AsyncTheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Translate text between languages.
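Across `TheTool` and `AsyncTheTool`, every public method now types `validator` as `Callable[[Any], bool]` rather than `Any`. The diff does not show how a failing validator is handled (retry versus an `errors` entry), but the expected shape of the callback follows from the annotation alone; a hypothetical predicate:

```python
def non_empty_ascii(result) -> bool:
    """Accept only non-empty string results that stay within ASCII."""
    return isinstance(result, str) and bool(result.strip()) and result.isascii()

# Reusing `the_tool` from the earlier sketch:
translation = the_tool.translate(
    "سلام، حالت چطوره؟",
    target_language="English",
    validator=non_empty_ascii,
)
print(translation.result, translation.errors)
```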
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/async_operator.py
RENAMED
@@ -1,4 +1,4 @@
-from typing import Any, TypeVar, Type, Literal
+from typing import Any, TypeVar, Type, Literal, Callable
 import logging
 
 from openai import AsyncOpenAI
@@ -12,8 +12,7 @@ from texttools.tools.internals.prompt_loader import PromptLoader
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-logger = logging.getLogger("async_operator")
-logger.setLevel(logging.INFO)
+logger = logging.getLogger("texttools.async_operator")
 
 
 class AsyncOperator(BaseOperator):
@@ -115,7 +114,7 @@ class AsyncOperator(BaseOperator):
         temperature: float,
         logprobs: bool,
         top_logprobs: int | None,
-        validator: Any | None,
+        validator: Callable[[Any], bool] | None,
         # Internal parameters
         prompt_file: str,
         output_model: Type[T],
@@ -128,7 +127,7 @@ class AsyncOperator(BaseOperator):
         """
         prompt_loader = PromptLoader()
         formatter = Formatter()
-        output = ToolOutput(
+        output = ToolOutput()
 
         try:
             # Prompt configs contain two keys: main_template and analyze template, both are string
@@ -239,4 +238,5 @@ class AsyncOperator(BaseOperator):
 
         except Exception as e:
             logger.error(f"AsyncTheTool failed: {e}")
-
+            output.errors.append(str(e))
+        return output
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/base_operator.py
RENAMED
@@ -11,8 +11,7 @@ from openai import OpenAI, AsyncOpenAI
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-logger = logging.getLogger("base_operator")
-logger.setLevel(logging.INFO)
+logger = logging.getLogger("texttools.base_operator")
 
 
 class BaseOperator:
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/operator.py
RENAMED
@@ -1,4 +1,4 @@
-from typing import Any, TypeVar, Type, Literal
+from typing import Any, TypeVar, Type, Literal, Callable
 import logging
 
 from openai import OpenAI
@@ -12,8 +12,7 @@ from texttools.tools.internals.prompt_loader import PromptLoader
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-logger = logging.getLogger("operator")
-logger.setLevel(logging.INFO)
+logger = logging.getLogger("texttools.operator")
 
 
 class Operator(BaseOperator):
@@ -115,7 +114,7 @@ class Operator(BaseOperator):
         temperature: float,
         logprobs: bool,
         top_logprobs: int | None,
-        validator: Any | None,
+        validator: Callable[[Any], bool] | None,
         # Internal parameters
         prompt_file: str,
         output_model: Type[T],
@@ -128,7 +127,7 @@ class Operator(BaseOperator):
         """
         prompt_loader = PromptLoader()
         formatter = Formatter()
-        output = ToolOutput(
+        output = ToolOutput()
 
         try:
             # Prompt configs contain two keys: main_template and analyze template, both are string
@@ -239,4 +238,5 @@ class Operator(BaseOperator):
 
         except Exception as e:
             logger.error(f"TheTool failed: {e}")
-
+            output.errors.append(str(e))
+        return output
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/output_models.py
RENAMED
@@ -4,10 +4,13 @@ from pydantic import BaseModel, Field
 
 
 class ToolOutput(BaseModel):
-    result:
-    analysis: str
-    logprobs: list[dict[str, Any]]
-    errors: list[str]
+    result: Any = None
+    analysis: str = ""
+    logprobs: list[dict[str, Any]] = []
+    errors: list[str] = []
+
+    def __repr__(self) -> str:
+        return f"ToolOutput(result_type='{type(self.result)}', result='{self.result}', analysis='{self.analysis}', logprobs='{self.logprobs}', errors='{self.errors}'"
 
 
 class StrOutput(BaseModel):
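Two notes on the `ToolOutput` rewrite. First, mutable defaults such as `logprobs: list[dict[str, Any]] = []` are safe on a Pydantic `BaseModel`: Pydantic deep-copies field defaults per instance, so appending to one output's `errors` (as the operators now do) cannot leak into another output. Second, the new `__repr__` f-string never closes the `ToolOutput(` parenthesis, so printed reprs end unbalanced; a corrected sketch:

```python
from typing import Any
from pydantic import BaseModel

class ToolOutput(BaseModel):
    result: Any = None
    analysis: str = ""
    logprobs: list[dict[str, Any]] = []
    errors: list[str] = []

    def __repr__(self) -> str:
        # Same fields as 1.1.7, with the trailing ')' restored.
        return (
            f"ToolOutput(result_type='{type(self.result)}', result='{self.result}', "
            f"analysis='{self.analysis}', logprobs='{self.logprobs}', errors='{self.errors}')"
        )

a, b = ToolOutput(), ToolOutput()
a.errors.append("boom")
print(b.errors)  # [] — each instance gets its own copy of the default
```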
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/the_tool.py
RENAMED
@@ -1,4 +1,4 @@
-from typing import Literal, Any
+from typing import Literal, Any, Callable
 
 from openai import OpenAI
 
@@ -32,7 +32,7 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Categorize a text into a single Islamic studies domain category.
@@ -69,7 +69,7 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Extract salient keywords from text.
@@ -106,7 +106,7 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Perform Named Entity Recognition (NER) over the input text.
@@ -142,7 +142,7 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Detect if the input is phrased as a question.
@@ -179,7 +179,7 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Generate a single question from the given text.
@@ -217,7 +217,7 @@ class TheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["default", "reason"] = "default",
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Merge multiple questions into a single unified question.
@@ -256,7 +256,7 @@ class TheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Rewrite a text with different modes.
@@ -294,7 +294,7 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Generate a list of questions about a subject.
@@ -332,7 +332,7 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Summarize the given subject text.
@@ -369,7 +369,7 @@ class TheTool:
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
-        validator: Any | None = None,
+        validator: Callable[[Any], bool] | None = None,
     ) -> OutputModels.ToolOutput:
         """
         Translate text between languages.
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/LICENSE
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/MANIFEST.in
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/SOURCES.txt
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/dependency_links.txt
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/requires.txt
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/top_level.txt
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/setup.cfg
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/README.md
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/categorizer.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/extract_entities.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/extract_keywords.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/is_question.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/merge_questions.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/rewrite.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/run_custom.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/subject_to_question.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/summarize.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/text_to_question.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/prompts/translate.yaml
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/__init__.py
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/formatters.py
RENAMED
File without changes
{hamtaa_texttools-1.1.3 → hamtaa_texttools-1.1.7}/texttools/tools/internals/prompt_loader.py
RENAMED
File without changes