hamtaa-texttools 1.1.21__tar.gz → 1.1.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.21/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.23}/PKG-INFO +46 -87
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/README.md +45 -86
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23/hamtaa_texttools.egg-info}/PKG-INFO +46 -87
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/pyproject.toml +1 -1
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/tests/test_all_async_tools.py +2 -5
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/tests/test_all_tools.py +3 -10
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/tests/test_output_validation.py +2 -6
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/__init__.py +3 -3
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/batch/batch_config.py +2 -1
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/batch/batch_manager.py +6 -6
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/batch/batch_runner.py +7 -7
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/internals/async_operator.py +29 -41
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/internals/exceptions.py +0 -6
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/internals/operator_utils.py +24 -5
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/internals/prompt_loader.py +0 -5
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/internals/sync_operator.py +29 -41
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/categorize.yaml +3 -2
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/check_fact.yaml +5 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/extract_entities.yaml +4 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/extract_keywords.yaml +15 -3
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/is_question.yaml +4 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/merge_questions.yaml +8 -1
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/propositionize.yaml +2 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/rewrite.yaml +3 -4
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/subject_to_question.yaml +5 -1
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/summarize.yaml +4 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/text_to_question.yaml +4 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/translate.yaml +5 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/tools/async_tools.py +87 -103
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/tools/sync_tools.py +87 -104
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/LICENSE +0 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/setup.cfg +0 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/internals/models.py +0 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/internals/text_to_chunks.py +0 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/README.md +0 -0
- {hamtaa_texttools-1.1.21 → hamtaa_texttools-1.1.23}/texttools/prompts/run_custom.yaml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.23
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -37,61 +37,53 @@ Dynamic: license-file
|
|
|
37
37
|
|
|
38
38
|
## 📌 Overview
|
|
39
39
|
|
|
40
|
-
**TextTools** is a high-level **NLP toolkit** built on top of
|
|
40
|
+
**TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
|
|
41
41
|
|
|
42
42
|
It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
|
|
43
43
|
|
|
44
44
|
It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
|
|
45
45
|
|
|
46
|
+
**Note:** Most features of `texttools` are reliable when you use the `google/gemma-3n-e4b-it` model.
|
|
47
|
+
|
|
46
48
|
---
|
|
47
49
|
|
|
48
50
|
## ✨ Features
|
|
49
51
|
|
|
50
52
|
TextTools provides a rich collection of high-level NLP utilities,
|
|
51
|
-
Each tool is designed to work with structured outputs
|
|
53
|
+
Each tool is designed to work with structured outputs.
|
|
52
54
|
|
|
53
55
|
- **`categorize()`** - Classifies text into given categories
|
|
54
|
-
- **`extract_keywords()`** - Extracts keywords from text
|
|
56
|
+
- **`extract_keywords()`** - Extracts keywords from the text
|
|
55
57
|
- **`extract_entities()`** - Named Entity Recognition (NER) system
|
|
56
|
-
- **`is_question()`** - Binary detection
|
|
58
|
+
- **`is_question()`** - Binary question detection
|
|
57
59
|
- **`text_to_question()`** - Generates questions from text
|
|
58
|
-
- **`merge_questions()`** - Merges multiple questions
|
|
59
|
-
- **`rewrite()`** - Rewrites text
|
|
60
|
+
- **`merge_questions()`** - Merges multiple questions into one
|
|
61
|
+
- **`rewrite()`** - Rewrites text in a different way
|
|
60
62
|
- **`subject_to_question()`** - Generates questions about a specific subject
|
|
61
63
|
- **`summarize()`** - Text summarization
|
|
62
|
-
- **`translate()`** - Text translation
|
|
64
|
+
- **`translate()`** - Text translation
|
|
63
65
|
- **`propositionize()`** - Converts text into atomic, independent, meaningful sentences
|
|
64
66
|
- **`check_fact()`** - Check whether a statement is relevant to the source text
|
|
65
67
|
- **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
|
|
66
68
|
|
|
67
69
|
---
|
|
68
70
|
|
|
71
|
+
## 🚀 Installation
|
|
72
|
+
|
|
73
|
+
Install the latest release via PyPI:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install -U hamtaa-texttools
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
69
81
|
## 📊 Tool Quality Tiers
|
|
70
82
|
|
|
71
|
-
| Status | Meaning | Use in Production? |
|
|
72
|
-
|
|
73
|
-
| **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
|
|
74
|
-
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
|
|
75
|
-
|
|
76
|
-
### Current Status
|
|
77
|
-
**Production Tools:**
|
|
78
|
-
- `categorize()` (list mode)
|
|
79
|
-
- `extract_keywords()`
|
|
80
|
-
- `extract_entities()`
|
|
81
|
-
- `is_question()`
|
|
82
|
-
- `text_to_question()`
|
|
83
|
-
- `merge_questions()`
|
|
84
|
-
- `rewrite()`
|
|
85
|
-
- `subject_to_question()`
|
|
86
|
-
- `summarize()`
|
|
87
|
-
- `run_custom()` (fine in most cases)
|
|
88
|
-
|
|
89
|
-
**Experimental Tools:**
|
|
90
|
-
- `categorize()` (tree mode)
|
|
91
|
-
- `translate()`
|
|
92
|
-
- `propositionize()`
|
|
93
|
-
- `check_fact()`
|
|
94
|
-
- `run_custom()` (not evaluated in all scenarios)
|
|
83
|
+
| Status | Meaning | Tools | Use in Production? |
|
|
84
|
+
|--------|---------|----------|-------------------|
|
|
85
|
+
| **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
|
|
86
|
+
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
|
|
95
87
|
|
|
96
88
|
---
|
|
97
89
|
|
|
@@ -100,49 +92,37 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
|
|
|
100
92
|
TextTools provides several optional flags to customize LLM behavior:
|
|
101
93
|
|
|
102
94
|
- **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
|
|
103
|
-
**Note:** This doubles token usage per call
|
|
95
|
+
**Note:** This doubles token usage per call.
|
|
104
96
|
|
|
105
97
|
- **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
|
|
106
98
|
**Note:** This feature works if it's supported by the model.
|
|
107
99
|
|
|
108
|
-
- **`output_lang: str`** → Forces the model to respond in a specific language.
|
|
100
|
+
- **`output_lang: str`** → Forces the model to respond in a specific language.
|
|
109
101
|
|
|
110
|
-
- **`user_prompt: str`** → Allows you to inject a custom instruction or
|
|
102
|
+
- **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
|
|
111
103
|
|
|
112
104
|
- **`temperature: float`** → Determines how creatively the model should respond. Takes a float number from `0.0` to `2.0`.
|
|
113
105
|
|
|
114
|
-
- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a
|
|
106
|
+
- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
|
|
115
107
|
|
|
116
|
-
- **`priority: int (Experimental)`** → Task execution priority level.
|
|
108
|
+
- **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
|
|
117
109
|
**Note:** This feature works if it's supported by the model and vLLM.
|
|
118
110
|
|
|
119
|
-
**Note:** There might be some tools that don't support some of the parameters above.
|
|
120
|
-
|
|
121
111
|
---
|
|
122
112
|
|
|
123
113
|
## 🧩 ToolOutput
|
|
124
114
|
|
|
125
115
|
Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
|
|
126
|
-
- **`result: Any`**
|
|
127
|
-
- **`analysis: str`**
|
|
128
|
-
- **`logprobs: list`**
|
|
129
|
-
- **`errors: list[str]`**
|
|
116
|
+
- **`result: Any`**
|
|
117
|
+
- **`analysis: str`**
|
|
118
|
+
- **`logprobs: list`**
|
|
119
|
+
- **`errors: list[str]`**
|
|
130
120
|
- **`ToolOutputMetadata`** →
|
|
131
|
-
- **`tool_name: str`**
|
|
132
|
-
- **`processed_at: datetime`**
|
|
133
|
-
- **`execution_time: float`**
|
|
121
|
+
- **`tool_name: str`**
|
|
122
|
+
- **`processed_at: datetime`**
|
|
123
|
+
- **`execution_time: float`**
|
|
134
124
|
|
|
135
|
-
**Note:** You can use `repr(ToolOutput)` to
|
|
136
|
-
|
|
137
|
-
---
|
|
138
|
-
|
|
139
|
-
## 🚀 Installation
|
|
140
|
-
|
|
141
|
-
Install the latest release via PyPI:
|
|
142
|
-
|
|
143
|
-
```bash
|
|
144
|
-
pip install -U hamtaa-texttools
|
|
145
|
-
```
|
|
125
|
+
**Note:** You can use `repr(ToolOutput)` to print your output with all the details.
|
|
146
126
|
|
|
147
127
|
---
|
|
148
128
|
|
|
@@ -160,26 +140,13 @@ pip install -U hamtaa-texttools
|
|
|
160
140
|
from openai import OpenAI
|
|
161
141
|
from texttools import TheTool
|
|
162
142
|
|
|
163
|
-
# Create your OpenAI client
|
|
164
143
|
client = OpenAI(base_url = "your_url", api_key = "your_api_key")
|
|
144
|
+
model = "model_name"
|
|
165
145
|
|
|
166
|
-
# Specify the model
|
|
167
|
-
model = "gpt-4o-mini"
|
|
168
|
-
|
|
169
|
-
# Create an instance of TheTool
|
|
170
146
|
the_tool = TheTool(client=client, model=model)
|
|
171
147
|
|
|
172
|
-
|
|
173
|
-
detection
|
|
174
|
-
print(detection.result)
|
|
175
|
-
print(detection.logprobs)
|
|
176
|
-
# Output: True + logprobs
|
|
177
|
-
|
|
178
|
-
# Example: Translation
|
|
179
|
-
translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
|
|
180
|
-
print(translation.result)
|
|
181
|
-
print(translation.analysis)
|
|
182
|
-
# Output: "Hi! How are you?" + analysis
|
|
148
|
+
detection = the_tool.is_question("Is this project open source?")
|
|
149
|
+
print(repr(detection))
|
|
183
150
|
```
|
|
184
151
|
|
|
185
152
|
---
|
|
@@ -192,22 +159,17 @@ from openai import AsyncOpenAI
|
|
|
192
159
|
from texttools import AsyncTheTool
|
|
193
160
|
|
|
194
161
|
async def main():
|
|
195
|
-
# Create your AsyncOpenAI client
|
|
196
162
|
async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
|
|
163
|
+
model = "model_name"
|
|
197
164
|
|
|
198
|
-
# Specify the model
|
|
199
|
-
model = "gpt-4o-mini"
|
|
200
|
-
|
|
201
|
-
# Create an instance of AsyncTheTool
|
|
202
165
|
async_the_tool = AsyncTheTool(client=async_client, model=model)
|
|
203
166
|
|
|
204
|
-
# Example: Async Translation and Keyword Extraction
|
|
205
167
|
translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
|
|
206
168
|
keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
|
|
207
169
|
|
|
208
170
|
(translation, keywords) = await asyncio.gather(translation_task, keywords_task)
|
|
209
|
-
print(translation
|
|
210
|
-
print(keywords
|
|
171
|
+
print(repr(translation))
|
|
172
|
+
print(repr(keywords))
|
|
211
173
|
|
|
212
174
|
asyncio.run(main())
|
|
213
175
|
```
|
|
@@ -229,13 +191,12 @@ Use **TextTools** when you need to:
|
|
|
229
191
|
|
|
230
192
|
Process large datasets efficiently using OpenAI's batch API.
|
|
231
193
|
|
|
232
|
-
## ⚡ Quick Start (Batch)
|
|
194
|
+
## ⚡ Quick Start (Batch Runner)
|
|
233
195
|
|
|
234
196
|
```python
|
|
235
197
|
from pydantic import BaseModel
|
|
236
|
-
from texttools import
|
|
198
|
+
from texttools import BatchRunner, BatchConfig
|
|
237
199
|
|
|
238
|
-
# Configure your batch job
|
|
239
200
|
config = BatchConfig(
|
|
240
201
|
system_prompt="Extract entities from the text",
|
|
241
202
|
job_name="entity_extraction",
|
|
@@ -244,12 +205,10 @@ config = BatchConfig(
|
|
|
244
205
|
model="gpt-4o-mini"
|
|
245
206
|
)
|
|
246
207
|
|
|
247
|
-
# Define your output schema
|
|
248
208
|
class Output(BaseModel):
|
|
249
209
|
entities: list[str]
|
|
250
210
|
|
|
251
|
-
|
|
252
|
-
runner = BatchJobRunner(config, output_model=Output)
|
|
211
|
+
runner = BatchRunner(config, output_model=Output)
|
|
253
212
|
runner.run()
|
|
254
213
|
```
|
|
255
214
|
|
|
@@ -2,61 +2,53 @@
|
|
|
2
2
|
|
|
3
3
|
## 📌 Overview
|
|
4
4
|
|
|
5
|
-
**TextTools** is a high-level **NLP toolkit** built on top of
|
|
5
|
+
**TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
|
|
6
6
|
|
|
7
7
|
It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
|
|
8
8
|
|
|
9
9
|
It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
|
|
10
10
|
|
|
11
|
+
**Note:** Most features of `texttools` are reliable when you use the `google/gemma-3n-e4b-it` model.
|
|
12
|
+
|
|
11
13
|
---
|
|
12
14
|
|
|
13
15
|
## ✨ Features
|
|
14
16
|
|
|
15
17
|
TextTools provides a rich collection of high-level NLP utilities,
|
|
16
|
-
Each tool is designed to work with structured outputs
|
|
18
|
+
Each tool is designed to work with structured outputs.
|
|
17
19
|
|
|
18
20
|
- **`categorize()`** - Classifies text into given categories
|
|
19
|
-
- **`extract_keywords()`** - Extracts keywords from text
|
|
21
|
+
- **`extract_keywords()`** - Extracts keywords from the text
|
|
20
22
|
- **`extract_entities()`** - Named Entity Recognition (NER) system
|
|
21
|
-
- **`is_question()`** - Binary detection
|
|
23
|
+
- **`is_question()`** - Binary question detection
|
|
22
24
|
- **`text_to_question()`** - Generates questions from text
|
|
23
|
-
- **`merge_questions()`** - Merges multiple questions
|
|
24
|
-
- **`rewrite()`** - Rewrites text
|
|
25
|
+
- **`merge_questions()`** - Merges multiple questions into one
|
|
26
|
+
- **`rewrite()`** - Rewrites text in a different way
|
|
25
27
|
- **`subject_to_question()`** - Generates questions about a specific subject
|
|
26
28
|
- **`summarize()`** - Text summarization
|
|
27
|
-
- **`translate()`** - Text translation
|
|
29
|
+
- **`translate()`** - Text translation
|
|
28
30
|
- **`propositionize()`** - Converts text into atomic, independent, meaningful sentences
|
|
29
31
|
- **`check_fact()`** - Check whether a statement is relevant to the source text
|
|
30
32
|
- **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
|
|
31
33
|
|
|
32
34
|
---
|
|
33
35
|
|
|
36
|
+
## 🚀 Installation
|
|
37
|
+
|
|
38
|
+
Install the latest release via PyPI:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install -U hamtaa-texttools
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
34
46
|
## 📊 Tool Quality Tiers
|
|
35
47
|
|
|
36
|
-
| Status | Meaning | Use in Production? |
|
|
37
|
-
|
|
38
|
-
| **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
|
|
39
|
-
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
|
|
40
|
-
|
|
41
|
-
### Current Status
|
|
42
|
-
**Production Tools:**
|
|
43
|
-
- `categorize()` (list mode)
|
|
44
|
-
- `extract_keywords()`
|
|
45
|
-
- `extract_entities()`
|
|
46
|
-
- `is_question()`
|
|
47
|
-
- `text_to_question()`
|
|
48
|
-
- `merge_questions()`
|
|
49
|
-
- `rewrite()`
|
|
50
|
-
- `subject_to_question()`
|
|
51
|
-
- `summarize()`
|
|
52
|
-
- `run_custom()` (fine in most cases)
|
|
53
|
-
|
|
54
|
-
**Experimental Tools:**
|
|
55
|
-
- `categorize()` (tree mode)
|
|
56
|
-
- `translate()`
|
|
57
|
-
- `propositionize()`
|
|
58
|
-
- `check_fact()`
|
|
59
|
-
- `run_custom()` (not evaluated in all scenarios)
|
|
48
|
+
| Status | Meaning | Tools | Use in Production? |
|
|
49
|
+
|--------|---------|----------|-------------------|
|
|
50
|
+
| **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
|
|
51
|
+
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
|
|
60
52
|
|
|
61
53
|
---
|
|
62
54
|
|
|
@@ -65,49 +57,37 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
|
|
|
65
57
|
TextTools provides several optional flags to customize LLM behavior:
|
|
66
58
|
|
|
67
59
|
- **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
|
|
68
|
-
**Note:** This doubles token usage per call
|
|
60
|
+
**Note:** This doubles token usage per call.
|
|
69
61
|
|
|
70
62
|
- **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
|
|
71
63
|
**Note:** This feature works if it's supported by the model.
|
|
72
64
|
|
|
73
|
-
- **`output_lang: str`** → Forces the model to respond in a specific language.
|
|
65
|
+
- **`output_lang: str`** → Forces the model to respond in a specific language.
|
|
74
66
|
|
|
75
|
-
- **`user_prompt: str`** → Allows you to inject a custom instruction or
|
|
67
|
+
- **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
|
|
76
68
|
|
|
77
69
|
- **`temperature: float`** → Determines how creatively the model should respond. Takes a float number from `0.0` to `2.0`.
|
|
78
70
|
|
|
79
|
-
- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a
|
|
71
|
+
- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
|
|
80
72
|
|
|
81
|
-
- **`priority: int (Experimental)`** → Task execution priority level.
|
|
73
|
+
- **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
|
|
82
74
|
**Note:** This feature works if it's supported by the model and vLLM.
|
|
83
75
|
|
|
84
|
-
**Note:** There might be some tools that don't support some of the parameters above.
|
|
85
|
-
|
|
86
76
|
---
|
|
87
77
|
|
|
88
78
|
## 🧩 ToolOutput
|
|
89
79
|
|
|
90
80
|
Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
|
|
91
|
-
- **`result: Any`**
|
|
92
|
-
- **`analysis: str`**
|
|
93
|
-
- **`logprobs: list`**
|
|
94
|
-
- **`errors: list[str]`**
|
|
81
|
+
- **`result: Any`**
|
|
82
|
+
- **`analysis: str`**
|
|
83
|
+
- **`logprobs: list`**
|
|
84
|
+
- **`errors: list[str]`**
|
|
95
85
|
- **`ToolOutputMetadata`** →
|
|
96
|
-
- **`tool_name: str`**
|
|
97
|
-
- **`processed_at: datetime`**
|
|
98
|
-
- **`execution_time: float`**
|
|
86
|
+
- **`tool_name: str`**
|
|
87
|
+
- **`processed_at: datetime`**
|
|
88
|
+
- **`execution_time: float`**
|
|
99
89
|
|
|
100
|
-
**Note:** You can use `repr(ToolOutput)` to
|
|
101
|
-
|
|
102
|
-
---
|
|
103
|
-
|
|
104
|
-
## 🚀 Installation
|
|
105
|
-
|
|
106
|
-
Install the latest release via PyPI:
|
|
107
|
-
|
|
108
|
-
```bash
|
|
109
|
-
pip install -U hamtaa-texttools
|
|
110
|
-
```
|
|
90
|
+
**Note:** You can use `repr(ToolOutput)` to print your output with all the details.
|
|
111
91
|
|
|
112
92
|
---
|
|
113
93
|
|
|
@@ -125,26 +105,13 @@ pip install -U hamtaa-texttools
|
|
|
125
105
|
from openai import OpenAI
|
|
126
106
|
from texttools import TheTool
|
|
127
107
|
|
|
128
|
-
# Create your OpenAI client
|
|
129
108
|
client = OpenAI(base_url = "your_url", api_key = "your_api_key")
|
|
109
|
+
model = "model_name"
|
|
130
110
|
|
|
131
|
-
# Specify the model
|
|
132
|
-
model = "gpt-4o-mini"
|
|
133
|
-
|
|
134
|
-
# Create an instance of TheTool
|
|
135
111
|
the_tool = TheTool(client=client, model=model)
|
|
136
112
|
|
|
137
|
-
|
|
138
|
-
detection
|
|
139
|
-
print(detection.result)
|
|
140
|
-
print(detection.logprobs)
|
|
141
|
-
# Output: True + logprobs
|
|
142
|
-
|
|
143
|
-
# Example: Translation
|
|
144
|
-
translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
|
|
145
|
-
print(translation.result)
|
|
146
|
-
print(translation.analysis)
|
|
147
|
-
# Output: "Hi! How are you?" + analysis
|
|
113
|
+
detection = the_tool.is_question("Is this project open source?")
|
|
114
|
+
print(repr(detection))
|
|
148
115
|
```
|
|
149
116
|
|
|
150
117
|
---
|
|
@@ -157,22 +124,17 @@ from openai import AsyncOpenAI
|
|
|
157
124
|
from texttools import AsyncTheTool
|
|
158
125
|
|
|
159
126
|
async def main():
|
|
160
|
-
# Create your AsyncOpenAI client
|
|
161
127
|
async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
|
|
128
|
+
model = "model_name"
|
|
162
129
|
|
|
163
|
-
# Specify the model
|
|
164
|
-
model = "gpt-4o-mini"
|
|
165
|
-
|
|
166
|
-
# Create an instance of AsyncTheTool
|
|
167
130
|
async_the_tool = AsyncTheTool(client=async_client, model=model)
|
|
168
131
|
|
|
169
|
-
# Example: Async Translation and Keyword Extraction
|
|
170
132
|
translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
|
|
171
133
|
keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
|
|
172
134
|
|
|
173
135
|
(translation, keywords) = await asyncio.gather(translation_task, keywords_task)
|
|
174
|
-
print(translation
|
|
175
|
-
print(keywords
|
|
136
|
+
print(repr(translation))
|
|
137
|
+
print(repr(keywords))
|
|
176
138
|
|
|
177
139
|
asyncio.run(main())
|
|
178
140
|
```
|
|
@@ -194,13 +156,12 @@ Use **TextTools** when you need to:
|
|
|
194
156
|
|
|
195
157
|
Process large datasets efficiently using OpenAI's batch API.
|
|
196
158
|
|
|
197
|
-
## ⚡ Quick Start (Batch)
|
|
159
|
+
## ⚡ Quick Start (Batch Runner)
|
|
198
160
|
|
|
199
161
|
```python
|
|
200
162
|
from pydantic import BaseModel
|
|
201
|
-
from texttools import
|
|
163
|
+
from texttools import BatchRunner, BatchConfig
|
|
202
164
|
|
|
203
|
-
# Configure your batch job
|
|
204
165
|
config = BatchConfig(
|
|
205
166
|
system_prompt="Extract entities from the text",
|
|
206
167
|
job_name="entity_extraction",
|
|
@@ -209,12 +170,10 @@ config = BatchConfig(
|
|
|
209
170
|
model="gpt-4o-mini"
|
|
210
171
|
)
|
|
211
172
|
|
|
212
|
-
# Define your output schema
|
|
213
173
|
class Output(BaseModel):
|
|
214
174
|
entities: list[str]
|
|
215
175
|
|
|
216
|
-
|
|
217
|
-
runner = BatchJobRunner(config, output_model=Output)
|
|
176
|
+
runner = BatchRunner(config, output_model=Output)
|
|
218
177
|
runner.run()
|
|
219
178
|
```
|
|
220
179
|
|