hamtaa-texttools 1.1.20__tar.gz → 1.1.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.20/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.22}/PKG-INFO +49 -109
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/README.md +48 -108
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22/hamtaa_texttools.egg-info}/PKG-INFO +49 -109
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/hamtaa_texttools.egg-info/SOURCES.txt +1 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/pyproject.toml +1 -1
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/tests/test_all_tools.py +15 -9
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/__init__.py +3 -3
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/batch/batch_config.py +14 -1
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/batch/batch_runner.py +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/internals/async_operator.py +49 -92
- hamtaa_texttools-1.1.22/texttools/internals/models.py +150 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/internals/operator_utils.py +25 -27
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/internals/prompt_loader.py +3 -20
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/internals/sync_operator.py +49 -92
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/README.md +2 -2
- hamtaa_texttools-1.1.22/texttools/prompts/categorize.yaml +35 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/check_fact.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/extract_entities.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/extract_keywords.yaml +6 -6
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/is_question.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/merge_questions.yaml +4 -4
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/propositionize.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/rewrite.yaml +6 -6
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/run_custom.yaml +1 -1
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/subject_to_question.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/summarize.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/text_to_question.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/prompts/translate.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/tools/async_tools.py +393 -487
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/tools/sync_tools.py +394 -488
- hamtaa_texttools-1.1.20/texttools/batch/internals/utils.py +0 -13
- hamtaa_texttools-1.1.20/texttools/internals/models.py +0 -181
- hamtaa_texttools-1.1.20/texttools/prompts/categorize.yaml +0 -77
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/LICENSE +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/setup.cfg +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/tests/test_all_async_tools.py +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/tests/test_output_validation.py +0 -0
- {hamtaa_texttools-1.1.20/texttools/batch/internals → hamtaa_texttools-1.1.22/texttools/batch}/batch_manager.py +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/internals/exceptions.py +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.22}/texttools/internals/text_to_chunks.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.22
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -37,61 +37,53 @@ Dynamic: license-file
|
|
|
37
37
|
|
|
38
38
|
## 📌 Overview
|
|
39
39
|
|
|
40
|
-
**TextTools** is a high-level **NLP toolkit** built on top of
|
|
40
|
+
**TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
|
|
41
41
|
|
|
42
42
|
It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
|
|
43
43
|
|
|
44
44
|
It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
|
|
45
45
|
|
|
46
|
+
**Note:** Most features of `texttools` are reliable when you use `google/gemma-3n-e4b-it` model.
|
|
47
|
+
|
|
46
48
|
---
|
|
47
49
|
|
|
48
50
|
## ✨ Features
|
|
49
51
|
|
|
50
52
|
TextTools provides a rich collection of high-level NLP utilities,
|
|
51
|
-
Each tool is designed to work with structured outputs
|
|
53
|
+
Each tool is designed to work with structured outputs.
|
|
52
54
|
|
|
53
|
-
- **`categorize()`** - Classifies text into given categories
|
|
54
|
-
- **`extract_keywords()`** - Extracts keywords from text
|
|
55
|
+
- **`categorize()`** - Classifies text into given categories
|
|
56
|
+
- **`extract_keywords()`** - Extracts keywords from the text
|
|
55
57
|
- **`extract_entities()`** - Named Entity Recognition (NER) system
|
|
56
|
-
- **`is_question()`** - Binary detection
|
|
58
|
+
- **`is_question()`** - Binary question detection
|
|
57
59
|
- **`text_to_question()`** - Generates questions from text
|
|
58
|
-
- **`merge_questions()`** - Merges multiple questions
|
|
59
|
-
- **`rewrite()`** - Rewrites text
|
|
60
|
+
- **`merge_questions()`** - Merges multiple questions into one
|
|
61
|
+
- **`rewrite()`** - Rewrites text in a different way
|
|
60
62
|
- **`subject_to_question()`** - Generates questions about a specific subject
|
|
61
63
|
- **`summarize()`** - Text summarization
|
|
62
|
-
- **`translate()`** - Text translation
|
|
64
|
+
- **`translate()`** - Text translation
|
|
63
65
|
- **`propositionize()`** - Convert text to atomic, independent, meaningful sentences
|
|
64
|
-
- **`check_fact()`** - Check a statement is relevant to source text
|
|
66
|
+
- **`check_fact()`** - Check whether a statement is relevant to the source text
|
|
65
67
|
- **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
|
|
66
68
|
|
|
67
69
|
---
|
|
68
70
|
|
|
71
|
+
## 🚀 Installation
|
|
72
|
+
|
|
73
|
+
Install the latest release via PyPI:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install -U hamtaa-texttools
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
69
81
|
## 📊 Tool Quality Tiers
|
|
70
82
|
|
|
71
|
-
| Status | Meaning | Use in Production? |
|
|
72
|
-
|
|
73
|
-
| **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
|
|
74
|
-
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
|
|
75
|
-
|
|
76
|
-
### Current Status
|
|
77
|
-
**Production Tools:**
|
|
78
|
-
- `categorize()` (list mode)
|
|
79
|
-
- `extract_keywords()`
|
|
80
|
-
- `extract_entities()`
|
|
81
|
-
- `is_question()`
|
|
82
|
-
- `text_to_question()`
|
|
83
|
-
- `merge_questions()`
|
|
84
|
-
- `rewrite()`
|
|
85
|
-
- `subject_to_question()`
|
|
86
|
-
- `summarize()`
|
|
87
|
-
- `run_custom()` (fine in most cases)
|
|
88
|
-
|
|
89
|
-
**Experimental Tools:**
|
|
90
|
-
- `categorize()` (tree mode)
|
|
91
|
-
- `translate()`
|
|
92
|
-
- `propositionize()`
|
|
93
|
-
- `check_fact()`
|
|
94
|
-
- `run_custom()` (not evaluated in all scenarios)
|
|
83
|
+
| Status | Meaning | Tools | Use in Production? |
|
|
84
|
+
|--------|---------|----------|-------------------|
|
|
85
|
+
| **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
|
|
86
|
+
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
|
|
95
87
|
|
|
96
88
|
---
|
|
97
89
|
|
|
@@ -100,48 +92,37 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
|
|
|
100
92
|
TextTools provides several optional flags to customize LLM behavior:
|
|
101
93
|
|
|
102
94
|
- **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
|
|
103
|
-
**Note:** This doubles token usage per call
|
|
95
|
+
**Note:** This doubles token usage per call.
|
|
104
96
|
|
|
105
97
|
- **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
|
|
106
98
|
**Note:** This feature works if it's supported by the model.
|
|
107
99
|
|
|
108
|
-
- **`output_lang: str`** → Forces the model to respond in a specific language.
|
|
100
|
+
- **`output_lang: str`** → Forces the model to respond in a specific language.
|
|
109
101
|
|
|
110
|
-
- **`user_prompt: str`** → Allows you to inject a custom instruction or
|
|
102
|
+
- **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
|
|
111
103
|
|
|
112
104
|
- **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
|
|
113
105
|
|
|
114
|
-
- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a
|
|
106
|
+
- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
|
|
115
107
|
|
|
116
|
-
- **`priority: int (Experimental)`** → Task execution priority level.
|
|
108
|
+
- **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
|
|
117
109
|
**Note:** This feature works if it's supported by the model and vLLM.
|
|
118
110
|
|
|
119
|
-
**Note:** There might be some tools that don't support some of the parameters above.
|
|
120
|
-
|
|
121
111
|
---
|
|
122
112
|
|
|
123
113
|
## 🧩 ToolOutput
|
|
124
114
|
|
|
125
115
|
Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
|
|
126
|
-
- **`result: Any`**
|
|
127
|
-
- **`analysis: str`**
|
|
128
|
-
- **`logprobs: list`**
|
|
129
|
-
- **`
|
|
130
|
-
- **`
|
|
131
|
-
- **`
|
|
132
|
-
- **`
|
|
116
|
+
- **`result: Any`**
|
|
117
|
+
- **`analysis: str`**
|
|
118
|
+
- **`logprobs: list`**
|
|
119
|
+
- **`errors: list[str]`**
|
|
120
|
+
- **`ToolOutputMetadata`** →
|
|
121
|
+
- **`tool_name: str`**
|
|
122
|
+
- **`processed_at: datetime`**
|
|
123
|
+
- **`execution_time: float`**
|
|
133
124
|
|
|
134
|
-
**Note:** You can use `repr(ToolOutput)` to
|
|
135
|
-
|
|
136
|
-
---
|
|
137
|
-
|
|
138
|
-
## 🚀 Installation
|
|
139
|
-
|
|
140
|
-
Install the latest release via PyPI:
|
|
141
|
-
|
|
142
|
-
```bash
|
|
143
|
-
pip install -U hamtaa-texttools
|
|
144
|
-
```
|
|
125
|
+
**Note:** You can use `repr(ToolOutput)` to print your output with all the details.
|
|
145
126
|
|
|
146
127
|
---
|
|
147
128
|
|
|
@@ -159,26 +140,13 @@ pip install -U hamtaa-texttools
|
|
|
159
140
|
from openai import OpenAI
|
|
160
141
|
from texttools import TheTool
|
|
161
142
|
|
|
162
|
-
# Create your OpenAI client
|
|
163
143
|
client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
|
|
144
|
+
model = "model_name"
|
|
164
145
|
|
|
165
|
-
# Specify the model
|
|
166
|
-
model = "gpt-4o-mini"
|
|
167
|
-
|
|
168
|
-
# Create an instance of TheTool
|
|
169
146
|
the_tool = TheTool(client=client, model=model)
|
|
170
147
|
|
|
171
|
-
|
|
172
|
-
detection
|
|
173
|
-
print(detection.result)
|
|
174
|
-
print(detection.logprobs)
|
|
175
|
-
# Output: True + logprobs
|
|
176
|
-
|
|
177
|
-
# Example: Translation
|
|
178
|
-
translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
|
|
179
|
-
print(translation.result)
|
|
180
|
-
print(translation.analysis)
|
|
181
|
-
# Output: "Hi! How are you?" + analysis
|
|
148
|
+
detection = the_tool.is_question("Is this project open source?")
|
|
149
|
+
print(repr(detection))
|
|
182
150
|
```
|
|
183
151
|
|
|
184
152
|
---
|
|
@@ -191,22 +159,17 @@ from openai import AsyncOpenAI
|
|
|
191
159
|
from texttools import AsyncTheTool
|
|
192
160
|
|
|
193
161
|
async def main():
|
|
194
|
-
# Create your AsyncOpenAI client
|
|
195
162
|
async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
|
|
163
|
+
model = "model_name"
|
|
196
164
|
|
|
197
|
-
# Specify the model
|
|
198
|
-
model = "gpt-4o-mini"
|
|
199
|
-
|
|
200
|
-
# Create an instance of AsyncTheTool
|
|
201
165
|
async_the_tool = AsyncTheTool(client=async_client, model=model)
|
|
202
166
|
|
|
203
|
-
# Example: Async Translation and Keyword Extraction
|
|
204
167
|
translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
|
|
205
168
|
keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
|
|
206
169
|
|
|
207
170
|
(translation, keywords) = await asyncio.gather(translation_task, keywords_task)
|
|
208
|
-
print(translation
|
|
209
|
-
print(keywords
|
|
171
|
+
print(repr(translation))
|
|
172
|
+
print(repr(keywords))
|
|
210
173
|
|
|
211
174
|
asyncio.run(main())
|
|
212
175
|
```
|
|
@@ -224,37 +187,16 @@ Use **TextTools** when you need to:
|
|
|
224
187
|
|
|
225
188
|
---
|
|
226
189
|
|
|
227
|
-
## 🔍 Logging
|
|
228
|
-
|
|
229
|
-
TextTools uses Python's standard `logging` module. The library's default logger level is `WARNING`, so if you want to modify it, follow instructions:
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
```python
|
|
233
|
-
import logging
|
|
234
|
-
|
|
235
|
-
# Default: warnings and errors only
|
|
236
|
-
logging.basicConfig(level=logging.WARNING)
|
|
237
|
-
|
|
238
|
-
# Debug everything (verbose)
|
|
239
|
-
logging.basicConfig(level=logging.DEBUG)
|
|
240
|
-
|
|
241
|
-
# Complete silence
|
|
242
|
-
logging.basicConfig(level=logging.CRITICAL)
|
|
243
|
-
```
|
|
244
|
-
|
|
245
|
-
---
|
|
246
|
-
|
|
247
190
|
## 📚 Batch Processing
|
|
248
191
|
|
|
249
192
|
Process large datasets efficiently using OpenAI's batch API.
|
|
250
193
|
|
|
251
|
-
## ⚡ Quick Start (Batch)
|
|
194
|
+
## ⚡ Quick Start (Batch Runner)
|
|
252
195
|
|
|
253
196
|
```python
|
|
254
197
|
from pydantic import BaseModel
|
|
255
|
-
from texttools import
|
|
198
|
+
from texttools import BatchRunner, BatchConfig
|
|
256
199
|
|
|
257
|
-
# Configure your batch job
|
|
258
200
|
config = BatchConfig(
|
|
259
201
|
system_prompt="Extract entities from the text",
|
|
260
202
|
job_name="entity_extraction",
|
|
@@ -263,12 +205,10 @@ config = BatchConfig(
|
|
|
263
205
|
model="gpt-4o-mini"
|
|
264
206
|
)
|
|
265
207
|
|
|
266
|
-
# Define your output schema
|
|
267
208
|
class Output(BaseModel):
|
|
268
209
|
entities: list[str]
|
|
269
210
|
|
|
270
|
-
|
|
271
|
-
runner = BatchJobRunner(config, output_model=Output)
|
|
211
|
+
runner = BatchRunner(config, output_model=Output)
|
|
272
212
|
runner.run()
|
|
273
213
|
```
|
|
274
214
|
|
|
@@ -2,61 +2,53 @@
|
|
|
2
2
|
|
|
3
3
|
## 📌 Overview
|
|
4
4
|
|
|
5
|
-
**TextTools** is a high-level **NLP toolkit** built on top of
|
|
5
|
+
**TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
|
|
6
6
|
|
|
7
7
|
It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
|
|
8
8
|
|
|
9
9
|
It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
|
|
10
10
|
|
|
11
|
+
**Note:** Most features of `texttools` are reliable when you use `google/gemma-3n-e4b-it` model.
|
|
12
|
+
|
|
11
13
|
---
|
|
12
14
|
|
|
13
15
|
## ✨ Features
|
|
14
16
|
|
|
15
17
|
TextTools provides a rich collection of high-level NLP utilities,
|
|
16
|
-
Each tool is designed to work with structured outputs
|
|
18
|
+
Each tool is designed to work with structured outputs.
|
|
17
19
|
|
|
18
|
-
- **`categorize()`** - Classifies text into given categories
|
|
19
|
-
- **`extract_keywords()`** - Extracts keywords from text
|
|
20
|
+
- **`categorize()`** - Classifies text into given categories
|
|
21
|
+
- **`extract_keywords()`** - Extracts keywords from the text
|
|
20
22
|
- **`extract_entities()`** - Named Entity Recognition (NER) system
|
|
21
|
-
- **`is_question()`** - Binary detection
|
|
23
|
+
- **`is_question()`** - Binary question detection
|
|
22
24
|
- **`text_to_question()`** - Generates questions from text
|
|
23
|
-
- **`merge_questions()`** - Merges multiple questions
|
|
24
|
-
- **`rewrite()`** - Rewrites text
|
|
25
|
+
- **`merge_questions()`** - Merges multiple questions into one
|
|
26
|
+
- **`rewrite()`** - Rewrites text in a different way
|
|
25
27
|
- **`subject_to_question()`** - Generates questions about a specific subject
|
|
26
28
|
- **`summarize()`** - Text summarization
|
|
27
|
-
- **`translate()`** - Text translation
|
|
29
|
+
- **`translate()`** - Text translation
|
|
28
30
|
- **`propositionize()`** - Convert text to atomic, independent, meaningful sentences
|
|
29
|
-
- **`check_fact()`** - Check a statement is relevant to source text
|
|
31
|
+
- **`check_fact()`** - Check whether a statement is relevant to the source text
|
|
30
32
|
- **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
|
|
31
33
|
|
|
32
34
|
---
|
|
33
35
|
|
|
36
|
+
## 🚀 Installation
|
|
37
|
+
|
|
38
|
+
Install the latest release via PyPI:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install -U hamtaa-texttools
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
34
46
|
## 📊 Tool Quality Tiers
|
|
35
47
|
|
|
36
|
-
| Status | Meaning | Use in Production? |
|
|
37
|
-
|
|
38
|
-
| **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
|
|
39
|
-
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
|
|
40
|
-
|
|
41
|
-
### Current Status
|
|
42
|
-
**Production Tools:**
|
|
43
|
-
- `categorize()` (list mode)
|
|
44
|
-
- `extract_keywords()`
|
|
45
|
-
- `extract_entities()`
|
|
46
|
-
- `is_question()`
|
|
47
|
-
- `text_to_question()`
|
|
48
|
-
- `merge_questions()`
|
|
49
|
-
- `rewrite()`
|
|
50
|
-
- `subject_to_question()`
|
|
51
|
-
- `summarize()`
|
|
52
|
-
- `run_custom()` (fine in most cases)
|
|
53
|
-
|
|
54
|
-
**Experimental Tools:**
|
|
55
|
-
- `categorize()` (tree mode)
|
|
56
|
-
- `translate()`
|
|
57
|
-
- `propositionize()`
|
|
58
|
-
- `check_fact()`
|
|
59
|
-
- `run_custom()` (not evaluated in all scenarios)
|
|
48
|
+
| Status | Meaning | Tools | Use in Production? |
|
|
49
|
+
|--------|---------|----------|-------------------|
|
|
50
|
+
| **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
|
|
51
|
+
| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
|
|
60
52
|
|
|
61
53
|
---
|
|
62
54
|
|
|
@@ -65,48 +57,37 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
|
|
|
65
57
|
TextTools provides several optional flags to customize LLM behavior:
|
|
66
58
|
|
|
67
59
|
- **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
|
|
68
|
-
**Note:** This doubles token usage per call
|
|
60
|
+
**Note:** This doubles token usage per call.
|
|
69
61
|
|
|
70
62
|
- **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
|
|
71
63
|
**Note:** This feature works if it's supported by the model.
|
|
72
64
|
|
|
73
|
-
- **`output_lang: str`** → Forces the model to respond in a specific language.
|
|
65
|
+
- **`output_lang: str`** → Forces the model to respond in a specific language.
|
|
74
66
|
|
|
75
|
-
- **`user_prompt: str`** → Allows you to inject a custom instruction or
|
|
67
|
+
- **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
|
|
76
68
|
|
|
77
69
|
- **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
|
|
78
70
|
|
|
79
|
-
- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a
|
|
71
|
+
- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
|
|
80
72
|
|
|
81
|
-
- **`priority: int (Experimental)`** → Task execution priority level.
|
|
73
|
+
- **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
|
|
82
74
|
**Note:** This feature works if it's supported by the model and vLLM.
|
|
83
75
|
|
|
84
|
-
**Note:** There might be some tools that don't support some of the parameters above.
|
|
85
|
-
|
|
86
76
|
---
|
|
87
77
|
|
|
88
78
|
## 🧩 ToolOutput
|
|
89
79
|
|
|
90
80
|
Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
|
|
91
|
-
- **`result: Any`**
|
|
92
|
-
- **`analysis: str`**
|
|
93
|
-
- **`logprobs: list`**
|
|
94
|
-
- **`
|
|
95
|
-
- **`
|
|
96
|
-
- **`
|
|
97
|
-
- **`
|
|
81
|
+
- **`result: Any`**
|
|
82
|
+
- **`analysis: str`**
|
|
83
|
+
- **`logprobs: list`**
|
|
84
|
+
- **`errors: list[str]`**
|
|
85
|
+
- **`ToolOutputMetadata`** →
|
|
86
|
+
- **`tool_name: str`**
|
|
87
|
+
- **`processed_at: datetime`**
|
|
88
|
+
- **`execution_time: float`**
|
|
98
89
|
|
|
99
|
-
**Note:** You can use `repr(ToolOutput)` to
|
|
100
|
-
|
|
101
|
-
---
|
|
102
|
-
|
|
103
|
-
## 🚀 Installation
|
|
104
|
-
|
|
105
|
-
Install the latest release via PyPI:
|
|
106
|
-
|
|
107
|
-
```bash
|
|
108
|
-
pip install -U hamtaa-texttools
|
|
109
|
-
```
|
|
90
|
+
**Note:** You can use `repr(ToolOutput)` to print your output with all the details.
|
|
110
91
|
|
|
111
92
|
---
|
|
112
93
|
|
|
@@ -124,26 +105,13 @@ pip install -U hamtaa-texttools
|
|
|
124
105
|
from openai import OpenAI
|
|
125
106
|
from texttools import TheTool
|
|
126
107
|
|
|
127
|
-
# Create your OpenAI client
|
|
128
108
|
client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
|
|
109
|
+
model = "model_name"
|
|
129
110
|
|
|
130
|
-
# Specify the model
|
|
131
|
-
model = "gpt-4o-mini"
|
|
132
|
-
|
|
133
|
-
# Create an instance of TheTool
|
|
134
111
|
the_tool = TheTool(client=client, model=model)
|
|
135
112
|
|
|
136
|
-
|
|
137
|
-
detection
|
|
138
|
-
print(detection.result)
|
|
139
|
-
print(detection.logprobs)
|
|
140
|
-
# Output: True + logprobs
|
|
141
|
-
|
|
142
|
-
# Example: Translation
|
|
143
|
-
translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
|
|
144
|
-
print(translation.result)
|
|
145
|
-
print(translation.analysis)
|
|
146
|
-
# Output: "Hi! How are you?" + analysis
|
|
113
|
+
detection = the_tool.is_question("Is this project open source?")
|
|
114
|
+
print(repr(detection))
|
|
147
115
|
```
|
|
148
116
|
|
|
149
117
|
---
|
|
@@ -156,22 +124,17 @@ from openai import AsyncOpenAI
|
|
|
156
124
|
from texttools import AsyncTheTool
|
|
157
125
|
|
|
158
126
|
async def main():
|
|
159
|
-
# Create your AsyncOpenAI client
|
|
160
127
|
async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
|
|
128
|
+
model = "model_name"
|
|
161
129
|
|
|
162
|
-
# Specify the model
|
|
163
|
-
model = "gpt-4o-mini"
|
|
164
|
-
|
|
165
|
-
# Create an instance of AsyncTheTool
|
|
166
130
|
async_the_tool = AsyncTheTool(client=async_client, model=model)
|
|
167
131
|
|
|
168
|
-
# Example: Async Translation and Keyword Extraction
|
|
169
132
|
translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
|
|
170
133
|
keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
|
|
171
134
|
|
|
172
135
|
(translation, keywords) = await asyncio.gather(translation_task, keywords_task)
|
|
173
|
-
print(translation
|
|
174
|
-
print(keywords
|
|
136
|
+
print(repr(translation))
|
|
137
|
+
print(repr(keywords))
|
|
175
138
|
|
|
176
139
|
asyncio.run(main())
|
|
177
140
|
```
|
|
@@ -189,37 +152,16 @@ Use **TextTools** when you need to:
|
|
|
189
152
|
|
|
190
153
|
---
|
|
191
154
|
|
|
192
|
-
## 🔍 Logging
|
|
193
|
-
|
|
194
|
-
TextTools uses Python's standard `logging` module. The library's default logger level is `WARNING`, so if you want to modify it, follow instructions:
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
```python
|
|
198
|
-
import logging
|
|
199
|
-
|
|
200
|
-
# Default: warnings and errors only
|
|
201
|
-
logging.basicConfig(level=logging.WARNING)
|
|
202
|
-
|
|
203
|
-
# Debug everything (verbose)
|
|
204
|
-
logging.basicConfig(level=logging.DEBUG)
|
|
205
|
-
|
|
206
|
-
# Complete silence
|
|
207
|
-
logging.basicConfig(level=logging.CRITICAL)
|
|
208
|
-
```
|
|
209
|
-
|
|
210
|
-
---
|
|
211
|
-
|
|
212
155
|
## 📚 Batch Processing
|
|
213
156
|
|
|
214
157
|
Process large datasets efficiently using OpenAI's batch API.
|
|
215
158
|
|
|
216
|
-
## ⚡ Quick Start (Batch)
|
|
159
|
+
## ⚡ Quick Start (Batch Runner)
|
|
217
160
|
|
|
218
161
|
```python
|
|
219
162
|
from pydantic import BaseModel
|
|
220
|
-
from texttools import
|
|
163
|
+
from texttools import BatchRunner, BatchConfig
|
|
221
164
|
|
|
222
|
-
# Configure your batch job
|
|
223
165
|
config = BatchConfig(
|
|
224
166
|
system_prompt="Extract entities from the text",
|
|
225
167
|
job_name="entity_extraction",
|
|
@@ -228,12 +170,10 @@ config = BatchConfig(
|
|
|
228
170
|
model="gpt-4o-mini"
|
|
229
171
|
)
|
|
230
172
|
|
|
231
|
-
# Define your output schema
|
|
232
173
|
class Output(BaseModel):
|
|
233
174
|
entities: list[str]
|
|
234
175
|
|
|
235
|
-
|
|
236
|
-
runner = BatchJobRunner(config, output_model=Output)
|
|
176
|
+
runner = BatchRunner(config, output_model=Output)
|
|
237
177
|
runner.run()
|
|
238
178
|
```
|
|
239
179
|
|