hamtaa-texttools 1.3.2__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/LICENSE +1 -1
  2. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/PKG-INFO +40 -47
  3. hamtaa_texttools-2.1.0/README.md +157 -0
  4. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/hamtaa_texttools.egg-info/PKG-INFO +40 -47
  5. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/hamtaa_texttools.egg-info/SOURCES.txt +6 -8
  6. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/hamtaa_texttools.egg-info/requires.txt +1 -0
  7. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/pyproject.toml +46 -45
  8. hamtaa_texttools-2.1.0/tests/test_category_tree.py +48 -0
  9. hamtaa_texttools-2.1.0/tests/test_to_chunks.py +13 -0
  10. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/__init__.py +1 -1
  11. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/core/internal_models.py +16 -7
  12. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/core/operators/async_operator.py +10 -16
  13. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/core/operators/sync_operator.py +10 -16
  14. hamtaa_texttools-2.1.0/texttools/core/utils.py +260 -0
  15. hamtaa_texttools-2.1.0/texttools/models.py +143 -0
  16. hamtaa_texttools-1.3.2/texttools/prompts/rewrite.yaml → hamtaa_texttools-2.1.0/texttools/prompts/augment.yaml +3 -3
  17. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/prompts/categorize.yaml +7 -8
  18. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/prompts/extract_entities.yaml +2 -2
  19. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/prompts/extract_keywords.yaml +4 -2
  20. hamtaa_texttools-1.3.2/texttools/prompts/check_fact.yaml → hamtaa_texttools-2.1.0/texttools/prompts/is_fact.yaml +5 -4
  21. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/prompts/is_question.yaml +1 -1
  22. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/prompts/merge_questions.yaml +8 -6
  23. hamtaa_texttools-2.1.0/texttools/prompts/propositionize.yaml +28 -0
  24. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/prompts/run_custom.yaml +3 -1
  25. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/prompts/summarize.yaml +3 -3
  26. hamtaa_texttools-2.1.0/texttools/prompts/to_question.yaml +60 -0
  27. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/prompts/translate.yaml +4 -4
  28. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/tools/async_tools.py +152 -169
  29. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/tools/sync_tools.py +138 -150
  30. hamtaa_texttools-1.3.2/README.md +0 -165
  31. hamtaa_texttools-1.3.2/tests/test_all_async_tools.py +0 -99
  32. hamtaa_texttools-1.3.2/tests/test_all_tools.py +0 -118
  33. hamtaa_texttools-1.3.2/tests/test_output_validation.py +0 -31
  34. hamtaa_texttools-1.3.2/texttools/core/engine.py +0 -262
  35. hamtaa_texttools-1.3.2/texttools/models.py +0 -88
  36. hamtaa_texttools-1.3.2/texttools/prompts/propositionize.yaml +0 -24
  37. hamtaa_texttools-1.3.2/texttools/prompts/subject_to_question.yaml +0 -26
  38. hamtaa_texttools-1.3.2/texttools/prompts/text_to_question.yaml +0 -26
  39. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  40. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  41. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/setup.cfg +0 -0
  42. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/core/__init__.py +0 -0
  43. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/core/exceptions.py +0 -0
  44. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/core/operators/__init__.py +0 -0
  45. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/py.typed +0 -0
  46. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.1.0}/texttools/tools/__init__.py +0 -0
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
21
+ SOFTWARE.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.3.2
3
+ Version: 2.1.0
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -11,9 +11,10 @@ Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
12
  Classifier: Topic :: Text Processing
13
13
  Classifier: Operating System :: OS Independent
14
- Requires-Python: >=3.9
14
+ Requires-Python: >=3.11
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
+ Requires-Dist: dotenv>=0.9.9
17
18
  Requires-Dist: openai>=1.97.1
18
19
  Requires-Dist: pydantic>=2.0.0
19
20
  Requires-Dist: pyyaml>=6.0
@@ -30,30 +31,27 @@ Dynamic: license-file
30
31
 
31
32
  It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
32
33
 
33
- It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
34
-
35
- **Note:** Most features of `texttools` are reliable when you use `google/gemma-3n-e4b-it` model.
34
+ It provides ready-to-use utilities for **translation, question detection, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
36
35
 
37
36
  ---
38
37
 
39
38
  ## ✨ Features
40
39
 
41
- TextTools provides a rich collection of high-level NLP utilities,
40
+ TextTools provides a collection of high-level NLP utilities.
42
41
  Each tool is designed to work with structured outputs.
43
42
 
44
- - **`categorize()`** - Classifies text into given categories
45
- - **`extract_keywords()`** - Extracts keywords from the text
46
- - **`extract_entities()`** - Named Entity Recognition (NER) system
47
- - **`is_question()`** - Binary question detection
48
- - **`text_to_question()`** - Generates questions from text
49
- - **`merge_questions()`** - Merges multiple questions into one
50
- - **`rewrite()`** - Rewrites text in a different way
51
- - **`subject_to_question()`** - Generates questions about a given subject
52
- - **`summarize()`** - Text summarization
53
- - **`translate()`** - Text translation
54
- - **`propositionize()`** - Convert text to atomic independent meaningful sentences
55
- - **`check_fact()`** - Check whether a statement is relevant to the source text
56
- - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
43
+ - **`categorize()`** - Classify text into given categories
44
+ - **`extract_keywords()`** - Extract keywords from the text
45
+ - **`extract_entities()`** - Perform Named Entity Recognition (NER)
46
+ - **`is_question()`** - Detect if the input is phrased as a question
47
+ - **`to_question()`** - Generate questions from the given text / subject
48
+ - **`merge_questions()`** - Merge multiple questions into one
49
+ - **`augment()`** - Rewrite text in different augmentations
50
+ - **`summarize()`** - Summarize the given text
51
+ - **`translate()`** - Translate text between languages
52
+ - **`propositionize()`** - Convert a text into atomic, independent, meaningful sentences
53
+ - **`is_fact()`** - Check whether a statement is a fact based on the source text
54
+ - **`run_custom()`** - Custom tool that can do almost anything
57
55
 
58
56
  ---
59
57
 
@@ -71,14 +69,14 @@ pip install -U hamtaa-texttools
71
69
 
72
70
  | Status | Meaning | Tools | Safe for Production? |
73
71
  |--------|---------|----------|-------------------|
74
- | **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
75
- | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
72
+ | **✅ Production** | Evaluated and tested. | `categorize()`, `extract_keywords()`, `extract_entities()`, `is_question()`, `to_question()`, `merge_questions()`, `augment()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
73
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. | `translate()`, `propositionize()`, `is_fact()` | **Use with caution** |
76
74
 
77
75
  ---
78
76
 
79
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator`, `priority` and `timeout` parameters
77
+ ## ⚙️ Additional Parameters
80
78
 
81
- TextTools provides several optional flags to customize LLM behavior:
79
+ - **`raise_on_error: bool`** → (`TheTool/AsyncTheTool` parameter) Raise errors (True) or return them in output (False). Default is True.
82
80
 
83
81
  - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
84
82
  **Note:** This doubles token usage per call.
@@ -88,17 +86,17 @@ TextTools provides several optional flags to customize LLM behavior:
88
86
 
89
87
  - **`output_lang: str`** → Forces the model to respond in a specific language.
90
88
 
91
- - **`user_prompt: str`** → Allows you to inject a custom instruction or into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
89
+ - **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template.
92
90
 
93
- - **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
91
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number between `0.0` and `2.0`.
94
92
 
95
- - **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
93
+ - **`validator: Callable (Experimental)`** → Forces the tool to validate the output result based on your validator function. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
96
94
 
97
- - **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
95
+ - **`priority: int (Experimental)`** → Affects processing order in queues.
98
96
  **Note:** This feature works if it's supported by the model and vLLM.
99
97
 
100
- - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
101
- **Note:** This feature only exists in `AsyncTheTool`.
98
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error.
99
+ **Note:** This feature is only available in `AsyncTheTool`.
102
100
 
103
101
 
104
102
  ---
@@ -110,12 +108,14 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
110
108
  - **`analysis: str`**
111
109
  - **`logprobs: list`**
112
110
  - **`errors: list[str]`**
113
- - **`ToolOutputMetadata`**
111
+ - **`ToolOutputMetadata`**
114
112
  - **`tool_name: str`**
115
113
  - **`processed_at: datetime`**
116
114
  - **`execution_time: float`**
117
115
 
118
- **Note:** You can use `repr(ToolOutput)` to print your output with all the details.
116
+ - Serialize output to JSON using the `to_json()` method.
117
+ - Verify operation success with the `is_successful()` method.
118
+ - Convert output to a dictionary with the `to_dict()` method.
119
119
 
120
120
  ---
121
121
 
@@ -133,13 +133,13 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
133
133
  from openai import OpenAI
134
134
  from texttools import TheTool
135
135
 
136
- client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
136
+ client = OpenAI(base_url="your_url", api_key="your_api_key")
137
137
  model = "model_name"
138
138
 
139
139
  the_tool = TheTool(client=client, model=model)
140
140
 
141
141
  detection = the_tool.is_question("Is this project open source?")
142
- print(repr(detection))
142
+ print(detection.to_json())
143
143
  ```
144
144
 
145
145
  ---
@@ -157,30 +157,23 @@ async def main():
157
157
 
158
158
  async_the_tool = AsyncTheTool(client=async_client, model=model)
159
159
 
160
- translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
161
- keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
160
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_lang="English")
161
+ keywords_task = async_the_tool.extract_keywords("This open source project is great for processing large datasets!")
162
162
 
163
163
  (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
164
- print(repr(translation))
165
- print(repr(keywords))
164
+
165
+ print(translation.to_json())
166
+ print(keywords.to_json())
166
167
 
167
168
  asyncio.run(main())
168
169
  ```
169
170
 
170
171
  ---
171
172
 
172
- ## 👍 Use Cases
173
+ ## ✅ Use Cases
173
174
 
174
175
  Use **TextTools** when you need to:
175
176
 
176
- - 🔍 **Classify** large datasets quickly without model training
177
- - 🌍 **Translate** and process multilingual corpora with ease
177
+ - 🔍 **Classify** large datasets quickly without model training
178
178
  - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
179
179
  - 📊 **Analyze** large text collections using embeddings and categorization
180
-
181
- ---
182
-
183
- ## 🤝 Contributing
184
-
185
- Contributions are welcome!
186
- Feel free to **open issues, suggest new features, or submit pull requests**.
@@ -0,0 +1,157 @@
1
+ # TextTools
2
+
3
+ ![PyPI](https://img.shields.io/pypi/v/hamtaa-texttools)
4
+ ![License](https://img.shields.io/pypi/l/hamtaa-texttools)
5
+
6
+ ## 📌 Overview
7
+
8
+ **TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
9
+
10
+ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
11
+
12
+ It provides ready-to-use utilities for **translation, question detection, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
13
+
14
+ ---
15
+
16
+ ## ✨ Features
17
+
18
+ TextTools provides a collection of high-level NLP utilities.
19
+ Each tool is designed to work with structured outputs.
20
+
21
+ - **`categorize()`** - Classify text into given categories
22
+ - **`extract_keywords()`** - Extract keywords from the text
23
+ - **`extract_entities()`** - Perform Named Entity Recognition (NER)
24
+ - **`is_question()`** - Detect if the input is phrased as a question
25
+ - **`to_question()`** - Generate questions from the given text / subject
26
+ - **`merge_questions()`** - Merge multiple questions into one
27
+ - **`augment()`** - Rewrite text in different augmentations
28
+ - **`summarize()`** - Summarize the given text
29
+ - **`translate()`** - Translate text between languages
30
+ - **`propositionize()`** - Convert a text into atomic, independent, meaningful sentences
31
+ - **`is_fact()`** - Check whether a statement is a fact based on the source text
32
+ - **`run_custom()`** - Custom tool that can do almost anything
33
+
34
+ ---
35
+
36
+ ## 🚀 Installation
37
+
38
+ Install the latest release via PyPI:
39
+
40
+ ```bash
41
+ pip install -U hamtaa-texttools
42
+ ```
43
+
44
+ ---
45
+
46
+ ## 📊 Tool Quality Tiers
47
+
48
+ | Status | Meaning | Tools | Safe for Production? |
49
+ |--------|---------|----------|-------------------|
50
+ | **✅ Production** | Evaluated and tested. | `categorize()`, `extract_keywords()`, `extract_entities()`, `is_question()`, `to_question()`, `merge_questions()`, `augment()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
51
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. | `translate()`, `propositionize()`, `is_fact()` | **Use with caution** |
52
+
53
+ ---
54
+
55
+ ## ⚙️ Additional Parameters
56
+
57
+ - **`raise_on_error: bool`** → (`TheTool/AsyncTheTool` parameter) Raise errors (True) or return them in output (False). Default is True.
58
+
59
+ - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
60
+ **Note:** This doubles token usage per call.
61
+
62
+ - **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
63
+ **Note:** This feature works if it's supported by the model.
64
+
65
+ - **`output_lang: str`** → Forces the model to respond in a specific language.
66
+
67
+ - **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template.
68
+
69
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number between `0.0` and `2.0`.
70
+
71
+ - **`validator: Callable (Experimental)`** → Forces the tool to validate the output result based on your validator function. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
72
+
73
+ - **`priority: int (Experimental)`** → Affects processing order in queues.
74
+ **Note:** This feature works if it's supported by the model and vLLM.
75
+
76
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error.
77
+ **Note:** This feature is only available in `AsyncTheTool`.
78
+
79
+
80
+ ---
81
+
82
+ ## 🧩 ToolOutput
83
+
84
+ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
85
+ - **`result: Any`**
86
+ - **`analysis: str`**
87
+ - **`logprobs: list`**
88
+ - **`errors: list[str]`**
89
+ - **`ToolOutputMetadata`**
90
+ - **`tool_name: str`**
91
+ - **`processed_at: datetime`**
92
+ - **`execution_time: float`**
93
+
94
+ - Serialize output to JSON using the `to_json()` method.
95
+ - Verify operation success with the `is_successful()` method.
96
+ - Convert output to a dictionary with the `to_dict()` method.
97
+
98
+ ---
99
+
100
+ ## 🧨 Sync vs Async
101
+ | Tool | Style | Use case |
102
+ |--------------|---------|---------------------------------------------|
103
+ | `TheTool` | Sync | Simple scripts, sequential workflows |
104
+ | `AsyncTheTool` | Async | High-throughput apps, APIs, concurrent tasks |
105
+
106
+ ---
107
+
108
+ ## ⚡ Quick Start (Sync)
109
+
110
+ ```python
111
+ from openai import OpenAI
112
+ from texttools import TheTool
113
+
114
+ client = OpenAI(base_url="your_url", api_key="your_api_key")
115
+ model = "model_name"
116
+
117
+ the_tool = TheTool(client=client, model=model)
118
+
119
+ detection = the_tool.is_question("Is this project open source?")
120
+ print(detection.to_json())
121
+ ```
122
+
123
+ ---
124
+
125
+ ## ⚡ Quick Start (Async)
126
+
127
+ ```python
128
+ import asyncio
129
+ from openai import AsyncOpenAI
130
+ from texttools import AsyncTheTool
131
+
132
+ async def main():
133
+ async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
134
+ model = "model_name"
135
+
136
+ async_the_tool = AsyncTheTool(client=async_client, model=model)
137
+
138
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_lang="English")
139
+ keywords_task = async_the_tool.extract_keywords("This open source project is great for processing large datasets!")
140
+
141
+ (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
142
+
143
+ print(translation.to_json())
144
+ print(keywords.to_json())
145
+
146
+ asyncio.run(main())
147
+ ```
148
+
149
+ ---
150
+
151
+ ## ✅ Use Cases
152
+
153
+ Use **TextTools** when you need to:
154
+
155
+ - 🔍 **Classify** large datasets quickly without model training
156
+ - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
157
+ - 📊 **Analyze** large text collections using embeddings and categorization
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.3.2
3
+ Version: 2.1.0
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -11,9 +11,10 @@ Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
12
  Classifier: Topic :: Text Processing
13
13
  Classifier: Operating System :: OS Independent
14
- Requires-Python: >=3.9
14
+ Requires-Python: >=3.11
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
+ Requires-Dist: dotenv>=0.9.9
17
18
  Requires-Dist: openai>=1.97.1
18
19
  Requires-Dist: pydantic>=2.0.0
19
20
  Requires-Dist: pyyaml>=6.0
@@ -30,30 +31,27 @@ Dynamic: license-file
30
31
 
31
32
  It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
32
33
 
33
- It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
34
-
35
- **Note:** Most features of `texttools` are reliable when you use `google/gemma-3n-e4b-it` model.
34
+ It provides ready-to-use utilities for **translation, question detection, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
36
35
 
37
36
  ---
38
37
 
39
38
  ## ✨ Features
40
39
 
41
- TextTools provides a rich collection of high-level NLP utilities,
40
+ TextTools provides a collection of high-level NLP utilities.
42
41
  Each tool is designed to work with structured outputs.
43
42
 
44
- - **`categorize()`** - Classifies text into given categories
45
- - **`extract_keywords()`** - Extracts keywords from the text
46
- - **`extract_entities()`** - Named Entity Recognition (NER) system
47
- - **`is_question()`** - Binary question detection
48
- - **`text_to_question()`** - Generates questions from text
49
- - **`merge_questions()`** - Merges multiple questions into one
50
- - **`rewrite()`** - Rewrites text in a different way
51
- - **`subject_to_question()`** - Generates questions about a given subject
52
- - **`summarize()`** - Text summarization
53
- - **`translate()`** - Text translation
54
- - **`propositionize()`** - Convert text to atomic independent meaningful sentences
55
- - **`check_fact()`** - Check whether a statement is relevant to the source text
56
- - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
43
+ - **`categorize()`** - Classify text into given categories
44
+ - **`extract_keywords()`** - Extract keywords from the text
45
+ - **`extract_entities()`** - Perform Named Entity Recognition (NER)
46
+ - **`is_question()`** - Detect if the input is phrased as a question
47
+ - **`to_question()`** - Generate questions from the given text / subject
48
+ - **`merge_questions()`** - Merge multiple questions into one
49
+ - **`augment()`** - Rewrite text in different augmentations
50
+ - **`summarize()`** - Summarize the given text
51
+ - **`translate()`** - Translate text between languages
52
+ - **`propositionize()`** - Convert a text into atomic, independent, meaningful sentences
53
+ - **`is_fact()`** - Check whether a statement is a fact based on the source text
54
+ - **`run_custom()`** - Custom tool that can do almost anything
57
55
 
58
56
  ---
59
57
 
@@ -71,14 +69,14 @@ pip install -U hamtaa-texttools
71
69
 
72
70
  | Status | Meaning | Tools | Safe for Production? |
73
71
  |--------|---------|----------|-------------------|
74
- | **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
75
- | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
72
+ | **✅ Production** | Evaluated and tested. | `categorize()`, `extract_keywords()`, `extract_entities()`, `is_question()`, `to_question()`, `merge_questions()`, `augment()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
73
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. | `translate()`, `propositionize()`, `is_fact()` | **Use with caution** |
76
74
 
77
75
  ---
78
76
 
79
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator`, `priority` and `timeout` parameters
77
+ ## ⚙️ Additional Parameters
80
78
 
81
- TextTools provides several optional flags to customize LLM behavior:
79
+ - **`raise_on_error: bool`** → (`TheTool/AsyncTheTool` parameter) Raise errors (True) or return them in output (False). Default is True.
82
80
 
83
81
  - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
84
82
  **Note:** This doubles token usage per call.
@@ -88,17 +86,17 @@ TextTools provides several optional flags to customize LLM behavior:
88
86
 
89
87
  - **`output_lang: str`** → Forces the model to respond in a specific language.
90
88
 
91
- - **`user_prompt: str`** → Allows you to inject a custom instruction or into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
89
+ - **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template.
92
90
 
93
- - **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
91
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number between `0.0` and `2.0`.
94
92
 
95
- - **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
93
+ - **`validator: Callable (Experimental)`** → Forces the tool to validate the output result based on your validator function. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
96
94
 
97
- - **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
95
+ - **`priority: int (Experimental)`** → Affects processing order in queues.
98
96
  **Note:** This feature works if it's supported by the model and vLLM.
99
97
 
100
- - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
101
- **Note:** This feature only exists in `AsyncTheTool`.
98
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error.
99
+ **Note:** This feature is only available in `AsyncTheTool`.
102
100
 
103
101
 
104
102
  ---
@@ -110,12 +108,14 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
110
108
  - **`analysis: str`**
111
109
  - **`logprobs: list`**
112
110
  - **`errors: list[str]`**
113
- - **`ToolOutputMetadata`**
111
+ - **`ToolOutputMetadata`**
114
112
  - **`tool_name: str`**
115
113
  - **`processed_at: datetime`**
116
114
  - **`execution_time: float`**
117
115
 
118
- **Note:** You can use `repr(ToolOutput)` to print your output with all the details.
116
+ - Serialize output to JSON using the `to_json()` method.
117
+ - Verify operation success with the `is_successful()` method.
118
+ - Convert output to a dictionary with the `to_dict()` method.
119
119
 
120
120
  ---
121
121
 
@@ -133,13 +133,13 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
133
133
  from openai import OpenAI
134
134
  from texttools import TheTool
135
135
 
136
- client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
136
+ client = OpenAI(base_url="your_url", api_key="your_api_key")
137
137
  model = "model_name"
138
138
 
139
139
  the_tool = TheTool(client=client, model=model)
140
140
 
141
141
  detection = the_tool.is_question("Is this project open source?")
142
- print(repr(detection))
142
+ print(detection.to_json())
143
143
  ```
144
144
 
145
145
  ---
@@ -157,30 +157,23 @@ async def main():
157
157
 
158
158
  async_the_tool = AsyncTheTool(client=async_client, model=model)
159
159
 
160
- translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
161
- keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
160
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_lang="English")
161
+ keywords_task = async_the_tool.extract_keywords("This open source project is great for processing large datasets!")
162
162
 
163
163
  (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
164
- print(repr(translation))
165
- print(repr(keywords))
164
+
165
+ print(translation.to_json())
166
+ print(keywords.to_json())
166
167
 
167
168
  asyncio.run(main())
168
169
  ```
169
170
 
170
171
  ---
171
172
 
172
- ## 👍 Use Cases
173
+ ## ✅ Use Cases
173
174
 
174
175
  Use **TextTools** when you need to:
175
176
 
176
- - 🔍 **Classify** large datasets quickly without model training
177
- - 🌍 **Translate** and process multilingual corpora with ease
177
+ - 🔍 **Classify** large datasets quickly without model training
178
178
  - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
179
179
  - 📊 **Analyze** large text collections using embeddings and categorization
180
-
181
- ---
182
-
183
- ## 🤝 Contributing
184
-
185
- Contributions are welcome!
186
- Feel free to **open issues, suggest new features, or submit pull requests**.
@@ -6,31 +6,29 @@ hamtaa_texttools.egg-info/SOURCES.txt
6
6
  hamtaa_texttools.egg-info/dependency_links.txt
7
7
  hamtaa_texttools.egg-info/requires.txt
8
8
  hamtaa_texttools.egg-info/top_level.txt
9
- tests/test_all_async_tools.py
10
- tests/test_all_tools.py
11
- tests/test_output_validation.py
9
+ tests/test_category_tree.py
10
+ tests/test_to_chunks.py
12
11
  texttools/__init__.py
13
12
  texttools/models.py
14
13
  texttools/py.typed
15
14
  texttools/core/__init__.py
16
- texttools/core/engine.py
17
15
  texttools/core/exceptions.py
18
16
  texttools/core/internal_models.py
17
+ texttools/core/utils.py
19
18
  texttools/core/operators/__init__.py
20
19
  texttools/core/operators/async_operator.py
21
20
  texttools/core/operators/sync_operator.py
21
+ texttools/prompts/augment.yaml
22
22
  texttools/prompts/categorize.yaml
23
- texttools/prompts/check_fact.yaml
24
23
  texttools/prompts/extract_entities.yaml
25
24
  texttools/prompts/extract_keywords.yaml
25
+ texttools/prompts/is_fact.yaml
26
26
  texttools/prompts/is_question.yaml
27
27
  texttools/prompts/merge_questions.yaml
28
28
  texttools/prompts/propositionize.yaml
29
- texttools/prompts/rewrite.yaml
30
29
  texttools/prompts/run_custom.yaml
31
- texttools/prompts/subject_to_question.yaml
32
30
  texttools/prompts/summarize.yaml
33
- texttools/prompts/text_to_question.yaml
31
+ texttools/prompts/to_question.yaml
34
32
  texttools/prompts/translate.yaml
35
33
  texttools/tools/__init__.py
36
34
  texttools/tools/async_tools.py
@@ -1,3 +1,4 @@
1
+ dotenv>=0.9.9
1
2
  openai>=1.97.1
2
3
  pydantic>=2.0.0
3
4
  pyyaml>=6.0
@@ -1,45 +1,46 @@
1
- [build-system]
2
- requires = ["setuptools>=61.0", "wheel"]
3
- build-backend = "setuptools.build_meta"
4
-
5
- [project]
6
- name = "hamtaa-texttools"
7
- version = "1.3.2"
8
- authors = [
9
- {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
10
- {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
11
- {name = "Montazer", email = "montazerh82@gmail.com"},
12
- {name = "Givechi", email = "mohamad.m.givechi@gmail.com"},
13
- {name = "Zareshahi", email = "a.zareshahi1377@gmail.com"},
14
- ]
15
- maintainers = [
16
- {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
17
- {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
18
- ]
19
- description = "A high-level NLP toolkit built on top of modern LLMs."
20
- readme = "README.md"
21
- license = {text = "MIT"}
22
- requires-python = ">=3.9"
23
- dependencies = [
24
- "openai>=1.97.1",
25
- "pydantic>=2.0.0",
26
- "pyyaml>=6.0",
27
- ]
28
- keywords = ["nlp", "llm", "text-processing", "openai"]
29
- classifiers = [
30
- "Development Status :: 5 - Production/Stable",
31
- "License :: OSI Approved :: MIT License",
32
- "Topic :: Scientific/Engineering :: Artificial Intelligence",
33
- "Topic :: Text Processing",
34
- "Operating System :: OS Independent",
35
- ]
36
-
37
- [tool.setuptools.packages.find]
38
- where = ["."]
39
- include = ["texttools*"]
40
-
41
- [tool.setuptools]
42
- include-package-data = true
43
-
44
- [tool.setuptools.package-data]
45
- "texttools" = ["prompts/*.yaml", "py.typed"]
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "hamtaa-texttools"
7
+ version = "2.1.0"
8
+ authors = [
9
+ {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
10
+ {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
11
+ {name = "Montazer", email = "montazerh82@gmail.com"},
12
+ {name = "Givechi", email = "mohamad.m.givechi@gmail.com"},
13
+ {name = "Zareshahi", email = "a.zareshahi1377@gmail.com"},
14
+ ]
15
+ maintainers = [
16
+ {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
17
+ {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
18
+ ]
19
+ description = "A high-level NLP toolkit built on top of modern LLMs."
20
+ readme = "README.md"
21
+ license = {text = "MIT"}
22
+ requires-python = ">=3.11"
23
+ dependencies = [
24
+ "dotenv>=0.9.9",
25
+ "openai>=1.97.1",
26
+ "pydantic>=2.0.0",
27
+ "pyyaml>=6.0",
28
+ ]
29
+ keywords = ["nlp", "llm", "text-processing", "openai"]
30
+ classifiers = [
31
+ "Development Status :: 5 - Production/Stable",
32
+ "License :: OSI Approved :: MIT License",
33
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
34
+ "Topic :: Text Processing",
35
+ "Operating System :: OS Independent",
36
+ ]
37
+
38
+ [tool.setuptools.packages.find]
39
+ where = ["."]
40
+ include = ["texttools*"]
41
+
42
+ [tool.setuptools]
43
+ include-package-data = true
44
+
45
+ [tool.setuptools.package-data]
46
+ "texttools" = ["prompts/*.yaml", "py.typed"]