hamtaa-texttools 1.3.1__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only, and reflects the changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/LICENSE +1 -1
  2. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/PKG-INFO +42 -48
  3. hamtaa_texttools-2.0.0/README.md +162 -0
  4. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/hamtaa_texttools.egg-info/PKG-INFO +42 -48
  5. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/hamtaa_texttools.egg-info/SOURCES.txt +7 -8
  6. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/pyproject.toml +2 -2
  7. hamtaa_texttools-2.0.0/tests/test_category_tree.py +48 -0
  8. hamtaa_texttools-2.0.0/tests/test_to_chunks.py +13 -0
  9. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/__init__.py +1 -1
  10. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/core/internal_models.py +21 -8
  11. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/core/operators/async_operator.py +11 -19
  12. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/core/operators/sync_operator.py +11 -19
  13. hamtaa_texttools-2.0.0/texttools/core/utils.py +260 -0
  14. hamtaa_texttools-2.0.0/texttools/models.py +143 -0
  15. hamtaa_texttools-1.3.1/texttools/prompts/rewrite.yaml → hamtaa_texttools-2.0.0/texttools/prompts/augment.yaml +3 -3
  16. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/prompts/categorize.yaml +7 -8
  17. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/prompts/extract_entities.yaml +2 -2
  18. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/prompts/extract_keywords.yaml +4 -2
  19. hamtaa_texttools-1.3.1/texttools/prompts/check_fact.yaml → hamtaa_texttools-2.0.0/texttools/prompts/is_fact.yaml +5 -4
  20. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/prompts/is_question.yaml +1 -1
  21. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/prompts/merge_questions.yaml +8 -6
  22. hamtaa_texttools-2.0.0/texttools/prompts/propositionize.yaml +28 -0
  23. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/prompts/run_custom.yaml +3 -1
  24. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/prompts/summarize.yaml +3 -3
  25. hamtaa_texttools-2.0.0/texttools/prompts/to_question.yaml +60 -0
  26. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/prompts/translate.yaml +4 -4
  27. hamtaa_texttools-2.0.0/texttools/tools/__init__.py +0 -0
  28. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/tools/async_tools.py +90 -169
  29. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/tools/sync_tools.py +76 -150
  30. hamtaa_texttools-1.3.1/README.md +0 -168
  31. hamtaa_texttools-1.3.1/tests/test_all_async_tools.py +0 -99
  32. hamtaa_texttools-1.3.1/tests/test_all_tools.py +0 -118
  33. hamtaa_texttools-1.3.1/tests/test_output_validation.py +0 -31
  34. hamtaa_texttools-1.3.1/texttools/core/engine.py +0 -264
  35. hamtaa_texttools-1.3.1/texttools/models.py +0 -88
  36. hamtaa_texttools-1.3.1/texttools/prompts/propositionize.yaml +0 -24
  37. hamtaa_texttools-1.3.1/texttools/prompts/subject_to_question.yaml +0 -26
  38. hamtaa_texttools-1.3.1/texttools/prompts/text_to_question.yaml +0 -26
  39. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  40. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/hamtaa_texttools.egg-info/requires.txt +0 -0
  41. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  42. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/setup.cfg +0 -0
  43. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/core/__init__.py +0 -0
  44. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/core/exceptions.py +0 -0
  45. {hamtaa_texttools-1.3.1/texttools/tools → hamtaa_texttools-2.0.0/texttools/core/operators}/__init__.py +0 -0
  46. {hamtaa_texttools-1.3.1 → hamtaa_texttools-2.0.0}/texttools/py.typed +0 -0
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
21
+ SOFTWARE.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.3.1
3
+ Version: 2.0.0
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -11,7 +11,7 @@ Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
12
  Classifier: Topic :: Text Processing
13
13
  Classifier: Operating System :: OS Independent
14
- Requires-Python: >=3.9
14
+ Requires-Python: >=3.11
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
17
  Requires-Dist: openai>=1.97.1
@@ -21,36 +21,36 @@ Dynamic: license-file
21
21
 
22
22
  # TextTools
23
23
 
24
+ ![PyPI](https://img.shields.io/pypi/v/hamtaa-texttools)
25
+ ![License](https://img.shields.io/pypi/l/hamtaa-texttools)
26
+
24
27
  ## 📌 Overview
25
28
 
26
29
  **TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
27
30
 
28
31
  It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
29
32
 
30
- It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
31
-
32
- **Note:** Most features of `texttools` are reliable when you use `google/gemma-3n-e4b-it` model.
33
+ It provides ready-to-use utilities for **translation, question detection, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
33
34
 
34
35
  ---
35
36
 
36
37
  ## ✨ Features
37
38
 
38
- TextTools provides a rich collection of high-level NLP utilities,
39
+ TextTools provides a collection of high-level NLP utilities.
39
40
  Each tool is designed to work with structured outputs.
40
41
 
41
- - **`categorize()`** - Classifies text into given categories
42
- - **`extract_keywords()`** - Extracts keywords from the text
43
- - **`extract_entities()`** - Named Entity Recognition (NER) system
44
- - **`is_question()`** - Binary question detection
45
- - **`text_to_question()`** - Generates questions from text
46
- - **`merge_questions()`** - Merges multiple questions into one
47
- - **`rewrite()`** - Rewrites text in a diffrent way
48
- - **`subject_to_question()`** - Generates questions about a specific subject
49
- - **`summarize()`** - Text summarization
50
- - **`translate()`** - Text translation
51
- - **`propositionize()`** - Convert text to atomic independence meaningful sentences
52
- - **`check_fact()`** - Check whether a statement is relevant to the source text
53
- - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
42
+ - **`categorize()`** - Classify text into given categories
43
+ - **`extract_keywords()`** - Extract keywords from the text
44
+ - **`extract_entities()`** - Perform Named Entity Recognition (NER)
45
+ - **`is_question()`** - Detect if the input is phrased as a question
46
+ - **`to_question()`** - Generate questions from the given text / subject
47
+ - **`merge_questions()`** - Merge multiple questions into one
48
+ - **`augment()`** - Rewrite text in different augmentations
49
+ - **`summarize()`** - Summarize the given text
50
+ - **`translate()`** - Translate text between languages
51
+ - **`propositionize()`** - Convert a text into atomic, independent, meaningful sentences
52
+ - **`is_fact()`** - Check whether a statement is a fact based on the source text
53
+ - **`run_custom()`** - Custom tool that can do almost anything
54
54
 
55
55
  ---
56
56
 
@@ -66,16 +66,14 @@ pip install -U hamtaa-texttools
66
66
 
67
67
  ## 📊 Tool Quality Tiers
68
68
 
69
- | Status | Meaning | Tools | Use in Production? |
69
+ | Status | Meaning | Tools | Safe for Production? |
70
70
  |--------|---------|----------|-------------------|
71
- | **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
72
- | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
71
+ | **✅ Production** | Evaluated and tested. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `to_question()`, `merge_questions()`, `augment()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
72
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. | `categorize()` (tree mode), `translate()`, `propositionize()`, `is_fact()` | **Use with caution** |
73
73
 
74
74
  ---
75
75
 
76
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator`, `priority` and `timeout` parameters
77
-
78
- TextTools provides several optional flags to customize LLM behavior:
76
+ ## ⚙️ Additional Parameters
79
77
 
80
78
  - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
81
79
  **Note:** This doubles token usage per call.
@@ -85,17 +83,17 @@ TextTools provides several optional flags to customize LLM behavior:
85
83
 
86
84
  - **`output_lang: str`** → Forces the model to respond in a specific language.
87
85
 
88
- - **`user_prompt: str`** → Allows you to inject a custom instruction or into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
86
+ - **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template.
89
87
 
90
- - **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
88
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number between `0.0` and `2.0`.
91
89
 
92
- - **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
90
+ - **`validator: Callable (Experimental)`** → Forces the tool to validate the output result based on your validator function. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
93
91
 
94
- - **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
92
+ - **`priority: int (Experimental)`** → Affects processing order in queues.
95
93
  **Note:** This feature works if it's supported by the model and vLLM.
96
94
 
97
- - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
98
- **Note:** This feature only exists in `AsyncTheTool`.
95
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error.
96
+ **Note:** This feature is only available in `AsyncTheTool`.
99
97
 
100
98
 
101
99
  ---
@@ -107,12 +105,14 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
107
105
  - **`analysis: str`**
108
106
  - **`logprobs: list`**
109
107
  - **`errors: list[str]`**
110
- - **`ToolOutputMetadata`**
108
+ - **`ToolOutputMetadata`**
111
109
  - **`tool_name: str`**
112
110
  - **`processed_at: datetime`**
113
111
  - **`execution_time: float`**
114
112
 
115
- **Note:** You can use `repr(ToolOutput)` to print your output with all the details.
113
+ - Serialize output to JSON using the `to_json()` method.
114
+ - Verify operation success with the `is_successful()` method.
115
+ - Convert output to a dictionary with the `to_dict()` method.
116
116
 
117
117
  ---
118
118
 
@@ -130,13 +130,13 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
130
130
  from openai import OpenAI
131
131
  from texttools import TheTool
132
132
 
133
- client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
133
+ client = OpenAI(base_url="your_url", API_KEY="your_api_key")
134
134
  model = "model_name"
135
135
 
136
136
  the_tool = TheTool(client=client, model=model)
137
137
 
138
138
  detection = the_tool.is_question("Is this project open source?")
139
- print(repr(detection))
139
+ print(detection.to_json())
140
140
  ```
141
141
 
142
142
  ---
@@ -154,24 +154,24 @@ async def main():
154
154
 
155
155
  async_the_tool = AsyncTheTool(client=async_client, model=model)
156
156
 
157
- translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
158
- keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
157
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_lang="English")
158
+ keywords_task = async_the_tool.extract_keywords("This open source project is great for processing large datasets!")
159
159
 
160
160
  (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
161
- print(repr(translation))
162
- print(repr(keywords))
161
+
162
+ print(translation.to_json())
163
+ print(keywords.to_json())
163
164
 
164
165
  asyncio.run(main())
165
166
  ```
166
167
 
167
168
  ---
168
169
 
169
- ## 👍 Use Cases
170
+ ## Use Cases
170
171
 
171
172
  Use **TextTools** when you need to:
172
173
 
173
- - 🔍 **Classify** large datasets quickly without model training
174
- - 🌍 **Translate** and process multilingual corpora with ease
174
+ - 🔍 **Classify** large datasets quickly without model training
175
175
  - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
176
176
  - 📊 **Analyze** large text collections using embeddings and categorization
177
177
 
@@ -181,9 +181,3 @@ Use **TextTools** when you need to:
181
181
 
182
182
  Contributions are welcome!
183
183
  Feel free to **open issues, suggest new features, or submit pull requests**.
184
-
185
- ---
186
-
187
- ## 🌿 License
188
-
189
- This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,162 @@
1
+ # TextTools
2
+
3
+ ![PyPI](https://img.shields.io/pypi/v/hamtaa-texttools)
4
+ ![License](https://img.shields.io/pypi/l/hamtaa-texttools)
5
+
6
+ ## 📌 Overview
7
+
8
+ **TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
9
+
10
+ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
11
+
12
+ It provides ready-to-use utilities for **translation, question detection, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
13
+
14
+ ---
15
+
16
+ ## ✨ Features
17
+
18
+ TextTools provides a collection of high-level NLP utilities.
19
+ Each tool is designed to work with structured outputs.
20
+
21
+ - **`categorize()`** - Classify text into given categories
22
+ - **`extract_keywords()`** - Extract keywords from the text
23
+ - **`extract_entities()`** - Perform Named Entity Recognition (NER)
24
+ - **`is_question()`** - Detect if the input is phrased as a question
25
+ - **`to_question()`** - Generate questions from the given text / subject
26
+ - **`merge_questions()`** - Merge multiple questions into one
27
+ - **`augment()`** - Rewrite text in different augmentations
28
+ - **`summarize()`** - Summarize the given text
29
+ - **`translate()`** - Translate text between languages
30
+ - **`propositionize()`** - Convert a text into atomic, independent, meaningful sentences
31
+ - **`is_fact()`** - Check whether a statement is a fact based on the source text
32
+ - **`run_custom()`** - Custom tool that can do almost anything
33
+
34
+ ---
35
+
36
+ ## 🚀 Installation
37
+
38
+ Install the latest release via PyPI:
39
+
40
+ ```bash
41
+ pip install -U hamtaa-texttools
42
+ ```
43
+
44
+ ---
45
+
46
+ ## 📊 Tool Quality Tiers
47
+
48
+ | Status | Meaning | Tools | Safe for Production? |
49
+ |--------|---------|----------|-------------------|
50
+ | **✅ Production** | Evaluated and tested. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `to_question()`, `merge_questions()`, `augment()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
51
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. | `categorize()` (tree mode), `translate()`, `propositionize()`, `is_fact()` | **Use with caution** |
52
+
53
+ ---
54
+
55
+ ## ⚙️ Additional Parameters
56
+
57
+ - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
58
+ **Note:** This doubles token usage per call.
59
+
60
+ - **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
61
+ **Note:** This feature works if it's supported by the model.
62
+
63
+ - **`output_lang: str`** → Forces the model to respond in a specific language.
64
+
65
+ - **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template.
66
+
67
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number between `0.0` and `2.0`.
68
+
69
+ - **`validator: Callable (Experimental)`** → Forces the tool to validate the output result based on your validator function. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
70
+
71
+ - **`priority: int (Experimental)`** → Affects processing order in queues.
72
+ **Note:** This feature works if it's supported by the model and vLLM.
73
+
74
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error.
75
+ **Note:** This feature is only available in `AsyncTheTool`.
76
+
77
+
78
+ ---
79
+
80
+ ## 🧩 ToolOutput
81
+
82
+ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
83
+ - **`result: Any`**
84
+ - **`analysis: str`**
85
+ - **`logprobs: list`**
86
+ - **`errors: list[str]`**
87
+ - **`ToolOutputMetadata`**
88
+ - **`tool_name: str`**
89
+ - **`processed_at: datetime`**
90
+ - **`execution_time: float`**
91
+
92
+ - Serialize output to JSON using the `to_json()` method.
93
+ - Verify operation success with the `is_successful()` method.
94
+ - Convert output to a dictionary with the `to_dict()` method.
95
+
96
+ ---
97
+
98
+ ## 🧨 Sync vs Async
99
+ | Tool | Style | Use case |
100
+ |--------------|---------|---------------------------------------------|
101
+ | `TheTool` | Sync | Simple scripts, sequential workflows |
102
+ | `AsyncTheTool` | Async | High-throughput apps, APIs, concurrent tasks |
103
+
104
+ ---
105
+
106
+ ## ⚡ Quick Start (Sync)
107
+
108
+ ```python
109
+ from openai import OpenAI
110
+ from texttools import TheTool
111
+
112
+ client = OpenAI(base_url="your_url", API_KEY="your_api_key")
113
+ model = "model_name"
114
+
115
+ the_tool = TheTool(client=client, model=model)
116
+
117
+ detection = the_tool.is_question("Is this project open source?")
118
+ print(detection.to_json())
119
+ ```
120
+
121
+ ---
122
+
123
+ ## ⚡ Quick Start (Async)
124
+
125
+ ```python
126
+ import asyncio
127
+ from openai import AsyncOpenAI
128
+ from texttools import AsyncTheTool
129
+
130
+ async def main():
131
+ async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
132
+ model = "model_name"
133
+
134
+ async_the_tool = AsyncTheTool(client=async_client, model=model)
135
+
136
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_lang="English")
137
+ keywords_task = async_the_tool.extract_keywords("This open source project is great for processing large datasets!")
138
+
139
+ (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
140
+
141
+ print(translation.to_json())
142
+ print(keywords.to_json())
143
+
144
+ asyncio.run(main())
145
+ ```
146
+
147
+ ---
148
+
149
+ ## ✅ Use Cases
150
+
151
+ Use **TextTools** when you need to:
152
+
153
+ - 🔍 **Classify** large datasets quickly without model training
154
+ - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
155
+ - 📊 **Analyze** large text collections using embeddings and categorization
156
+
157
+ ---
158
+
159
+ ## 🤝 Contributing
160
+
161
+ Contributions are welcome!
162
+ Feel free to **open issues, suggest new features, or submit pull requests**.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.3.1
3
+ Version: 2.0.0
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -11,7 +11,7 @@ Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
12
  Classifier: Topic :: Text Processing
13
13
  Classifier: Operating System :: OS Independent
14
- Requires-Python: >=3.9
14
+ Requires-Python: >=3.11
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
17
  Requires-Dist: openai>=1.97.1
@@ -21,36 +21,36 @@ Dynamic: license-file
21
21
 
22
22
  # TextTools
23
23
 
24
+ ![PyPI](https://img.shields.io/pypi/v/hamtaa-texttools)
25
+ ![License](https://img.shields.io/pypi/l/hamtaa-texttools)
26
+
24
27
  ## 📌 Overview
25
28
 
26
29
  **TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
27
30
 
28
31
  It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
29
32
 
30
- It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
31
-
32
- **Note:** Most features of `texttools` are reliable when you use `google/gemma-3n-e4b-it` model.
33
+ It provides ready-to-use utilities for **translation, question detection, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
33
34
 
34
35
  ---
35
36
 
36
37
  ## ✨ Features
37
38
 
38
- TextTools provides a rich collection of high-level NLP utilities,
39
+ TextTools provides a collection of high-level NLP utilities.
39
40
  Each tool is designed to work with structured outputs.
40
41
 
41
- - **`categorize()`** - Classifies text into given categories
42
- - **`extract_keywords()`** - Extracts keywords from the text
43
- - **`extract_entities()`** - Named Entity Recognition (NER) system
44
- - **`is_question()`** - Binary question detection
45
- - **`text_to_question()`** - Generates questions from text
46
- - **`merge_questions()`** - Merges multiple questions into one
47
- - **`rewrite()`** - Rewrites text in a diffrent way
48
- - **`subject_to_question()`** - Generates questions about a specific subject
49
- - **`summarize()`** - Text summarization
50
- - **`translate()`** - Text translation
51
- - **`propositionize()`** - Convert text to atomic independence meaningful sentences
52
- - **`check_fact()`** - Check whether a statement is relevant to the source text
53
- - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
42
+ - **`categorize()`** - Classify text into given categories
43
+ - **`extract_keywords()`** - Extract keywords from the text
44
+ - **`extract_entities()`** - Perform Named Entity Recognition (NER)
45
+ - **`is_question()`** - Detect if the input is phrased as a question
46
+ - **`to_question()`** - Generate questions from the given text / subject
47
+ - **`merge_questions()`** - Merge multiple questions into one
48
+ - **`augment()`** - Rewrite text in different augmentations
49
+ - **`summarize()`** - Summarize the given text
50
+ - **`translate()`** - Translate text between languages
51
+ - **`propositionize()`** - Convert a text into atomic, independent, meaningful sentences
52
+ - **`is_fact()`** - Check whether a statement is a fact based on the source text
53
+ - **`run_custom()`** - Custom tool that can do almost anything
54
54
 
55
55
  ---
56
56
 
@@ -66,16 +66,14 @@ pip install -U hamtaa-texttools
66
66
 
67
67
  ## 📊 Tool Quality Tiers
68
68
 
69
- | Status | Meaning | Tools | Use in Production? |
69
+ | Status | Meaning | Tools | Safe for Production? |
70
70
  |--------|---------|----------|-------------------|
71
- | **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
72
- | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
71
+ | **✅ Production** | Evaluated and tested. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `to_question()`, `merge_questions()`, `augment()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
72
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. | `categorize()` (tree mode), `translate()`, `propositionize()`, `is_fact()` | **Use with caution** |
73
73
 
74
74
  ---
75
75
 
76
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator`, `priority` and `timeout` parameters
77
-
78
- TextTools provides several optional flags to customize LLM behavior:
76
+ ## ⚙️ Additional Parameters
79
77
 
80
78
  - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
81
79
  **Note:** This doubles token usage per call.
@@ -85,17 +83,17 @@ TextTools provides several optional flags to customize LLM behavior:
85
83
 
86
84
  - **`output_lang: str`** → Forces the model to respond in a specific language.
87
85
 
88
- - **`user_prompt: str`** → Allows you to inject a custom instruction or into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
86
+ - **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template.
89
87
 
90
- - **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
88
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number between `0.0` and `2.0`.
91
89
 
92
- - **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
90
+ - **`validator: Callable (Experimental)`** → Forces the tool to validate the output result based on your validator function. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
93
91
 
94
- - **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
92
+ - **`priority: int (Experimental)`** → Affects processing order in queues.
95
93
  **Note:** This feature works if it's supported by the model and vLLM.
96
94
 
97
- - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
98
- **Note:** This feature only exists in `AsyncTheTool`.
95
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error.
96
+ **Note:** This feature is only available in `AsyncTheTool`.
99
97
 
100
98
 
101
99
  ---
@@ -107,12 +105,14 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
107
105
  - **`analysis: str`**
108
106
  - **`logprobs: list`**
109
107
  - **`errors: list[str]`**
110
- - **`ToolOutputMetadata`**
108
+ - **`ToolOutputMetadata`**
111
109
  - **`tool_name: str`**
112
110
  - **`processed_at: datetime`**
113
111
  - **`execution_time: float`**
114
112
 
115
- **Note:** You can use `repr(ToolOutput)` to print your output with all the details.
113
+ - Serialize output to JSON using the `to_json()` method.
114
+ - Verify operation success with the `is_successful()` method.
115
+ - Convert output to a dictionary with the `to_dict()` method.
116
116
 
117
117
  ---
118
118
 
@@ -130,13 +130,13 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
130
130
  from openai import OpenAI
131
131
  from texttools import TheTool
132
132
 
133
- client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
133
+ client = OpenAI(base_url="your_url", API_KEY="your_api_key")
134
134
  model = "model_name"
135
135
 
136
136
  the_tool = TheTool(client=client, model=model)
137
137
 
138
138
  detection = the_tool.is_question("Is this project open source?")
139
- print(repr(detection))
139
+ print(detection.to_json())
140
140
  ```
141
141
 
142
142
  ---
@@ -154,24 +154,24 @@ async def main():
154
154
 
155
155
  async_the_tool = AsyncTheTool(client=async_client, model=model)
156
156
 
157
- translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
158
- keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
157
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_lang="English")
158
+ keywords_task = async_the_tool.extract_keywords("This open source project is great for processing large datasets!")
159
159
 
160
160
  (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
161
- print(repr(translation))
162
- print(repr(keywords))
161
+
162
+ print(translation.to_json())
163
+ print(keywords.to_json())
163
164
 
164
165
  asyncio.run(main())
165
166
  ```
166
167
 
167
168
  ---
168
169
 
169
- ## 👍 Use Cases
170
+ ## Use Cases
170
171
 
171
172
  Use **TextTools** when you need to:
172
173
 
173
- - 🔍 **Classify** large datasets quickly without model training
174
- - 🌍 **Translate** and process multilingual corpora with ease
174
+ - 🔍 **Classify** large datasets quickly without model training
175
175
  - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
176
176
  - 📊 **Analyze** large text collections using embeddings and categorization
177
177
 
@@ -181,9 +181,3 @@ Use **TextTools** when you need to:
181
181
 
182
182
  Contributions are welcome!
183
183
  Feel free to **open issues, suggest new features, or submit pull requests**.
184
-
185
- ---
186
-
187
- ## 🌿 License
188
-
189
- This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -6,30 +6,29 @@ hamtaa_texttools.egg-info/SOURCES.txt
6
6
  hamtaa_texttools.egg-info/dependency_links.txt
7
7
  hamtaa_texttools.egg-info/requires.txt
8
8
  hamtaa_texttools.egg-info/top_level.txt
9
- tests/test_all_async_tools.py
10
- tests/test_all_tools.py
11
- tests/test_output_validation.py
9
+ tests/test_category_tree.py
10
+ tests/test_to_chunks.py
12
11
  texttools/__init__.py
13
12
  texttools/models.py
14
13
  texttools/py.typed
15
14
  texttools/core/__init__.py
16
- texttools/core/engine.py
17
15
  texttools/core/exceptions.py
18
16
  texttools/core/internal_models.py
17
+ texttools/core/utils.py
18
+ texttools/core/operators/__init__.py
19
19
  texttools/core/operators/async_operator.py
20
20
  texttools/core/operators/sync_operator.py
21
+ texttools/prompts/augment.yaml
21
22
  texttools/prompts/categorize.yaml
22
- texttools/prompts/check_fact.yaml
23
23
  texttools/prompts/extract_entities.yaml
24
24
  texttools/prompts/extract_keywords.yaml
25
+ texttools/prompts/is_fact.yaml
25
26
  texttools/prompts/is_question.yaml
26
27
  texttools/prompts/merge_questions.yaml
27
28
  texttools/prompts/propositionize.yaml
28
- texttools/prompts/rewrite.yaml
29
29
  texttools/prompts/run_custom.yaml
30
- texttools/prompts/subject_to_question.yaml
31
30
  texttools/prompts/summarize.yaml
32
- texttools/prompts/text_to_question.yaml
31
+ texttools/prompts/to_question.yaml
33
32
  texttools/prompts/translate.yaml
34
33
  texttools/tools/__init__.py
35
34
  texttools/tools/async_tools.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hamtaa-texttools"
7
- version = "1.3.1"
7
+ version = "2.0.0"
8
8
  authors = [
9
9
  {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
10
10
  {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
@@ -19,7 +19,7 @@ maintainers = [
19
19
  description = "A high-level NLP toolkit built on top of modern LLMs."
20
20
  readme = "README.md"
21
21
  license = {text = "MIT"}
22
- requires-python = ">=3.9"
22
+ requires-python = ">=3.11"
23
23
  dependencies = [
24
24
  "openai>=1.97.1",
25
25
  "pydantic>=2.0.0",
@@ -0,0 +1,48 @@
1
+ import pytest
2
+ from texttools.models import CategoryTree, Node
3
+
4
+
5
+ @pytest.fixture
6
+ def tree():
7
+ tree = CategoryTree()
8
+ tree.add_node("اخلاق", "root")
9
+ tree.add_node("معرفت شناسی", "root")
10
+ tree.add_node("متافیزیک", "root")
11
+ tree.add_node("فلسفه ذهن", "root")
12
+ tree.add_node("آگاهی", "فلسفه ذهن")
13
+ tree.add_node("ذهن و بدن", "فلسفه ذهن")
14
+ tree.add_node("امکان و ضرورت", "متافیزیک")
15
+ tree.add_node("مغز و ترشحات", "ذهن و بدن")
16
+ return tree
17
+
18
+
19
+ def test_level_count(tree):
20
+ assert tree.get_level_count() == 3
21
+
22
+
23
+ def test_none_node(tree):
24
+ assert tree.get_node("سلامت") is None
25
+
26
+
27
+ def test_get_node(tree):
28
+ assert isinstance(tree.get_node("آگاهی"), Node)
29
+
30
+
31
+ def test_add_duplicate_node(tree):
32
+ with pytest.raises(ValueError, match="Cannot add آگاهی category twice"):
33
+ tree.add_node("آگاهی", "root")
34
+
35
+
36
+ def test_wrong_parent(tree):
37
+ with pytest.raises(ValueError, match="Parent category امکان not found"):
38
+ tree.add_node("ضرورت", "امکان")
39
+
40
+
41
+ def test_remove_root(tree):
42
+ with pytest.raises(ValueError, match="Cannot remove the root node"):
43
+ tree.remove_node("root")
44
+
45
+
46
+ def test_remove_none(tree):
47
+ with pytest.raises(ValueError, match="Category: ایجاب not found"):
48
+ tree.remove_node("ایجاب")