hamtaa-texttools 1.3.2__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/LICENSE +1 -1
  2. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/PKG-INFO +38 -41
  3. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/README.md +36 -39
  4. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/hamtaa_texttools.egg-info/PKG-INFO +38 -41
  5. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/hamtaa_texttools.egg-info/SOURCES.txt +6 -8
  6. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/pyproject.toml +2 -2
  7. hamtaa_texttools-2.0.0/tests/test_category_tree.py +48 -0
  8. hamtaa_texttools-2.0.0/tests/test_to_chunks.py +13 -0
  9. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/__init__.py +1 -1
  10. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/core/internal_models.py +16 -7
  11. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/core/operators/async_operator.py +10 -16
  12. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/core/operators/sync_operator.py +10 -16
  13. hamtaa_texttools-2.0.0/texttools/core/utils.py +260 -0
  14. hamtaa_texttools-2.0.0/texttools/models.py +143 -0
  15. hamtaa_texttools-1.3.2/texttools/prompts/rewrite.yaml → hamtaa_texttools-2.0.0/texttools/prompts/augment.yaml +3 -3
  16. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/prompts/categorize.yaml +7 -8
  17. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/prompts/extract_entities.yaml +2 -2
  18. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/prompts/extract_keywords.yaml +4 -2
  19. hamtaa_texttools-1.3.2/texttools/prompts/check_fact.yaml → hamtaa_texttools-2.0.0/texttools/prompts/is_fact.yaml +5 -4
  20. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/prompts/is_question.yaml +1 -1
  21. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/prompts/merge_questions.yaml +8 -6
  22. hamtaa_texttools-2.0.0/texttools/prompts/propositionize.yaml +28 -0
  23. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/prompts/run_custom.yaml +3 -1
  24. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/prompts/summarize.yaml +3 -3
  25. hamtaa_texttools-2.0.0/texttools/prompts/to_question.yaml +60 -0
  26. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/prompts/translate.yaml +4 -4
  27. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/tools/async_tools.py +90 -169
  28. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/tools/sync_tools.py +76 -150
  29. hamtaa_texttools-1.3.2/tests/test_all_async_tools.py +0 -99
  30. hamtaa_texttools-1.3.2/tests/test_all_tools.py +0 -118
  31. hamtaa_texttools-1.3.2/tests/test_output_validation.py +0 -31
  32. hamtaa_texttools-1.3.2/texttools/core/engine.py +0 -262
  33. hamtaa_texttools-1.3.2/texttools/models.py +0 -88
  34. hamtaa_texttools-1.3.2/texttools/prompts/propositionize.yaml +0 -24
  35. hamtaa_texttools-1.3.2/texttools/prompts/subject_to_question.yaml +0 -26
  36. hamtaa_texttools-1.3.2/texttools/prompts/text_to_question.yaml +0 -26
  37. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  38. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/hamtaa_texttools.egg-info/requires.txt +0 -0
  39. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  40. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/setup.cfg +0 -0
  41. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/core/__init__.py +0 -0
  42. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/core/exceptions.py +0 -0
  43. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/core/operators/__init__.py +0 -0
  44. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/py.typed +0 -0
  45. {hamtaa_texttools-1.3.2 → hamtaa_texttools-2.0.0}/texttools/tools/__init__.py +0 -0
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
21
+ SOFTWARE.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.3.2
3
+ Version: 2.0.0
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -11,7 +11,7 @@ Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
12
  Classifier: Topic :: Text Processing
13
13
  Classifier: Operating System :: OS Independent
14
- Requires-Python: >=3.9
14
+ Requires-Python: >=3.11
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
17
  Requires-Dist: openai>=1.97.1
@@ -30,30 +30,27 @@ Dynamic: license-file
30
30
 
31
31
  It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
32
32
 
33
- It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
34
-
35
- **Note:** Most features of `texttools` are reliable when you use `google/gemma-3n-e4b-it` model.
33
+ It provides ready-to-use utilities for **translation, question detection, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
36
34
 
37
35
  ---
38
36
 
39
37
  ## ✨ Features
40
38
 
41
- TextTools provides a rich collection of high-level NLP utilities,
39
+ TextTools provides a collection of high-level NLP utilities.
42
40
  Each tool is designed to work with structured outputs.
43
41
 
44
- - **`categorize()`** - Classifies text into given categories
45
- - **`extract_keywords()`** - Extracts keywords from the text
46
- - **`extract_entities()`** - Named Entity Recognition (NER) system
47
- - **`is_question()`** - Binary question detection
48
- - **`text_to_question()`** - Generates questions from text
49
- - **`merge_questions()`** - Merges multiple questions into one
50
- - **`rewrite()`** - Rewrites text in a different way
51
- - **`subject_to_question()`** - Generates questions about a given subject
52
- - **`summarize()`** - Text summarization
53
- - **`translate()`** - Text translation
54
- - **`propositionize()`** - Convert text to atomic independent meaningful sentences
55
- - **`check_fact()`** - Check whether a statement is relevant to the source text
56
- - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
42
+ - **`categorize()`** - Classify text into given categories
43
+ - **`extract_keywords()`** - Extract keywords from the text
44
+ - **`extract_entities()`** - Perform Named Entity Recognition (NER)
45
+ - **`is_question()`** - Detect if the input is phrased as a question
46
+ - **`to_question()`** - Generate questions from the given text / subject
47
+ - **`merge_questions()`** - Merge multiple questions into one
48
+ - **`augment()`** - Rewrite text in different augmentations
49
+ - **`summarize()`** - Summarize the given text
50
+ - **`translate()`** - Translate text between languages
51
+ - **`propositionize()`** - Convert a text into atomic, independent, meaningful sentences
52
+ - **`is_fact()`** - Check whether a statement is a fact based on the source text
53
+ - **`run_custom()`** - Custom tool that can do almost anything
57
54
 
58
55
  ---
59
56
 
@@ -71,14 +68,12 @@ pip install -U hamtaa-texttools
71
68
 
72
69
  | Status | Meaning | Tools | Safe for Production? |
73
70
  |--------|---------|----------|-------------------|
74
- | **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
75
- | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
71
+ | **✅ Production** | Evaluated and tested. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `to_question()`, `merge_questions()`, `augment()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
72
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. | `categorize()` (tree mode), `translate()`, `propositionize()`, `is_fact()` | **Use with caution** |
76
73
 
77
74
  ---
78
75
 
79
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator`, `priority` and `timeout` parameters
80
-
81
- TextTools provides several optional flags to customize LLM behavior:
76
+ ## ⚙️ Additional Parameters
82
77
 
83
78
  - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
84
79
  **Note:** This doubles token usage per call.
@@ -88,17 +83,17 @@ TextTools provides several optional flags to customize LLM behavior:
88
83
 
89
84
  - **`output_lang: str`** → Forces the model to respond in a specific language.
90
85
 
91
- - **`user_prompt: str`** → Allows you to inject a custom instruction or into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
86
+ - **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template.
92
87
 
93
- - **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
88
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number between `0.0` and `2.0`.
94
89
 
95
- - **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
90
+ - **`validator: Callable (Experimental)`** → Forces the tool to validate the output result based on your validator function. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
96
91
 
97
- - **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
92
+ - **`priority: int (Experimental)`** → Affects processing order in queues.
98
93
  **Note:** This feature works if it's supported by the model and vLLM.
99
94
 
100
- - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
101
- **Note:** This feature only exists in `AsyncTheTool`.
95
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error.
96
+ **Note:** This feature is only available in `AsyncTheTool`.
102
97
 
103
98
 
104
99
  ---
@@ -110,12 +105,14 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
110
105
  - **`analysis: str`**
111
106
  - **`logprobs: list`**
112
107
  - **`errors: list[str]`**
113
- - **`ToolOutputMetadata`**
108
+ - **`ToolOutputMetadata`**
114
109
  - **`tool_name: str`**
115
110
  - **`processed_at: datetime`**
116
111
  - **`execution_time: float`**
117
112
 
118
- **Note:** You can use `repr(ToolOutput)` to print your output with all the details.
113
+ - Serialize output to JSON using the `to_json()` method.
114
+ - Verify operation success with the `is_successful()` method.
115
+ - Convert output to a dictionary with the `to_dict()` method.
119
116
 
120
117
  ---
121
118
 
@@ -133,13 +130,13 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
133
130
  from openai import OpenAI
134
131
  from texttools import TheTool
135
132
 
136
- client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
133
+ client = OpenAI(base_url="your_url", API_KEY="your_api_key")
137
134
  model = "model_name"
138
135
 
139
136
  the_tool = TheTool(client=client, model=model)
140
137
 
141
138
  detection = the_tool.is_question("Is this project open source?")
142
- print(repr(detection))
139
+ print(detection.to_json())
143
140
  ```
144
141
 
145
142
  ---
@@ -157,24 +154,24 @@ async def main():
157
154
 
158
155
  async_the_tool = AsyncTheTool(client=async_client, model=model)
159
156
 
160
- translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
161
- keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
157
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_lang="English")
158
+ keywords_task = async_the_tool.extract_keywords("This open source project is great for processing large datasets!")
162
159
 
163
160
  (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
164
- print(repr(translation))
165
- print(repr(keywords))
161
+
162
+ print(translation.to_json())
163
+ print(keywords.to_json())
166
164
 
167
165
  asyncio.run(main())
168
166
  ```
169
167
 
170
168
  ---
171
169
 
172
- ## 👍 Use Cases
170
+ ## Use Cases
173
171
 
174
172
  Use **TextTools** when you need to:
175
173
 
176
- - 🔍 **Classify** large datasets quickly without model training
177
- - 🌍 **Translate** and process multilingual corpora with ease
174
+ - 🔍 **Classify** large datasets quickly without model training
178
175
  - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
179
176
  - 📊 **Analyze** large text collections using embeddings and categorization
180
177
 
@@ -9,30 +9,27 @@
9
9
 
10
10
  It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
11
11
 
12
- It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
13
-
14
- **Note:** Most features of `texttools` are reliable when you use `google/gemma-3n-e4b-it` model.
12
+ It provides ready-to-use utilities for **translation, question detection, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
15
13
 
16
14
  ---
17
15
 
18
16
  ## ✨ Features
19
17
 
20
- TextTools provides a rich collection of high-level NLP utilities,
18
+ TextTools provides a collection of high-level NLP utilities.
21
19
  Each tool is designed to work with structured outputs.
22
20
 
23
- - **`categorize()`** - Classifies text into given categories
24
- - **`extract_keywords()`** - Extracts keywords from the text
25
- - **`extract_entities()`** - Named Entity Recognition (NER) system
26
- - **`is_question()`** - Binary question detection
27
- - **`text_to_question()`** - Generates questions from text
28
- - **`merge_questions()`** - Merges multiple questions into one
29
- - **`rewrite()`** - Rewrites text in a different way
30
- - **`subject_to_question()`** - Generates questions about a given subject
31
- - **`summarize()`** - Text summarization
32
- - **`translate()`** - Text translation
33
- - **`propositionize()`** - Convert text to atomic independent meaningful sentences
34
- - **`check_fact()`** - Check whether a statement is relevant to the source text
35
- - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
21
+ - **`categorize()`** - Classify text into given categories
22
+ - **`extract_keywords()`** - Extract keywords from the text
23
+ - **`extract_entities()`** - Perform Named Entity Recognition (NER)
24
+ - **`is_question()`** - Detect if the input is phrased as a question
25
+ - **`to_question()`** - Generate questions from the given text / subject
26
+ - **`merge_questions()`** - Merge multiple questions into one
27
+ - **`augment()`** - Rewrite text in different augmentations
28
+ - **`summarize()`** - Summarize the given text
29
+ - **`translate()`** - Translate text between languages
30
+ - **`propositionize()`** - Convert a text into atomic, independent, meaningful sentences
31
+ - **`is_fact()`** - Check whether a statement is a fact based on the source text
32
+ - **`run_custom()`** - Custom tool that can do almost anything
36
33
 
37
34
  ---
38
35
 
@@ -50,14 +47,12 @@ pip install -U hamtaa-texttools
50
47
 
51
48
  | Status | Meaning | Tools | Safe for Production? |
52
49
  |--------|---------|----------|-------------------|
53
- | **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
54
- | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
50
+ | **✅ Production** | Evaluated and tested. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `to_question()`, `merge_questions()`, `augment()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
51
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. | `categorize()` (tree mode), `translate()`, `propositionize()`, `is_fact()` | **Use with caution** |
55
52
 
56
53
  ---
57
54
 
58
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator`, `priority` and `timeout` parameters
59
-
60
- TextTools provides several optional flags to customize LLM behavior:
55
+ ## ⚙️ Additional Parameters
61
56
 
62
57
  - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
63
58
  **Note:** This doubles token usage per call.
@@ -67,17 +62,17 @@ TextTools provides several optional flags to customize LLM behavior:
67
62
 
68
63
  - **`output_lang: str`** → Forces the model to respond in a specific language.
69
64
 
70
- - **`user_prompt: str`** → Allows you to inject a custom instruction or into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
65
+ - **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template.
71
66
 
72
- - **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
67
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number between `0.0` and `2.0`.
73
68
 
74
- - **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
69
+ - **`validator: Callable (Experimental)`** → Forces the tool to validate the output result based on your validator function. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
75
70
 
76
- - **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
71
+ - **`priority: int (Experimental)`** → Affects processing order in queues.
77
72
  **Note:** This feature works if it's supported by the model and vLLM.
78
73
 
79
- - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
80
- **Note:** This feature only exists in `AsyncTheTool`.
74
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error.
75
+ **Note:** This feature is only available in `AsyncTheTool`.
81
76
 
82
77
 
83
78
  ---
@@ -89,12 +84,14 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
89
84
  - **`analysis: str`**
90
85
  - **`logprobs: list`**
91
86
  - **`errors: list[str]`**
92
- - **`ToolOutputMetadata`**
87
+ - **`ToolOutputMetadata`**
93
88
  - **`tool_name: str`**
94
89
  - **`processed_at: datetime`**
95
90
  - **`execution_time: float`**
96
91
 
97
- **Note:** You can use `repr(ToolOutput)` to print your output with all the details.
92
+ - Serialize output to JSON using the `to_json()` method.
93
+ - Verify operation success with the `is_successful()` method.
94
+ - Convert output to a dictionary with the `to_dict()` method.
98
95
 
99
96
  ---
100
97
 
@@ -112,13 +109,13 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
112
109
  from openai import OpenAI
113
110
  from texttools import TheTool
114
111
 
115
- client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
112
+ client = OpenAI(base_url="your_url", API_KEY="your_api_key")
116
113
  model = "model_name"
117
114
 
118
115
  the_tool = TheTool(client=client, model=model)
119
116
 
120
117
  detection = the_tool.is_question("Is this project open source?")
121
- print(repr(detection))
118
+ print(detection.to_json())
122
119
  ```
123
120
 
124
121
  ---
@@ -136,24 +133,24 @@ async def main():
136
133
 
137
134
  async_the_tool = AsyncTheTool(client=async_client, model=model)
138
135
 
139
- translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
140
- keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
136
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_lang="English")
137
+ keywords_task = async_the_tool.extract_keywords("This open source project is great for processing large datasets!")
141
138
 
142
139
  (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
143
- print(repr(translation))
144
- print(repr(keywords))
140
+
141
+ print(translation.to_json())
142
+ print(keywords.to_json())
145
143
 
146
144
  asyncio.run(main())
147
145
  ```
148
146
 
149
147
  ---
150
148
 
151
- ## 👍 Use Cases
149
+ ## Use Cases
152
150
 
153
151
  Use **TextTools** when you need to:
154
152
 
155
- - 🔍 **Classify** large datasets quickly without model training
156
- - 🌍 **Translate** and process multilingual corpora with ease
153
+ - 🔍 **Classify** large datasets quickly without model training
157
154
  - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
158
155
  - 📊 **Analyze** large text collections using embeddings and categorization
159
156
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.3.2
3
+ Version: 2.0.0
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
5
  Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -11,7 +11,7 @@ Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
12
  Classifier: Topic :: Text Processing
13
13
  Classifier: Operating System :: OS Independent
14
- Requires-Python: >=3.9
14
+ Requires-Python: >=3.11
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
17
  Requires-Dist: openai>=1.97.1
@@ -30,30 +30,27 @@ Dynamic: license-file
30
30
 
31
31
  It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
32
32
 
33
- It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
34
-
35
- **Note:** Most features of `texttools` are reliable when you use `google/gemma-3n-e4b-it` model.
33
+ It provides ready-to-use utilities for **translation, question detection, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
36
34
 
37
35
  ---
38
36
 
39
37
  ## ✨ Features
40
38
 
41
- TextTools provides a rich collection of high-level NLP utilities,
39
+ TextTools provides a collection of high-level NLP utilities.
42
40
  Each tool is designed to work with structured outputs.
43
41
 
44
- - **`categorize()`** - Classifies text into given categories
45
- - **`extract_keywords()`** - Extracts keywords from the text
46
- - **`extract_entities()`** - Named Entity Recognition (NER) system
47
- - **`is_question()`** - Binary question detection
48
- - **`text_to_question()`** - Generates questions from text
49
- - **`merge_questions()`** - Merges multiple questions into one
50
- - **`rewrite()`** - Rewrites text in a different way
51
- - **`subject_to_question()`** - Generates questions about a given subject
52
- - **`summarize()`** - Text summarization
53
- - **`translate()`** - Text translation
54
- - **`propositionize()`** - Convert text to atomic independent meaningful sentences
55
- - **`check_fact()`** - Check whether a statement is relevant to the source text
56
- - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
42
+ - **`categorize()`** - Classify text into given categories
43
+ - **`extract_keywords()`** - Extract keywords from the text
44
+ - **`extract_entities()`** - Perform Named Entity Recognition (NER)
45
+ - **`is_question()`** - Detect if the input is phrased as a question
46
+ - **`to_question()`** - Generate questions from the given text / subject
47
+ - **`merge_questions()`** - Merge multiple questions into one
48
+ - **`augment()`** - Rewrite text in different augmentations
49
+ - **`summarize()`** - Summarize the given text
50
+ - **`translate()`** - Translate text between languages
51
+ - **`propositionize()`** - Convert a text into atomic, independent, meaningful sentences
52
+ - **`is_fact()`** - Check whether a statement is a fact based on the source text
53
+ - **`run_custom()`** - Custom tool that can do almost anything
57
54
 
58
55
  ---
59
56
 
@@ -71,14 +68,12 @@ pip install -U hamtaa-texttools
71
68
 
72
69
  | Status | Meaning | Tools | Safe for Production? |
73
70
  |--------|---------|----------|-------------------|
74
- | **✅ Production** | Evaluated, tested, stable. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `text_to_question()`, `merge_questions()`, `rewrite()`, `subject_to_question()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
75
- | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | `categorize()` (tree mode), `translate()`, `propositionize()`, `check_fact()` | **Use with caution** - outputs not yet validated. |
71
+ | **✅ Production** | Evaluated and tested. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `to_question()`, `merge_questions()`, `augment()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
72
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. | `categorize()` (tree mode), `translate()`, `propositionize()`, `is_fact()` | **Use with caution** |
76
73
 
77
74
  ---
78
75
 
79
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator`, `priority` and `timeout` parameters
80
-
81
- TextTools provides several optional flags to customize LLM behavior:
76
+ ## ⚙️ Additional Parameters
82
77
 
83
78
  - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
84
79
  **Note:** This doubles token usage per call.
@@ -88,17 +83,17 @@ TextTools provides several optional flags to customize LLM behavior:
88
83
 
89
84
  - **`output_lang: str`** → Forces the model to respond in a specific language.
90
85
 
91
- - **`user_prompt: str`** → Allows you to inject a custom instruction or into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
86
+ - **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template.
92
87
 
93
- - **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
88
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number between `0.0` and `2.0`.
94
89
 
95
- - **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
90
+ - **`validator: Callable (Experimental)`** → Forces the tool to validate the output result based on your validator function. Validator should return a boolean. If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can also specify `max_validation_retries=<N>`.
96
91
 
97
- - **`priority: int (Experimental)`** → Task execution priority level. Affects processing order in queues.
92
+ - **`priority: int (Experimental)`** → Affects processing order in queues.
98
93
  **Note:** This feature works if it's supported by the model and vLLM.
99
94
 
100
- - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
101
- **Note:** This feature only exists in `AsyncTheTool`.
95
+ - **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error.
96
+ **Note:** This feature is only available in `AsyncTheTool`.
102
97
 
103
98
 
104
99
  ---
@@ -110,12 +105,14 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
110
105
  - **`analysis: str`**
111
106
  - **`logprobs: list`**
112
107
  - **`errors: list[str]`**
113
- - **`ToolOutputMetadata`**
108
+ - **`ToolOutputMetadata`**
114
109
  - **`tool_name: str`**
115
110
  - **`processed_at: datetime`**
116
111
  - **`execution_time: float`**
117
112
 
118
- **Note:** You can use `repr(ToolOutput)` to print your output with all the details.
113
+ - Serialize output to JSON using the `to_json()` method.
114
+ - Verify operation success with the `is_successful()` method.
115
+ - Convert output to a dictionary with the `to_dict()` method.
119
116
 
120
117
  ---
121
118
 
@@ -133,13 +130,13 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
133
130
  from openai import OpenAI
134
131
  from texttools import TheTool
135
132
 
136
- client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
133
+ client = OpenAI(base_url="your_url", api_key="your_api_key")
137
134
  model = "model_name"
138
135
 
139
136
  the_tool = TheTool(client=client, model=model)
140
137
 
141
138
  detection = the_tool.is_question("Is this project open source?")
142
- print(repr(detection))
139
+ print(detection.to_json())
143
140
  ```
144
141
 
145
142
  ---
@@ -157,24 +154,24 @@ async def main():
157
154
 
158
155
  async_the_tool = AsyncTheTool(client=async_client, model=model)
159
156
 
160
- translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
161
- keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
157
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_lang="English")
158
+ keywords_task = async_the_tool.extract_keywords("This open source project is great for processing large datasets!")
162
159
 
163
160
  (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
164
- print(repr(translation))
165
- print(repr(keywords))
161
+
162
+ print(translation.to_json())
163
+ print(keywords.to_json())
166
164
 
167
165
  asyncio.run(main())
168
166
  ```
169
167
 
170
168
  ---
171
169
 
172
- ## 👍 Use Cases
170
+ ## Use Cases
173
171
 
174
172
  Use **TextTools** when you need to:
175
173
 
176
- - 🔍 **Classify** large datasets quickly without model training
177
- - 🌍 **Translate** and process multilingual corpora with ease
174
+ - 🔍 **Classify** large datasets quickly without model training
178
175
  - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
179
176
  - 📊 **Analyze** large text collections using embeddings and categorization
180
177
 
@@ -6,31 +6,29 @@ hamtaa_texttools.egg-info/SOURCES.txt
6
6
  hamtaa_texttools.egg-info/dependency_links.txt
7
7
  hamtaa_texttools.egg-info/requires.txt
8
8
  hamtaa_texttools.egg-info/top_level.txt
9
- tests/test_all_async_tools.py
10
- tests/test_all_tools.py
11
- tests/test_output_validation.py
9
+ tests/test_category_tree.py
10
+ tests/test_to_chunks.py
12
11
  texttools/__init__.py
13
12
  texttools/models.py
14
13
  texttools/py.typed
15
14
  texttools/core/__init__.py
16
- texttools/core/engine.py
17
15
  texttools/core/exceptions.py
18
16
  texttools/core/internal_models.py
17
+ texttools/core/utils.py
19
18
  texttools/core/operators/__init__.py
20
19
  texttools/core/operators/async_operator.py
21
20
  texttools/core/operators/sync_operator.py
21
+ texttools/prompts/augment.yaml
22
22
  texttools/prompts/categorize.yaml
23
- texttools/prompts/check_fact.yaml
24
23
  texttools/prompts/extract_entities.yaml
25
24
  texttools/prompts/extract_keywords.yaml
25
+ texttools/prompts/is_fact.yaml
26
26
  texttools/prompts/is_question.yaml
27
27
  texttools/prompts/merge_questions.yaml
28
28
  texttools/prompts/propositionize.yaml
29
- texttools/prompts/rewrite.yaml
30
29
  texttools/prompts/run_custom.yaml
31
- texttools/prompts/subject_to_question.yaml
32
30
  texttools/prompts/summarize.yaml
33
- texttools/prompts/text_to_question.yaml
31
+ texttools/prompts/to_question.yaml
34
32
  texttools/prompts/translate.yaml
35
33
  texttools/tools/__init__.py
36
34
  texttools/tools/async_tools.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hamtaa-texttools"
7
- version = "1.3.2"
7
+ version = "2.0.0"
8
8
  authors = [
9
9
  {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
10
10
  {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
@@ -19,7 +19,7 @@ maintainers = [
19
19
  description = "A high-level NLP toolkit built on top of modern LLMs."
20
20
  readme = "README.md"
21
21
  license = {text = "MIT"}
22
- requires-python = ">=3.9"
22
+ requires-python = ">=3.11"
23
23
  dependencies = [
24
24
  "openai>=1.97.1",
25
25
  "pydantic>=2.0.0",
@@ -0,0 +1,48 @@
1
+ import pytest
2
+ from texttools.models import CategoryTree, Node
3
+
4
+
5
+ @pytest.fixture
6
+ def tree():
7
+ tree = CategoryTree()
8
+ tree.add_node("اخلاق", "root")
9
+ tree.add_node("معرفت شناسی", "root")
10
+ tree.add_node("متافیزیک", "root")
11
+ tree.add_node("فلسفه ذهن", "root")
12
+ tree.add_node("آگاهی", "فلسفه ذهن")
13
+ tree.add_node("ذهن و بدن", "فلسفه ذهن")
14
+ tree.add_node("امکان و ضرورت", "متافیزیک")
15
+ tree.add_node("مغز و ترشحات", "ذهن و بدن")
16
+ return tree
17
+
18
+
19
+ def test_level_count(tree):
20
+ assert tree.get_level_count() == 3
21
+
22
+
23
+ def test_none_node(tree):
24
+ assert tree.get_node("سلامت") is None
25
+
26
+
27
+ def test_get_node(tree):
28
+ assert isinstance(tree.get_node("آگاهی"), Node)
29
+
30
+
31
+ def test_add_duplicate_node(tree):
32
+ with pytest.raises(ValueError, match="Cannot add آگاهی category twice"):
33
+ tree.add_node("آگاهی", "root")
34
+
35
+
36
+ def test_wrong_parent(tree):
37
+ with pytest.raises(ValueError, match="Parent category امکان not found"):
38
+ tree.add_node("ضرورت", "امکان")
39
+
40
+
41
+ def test_remove_root(tree):
42
+ with pytest.raises(ValueError, match="Cannot remove the root node"):
43
+ tree.remove_node("root")
44
+
45
+
46
+ def test_remove_none(tree):
47
+ with pytest.raises(ValueError, match="Category: ایجاب not found"):
48
+ tree.remove_node("ایجاب")
@@ -0,0 +1,13 @@
1
+ from texttools.core.utils import TheToolUtils
2
+
3
+
4
+ def test_single_chunk():
5
+ text = "Short text"
6
+ chunks = TheToolUtils.to_chunks(text, size=100, overlap=0)
7
+ assert len(chunks) == 1
8
+ assert chunks[0] == "Short text"
9
+
10
+
11
+ def test_empty_text():
12
+ chunks = TheToolUtils.to_chunks("", size=10, overlap=0)
13
+ assert len(chunks) == 0
@@ -2,4 +2,4 @@ from .models import CategoryTree
2
2
  from .tools.async_tools import AsyncTheTool
3
3
  from .tools.sync_tools import TheTool
4
4
 
5
- __all__ = ["TheTool", "AsyncTheTool", "CategoryTree"]
5
+ __all__ = ["CategoryTree", "AsyncTheTool", "TheTool"]