hamtaa-texttools 1.0.2.tar.gz → 1.1.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.3}/LICENSE +20 -20
  2. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.3}/MANIFEST.in +2 -2
  3. hamtaa_texttools-1.1.3/PKG-INFO +185 -0
  4. hamtaa_texttools-1.1.3/README.md +151 -0
  5. hamtaa_texttools-1.1.3/hamtaa_texttools.egg-info/PKG-INFO +185 -0
  6. hamtaa_texttools-1.1.3/hamtaa_texttools.egg-info/SOURCES.txt +34 -0
  7. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.3}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  8. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.3}/hamtaa_texttools.egg-info/requires.txt +1 -1
  9. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.3}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  10. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.3}/pyproject.toml +32 -32
  11. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.3}/setup.cfg +4 -4
  12. hamtaa_texttools-1.1.3/texttools/__init__.py +9 -0
  13. {hamtaa_texttools-1.0.2/texttools/utils/batch_manager → hamtaa_texttools-1.1.3/texttools/batch}/__init__.py +4 -4
  14. {hamtaa_texttools-1.0.2/texttools/utils/batch_manager → hamtaa_texttools-1.1.3/texttools/batch}/batch_manager.py +228 -240
  15. {hamtaa_texttools-1.0.2/texttools/utils/batch_manager → hamtaa_texttools-1.1.3/texttools/batch}/batch_runner.py +262 -212
  16. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.3}/texttools/prompts/README.md +35 -31
  17. hamtaa_texttools-1.1.3/texttools/prompts/categorizer.yaml +28 -0
  18. hamtaa_texttools-1.0.2/texttools/prompts/ner_extractor.yaml → hamtaa_texttools-1.1.3/texttools/prompts/extract_entities.yaml +20 -18
  19. hamtaa_texttools-1.1.3/texttools/prompts/extract_keywords.yaml +18 -0
  20. hamtaa_texttools-1.0.2/texttools/prompts/question_detector.yaml → hamtaa_texttools-1.1.3/texttools/prompts/is_question.yaml +13 -11
  21. hamtaa_texttools-1.0.2/texttools/prompts/question_merger.yaml → hamtaa_texttools-1.1.3/texttools/prompts/merge_questions.yaml +45 -46
  22. hamtaa_texttools-1.1.3/texttools/prompts/rewrite.yaml +111 -0
  23. hamtaa_texttools-1.1.3/texttools/prompts/run_custom.yaml +7 -0
  24. hamtaa_texttools-1.0.2/texttools/prompts/subject_question_generator.yaml → hamtaa_texttools-1.1.3/texttools/prompts/subject_to_question.yaml +22 -27
  25. hamtaa_texttools-1.0.2/texttools/prompts/summarizer.yaml → hamtaa_texttools-1.1.3/texttools/prompts/summarize.yaml +13 -10
  26. hamtaa_texttools-1.0.2/texttools/prompts/question_generator.yaml → hamtaa_texttools-1.1.3/texttools/prompts/text_to_question.yaml +19 -21
  27. hamtaa_texttools-1.0.2/texttools/prompts/translator.yaml → hamtaa_texttools-1.1.3/texttools/prompts/translate.yaml +14 -13
  28. hamtaa_texttools-1.1.3/texttools/tools/__init__.py +4 -0
  29. hamtaa_texttools-1.1.3/texttools/tools/async_the_tool.py +435 -0
  30. hamtaa_texttools-1.1.3/texttools/tools/internals/async_operator.py +242 -0
  31. hamtaa_texttools-1.1.3/texttools/tools/internals/base_operator.py +101 -0
  32. hamtaa_texttools-1.1.3/texttools/tools/internals/formatters.py +24 -0
  33. hamtaa_texttools-1.1.3/texttools/tools/internals/operator.py +242 -0
  34. hamtaa_texttools-1.1.3/texttools/tools/internals/output_models.py +59 -0
  35. hamtaa_texttools-1.1.3/texttools/tools/internals/prompt_loader.py +60 -0
  36. hamtaa_texttools-1.1.3/texttools/tools/the_tool.py +433 -0
  37. hamtaa_texttools-1.0.2/PKG-INFO +0 -129
  38. hamtaa_texttools-1.0.2/README.md +0 -95
  39. hamtaa_texttools-1.0.2/hamtaa_texttools.egg-info/PKG-INFO +0 -129
  40. hamtaa_texttools-1.0.2/hamtaa_texttools.egg-info/SOURCES.txt +0 -33
  41. hamtaa_texttools-1.0.2/tests/test_tools.py +0 -65
  42. hamtaa_texttools-1.0.2/texttools/__init__.py +0 -9
  43. hamtaa_texttools-1.0.2/texttools/formatters/base_formatter.py +0 -33
  44. hamtaa_texttools-1.0.2/texttools/formatters/user_merge_formatter/user_merge_formatter.py +0 -47
  45. hamtaa_texttools-1.0.2/texttools/prompts/categorizer.yaml +0 -25
  46. hamtaa_texttools-1.0.2/texttools/prompts/keyword_extractor.yaml +0 -11
  47. hamtaa_texttools-1.0.2/texttools/prompts/question_rewriter.yaml +0 -44
  48. hamtaa_texttools-1.0.2/texttools/tools/__init__.py +0 -3
  49. hamtaa_texttools-1.0.2/texttools/tools/operator.py +0 -236
  50. hamtaa_texttools-1.0.2/texttools/tools/output_models.py +0 -54
  51. hamtaa_texttools-1.0.2/texttools/tools/prompt_loader.py +0 -84
  52. hamtaa_texttools-1.0.2/texttools/tools/the_tool.py +0 -291
  53. hamtaa_texttools-1.0.2/texttools/utils/__init__.py +0 -4
@@ -1,21 +1,21 @@
- MIT License
-
- Copyright (c) 2025 Hamtaa
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ MIT License
+
+ Copyright (c) 2025 Hamtaa
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
@@ -1,2 +1,2 @@
- graft texttools/prompts
- global-exclude *.pyc
+ graft texttools/prompts
+ global-exclude *.pyc
@@ -0,0 +1,185 @@
+ Metadata-Version: 2.4
+ Name: hamtaa-texttools
+ Version: 1.1.3
+ Summary: A high-level NLP toolkit built on top of modern LLMs.
+ Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
+ License: MIT License
+
+ Copyright (c) 2025 Hamtaa
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ Keywords: nlp,llm,text-processing,openai
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: openai==1.97.1
+ Requires-Dist: pyyaml>=6.0
+ Dynamic: license-file
+
+ # TextTools
+
+ ## 📌 Overview
+
+ **TextTools** is a high-level **NLP toolkit** built on top of modern **LLMs**.
+
+ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
+
+ The toolkit ships ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
+
+ ---
+
+ ## ✨ Features
+
+ TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
+ Each tool is designed to work with structured outputs (JSON / Pydantic).
+
+ - **`categorize()`** - Classifies text into Islamic studies categories
+ - **`is_question()`** - Binary detection of whether the input is a question
+ - **`extract_keywords()`** - Extracts keywords from text
+ - **`extract_entities()`** - Named Entity Recognition (NER)
+ - **`summarize()`** - Text summarization
+ - **`text_to_question()`** - Generates questions from text
+ - **`merge_questions()`** - Merges multiple questions, with several merge modes
+ - **`rewrite()`** - Rewrites text with different wording/meaning
+ - **`subject_to_question()`** - Generates questions about a specific subject
+ - **`translate()`** - Text translation between languages
+ - **`run_custom()`** - Lets you define a custom tool with an arbitrary Pydantic BaseModel
+
+ ---
+
+ ## ⚙️ The `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, and `validator` parameters
+
+ TextTools provides several optional flags to customize LLM behavior:
+
+ - **`with_analysis=True`** → Adds a reasoning step before generating the final output. Useful for debugging, improving prompts, or understanding model behavior.
+ Note: This doubles token usage per call because it triggers an additional LLM request.
+
+ - **`logprobs=True`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
+
+ - **`output_lang="en"`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
+
+ - **`user_prompt="..."`** → Injects a custom instruction or prompt alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
+
+ - **`temperature=0.0`** → Controls how creative the model's responses are. Accepts a float from `0.0` to `1.0`.
+
+ - **`validator=validation_function`** → Makes TheTool validate the output with your custom validator. The validator should return a bool (`True` if the output is acceptable, `False` if validation failed). If validation fails, TheTool retries with an adjusted `temperature` (a minimal sketch follows below).
+
+ All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
+
+ **Note:** Some tools may not support all of the parameters above.
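+
+ A minimal, hypothetical sketch (not from the package docs) of combining these flags with a custom validator, assuming a `TheTool` instance `the_tool` as created in the Quick Start below and that `summarize()` accepts these keyword arguments:
+
+ ```python
+ # Hypothetical validator: assumes it receives the tool's result string and
+ # accepts only non-empty summaries of at most 50 words.
+ def is_short_summary(output: str) -> bool:
+     return 0 < len(output.split()) <= 50
+
+ summary = the_tool.summarize(
+     "Some long input text ...",
+     output_lang="en",            # force the response language
+     temperature=0.2,             # keep the output conservative
+     validator=is_short_summary,  # on failure, retried with an adjusted temperature
+ )
+ print(summary.result)
+ ```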
+
+ ---
+
+ ## 🚀 Installation
+
+ Install the latest release via PyPI:
+
+ ```bash
+ pip install -U hamtaa-texttools
+ ```
+
+ ---
+
+ ## Sync vs Async
+ | Tool | Style | Use case |
+ |--------------|---------|---------------------------------------------|
+ | `TheTool` | Sync | Simple scripts, sequential workflows |
+ | `AsyncTheTool` | Async | High-throughput apps, APIs, concurrent tasks |
+
+ ---
+
+ ## ⚡ Quick Start (Sync)
+
+ ```python
+ from openai import OpenAI
+ from texttools import TheTool
+
+ # Create your OpenAI client
+ client = OpenAI(base_url="your_url", api_key="your_api_key")
+
+ # Specify the model
+ model = "gpt-4o-mini"
+
+ # Create an instance of TheTool
+ the_tool = TheTool(client=client, model=model)
+
+ # Example: Question Detection
+ detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
+ print(detection.result)
+ print(detection.logprobs)
+ # Output: True, followed by the token logprobs
+
+ # Example: Translation
+ translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
+ print(translation.result)
+ print(translation.analysis)
+ # Output: "Hi! How are you?", followed by the analysis
+ ```
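+
+ The structure of `detection.logprobs` is not documented above; if it mirrors OpenAI's token-logprob entries (an assumption), each log-probability can be converted to a plain probability with `math.exp`:
+
+ ```python
+ import math
+
+ # Assumed layout: an iterable of entries exposing .token and .logprob.
+ for entry in detection.logprobs:
+     print(entry.token, round(math.exp(entry.logprob), 4))
+ ```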
+
+ ---
+
+ ## ⚡ Quick Start (Async)
+
+ ```python
+ import asyncio
+ from openai import AsyncOpenAI
+ from texttools import AsyncTheTool
+
+ async def main():
+     # Create your AsyncOpenAI client
+     async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
+
+     # Specify the model
+     model = "gpt-4o-mini"
+
+     # Create an instance of AsyncTheTool
+     the_tool = AsyncTheTool(client=async_client, model=model)
+
+     # Example: Async Translation
+     translation = await the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+     print(translation.result)
+     # Output: "Hi! How are you?"
+
+ asyncio.run(main())
+ ```
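+
+ The "concurrent tasks" row in the table above is where the async API pays off. A minimal sketch (assuming the `AsyncTheTool` calls are independent coroutines, as in the example) of fanning several translations out inside `main()`:
+
+ ```python
+     # Hypothetical fan-out: translate several texts concurrently.
+     texts = ["سلام", "خداحافظ", "متشکرم"]
+     results = await asyncio.gather(
+         *(the_tool.translate(t, target_language="English") for t in texts)
+     )
+     for r in results:
+         print(r.result)
+ ```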
+
+ ---
+
+ ## 📚 Use Cases
+
+ Use **TextTools** when you need to:
+
+ - 🔍 **Classify** large datasets quickly without model training
+ - 🌍 **Translate** and process multilingual corpora with ease
+ - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
+ - 📊 **Analyze** large text collections using embeddings and categorization
+ - 👍 **Automate** common text-processing tasks without reinventing the wheel
+
+ ---
+
+ ## 🤝 Contributing
+
+ Contributions are welcome!
+ Feel free to **open issues, suggest new features, or submit pull requests**.
+
+ ---
+
+ ## License
+
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,151 @@
+ # TextTools
+
+ ## 📌 Overview
+
+ **TextTools** is a high-level **NLP toolkit** built on top of modern **LLMs**.
+
+ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
+
+ The toolkit ships ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
+
+ ---
+
+ ## ✨ Features
+
+ TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
+ Each tool is designed to work with structured outputs (JSON / Pydantic).
+
+ - **`categorize()`** - Classifies text into Islamic studies categories
+ - **`is_question()`** - Binary detection of whether the input is a question
+ - **`extract_keywords()`** - Extracts keywords from text
+ - **`extract_entities()`** - Named Entity Recognition (NER)
+ - **`summarize()`** - Text summarization
+ - **`text_to_question()`** - Generates questions from text
+ - **`merge_questions()`** - Merges multiple questions, with several merge modes
+ - **`rewrite()`** - Rewrites text with different wording/meaning
+ - **`subject_to_question()`** - Generates questions about a specific subject
+ - **`translate()`** - Text translation between languages
+ - **`run_custom()`** - Lets you define a custom tool with an arbitrary Pydantic BaseModel
+
+ ---
+
+ ## ⚙️ The `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, and `validator` parameters
+
+ TextTools provides several optional flags to customize LLM behavior:
+
+ - **`with_analysis=True`** → Adds a reasoning step before generating the final output. Useful for debugging, improving prompts, or understanding model behavior.
+ Note: This doubles token usage per call because it triggers an additional LLM request.
+
+ - **`logprobs=True`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
+
+ - **`output_lang="en"`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
+
+ - **`user_prompt="..."`** → Injects a custom instruction or prompt alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
+
+ - **`temperature=0.0`** → Controls how creative the model's responses are. Accepts a float from `0.0` to `1.0`.
+
+ - **`validator=validation_function`** → Makes TheTool validate the output with your custom validator. The validator should return a bool (`True` if the output is acceptable, `False` if validation failed). If validation fails, TheTool retries with an adjusted `temperature`.
+
+ All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
+
+ **Note:** Some tools may not support all of the parameters above.
+
+ ---
+
+ ## 🚀 Installation
+
+ Install the latest release via PyPI:
+
+ ```bash
+ pip install -U hamtaa-texttools
+ ```
+
+ ---
+
+ ## Sync vs Async
+ | Tool | Style | Use case |
+ |--------------|---------|---------------------------------------------|
+ | `TheTool` | Sync | Simple scripts, sequential workflows |
+ | `AsyncTheTool` | Async | High-throughput apps, APIs, concurrent tasks |
+
+ ---
+
+ ## ⚡ Quick Start (Sync)
+
+ ```python
+ from openai import OpenAI
+ from texttools import TheTool
+
+ # Create your OpenAI client
+ client = OpenAI(base_url="your_url", api_key="your_api_key")
+
+ # Specify the model
+ model = "gpt-4o-mini"
+
+ # Create an instance of TheTool
+ the_tool = TheTool(client=client, model=model)
+
+ # Example: Question Detection
+ detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
+ print(detection.result)
+ print(detection.logprobs)
+ # Output: True, followed by the token logprobs
+
+ # Example: Translation
+ translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
+ print(translation.result)
+ print(translation.analysis)
+ # Output: "Hi! How are you?", followed by the analysis
+ ```
+
+ ---
+
+ ## ⚡ Quick Start (Async)
+
+ ```python
+ import asyncio
+ from openai import AsyncOpenAI
+ from texttools import AsyncTheTool
+
+ async def main():
+     # Create your AsyncOpenAI client
+     async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
+
+     # Specify the model
+     model = "gpt-4o-mini"
+
+     # Create an instance of AsyncTheTool
+     the_tool = AsyncTheTool(client=async_client, model=model)
+
+     # Example: Async Translation
+     translation = await the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+     print(translation.result)
+     # Output: "Hi! How are you?"
+
+ asyncio.run(main())
+ ```
+
+ ---
+
+ ## 📚 Use Cases
+
+ Use **TextTools** when you need to:
+
+ - 🔍 **Classify** large datasets quickly without model training
+ - 🌍 **Translate** and process multilingual corpora with ease
+ - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
+ - 📊 **Analyze** large text collections using embeddings and categorization
+ - 👍 **Automate** common text-processing tasks without reinventing the wheel
+
+ ---
+
+ ## 🤝 Contributing
+
+ Contributions are welcome!
+ Feel free to **open issues, suggest new features, or submit pull requests**.
+
+ ---
+
+ ## License
+
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,185 @@
+ Metadata-Version: 2.4
+ Name: hamtaa-texttools
+ Version: 1.1.3
+ Summary: A high-level NLP toolkit built on top of modern LLMs.
+ Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
+ License: MIT License
+
+ Copyright (c) 2025 Hamtaa
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ Keywords: nlp,llm,text-processing,openai
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: openai==1.97.1
+ Requires-Dist: pyyaml>=6.0
+ Dynamic: license-file
+
+ # TextTools
+
+ ## 📌 Overview
+
+ **TextTools** is a high-level **NLP toolkit** built on top of modern **LLMs**.
+
+ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
+
+ The toolkit ships ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
+
+ ---
+
+ ## ✨ Features
+
+ TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
+ Each tool is designed to work with structured outputs (JSON / Pydantic).
+
+ - **`categorize()`** - Classifies text into Islamic studies categories
+ - **`is_question()`** - Binary detection of whether the input is a question
+ - **`extract_keywords()`** - Extracts keywords from text
+ - **`extract_entities()`** - Named Entity Recognition (NER)
+ - **`summarize()`** - Text summarization
+ - **`text_to_question()`** - Generates questions from text
+ - **`merge_questions()`** - Merges multiple questions, with several merge modes
+ - **`rewrite()`** - Rewrites text with different wording/meaning
+ - **`subject_to_question()`** - Generates questions about a specific subject
+ - **`translate()`** - Text translation between languages
+ - **`run_custom()`** - Lets you define a custom tool with an arbitrary Pydantic BaseModel
+
+ ---
+
+ ## ⚙️ The `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, and `validator` parameters
+
+ TextTools provides several optional flags to customize LLM behavior:
+
+ - **`with_analysis=True`** → Adds a reasoning step before generating the final output. Useful for debugging, improving prompts, or understanding model behavior.
+ Note: This doubles token usage per call because it triggers an additional LLM request.
+
+ - **`logprobs=True`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
+
+ - **`output_lang="en"`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
+
+ - **`user_prompt="..."`** → Injects a custom instruction or prompt alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
+
+ - **`temperature=0.0`** → Controls how creative the model's responses are. Accepts a float from `0.0` to `1.0`.
+
+ - **`validator=validation_function`** → Makes TheTool validate the output with your custom validator. The validator should return a bool (`True` if the output is acceptable, `False` if validation failed). If validation fails, TheTool retries with an adjusted `temperature`.
+
+ All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
+
+ **Note:** Some tools may not support all of the parameters above.
+
+ ---
+
+ ## 🚀 Installation
+
+ Install the latest release via PyPI:
+
+ ```bash
+ pip install -U hamtaa-texttools
+ ```
+
+ ---
+
+ ## Sync vs Async
+ | Tool | Style | Use case |
+ |--------------|---------|---------------------------------------------|
+ | `TheTool` | Sync | Simple scripts, sequential workflows |
+ | `AsyncTheTool` | Async | High-throughput apps, APIs, concurrent tasks |
+
+ ---
+
+ ## ⚡ Quick Start (Sync)
+
+ ```python
+ from openai import OpenAI
+ from texttools import TheTool
+
+ # Create your OpenAI client
+ client = OpenAI(base_url="your_url", api_key="your_api_key")
+
+ # Specify the model
+ model = "gpt-4o-mini"
+
+ # Create an instance of TheTool
+ the_tool = TheTool(client=client, model=model)
+
+ # Example: Question Detection
+ detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
+ print(detection.result)
+ print(detection.logprobs)
+ # Output: True, followed by the token logprobs
+
+ # Example: Translation
+ translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
+ print(translation.result)
+ print(translation.analysis)
+ # Output: "Hi! How are you?", followed by the analysis
+ ```
+
+ ---
+
+ ## ⚡ Quick Start (Async)
+
+ ```python
+ import asyncio
+ from openai import AsyncOpenAI
+ from texttools import AsyncTheTool
+
+ async def main():
+     # Create your AsyncOpenAI client
+     async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
+
+     # Specify the model
+     model = "gpt-4o-mini"
+
+     # Create an instance of AsyncTheTool
+     the_tool = AsyncTheTool(client=async_client, model=model)
+
+     # Example: Async Translation
+     translation = await the_tool.translate("سلام، حالت چطوره؟", target_language="English")
+     print(translation.result)
+     # Output: "Hi! How are you?"
+
+ asyncio.run(main())
+ ```
+
+ ---
+
+ ## 📚 Use Cases
+
+ Use **TextTools** when you need to:
+
+ - 🔍 **Classify** large datasets quickly without model training
+ - 🌍 **Translate** and process multilingual corpora with ease
+ - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
+ - 📊 **Analyze** large text collections using embeddings and categorization
+ - 👍 **Automate** common text-processing tasks without reinventing the wheel
+
+ ---
+
+ ## 🤝 Contributing
+
+ Contributions are welcome!
+ Feel free to **open issues, suggest new features, or submit pull requests**.
+
+ ---
+
+ ## License
+
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,34 @@
+ LICENSE
+ MANIFEST.in
+ README.md
+ pyproject.toml
+ hamtaa_texttools.egg-info/PKG-INFO
+ hamtaa_texttools.egg-info/SOURCES.txt
+ hamtaa_texttools.egg-info/dependency_links.txt
+ hamtaa_texttools.egg-info/requires.txt
+ hamtaa_texttools.egg-info/top_level.txt
+ texttools/__init__.py
+ texttools/batch/__init__.py
+ texttools/batch/batch_manager.py
+ texttools/batch/batch_runner.py
+ texttools/prompts/README.md
+ texttools/prompts/categorizer.yaml
+ texttools/prompts/extract_entities.yaml
+ texttools/prompts/extract_keywords.yaml
+ texttools/prompts/is_question.yaml
+ texttools/prompts/merge_questions.yaml
+ texttools/prompts/rewrite.yaml
+ texttools/prompts/run_custom.yaml
+ texttools/prompts/subject_to_question.yaml
+ texttools/prompts/summarize.yaml
+ texttools/prompts/text_to_question.yaml
+ texttools/prompts/translate.yaml
+ texttools/tools/__init__.py
+ texttools/tools/async_the_tool.py
+ texttools/tools/the_tool.py
+ texttools/tools/internals/async_operator.py
+ texttools/tools/internals/base_operator.py
+ texttools/tools/internals/formatters.py
+ texttools/tools/internals/operator.py
+ texttools/tools/internals/output_models.py
+ texttools/tools/internals/prompt_loader.py
@@ -1,2 +1,2 @@
  openai==1.97.1
- PyYAML>=6.0
+ pyyaml>=6.0
@@ -1,32 +1,32 @@
- [build-system]
- requires = ["setuptools>=61.0", "wheel"]
- build-backend = "setuptools.build_meta"
-
- [project]
- name = "hamtaa-texttools"
- version = "1.0.2"
- authors = [
- { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
- { name = "Montazer", email = "montazerh82@gmail.com" },
- { name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
- { name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
- ]
- description = "TextTools is a high-level NLP toolkit built on top of modern LLMs."
- readme = "README.md"
- license = {file = "LICENSE"}
- requires-python = ">=3.8"
- dependencies = [
- "openai==1.97.1",
- "PyYAML>=6.0"
- ]
- keywords = ["nlp", "llm", "text-processing", "openai"]
-
- [tool.setuptools.packages.find]
- where = ["."]
- include = ["texttools*"]
-
- [tool.setuptools]
- include-package-data = true
-
- [tool.setuptools.package-data]
- "texttools" = ["prompts/*.yaml", "prompts/*.yml"]
+ [build-system]
+ requires = ["setuptools>=61.0", "wheel"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "hamtaa-texttools"
+ version = "1.1.3"
+ authors = [
+ { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
+ { name = "Montazer", email = "montazerh82@gmail.com" },
+ { name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
+ { name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
+ ]
+ description = "A high-level NLP toolkit built on top of modern LLMs."
+ readme = "README.md"
+ license = {file = "LICENSE"}
+ requires-python = ">=3.8"
+ dependencies = [
+ "openai==1.97.1",
+ "pyyaml>=6.0",
+ ]
+ keywords = ["nlp", "llm", "text-processing", "openai"]
+
+ [tool.setuptools.packages.find]
+ where = ["."]
+ include = ["texttools*"]
+
+ [tool.setuptools]
+ include-package-data = true
+
+ [tool.setuptools.package-data]
+ "texttools" = ["prompts/*.yaml", "prompts/*.yml"]