hamtaa-texttools 1.0.2__tar.gz → 1.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.7}/LICENSE +20 -20
  2. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.7}/MANIFEST.in +2 -2
  3. hamtaa_texttools-1.1.7/PKG-INFO +228 -0
  4. hamtaa_texttools-1.1.7/README.md +194 -0
  5. hamtaa_texttools-1.1.7/hamtaa_texttools.egg-info/PKG-INFO +228 -0
  6. hamtaa_texttools-1.1.7/hamtaa_texttools.egg-info/SOURCES.txt +34 -0
  7. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  8. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/requires.txt +1 -1
  9. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.7}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  10. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.7}/pyproject.toml +32 -32
  11. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.7}/setup.cfg +4 -4
  12. hamtaa_texttools-1.1.7/texttools/__init__.py +4 -0
  13. hamtaa_texttools-1.1.7/texttools/batch/__init__.py +3 -0
  14. {hamtaa_texttools-1.0.2/texttools/utils/batch_manager → hamtaa_texttools-1.1.7/texttools/batch}/batch_manager.py +226 -240
  15. hamtaa_texttools-1.1.7/texttools/batch/batch_runner.py +254 -0
  16. {hamtaa_texttools-1.0.2 → hamtaa_texttools-1.1.7}/texttools/prompts/README.md +35 -31
  17. hamtaa_texttools-1.1.7/texttools/prompts/categorizer.yaml +28 -0
  18. hamtaa_texttools-1.0.2/texttools/prompts/ner_extractor.yaml → hamtaa_texttools-1.1.7/texttools/prompts/extract_entities.yaml +20 -18
  19. hamtaa_texttools-1.1.7/texttools/prompts/extract_keywords.yaml +18 -0
  20. hamtaa_texttools-1.0.2/texttools/prompts/question_detector.yaml → hamtaa_texttools-1.1.7/texttools/prompts/is_question.yaml +13 -11
  21. hamtaa_texttools-1.0.2/texttools/prompts/question_merger.yaml → hamtaa_texttools-1.1.7/texttools/prompts/merge_questions.yaml +45 -46
  22. hamtaa_texttools-1.1.7/texttools/prompts/rewrite.yaml +111 -0
  23. hamtaa_texttools-1.1.7/texttools/prompts/run_custom.yaml +7 -0
  24. hamtaa_texttools-1.0.2/texttools/prompts/subject_question_generator.yaml → hamtaa_texttools-1.1.7/texttools/prompts/subject_to_question.yaml +22 -27
  25. hamtaa_texttools-1.0.2/texttools/prompts/summarizer.yaml → hamtaa_texttools-1.1.7/texttools/prompts/summarize.yaml +13 -10
  26. hamtaa_texttools-1.0.2/texttools/prompts/question_generator.yaml → hamtaa_texttools-1.1.7/texttools/prompts/text_to_question.yaml +19 -21
  27. hamtaa_texttools-1.0.2/texttools/prompts/translator.yaml → hamtaa_texttools-1.1.7/texttools/prompts/translate.yaml +14 -13
  28. hamtaa_texttools-1.1.7/texttools/tools/__init__.py +4 -0
  29. hamtaa_texttools-1.1.7/texttools/tools/async_the_tool.py +435 -0
  30. hamtaa_texttools-1.1.7/texttools/tools/internals/async_operator.py +242 -0
  31. hamtaa_texttools-1.1.7/texttools/tools/internals/base_operator.py +100 -0
  32. hamtaa_texttools-1.1.7/texttools/tools/internals/formatters.py +24 -0
  33. hamtaa_texttools-1.1.7/texttools/tools/internals/operator.py +242 -0
  34. hamtaa_texttools-1.1.7/texttools/tools/internals/output_models.py +62 -0
  35. hamtaa_texttools-1.1.7/texttools/tools/internals/prompt_loader.py +60 -0
  36. hamtaa_texttools-1.1.7/texttools/tools/the_tool.py +433 -0
  37. hamtaa_texttools-1.0.2/PKG-INFO +0 -129
  38. hamtaa_texttools-1.0.2/README.md +0 -95
  39. hamtaa_texttools-1.0.2/hamtaa_texttools.egg-info/PKG-INFO +0 -129
  40. hamtaa_texttools-1.0.2/hamtaa_texttools.egg-info/SOURCES.txt +0 -33
  41. hamtaa_texttools-1.0.2/tests/test_tools.py +0 -65
  42. hamtaa_texttools-1.0.2/texttools/__init__.py +0 -9
  43. hamtaa_texttools-1.0.2/texttools/formatters/base_formatter.py +0 -33
  44. hamtaa_texttools-1.0.2/texttools/formatters/user_merge_formatter/user_merge_formatter.py +0 -47
  45. hamtaa_texttools-1.0.2/texttools/prompts/categorizer.yaml +0 -25
  46. hamtaa_texttools-1.0.2/texttools/prompts/keyword_extractor.yaml +0 -11
  47. hamtaa_texttools-1.0.2/texttools/prompts/question_rewriter.yaml +0 -44
  48. hamtaa_texttools-1.0.2/texttools/tools/__init__.py +0 -3
  49. hamtaa_texttools-1.0.2/texttools/tools/operator.py +0 -236
  50. hamtaa_texttools-1.0.2/texttools/tools/output_models.py +0 -54
  51. hamtaa_texttools-1.0.2/texttools/tools/prompt_loader.py +0 -84
  52. hamtaa_texttools-1.0.2/texttools/tools/the_tool.py +0 -291
  53. hamtaa_texttools-1.0.2/texttools/utils/__init__.py +0 -4
  54. hamtaa_texttools-1.0.2/texttools/utils/batch_manager/__init__.py +0 -4
  55. hamtaa_texttools-1.0.2/texttools/utils/batch_manager/batch_runner.py +0 -212
@@ -1,21 +1,21 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 Hamtaa
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Hamtaa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
21
  SOFTWARE.
@@ -1,2 +1,2 @@
1
- graft texttools/prompts
2
- global-exclude *.pyc
1
+ graft texttools/prompts
2
+ global-exclude *.pyc
@@ -0,0 +1,228 @@
1
+ Metadata-Version: 2.4
2
+ Name: hamtaa-texttools
3
+ Version: 1.1.7
4
+ Summary: A high-level NLP toolkit built on top of modern LLMs.
5
+ Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Hamtaa
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ Keywords: nlp,llm,text-processing,openai
28
+ Requires-Python: >=3.8
29
+ Description-Content-Type: text/markdown
30
+ License-File: LICENSE
31
+ Requires-Dist: openai==1.97.1
32
+ Requires-Dist: pyyaml>=6.0
33
+ Dynamic: license-file
34
+
35
+ # TextTools
36
+
37
+ ## 📌 Overview
38
+
39
+ **TextTools** is a high-level **NLP toolkit** built on top of modern **LLMs**.
40
+
41
+ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
42
+
43
+ It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
44
+
45
+ ---
46
+
47
+ ## ✨ Features
48
+
49
+ TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
50
+ Each tool is designed to work with structured outputs (JSON / Pydantic).
51
+
52
+ - **`categorize()`** - Classifies text into Islamic studies categories
53
+ - **`is_question()`** - Binary detection of whether input is a question
54
+ - **`extract_keywords()`** - Extracts keywords from text
55
+ - **`extract_entities()`** - Named Entity Recognition (NER) system
56
+ - **`summarize()`** - Text summarization
57
+ - **`text_to_question()`** - Generates questions from text
58
+ - **`merge_questions()`** - Merges multiple questions with different modes
59
+ - **`rewrite()`** - Rewrites text with different wording/meaning
60
+ - **`subject_to_question()`** - Generates questions about a specific subject
61
+ - **`translate()`** - Text translation between languages
62
+ - **`run_custom()`** - Allows users to define a custom tool with arbitrary BaseModel
63
+
64
+ ---
65
+
66
+ ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature` and `validator` parameters
67
+
68
+ TextTools provides several optional flags to customize LLM behavior:
69
+
70
+ - **`with_analysis=True`** → Adds a reasoning step before generating the final output. Useful for debugging, improving prompts, or understanding model behavior.
71
+ Note: This doubles token usage per call because it triggers an additional LLM request.
72
+
73
+ - **`logprobs=True`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
74
+
75
+ - **`output_lang="en"`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
76
+
77
+ - **`user_prompt="..."`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
78
+
79
+ - **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
80
+
81
+ - **`validator=validation_function`** → Forces TheTool to validate the output result based on your custom validator. The validator should return a bool (True if there was no problem, False if the validation failed). If the validator fails, TheTool will retry to get another output by modifying `temperature`.
82
+
83
+ All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
84
+
85
+ **Note:** There might be some tools that don't support some of the parameters above.
86
+
87
+ ---
88
+
89
+ ## 🧩 ToolOutput
90
+
91
+ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
92
+ - **`result`** → The output of LLM (`type=Any`)
93
+ - **`analysis`** → The reasoning step before generating the final output (`type=str`)
94
+ - **`logprobs`** → Token-level probabilities for the generated output (`type=list`)
95
+ - **`errors`** → Any errors that have occurred while calling the LLM (`type=str`)
96
+
97
+ **Note:** You can use `repr(ToolOutput)` to see details of an output.
98
+
99
+ ---
100
+
101
+ ## 🚀 Installation
102
+
103
+ Install the latest release via PyPI:
104
+
105
+ ```bash
106
+ pip install -U hamtaa-texttools
107
+ ```
108
+
109
+ ---
110
+
111
+ ## Sync vs Async
112
+ | Tool | Style | Use case |
113
+ |--------------|---------|---------------------------------------------|
114
+ | `TheTool` | Sync | Simple scripts, sequential workflows |
115
+ | `AsyncTheTool` | Async | High-throughput apps, APIs, concurrent tasks |
116
+
117
+ ---
118
+
119
+ ## ⚡ Quick Start (Sync)
120
+
121
+ ```python
122
+ from openai import OpenAI
123
+ from texttools import TheTool
124
+
125
+ # Create your OpenAI client
126
+ client = OpenAI(base_url="your_url", api_key="your_api_key")
127
+
128
+ # Specify the model
129
+ model = "gpt-4o-mini"
130
+
131
+ # Create an instance of TheTool
132
+ the_tool = TheTool(client=client, model=model)
133
+
134
+ # Example: Question Detection
135
+ detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
136
+ print(detection.result)
137
+ print(detection.logprobs)
138
+ # Output: True + logprobs
139
+
140
+ # Example: Translation
141
+ translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
142
+ print(translation.result)
143
+ print(translation.analysis)
144
+ # Output: "Hi! How are you?" + analysis
145
+ ```
146
+
147
+ ---
148
+
149
+ ## ⚡ Quick Start (Async)
150
+
151
+ ```python
152
+ import asyncio
153
+ from openai import AsyncOpenAI
154
+ from texttools import AsyncTheTool
155
+
156
+ async def main():
157
+ # Create your AsyncOpenAI client
158
+ async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
159
+
160
+ # Specify the model
161
+ model = "gpt-4o-mini"
162
+
163
+ # Create an instance of AsyncTheTool
164
+ async_the_tool = AsyncTheTool(client=async_client, model=model)
165
+
166
+ # Example: Async Translation and Keyword Extraction
167
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
168
+ keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
169
+
170
+ (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
171
+ print(translation.result)
172
+ print(keywords.result)
173
+
174
+ asyncio.run(main())
175
+ ```
176
+
177
+ ---
178
+
179
+ ## 👍 Use Cases
180
+
181
+ Use **TextTools** when you need to:
182
+
183
+ - 🔍 **Classify** large datasets quickly without model training
184
+ - 🌍 **Translate** and process multilingual corpora with ease
185
+ - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
186
+ - 📊 **Analyze** large text collections using embeddings and categorization
187
+
188
+ ---
189
+
190
+ ## 📚 Batch Processing
191
+
192
+ Process large datasets efficiently using OpenAI's batch API.
193
+
194
+ ## Quick Start
195
+
196
+ ```python
197
+ from texttools import BatchJobRunner, BatchConfig
198
+
199
+ # Configure your batch job
200
+ config = BatchConfig(
201
+ system_prompt="Extract entities from the text",
202
+ job_name="entity_extraction",
203
+ input_data_path="data.json",
204
+ output_data_filename="results.json",
205
+ model="gpt-4o-mini"
206
+ )
207
+
208
+ # Define your output schema
209
+ class Output(BaseModel):
210
+ entities: list[str]
211
+
212
+ # Run the batch job
213
+ runner = BatchJobRunner(config, output_model=Output)
214
+ runner.run()
215
+ ```
216
+
217
+ ---
218
+
219
+ ## 🤝 Contributing
220
+
221
+ Contributions are welcome!
222
+ Feel free to **open issues, suggest new features, or submit pull requests**.
223
+
224
+ ---
225
+
226
+ ## License
227
+
228
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,194 @@
1
+ # TextTools
2
+
3
+ ## 📌 Overview
4
+
5
+ **TextTools** is a high-level **NLP toolkit** built on top of modern **LLMs**.
6
+
7
+ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
8
+
9
+ It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
10
+
11
+ ---
12
+
13
+ ## ✨ Features
14
+
15
+ TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
16
+ Each tool is designed to work with structured outputs (JSON / Pydantic).
17
+
18
+ - **`categorize()`** - Classifies text into Islamic studies categories
19
+ - **`is_question()`** - Binary detection of whether input is a question
20
+ - **`extract_keywords()`** - Extracts keywords from text
21
+ - **`extract_entities()`** - Named Entity Recognition (NER) system
22
+ - **`summarize()`** - Text summarization
23
+ - **`text_to_question()`** - Generates questions from text
24
+ - **`merge_questions()`** - Merges multiple questions with different modes
25
+ - **`rewrite()`** - Rewrites text with different wording/meaning
26
+ - **`subject_to_question()`** - Generates questions about a specific subject
27
+ - **`translate()`** - Text translation between languages
28
+ - **`run_custom()`** - Allows users to define a custom tool with arbitrary BaseModel
29
+
30
+ ---
31
+
32
+ ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature` and `validator` parameters
33
+
34
+ TextTools provides several optional flags to customize LLM behavior:
35
+
36
+ - **`with_analysis=True`** → Adds a reasoning step before generating the final output. Useful for debugging, improving prompts, or understanding model behavior.
37
+ Note: This doubles token usage per call because it triggers an additional LLM request.
38
+
39
+ - **`logprobs=True`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
40
+
41
+ - **`output_lang="en"`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
42
+
43
+ - **`user_prompt="..."`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
44
+
45
+ - **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
46
+
47
+ - **`validator=validation_function`** → Forces TheTool to validate the output result based on your custom validator. The validator should return a bool (True if there was no problem, False if the validation failed). If the validator fails, TheTool will retry to get another output by modifying `temperature`.
48
+
49
+ All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
50
+
51
+ **Note:** There might be some tools that don't support some of the parameters above.
52
+
53
+ ---
54
+
55
+ ## 🧩 ToolOutput
56
+
57
+ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
58
+ - **`result`** → The output of LLM (`type=Any`)
59
+ - **`analysis`** → The reasoning step before generating the final output (`type=str`)
60
+ - **`logprobs`** → Token-level probabilities for the generated output (`type=list`)
61
+ - **`errors`** → Any errors that have occurred while calling the LLM (`type=str`)
62
+
63
+ **Note:** You can use `repr(ToolOutput)` to see details of an output.
64
+
65
+ ---
66
+
67
+ ## 🚀 Installation
68
+
69
+ Install the latest release via PyPI:
70
+
71
+ ```bash
72
+ pip install -U hamtaa-texttools
73
+ ```
74
+
75
+ ---
76
+
77
+ ## Sync vs Async
78
+ | Tool | Style | Use case |
79
+ |--------------|---------|---------------------------------------------|
80
+ | `TheTool` | Sync | Simple scripts, sequential workflows |
81
+ | `AsyncTheTool` | Async | High-throughput apps, APIs, concurrent tasks |
82
+
83
+ ---
84
+
85
+ ## ⚡ Quick Start (Sync)
86
+
87
+ ```python
88
+ from openai import OpenAI
89
+ from texttools import TheTool
90
+
91
+ # Create your OpenAI client
92
+ client = OpenAI(base_url="your_url", api_key="your_api_key")
93
+
94
+ # Specify the model
95
+ model = "gpt-4o-mini"
96
+
97
+ # Create an instance of TheTool
98
+ the_tool = TheTool(client=client, model=model)
99
+
100
+ # Example: Question Detection
101
+ detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
102
+ print(detection.result)
103
+ print(detection.logprobs)
104
+ # Output: True + logprobs
105
+
106
+ # Example: Translation
107
+ translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
108
+ print(translation.result)
109
+ print(translation.analysis)
110
+ # Output: "Hi! How are you?" + analysis
111
+ ```
112
+
113
+ ---
114
+
115
+ ## ⚡ Quick Start (Async)
116
+
117
+ ```python
118
+ import asyncio
119
+ from openai import AsyncOpenAI
120
+ from texttools import AsyncTheTool
121
+
122
+ async def main():
123
+ # Create your AsyncOpenAI client
124
+ async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
125
+
126
+ # Specify the model
127
+ model = "gpt-4o-mini"
128
+
129
+ # Create an instance of AsyncTheTool
130
+ async_the_tool = AsyncTheTool(client=async_client, model=model)
131
+
132
+ # Example: Async Translation and Keyword Extraction
133
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
134
+ keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
135
+
136
+ (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
137
+ print(translation.result)
138
+ print(keywords.result)
139
+
140
+ asyncio.run(main())
141
+ ```
142
+
143
+ ---
144
+
145
+ ## 👍 Use Cases
146
+
147
+ Use **TextTools** when you need to:
148
+
149
+ - 🔍 **Classify** large datasets quickly without model training
150
+ - 🌍 **Translate** and process multilingual corpora with ease
151
+ - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
152
+ - 📊 **Analyze** large text collections using embeddings and categorization
153
+
154
+ ---
155
+
156
+ ## 📚 Batch Processing
157
+
158
+ Process large datasets efficiently using OpenAI's batch API.
159
+
160
+ ## Quick Start
161
+
162
+ ```python
163
+ from texttools import BatchJobRunner, BatchConfig
164
+
165
+ # Configure your batch job
166
+ config = BatchConfig(
167
+ system_prompt="Extract entities from the text",
168
+ job_name="entity_extraction",
169
+ input_data_path="data.json",
170
+ output_data_filename="results.json",
171
+ model="gpt-4o-mini"
172
+ )
173
+
174
+ # Define your output schema
175
+ class Output(BaseModel):
176
+ entities: list[str]
177
+
178
+ # Run the batch job
179
+ runner = BatchJobRunner(config, output_model=Output)
180
+ runner.run()
181
+ ```
182
+
183
+ ---
184
+
185
+ ## 🤝 Contributing
186
+
187
+ Contributions are welcome!
188
+ Feel free to **open issues, suggest new features, or submit pull requests**.
189
+
190
+ ---
191
+
192
+ ## License
193
+
194
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,228 @@
1
+ Metadata-Version: 2.4
2
+ Name: hamtaa-texttools
3
+ Version: 1.1.7
4
+ Summary: A high-level NLP toolkit built on top of modern LLMs.
5
+ Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Hamtaa
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ Keywords: nlp,llm,text-processing,openai
28
+ Requires-Python: >=3.8
29
+ Description-Content-Type: text/markdown
30
+ License-File: LICENSE
31
+ Requires-Dist: openai==1.97.1
32
+ Requires-Dist: pyyaml>=6.0
33
+ Dynamic: license-file
34
+
35
+ # TextTools
36
+
37
+ ## 📌 Overview
38
+
39
+ **TextTools** is a high-level **NLP toolkit** built on top of modern **LLMs**.
40
+
41
+ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
42
+
43
+ It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
44
+
45
+ ---
46
+
47
+ ## ✨ Features
48
+
49
+ TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
50
+ Each tool is designed to work with structured outputs (JSON / Pydantic).
51
+
52
+ - **`categorize()`** - Classifies text into Islamic studies categories
53
+ - **`is_question()`** - Binary detection of whether input is a question
54
+ - **`extract_keywords()`** - Extracts keywords from text
55
+ - **`extract_entities()`** - Named Entity Recognition (NER) system
56
+ - **`summarize()`** - Text summarization
57
+ - **`text_to_question()`** - Generates questions from text
58
+ - **`merge_questions()`** - Merges multiple questions with different modes
59
+ - **`rewrite()`** - Rewrites text with different wording/meaning
60
+ - **`subject_to_question()`** - Generates questions about a specific subject
61
+ - **`translate()`** - Text translation between languages
62
+ - **`run_custom()`** - Allows users to define a custom tool with arbitrary BaseModel
63
+
64
+ ---
65
+
66
+ ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature` and `validator` parameters
67
+
68
+ TextTools provides several optional flags to customize LLM behavior:
69
+
70
+ - **`with_analysis=True`** → Adds a reasoning step before generating the final output. Useful for debugging, improving prompts, or understanding model behavior.
71
+ Note: This doubles token usage per call because it triggers an additional LLM request.
72
+
73
+ - **`logprobs=True`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
74
+
75
+ - **`output_lang="en"`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
76
+
77
+ - **`user_prompt="..."`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
78
+
79
+ - **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
80
+
81
+ - **`validator=validation_function`** → Forces TheTool to validate the output result based on your custom validator. The validator should return a bool (True if there was no problem, False if the validation failed). If the validator fails, TheTool will retry to get another output by modifying `temperature`.
82
+
83
+ All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
84
+
85
+ **Note:** There might be some tools that don't support some of the parameters above.
86
+
87
+ ---
88
+
89
+ ## 🧩 ToolOutput
90
+
91
+ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
92
+ - **`result`** → The output of LLM (`type=Any`)
93
+ - **`analysis`** → The reasoning step before generating the final output (`type=str`)
94
+ - **`logprobs`** → Token-level probabilities for the generated output (`type=list`)
95
+ - **`errors`** → Any errors that have occurred while calling the LLM (`type=str`)
96
+
97
+ **Note:** You can use `repr(ToolOutput)` to see details of an output.
98
+
99
+ ---
100
+
101
+ ## 🚀 Installation
102
+
103
+ Install the latest release via PyPI:
104
+
105
+ ```bash
106
+ pip install -U hamtaa-texttools
107
+ ```
108
+
109
+ ---
110
+
111
+ ## Sync vs Async
112
+ | Tool | Style | Use case |
113
+ |--------------|---------|---------------------------------------------|
114
+ | `TheTool` | Sync | Simple scripts, sequential workflows |
115
+ | `AsyncTheTool` | Async | High-throughput apps, APIs, concurrent tasks |
116
+
117
+ ---
118
+
119
+ ## ⚡ Quick Start (Sync)
120
+
121
+ ```python
122
+ from openai import OpenAI
123
+ from texttools import TheTool
124
+
125
+ # Create your OpenAI client
126
+ client = OpenAI(base_url="your_url", api_key="your_api_key")
127
+
128
+ # Specify the model
129
+ model = "gpt-4o-mini"
130
+
131
+ # Create an instance of TheTool
132
+ the_tool = TheTool(client=client, model=model)
133
+
134
+ # Example: Question Detection
135
+ detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
136
+ print(detection.result)
137
+ print(detection.logprobs)
138
+ # Output: True + logprobs
139
+
140
+ # Example: Translation
141
+ translation = the_tool.translate("سلام، حالت چطوره؟", target_language="English", with_analysis=True)
142
+ print(translation.result)
143
+ print(translation.analysis)
144
+ # Output: "Hi! How are you?" + analysis
145
+ ```
146
+
147
+ ---
148
+
149
+ ## ⚡ Quick Start (Async)
150
+
151
+ ```python
152
+ import asyncio
153
+ from openai import AsyncOpenAI
154
+ from texttools import AsyncTheTool
155
+
156
+ async def main():
157
+ # Create your AsyncOpenAI client
158
+ async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
159
+
160
+ # Specify the model
161
+ model = "gpt-4o-mini"
162
+
163
+ # Create an instance of AsyncTheTool
164
+ async_the_tool = AsyncTheTool(client=async_client, model=model)
165
+
166
+ # Example: Async Translation and Keyword Extraction
167
+ translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_language="English")
168
+ keywords_task = async_the_tool.extract_keywords("Tomorrow, we will be dead by the car crash")
169
+
170
+ (translation, keywords) = await asyncio.gather(translation_task, keywords_task)
171
+ print(translation.result)
172
+ print(keywords.result)
173
+
174
+ asyncio.run(main())
175
+ ```
176
+
177
+ ---
178
+
179
+ ## 👍 Use Cases
180
+
181
+ Use **TextTools** when you need to:
182
+
183
+ - 🔍 **Classify** large datasets quickly without model training
184
+ - 🌍 **Translate** and process multilingual corpora with ease
185
+ - 🧩 **Integrate** LLMs into production pipelines (structured outputs)
186
+ - 📊 **Analyze** large text collections using embeddings and categorization
187
+
188
+ ---
189
+
190
+ ## 📚 Batch Processing
191
+
192
+ Process large datasets efficiently using OpenAI's batch API.
193
+
194
+ ## Quick Start
195
+
196
+ ```python
197
+ from texttools import BatchJobRunner, BatchConfig
198
+
199
+ # Configure your batch job
200
+ config = BatchConfig(
201
+ system_prompt="Extract entities from the text",
202
+ job_name="entity_extraction",
203
+ input_data_path="data.json",
204
+ output_data_filename="results.json",
205
+ model="gpt-4o-mini"
206
+ )
207
+
208
+ # Define your output schema
209
+ class Output(BaseModel):
210
+ entities: list[str]
211
+
212
+ # Run the batch job
213
+ runner = BatchJobRunner(config, output_model=Output)
214
+ runner.run()
215
+ ```
216
+
217
+ ---
218
+
219
+ ## 🤝 Contributing
220
+
221
+ Contributions are welcome!
222
+ Feel free to **open issues, suggest new features, or submit pull requests**.
223
+
224
+ ---
225
+
226
+ ## License
227
+
228
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.