hamtaa-texttools 1.1.11__tar.gz → 1.1.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {hamtaa_texttools-1.1.11/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.20}/PKG-INFO +53 -17
  2. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/README.md +51 -15
  3. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20/hamtaa_texttools.egg-info}/PKG-INFO +53 -17
  4. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/hamtaa_texttools.egg-info/SOURCES.txt +11 -9
  5. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/pyproject.toml +34 -33
  6. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/tests/test_all_async_tools.py +24 -3
  7. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/tests/test_all_tools.py +47 -8
  8. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/tests/test_output_validation.py +2 -2
  9. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/__init__.py +2 -1
  10. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/batch/batch_config.py +1 -1
  11. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/batch/batch_runner.py +79 -68
  12. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/batch/internals/batch_manager.py +6 -6
  13. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/batch/internals/utils.py +1 -4
  14. hamtaa_texttools-1.1.20/texttools/internals/async_operator.py +246 -0
  15. hamtaa_texttools-1.1.20/texttools/internals/exceptions.py +28 -0
  16. hamtaa_texttools-1.1.20/texttools/internals/models.py +181 -0
  17. hamtaa_texttools-1.1.11/texttools/tools/internals/base_operator.py → hamtaa_texttools-1.1.20/texttools/internals/operator_utils.py +31 -17
  18. hamtaa_texttools-1.1.20/texttools/internals/prompt_loader.py +108 -0
  19. hamtaa_texttools-1.1.20/texttools/internals/sync_operator.py +244 -0
  20. hamtaa_texttools-1.1.20/texttools/internals/text_to_chunks.py +97 -0
  21. hamtaa_texttools-1.1.20/texttools/prompts/categorize.yaml +77 -0
  22. hamtaa_texttools-1.1.20/texttools/prompts/check_fact.yaml +19 -0
  23. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/extract_entities.yaml +1 -1
  24. hamtaa_texttools-1.1.20/texttools/prompts/extract_keywords.yaml +68 -0
  25. hamtaa_texttools-1.1.20/texttools/prompts/propositionize.yaml +22 -0
  26. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/run_custom.yaml +1 -1
  27. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/text_to_question.yaml +6 -4
  28. hamtaa_texttools-1.1.20/texttools/tools/async_tools.py +1198 -0
  29. hamtaa_texttools-1.1.20/texttools/tools/sync_tools.py +1198 -0
  30. hamtaa_texttools-1.1.11/tests/test_logprobs.py +0 -38
  31. hamtaa_texttools-1.1.11/texttools/prompts/categorizer.yaml +0 -28
  32. hamtaa_texttools-1.1.11/texttools/prompts/extract_keywords.yaml +0 -18
  33. hamtaa_texttools-1.1.11/texttools/tools/async_tools.py +0 -444
  34. hamtaa_texttools-1.1.11/texttools/tools/internals/async_operator.py +0 -193
  35. hamtaa_texttools-1.1.11/texttools/tools/internals/formatters.py +0 -24
  36. hamtaa_texttools-1.1.11/texttools/tools/internals/output_models.py +0 -62
  37. hamtaa_texttools-1.1.11/texttools/tools/internals/prompt_loader.py +0 -56
  38. hamtaa_texttools-1.1.11/texttools/tools/internals/sync_operator.py +0 -191
  39. hamtaa_texttools-1.1.11/texttools/tools/sync_tools.py +0 -442
  40. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/LICENSE +0 -0
  41. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/MANIFEST.in +0 -0
  42. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
  43. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/hamtaa_texttools.egg-info/requires.txt +0 -0
  44. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/hamtaa_texttools.egg-info/top_level.txt +0 -0
  45. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/setup.cfg +0 -0
  46. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/README.md +0 -0
  47. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/is_question.yaml +0 -0
  48. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/merge_questions.yaml +0 -0
  49. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/rewrite.yaml +0 -0
  50. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/subject_to_question.yaml +0 -0
  51. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/summarize.yaml +0 -0
  52. {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/translate.yaml +0 -0
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.11
3
+ Version: 1.1.20
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
- Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
5
+ Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  License: MIT License
7
7
 
8
8
  Copyright (c) 2025 Hamtaa
@@ -50,7 +50,7 @@ It provides ready-to-use utilities for **translation, question detection, keywor
50
50
  TextTools provides a rich collection of high-level NLP utilities,
51
51
  Each tool is designed to work with structured outputs (JSON / Pydantic).
52
52
 
53
- - **`categorize()`** - Classifies text into Islamic studies categories
53
+ - **`categorize()`** - Classifies text into given categories (You have to create a category tree)
54
54
  - **`extract_keywords()`** - Extracts keywords from text
55
55
  - **`extract_entities()`** - Named Entity Recognition (NER) system
56
56
  - **`is_question()`** - Binary detection of whether input is a question
@@ -60,28 +60,61 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
60
60
  - **`subject_to_question()`** - Generates questions about a specific subject
61
61
  - **`summarize()`** - Text summarization
62
62
  - **`translate()`** - Text translation between languages
63
+ - **`propositionize()`** - Converts text into atomic, independent, meaningful sentences
64
+ - **`check_fact()`** - Checks whether a statement is supported by the source text
63
65
  - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
64
66
 
65
67
  ---
66
68
 
67
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature` and `validator` parameters
69
+ ## 📊 Tool Quality Tiers
70
+
71
+ | Status | Meaning | Use in Production? |
72
+ |--------|---------|-------------------|
73
+ | **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
74
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
75
+
76
+ ### Current Status
77
+ **Production Tools:**
78
+ - `categorize()` (list mode)
79
+ - `extract_keywords()`
80
+ - `extract_entities()`
81
+ - `is_question()`
82
+ - `text_to_question()`
83
+ - `merge_questions()`
84
+ - `rewrite()`
85
+ - `subject_to_question()`
86
+ - `summarize()`
87
+ - `run_custom()` (fine in most cases)
88
+
89
+ **Experimental Tools:**
90
+ - `categorize()` (tree mode)
91
+ - `translate()`
92
+ - `propositionize()`
93
+ - `check_fact()`
94
+ - `run_custom()` (not evaluated in all scenarios)
95
+
96
+ ---
97
+
98
+ ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
68
99
 
69
100
  TextTools provides several optional flags to customize LLM behavior:
70
101
 
71
- - **`with_analysis (bool)`** → Adds a reasoning step before generating the final output.
102
+ - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
72
103
  **Note:** This doubles token usage per call because it triggers an additional LLM request.
73
104
 
74
- - **`logprobs (bool)`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
105
+ - **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
106
+ **Note:** This feature works if it's supported by the model.
75
107
 
76
- - **`output_lang (str)`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
108
+ - **`output_lang: str`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
77
109
 
78
- - **`user_prompt (str)`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
110
+ - **`user_prompt: str`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
79
111
 
80
- - **`temperature (float)`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
112
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
81
113
 
82
- - **`validator (Callable)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return bool (True if there were no problem, False if the validation failed.) If validator failed, TheTool will retry to get another output by modifying `temperature`. You can specify `max_validation_retries=<N>` to change the number of retries.
114
+ - **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a bool (True if there were no problem, False if the validation fails.) If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can specify `max_validation_retries=<N>` to change the number of retries.
83
115
 
84
- All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
116
+ - **`priority: int (Experimental)`** → Task execution priority level. Higher values = higher priority. Affects processing order in queues.
117
+ **Note:** This feature works if it's supported by the model and vLLM.
85
118
 
86
119
  **Note:** There might be some tools that don't support some of the parameters above.
87
120
 
@@ -90,12 +123,15 @@ All these parameters can be used individually or together to tailor the behavior
90
123
  ## 🧩 ToolOutput
91
124
 
92
125
  Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
93
- - **`result (Any)`** → The output of LLM
94
- - **`analysis (str)`** → The reasoning step before generating the final output
95
- - **`logprobs (list)`** → Token-level probabilities for the generated output
96
- - **`errors (list[str])`** → Any error that have occured during calling LLM
97
-
98
- **None:** You can use `repr(ToolOutput)` to see details of an output.
126
+ - **`result: Any`** → The output of LLM
127
+ - **`analysis: str`** → The reasoning step before generating the final output
128
+ - **`logprobs: list`** → Token-level probabilities for the generated output
129
+ - **`process: str`** → The tool name which processed the input
130
+ - **`processed_at: datetime`** → The process time
131
+ - **`execution_time: float`** → The execution time (seconds)
132
+ - **`errors: list[str]`** → Any errors that occurred during the LLM call
133
+
134
+ **Note:** You can use `repr(ToolOutput)` to see details of your ToolOutput.
99
135
 
100
136
  ---
101
137
 
@@ -15,7 +15,7 @@ It provides ready-to-use utilities for **translation, question detection, keywor
15
15
  TextTools provides a rich collection of high-level NLP utilities,
16
16
  Each tool is designed to work with structured outputs (JSON / Pydantic).
17
17
 
18
- - **`categorize()`** - Classifies text into Islamic studies categories
18
+ - **`categorize()`** - Classifies text into given categories (You have to create a category tree)
19
19
  - **`extract_keywords()`** - Extracts keywords from text
20
20
  - **`extract_entities()`** - Named Entity Recognition (NER) system
21
21
  - **`is_question()`** - Binary detection of whether input is a question
@@ -25,28 +25,61 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
25
25
  - **`subject_to_question()`** - Generates questions about a specific subject
26
26
  - **`summarize()`** - Text summarization
27
27
  - **`translate()`** - Text translation between languages
28
+ - **`propositionize()`** - Converts text into atomic, independent, meaningful sentences
29
+ - **`check_fact()`** - Checks whether a statement is supported by the source text
28
30
  - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
29
31
 
30
32
  ---
31
33
 
32
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature` and `validator` parameters
34
+ ## 📊 Tool Quality Tiers
35
+
36
+ | Status | Meaning | Use in Production? |
37
+ |--------|---------|-------------------|
38
+ | **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
39
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
40
+
41
+ ### Current Status
42
+ **Production Tools:**
43
+ - `categorize()` (list mode)
44
+ - `extract_keywords()`
45
+ - `extract_entities()`
46
+ - `is_question()`
47
+ - `text_to_question()`
48
+ - `merge_questions()`
49
+ - `rewrite()`
50
+ - `subject_to_question()`
51
+ - `summarize()`
52
+ - `run_custom()` (fine in most cases)
53
+
54
+ **Experimental Tools:**
55
+ - `categorize()` (tree mode)
56
+ - `translate()`
57
+ - `propositionize()`
58
+ - `check_fact()`
59
+ - `run_custom()` (not evaluated in all scenarios)
60
+
61
+ ---
62
+
63
+ ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
33
64
 
34
65
  TextTools provides several optional flags to customize LLM behavior:
35
66
 
36
- - **`with_analysis (bool)`** → Adds a reasoning step before generating the final output.
67
+ - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
37
68
  **Note:** This doubles token usage per call because it triggers an additional LLM request.
38
69
 
39
- - **`logprobs (bool)`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
70
+ - **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
71
+ **Note:** This feature works if it's supported by the model.
40
72
 
41
- - **`output_lang (str)`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
73
+ - **`output_lang: str`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
42
74
 
43
- - **`user_prompt (str)`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
75
+ - **`user_prompt: str`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
44
76
 
45
- - **`temperature (float)`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
77
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
46
78
 
47
- - **`validator (Callable)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return bool (True if there were no problem, False if the validation failed.) If validator failed, TheTool will retry to get another output by modifying `temperature`. You can specify `max_validation_retries=<N>` to change the number of retries.
79
+ - **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a bool (True if there were no problem, False if the validation fails.) If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can specify `max_validation_retries=<N>` to change the number of retries.
48
80
 
49
- All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
81
+ - **`priority: int (Experimental)`** → Task execution priority level. Higher values = higher priority. Affects processing order in queues.
82
+ **Note:** This feature works if it's supported by the model and vLLM.
50
83
 
51
84
  **Note:** There might be some tools that don't support some of the parameters above.
52
85
 
@@ -55,12 +88,15 @@ All these parameters can be used individually or together to tailor the behavior
55
88
  ## 🧩 ToolOutput
56
89
 
57
90
  Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
58
- - **`result (Any)`** → The output of LLM
59
- - **`analysis (str)`** → The reasoning step before generating the final output
60
- - **`logprobs (list)`** → Token-level probabilities for the generated output
61
- - **`errors (list[str])`** → Any error that have occured during calling LLM
62
-
63
- **None:** You can use `repr(ToolOutput)` to see details of an output.
91
+ - **`result: Any`** → The output of LLM
92
+ - **`analysis: str`** → The reasoning step before generating the final output
93
+ - **`logprobs: list`** → Token-level probabilities for the generated output
94
+ - **`process: str`** → The tool name which processed the input
95
+ - **`processed_at: datetime`** → The process time
96
+ - **`execution_time: float`** → The execution time (seconds)
97
+ - **`errors: list[str]`** → Any errors that occurred during the LLM call
98
+
99
+ **Note:** You can use `repr(ToolOutput)` to see details of your ToolOutput.
64
100
 
65
101
  ---
66
102
 
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hamtaa-texttools
3
- Version: 1.1.11
3
+ Version: 1.1.20
4
4
  Summary: A high-level NLP toolkit built on top of modern LLMs.
5
- Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
5
+ Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
6
6
  License: MIT License
7
7
 
8
8
  Copyright (c) 2025 Hamtaa
@@ -50,7 +50,7 @@ It provides ready-to-use utilities for **translation, question detection, keywor
50
50
  TextTools provides a rich collection of high-level NLP utilities,
51
51
  Each tool is designed to work with structured outputs (JSON / Pydantic).
52
52
 
53
- - **`categorize()`** - Classifies text into Islamic studies categories
53
+ - **`categorize()`** - Classifies text into given categories (You have to create a category tree)
54
54
  - **`extract_keywords()`** - Extracts keywords from text
55
55
  - **`extract_entities()`** - Named Entity Recognition (NER) system
56
56
  - **`is_question()`** - Binary detection of whether input is a question
@@ -60,28 +60,61 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
60
60
  - **`subject_to_question()`** - Generates questions about a specific subject
61
61
  - **`summarize()`** - Text summarization
62
62
  - **`translate()`** - Text translation between languages
63
+ - **`propositionize()`** - Converts text into atomic, independent, meaningful sentences
64
+ - **`check_fact()`** - Checks whether a statement is supported by the source text
63
65
  - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
64
66
 
65
67
  ---
66
68
 
67
- ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature` and `validator` parameters
69
+ ## 📊 Tool Quality Tiers
70
+
71
+ | Status | Meaning | Use in Production? |
72
+ |--------|---------|-------------------|
73
+ | **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
74
+ | **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
75
+
76
+ ### Current Status
77
+ **Production Tools:**
78
+ - `categorize()` (list mode)
79
+ - `extract_keywords()`
80
+ - `extract_entities()`
81
+ - `is_question()`
82
+ - `text_to_question()`
83
+ - `merge_questions()`
84
+ - `rewrite()`
85
+ - `subject_to_question()`
86
+ - `summarize()`
87
+ - `run_custom()` (fine in most cases)
88
+
89
+ **Experimental Tools:**
90
+ - `categorize()` (tree mode)
91
+ - `translate()`
92
+ - `propositionize()`
93
+ - `check_fact()`
94
+ - `run_custom()` (not evaluated in all scenarios)
95
+
96
+ ---
97
+
98
+ ## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
68
99
 
69
100
  TextTools provides several optional flags to customize LLM behavior:
70
101
 
71
- - **`with_analysis (bool)`** → Adds a reasoning step before generating the final output.
102
+ - **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
72
103
  **Note:** This doubles token usage per call because it triggers an additional LLM request.
73
104
 
74
- - **`logprobs (bool)`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
105
+ - **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
106
+ **Note:** This feature works if it's supported by the model.
75
107
 
76
- - **`output_lang (str)`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
108
+ - **`output_lang: str`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
77
109
 
78
- - **`user_prompt (str)`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
110
+ - **`user_prompt: str`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
79
111
 
80
- - **`temperature (float)`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
112
+ - **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
81
113
 
82
- - **`validator (Callable)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return bool (True if there were no problem, False if the validation failed.) If validator failed, TheTool will retry to get another output by modifying `temperature`. You can specify `max_validation_retries=<N>` to change the number of retries.
114
+ - **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a bool (True if there were no problem, False if the validation fails.) If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can specify `max_validation_retries=<N>` to change the number of retries.
83
115
 
84
- All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
116
+ - **`priority: int (Experimental)`** → Task execution priority level. Higher values = higher priority. Affects processing order in queues.
117
+ **Note:** This feature works if it's supported by the model and vLLM.
85
118
 
86
119
  **Note:** There might be some tools that don't support some of the parameters above.
87
120
 
@@ -90,12 +123,15 @@ All these parameters can be used individually or together to tailor the behavior
90
123
  ## 🧩 ToolOutput
91
124
 
92
125
  Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
93
- - **`result (Any)`** → The output of LLM
94
- - **`analysis (str)`** → The reasoning step before generating the final output
95
- - **`logprobs (list)`** → Token-level probabilities for the generated output
96
- - **`errors (list[str])`** → Any error that have occured during calling LLM
97
-
98
- **None:** You can use `repr(ToolOutput)` to see details of an output.
126
+ - **`result: Any`** → The output of LLM
127
+ - **`analysis: str`** → The reasoning step before generating the final output
128
+ - **`logprobs: list`** → Token-level probabilities for the generated output
129
+ - **`process: str`** → The tool name which processed the input
130
+ - **`processed_at: datetime`** → The process time
131
+ - **`execution_time: float`** → The execution time (seconds)
132
+ - **`errors: list[str]`** → Any errors that occurred during the LLM call
133
+
134
+ **Note:** You can use `repr(ToolOutput)` to see details of your ToolOutput.
99
135
 
100
136
  ---
101
137
 
@@ -9,19 +9,27 @@ hamtaa_texttools.egg-info/requires.txt
9
9
  hamtaa_texttools.egg-info/top_level.txt
10
10
  tests/test_all_async_tools.py
11
11
  tests/test_all_tools.py
12
- tests/test_logprobs.py
13
12
  tests/test_output_validation.py
14
13
  texttools/__init__.py
15
14
  texttools/batch/batch_config.py
16
15
  texttools/batch/batch_runner.py
17
16
  texttools/batch/internals/batch_manager.py
18
17
  texttools/batch/internals/utils.py
18
+ texttools/internals/async_operator.py
19
+ texttools/internals/exceptions.py
20
+ texttools/internals/models.py
21
+ texttools/internals/operator_utils.py
22
+ texttools/internals/prompt_loader.py
23
+ texttools/internals/sync_operator.py
24
+ texttools/internals/text_to_chunks.py
19
25
  texttools/prompts/README.md
20
- texttools/prompts/categorizer.yaml
26
+ texttools/prompts/categorize.yaml
27
+ texttools/prompts/check_fact.yaml
21
28
  texttools/prompts/extract_entities.yaml
22
29
  texttools/prompts/extract_keywords.yaml
23
30
  texttools/prompts/is_question.yaml
24
31
  texttools/prompts/merge_questions.yaml
32
+ texttools/prompts/propositionize.yaml
25
33
  texttools/prompts/rewrite.yaml
26
34
  texttools/prompts/run_custom.yaml
27
35
  texttools/prompts/subject_to_question.yaml
@@ -29,10 +37,4 @@ texttools/prompts/summarize.yaml
29
37
  texttools/prompts/text_to_question.yaml
30
38
  texttools/prompts/translate.yaml
31
39
  texttools/tools/async_tools.py
32
- texttools/tools/sync_tools.py
33
- texttools/tools/internals/async_operator.py
34
- texttools/tools/internals/base_operator.py
35
- texttools/tools/internals/formatters.py
36
- texttools/tools/internals/output_models.py
37
- texttools/tools/internals/prompt_loader.py
38
- texttools/tools/internals/sync_operator.py
40
+ texttools/tools/sync_tools.py
@@ -1,33 +1,34 @@
1
- [build-system]
2
- requires = ["setuptools>=61.0", "wheel"]
3
- build-backend = "setuptools.build_meta"
4
-
5
- [project]
6
- name = "hamtaa-texttools"
7
- version = "1.1.11"
8
- authors = [
9
- { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
10
- { name = "Montazer", email = "montazerh82@gmail.com" },
11
- { name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
12
- { name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
13
- ]
14
- description = "A high-level NLP toolkit built on top of modern LLMs."
15
- readme = "README.md"
16
- license = {file = "LICENSE"}
17
- requires-python = ">=3.8"
18
- dependencies = [
19
- "openai==1.97.1",
20
- "pydantic>=2.0.0",
21
- "pyyaml>=6.0",
22
- ]
23
- keywords = ["nlp", "llm", "text-processing", "openai"]
24
-
25
- [tool.setuptools.packages.find]
26
- where = ["."]
27
- include = ["texttools*"]
28
-
29
- [tool.setuptools]
30
- include-package-data = true
31
-
32
- [tool.setuptools.package-data]
33
- "texttools" = ["prompts/*.yaml", "prompts/*.yml"]
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "hamtaa-texttools"
7
+ version = "1.1.20"
8
+ authors = [
9
+ { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
10
+ { name = "Montazer", email = "montazerh82@gmail.com" },
11
+ { name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
12
+ { name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
13
+ { name = "Zareshahi", email = "a.zareshahi1377@gmail.com" },
14
+ ]
15
+ description = "A high-level NLP toolkit built on top of modern LLMs."
16
+ readme = "README.md"
17
+ license = {file = "LICENSE"}
18
+ requires-python = ">=3.8"
19
+ dependencies = [
20
+ "openai==1.97.1",
21
+ "pydantic>=2.0.0",
22
+ "pyyaml>=6.0",
23
+ ]
24
+ keywords = ["nlp", "llm", "text-processing", "openai"]
25
+
26
+ [tool.setuptools.packages.find]
27
+ where = ["."]
28
+ include = ["texttools*"]
29
+
30
+ [tool.setuptools]
31
+ include-package-data = true
32
+
33
+ [tool.setuptools.package-data]
34
+ "texttools" = ["prompts/*.yaml", "prompts/*.yml"]
@@ -20,21 +20,36 @@ t = AsyncTheTool(client=client, model=MODEL)
20
20
 
21
21
 
22
22
  async def main():
23
- category_task = t.categorize("سلام حالت چطوره؟")
23
+ category_task = t.categorize(
24
+ "سلام حالت چطوره؟",
25
+ categories=["هیچکدام", "دینی", "فلسفه"],
26
+ logprobs=True,
27
+ )
24
28
  keywords_task = t.extract_keywords("Tomorrow, we will be dead by the car crash")
25
29
  entities_task = t.extract_entities("We will be dead by the car crash")
26
30
  detection_task = t.is_question("We will be dead by the car crash")
27
- question_task = t.text_to_question("We will be dead by the car crash")
31
+ question_task = t.text_to_question("We will be dead by the car crash", 2)
28
32
  merged_task = t.merge_questions(
29
33
  ["چرا ما موجوداتی اجتماعی هستیم؟", "چرا باید در کنار هم زندگی کنیم؟"],
30
34
  mode="default",
35
+ with_analysis=True,
31
36
  )
32
37
  rewritten_task = t.rewrite(
33
- "چرا ما انسان ها، موجوداتی اجتماعی هستیم؟", mode="positive"
38
+ "چرا ما انسان ها، موجوداتی اجتماعی هستیم؟",
39
+ mode="positive",
40
+ user_prompt="Be carefull",
34
41
  )
35
42
  questions_task = t.subject_to_question("Friendship", 3)
36
43
  summary_task = t.summarize("Tomorrow, we will be dead by the car crash")
37
44
  translation_task = t.translate("سلام حالت چطوره؟", target_language="English")
45
+ propositionize_task = t.propositionize(
46
+ "جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
47
+ output_lang="Persian",
48
+ )
49
+ check_fact_task = t.check_fact(
50
+ text="امام نهم در ایران به خاک سپرده شد",
51
+ source_text="حرم مطهر امام رضا علیه السلام در مشهد مقدس هست",
52
+ )
38
53
  (
39
54
  category,
40
55
  keywords,
@@ -46,6 +61,8 @@ async def main():
46
61
  questions,
47
62
  summary,
48
63
  translation,
64
+ propositionize,
65
+ check_fact,
49
66
  ) = await asyncio.gather(
50
67
  category_task,
51
68
  keywords_task,
@@ -57,6 +74,8 @@ async def main():
57
74
  questions_task,
58
75
  summary_task,
59
76
  translation_task,
77
+ propositionize_task,
78
+ check_fact_task,
60
79
  )
61
80
 
62
81
  for tool_output in (
@@ -70,6 +89,8 @@ async def main():
70
89
  questions,
71
90
  summary,
72
91
  translation,
92
+ propositionize,
93
+ check_fact,
73
94
  ):
74
95
  print(repr(tool_output))
75
96
 
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
4
4
  from openai import OpenAI
5
5
  from pydantic import BaseModel
6
6
 
7
- from texttools import TheTool
7
+ from texttools import TheTool, CategoryTree
8
8
 
9
9
  # Load environment variables from .env
10
10
  load_dotenv()
@@ -18,24 +18,49 @@ client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
18
18
  # Create an instance of TheTool
19
19
  t = TheTool(client=client, model=MODEL)
20
20
 
21
- # Categorizer
22
- category = t.categorize("سلام حالت چطوره؟")
21
+ # Categorizer: list mode
22
+ category = t.categorize(
23
+ "سلام حالت چطوره؟",
24
+ categories=["هیچکدام", "دینی", "فلسفه"],
25
+ logprobs=True,
26
+ top_logprobs=-1,
27
+ )
23
28
  print(repr(category))
24
29
 
30
+ # Categorizer: tree mode
31
+ tree = CategoryTree("category_test_tree")
32
+ tree.add_node("اخلاق")
33
+ tree.add_node("معرفت شناسی")
34
+ tree.add_node("متافیزیک", description="اراده قدرت در حیطه متافیزیک است")
35
+ tree.add_node("فلسفه ذهن", description="فلسفه ذهن به چگونگی درک ما از جهان می پردازد")
36
+ tree.add_node("آگاهی", "فلسفه ذهن", description="آگاهی خیلی مهم است")
37
+ tree.add_node("ذهن و بدن", "فلسفه ذهن")
38
+ tree.add_node("امکان و ضرورت", "متافیزیک")
39
+
40
+ categories = t.categorize(
41
+ "اراده قدرت مفهومی مهم در مابعد الطبیعه است که توسط نیچه مطرح شده",
42
+ tree,
43
+ mode="category_tree",
44
+ )
45
+ print(repr(categories))
46
+
25
47
  # Keyword Extractor
26
- keywords = t.extract_keywords("Tomorrow, we will be dead by the car crash")
48
+ keywords = t.extract_keywords(
49
+ "Tomorrow, we will be dead by the car crash", mode="count", number_of_keywords=3
50
+ )
27
51
  print(repr(keywords))
28
52
 
29
53
  # NER Extractor
30
- entities = t.extract_entities("We will be dead by the car crash")
54
+ entities = t.extract_entities("Ali will be dead by the car crash", entities=["EVENT"])
31
55
  print(repr(entities))
32
56
 
57
+
33
58
  # Question Detector
34
59
  detection = t.is_question("We will be dead by the car crash")
35
60
  print(repr(detection))
36
61
 
37
62
  # Question from Text Generator
38
- question = t.text_to_question("We will be dead by the car crash")
63
+ question = t.text_to_question("We will be dead by the car crash", 2)
39
64
  print(repr(question))
40
65
 
41
66
  # Question Merger
@@ -52,7 +77,7 @@ rewritten = t.rewrite(
52
77
  )
53
78
  print(repr(rewritten))
54
79
 
55
- # Question Generator from Subject
80
+ # Question from Subject Generator
56
81
  questions = t.subject_to_question("Friendship", 3)
57
82
  print(repr(questions))
58
83
 
@@ -64,8 +89,22 @@ print(repr(summary))
64
89
  translation = t.translate("سلام حالت چطوره؟", target_language="English")
65
90
  print(repr(translation))
66
91
 
92
+ # Propositionizer
93
+ propositionize = t.propositionize(
94
+ "جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
95
+ output_lang="Persian",
96
+ )
97
+ print(repr(propositionize))
98
+
99
+ # Check Fact
100
+ check_fact = t.check_fact(
101
+ text="امام نهم در ایران به خاک سپرده شد",
102
+ source_text="حرم مطهر امام رضا علیه السلام در مشهد مقدس هست",
103
+ )
104
+ print(repr(check_fact))
105
+
67
106
 
68
- # Custom tool
107
+ # Run Custom
69
108
  class Student(BaseModel):
70
109
  result: list[dict[str, str]]
71
110
 
@@ -29,7 +29,7 @@ question = t.text_to_question(
29
29
  "زندگی",
30
30
  output_lang="Persian",
31
31
  validator=validate,
32
- max_validation_retries=5,
32
+ max_validation_retries=0,
33
33
  temperature=1.0,
34
34
  )
35
- print(question)
35
+ print(repr(question))
@@ -2,5 +2,6 @@ from .batch.batch_runner import BatchJobRunner
2
2
  from .batch.batch_config import BatchConfig
3
3
  from .tools.sync_tools import TheTool
4
4
  from .tools.async_tools import AsyncTheTool
5
+ from .internals.models import CategoryTree
5
6
 
6
- __all__ = ["TheTool", "AsyncTheTool", "BatchJobRunner", "BatchConfig"]
7
+ __all__ = ["TheTool", "AsyncTheTool", "BatchJobRunner", "BatchConfig", "CategoryTree"]
@@ -1,5 +1,5 @@
1
1
  from dataclasses import dataclass
2
- from typing import Callable
2
+ from collections.abc import Callable
3
3
 
4
4
  from texttools.batch.internals.utils import import_data, export_data
5
5