hamtaa-texttools 1.1.11__tar.gz → 1.1.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.11/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.20}/PKG-INFO +53 -17
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/README.md +51 -15
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20/hamtaa_texttools.egg-info}/PKG-INFO +53 -17
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/hamtaa_texttools.egg-info/SOURCES.txt +11 -9
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/pyproject.toml +34 -33
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/tests/test_all_async_tools.py +24 -3
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/tests/test_all_tools.py +47 -8
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/tests/test_output_validation.py +2 -2
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/__init__.py +2 -1
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/batch/batch_config.py +1 -1
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/batch/batch_runner.py +79 -68
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/batch/internals/batch_manager.py +6 -6
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/batch/internals/utils.py +1 -4
- hamtaa_texttools-1.1.20/texttools/internals/async_operator.py +246 -0
- hamtaa_texttools-1.1.20/texttools/internals/exceptions.py +28 -0
- hamtaa_texttools-1.1.20/texttools/internals/models.py +181 -0
- hamtaa_texttools-1.1.11/texttools/tools/internals/base_operator.py → hamtaa_texttools-1.1.20/texttools/internals/operator_utils.py +31 -17
- hamtaa_texttools-1.1.20/texttools/internals/prompt_loader.py +108 -0
- hamtaa_texttools-1.1.20/texttools/internals/sync_operator.py +244 -0
- hamtaa_texttools-1.1.20/texttools/internals/text_to_chunks.py +97 -0
- hamtaa_texttools-1.1.20/texttools/prompts/categorize.yaml +77 -0
- hamtaa_texttools-1.1.20/texttools/prompts/check_fact.yaml +19 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/extract_entities.yaml +1 -1
- hamtaa_texttools-1.1.20/texttools/prompts/extract_keywords.yaml +68 -0
- hamtaa_texttools-1.1.20/texttools/prompts/propositionize.yaml +22 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/run_custom.yaml +1 -1
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/text_to_question.yaml +6 -4
- hamtaa_texttools-1.1.20/texttools/tools/async_tools.py +1198 -0
- hamtaa_texttools-1.1.20/texttools/tools/sync_tools.py +1198 -0
- hamtaa_texttools-1.1.11/tests/test_logprobs.py +0 -38
- hamtaa_texttools-1.1.11/texttools/prompts/categorizer.yaml +0 -28
- hamtaa_texttools-1.1.11/texttools/prompts/extract_keywords.yaml +0 -18
- hamtaa_texttools-1.1.11/texttools/tools/async_tools.py +0 -444
- hamtaa_texttools-1.1.11/texttools/tools/internals/async_operator.py +0 -193
- hamtaa_texttools-1.1.11/texttools/tools/internals/formatters.py +0 -24
- hamtaa_texttools-1.1.11/texttools/tools/internals/output_models.py +0 -62
- hamtaa_texttools-1.1.11/texttools/tools/internals/prompt_loader.py +0 -56
- hamtaa_texttools-1.1.11/texttools/tools/internals/sync_operator.py +0 -191
- hamtaa_texttools-1.1.11/texttools/tools/sync_tools.py +0 -442
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/LICENSE +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/setup.cfg +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/README.md +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/is_question.yaml +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/merge_questions.yaml +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/rewrite.yaml +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/subject_to_question.yaml +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/summarize.yaml +0 -0
- {hamtaa_texttools-1.1.11 → hamtaa_texttools-1.1.20}/texttools/prompts/translate.yaml +0 -0
--- hamtaa_texttools-1.1.11/hamtaa_texttools.egg-info/PKG-INFO
+++ hamtaa_texttools-1.1.20/PKG-INFO
@@ -1,8 +1,8 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.11
+Version: 1.1.20
 Summary: A high-level NLP toolkit built on top of modern LLMs.
-Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
+Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 License: MIT License
 
 Copyright (c) 2025 Hamtaa
@@ -50,7 +50,7 @@ It provides ready-to-use utilities for **translation, question detection, keywor
 TextTools provides a rich collection of high-level NLP utilities,
 Each tool is designed to work with structured outputs (JSON / Pydantic).
 
-- **`categorize()`** - Classifies text into
+- **`categorize()`** - Classifies text into given categories (You have to create a category tree)
 - **`extract_keywords()`** - Extracts keywords from text
 - **`extract_entities()`** - Named Entity Recognition (NER) system
 - **`is_question()`** - Binary detection of whether input is a question
@@ -60,28 +60,61 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
 - **`subject_to_question()`** - Generates questions about a specific subject
 - **`summarize()`** - Text summarization
 - **`translate()`** - Text translation between languages
+- **`propositionize()`** - Convert text to atomic independence meaningful sentences
+- **`check_fact()`** - Check a statement is relevant to source text or not
 - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
 
 ---
 
-##
+## 📊 Tool Quality Tiers
+
+| Status | Meaning | Use in Production? |
+|--------|---------|-------------------|
+| **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
+| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
+
+### Current Status
+**Production Tools:**
+- `categorize()` (list mode)
+- `extract_keywords()`
+- `extract_entities()`
+- `is_question()`
+- `text_to_question()`
+- `merge_questions()`
+- `rewrite()`
+- `subject_to_question()`
+- `summarize()`
+- `run_custom()` (fine in most cases)
+
+**Experimental Tools:**
+- `categorize()` (tree mode)
+- `translate()`
+- `propositionize()`
+- `check_fact()`
+- `run_custom()` (not evaluated in all scenarios)
+
+---
+
+## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
 
 TextTools provides several optional flags to customize LLM behavior:
 
-- **`with_analysis
+- **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
 **Note:** This doubles token usage per call because it triggers an additional LLM request.
 
-- **`logprobs
+- **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
+**Note:** This feature works if it's supported by the model.
 
-- **`output_lang
+- **`output_lang: str`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
 
-- **`user_prompt
+- **`user_prompt: str`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
 
-- **`temperature
+- **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
 
-- **`validator (
+- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a bool (True if there were no problem, False if the validation fails.) If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can specify `max_validation_retries=<N>` to change the number of retries.
 
-
+- **`priority: int (Experimental)`** → Task execution priority level. Higher values = higher priority. Affects processing order in queues.
+**Note:** This feature works if it's supported by the model and vLLM.
 
 **Note:** There might be some tools that don't support some of the parameters above.
 
@@ -90,12 +123,15 @@ All these parameters can be used individually or together to tailor the behavior
 ## 🧩 ToolOutput
 
 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
-- **`result
-- **`analysis
-- **`logprobs
-- **`
-
-
+- **`result: Any`** → The output of LLM
+- **`analysis: str`** → The reasoning step before generating the final output
+- **`logprobs: list`** → Token-level probabilities for the generated output
+- **`process: str`** → The tool name which processed the input
+- **`processed_at: datetime`** → The process time
+- **`execution_time: float`** → The execution time (seconds)
+- **`errors: list[str]`** → Any error that have occured during calling LLM
+
+**Note:** You can use `repr(ToolOutput)` to see details of your ToolOutput.
 
 ---
 
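For orientation, here is a minimal sketch of how the optional parameters and `ToolOutput` fields documented in the README text above might be used together. The endpoint URL and model name are placeholders, and the exact signatures should be checked against the package itself.

```python
from openai import OpenAI
from texttools import TheTool

# Placeholder endpoint/model; TheTool(client=..., model=...) mirrors the tests further below.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
t = TheTool(client=client, model="my-model")

summary = t.summarize(
    "Tomorrow, we will be dead by the car crash",
    with_analysis=True,     # extra reasoning step (doubles token usage per the README)
    output_lang="English",  # force the response language
    temperature=0.2,        # 0.0-2.0 per the README
)

print(summary.result)          # the LLM output
print(summary.analysis)        # reasoning step, filled when with_analysis=True
print(summary.process)         # name of the tool that produced the output
print(summary.execution_time)  # seconds
print(summary.errors)          # errors raised while calling the LLM, if any
```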
--- hamtaa_texttools-1.1.11/README.md
+++ hamtaa_texttools-1.1.20/README.md
@@ -15,7 +15,7 @@ It provides ready-to-use utilities for **translation, question detection, keywor
 TextTools provides a rich collection of high-level NLP utilities,
 Each tool is designed to work with structured outputs (JSON / Pydantic).
 
-- **`categorize()`** - Classifies text into
+- **`categorize()`** - Classifies text into given categories (You have to create a category tree)
 - **`extract_keywords()`** - Extracts keywords from text
 - **`extract_entities()`** - Named Entity Recognition (NER) system
 - **`is_question()`** - Binary detection of whether input is a question
@@ -25,28 +25,61 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
 - **`subject_to_question()`** - Generates questions about a specific subject
 - **`summarize()`** - Text summarization
 - **`translate()`** - Text translation between languages
+- **`propositionize()`** - Convert text to atomic independence meaningful sentences
+- **`check_fact()`** - Check a statement is relevant to source text or not
 - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
 
 ---
 
-##
+## 📊 Tool Quality Tiers
+
+| Status | Meaning | Use in Production? |
+|--------|---------|-------------------|
+| **✅ Production** | Evaluated, tested, stable. | **Yes** - ready for reliable use. |
+| **🧪 Experimental** | Added to the package but **not fully evaluated**. Functional, but quality may vary. | **Use with caution** - outputs not yet validated. |
+
+### Current Status
+**Production Tools:**
+- `categorize()` (list mode)
+- `extract_keywords()`
+- `extract_entities()`
+- `is_question()`
+- `text_to_question()`
+- `merge_questions()`
+- `rewrite()`
+- `subject_to_question()`
+- `summarize()`
+- `run_custom()` (fine in most cases)
+
+**Experimental Tools:**
+- `categorize()` (tree mode)
+- `translate()`
+- `propositionize()`
+- `check_fact()`
+- `run_custom()` (not evaluated in all scenarios)
+
+---
+
+## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature`, `validator` and `priority` parameters
 
 TextTools provides several optional flags to customize LLM behavior:
 
-- **`with_analysis
+- **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
 **Note:** This doubles token usage per call because it triggers an additional LLM request.
 
-- **`logprobs
+- **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
+**Note:** This feature works if it's supported by the model.
 
-- **`output_lang
+- **`output_lang: str`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
 
-- **`user_prompt
+- **`user_prompt: str`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
 
-- **`temperature
+- **`temperature: float`** → Determines how creative the model should respond. Takes a float number from `0.0` to `2.0`.
 
-- **`validator (
+- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output result based on your custom validator. Validator should return a bool (True if there were no problem, False if the validation fails.) If the validator fails, TheTool will retry to get another output by modifying `temperature`. You can specify `max_validation_retries=<N>` to change the number of retries.
 
-
+- **`priority: int (Experimental)`** → Task execution priority level. Higher values = higher priority. Affects processing order in queues.
+**Note:** This feature works if it's supported by the model and vLLM.
 
 **Note:** There might be some tools that don't support some of the parameters above.
 
@@ -55,12 +88,15 @@ All these parameters can be used individually or together to tailor the behavior
 ## 🧩 ToolOutput
 
 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
-- **`result
-- **`analysis
-- **`logprobs
-- **`
-
-
+- **`result: Any`** → The output of LLM
+- **`analysis: str`** → The reasoning step before generating the final output
+- **`logprobs: list`** → Token-level probabilities for the generated output
+- **`process: str`** → The tool name which processed the input
+- **`processed_at: datetime`** → The process time
+- **`execution_time: float`** → The execution time (seconds)
+- **`errors: list[str]`** → Any error that have occured during calling LLM
+
+**Note:** You can use `repr(ToolOutput)` to see details of your ToolOutput.
 
 ---
 
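The experimental `validator`, `max_validation_retries` and `priority` knobs described above can be sketched as follows. The validator function and every value here are illustrative assumptions, not package code, and `priority` is only honored when the serving backend (e.g. vLLM) supports it.

```python
# Reusing the `t = TheTool(...)` instance from the earlier sketch.
def non_empty(result) -> bool:
    # Custom validator: return True when the output is acceptable;
    # returning False makes TheTool retry with an adjusted temperature.
    return bool(result)

detection = t.is_question(
    "We will be dead by the car crash",
    validator=non_empty,
    max_validation_retries=2,  # retry up to 2 times when validation fails
    priority=5,                # higher value = processed earlier, where supported
    logprobs=True,
    top_logprobs=3,            # top-3 alternative tokens with their probabilities
)
print(repr(detection))
```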
--- hamtaa_texttools-1.1.11/PKG-INFO
+++ hamtaa_texttools-1.1.20/hamtaa_texttools.egg-info/PKG-INFO
(identical changes to the PKG-INFO diff shown above)
--- hamtaa_texttools-1.1.11/hamtaa_texttools.egg-info/SOURCES.txt
+++ hamtaa_texttools-1.1.20/hamtaa_texttools.egg-info/SOURCES.txt
@@ -9,19 +9,27 @@ hamtaa_texttools.egg-info/requires.txt
 hamtaa_texttools.egg-info/top_level.txt
 tests/test_all_async_tools.py
 tests/test_all_tools.py
-tests/test_logprobs.py
 tests/test_output_validation.py
 texttools/__init__.py
 texttools/batch/batch_config.py
 texttools/batch/batch_runner.py
 texttools/batch/internals/batch_manager.py
 texttools/batch/internals/utils.py
+texttools/internals/async_operator.py
+texttools/internals/exceptions.py
+texttools/internals/models.py
+texttools/internals/operator_utils.py
+texttools/internals/prompt_loader.py
+texttools/internals/sync_operator.py
+texttools/internals/text_to_chunks.py
 texttools/prompts/README.md
-texttools/prompts/categorizer.yaml
+texttools/prompts/categorize.yaml
+texttools/prompts/check_fact.yaml
 texttools/prompts/extract_entities.yaml
 texttools/prompts/extract_keywords.yaml
 texttools/prompts/is_question.yaml
 texttools/prompts/merge_questions.yaml
+texttools/prompts/propositionize.yaml
 texttools/prompts/rewrite.yaml
 texttools/prompts/run_custom.yaml
 texttools/prompts/subject_to_question.yaml
@@ -29,10 +37,4 @@ texttools/prompts/summarize.yaml
 texttools/prompts/text_to_question.yaml
 texttools/prompts/translate.yaml
 texttools/tools/async_tools.py
-texttools/tools/sync_tools.py
-texttools/tools/internals/async_operator.py
-texttools/tools/internals/base_operator.py
-texttools/tools/internals/formatters.py
-texttools/tools/internals/output_models.py
-texttools/tools/internals/prompt_loader.py
-texttools/tools/internals/sync_operator.py
+texttools/tools/sync_tools.py
--- hamtaa_texttools-1.1.11/pyproject.toml
+++ hamtaa_texttools-1.1.20/pyproject.toml
@@ -1,33 +1,34 @@
-[build-system]
-requires = ["setuptools>=61.0", "wheel"]
-build-backend = "setuptools.build_meta"
-
-[project]
-name = "hamtaa-texttools"
-version = "1.1.11"
-authors = [
-    { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
-    { name = "Montazer", email = "montazerh82@gmail.com" },
-    { name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
-    { name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
-…
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "hamtaa-texttools"
+version = "1.1.20"
+authors = [
+    { name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
+    { name = "Montazer", email = "montazerh82@gmail.com" },
+    { name = "Givechi", email = "mohamad.m.givechi@gmail.com" },
+    { name = "MoosaviNejad", email = "erfanmoosavi84@gmail.com" },
+    { name = "Zareshahi", email = "a.zareshahi1377@gmail.com" },
+]
+description = "A high-level NLP toolkit built on top of modern LLMs."
+readme = "README.md"
+license = {file = "LICENSE"}
+requires-python = ">=3.8"
+dependencies = [
+    "openai==1.97.1",
+    "pydantic>=2.0.0",
+    "pyyaml>=6.0",
+]
+keywords = ["nlp", "llm", "text-processing", "openai"]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["texttools*"]
+
+[tool.setuptools]
+include-package-data = true
+
+[tool.setuptools.package-data]
+"texttools" = ["prompts/*.yaml", "prompts/*.yml"]
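For reference, the project metadata above implies the new release installs with `pip install hamtaa-texttools==1.1.20`, targets Python >= 3.8, pins `openai==1.97.1`, and accepts any `pydantic>=2.0.0` and `pyyaml>=6.0`; the prompt YAML files are now shipped as package data.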
--- hamtaa_texttools-1.1.11/tests/test_all_async_tools.py
+++ hamtaa_texttools-1.1.20/tests/test_all_async_tools.py
@@ -20,21 +20,36 @@ t = AsyncTheTool(client=client, model=MODEL)
 
 
 async def main():
-    category_task = t.categorize(
+    category_task = t.categorize(
+        "سلام حالت چطوره؟",
+        categories=["هیچکدام", "دینی", "فلسفه"],
+        logprobs=True,
+    )
     keywords_task = t.extract_keywords("Tomorrow, we will be dead by the car crash")
     entities_task = t.extract_entities("We will be dead by the car crash")
     detection_task = t.is_question("We will be dead by the car crash")
-    question_task = t.text_to_question("We will be dead by the car crash")
+    question_task = t.text_to_question("We will be dead by the car crash", 2)
     merged_task = t.merge_questions(
         ["چرا ما موجوداتی اجتماعی هستیم؟", "چرا باید در کنار هم زندگی کنیم؟"],
         mode="default",
+        with_analysis=True,
     )
     rewritten_task = t.rewrite(
-        "چرا ما انسان ها، موجوداتی اجتماعی هستیم؟",
+        "چرا ما انسان ها، موجوداتی اجتماعی هستیم؟",
+        mode="positive",
+        user_prompt="Be carefull",
     )
     questions_task = t.subject_to_question("Friendship", 3)
     summary_task = t.summarize("Tomorrow, we will be dead by the car crash")
     translation_task = t.translate("سلام حالت چطوره؟", target_language="English")
+    propositionize_task = t.propositionize(
+        "جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
+        output_lang="Persian",
+    )
+    check_fact_task = t.check_fact(
+        text="امام نهم در ایران به خاک سپرده شد",
+        source_text="حرم مطهر امام رضا علیه السلام در مشهد مقدس هست",
+    )
     (
         category,
         keywords,
@@ -46,6 +61,8 @@ async def main():
         questions,
         summary,
         translation,
+        propositionize,
+        check_fact,
     ) = await asyncio.gather(
         category_task,
         keywords_task,
@@ -57,6 +74,8 @@ async def main():
         questions_task,
         summary_task,
         translation_task,
+        propositionize_task,
+        check_fact_task,
     )
 
     for tool_output in (
@@ -70,6 +89,8 @@ async def main():
         questions,
         summary,
         translation,
+        propositionize,
+        check_fact,
     ):
         print(repr(tool_output))
 
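The async test above creates all tool coroutines first and then awaits them together with `asyncio.gather`. A condensed sketch of that pattern, limited to the two newly added tools, might look like the following; the client class, endpoint and model name are assumptions, since the test's own client setup falls outside the hunks shown.

```python
import asyncio

from openai import AsyncOpenAI  # assumption: an async-capable client for AsyncTheTool
from texttools import AsyncTheTool


async def main():
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")  # placeholder
    t = AsyncTheTool(client=client, model="my-model")                           # placeholder

    # Build both coroutines first, then run them concurrently.
    propositions, verdict = await asyncio.gather(
        t.propositionize(
            "World War II began in 1939 when Germany invaded Poland.",
            output_lang="English",
        ),
        t.check_fact(
            text="The ninth Imam was buried in Iran",
            source_text="The holy shrine of Imam Reza is in Mashhad",
        ),
    )
    print(repr(propositions))
    print(repr(verdict))


asyncio.run(main())
```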
--- hamtaa_texttools-1.1.11/tests/test_all_tools.py
+++ hamtaa_texttools-1.1.20/tests/test_all_tools.py
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
 from openai import OpenAI
 from pydantic import BaseModel
 
-from texttools import TheTool
+from texttools import TheTool, CategoryTree
 
 # Load environment variables from .env
 load_dotenv()
@@ -18,24 +18,49 @@ client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
 # Create an instance of TheTool
 t = TheTool(client=client, model=MODEL)
 
-# Categorizer
-category = t.categorize(
+# Categorizer: list mode
+category = t.categorize(
+    "سلام حالت چطوره؟",
+    categories=["هیچکدام", "دینی", "فلسفه"],
+    logprobs=True,
+    top_logprobs=-1,
+)
 print(repr(category))
 
+# Categorizer: tree mode
+tree = CategoryTree("category_test_tree")
+tree.add_node("اخلاق")
+tree.add_node("معرفت شناسی")
+tree.add_node("متافیزیک", description="اراده قدرت در حیطه متافیزیک است")
+tree.add_node("فلسفه ذهن", description="فلسفه ذهن به چگونگی درک ما از جهان می پردازد")
+tree.add_node("آگاهی", "فلسفه ذهن", description="آگاهی خیلی مهم است")
+tree.add_node("ذهن و بدن", "فلسفه ذهن")
+tree.add_node("امکان و ضرورت", "متافیزیک")
+
+categories = t.categorize(
+    "اراده قدرت مفهومی مهم در مابعد الطبیعه است که توسط نیچه مطرح شده",
+    tree,
+    mode="category_tree",
+)
+print(repr(categories))
+
 # Keyword Extractor
-keywords = t.extract_keywords(
+keywords = t.extract_keywords(
+    "Tomorrow, we will be dead by the car crash", mode="count", number_of_keywords=3
+)
 print(repr(keywords))
 
 # NER Extractor
-entities = t.extract_entities("
+entities = t.extract_entities("Ali will be dead by the car crash", entities=["EVENT"])
 print(repr(entities))
 
+
 # Question Detector
 detection = t.is_question("We will be dead by the car crash")
 print(repr(detection))
 
 # Question from Text Generator
-question = t.text_to_question("We will be dead by the car crash")
+question = t.text_to_question("We will be dead by the car crash", 2)
 print(repr(question))
 
 # Question Merger
@@ -52,7 +77,7 @@ rewritten = t.rewrite(
 )
 print(repr(rewritten))
 
-# Question
+# Question from Subject Generator
 questions = t.subject_to_question("Friendship", 3)
 print(repr(questions))
 
@@ -64,8 +89,22 @@ print(repr(summary))
 translation = t.translate("سلام حالت چطوره؟", target_language="English")
 print(repr(translation))
 
+# Propositionizer
+propositionize = t.propositionize(
+    "جنگ جهانی دوم در سال ۱۹۳۹ آغاز شد و آلمان به لهستان حمله کرد.",
+    output_lang="Persian",
+)
+print(repr(propositionize))
+
+# Check Fact
+check_fact = t.check_fact(
+    text="امام نهم در ایران به خاک سپرده شد",
+    source_text="حرم مطهر امام رضا علیه السلام در مشهد مقدس هست",
+)
+print(repr(check_fact))
+
 
-# Custom
+# Run Custom
 class Student(BaseModel):
     result: list[dict[str, str]]
 
--- hamtaa_texttools-1.1.11/texttools/__init__.py
+++ hamtaa_texttools-1.1.20/texttools/__init__.py
@@ -2,5 +2,6 @@ from .batch.batch_runner import BatchJobRunner
 from .batch.batch_config import BatchConfig
 from .tools.sync_tools import TheTool
 from .tools.async_tools import AsyncTheTool
+from .internals.models import CategoryTree
 
-__all__ = ["TheTool", "AsyncTheTool", "BatchJobRunner", "BatchConfig"]
+__all__ = ["TheTool", "AsyncTheTool", "BatchJobRunner", "BatchConfig", "CategoryTree"]
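With `CategoryTree` now exported from the package root, a compact end-to-end sketch of the experimental tree-mode categorizer follows. The `add_node(name, parent=None, description=None)` calling pattern is inferred from the tests above; the endpoint, model and category names are placeholders.

```python
from openai import OpenAI
from texttools import TheTool, CategoryTree

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")  # placeholder
tool = TheTool(client=client, model="my-model")                        # placeholder

# Build a small category tree; child nodes name their parent, as in the tests.
tree = CategoryTree("topics")
tree.add_node("science")
tree.add_node("physics", "science", description="sub-field of science")
tree.add_node("philosophy")

output = tool.categorize(
    "Quantum entanglement still puzzles physicists.",
    tree,
    mode="category_tree",  # tree mode is marked Experimental in the README
)
print(output.result, output.execution_time)
```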