hamtaa-texttools 1.1.0__tar.gz → 1.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hamtaa-texttools might be problematic. Click here for more details.
- {hamtaa_texttools-1.1.0/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.5}/PKG-INFO +7 -5
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/README.md +5 -3
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5/hamtaa_texttools.egg-info}/PKG-INFO +7 -5
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/hamtaa_texttools.egg-info/requires.txt +1 -1
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/pyproject.toml +2 -2
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/batch/batch_manager.py +0 -1
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/batch/batch_runner.py +0 -1
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/async_the_tool.py +75 -23
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/internals/async_operator.py +72 -9
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/internals/base_operator.py +18 -8
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/internals/operator.py +72 -9
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/internals/output_models.py +7 -4
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/internals/prompt_loader.py +3 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/the_tool.py +75 -23
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/LICENSE +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/setup.cfg +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/__init__.py +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/batch/__init__.py +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/README.md +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/categorizer.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/extract_entities.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/extract_keywords.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/is_question.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/merge_questions.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/rewrite.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/run_custom.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/subject_to_question.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/summarize.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/text_to_question.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/translate.yaml +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/__init__.py +0 -0
- {hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/internals/formatters.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.5
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -29,7 +29,7 @@ Requires-Python: >=3.8
|
|
|
29
29
|
Description-Content-Type: text/markdown
|
|
30
30
|
License-File: LICENSE
|
|
31
31
|
Requires-Dist: openai==1.97.1
|
|
32
|
-
Requires-Dist:
|
|
32
|
+
Requires-Dist: pyyaml>=6.0
|
|
33
33
|
Dynamic: license-file
|
|
34
34
|
|
|
35
35
|
# TextTools
|
|
@@ -40,14 +40,14 @@ Dynamic: license-file
|
|
|
40
40
|
|
|
41
41
|
It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
|
|
42
42
|
|
|
43
|
-
It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER
|
|
43
|
+
It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
|
|
44
44
|
|
|
45
45
|
---
|
|
46
46
|
|
|
47
47
|
## ✨ Features
|
|
48
48
|
|
|
49
49
|
TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
|
|
50
|
-
Each tool is designed to work out-of-the-box with structured outputs (JSON / Pydantic).
|
|
50
|
+
Each tool is designed to work with structured outputs (JSON / Pydantic).
|
|
51
51
|
|
|
52
52
|
- **`categorize()`** - Classifies text into Islamic studies categories
|
|
53
53
|
- **`is_question()`** - Binary detection of whether input is a question
|
|
@@ -63,7 +63,7 @@ Each tool is designed to work out-of-the-box with structured outputs (JSON / Pyd
|
|
|
63
63
|
|
|
64
64
|
---
|
|
65
65
|
|
|
66
|
-
## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt` and `
|
|
66
|
+
## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature` and `validator` parameters
|
|
67
67
|
|
|
68
68
|
TextTools provides several optional flags to customize LLM behavior:
|
|
69
69
|
|
|
@@ -78,6 +78,8 @@ Note: This doubles token usage per call because it triggers an additional LLM re
|
|
|
78
78
|
|
|
79
79
|
- **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
|
|
80
80
|
|
|
81
|
+
- **`validator=validation_function`** → Forces TheTool to validate the output result with your custom validator. The validator should return a bool (True if there was no problem, False if validation failed). If validation fails, TheTool will retry with a modified `temperature` to get another output.
|
|
82
|
+
|
|
81
83
|
All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
|
|
82
84
|
|
|
83
85
|
**Note:** There might be some tools that don't support some of the parameters above.
|
|
@@ -6,14 +6,14 @@
|
|
|
6
6
|
|
|
7
7
|
It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
|
|
8
8
|
|
|
9
|
-
It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER
|
|
9
|
+
It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
13
|
## ✨ Features
|
|
14
14
|
|
|
15
15
|
TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
|
|
16
|
-
Each tool is designed to work out-of-the-box with structured outputs (JSON / Pydantic).
|
|
16
|
+
Each tool is designed to work with structured outputs (JSON / Pydantic).
|
|
17
17
|
|
|
18
18
|
- **`categorize()`** - Classifies text into Islamic studies categories
|
|
19
19
|
- **`is_question()`** - Binary detection of whether input is a question
|
|
@@ -29,7 +29,7 @@ Each tool is designed to work out-of-the-box with structured outputs (JSON / Pyd
|
|
|
29
29
|
|
|
30
30
|
---
|
|
31
31
|
|
|
32
|
-
## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt` and `
|
|
32
|
+
## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature` and `validator` parameters
|
|
33
33
|
|
|
34
34
|
TextTools provides several optional flags to customize LLM behavior:
|
|
35
35
|
|
|
@@ -44,6 +44,8 @@ Note: This doubles token usage per call because it triggers an additional LLM re
|
|
|
44
44
|
|
|
45
45
|
- **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
|
|
46
46
|
|
|
47
|
+
- **`validator=validation_function`** → Forces TheTool to validate the output result with your custom validator. The validator should return a bool (True if there was no problem, False if validation failed). If validation fails, TheTool will retry with a modified `temperature` to get another output.
|
|
48
|
+
|
|
47
49
|
All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
|
|
48
50
|
|
|
49
51
|
**Note:** There might be some tools that don't support some of the parameters above.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.5
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -29,7 +29,7 @@ Requires-Python: >=3.8
|
|
|
29
29
|
Description-Content-Type: text/markdown
|
|
30
30
|
License-File: LICENSE
|
|
31
31
|
Requires-Dist: openai==1.97.1
|
|
32
|
-
Requires-Dist:
|
|
32
|
+
Requires-Dist: pyyaml>=6.0
|
|
33
33
|
Dynamic: license-file
|
|
34
34
|
|
|
35
35
|
# TextTools
|
|
@@ -40,14 +40,14 @@ Dynamic: license-file
|
|
|
40
40
|
|
|
41
41
|
It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
|
|
42
42
|
|
|
43
|
-
It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER
|
|
43
|
+
It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extraction, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
|
|
44
44
|
|
|
45
45
|
---
|
|
46
46
|
|
|
47
47
|
## ✨ Features
|
|
48
48
|
|
|
49
49
|
TextTools provides a rich collection of high-level NLP utilities built on top of LLMs.
|
|
50
|
-
Each tool is designed to work out-of-the-box with structured outputs (JSON / Pydantic).
|
|
50
|
+
Each tool is designed to work with structured outputs (JSON / Pydantic).
|
|
51
51
|
|
|
52
52
|
- **`categorize()`** - Classifies text into Islamic studies categories
|
|
53
53
|
- **`is_question()`** - Binary detection of whether input is a question
|
|
@@ -63,7 +63,7 @@ Each tool is designed to work out-of-the-box with structured outputs (JSON / Pyd
|
|
|
63
63
|
|
|
64
64
|
---
|
|
65
65
|
|
|
66
|
-
## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt` and `
|
|
66
|
+
## ⚙️ `with_analysis`, `logprobs`, `output_lang`, `user_prompt`, `temperature` and `validator` parameters
|
|
67
67
|
|
|
68
68
|
TextTools provides several optional flags to customize LLM behavior:
|
|
69
69
|
|
|
@@ -78,6 +78,8 @@ Note: This doubles token usage per call because it triggers an additional LLM re
|
|
|
78
78
|
|
|
79
79
|
- **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
|
|
80
80
|
|
|
81
|
+
- **`validator=validation_function`** → Forces TheTool to validate the output result with your custom validator. The validator should return a bool (True if there was no problem, False if validation failed). If validation fails, TheTool will retry with a modified `temperature` to get another output.
|
|
82
|
+
|
|
81
83
|
All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
|
|
82
84
|
|
|
83
85
|
**Note:** There might be some tools that don't support some of the parameters above.
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
openai==1.97.1
|
|
2
|
-
|
|
2
|
+
pyyaml>=6.0
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "hamtaa-texttools"
|
|
7
|
-
version = "1.1.0"
|
|
7
|
+
version = "1.1.5"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name = "Tohidi", email = "the.mohammad.tohidi@gmail.com" },
|
|
10
10
|
{ name = "Montazer", email = "montazerh82@gmail.com" },
|
|
@@ -17,7 +17,7 @@ license = {file = "LICENSE"}
|
|
|
17
17
|
requires-python = ">=3.8"
|
|
18
18
|
dependencies = [
|
|
19
19
|
"openai==1.97.1",
|
|
20
|
-
"
|
|
20
|
+
"pyyaml>=6.0",
|
|
21
21
|
]
|
|
22
22
|
keywords = ["nlp", "llm", "text-processing", "openai"]
|
|
23
23
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Literal, Any
|
|
1
|
+
from typing import Literal, Any, Callable
|
|
2
2
|
|
|
3
3
|
from openai import AsyncOpenAI
|
|
4
4
|
|
|
@@ -34,12 +34,16 @@ class AsyncTheTool:
|
|
|
34
34
|
temperature: float | None = 0.0,
|
|
35
35
|
logprobs: bool = False,
|
|
36
36
|
top_logprobs: int | None = None,
|
|
37
|
-
|
|
37
|
+
validator: Callable[[Any], bool] | None = None,
|
|
38
|
+
) -> OutputModels.ToolOutput:
|
|
38
39
|
"""
|
|
39
40
|
Categorize a text into a single Islamic studies domain category.
|
|
40
41
|
|
|
41
42
|
Returns:
|
|
42
|
-
|
|
43
|
+
ToolOutput: Object containing:
|
|
44
|
+
- result (str): The assigned Islamic studies category
|
|
45
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
46
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
43
47
|
"""
|
|
44
48
|
return await self.operator.run(
|
|
45
49
|
# User parameters
|
|
@@ -49,6 +53,7 @@ class AsyncTheTool:
|
|
|
49
53
|
temperature=temperature,
|
|
50
54
|
logprobs=logprobs,
|
|
51
55
|
top_logprobs=top_logprobs,
|
|
56
|
+
validator=validator,
|
|
52
57
|
# Internal parameters
|
|
53
58
|
prompt_file="categorizer.yaml",
|
|
54
59
|
output_model=OutputModels.CategorizerOutput,
|
|
@@ -66,12 +71,16 @@ class AsyncTheTool:
|
|
|
66
71
|
temperature: float | None = 0.0,
|
|
67
72
|
logprobs: bool = False,
|
|
68
73
|
top_logprobs: int | None = None,
|
|
69
|
-
|
|
74
|
+
validator: Callable[[Any], bool] | None = None,
|
|
75
|
+
) -> OutputModels.ToolOutput:
|
|
70
76
|
"""
|
|
71
77
|
Extract salient keywords from text.
|
|
72
78
|
|
|
73
79
|
Returns:
|
|
74
|
-
|
|
80
|
+
ToolOutput: Object containing:
|
|
81
|
+
- result (list[str]): List of extracted keywords
|
|
82
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
83
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
75
84
|
"""
|
|
76
85
|
return await self.operator.run(
|
|
77
86
|
# User parameters
|
|
@@ -82,6 +91,7 @@ class AsyncTheTool:
|
|
|
82
91
|
temperature=temperature,
|
|
83
92
|
logprobs=logprobs,
|
|
84
93
|
top_logprobs=top_logprobs,
|
|
94
|
+
validator=validator,
|
|
85
95
|
# Internal parameters
|
|
86
96
|
prompt_file="extract_keywords.yaml",
|
|
87
97
|
output_model=OutputModels.ListStrOutput,
|
|
@@ -98,12 +108,16 @@ class AsyncTheTool:
|
|
|
98
108
|
temperature: float | None = 0.0,
|
|
99
109
|
logprobs: bool = False,
|
|
100
110
|
top_logprobs: int | None = None,
|
|
101
|
-
|
|
111
|
+
validator: Callable[[Any], bool] | None = None,
|
|
112
|
+
) -> OutputModels.ToolOutput:
|
|
102
113
|
"""
|
|
103
114
|
Perform Named Entity Recognition (NER) over the input text.
|
|
104
115
|
|
|
105
116
|
Returns:
|
|
106
|
-
|
|
117
|
+
ToolOutput: Object containing:
|
|
118
|
+
- result (list[dict]): List of entities with 'text' and 'type' keys
|
|
119
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
120
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
107
121
|
"""
|
|
108
122
|
return await self.operator.run(
|
|
109
123
|
# User parameters
|
|
@@ -114,6 +128,7 @@ class AsyncTheTool:
|
|
|
114
128
|
temperature=temperature,
|
|
115
129
|
logprobs=logprobs,
|
|
116
130
|
top_logprobs=top_logprobs,
|
|
131
|
+
validator=validator,
|
|
117
132
|
# Internal parameters
|
|
118
133
|
prompt_file="extract_entities.yaml",
|
|
119
134
|
output_model=OutputModels.ListDictStrStrOutput,
|
|
@@ -129,12 +144,16 @@ class AsyncTheTool:
|
|
|
129
144
|
temperature: float | None = 0.0,
|
|
130
145
|
logprobs: bool = False,
|
|
131
146
|
top_logprobs: int | None = None,
|
|
132
|
-
|
|
147
|
+
validator: Callable[[Any], bool] | None = None,
|
|
148
|
+
) -> OutputModels.ToolOutput:
|
|
133
149
|
"""
|
|
134
150
|
Detect if the input is phrased as a question.
|
|
135
151
|
|
|
136
152
|
Returns:
|
|
137
|
-
|
|
153
|
+
ToolOutput: Object containing:
|
|
154
|
+
- result (bool): True if text is a question, False otherwise
|
|
155
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
156
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
138
157
|
"""
|
|
139
158
|
return await self.operator.run(
|
|
140
159
|
# User parameters
|
|
@@ -144,6 +163,7 @@ class AsyncTheTool:
|
|
|
144
163
|
temperature=temperature,
|
|
145
164
|
logprobs=logprobs,
|
|
146
165
|
top_logprobs=top_logprobs,
|
|
166
|
+
validator=validator,
|
|
147
167
|
# Internal parameters
|
|
148
168
|
prompt_file="is_question.yaml",
|
|
149
169
|
output_model=OutputModels.BoolOutput,
|
|
@@ -161,12 +181,16 @@ class AsyncTheTool:
|
|
|
161
181
|
temperature: float | None = 0.0,
|
|
162
182
|
logprobs: bool = False,
|
|
163
183
|
top_logprobs: int | None = None,
|
|
164
|
-
|
|
184
|
+
validator: Callable[[Any], bool] | None = None,
|
|
185
|
+
) -> OutputModels.ToolOutput:
|
|
165
186
|
"""
|
|
166
187
|
Generate a single question from the given text.
|
|
167
188
|
|
|
168
189
|
Returns:
|
|
169
|
-
|
|
190
|
+
ToolOutput: Object containing:
|
|
191
|
+
- result (str): The generated question
|
|
192
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
193
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
170
194
|
"""
|
|
171
195
|
return await self.operator.run(
|
|
172
196
|
# User parameters
|
|
@@ -177,6 +201,7 @@ class AsyncTheTool:
|
|
|
177
201
|
temperature=temperature,
|
|
178
202
|
logprobs=logprobs,
|
|
179
203
|
top_logprobs=top_logprobs,
|
|
204
|
+
validator=validator,
|
|
180
205
|
# Internal parameters
|
|
181
206
|
prompt_file="text_to_question.yaml",
|
|
182
207
|
output_model=OutputModels.StrOutput,
|
|
@@ -194,12 +219,16 @@ class AsyncTheTool:
|
|
|
194
219
|
logprobs: bool = False,
|
|
195
220
|
top_logprobs: int | None = None,
|
|
196
221
|
mode: Literal["default", "reason"] = "default",
|
|
197
|
-
|
|
222
|
+
validator: Callable[[Any], bool] | None = None,
|
|
223
|
+
) -> OutputModels.ToolOutput:
|
|
198
224
|
"""
|
|
199
225
|
Merge multiple questions into a single unified question.
|
|
200
226
|
|
|
201
227
|
Returns:
|
|
202
|
-
|
|
228
|
+
ToolOutput: Object containing:
|
|
229
|
+
- result (str): The merged question
|
|
230
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
231
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
203
232
|
"""
|
|
204
233
|
text = ", ".join(text)
|
|
205
234
|
return await self.operator.run(
|
|
@@ -211,6 +240,7 @@ class AsyncTheTool:
|
|
|
211
240
|
temperature=temperature,
|
|
212
241
|
logprobs=logprobs,
|
|
213
242
|
top_logprobs=top_logprobs,
|
|
243
|
+
validator=validator,
|
|
214
244
|
# Internal parameters
|
|
215
245
|
prompt_file="merge_questions.yaml",
|
|
216
246
|
output_model=OutputModels.StrOutput,
|
|
@@ -228,12 +258,16 @@ class AsyncTheTool:
|
|
|
228
258
|
logprobs: bool = False,
|
|
229
259
|
top_logprobs: int | None = None,
|
|
230
260
|
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
231
|
-
|
|
261
|
+
validator: Callable[[Any], bool] | None = None,
|
|
262
|
+
) -> OutputModels.ToolOutput:
|
|
232
263
|
"""
|
|
233
264
|
Rewrite a text with different modes.
|
|
234
265
|
|
|
235
266
|
Returns:
|
|
236
|
-
|
|
267
|
+
ToolOutput: Object containing:
|
|
268
|
+
- result (str): The rewritten text
|
|
269
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
270
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
237
271
|
"""
|
|
238
272
|
return await self.operator.run(
|
|
239
273
|
# User parameters
|
|
@@ -244,6 +278,7 @@ class AsyncTheTool:
|
|
|
244
278
|
temperature=temperature,
|
|
245
279
|
logprobs=logprobs,
|
|
246
280
|
top_logprobs=top_logprobs,
|
|
281
|
+
validator=validator,
|
|
247
282
|
# Internal parameters
|
|
248
283
|
prompt_file="rewrite.yaml",
|
|
249
284
|
output_model=OutputModels.StrOutput,
|
|
@@ -261,12 +296,16 @@ class AsyncTheTool:
|
|
|
261
296
|
temperature: float | None = 0.0,
|
|
262
297
|
logprobs: bool = False,
|
|
263
298
|
top_logprobs: int | None = None,
|
|
264
|
-
|
|
299
|
+
validator: Callable[[Any], bool] | None = None,
|
|
300
|
+
) -> OutputModels.ToolOutput:
|
|
265
301
|
"""
|
|
266
302
|
Generate a list of questions about a subject.
|
|
267
303
|
|
|
268
304
|
Returns:
|
|
269
|
-
|
|
305
|
+
ToolOutput: Object containing:
|
|
306
|
+
- result (list[str]): List of generated questions
|
|
307
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
308
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
270
309
|
"""
|
|
271
310
|
return await self.operator.run(
|
|
272
311
|
# User parameters
|
|
@@ -278,6 +317,7 @@ class AsyncTheTool:
|
|
|
278
317
|
temperature=temperature,
|
|
279
318
|
logprobs=logprobs,
|
|
280
319
|
top_logprobs=top_logprobs,
|
|
320
|
+
validator=validator,
|
|
281
321
|
# Internal parameters
|
|
282
322
|
prompt_file="subject_to_question.yaml",
|
|
283
323
|
output_model=OutputModels.ReasonListStrOutput,
|
|
@@ -294,12 +334,16 @@ class AsyncTheTool:
|
|
|
294
334
|
temperature: float | None = 0.0,
|
|
295
335
|
logprobs: bool = False,
|
|
296
336
|
top_logprobs: int | None = None,
|
|
297
|
-
|
|
337
|
+
validator: Callable[[Any], bool] | None = None,
|
|
338
|
+
) -> OutputModels.ToolOutput:
|
|
298
339
|
"""
|
|
299
340
|
Summarize the given subject text.
|
|
300
341
|
|
|
301
342
|
Returns:
|
|
302
|
-
|
|
343
|
+
ToolOutput: Object containing:
|
|
344
|
+
- result (str): The summary text
|
|
345
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
346
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
303
347
|
"""
|
|
304
348
|
return await self.operator.run(
|
|
305
349
|
# User parameters
|
|
@@ -310,6 +354,7 @@ class AsyncTheTool:
|
|
|
310
354
|
temperature=temperature,
|
|
311
355
|
logprobs=logprobs,
|
|
312
356
|
top_logprobs=top_logprobs,
|
|
357
|
+
validator=validator,
|
|
313
358
|
# Internal parameters
|
|
314
359
|
prompt_file="summarize.yaml",
|
|
315
360
|
output_model=OutputModels.StrOutput,
|
|
@@ -326,12 +371,16 @@ class AsyncTheTool:
|
|
|
326
371
|
temperature: float | None = 0.0,
|
|
327
372
|
logprobs: bool = False,
|
|
328
373
|
top_logprobs: int | None = None,
|
|
329
|
-
|
|
374
|
+
validator: Callable[[Any], bool] | None = None,
|
|
375
|
+
) -> OutputModels.ToolOutput:
|
|
330
376
|
"""
|
|
331
377
|
Translate text between languages.
|
|
332
378
|
|
|
333
379
|
Returns:
|
|
334
|
-
|
|
380
|
+
ToolOutput: Object containing:
|
|
381
|
+
- result (str): The translated text
|
|
382
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
383
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
335
384
|
"""
|
|
336
385
|
return await self.operator.run(
|
|
337
386
|
# User parameters
|
|
@@ -342,6 +391,7 @@ class AsyncTheTool:
|
|
|
342
391
|
temperature=temperature,
|
|
343
392
|
logprobs=logprobs,
|
|
344
393
|
top_logprobs=top_logprobs,
|
|
394
|
+
validator=validator,
|
|
345
395
|
# Internal parameters
|
|
346
396
|
prompt_file="translate.yaml",
|
|
347
397
|
output_model=OutputModels.StrOutput,
|
|
@@ -358,12 +408,13 @@ class AsyncTheTool:
|
|
|
358
408
|
temperature: float | None = None,
|
|
359
409
|
logprobs: bool | None = None,
|
|
360
410
|
top_logprobs: int | None = None,
|
|
361
|
-
) ->
|
|
411
|
+
) -> OutputModels.ToolOutput:
|
|
362
412
|
"""
|
|
363
413
|
Custom tool that can do almost anything!
|
|
364
414
|
|
|
365
415
|
Returns:
|
|
366
|
-
|
|
416
|
+
ToolOutput: Object with fields:
|
|
417
|
+
- result (str): The output result
|
|
367
418
|
"""
|
|
368
419
|
return await self.operator.run(
|
|
369
420
|
# User paramaeters
|
|
@@ -380,4 +431,5 @@ class AsyncTheTool:
|
|
|
380
431
|
user_prompt=None,
|
|
381
432
|
with_analysis=False,
|
|
382
433
|
mode=None,
|
|
434
|
+
validator=None,
|
|
383
435
|
)
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/internals/async_operator.py
RENAMED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, TypeVar, Type, Literal
|
|
1
|
+
from typing import Any, TypeVar, Type, Literal, Callable
|
|
2
2
|
import logging
|
|
3
3
|
|
|
4
4
|
from openai import AsyncOpenAI
|
|
@@ -12,7 +12,6 @@ from texttools.tools.internals.prompt_loader import PromptLoader
|
|
|
12
12
|
# Base Model type for output models
|
|
13
13
|
T = TypeVar("T", bound=BaseModel)
|
|
14
14
|
|
|
15
|
-
# Configure logger
|
|
16
15
|
logger = logging.getLogger("async_operator")
|
|
17
16
|
logger.setLevel(logging.INFO)
|
|
18
17
|
|
|
@@ -32,6 +31,10 @@ class AsyncOperator(BaseOperator):
|
|
|
32
31
|
self.model = model
|
|
33
32
|
|
|
34
33
|
async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
|
|
34
|
+
"""
|
|
35
|
+
Calls OpenAI API for analysis using the configured prompt template.
|
|
36
|
+
Returns the analyzed content as a string.
|
|
37
|
+
"""
|
|
35
38
|
analyze_prompt = prompt_configs["analyze_template"]
|
|
36
39
|
analyze_message = [self._build_user_message(analyze_prompt)]
|
|
37
40
|
completion = await self.client.chat.completions.create(
|
|
@@ -50,6 +53,10 @@ class AsyncOperator(BaseOperator):
|
|
|
50
53
|
logprobs: bool = False,
|
|
51
54
|
top_logprobs: int = 3,
|
|
52
55
|
) -> tuple[Type[T], Any]:
|
|
56
|
+
"""
|
|
57
|
+
Parses a chat completion using OpenAI's structured output format.
|
|
58
|
+
Returns both the parsed object and the raw completion for logging.
|
|
59
|
+
"""
|
|
53
60
|
request_kwargs = {
|
|
54
61
|
"model": self.model,
|
|
55
62
|
"messages": message,
|
|
@@ -73,6 +80,10 @@ class AsyncOperator(BaseOperator):
|
|
|
73
80
|
logprobs: bool = False,
|
|
74
81
|
top_logprobs: int = 3,
|
|
75
82
|
) -> tuple[Type[T], Any]:
|
|
83
|
+
"""
|
|
84
|
+
Generates a completion using vLLM with JSON schema guidance.
|
|
85
|
+
Returns the parsed output model and raw completion.
|
|
86
|
+
"""
|
|
76
87
|
json_schema = output_model.model_json_schema()
|
|
77
88
|
|
|
78
89
|
# Build kwargs dynamically
|
|
@@ -104,20 +115,23 @@ class AsyncOperator(BaseOperator):
|
|
|
104
115
|
temperature: float,
|
|
105
116
|
logprobs: bool,
|
|
106
117
|
top_logprobs: int | None,
|
|
118
|
+
validator: Callable[[Any], bool] | None,
|
|
107
119
|
# Internal parameters
|
|
108
120
|
prompt_file: str,
|
|
109
121
|
output_model: Type[T],
|
|
110
122
|
resp_format: Literal["vllm", "parse"],
|
|
111
123
|
mode: str | None,
|
|
112
124
|
**extra_kwargs,
|
|
113
|
-
) ->
|
|
125
|
+
) -> ToolOutput:
|
|
114
126
|
"""
|
|
115
127
|
Execute the async LLM pipeline with the given input text. (Async)
|
|
116
128
|
"""
|
|
117
129
|
prompt_loader = PromptLoader()
|
|
118
130
|
formatter = Formatter()
|
|
131
|
+
output = ToolOutput()
|
|
119
132
|
|
|
120
133
|
try:
|
|
134
|
+
# Prompt configs contain two keys: main_template and analyze template, both are string
|
|
121
135
|
prompt_configs = prompt_loader.load(
|
|
122
136
|
prompt_file=prompt_file,
|
|
123
137
|
text=text.strip(),
|
|
@@ -159,14 +173,62 @@ class AsyncOperator(BaseOperator):
|
|
|
159
173
|
|
|
160
174
|
# Ensure output_model has a `result` field
|
|
161
175
|
if not hasattr(parsed, "result"):
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
)
|
|
165
|
-
|
|
166
|
-
output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
|
|
176
|
+
error = "The provided output_model must define a field named 'result'"
|
|
177
|
+
logger.error(error)
|
|
178
|
+
output.errors.append(error)
|
|
179
|
+
return output
|
|
167
180
|
|
|
168
181
|
output.result = parsed.result
|
|
169
182
|
|
|
183
|
+
# Retry logic if validation fails
|
|
184
|
+
if validator and not validator(output.result):
|
|
185
|
+
max_retries = 3
|
|
186
|
+
for attempt in range(max_retries):
|
|
187
|
+
logger.warning(
|
|
188
|
+
f"Validation failed, retrying for the {attempt + 1} time."
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
# Generate new temperature for retry
|
|
192
|
+
retry_temperature = self._get_retry_temp(temperature)
|
|
193
|
+
try:
|
|
194
|
+
if resp_format == "vllm":
|
|
195
|
+
parsed, completion = await self._vllm_completion(
|
|
196
|
+
messages,
|
|
197
|
+
output_model,
|
|
198
|
+
retry_temperature,
|
|
199
|
+
logprobs,
|
|
200
|
+
top_logprobs,
|
|
201
|
+
)
|
|
202
|
+
elif resp_format == "parse":
|
|
203
|
+
parsed, completion = await self._parse_completion(
|
|
204
|
+
messages,
|
|
205
|
+
output_model,
|
|
206
|
+
retry_temperature,
|
|
207
|
+
logprobs,
|
|
208
|
+
top_logprobs,
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
output.result = parsed.result
|
|
212
|
+
|
|
213
|
+
# Check if retry was successful
|
|
214
|
+
if validator(output.result):
|
|
215
|
+
logger.info(
|
|
216
|
+
f"Validation passed on retry attempt {attempt + 1}"
|
|
217
|
+
)
|
|
218
|
+
break
|
|
219
|
+
else:
|
|
220
|
+
logger.warning(
|
|
221
|
+
f"Validation still failing after retry attempt {attempt + 1}"
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
except Exception as e:
|
|
225
|
+
logger.error(f"Retry attempt {attempt + 1} failed: {e}")
|
|
226
|
+
# Continue to next retry attempt if this one fails
|
|
227
|
+
|
|
228
|
+
# Final check after all retries
|
|
229
|
+
if validator and not validator(output.result):
|
|
230
|
+
output.errors.append("Validation failed after all retry attempts")
|
|
231
|
+
|
|
170
232
|
if logprobs:
|
|
171
233
|
output.logprobs = self._extract_logprobs(completion)
|
|
172
234
|
|
|
@@ -174,6 +236,7 @@ class AsyncOperator(BaseOperator):
|
|
|
174
236
|
output.analysis = analysis
|
|
175
237
|
|
|
176
238
|
return output
|
|
239
|
+
|
|
177
240
|
except Exception as e:
|
|
178
241
|
logger.error(f"AsyncTheTool failed: {e}")
|
|
179
|
-
return
|
|
242
|
+
return output.errors.append(str(e))
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/internals/base_operator.py
RENAMED
|
@@ -3,6 +3,7 @@ import json
|
|
|
3
3
|
import re
|
|
4
4
|
import math
|
|
5
5
|
import logging
|
|
6
|
+
import random
|
|
6
7
|
|
|
7
8
|
from pydantic import BaseModel
|
|
8
9
|
from openai import OpenAI, AsyncOpenAI
|
|
@@ -10,7 +11,6 @@ from openai import OpenAI, AsyncOpenAI
|
|
|
10
11
|
# Base Model type for output models
|
|
11
12
|
T = TypeVar("T", bound=BaseModel)
|
|
12
13
|
|
|
13
|
-
# Configure logger
|
|
14
14
|
logger = logging.getLogger("base_operator")
|
|
15
15
|
logger.setLevel(logging.INFO)
|
|
16
16
|
|
|
@@ -40,13 +40,6 @@ class BaseOperator:
|
|
|
40
40
|
) -> Type[T]:
|
|
41
41
|
"""
|
|
42
42
|
Convert a JSON response string to output model.
|
|
43
|
-
|
|
44
|
-
Args:
|
|
45
|
-
response_string: The JSON string (may contain code block markers)
|
|
46
|
-
output_model: Your Pydantic output model class (e.g., StrOutput, ListStrOutput)
|
|
47
|
-
|
|
48
|
-
Returns:
|
|
49
|
-
Instance of your output model
|
|
50
43
|
"""
|
|
51
44
|
# Clean the response string
|
|
52
45
|
cleaned_json = self._clean_json_response(response_string)
|
|
@@ -61,7 +54,12 @@ class BaseOperator:
|
|
|
61
54
|
return output_model(**response_dict)
|
|
62
55
|
|
|
63
56
|
def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
|
|
57
|
+
"""
|
|
58
|
+
Extracts and filters token probabilities from completion logprobs.
|
|
59
|
+
Skips punctuation and structural tokens, returns cleaned probability data.
|
|
60
|
+
"""
|
|
64
61
|
logprobs_data = []
|
|
62
|
+
|
|
65
63
|
ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
|
|
66
64
|
|
|
67
65
|
for choice in completion.choices:
|
|
@@ -89,3 +87,15 @@ class BaseOperator:
|
|
|
89
87
|
logprobs_data.append(token_entry)
|
|
90
88
|
|
|
91
89
|
return logprobs_data
|
|
90
|
+
|
|
91
|
+
def _get_retry_temp(self, base_temp: float) -> float:
|
|
92
|
+
"""
|
|
93
|
+
Calculate temperature for retry attempts.
|
|
94
|
+
"""
|
|
95
|
+
delta_temp = random.choice([-1, 1]) * random.uniform(0.1, 0.9)
|
|
96
|
+
new_temp = base_temp + delta_temp
|
|
97
|
+
print(f"Base Temp: {base_temp}")
|
|
98
|
+
print(f"Delta Temp: {delta_temp}")
|
|
99
|
+
print(f"New Temp: {new_temp}")
|
|
100
|
+
|
|
101
|
+
return max(0.0, min(new_temp, 1.5))
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, TypeVar, Type, Literal
|
|
1
|
+
from typing import Any, TypeVar, Type, Literal, Callable
|
|
2
2
|
import logging
|
|
3
3
|
|
|
4
4
|
from openai import OpenAI
|
|
@@ -12,7 +12,6 @@ from texttools.tools.internals.prompt_loader import PromptLoader
|
|
|
12
12
|
# Base Model type for output models
|
|
13
13
|
T = TypeVar("T", bound=BaseModel)
|
|
14
14
|
|
|
15
|
-
# Configure logger
|
|
16
15
|
logger = logging.getLogger("operator")
|
|
17
16
|
logger.setLevel(logging.INFO)
|
|
18
17
|
|
|
@@ -32,6 +31,10 @@ class Operator(BaseOperator):
|
|
|
32
31
|
self.model = model
|
|
33
32
|
|
|
34
33
|
def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
|
|
34
|
+
"""
|
|
35
|
+
Calls OpenAI API for analysis using the configured prompt template.
|
|
36
|
+
Returns the analyzed content as a string.
|
|
37
|
+
"""
|
|
35
38
|
analyze_prompt = prompt_configs["analyze_template"]
|
|
36
39
|
analyze_message = [self._build_user_message(analyze_prompt)]
|
|
37
40
|
completion = self.client.chat.completions.create(
|
|
@@ -50,6 +53,10 @@ class Operator(BaseOperator):
|
|
|
50
53
|
logprobs: bool = False,
|
|
51
54
|
top_logprobs: int = 3,
|
|
52
55
|
) -> tuple[Type[T], Any]:
|
|
56
|
+
"""
|
|
57
|
+
Parses a chat completion using OpenAI's structured output format.
|
|
58
|
+
Returns both the parsed object and the raw completion for logging.
|
|
59
|
+
"""
|
|
53
60
|
request_kwargs = {
|
|
54
61
|
"model": self.model,
|
|
55
62
|
"messages": message,
|
|
@@ -73,6 +80,10 @@ class Operator(BaseOperator):
|
|
|
73
80
|
logprobs: bool = False,
|
|
74
81
|
top_logprobs: int = 3,
|
|
75
82
|
) -> tuple[Type[T], Any]:
|
|
83
|
+
"""
|
|
84
|
+
Generates a completion using vLLM with JSON schema guidance.
|
|
85
|
+
Returns the parsed output model and raw completion.
|
|
86
|
+
"""
|
|
76
87
|
json_schema = output_model.model_json_schema()
|
|
77
88
|
|
|
78
89
|
# Build kwargs dynamically
|
|
@@ -104,20 +115,23 @@ class Operator(BaseOperator):
|
|
|
104
115
|
temperature: float,
|
|
105
116
|
logprobs: bool,
|
|
106
117
|
top_logprobs: int | None,
|
|
118
|
+
validator: Callable[[Any], bool] | None,
|
|
107
119
|
# Internal parameters
|
|
108
120
|
prompt_file: str,
|
|
109
121
|
output_model: Type[T],
|
|
110
122
|
resp_format: Literal["vllm", "parse"],
|
|
111
123
|
mode: str | None,
|
|
112
124
|
**extra_kwargs,
|
|
113
|
-
) ->
|
|
125
|
+
) -> ToolOutput:
|
|
114
126
|
"""
|
|
115
127
|
Execute the LLM pipeline with the given input text.
|
|
116
128
|
"""
|
|
117
129
|
prompt_loader = PromptLoader()
|
|
118
130
|
formatter = Formatter()
|
|
131
|
+
output = ToolOutput()
|
|
119
132
|
|
|
120
133
|
try:
|
|
134
|
+
# Prompt configs contain two keys: main_template and analyze template, both are string
|
|
121
135
|
prompt_configs = prompt_loader.load(
|
|
122
136
|
prompt_file=prompt_file,
|
|
123
137
|
text=text.strip(),
|
|
@@ -159,14 +173,62 @@ class Operator(BaseOperator):
|
|
|
159
173
|
|
|
160
174
|
# Ensure output_model has a `result` field
|
|
161
175
|
if not hasattr(parsed, "result"):
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
)
|
|
165
|
-
|
|
166
|
-
output = ToolOutput(result="", analysis="", logprobs=[], errors=[])
|
|
176
|
+
error = "The provided output_model must define a field named 'result'"
|
|
177
|
+
logger.error(error)
|
|
178
|
+
output.errors.append(error)
|
|
179
|
+
return output
|
|
167
180
|
|
|
168
181
|
output.result = parsed.result
|
|
169
182
|
|
|
183
|
+
# Retry logic if validation fails
|
|
184
|
+
if validator and not validator(output.result):
|
|
185
|
+
max_retries = 3
|
|
186
|
+
for attempt in range(max_retries):
|
|
187
|
+
logger.warning(
|
|
188
|
+
f"Validation failed, retrying for the {attempt + 1} time."
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
# Generate new temperature for retry
|
|
192
|
+
retry_temperature = self._get_retry_temp(temperature)
|
|
193
|
+
try:
|
|
194
|
+
if resp_format == "vllm":
|
|
195
|
+
parsed, completion = self._vllm_completion(
|
|
196
|
+
messages,
|
|
197
|
+
output_model,
|
|
198
|
+
retry_temperature,
|
|
199
|
+
logprobs,
|
|
200
|
+
top_logprobs,
|
|
201
|
+
)
|
|
202
|
+
elif resp_format == "parse":
|
|
203
|
+
parsed, completion = self._parse_completion(
|
|
204
|
+
messages,
|
|
205
|
+
output_model,
|
|
206
|
+
retry_temperature,
|
|
207
|
+
logprobs,
|
|
208
|
+
top_logprobs,
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
output.result = parsed.result
|
|
212
|
+
|
|
213
|
+
# Check if retry was successful
|
|
214
|
+
if validator(output.result):
|
|
215
|
+
logger.info(
|
|
216
|
+
f"Validation passed on retry attempt {attempt + 1}"
|
|
217
|
+
)
|
|
218
|
+
break
|
|
219
|
+
else:
|
|
220
|
+
logger.warning(
|
|
221
|
+
f"Validation still failing after retry attempt {attempt + 1}"
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
except Exception as e:
|
|
225
|
+
logger.error(f"Retry attempt {attempt + 1} failed: {e}")
|
|
226
|
+
# Continue to next retry attempt if this one fails
|
|
227
|
+
|
|
228
|
+
# Final check after all retries
|
|
229
|
+
if validator and not validator(output.result):
|
|
230
|
+
output.errors.append("Validation failed after all retry attempts")
|
|
231
|
+
|
|
170
232
|
if logprobs:
|
|
171
233
|
output.logprobs = self._extract_logprobs(completion)
|
|
172
234
|
|
|
@@ -174,6 +236,7 @@ class Operator(BaseOperator):
|
|
|
174
236
|
output.analysis = analysis
|
|
175
237
|
|
|
176
238
|
return output
|
|
239
|
+
|
|
177
240
|
except Exception as e:
|
|
178
241
|
logger.error(f"TheTool failed: {e}")
|
|
179
|
-
return
|
|
242
|
+
return output.errors.append(str(e))
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/internals/output_models.py
RENAMED
|
@@ -4,10 +4,13 @@ from pydantic import BaseModel, Field
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class ToolOutput(BaseModel):
    """
    Result container for a tool call.

    Every field has a default, so an empty instance can be created up front
    and filled in step by step.
    """

    # Value produced by the tool; its concrete type depends on the output model.
    result: Any = None
    # Analysis text; empty string by default.
    analysis: str = ""
    # Per-token probability entries; empty list by default.
    logprobs: list[dict[str, Any]] = Field(default_factory=list)
    # Error messages; empty list by default.
    errors: list[str] = Field(default_factory=list)

    def __repr__(self) -> str:
        """Return a debug representation that lists every field and the result's type."""
        return (
            f"ToolOutput(result_type='{type(self.result)}', "
            f"result='{self.result}', "
            f"analysis='{self.analysis}', "
            f"logprobs='{self.logprobs}', "
            f"errors='{self.errors}')"
        )
|
|
11
14
|
|
|
12
15
|
|
|
13
16
|
class StrOutput(BaseModel):
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/tools/internals/prompt_loader.py
RENAMED
|
@@ -24,6 +24,9 @@ class PromptLoader:
|
|
|
24
24
|
# Use lru_cache to load each file once
|
|
25
25
|
@lru_cache(maxsize=32)
|
|
26
26
|
def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
|
|
27
|
+
"""
|
|
28
|
+
Loads prompt templates from YAML file with optional mode selection.
|
|
29
|
+
"""
|
|
27
30
|
base_dir = Path(__file__).parent.parent.parent / Path("prompts")
|
|
28
31
|
prompt_path = base_dir / prompt_file
|
|
29
32
|
data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Literal, Any
|
|
1
|
+
from typing import Literal, Any, Callable
|
|
2
2
|
|
|
3
3
|
from openai import OpenAI
|
|
4
4
|
|
|
@@ -32,12 +32,16 @@ class TheTool:
|
|
|
32
32
|
temperature: float | None = 0.0,
|
|
33
33
|
logprobs: bool = False,
|
|
34
34
|
top_logprobs: int | None = None,
|
|
35
|
-
|
|
35
|
+
validator: Callable[[Any], bool] | None = None,
|
|
36
|
+
) -> OutputModels.ToolOutput:
|
|
36
37
|
"""
|
|
37
38
|
Categorize a text into a single Islamic studies domain category.
|
|
38
39
|
|
|
39
40
|
Returns:
|
|
40
|
-
|
|
41
|
+
ToolOutput: Object containing:
|
|
42
|
+
- result (str): The assigned Islamic studies category
|
|
43
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
44
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
41
45
|
"""
|
|
42
46
|
return self.operator.run(
|
|
43
47
|
# User parameters
|
|
@@ -47,6 +51,7 @@ class TheTool:
|
|
|
47
51
|
temperature=temperature,
|
|
48
52
|
logprobs=logprobs,
|
|
49
53
|
top_logprobs=top_logprobs,
|
|
54
|
+
validator=validator,
|
|
50
55
|
# Internal parameters
|
|
51
56
|
prompt_file="categorizer.yaml",
|
|
52
57
|
output_model=OutputModels.CategorizerOutput,
|
|
@@ -64,12 +69,16 @@ class TheTool:
|
|
|
64
69
|
temperature: float | None = 0.0,
|
|
65
70
|
logprobs: bool = False,
|
|
66
71
|
top_logprobs: int | None = None,
|
|
67
|
-
|
|
72
|
+
validator: Callable[[Any], bool] | None = None,
|
|
73
|
+
) -> OutputModels.ToolOutput:
|
|
68
74
|
"""
|
|
69
75
|
Extract salient keywords from text.
|
|
70
76
|
|
|
71
77
|
Returns:
|
|
72
|
-
|
|
78
|
+
ToolOutput: Object containing:
|
|
79
|
+
- result (list[str]): List of extracted keywords
|
|
80
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
81
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
73
82
|
"""
|
|
74
83
|
return self.operator.run(
|
|
75
84
|
# User parameters
|
|
@@ -80,6 +89,7 @@ class TheTool:
|
|
|
80
89
|
temperature=temperature,
|
|
81
90
|
logprobs=logprobs,
|
|
82
91
|
top_logprobs=top_logprobs,
|
|
92
|
+
validator=validator,
|
|
83
93
|
# Internal parameters
|
|
84
94
|
prompt_file="extract_keywords.yaml",
|
|
85
95
|
output_model=OutputModels.ListStrOutput,
|
|
@@ -96,12 +106,16 @@ class TheTool:
|
|
|
96
106
|
temperature: float | None = 0.0,
|
|
97
107
|
logprobs: bool = False,
|
|
98
108
|
top_logprobs: int | None = None,
|
|
99
|
-
|
|
109
|
+
validator: Callable[[Any], bool] | None = None,
|
|
110
|
+
) -> OutputModels.ToolOutput:
|
|
100
111
|
"""
|
|
101
112
|
Perform Named Entity Recognition (NER) over the input text.
|
|
102
113
|
|
|
103
114
|
Returns:
|
|
104
|
-
|
|
115
|
+
ToolOutput: Object containing:
|
|
116
|
+
- result (list[dict]): List of entities with 'text' and 'type' keys
|
|
117
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
118
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
105
119
|
"""
|
|
106
120
|
return self.operator.run(
|
|
107
121
|
# User parameters
|
|
@@ -112,6 +126,7 @@ class TheTool:
|
|
|
112
126
|
temperature=temperature,
|
|
113
127
|
logprobs=logprobs,
|
|
114
128
|
top_logprobs=top_logprobs,
|
|
129
|
+
validator=validator,
|
|
115
130
|
# Internal parameters
|
|
116
131
|
prompt_file="extract_entities.yaml",
|
|
117
132
|
output_model=OutputModels.ListDictStrStrOutput,
|
|
@@ -127,12 +142,16 @@ class TheTool:
|
|
|
127
142
|
temperature: float | None = 0.0,
|
|
128
143
|
logprobs: bool = False,
|
|
129
144
|
top_logprobs: int | None = None,
|
|
130
|
-
|
|
145
|
+
validator: Callable[[Any], bool] | None = None,
|
|
146
|
+
) -> OutputModels.ToolOutput:
|
|
131
147
|
"""
|
|
132
148
|
Detect if the input is phrased as a question.
|
|
133
149
|
|
|
134
150
|
Returns:
|
|
135
|
-
|
|
151
|
+
ToolOutput: Object containing:
|
|
152
|
+
- result (bool): True if text is a question, False otherwise
|
|
153
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
154
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
136
155
|
"""
|
|
137
156
|
return self.operator.run(
|
|
138
157
|
# User parameters
|
|
@@ -142,6 +161,7 @@ class TheTool:
|
|
|
142
161
|
temperature=temperature,
|
|
143
162
|
logprobs=logprobs,
|
|
144
163
|
top_logprobs=top_logprobs,
|
|
164
|
+
validator=validator,
|
|
145
165
|
# Internal parameters
|
|
146
166
|
prompt_file="is_question.yaml",
|
|
147
167
|
output_model=OutputModels.BoolOutput,
|
|
@@ -159,12 +179,16 @@ class TheTool:
|
|
|
159
179
|
temperature: float | None = 0.0,
|
|
160
180
|
logprobs: bool = False,
|
|
161
181
|
top_logprobs: int | None = None,
|
|
162
|
-
|
|
182
|
+
validator: Callable[[Any], bool] | None = None,
|
|
183
|
+
) -> OutputModels.ToolOutput:
|
|
163
184
|
"""
|
|
164
185
|
Generate a single question from the given text.
|
|
165
186
|
|
|
166
187
|
Returns:
|
|
167
|
-
|
|
188
|
+
ToolOutput: Object containing:
|
|
189
|
+
- result (str): The generated question
|
|
190
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
191
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
168
192
|
"""
|
|
169
193
|
return self.operator.run(
|
|
170
194
|
# User parameters
|
|
@@ -175,6 +199,7 @@ class TheTool:
|
|
|
175
199
|
temperature=temperature,
|
|
176
200
|
logprobs=logprobs,
|
|
177
201
|
top_logprobs=top_logprobs,
|
|
202
|
+
validator=validator,
|
|
178
203
|
# Internal parameters
|
|
179
204
|
prompt_file="text_to_question.yaml",
|
|
180
205
|
output_model=OutputModels.StrOutput,
|
|
@@ -192,12 +217,16 @@ class TheTool:
|
|
|
192
217
|
logprobs: bool = False,
|
|
193
218
|
top_logprobs: int | None = None,
|
|
194
219
|
mode: Literal["default", "reason"] = "default",
|
|
195
|
-
|
|
220
|
+
validator: Callable[[Any], bool] | None = None,
|
|
221
|
+
) -> OutputModels.ToolOutput:
|
|
196
222
|
"""
|
|
197
223
|
Merge multiple questions into a single unified question.
|
|
198
224
|
|
|
199
225
|
Returns:
|
|
200
|
-
|
|
226
|
+
ToolOutput: Object containing:
|
|
227
|
+
- result (str): The merged question
|
|
228
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
229
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
201
230
|
"""
|
|
202
231
|
text = ", ".join(text)
|
|
203
232
|
return self.operator.run(
|
|
@@ -209,6 +238,7 @@ class TheTool:
|
|
|
209
238
|
temperature=temperature,
|
|
210
239
|
logprobs=logprobs,
|
|
211
240
|
top_logprobs=top_logprobs,
|
|
241
|
+
validator=validator,
|
|
212
242
|
# Internal parameters
|
|
213
243
|
prompt_file="merge_questions.yaml",
|
|
214
244
|
output_model=OutputModels.StrOutput,
|
|
@@ -226,12 +256,16 @@ class TheTool:
|
|
|
226
256
|
logprobs: bool = False,
|
|
227
257
|
top_logprobs: int | None = None,
|
|
228
258
|
mode: Literal["positive", "negative", "hard_negative"] = "positive",
|
|
229
|
-
|
|
259
|
+
validator: Callable[[Any], bool] | None = None,
|
|
260
|
+
) -> OutputModels.ToolOutput:
|
|
230
261
|
"""
|
|
231
262
|
Rewrite a text with different modes.
|
|
232
263
|
|
|
233
264
|
Returns:
|
|
234
|
-
|
|
265
|
+
ToolOutput: Object containing:
|
|
266
|
+
- result (str): The rewritten text
|
|
267
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
268
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
235
269
|
"""
|
|
236
270
|
return self.operator.run(
|
|
237
271
|
# User parameters
|
|
@@ -242,6 +276,7 @@ class TheTool:
|
|
|
242
276
|
temperature=temperature,
|
|
243
277
|
logprobs=logprobs,
|
|
244
278
|
top_logprobs=top_logprobs,
|
|
279
|
+
validator=validator,
|
|
245
280
|
# Internal parameters
|
|
246
281
|
prompt_file="rewrite.yaml",
|
|
247
282
|
output_model=OutputModels.StrOutput,
|
|
@@ -259,12 +294,16 @@ class TheTool:
|
|
|
259
294
|
temperature: float | None = 0.0,
|
|
260
295
|
logprobs: bool = False,
|
|
261
296
|
top_logprobs: int | None = None,
|
|
262
|
-
|
|
297
|
+
validator: Callable[[Any], bool] | None = None,
|
|
298
|
+
) -> OutputModels.ToolOutput:
|
|
263
299
|
"""
|
|
264
300
|
Generate a list of questions about a subject.
|
|
265
301
|
|
|
266
302
|
Returns:
|
|
267
|
-
|
|
303
|
+
ToolOutput: Object containing:
|
|
304
|
+
- result (list[str]): List of generated questions
|
|
305
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
306
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
268
307
|
"""
|
|
269
308
|
return self.operator.run(
|
|
270
309
|
# User parameters
|
|
@@ -276,6 +315,7 @@ class TheTool:
|
|
|
276
315
|
temperature=temperature,
|
|
277
316
|
logprobs=logprobs,
|
|
278
317
|
top_logprobs=top_logprobs,
|
|
318
|
+
validator=validator,
|
|
279
319
|
# Internal parameters
|
|
280
320
|
prompt_file="subject_to_question.yaml",
|
|
281
321
|
output_model=OutputModels.ReasonListStrOutput,
|
|
@@ -292,12 +332,16 @@ class TheTool:
|
|
|
292
332
|
temperature: float | None = 0.0,
|
|
293
333
|
logprobs: bool = False,
|
|
294
334
|
top_logprobs: int | None = None,
|
|
295
|
-
|
|
335
|
+
validator: Callable[[Any], bool] | None = None,
|
|
336
|
+
) -> OutputModels.ToolOutput:
|
|
296
337
|
"""
|
|
297
338
|
Summarize the given subject text.
|
|
298
339
|
|
|
299
340
|
Returns:
|
|
300
|
-
|
|
341
|
+
ToolOutput: Object containing:
|
|
342
|
+
- result (str): The summary text
|
|
343
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
344
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
301
345
|
"""
|
|
302
346
|
return self.operator.run(
|
|
303
347
|
# User parameters
|
|
@@ -308,6 +352,7 @@ class TheTool:
|
|
|
308
352
|
temperature=temperature,
|
|
309
353
|
logprobs=logprobs,
|
|
310
354
|
top_logprobs=top_logprobs,
|
|
355
|
+
validator=validator,
|
|
311
356
|
# Internal parameters
|
|
312
357
|
prompt_file="summarize.yaml",
|
|
313
358
|
output_model=OutputModels.StrOutput,
|
|
@@ -324,12 +369,16 @@ class TheTool:
|
|
|
324
369
|
temperature: float | None = 0.0,
|
|
325
370
|
logprobs: bool = False,
|
|
326
371
|
top_logprobs: int | None = None,
|
|
327
|
-
|
|
372
|
+
validator: Callable[[Any], bool] | None = None,
|
|
373
|
+
) -> OutputModels.ToolOutput:
|
|
328
374
|
"""
|
|
329
375
|
Translate text between languages.
|
|
330
376
|
|
|
331
377
|
Returns:
|
|
332
|
-
|
|
378
|
+
ToolOutput: Object containing:
|
|
379
|
+
- result (str): The translated text
|
|
380
|
+
- logprobs (list | None): Probability data if logprobs enabled
|
|
381
|
+
- analysis (str | None): Detailed reasoning if with_analysis enabled
|
|
333
382
|
"""
|
|
334
383
|
return self.operator.run(
|
|
335
384
|
# User parameters
|
|
@@ -340,6 +389,7 @@ class TheTool:
|
|
|
340
389
|
temperature=temperature,
|
|
341
390
|
logprobs=logprobs,
|
|
342
391
|
top_logprobs=top_logprobs,
|
|
392
|
+
validator=validator,
|
|
343
393
|
# Internal parameters
|
|
344
394
|
prompt_file="translate.yaml",
|
|
345
395
|
output_model=OutputModels.StrOutput,
|
|
@@ -356,12 +406,13 @@ class TheTool:
|
|
|
356
406
|
temperature: float | None = None,
|
|
357
407
|
logprobs: bool | None = None,
|
|
358
408
|
top_logprobs: int | None = None,
|
|
359
|
-
) ->
|
|
409
|
+
) -> OutputModels.ToolOutput:
|
|
360
410
|
"""
|
|
361
411
|
Custom tool that can do almost anything!
|
|
362
412
|
|
|
363
413
|
Returns:
|
|
364
|
-
|
|
414
|
+
ToolOutput: Object with fields:
|
|
415
|
+
- result (str): The output result
|
|
365
416
|
"""
|
|
366
417
|
return self.operator.run(
|
|
367
418
|
# User paramaeters
|
|
@@ -378,4 +429,5 @@ class TheTool:
|
|
|
378
429
|
user_prompt=None,
|
|
379
430
|
with_analysis=False,
|
|
380
431
|
mode=None,
|
|
432
|
+
validator=None,
|
|
381
433
|
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/hamtaa_texttools.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hamtaa_texttools-1.1.0 → hamtaa_texttools-1.1.5}/texttools/prompts/subject_to_question.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|