hamtaa-texttools 1.0.7__tar.gz → 1.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of hamtaa-texttools might be problematic.
- {hamtaa_texttools-1.0.7/hamtaa_texttools.egg-info → hamtaa_texttools-1.0.8}/PKG-INFO +13 -22
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/README.md +12 -21
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8/hamtaa_texttools.egg-info}/PKG-INFO +13 -22
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/SOURCES.txt +1 -2
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/pyproject.toml +1 -1
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/async_the_tool.py +60 -15
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/async_operator.py +9 -23
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/base_operator.py +2 -2
- hamtaa_texttools-1.0.8/texttools/tools/internals/formatters.py +24 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/operator.py +10 -25
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/prompt_loader.py +3 -12
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/the_tool.py +13 -70
- hamtaa_texttools-1.0.7/texttools/formatters/base_formatter.py +0 -33
- hamtaa_texttools-1.0.7/texttools/formatters/user_merge_formatter.py +0 -30
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/LICENSE +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/setup.cfg +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/__init__.py +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/__init__.py +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/batch_manager.py +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/batch_runner.py +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/README.md +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/categorizer.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/extract_entities.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/extract_keywords.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/is_question.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/merge_questions.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/rewrite.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/run_custom.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/subject_to_question.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/summarize.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/text_to_question.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/translate.yaml +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/__init__.py +0 -0
- {hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/output_models.py +0 -0
{hamtaa_texttools-1.0.7/hamtaa_texttools.egg-info → hamtaa_texttools-1.0.8}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.0.7
+Version: 1.0.8
 Summary: TextTools is a high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>
 License: MIT License
@@ -42,8 +42,6 @@ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for ma
 
 It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extractor, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
 
-**Thread Safety:** All methods in AsyncTheTool are thread-safe, allowing concurrent usage across multiple threads without conflicts.
-
 ---
 
 ## ✨ Features
@@ -78,7 +76,11 @@ Note: This doubles token usage per call because it triggers an additional LLM re
 
 - **`user_prompt="..."`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
 
-
+- **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
+
+All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
+
+**Note:** There might be some tools that don't support some of the parameters above.
 
 ---
 
@@ -104,7 +106,6 @@ pip install -U hamtaa-texttools
 
 ```python
 from openai import OpenAI
-from pydantic import BaseModel
 from texttools import TheTool
 
 # Create your OpenAI client
@@ -114,29 +115,19 @@ client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
 model = "gpt-4o-mini"
 
 # Create an instance of TheTool
-
-the_tool = TheTool(client=client, model=model, with_analysis=True, output_lang="English")
+the_tool = TheTool(client=client, model=model)
 
 # Example: Question Detection
 detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection["result"])
 print(detection["logprobs"])
-# Output: True
+# Output: True \n --logprobs
 
 # Example: Translation
-
-print(
-
-
-# Example: Custom Tool
-# Note: Output model should only contain result key
-# Everything else will be ignored
-class Custom(BaseModel):
-    result: list[list[dict[str, int]]]
-
-custom_prompt = "Something"
-custom_result = the_tool.run_custom(custom_prompt, Custom)
-print(custom_result)
+translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
+print(translation["result"])
+print(translation["analysis"])
+# Output: "Hi! How are you?" \n --analysis
 ```
 
 ---
@@ -149,7 +140,7 @@ from openai import AsyncOpenAI
 from texttools import AsyncTheTool
 
 async def main():
-    # Create your
+    # Create your AsyncOpenAI client
     async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
 
     # Specify the model
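Note that the new quickstart's translation example, as released, is missing a comma between the positional text argument and `target_language`, so the line is a Python syntax error as written. A corrected sketch of the intended call (the keyword names match the `translate` signature in the `the_tool.py` diff further down this page):

```python
# Corrected form of the README example: comma added after the text argument.
translation = the_tool.translate(
    "سلام، حالت چطوره؟",         # input text (Persian: "Hi, how are you?")
    target_language="English",
    with_analysis=True,           # extra LLM analysis pass; doubles token usage per call
)
print(translation["result"])    # e.g. "Hi! How are you?"
print(translation["analysis"])  # present because with_analysis=True
```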
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/README.md RENAMED

@@ -8,8 +8,6 @@ It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for ma
 
 It provides ready-to-use utilities for **translation, question detection, keyword extraction, categorization, NER extractor, and more** — designed to help you integrate AI-powered text processing into your applications with minimal effort.
 
-**Thread Safety:** All methods in AsyncTheTool are thread-safe, allowing concurrent usage across multiple threads without conflicts.
-
 ---
 
 ## ✨ Features
@@ -44,7 +42,11 @@ Note: This doubles token usage per call because it triggers an additional LLM re
 
 - **`user_prompt="..."`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
 
-
+- **`temperature=0.0`** → Determines how creative the model should respond. Takes a float number from `0.0` to `1.0`.
+
+All these parameters can be used individually or together to tailor the behavior of any tool in **TextTools**.
+
+**Note:** There might be some tools that don't support some of the parameters above.
 
 ---
 
@@ -70,7 +72,6 @@ pip install -U hamtaa-texttools
 
 ```python
 from openai import OpenAI
-from pydantic import BaseModel
 from texttools import TheTool
 
 # Create your OpenAI client
@@ -80,29 +81,19 @@ client = OpenAI(base_url = "your_url", API_KEY = "your_api_key")
 model = "gpt-4o-mini"
 
 # Create an instance of TheTool
-
-the_tool = TheTool(client=client, model=model, with_analysis=True, output_lang="English")
+the_tool = TheTool(client=client, model=model)
 
 # Example: Question Detection
 detection = the_tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
 print(detection["result"])
 print(detection["logprobs"])
-# Output: True
+# Output: True \n --logprobs
 
 # Example: Translation
-
-print(
-
-
-# Example: Custom Tool
-# Note: Output model should only contain result key
-# Everything else will be ignored
-class Custom(BaseModel):
-    result: list[list[dict[str, int]]]
-
-custom_prompt = "Something"
-custom_result = the_tool.run_custom(custom_prompt, Custom)
-print(custom_result)
+translation = the_tool.translate("سلام، حالت چطوره؟" target_language="English", with_analysis=True)
+print(translation["result"])
+print(translation["analysis"])
+# Output: "Hi! How are you?" \n --analysis
 ```
 
 ---
@@ -115,7 +106,7 @@ from openai import AsyncOpenAI
 from texttools import AsyncTheTool
 
 async def main():
-    # Create your
+    # Create your AsyncOpenAI client
    async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
 
     # Specify the model
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8/hamtaa_texttools.egg-info}/PKG-INFO RENAMED

(Identical to the PKG-INFO diff at the top of this page: the 1.0.7 → 1.0.8 version bump, the removed thread-safety note, the new parameter docs, and the rewritten quickstart examples.)
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/SOURCES.txt RENAMED

@@ -11,8 +11,6 @@ texttools/__init__.py
 texttools/batch/__init__.py
 texttools/batch/batch_manager.py
 texttools/batch/batch_runner.py
-texttools/formatters/base_formatter.py
-texttools/formatters/user_merge_formatter.py
 texttools/prompts/README.md
 texttools/prompts/categorizer.yaml
 texttools/prompts/extract_entities.yaml
@@ -30,6 +28,7 @@ texttools/tools/async_the_tool.py
 texttools/tools/the_tool.py
 texttools/tools/internals/async_operator.py
 texttools/tools/internals/base_operator.py
+texttools/tools/internals/formatters.py
 texttools/tools/internals/operator.py
 texttools/tools/internals/output_models.py
 texttools/tools/internals/prompt_loader.py
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/async_the_tool.py RENAMED

@@ -10,6 +10,9 @@ class AsyncTheTool:
     """
     Async counterpart to TheTool.
 
+    Each method configures the async operator with a specific YAML prompt,
+    output schema, and flags, then delegates execution to `operator.run()`.
+
     Usage:
         async_client = AsyncOpenAI(...)
         tool = TheToolAsync(async_client, model="model-name")
@@ -27,7 +30,6 @@ class AsyncTheTool:
         self,
         text: str,
         with_analysis: bool = False,
-        output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
@@ -36,20 +38,13 @@ class AsyncTheTool:
         """
         Categorize a text into a single Islamic studies domain category.
 
-        Args:
-            text: Input string to categorize.
-            with_analysis: If True, first runs an LLM "analysis" step and
-                conditions the main prompt on that analysis.
-
         Returns:
-            {"result": <category string>}
-            Example: {"result": "باورهای دینی"}
+            {"result": <category string>} + ("logprobs" and "analysis" if enabled)
         """
         return await self.operator.run(
             # User parameters
             text=text,
             with_analysis=with_analysis,
-            output_lang=output_lang,
             user_prompt=user_prompt,
             temperature=temperature,
             logprobs=logprobs,
@@ -59,6 +54,7 @@ class AsyncTheTool:
             output_model=OutputModels.CategorizerOutput,
             resp_format="parse",
             mode=None,
+            output_lang=None,
         )
 
     async def extract_keywords(
@@ -71,6 +67,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, list[str]]:
+        """
+        Extract salient keywords from text.
+
+        Returns:
+            {"result": [<keyword1>, <keyword2>, ...]} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -97,6 +99,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, list[dict[str, str]]]:
+        """
+        Perform Named Entity Recognition (NER) over the input text.
+
+        Returns:
+            {"result": [{"text": <entity>, "type": <entity_type>}, ...]} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -122,6 +130,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, bool]:
+        """
+        Detect if the input is phrased as a question.
+
+        Returns:
+            {"result": True} or {"result": False} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -148,6 +162,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, str]:
+        """
+        Generate a single question from the given text.
+
+        Returns:
+            {"result": <generated_question>} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -175,6 +195,12 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         mode: Literal["default", "reason"] = "default",
     ) -> dict[str, str]:
+        """
+        Merge multiple questions into a single unified question.
+
+        Returns:
+            {"result": <merged_question>} + ("logprobs" and "analysis" if enabled)
+        """
         text = ", ".join(text)
         return await self.operator.run(
             # User parameters
@@ -203,6 +229,12 @@ class AsyncTheTool:
         top_logprobs: int | None = None,
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
     ) -> dict[str, str]:
+        """
+        Rewrite a text with different modes.
+
+        Returns:
+            {"result": <rewritten_text>} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -230,6 +262,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, list[str]]:
+        """
+        Generate a list of questions about a subject.
+
+        Returns:
+            {"result": [<question1>, <question2>, ...]} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -257,6 +295,12 @@ class AsyncTheTool:
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, str]:
+        """
+        Summarize the given subject text.
+
+        Returns:
+            {"result": <summary>} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
@@ -278,18 +322,22 @@ class AsyncTheTool:
         text: str,
         target_language: str,
         with_analysis: bool = False,
-        output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
         top_logprobs: int | None = None,
     ) -> dict[str, str]:
+        """
+        Translate text between languages.
+
+        Returns:
+            {"result": <translated_text>} + ("logprobs" and "analysis" if enabled)
+        """
         return await self.operator.run(
             # User parameters
             text=text,
             target_language=target_language,
             with_analysis=with_analysis,
-            output_lang=output_lang,
             user_prompt=user_prompt,
             temperature=temperature,
             logprobs=logprobs,
@@ -299,6 +347,7 @@ class AsyncTheTool:
             output_model=OutputModels.StrOutput,
             resp_format="parse",
             mode=None,
+            output_lang=None,
         )
 
     async def run_custom(
@@ -313,10 +362,6 @@ class AsyncTheTool:
         """
         Custom tool that can do almost anything!
 
-        Args:
-            prompt: Custom prompt.
-            output_model: Custom BaseModel output model.
-
         Returns:
             {"result": <Any>}
         """
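The new per-method docstrings make the return contract uniform: every tool resolves to {"result": ...}, with "logprobs" and "analysis" keys added only when those flags are set. A minimal usage sketch of the async API under that contract; the `client=`/`model=` keyword form is assumed from the sync quickstart, and the URL, key, and model name are placeholders:

```python
import asyncio

from openai import AsyncOpenAI
from texttools import AsyncTheTool

async def main():
    async_client = AsyncOpenAI(base_url="your_url", api_key="your_api_key")
    tool = AsyncTheTool(client=async_client, model="gpt-4o-mini")  # kwargs assumed from the sync API

    detection = await tool.is_question("Is this project open source?", logprobs=True, top_logprobs=2)
    print(detection["result"])    # True or False
    print(detection["logprobs"])  # present only because logprobs=True

    keywords = await tool.extract_keywords("TextTools is a high-level NLP toolkit.", with_analysis=True)
    print(keywords["result"])     # list of keyword strings
    print(keywords["analysis"])   # present only because with_analysis=True

asyncio.run(main())
```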
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/async_operator.py RENAMED

@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from typing import Any, TypeVar, Type, Literal
 import logging
 
@@ -7,9 +5,7 @@ from openai import AsyncOpenAI
 from pydantic import BaseModel
 
 from texttools.tools.internals.base_operator import BaseOperator
-from texttools.formatters import (
-    UserMergeFormatter,
-)
+from texttools.tools.internals.formatters import Formatter
 from texttools.tools.internals.prompt_loader import PromptLoader
 
 # Base Model type for output models
@@ -31,14 +27,12 @@ class AsyncOperator(BaseOperator):
     """
 
     def __init__(self, client: AsyncOpenAI, model: str):
-        self.client
+        self.client = client
         self.model = model
 
-    async def _analysis_completion(
-        self,
-        analyze_message
-        temperature: float,
-    ) -> str:
+    async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        analyze_prompt = prompt_configs["analyze_template"]
+        analyze_message = [self._build_user_message(analyze_prompt)]
         completion = await self.client.chat.completions.create(
             model=self.model,
             messages=analyze_message,
@@ -47,12 +41,6 @@ class AsyncOperator(BaseOperator):
         analysis = completion.choices[0].message.content.strip()
         return analysis
 
-    async def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
-        analyze_prompt = prompt_configs["analyze_template"]
-        analyze_message = [self._build_user_message(analyze_prompt)]
-        analysis = await self._analysis_completion(analyze_message, temperature)
-        return analysis
-
     async def _parse_completion(
         self,
         message: list[dict[str, str]],
@@ -126,14 +114,12 @@ class AsyncOperator(BaseOperator):
         Execute the async LLM pipeline with the given input text. (Async)
         """
         prompt_loader = PromptLoader()
-        formatter = UserMergeFormatter()
+        formatter = Formatter()
 
         try:
-            cleaned_text = text.strip()
-
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
-                text=cleaned_text,
+                text=text.strip(),
                 mode=mode,
                 **extra_kwargs,
             )
@@ -159,7 +145,7 @@ class AsyncOperator(BaseOperator):
         )
 
         messages.append(self._build_user_message(prompt_configs["main_template"]))
-        messages = formatter.format(messages)
+        messages = formatter.user_merge_format(messages)
 
         if resp_format == "vllm":
             parsed, completion = await self._vllm_completion(
@@ -188,4 +174,4 @@ class AsyncOperator(BaseOperator):
 
         except Exception as e:
             logger.error(f"Async TheTool failed: {e}")
-            return {"
+            return {"error": str(e), "result": ""}
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/base_operator.py RENAMED

@@ -1,4 +1,4 @@
-from typing import TypeVar, Type
+from typing import TypeVar, Type, Any
 import json
 import re
 import math
@@ -55,7 +55,7 @@ class BaseOperator:
         # Convert dictionary to output model
         return output_model(**response_dict)
 
-    def _extract_logprobs(self, completion: dict):
+    def _extract_logprobs(self, completion: dict) -> list[dict[str, Any]]:
         logprobs_data = []
         ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')
 
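In `_extract_logprobs`, the `ignore_pattern` regex decides which tokens contribute logprob entries: it skips the literal key `result` and any token consisting only of whitespace or JSON punctuation, keeping just the content-bearing answer tokens. A small standalone check of the pattern; the token split below is illustrative, not taken from a real completion:

```python
import re

ignore_pattern = re.compile(r'^(result|[\s\[\]\{\}",:]+)$')

# Tokens roughly as an LLM might emit them for the JSON payload {"result": "True"}
tokens = ['{"', 'result', '":', ' "', 'True', '"}']
kept = [t for t in tokens if not ignore_pattern.match(t)]
print(kept)  # ['True']: only the answer token is kept for logprob extraction
```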
hamtaa_texttools-1.0.8/texttools/tools/internals/formatters.py ADDED

@@ -0,0 +1,24 @@
+class Formatter:
+    @staticmethod
+    def user_merge_format(messages: list[dict[str, str]]) -> list[dict[str, str]]:
+        """
+        Merges consecutive user messages into a single message, separated by newlines.
+
+        This is useful for condensing a multi-turn user input into a single
+        message for the LLM. Assistant and system messages are left unchanged and
+        act as separators between user message groups.
+        """
+        merged: list[dict[str, str]] = []
+
+        for message in messages:
+            role, content = message["role"], message["content"].strip()
+
+            # Merge with previous user turn
+            if merged and role == "user" and merged[-1]["role"] == "user":
+                merged[-1]["content"] += "\n" + content
+
+            # Otherwise, start a new turn
+            else:
+                merged.append({"role": role, "content": content})
+
+        return merged
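Because the new `formatters.py` is added in full above, its behavior is easy to demonstrate: consecutive `user` messages collapse into one newline-joined message, matching how the operators append the main template and then call `formatter.user_merge_format(messages)`. A quick example:

```python
from texttools.tools.internals.formatters import Formatter

messages = [
    {"role": "user", "content": "analysis of the input..."},
    {"role": "user", "content": "main prompt template"},
]
print(Formatter.user_merge_format(messages))
# [{'role': 'user', 'content': 'analysis of the input...\nmain prompt template'}]
```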
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/operator.py RENAMED

@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from typing import Any, TypeVar, Type, Literal
 import logging
 
@@ -7,9 +5,7 @@ from openai import OpenAI
 from pydantic import BaseModel
 
 from texttools.tools.internals.base_operator import BaseOperator
-from texttools.formatters import (
-    UserMergeFormatter,
-)
+from texttools.tools.internals.formatters import Formatter
 from texttools.tools.internals.prompt_loader import PromptLoader
 
 # Base Model type for output models
@@ -22,7 +18,7 @@ logger.setLevel(logging.INFO)
 
 class Operator(BaseOperator):
     """
-    Core engine for running text-processing operations with an LLM.
+    Core engine for running text-processing operations with an LLM (Sync).
 
     It wires together:
     - `PromptLoader` → loads YAML prompt templates.
@@ -31,14 +27,12 @@ class Operator(BaseOperator):
     """
 
    def __init__(self, client: OpenAI, model: str):
-        self.client
+        self.client = client
         self.model = model
 
-    def _analysis_completion(
-        self,
-        analyze_message
-        temperature: float,
-    ) -> str:
+    def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
+        analyze_prompt = prompt_configs["analyze_template"]
+        analyze_message = [self._build_user_message(analyze_prompt)]
         completion = self.client.chat.completions.create(
             model=self.model,
             messages=analyze_message,
@@ -47,12 +41,6 @@ class Operator(BaseOperator):
         analysis = completion.choices[0].message.content.strip()
         return analysis
 
-    def _analyze(self, prompt_configs: dict[str, str], temperature: float) -> str:
-        analyze_prompt = prompt_configs["analyze_template"]
-        analyze_message = [self._build_user_message(analyze_prompt)]
-        analysis = self._analysis_completion(analyze_message, temperature)
-        return analysis
-
     def _parse_completion(
         self,
         message: list[dict[str, str]],
@@ -83,7 +71,6 @@ class Operator(BaseOperator):
         temperature: float,
         logprobs: bool = False,
         top_logprobs: int = 3,
-        max_tokens: int | None = None,
     ) -> tuple[Type[T], Any]:
         json_schema = output_model.model_json_schema()
 
@@ -127,14 +114,12 @@ class Operator(BaseOperator):
         Execute the LLM pipeline with the given input text.
         """
         prompt_loader = PromptLoader()
-        formatter = UserMergeFormatter()
+        formatter = Formatter()
 
         try:
-            cleaned_text = text.strip()
-
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
-                text=cleaned_text,
+                text=text.strip(),
                 mode=mode,
                 **extra_kwargs,
             )
@@ -160,7 +145,7 @@ class Operator(BaseOperator):
         )
 
         messages.append(self._build_user_message(prompt_configs["main_template"]))
-        messages = formatter.format(messages)
+        messages = formatter.user_merge_format(messages)
 
         if resp_format == "vllm":
             parsed, completion = self._vllm_completion(
@@ -189,4 +174,4 @@ class Operator(BaseOperator):
 
         except Exception as e:
             logger.error(f"TheTool failed: {e}")
-            return {"
+            return {"error": str(e), "result": ""}
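The rewritten exception handler in both `Operator` and `AsyncOperator` returns `{"error": str(e), "result": ""}` on failure, so every tool call resolves to a well-formed dict and callers can branch on the `error` key. A caller-side sketch; the tool and input are placeholders:

```python
result = the_tool.summarize("some long text")
if result.get("error"):
    # run() caught an exception, logged it, and returned the fallback dict
    print("tool failed:", result["error"])
else:
    print(result["result"])
```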
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/prompt_loader.py RENAMED

@@ -18,24 +18,15 @@ class PromptLoader:
     }
     """
 
-    def __init__(self):
-        self.base_dir = Path(__file__).parent.parent.parent / Path("prompts")
-
     MAIN_TEMPLATE: str = "main_template"
     ANALYZE_TEMPLATE: str = "analyze_template"
 
     # Use lru_cache to load each file once
     @lru_cache(maxsize=32)
     def _load_templates(self, prompt_file: str, mode: str | None) -> dict[str, str]:
-        prompt_path = self.base_dir / prompt_file
-
-        if not prompt_path.exists():
-            raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
-
-        try:
-            data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
-        except yaml.YAMLError as e:
-            raise ValueError(f"Invalid YAML in {prompt_path}: {e}")
+        base_dir = Path(__file__).parent.parent.parent / Path("prompts")
+        prompt_path = base_dir / prompt_file
+        data = yaml.safe_load(prompt_path.read_text(encoding="utf-8"))
 
         return {
             self.MAIN_TEMPLATE: data[self.MAIN_TEMPLATE][mode]
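The consolidated `_load_templates` drops the old `FileNotFoundError` guard and the `yaml.YAMLError` wrapping, so a missing or malformed prompt file now surfaces as the raw exception from `Path.read_text` or `yaml.safe_load`, which the operators' broad `except Exception` then converts into the `{"error": ..., "result": ""}` fallback. A sketch of the changed failure mode when loading directly; the filename is hypothetical:

```python
loader = PromptLoader()
try:
    # Bypassing the operators: call the cached loader with a nonexistent file
    loader._load_templates("does_not_exist.yaml", mode=None)
except FileNotFoundError as e:
    # Still a FileNotFoundError, but now raised by Path.read_text with its
    # default message rather than the old "Prompt file not found: ..." text
    print(e)
```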
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/the_tool.py RENAMED

@@ -28,7 +28,6 @@ class TheTool:
         self,
         text: str,
         with_analysis: bool = False,
-        output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
@@ -37,20 +36,13 @@ class TheTool:
         """
         Categorize a text into a single Islamic studies domain category.
 
-        Args:
-            text: Input string to categorize.
-            with_analysis: If True, first runs an LLM "analysis" step and
-                conditions the main prompt on that analysis.
-
         Returns:
-            {"result": <category string>}
-            Example: {"result": "باورهای دینی"}
+            {"result": <category string>} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
             text=text,
             with_analysis=with_analysis,
-            output_lang=output_lang,
             user_prompt=user_prompt,
             temperature=temperature,
             logprobs=logprobs,
@@ -60,6 +52,7 @@ class TheTool:
             output_model=OutputModels.CategorizerOutput,
             resp_format="parse",
             mode=None,
+            output_lang=None,
         )
 
     def extract_keywords(
@@ -75,12 +68,8 @@ class TheTool:
         """
         Extract salient keywords from text.
 
-        Args:
-            text: Input string to analyze.
-            with_analysis: Whether to run an extra LLM reasoning step.
-
         Returns:
-            {"result": [<keyword1>, <keyword2>, ...]}
+            {"result": [<keyword1>, <keyword2>, ...]} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -111,12 +100,8 @@ class TheTool:
         """
         Perform Named Entity Recognition (NER) over the input text.
 
-        Args:
-            text: Input string.
-            with_analysis: Whether to run an extra LLM reasoning step.
-
         Returns:
-            {"result": [{"text": <entity>, "type": <entity_type>}, ...]}
+            {"result": [{"text": <entity>, "type": <entity_type>}, ...]} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -146,12 +131,8 @@ class TheTool:
         """
         Detect if the input is phrased as a question.
 
-        Args:
-            question: Input string to evaluate.
-            with_analysis: Whether to include an analysis step.
-
         Returns:
-            {"result":
+            {"result": True} or {"result": False} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -182,12 +163,8 @@ class TheTool:
         """
         Generate a single question from the given text.
 
-        Args:
-            text: Source text to derive a question from.
-            with_analysis: Whether to use analysis before generation.
-
         Returns:
-            {"result": <generated_question>}
+            {"result": <generated_question>} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -219,15 +196,8 @@ class TheTool:
         """
         Merge multiple questions into a single unified question.
 
-        Args:
-            questions: List of question strings.
-            mode: Merge strategy:
-                - "default": simple merging.
-                - "reason": merging with reasoning explanation.
-            with_analysis: Whether to use an analysis step.
-
         Returns:
-            {"result": <merged_question>}
+            {"result": <merged_question>} + ("logprobs" and "analysis" if enabled)
         """
         text = ", ".join(text)
         return self.operator.run(
@@ -258,17 +228,10 @@ class TheTool:
         mode: Literal["positive", "negative", "hard_negative"] = "positive",
     ) -> dict[str, str]:
         """
-        Rewrite a
-
-        Args:
-            question: Input question to rewrite.
-            mode: Rewrite strategy:
-                - "positive": keep meaning, change words.
-                - "negative": alter meaning, preserve wording style.
-            with_analysis: Whether to include an analysis step.
+        Rewrite a text with different modes.
 
         Returns:
-            {"result": <
+            {"result": <rewritten_text>} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -300,14 +263,8 @@ class TheTool:
         """
         Generate a list of questions about a subject.
 
-        Args:
-            subject: Topic of interest.
-            number_of_questions: Number of questions to produce.
-            language: Target language for generated questions.
-            with_analysis: Whether to include an analysis step.
-
         Returns:
-            {"result": [<question1>, <question2>, ...]}
+            {"result": [<question1>, <question2>, ...]} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -339,12 +296,8 @@ class TheTool:
         """
         Summarize the given subject text.
 
-        Args:
-            subject: Input text to summarize.
-            with_analysis: Whether to include an analysis step.
-
         Returns:
-            {"result": <summary>}
+            {"result": <summary>} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
@@ -367,7 +320,6 @@ class TheTool:
         text: str,
         target_language: str,
         with_analysis: bool = False,
-        output_lang: str | None = None,
         user_prompt: str | None = None,
         temperature: float | None = 0.0,
         logprobs: bool = False,
@@ -376,20 +328,14 @@ class TheTool:
         """
         Translate text between languages.
 
-        Args:
-            text: Input string to translate.
-            target_language: Language code or name to translate into.
-            with_analysis: Whether to include an analysis step.
-
         Returns:
-            {"result": <translated_text>}
+            {"result": <translated_text>} + ("logprobs" and "analysis" if enabled)
         """
         return self.operator.run(
             # User parameters
             text=text,
             target_language=target_language,
             with_analysis=with_analysis,
-            output_lang=output_lang,
             user_prompt=user_prompt,
             temperature=temperature,
             logprobs=logprobs,
@@ -399,6 +345,7 @@ class TheTool:
             output_model=OutputModels.StrOutput,
             resp_format="parse",
             mode=None,
+            output_lang=None,
         )
 
     def run_custom(
@@ -413,10 +360,6 @@ class TheTool:
         """
         Custom tool that can do almost anything!
 
-        Args:
-            prompt: Custom prompt.
-            output_model: Custom BaseModel output model.
-
         Returns:
             {"result": <Any>}
         """
hamtaa_texttools-1.0.7/texttools/formatters/base_formatter.py DELETED

@@ -1,33 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any
-
-
-class BaseFormatter(ABC):
-    """
-    Adapter to convert a conversation into a specific LLM API's input format.
-
-    Concrete implementations transform standardized messages (e.g., list[dict]) into the
-    exact payload required by a provider (e.g., OpenAI's message list, a single string, etc.).
-    """
-
-    @abstractmethod
-    def format(
-        self,
-        messages: Any,
-    ) -> Any:
-        """
-        Transform the input messages into a provider-specific payload.
-
-        Args:
-            messages: The input conversation. While often a list of dicts with
-                'role' and 'content' keys, the exact type and structure may vary
-                by implementation.
-
-        Returns:
-            A payload in the format expected by the target LLM API. This could be:
-            - A list of role-content dictionaries (e.g., for OpenAI)
-            - A single formatted string (e.g., for completion-style APIs)
-            - A complex dictionary with additional parameters
-            - Any other provider-specific data structure
-        """
-        pass
hamtaa_texttools-1.0.7/texttools/formatters/user_merge_formatter.py DELETED

@@ -1,30 +0,0 @@
-from texttools.formatters.base_formatter import BaseFormatter
-
-
-class UserMergeFormatter(BaseFormatter):
-    """
-    Merges consecutive user messages into a single message, separated by newlines.
-
-    This is useful for condensing a multi-turn user input into a single coherent
-    message for the LLM. Assistant and system messages are left unchanged and
-    act as separators between user message groups.
-
-    Raises:
-        ValueError: If the input messages have invalid structure or roles.
-    """
-
-    def format(self, messages: list[dict[str, str]]) -> list[dict[str, str]]:
-        merged: list[dict[str, str]] = []
-
-        for message in messages:
-            role, content = message["role"], message["content"].strip()
-
-            # Merge with previous user turn
-            if merged and role == "user" and merged[-1]["role"] == "user":
-                merged[-1]["content"] += "\n" + content
-
-            # Otherwise, start a new turn
-            else:
-                merged.append({"role": role, "content": content})
-
-        return merged
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/LICENSE RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/MANIFEST.in RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/dependency_links.txt RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/requires.txt RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/hamtaa_texttools.egg-info/top_level.txt RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/setup.cfg RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/__init__.py RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/__init__.py RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/batch_manager.py RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/batch/batch_runner.py RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/README.md RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/categorizer.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/extract_entities.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/extract_keywords.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/is_question.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/merge_questions.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/rewrite.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/run_custom.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/subject_to_question.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/summarize.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/text_to_question.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/prompts/translate.yaml RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/__init__.py RENAMED
File without changes
{hamtaa_texttools-1.0.7 → hamtaa_texttools-1.0.8}/texttools/tools/internals/output_models.py RENAMED
File without changes