hamtaa-texttools 1.3.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.3.1.dist-info → hamtaa_texttools-2.0.0.dist-info}/METADATA +42 -48
- hamtaa_texttools-2.0.0.dist-info/RECORD +30 -0
- {hamtaa_texttools-1.3.1.dist-info → hamtaa_texttools-2.0.0.dist-info}/WHEEL +1 -1
- {hamtaa_texttools-1.3.1.dist-info → hamtaa_texttools-2.0.0.dist-info}/licenses/LICENSE +1 -1
- texttools/__init__.py +1 -1
- texttools/core/internal_models.py +21 -8
- texttools/core/operators/__init__.py +0 -0
- texttools/core/operators/async_operator.py +11 -19
- texttools/core/operators/sync_operator.py +11 -19
- texttools/core/utils.py +260 -0
- texttools/models.py +77 -22
- texttools/prompts/{rewrite.yaml → augment.yaml} +3 -3
- texttools/prompts/categorize.yaml +7 -8
- texttools/prompts/extract_entities.yaml +2 -2
- texttools/prompts/extract_keywords.yaml +4 -2
- texttools/prompts/{check_fact.yaml → is_fact.yaml} +5 -4
- texttools/prompts/is_question.yaml +1 -1
- texttools/prompts/merge_questions.yaml +8 -6
- texttools/prompts/propositionize.yaml +11 -7
- texttools/prompts/run_custom.yaml +3 -1
- texttools/prompts/summarize.yaml +3 -3
- texttools/prompts/to_question.yaml +60 -0
- texttools/prompts/translate.yaml +4 -4
- texttools/tools/async_tools.py +90 -169
- texttools/tools/sync_tools.py +76 -150
- hamtaa_texttools-1.3.1.dist-info/RECORD +0 -30
- texttools/core/engine.py +0 -264
- texttools/prompts/subject_to_question.yaml +0 -26
- texttools/prompts/text_to_question.yaml +0 -26
- {hamtaa_texttools-1.3.1.dist-info → hamtaa_texttools-2.0.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hamtaa-texttools
|
|
3
|
-
Version: 1.3.1
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: A high-level NLP toolkit built on top of modern LLMs.
|
|
5
5
|
Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
|
|
6
6
|
Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
|
|
@@ -11,7 +11,7 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
11
11
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
12
|
Classifier: Topic :: Text Processing
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
|
-
Requires-Python: >=3.
|
|
14
|
+
Requires-Python: >=3.11
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
16
16
|
License-File: LICENSE
|
|
17
17
|
Requires-Dist: openai>=1.97.1
|
|
@@ -21,36 +21,36 @@ Dynamic: license-file
|
|
|
21
21
|
|
|
22
22
|
# TextTools
|
|
23
23
|
|
|
24
|
+

|
|
25
|
+

|
|
26
|
+
|
|
24
27
|
## 📌 Overview
|
|
25
28
|
|
|
26
29
|
**TextTools** is a high-level **NLP toolkit** built on top of **LLMs**.
|
|
27
30
|
|
|
28
31
|
It provides both **sync (`TheTool`)** and **async (`AsyncTheTool`)** APIs for maximum flexibility.
|
|
29
32
|
|
|
30
|
-
It provides ready-to-use utilities for **translation, question detection,
|
|
31
|
-
|
|
32
|
-
**Note:** Most features of `texttools` are reliable when you use `google/gemma-3n-e4b-it` model.
|
|
33
|
+
It provides ready-to-use utilities for **translation, question detection, categorization, NER extraction, and more** - designed to help you integrate AI-powered text processing into your applications with minimal effort.
|
|
33
34
|
|
|
34
35
|
---
|
|
35
36
|
|
|
36
37
|
## ✨ Features
|
|
37
38
|
|
|
38
|
-
TextTools provides a
|
|
39
|
+
TextTools provides a collection of high-level NLP utilities.
|
|
39
40
|
Each tool is designed to work with structured outputs.
|
|
40
41
|
|
|
41
|
-
- **`categorize()`** -
|
|
42
|
-
- **`extract_keywords()`** -
|
|
43
|
-
- **`extract_entities()`** - Named Entity Recognition (NER)
|
|
44
|
-
- **`is_question()`** -
|
|
45
|
-
- **`
|
|
46
|
-
- **`merge_questions()`** -
|
|
47
|
-
- **`
|
|
48
|
-
- **`
|
|
49
|
-
- **`
|
|
50
|
-
- **`
|
|
51
|
-
- **`
|
|
52
|
-
- **`
|
|
53
|
-
- **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
|
|
42
|
+
- **`categorize()`** - Classify text into given categories
|
|
43
|
+
- **`extract_keywords()`** - Extract keywords from the text
|
|
44
|
+
- **`extract_entities()`** - Perform Named Entity Recognition (NER)
|
|
45
|
+
- **`is_question()`** - Detect if the input is phrased as a question
|
|
46
|
+
- **`to_question()`** - Generate questions from the given text / subject
|
|
47
|
+
- **`merge_questions()`** - Merge multiple questions into one
|
|
48
|
+
- **`augment()`** - Rewrite text in different augmentations
|
|
49
|
+
- **`summarize()`** - Summarize the given text
|
|
50
|
+
- **`translate()`** - Translate text between languages
|
|
51
|
+
- **`propositionize()`** - Convert a text into atomic, independent, meaningful sentences
|
|
52
|
+
- **`is_fact()`** - Check whether a statement is a fact based on the source text
|
|
53
|
+
- **`run_custom()`** - Custom tool that can do almost anything
|
|
54
54
|
|
|
55
55
|
---
|
|
56
56
|
|
|
@@ -66,16 +66,14 @@ pip install -U hamtaa-texttools
|
|
|
66
66
|
|
|
67
67
|
## 📊 Tool Quality Tiers
|
|
68
68
|
|
|
69
|
-
| Status | Meaning | Tools |
|
|
69
|
+
| Status | Meaning | Tools | Safe for Production? |
|
|
70
70
|
|--------|---------|----------|-------------------|
|
|
71
|
-
| **✅ Production** | Evaluated
|
|
72
|
-
| **🧪 Experimental** | Added to the package but **not fully evaluated**.
|
|
71
|
+
| **✅ Production** | Evaluated and tested. | `categorize()` (list mode), `extract_keywords()`, `extract_entities()`, `is_question()`, `to_question()`, `merge_questions()`, `augment()`, `summarize()`, `run_custom()` | **Yes** - ready for reliable use. |
|
|
72
|
+
| **🧪 Experimental** | Added to the package but **not fully evaluated**. | `categorize()` (tree mode), `translate()`, `propositionize()`, `is_fact()` | **Use with caution** |
|
|
73
73
|
|
|
74
74
|
---
|
|
75
75
|
|
|
76
|
-
## ⚙️
|
|
77
|
-
|
|
78
|
-
TextTools provides several optional flags to customize LLM behavior:
|
|
76
|
+
## ⚙️ Additional Parameters
|
|
79
77
|
|
|
80
78
|
- **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
|
|
81
79
|
**Note:** This doubles token usage per call.
|
|
@@ -85,17 +83,17 @@ TextTools provides several optional flags to customize LLM behavior:
|
|
|
85
83
|
|
|
86
84
|
- **`output_lang: str`** → Forces the model to respond in a specific language.
|
|
87
85
|
|
|
88
|
-
- **`user_prompt: str`** → Allows you to inject a custom instruction
|
|
86
|
+
- **`user_prompt: str`** → Allows you to inject a custom instruction into the model alongside the main template.
|
|
89
87
|
|
|
90
|
-
- **`temperature: float`** → Determines how creative the model should respond. Takes a float number
|
|
88
|
+
- **`temperature: float`** → Determines how creatively the model should respond. Takes a float between `0.0` and `2.0`.
|
|
91
89
|
|
|
92
|
-
- **`validator: Callable (Experimental)`** → Forces
|
|
90
|
+
- **`validator: Callable (Experimental)`** → Forces the tool to validate the output using your validator function, which should return a boolean. If validation fails, TheTool retries with a modified `temperature` to get another output. You can also specify `max_validation_retries=<N>`.
|
|
93
91
|
|
|
94
|
-
- **`priority: int (Experimental)`** →
|
|
92
|
+
- **`priority: int (Experimental)`** → Affects processing order in queues.
|
|
95
93
|
**Note:** This feature works if it's supported by the model and vLLM.
|
|
96
94
|
|
|
97
|
-
- **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error
|
|
98
|
-
**Note:** This feature only
|
|
95
|
+
- **`timeout: float`** → Maximum time in seconds to wait for the response before raising a timeout error.
|
|
96
|
+
**Note:** This feature is only available in `AsyncTheTool`.
|
|
99
97
|
|
|
100
98
|
|
|
101
99
|
---
|
|
@@ -107,12 +105,14 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
|
|
|
107
105
|
- **`analysis: str`**
|
|
108
106
|
- **`logprobs: list`**
|
|
109
107
|
- **`errors: list[str]`**
|
|
110
|
-
- **`ToolOutputMetadata`**
|
|
108
|
+
- **`ToolOutputMetadata`**
|
|
111
109
|
- **`tool_name: str`**
|
|
112
110
|
- **`processed_at: datetime`**
|
|
113
111
|
- **`execution_time: float`**
|
|
114
112
|
|
|
115
|
-
|
|
113
|
+
- Serialize output to JSON using the `to_json()` method.
|
|
114
|
+
- Verify operation success with the `is_successful()` method.
|
|
115
|
+
- Convert output to a dictionary with the `to_dict()` method.
|
|
116
116
|
|
|
117
117
|
---
|
|
118
118
|
|
|
@@ -130,13 +130,13 @@ Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel wit
|
|
|
130
130
|
from openai import OpenAI
|
|
131
131
|
from texttools import TheTool
|
|
132
132
|
|
|
133
|
-
client = OpenAI(base_url
|
|
133
|
+
client = OpenAI(base_url="your_url", api_key="your_api_key")
|
|
134
134
|
model = "model_name"
|
|
135
135
|
|
|
136
136
|
the_tool = TheTool(client=client, model=model)
|
|
137
137
|
|
|
138
138
|
detection = the_tool.is_question("Is this project open source?")
|
|
139
|
-
print(
|
|
139
|
+
print(detection.to_json())
|
|
140
140
|
```
|
|
141
141
|
|
|
142
142
|
---
|
|
@@ -154,24 +154,24 @@ async def main():
|
|
|
154
154
|
|
|
155
155
|
async_the_tool = AsyncTheTool(client=async_client, model=model)
|
|
156
156
|
|
|
157
|
-
translation_task = async_the_tool.translate("سلام، حالت چطوره؟",
|
|
158
|
-
keywords_task = async_the_tool.extract_keywords("
|
|
157
|
+
translation_task = async_the_tool.translate("سلام، حالت چطوره؟", target_lang="English")
|
|
158
|
+
keywords_task = async_the_tool.extract_keywords("This open source project is great for processing large datasets!")
|
|
159
159
|
|
|
160
160
|
(translation, keywords) = await asyncio.gather(translation_task, keywords_task)
|
|
161
|
-
|
|
162
|
-
print(
|
|
161
|
+
|
|
162
|
+
print(translation.to_json())
|
|
163
|
+
print(keywords.to_json())
|
|
163
164
|
|
|
164
165
|
asyncio.run(main())
|
|
165
166
|
```
|
|
166
167
|
|
|
167
168
|
---
|
|
168
169
|
|
|
169
|
-
##
|
|
170
|
+
## ✅ Use Cases
|
|
170
171
|
|
|
171
172
|
Use **TextTools** when you need to:
|
|
172
173
|
|
|
173
|
-
- 🔍 **Classify** large datasets quickly without model training
|
|
174
|
-
- 🌍 **Translate** and process multilingual corpora with ease
|
|
174
|
+
- 🔍 **Classify** large datasets quickly without model training
|
|
175
175
|
- 🧩 **Integrate** LLMs into production pipelines (structured outputs)
|
|
176
176
|
- 📊 **Analyze** large text collections using embeddings and categorization
|
|
177
177
|
|
|
@@ -181,9 +181,3 @@ Use **TextTools** when you need to:
|
|
|
181
181
|
|
|
182
182
|
Contributions are welcome!
|
|
183
183
|
Feel free to **open issues, suggest new features, or submit pull requests**.
|
|
184
|
-
|
|
185
|
-
---
|
|
186
|
-
|
|
187
|
-
## 🌿 License
|
|
188
|
-
|
|
189
|
-
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
hamtaa_texttools-2.0.0.dist-info/licenses/LICENSE,sha256=gqxbR8wqI3utd__l3Yn6_dQ3Pou1a17W4KmydbvZGok,1084
|
|
2
|
+
texttools/__init__.py,sha256=AHpTq1BbL3sWCaFiIjlSkqNfNqweq-qm2EIOSmUZRJ0,175
|
|
3
|
+
texttools/models.py,sha256=CQnO1zkKHFyqeMWrYGA4IyXQ7YYLVc3Xz1WaXbXzDLw,4634
|
|
4
|
+
texttools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
texttools/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
texttools/core/exceptions.py,sha256=6SDjUL1rmd3ngzD3ytF4LyTRj3bQMSFR9ECrLoqXXHw,395
|
|
7
|
+
texttools/core/internal_models.py,sha256=CmRtXGZRn5fZ18lVb42N8LrZXvJb6WwdjIhgiotWJdA,1952
|
|
8
|
+
texttools/core/utils.py,sha256=jqXHXU1DWDKWhK0HHSjnjq4_TLg3FMcnRzrwTF1eqqc,9744
|
|
9
|
+
texttools/core/operators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
texttools/core/operators/async_operator.py,sha256=HOi9gUwIffJUtyp8WLNbMpxI8jnafNDrbtLl6vyPcUs,6221
|
|
11
|
+
texttools/core/operators/sync_operator.py,sha256=yM14fsku-4Nf60lPUVePaB9Lu8HbGKb4ubwoizVWuYQ,6126
|
|
12
|
+
texttools/prompts/augment.yaml,sha256=O-LMVyrihr0GQ8hp2Lx6uIR8Jh83bUDS9UZ-dvYOP7k,5453
|
|
13
|
+
texttools/prompts/categorize.yaml,sha256=kN4uRPOC7q6A13bdCIox60vZZ8sgRiTtquv-kqIvTsk,1133
|
|
14
|
+
texttools/prompts/extract_entities.yaml,sha256=-qe1eEvN-8nJ2_GLjeoFAPVORCPYUzsIt7UGXD485bE,648
|
|
15
|
+
texttools/prompts/extract_keywords.yaml,sha256=jP74HFa4Dka01d1COStEBbdzW5onqwocwyyVsmNpECs,3276
|
|
16
|
+
texttools/prompts/is_fact.yaml,sha256=kqF527DEdnlL3MG5tF1Z3ci_sRxmGv7dgNR2SuElq4Y,719
|
|
17
|
+
texttools/prompts/is_question.yaml,sha256=C-ynlt0qHpUM4BAIh0oI7UJ5BxCNU9-GR9T5864jeto,496
|
|
18
|
+
texttools/prompts/merge_questions.yaml,sha256=zgZs8BcwseZy1GsD_DvVGtw0yuCCc6xsK8VDmuHI2V0,1844
|
|
19
|
+
texttools/prompts/propositionize.yaml,sha256=xTw3HQrxtxoMpkf8a9is0uZZ0AG4IDNfh7XE0aVlNso,1441
|
|
20
|
+
texttools/prompts/run_custom.yaml,sha256=hSfR4BMJNUo9nP_AodPU7YTnhR-X_G-W7Pz0ROQzoI0,133
|
|
21
|
+
texttools/prompts/summarize.yaml,sha256=0aKYFRDxODqOOEhSexi-hn3twLwkMFVmi7rtAifnCuA,464
|
|
22
|
+
texttools/prompts/to_question.yaml,sha256=n8Bn28QjvSHwPHQLwRYpZ2IsaaBsq4pK9Dp_i0xk8eg,2210
|
|
23
|
+
texttools/prompts/translate.yaml,sha256=omtC-TlFYMidy8WqRe7idUtKNiK4g3IhEl-iyufOwjk,649
|
|
24
|
+
texttools/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
+
texttools/tools/async_tools.py,sha256=4-SwsjFqT_mGTK7reCqV-4VXxkyYkyuY3WYJcwkRMJs,44788
|
|
26
|
+
texttools/tools/sync_tools.py,sha256=trmWBf4lvF9h_cB1DelOC4MFWDaQuQ0FDcdc-gCMVxo,40534
|
|
27
|
+
hamtaa_texttools-2.0.0.dist-info/METADATA,sha256=NJYJ6K5HolttdX8_2rCq_w62N6k22mqu0hqO5Qhhy8I,6968
|
|
28
|
+
hamtaa_texttools-2.0.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
29
|
+
hamtaa_texttools-2.0.0.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
|
|
30
|
+
hamtaa_texttools-2.0.0.dist-info/RECORD,,
|
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
21
|
+
SOFTWARE.
|
texttools/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Literal
|
|
1
|
+
from typing import Any, Literal
|
|
2
2
|
|
|
3
3
|
from pydantic import BaseModel, Field, create_model
|
|
4
4
|
|
|
@@ -10,18 +10,24 @@ class OperatorOutput(BaseModel):
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class Str(BaseModel):
|
|
13
|
-
result: str = Field(
|
|
13
|
+
result: str = Field(
|
|
14
|
+
..., description="The output string", json_schema_extra={"example": "text"}
|
|
15
|
+
)
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class Bool(BaseModel):
|
|
17
19
|
result: bool = Field(
|
|
18
|
-
...,
|
|
20
|
+
...,
|
|
21
|
+
description="Boolean indicating the output state",
|
|
22
|
+
json_schema_extra={"example": True},
|
|
19
23
|
)
|
|
20
24
|
|
|
21
25
|
|
|
22
26
|
class ListStr(BaseModel):
|
|
23
27
|
result: list[str] = Field(
|
|
24
|
-
...,
|
|
28
|
+
...,
|
|
29
|
+
description="The output list of strings",
|
|
30
|
+
json_schema_extra={"example": ["text_1", "text_2", "text_3"]},
|
|
25
31
|
)
|
|
26
32
|
|
|
27
33
|
|
|
@@ -29,19 +35,26 @@ class ListDictStrStr(BaseModel):
|
|
|
29
35
|
result: list[dict[str, str]] = Field(
|
|
30
36
|
...,
|
|
31
37
|
description="List of dictionaries containing string key-value pairs",
|
|
32
|
-
|
|
38
|
+
json_schema_extra={
|
|
39
|
+
"example": [
|
|
40
|
+
{"text": "Mohammad", "type": "PER"},
|
|
41
|
+
{"text": "Iran", "type": "LOC"},
|
|
42
|
+
]
|
|
43
|
+
},
|
|
33
44
|
)
|
|
34
45
|
|
|
35
46
|
|
|
36
47
|
class ReasonListStr(BaseModel):
|
|
37
48
|
reason: str = Field(..., description="Thinking process that led to the output")
|
|
38
49
|
result: list[str] = Field(
|
|
39
|
-
...,
|
|
50
|
+
...,
|
|
51
|
+
description="The output list of strings",
|
|
52
|
+
json_schema_extra={"example": ["text_1", "text_2", "text_3"]},
|
|
40
53
|
)
|
|
41
54
|
|
|
42
55
|
|
|
43
|
-
#
|
|
44
|
-
def create_dynamic_model(allowed_values: list[str]) ->
|
|
56
|
+
# Create CategorizerOutput with dynamic categories
|
|
57
|
+
def create_dynamic_model(allowed_values: list[str]) -> type[BaseModel]:
|
|
45
58
|
literal_type = Literal[*allowed_values]
|
|
46
59
|
|
|
47
60
|
CategorizerOutput = create_model(
|
|
File without changes
|
|
@@ -1,15 +1,12 @@
|
|
|
1
1
|
from collections.abc import Callable
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
from openai import AsyncOpenAI
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
|
-
from ..engine import OperatorUtils, PromptLoader
|
|
8
7
|
from ..exceptions import LLMError, PromptError, TextToolsError, ValidationError
|
|
9
8
|
from ..internal_models import OperatorOutput
|
|
10
|
-
|
|
11
|
-
# Base Model type for output models
|
|
12
|
-
T = TypeVar("T", bound=BaseModel)
|
|
9
|
+
from ..utils import OperatorUtils
|
|
13
10
|
|
|
14
11
|
|
|
15
12
|
class AsyncOperator:
|
|
@@ -46,15 +43,15 @@ class AsyncOperator:
|
|
|
46
43
|
async def _parse_completion(
|
|
47
44
|
self,
|
|
48
45
|
main_message: list[dict[str, str]],
|
|
49
|
-
output_model:
|
|
46
|
+
output_model: type[BaseModel],
|
|
50
47
|
temperature: float,
|
|
51
48
|
logprobs: bool,
|
|
52
49
|
top_logprobs: int,
|
|
53
50
|
priority: int | None,
|
|
54
|
-
) -> tuple[
|
|
51
|
+
) -> tuple[BaseModel, Any]:
|
|
55
52
|
"""
|
|
56
53
|
Parses a chat completion using OpenAI's structured output format.
|
|
57
|
-
Returns both the parsed
|
|
54
|
+
Returns both the parsed output and the completion (used for logprobs).
|
|
58
55
|
"""
|
|
59
56
|
try:
|
|
60
57
|
request_kwargs = {
|
|
@@ -92,7 +89,6 @@ class AsyncOperator:
|
|
|
92
89
|
|
|
93
90
|
async def run(
|
|
94
91
|
self,
|
|
95
|
-
# User parameters
|
|
96
92
|
text: str,
|
|
97
93
|
with_analysis: bool,
|
|
98
94
|
output_lang: str | None,
|
|
@@ -103,9 +99,8 @@ class AsyncOperator:
|
|
|
103
99
|
validator: Callable[[Any], bool] | None,
|
|
104
100
|
max_validation_retries: int | None,
|
|
105
101
|
priority: int | None,
|
|
106
|
-
# Internal parameters
|
|
107
102
|
tool_name: str,
|
|
108
|
-
output_model:
|
|
103
|
+
output_model: type[BaseModel],
|
|
109
104
|
mode: str | None,
|
|
110
105
|
**extra_kwargs,
|
|
111
106
|
) -> OperatorOutput:
|
|
@@ -113,8 +108,7 @@ class AsyncOperator:
|
|
|
113
108
|
Execute the LLM pipeline with the given input text.
|
|
114
109
|
"""
|
|
115
110
|
try:
|
|
116
|
-
|
|
117
|
-
prompt_configs = prompt_loader.load(
|
|
111
|
+
prompt_configs = OperatorUtils.load_prompt(
|
|
118
112
|
prompt_file=tool_name + ".yaml",
|
|
119
113
|
text=text.strip(),
|
|
120
114
|
mode=mode,
|
|
@@ -129,11 +123,10 @@ class AsyncOperator:
|
|
|
129
123
|
)
|
|
130
124
|
analysis = await self._analyze_completion(analyze_message)
|
|
131
125
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
prompt_configs["main_template"], analysis, output_lang, user_prompt
|
|
135
|
-
)
|
|
126
|
+
main_prompt = OperatorUtils.build_main_prompt(
|
|
127
|
+
prompt_configs["main_template"], analysis, output_lang, user_prompt
|
|
136
128
|
)
|
|
129
|
+
main_message = OperatorUtils.build_message(main_prompt)
|
|
137
130
|
|
|
138
131
|
parsed, completion = await self._parse_completion(
|
|
139
132
|
main_message,
|
|
@@ -144,7 +137,7 @@ class AsyncOperator:
|
|
|
144
137
|
priority,
|
|
145
138
|
)
|
|
146
139
|
|
|
147
|
-
# Retry logic
|
|
140
|
+
# Retry logic in case output validation fails
|
|
148
141
|
if validator and not validator(parsed.result):
|
|
149
142
|
if (
|
|
150
143
|
not isinstance(max_validation_retries, int)
|
|
@@ -154,7 +147,6 @@ class AsyncOperator:
|
|
|
154
147
|
|
|
155
148
|
succeeded = False
|
|
156
149
|
for _ in range(max_validation_retries):
|
|
157
|
-
# Generate a new temperature to retry
|
|
158
150
|
retry_temperature = OperatorUtils.get_retry_temp(temperature)
|
|
159
151
|
|
|
160
152
|
try:
|
|
@@ -1,15 +1,12 @@
|
|
|
1
1
|
from collections.abc import Callable
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
from openai import OpenAI
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
|
-
from ..engine import OperatorUtils, PromptLoader
|
|
8
7
|
from ..exceptions import LLMError, PromptError, TextToolsError, ValidationError
|
|
9
8
|
from ..internal_models import OperatorOutput
|
|
10
|
-
|
|
11
|
-
# Base Model type for output models
|
|
12
|
-
T = TypeVar("T", bound=BaseModel)
|
|
9
|
+
from ..utils import OperatorUtils
|
|
13
10
|
|
|
14
11
|
|
|
15
12
|
class Operator:
|
|
@@ -46,15 +43,15 @@ class Operator:
|
|
|
46
43
|
def _parse_completion(
|
|
47
44
|
self,
|
|
48
45
|
main_message: list[dict[str, str]],
|
|
49
|
-
output_model:
|
|
46
|
+
output_model: type[BaseModel],
|
|
50
47
|
temperature: float,
|
|
51
48
|
logprobs: bool,
|
|
52
49
|
top_logprobs: int,
|
|
53
50
|
priority: int | None,
|
|
54
|
-
) -> tuple[
|
|
51
|
+
) -> tuple[BaseModel, Any]:
|
|
55
52
|
"""
|
|
56
53
|
Parses a chat completion using OpenAI's structured output format.
|
|
57
|
-
Returns both the parsed
|
|
54
|
+
Returns both the parsed output and the completion (used for logprobs).
|
|
58
55
|
"""
|
|
59
56
|
try:
|
|
60
57
|
request_kwargs = {
|
|
@@ -90,7 +87,6 @@ class Operator:
|
|
|
90
87
|
|
|
91
88
|
def run(
|
|
92
89
|
self,
|
|
93
|
-
# User parameters
|
|
94
90
|
text: str,
|
|
95
91
|
with_analysis: bool,
|
|
96
92
|
output_lang: str | None,
|
|
@@ -101,9 +97,8 @@ class Operator:
|
|
|
101
97
|
validator: Callable[[Any], bool] | None,
|
|
102
98
|
max_validation_retries: int | None,
|
|
103
99
|
priority: int | None,
|
|
104
|
-
# Internal parameters
|
|
105
100
|
tool_name: str,
|
|
106
|
-
output_model:
|
|
101
|
+
output_model: type[BaseModel],
|
|
107
102
|
mode: str | None,
|
|
108
103
|
**extra_kwargs,
|
|
109
104
|
) -> OperatorOutput:
|
|
@@ -111,8 +106,7 @@ class Operator:
|
|
|
111
106
|
Execute the LLM pipeline with the given input text.
|
|
112
107
|
"""
|
|
113
108
|
try:
|
|
114
|
-
|
|
115
|
-
prompt_configs = prompt_loader.load(
|
|
109
|
+
prompt_configs = OperatorUtils.load_prompt(
|
|
116
110
|
prompt_file=tool_name + ".yaml",
|
|
117
111
|
text=text.strip(),
|
|
118
112
|
mode=mode,
|
|
@@ -127,11 +121,10 @@ class Operator:
|
|
|
127
121
|
)
|
|
128
122
|
analysis = self._analyze_completion(analyze_message)
|
|
129
123
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
prompt_configs["main_template"], analysis, output_lang, user_prompt
|
|
133
|
-
)
|
|
124
|
+
main_prompt = OperatorUtils.build_main_prompt(
|
|
125
|
+
prompt_configs["main_template"], analysis, output_lang, user_prompt
|
|
134
126
|
)
|
|
127
|
+
main_message = OperatorUtils.build_message(main_prompt)
|
|
135
128
|
|
|
136
129
|
parsed, completion = self._parse_completion(
|
|
137
130
|
main_message,
|
|
@@ -142,7 +135,7 @@ class Operator:
|
|
|
142
135
|
priority,
|
|
143
136
|
)
|
|
144
137
|
|
|
145
|
-
# Retry logic
|
|
138
|
+
# Retry logic in case output validation fails
|
|
146
139
|
if validator and not validator(parsed.result):
|
|
147
140
|
if (
|
|
148
141
|
not isinstance(max_validation_retries, int)
|
|
@@ -152,7 +145,6 @@ class Operator:
|
|
|
152
145
|
|
|
153
146
|
succeeded = False
|
|
154
147
|
for _ in range(max_validation_retries):
|
|
155
|
-
# Generate a new temperature to retry
|
|
156
148
|
retry_temperature = OperatorUtils.get_retry_temp(temperature)
|
|
157
149
|
|
|
158
150
|
try:
|