hamtaa-texttools 1.1.20__tar.gz → 1.1.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.20/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.21}/PKG-INFO +8 -27
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/README.md +7 -26
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21/hamtaa_texttools.egg-info}/PKG-INFO +8 -27
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/hamtaa_texttools.egg-info/SOURCES.txt +1 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/pyproject.toml +1 -1
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/tests/test_all_tools.py +14 -8
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/batch/batch_config.py +14 -1
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/batch/batch_runner.py +1 -1
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/internals/async_operator.py +45 -79
- hamtaa_texttools-1.1.21/texttools/internals/models.py +150 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/internals/operator_utils.py +2 -26
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/internals/prompt_loader.py +3 -20
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/internals/sync_operator.py +44 -78
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/README.md +2 -2
- hamtaa_texttools-1.1.21/texttools/prompts/categorize.yaml +35 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/check_fact.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/extract_entities.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/extract_keywords.yaml +6 -6
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/is_question.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/merge_questions.yaml +4 -4
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/propositionize.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/rewrite.yaml +6 -6
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/run_custom.yaml +1 -1
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/subject_to_question.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/summarize.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/text_to_question.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/prompts/translate.yaml +2 -2
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/tools/async_tools.py +393 -485
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/tools/sync_tools.py +394 -486
- hamtaa_texttools-1.1.20/texttools/batch/internals/utils.py +0 -13
- hamtaa_texttools-1.1.20/texttools/internals/models.py +0 -181
- hamtaa_texttools-1.1.20/texttools/prompts/categorize.yaml +0 -77
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/LICENSE +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/MANIFEST.in +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/hamtaa_texttools.egg-info/requires.txt +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/setup.cfg +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/tests/test_all_async_tools.py +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/tests/test_output_validation.py +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/__init__.py +0 -0
- {hamtaa_texttools-1.1.20/texttools/batch/internals → hamtaa_texttools-1.1.21/texttools/batch}/batch_manager.py +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/internals/exceptions.py +0 -0
- {hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/internals/text_to_chunks.py +0 -0
{hamtaa_texttools-1.1.20/hamtaa_texttools.egg-info → hamtaa_texttools-1.1.21}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.20
+Version: 1.1.21
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 License: MIT License
@@ -50,7 +50,7 @@ It provides ready-to-use utilities for **translation, question detection, keywor
 TextTools provides a rich collection of high-level NLP utilities,
 Each tool is designed to work with structured outputs (JSON / Pydantic).
 
-- **`categorize()`** - Classifies text into given categories
+- **`categorize()`** - Classifies text into given categories
 - **`extract_keywords()`** - Extracts keywords from text
 - **`extract_entities()`** - Named Entity Recognition (NER) system
 - **`is_question()`** - Binary detection of whether input is a question
@@ -61,7 +61,7 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
 - **`summarize()`** - Text summarization
 - **`translate()`** - Text translation between languages
 - **`propositionize()`** - Convert text to atomic independence meaningful sentences
-- **`check_fact()`** - Check a statement is relevant to source text
+- **`check_fact()`** - Check whether a statement is relevant to the source text
 - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
 
 ---
@@ -125,11 +125,12 @@ TextTools provides several optional flags to customize LLM behavior:
 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
 - **`result: Any`** → The output of LLM
 - **`analysis: str`** → The reasoning step before generating the final output
-- **`logprobs: list`** → Token-level probabilities for the generated output
-- **`process: str`** → The tool name which processed the input
-- **`processed_at: datetime`** → The process time
-- **`execution_time: float`** → The execution time (seconds)
+- **`logprobs: list`** → Token-level probabilities for the generated output
 - **`errors: list[str]`** → Any error that have occured during calling LLM
+- **`ToolOutputMetadata`** →
+  - **`tool_name: str`** → The tool name which processed the input
+  - **`processed_at: datetime`** → The process time
+  - **`execution_time: float`** → The execution time (seconds)
 
 **Note:** You can use `repr(ToolOutput)` to see details of your ToolOutput.
 
@@ -224,26 +225,6 @@ Use **TextTools** when you need to:
 
 ---
 
-## 🔍 Logging
-
-TextTools uses Python's standard `logging` module. The library's default logger level is `WARNING`, so if you want to modify it, follow instructions:
-
-
-```python
-import logging
-
-# Default: warnings and errors only
-logging.basicConfig(level=logging.WARNING)
-
-# Debug everything (verbose)
-logging.basicConfig(level=logging.DEBUG)
-
-# Complete silence
-logging.basicConfig(level=logging.CRITICAL)
-```
-
----
-
 ## 📚 Batch Processing
 
 Process large datasets efficiently using OpenAI's batch API.
````
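The `ToolOutput` change above moves the flat `process` / `processed_at` / `execution_time` attributes into a nested `ToolOutputMetadata` model. A minimal Pydantic sketch of the implied shape, using only field names listed in the README diff; the attribute holding the metadata and all defaults are assumptions, since the new `texttools/internals/models.py` is not shown in full here:

```python
# Hypothetical sketch of the 1.1.21 ToolOutput shape, inferred from the
# README attribute list above -- not the package's actual models.py.
from datetime import datetime
from typing import Any

from pydantic import BaseModel, Field


class ToolOutputMetadata(BaseModel):
    tool_name: str = ""                    # tool that processed the input
    processed_at: datetime | None = None   # when processing happened
    execution_time: float = 0.0            # seconds


class ToolOutput(BaseModel):
    result: Any = None                     # the LLM output
    analysis: str | None = None            # reasoning step, if requested
    logprobs: list | None = None           # token-level probabilities
    errors: list[str] = Field(default_factory=list)
    metadata: ToolOutputMetadata = Field(default_factory=ToolOutputMetadata)
```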
{hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/README.md

````diff
@@ -15,7 +15,7 @@ It provides ready-to-use utilities for **translation, question detection, keywor
 TextTools provides a rich collection of high-level NLP utilities,
 Each tool is designed to work with structured outputs (JSON / Pydantic).
 
-- **`categorize()`** - Classifies text into given categories
+- **`categorize()`** - Classifies text into given categories
 - **`extract_keywords()`** - Extracts keywords from text
 - **`extract_entities()`** - Named Entity Recognition (NER) system
 - **`is_question()`** - Binary detection of whether input is a question
@@ -26,7 +26,7 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
 - **`summarize()`** - Text summarization
 - **`translate()`** - Text translation between languages
 - **`propositionize()`** - Convert text to atomic independence meaningful sentences
-- **`check_fact()`** - Check a statement is relevant to source text
+- **`check_fact()`** - Check whether a statement is relevant to the source text
 - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
 
 ---
@@ -90,11 +90,12 @@ TextTools provides several optional flags to customize LLM behavior:
 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
 - **`result: Any`** → The output of LLM
 - **`analysis: str`** → The reasoning step before generating the final output
-- **`logprobs: list`** → Token-level probabilities for the generated output
-- **`process: str`** → The tool name which processed the input
-- **`processed_at: datetime`** → The process time
-- **`execution_time: float`** → The execution time (seconds)
+- **`logprobs: list`** → Token-level probabilities for the generated output
 - **`errors: list[str]`** → Any error that have occured during calling LLM
+- **`ToolOutputMetadata`** →
+  - **`tool_name: str`** → The tool name which processed the input
+  - **`processed_at: datetime`** → The process time
+  - **`execution_time: float`** → The execution time (seconds)
 
 **Note:** You can use `repr(ToolOutput)` to see details of your ToolOutput.
 
@@ -189,26 +190,6 @@ Use **TextTools** when you need to:
 
 ---
 
-## 🔍 Logging
-
-TextTools uses Python's standard `logging` module. The library's default logger level is `WARNING`, so if you want to modify it, follow instructions:
-
-
-```python
-import logging
-
-# Default: warnings and errors only
-logging.basicConfig(level=logging.WARNING)
-
-# Debug everything (verbose)
-logging.basicConfig(level=logging.DEBUG)
-
-# Complete silence
-logging.basicConfig(level=logging.CRITICAL)
-```
-
----
-
 ## 📚 Batch Processing
 
 Process large datasets efficiently using OpenAI's batch API.
````
{hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21/hamtaa_texttools.egg-info}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.20
+Version: 1.1.21
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 License: MIT License
@@ -50,7 +50,7 @@ It provides ready-to-use utilities for **translation, question detection, keywor
 TextTools provides a rich collection of high-level NLP utilities,
 Each tool is designed to work with structured outputs (JSON / Pydantic).
 
-- **`categorize()`** - Classifies text into given categories
+- **`categorize()`** - Classifies text into given categories
 - **`extract_keywords()`** - Extracts keywords from text
 - **`extract_entities()`** - Named Entity Recognition (NER) system
 - **`is_question()`** - Binary detection of whether input is a question
@@ -61,7 +61,7 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
 - **`summarize()`** - Text summarization
 - **`translate()`** - Text translation between languages
 - **`propositionize()`** - Convert text to atomic independence meaningful sentences
-- **`check_fact()`** - Check a statement is relevant to source text
+- **`check_fact()`** - Check whether a statement is relevant to the source text
 - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
 
 ---
@@ -125,11 +125,12 @@ TextTools provides several optional flags to customize LLM behavior:
 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
 - **`result: Any`** → The output of LLM
 - **`analysis: str`** → The reasoning step before generating the final output
-- **`logprobs: list`** → Token-level probabilities for the generated output
-- **`process: str`** → The tool name which processed the input
-- **`processed_at: datetime`** → The process time
-- **`execution_time: float`** → The execution time (seconds)
+- **`logprobs: list`** → Token-level probabilities for the generated output
 - **`errors: list[str]`** → Any error that have occured during calling LLM
+- **`ToolOutputMetadata`** →
+  - **`tool_name: str`** → The tool name which processed the input
+  - **`processed_at: datetime`** → The process time
+  - **`execution_time: float`** → The execution time (seconds)
 
 **Note:** You can use `repr(ToolOutput)` to see details of your ToolOutput.
 
@@ -224,26 +225,6 @@ Use **TextTools** when you need to:
 
 ---
 
-## 🔍 Logging
-
-TextTools uses Python's standard `logging` module. The library's default logger level is `WARNING`, so if you want to modify it, follow instructions:
-
-
-```python
-import logging
-
-# Default: warnings and errors only
-logging.basicConfig(level=logging.WARNING)
-
-# Debug everything (verbose)
-logging.basicConfig(level=logging.DEBUG)
-
-# Complete silence
-logging.basicConfig(level=logging.CRITICAL)
-```
-
----
-
 ## 📚 Batch Processing
 
 Process large datasets efficiently using OpenAI's batch API.
````
{hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/hamtaa_texttools.egg-info/SOURCES.txt

```diff
@@ -12,9 +12,8 @@ tests/test_all_tools.py
 tests/test_output_validation.py
 texttools/__init__.py
 texttools/batch/batch_config.py
+texttools/batch/batch_manager.py
 texttools/batch/batch_runner.py
-texttools/batch/internals/batch_manager.py
-texttools/batch/internals/utils.py
 texttools/internals/async_operator.py
 texttools/internals/exceptions.py
 texttools/internals/models.py
```
{hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/tests/test_all_tools.py

```diff
@@ -28,19 +28,20 @@ category = t.categorize(
 print(repr(category))
 
 # Categorizer: tree mode
-tree = CategoryTree(
-tree.add_node("اخلاق")
-tree.add_node("معرفت شناسی")
-tree.add_node("متافیزیک", description="اراده قدرت در حیطه متافیزیک است")
-tree.add_node(
-
+tree = CategoryTree()
+tree.add_node("اخلاق", "root")
+tree.add_node("معرفت شناسی", "root")
+tree.add_node("متافیزیک", "root", description="اراده قدرت در حیطه متافیزیک است")
+tree.add_node(
+    "فلسفه ذهن", "root", description="فلسفه ذهن به چگونگی درک ما از جهان می پردازد"
+)
+tree.add_node("آگاهی", "فلسفه ذهن")
 tree.add_node("ذهن و بدن", "فلسفه ذهن")
 tree.add_node("امکان و ضرورت", "متافیزیک")
 
 categories = t.categorize(
     "اراده قدرت مفهومی مهم در مابعد الطبیعه است که توسط نیچه مطرح شده",
     tree,
-    mode="category_tree",
 )
 print(repr(categories))
 
@@ -51,7 +52,12 @@ keywords = t.extract_keywords(
 print(repr(keywords))
 
 # NER Extractor
-entities = t.extract_entities(
+entities = t.extract_entities(
+    "Ali will be dead by the car crash",
+    entities=["EVENT"],
+    with_analysis=True,
+    logprobs=True,
+)
 print(repr(entities))
 
 
```
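The test changes above pin down the reworked `CategoryTree` API: the constructor takes no arguments, `add_node()` now requires an explicit parent (with `"root"` as the top level), and `categorize()` no longer needs `mode="category_tree"` when handed a tree. A hypothetical minimal tree consistent with that call pattern, for illustration only; the library's actual class is not shown in this diff:

```python
# Illustrative CategoryTree matching the test's call pattern above --
# not the package's real implementation.
class Node:
    def __init__(self, name: str, description: str | None = None):
        self.name = name
        self.description = description
        self.children: list["Node"] = []


class CategoryTree:
    def __init__(self):
        # "root" exists implicitly; every other node names its parent.
        self.root = Node("root")
        self._index = {"root": self.root}

    def add_node(self, name: str, parent: str, description: str | None = None):
        node = Node(name, description)
        self._index[parent].children.append(node)
        self._index[name] = node
```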
{hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/batch/batch_config.py

```diff
@@ -1,7 +1,20 @@
 from dataclasses import dataclass
 from collections.abc import Callable
 
-
+
+def export_data(data) -> list[dict[str, str]]:
+    """
+    Produces a structure of the following form from an initial data structure:
+    [{"id": str, "text": str},...]
+    """
+    return data
+
+
+def import_data(data) -> object:
+    """
+    Takes the output and adds and aggregates it to the original structure.
+    """
+    return data
 
 
 @dataclass
```
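The new `export_data` and `import_data` above are identity defaults that document the contract: map a dataset to `[{"id": str, "text": str}, ...]` records on the way in, and fold results back into the original structure on the way out. A sketch of user-side replacements under an assumed list-of-dicts dataset; the `uid`/`body`/`summary` keys and the `results` argument are illustrative, not part of the package API:

```python
# Illustrative custom hooks matching the documented shapes; the dataset
# layout ("rows" with "uid"/"body" keys) is a made-up example.
def export_rows(rows: list[dict]) -> list[dict[str, str]]:
    # Reduce each record to the {"id", "text"} shape the batch expects.
    return [{"id": row["uid"], "text": row["body"]} for row in rows]


def import_rows(rows: list[dict], results: dict[str, str]) -> list[dict]:
    # Fold batch results back into the original records by id.
    for row in rows:
        row["summary"] = results.get(row["uid"])
    return rows
```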
{hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/batch/batch_runner.py

```diff
@@ -9,7 +9,7 @@ from dotenv import load_dotenv
 from openai import OpenAI
 from pydantic import BaseModel
 
-from texttools.batch.internals.batch_manager import BatchManager
+from texttools.batch.batch_manager import BatchManager
 from texttools.batch.batch_config import BatchConfig
 from texttools.internals.models import Str
 from texttools.internals.exceptions import TextToolsError, ConfigurationError
```
{hamtaa_texttools-1.1.20 → hamtaa_texttools-1.1.21}/texttools/internals/async_operator.py

```diff
@@ -1,11 +1,10 @@
 from typing import TypeVar, Type
 from collections.abc import Callable
-import logging
 
 from openai import AsyncOpenAI
 from pydantic import BaseModel
 
-from texttools.internals.models import
+from texttools.internals.models import OperatorOutput
 from texttools.internals.operator_utils import OperatorUtils
 from texttools.internals.prompt_loader import PromptLoader
 from texttools.internals.exceptions import (
@@ -18,35 +17,23 @@ from texttools.internals.exceptions import (
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-logger = logging.getLogger("texttools.async_operator")
-
 
 class AsyncOperator:
     """
-    Core engine for running text-processing operations with an LLM
-
-    It wires together:
-    - `PromptLoader` → loads YAML prompt templates.
-    - `UserMergeFormatter` → applies formatting to messages (e.g., merging).
-    - AsyncOpenAI client → executes completions/parsed completions.
+    Core engine for running text-processing operations with an LLM.
     """
 
     def __init__(self, client: AsyncOpenAI, model: str):
        self._client = client
        self._model = model
 
-    async def
-        """
-        Calls OpenAI API for analysis using the configured prompt template.
-        Returns the analyzed content as a string.
-        """
+    async def _analyze_completion(self, analyze_prompt: str, temperature: float) -> str:
         try:
-            analyze_prompt = prompt_configs["analyze_template"]
-
             if not analyze_prompt:
                 raise PromptError("Analyze template is empty")
 
-            analyze_message =
+            analyze_message = OperatorUtils.build_user_message(analyze_prompt)
+
             completion = await self._client.chat.completions.create(
                 model=self._model,
                 messages=analyze_message,
@@ -61,7 +48,7 @@ class AsyncOperator:
             if not analysis:
                 raise LLMError("Empty analysis response")
 
-            return analysis
+            return analysis
 
         except Exception as e:
             if isinstance(e, (PromptError, LLMError)):
@@ -70,21 +57,23 @@ class AsyncOperator:
 
     async def _parse_completion(
         self,
-
+        main_prompt: str,
         output_model: Type[T],
         temperature: float,
-        logprobs: bool
-        top_logprobs: int
-        priority: int
+        logprobs: bool,
+        top_logprobs: int,
+        priority: int,
     ) -> tuple[T, object]:
         """
         Parses a chat completion using OpenAI's structured output format.
         Returns both the parsed object and the raw completion for logprobs.
         """
         try:
+            main_message = OperatorUtils.build_user_message(main_prompt)
+
             request_kwargs = {
                 "model": self._model,
-                "messages":
+                "messages": main_message,
                 "response_format": output_model,
                 "temperature": temperature,
             }
@@ -92,8 +81,10 @@ class AsyncOperator:
             if logprobs:
                 request_kwargs["logprobs"] = True
                 request_kwargs["top_logprobs"] = top_logprobs
+
             if priority:
                 request_kwargs["extra_body"] = {"priority": priority}
+
             completion = await self._client.beta.chat.completions.parse(
                 **request_kwargs
             )
@@ -122,24 +113,22 @@ class AsyncOperator:
         user_prompt: str | None,
         temperature: float,
         logprobs: bool,
-        top_logprobs: int
+        top_logprobs: int,
         validator: Callable[[object], bool] | None,
         max_validation_retries: int | None,
+        priority: int,
         # Internal parameters
         prompt_file: str,
         output_model: Type[T],
         mode: str | None,
-        priority: int | None = 0,
         **extra_kwargs,
-    ) ->
+    ) -> OperatorOutput:
         """
-        Execute the LLM pipeline with the given input text. (
+        Execute the LLM pipeline with the given input text. (Sync)
         """
         try:
             prompt_loader = PromptLoader()
-            output = ToolOutput()
 
-            # Prompt configs contain two keys: main_template and analyze template, both are string
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
                 text=text.strip(),
@@ -147,47 +136,32 @@ class AsyncOperator:
                 **extra_kwargs,
             )
 
-
+            main_prompt = ""
+            analysis = ""
 
             if with_analysis:
-                analysis = await self.
-
-                    OperatorUtils.build_user_message(
-                        f"Based on this analysis: {analysis}"
-                    )
+                analysis = await self._analyze_completion(
+                    prompt_configs["analyze_template"], temperature
                 )
+                main_prompt += f"Based on this analysis:\n{analysis}\n"
 
             if output_lang:
-
-                    OperatorUtils.build_user_message(
-                        f"Respond only in the {output_lang} language."
-                    )
-                )
+                main_prompt += f"Respond only in the {output_lang} language.\n"
 
             if user_prompt:
-
-                    OperatorUtils.build_user_message(
-                        f"Consider this instruction {user_prompt}"
-                    )
-                )
-
-            messages.append(
-                OperatorUtils.build_user_message(prompt_configs["main_template"])
-            )
+                main_prompt += f"Consider this instruction {user_prompt}\n"
 
-
+            main_prompt += prompt_configs["main_template"]
 
             if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
                 raise ValueError("top_logprobs should be an integer greater than 1")
 
             parsed, completion = await self._parse_completion(
-
+                main_prompt, output_model, temperature, logprobs, top_logprobs, priority
             )
 
-            output.result = parsed.result
-
             # Retry logic if validation fails
-            if validator and not validator(
+            if validator and not validator(parsed.result):
                 if (
                     not isinstance(max_validation_retries, int)
                     or max_validation_retries < 1
@@ -197,17 +171,13 @@ class AsyncOperator:
                 )
 
                 succeeded = False
-                for
-
-                        f"Validation failed, retrying for the {attempt + 1} time."
-                    )
-
-                    # Generate new temperature for retry
+                for _ in range(max_validation_retries):
+                    # Generate a new temperature to retry
                     retry_temperature = OperatorUtils.get_retry_temp(temperature)
 
                     try:
                         parsed, completion = await self._parse_completion(
-
+                            main_prompt,
                             output_model,
                             retry_temperature,
                             logprobs,
@@ -215,30 +185,26 @@ class AsyncOperator:
                             priority=priority,
                         )
 
-                        output.result = parsed.result
-
                         # Check if retry was successful
-                        if validator(
+                        if validator(parsed.result):
                             succeeded = True
                             break
 
-                    except LLMError
-
+                    except LLMError:
+                        pass
 
                 if not succeeded:
-                    raise ValidationError(
-
-
-
-
-
-
-
-
-
-            output.process = prompt_file[:-5]
+                    raise ValidationError("Validation failed after all retries")
+
+            operator_output = OperatorOutput(
+                result=parsed.result,
+                analysis=analysis if with_analysis else None,
+                logprobs=OperatorUtils.extract_logprobs(completion)
+                if logprobs
+                else None,
+            )
 
-            return
+            return operator_output
 
         except (PromptError, LLMError, ValidationError):
             raise
```