hamtaa-texttools 1.1.19__py3-none-any.whl → 1.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.21.dist-info}/METADATA +15 -34
- hamtaa_texttools-1.1.21.dist-info/RECORD +32 -0
- texttools/batch/batch_config.py +14 -1
- texttools/batch/{internals/batch_manager.py → batch_manager.py} +6 -6
- texttools/batch/batch_runner.py +7 -7
- texttools/internals/async_operator.py +48 -84
- texttools/internals/models.py +73 -113
- texttools/internals/operator_utils.py +2 -2
- texttools/internals/prompt_loader.py +3 -20
- texttools/internals/sync_operator.py +47 -83
- texttools/internals/text_to_chunks.py +97 -0
- texttools/prompts/README.md +2 -2
- texttools/prompts/categorize.yaml +35 -77
- texttools/prompts/check_fact.yaml +2 -2
- texttools/prompts/extract_entities.yaml +3 -3
- texttools/prompts/extract_keywords.yaml +6 -6
- texttools/prompts/is_question.yaml +2 -2
- texttools/prompts/merge_questions.yaml +4 -4
- texttools/prompts/propositionize.yaml +2 -2
- texttools/prompts/rewrite.yaml +6 -6
- texttools/prompts/run_custom.yaml +1 -1
- texttools/prompts/subject_to_question.yaml +2 -2
- texttools/prompts/summarize.yaml +2 -2
- texttools/prompts/text_to_question.yaml +8 -6
- texttools/prompts/translate.yaml +2 -2
- texttools/tools/async_tools.py +497 -519
- texttools/tools/sync_tools.py +498 -520
- hamtaa_texttools-1.1.19.dist-info/RECORD +0 -33
- texttools/batch/internals/utils.py +0 -16
- texttools/internals/formatters.py +0 -24
- {hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.21.dist-info}/WHEEL +0 -0
- {hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.21.dist-info}/licenses/LICENSE +0 -0
- {hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.21.dist-info}/top_level.txt +0 -0
{hamtaa_texttools-1.1.19.dist-info → hamtaa_texttools-1.1.21.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 1.1.19
+Version: 1.1.21
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, MoosaviNejad <erfanmoosavi84@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 License: MIT License
@@ -50,7 +50,7 @@ It provides ready-to-use utilities for **translation, question detection, keywor
 TextTools provides a rich collection of high-level NLP utilities,
 Each tool is designed to work with structured outputs (JSON / Pydantic).
 
-- **`categorize()`** - Classifies text into given categories
+- **`categorize()`** - Classifies text into given categories
 - **`extract_keywords()`** - Extracts keywords from text
 - **`extract_entities()`** - Named Entity Recognition (NER) system
 - **`is_question()`** - Binary detection of whether input is a question
@@ -61,7 +61,7 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
 - **`summarize()`** - Text summarization
 - **`translate()`** - Text translation between languages
 - **`propositionize()`** - Convert text to atomic independence meaningful sentences
-- **`check_fact()`** - Check a statement is relevant to source text
+- **`check_fact()`** - Check whether a statement is relevant to the source text
 - **`run_custom()`** - Allows users to define a custom tool with an arbitrary BaseModel
 
 ---
@@ -99,21 +99,21 @@ Each tool is designed to work with structured outputs (JSON / Pydantic).
 
 TextTools provides several optional flags to customize LLM behavior:
 
-- **`with_analysis…
+- **`with_analysis: bool`** → Adds a reasoning step before generating the final output.
   **Note:** This doubles token usage per call because it triggers an additional LLM request.
 
-- **`logprobs…
+- **`logprobs: bool`** → Returns token-level probabilities for the generated output. You can also specify `top_logprobs=<N>` to get the top N alternative tokens and their probabilities.
   **Note:** This feature works if it's supported by the model.
 
-- **`output_lang…
+- **`output_lang: str`** → Forces the model to respond in a specific language. The model will ignore other instructions about language and respond strictly in the requested language.
 
-- **`user_prompt…
+- **`user_prompt: str`** → Allows you to inject a custom instruction or prompt into the model alongside the main template. This gives you fine-grained control over how the model interprets or modifies the input text.
 
-- **`temperature…
+- **`temperature: float`** → Determines how creatively the model responds. Takes a float from `0.0` to `2.0`.
 
-- **`validator (…
+- **`validator: Callable (Experimental)`** → Forces TheTool to validate the output with your custom validator. The validator should return a bool (True if the output is acceptable, False if validation fails). On failure, TheTool retries with a modified `temperature`. You can specify `max_validation_retries=<N>` to change the number of retries.
 
-- **`priority (…
+- **`priority: int (Experimental)`** → Task execution priority level. Higher values = higher priority. Affects processing order in queues.
   **Note:** This feature works if it's supported by the model and vLLM.
 
 **Note:** There might be some tools that don't support some of the parameters above.
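As a concrete illustration of combining these flags on one call (the tool name comes from the list above; the `TheTool` import path and constructor arguments are assumptions for illustration, not taken from this diff):

```python
from texttools import TheTool  # assumed import path

tool = TheTool(model="gpt-4o-mini")  # assumed constructor

output = tool.summarize(
    "TextTools is a high-level NLP toolkit built on top of modern LLMs.",
    with_analysis=True,     # extra reasoning step; doubles token usage
    logprobs=True,          # token-level probabilities, if the model supports it
    top_logprobs=3,         # top 3 alternative tokens per position
    output_lang="English",  # force the response language
    temperature=0.2,        # low creativity for a faithful summary
)
```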
@@ -125,11 +125,12 @@ TextTools provides several optional flags to customize LLM behavior:
 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
 - **`result: Any`** → The output of the LLM
 - **`analysis: str`** → The reasoning step before generating the final output
-- **`logprobs: list`** → Token-level probabilities for the generated output
-- **`process: str`** → The tool name which processed the input
-- **`processed_at: datetime`** → The process time
-- **`execution_time: float`** → The execution time (seconds)
+- **`logprobs: list`** → Token-level probabilities for the generated output
 - **`errors: list[str]`** → Any errors that occurred while calling the LLM
+- **`ToolOutputMetadata`** →
+  - **`tool_name: str`** → The name of the tool that processed the input
+  - **`processed_at: datetime`** → The process time
+  - **`execution_time: float`** → The execution time (seconds)
 
 **Note:** You can use `repr(ToolOutput)` to see details of your ToolOutput.
 
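Reading the fields documented above off a returned object could look like the sketch below; the attribute path for the nested `ToolOutputMetadata` block is an assumption based on the list, and `repr()` is the inspection route the README itself suggests:

```python
print(repr(output))                    # full details of the ToolOutput
print(output.result)                   # the LLM's parsed answer
print(output.analysis)                 # reasoning step, when with_analysis=True
print(output.logprobs)                 # token-level probabilities, when requested
print(output.errors)                   # errors raised while calling the LLM
print(output.metadata.tool_name)       # e.g. "summarize" (attribute name assumed)
print(output.metadata.processed_at)    # datetime of the run
print(output.metadata.execution_time)  # seconds
```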
@@ -224,26 +225,6 @@ Use **TextTools** when you need to:
 
 ---
 
-## 🔍 Logging
-
-TextTools uses Python's standard `logging` module. The library's default logger level is `WARNING`, so if you want to modify it, follow instructions:
-
-
-```python
-import logging
-
-# Default: warnings and errors only
-logging.basicConfig(level=logging.WARNING)
-
-# Debug everything (verbose)
-logging.basicConfig(level=logging.DEBUG)
-
-# Complete silence
-logging.basicConfig(level=logging.CRITICAL)
-```
-
----
-
 ## 📚 Batch Processing
 
 Process large datasets efficiently using OpenAI's batch API.
hamtaa_texttools-1.1.21.dist-info/RECORD
ADDED

@@ -0,0 +1,32 @@
+hamtaa_texttools-1.1.21.dist-info/licenses/LICENSE,sha256=Hb2YOBKy2MJQLnyLrX37B4ZVuac8eaIcE71SvVIMOLg,1082
+texttools/__init__.py,sha256=CmCS9dEvO6061GiJ8A7gD3UAhCWHTkaID9q3Krlyq_o,311
+texttools/batch/batch_config.py,sha256=scWYQBDuaTj8-b2x_a33Zu-zxm7eqEf5FFoquD-Sv94,1029
+texttools/batch/batch_manager.py,sha256=6HfsexU0PHGGBH7HKReZ-CQxaQI9DXYKAPsFXxovb_I,8740
+texttools/batch/batch_runner.py,sha256=fmoq7yxtEdvfLbEhcx95ma-lgrL-ZdI2EgxmEfVcKtE,10016
+texttools/internals/async_operator.py,sha256=sKMYEy7jEcsXpwnBkA18PFubkM-TXZrBH3QwF7l-wSg,7054
+texttools/internals/exceptions.py,sha256=h_yp_5i_5IfmqTBQ4S6ZOISrrliJBQ3HTEAjwJXrplk,495
+texttools/internals/models.py,sha256=9uoCAe2TLrSzyS9lMJja5orPAYaCvVL1zoCb6FNdkfs,4541
+texttools/internals/operator_utils.py,sha256=eLY2OjYQ3jT-50nx3I8gzuVzgGpMi52f5oB3cnFyxko,1864
+texttools/internals/prompt_loader.py,sha256=yYXDD4YYG2zohGPAmvZwmv5f6xV_RSl5yOrObTh9w7I,3352
+texttools/internals/sync_operator.py,sha256=IG3CXfGmv4PdFlAQ4AZcKuBAqPJdkIAK4mVw77zLbqI,6959
+texttools/internals/text_to_chunks.py,sha256=vY3odhgCZK4E44k_SGlLoSiKkdN0ib6-lQAsPcplAHA,3843
+texttools/prompts/README.md,sha256=ztajRJcmFLhyrUF0_qmOXaCwGsTGCFabfMjch2LAJG0,1375
+texttools/prompts/categorize.yaml,sha256=016b1uGtbKXEwB8_2_bBgVuUelBlu_rgT85XK_c3Yv0,1219
+texttools/prompts/check_fact.yaml,sha256=gQqacCXqUEx3u2FRwhFSZHvhyWGwsYuJd1nIJyhpu7Q,700
+texttools/prompts/extract_entities.yaml,sha256=DN8lZjvzCjotODnHFkWIAxFvmVvoeSs-hDKdN1L6bec,608
+texttools/prompts/extract_keywords.yaml,sha256=GoeApi9SUCLZgs18H2-2BxZiKQ3lHptMPesgq3cluqU,3171
+texttools/prompts/is_question.yaml,sha256=w5qF-z05h62YVs-0x2b2ySlHDKIhukFC9pibnvNM0vc,469
+texttools/prompts/merge_questions.yaml,sha256=f6bHEx54jJ8hnb8iDBUCxXeGdGwRFmuu7vOkVWdaIkM,1788
+texttools/prompts/propositionize.yaml,sha256=agZKQY-NmeJD86DGjmd-paIuazf82bczIGadgzSP5Vs,1378
+texttools/prompts/rewrite.yaml,sha256=h6x8aXcW8oRxEbp466eak0y-LCkUOKf-mJ-vNVp5j5M,5386
+texttools/prompts/run_custom.yaml,sha256=IETY9H0wPGWIIzcnupfbwwKQblwZrbYAxB754W9MhgU,125
+texttools/prompts/subject_to_question.yaml,sha256=TfVmZ6gDgaHRqJWCVkFlKpuJczpMvJTo4XLWPaq5zic,1145
+texttools/prompts/summarize.yaml,sha256=CKx4vjhHbGus1TdjDz_oc0bNEQtq7zfHsZkV2WeYHDU,457
+texttools/prompts/text_to_question.yaml,sha256=mnArBoYu7gpGHriaU2-Aw5SixB2ZIgoHMt99PnTPKD0,1003
+texttools/prompts/translate.yaml,sha256=ew9RERAVSzg0cvxAinNwTSFIaOIjdwIsekbUsgAuNgo,632
+texttools/tools/async_tools.py,sha256=VU3cqqCPILsyjRiG84w8kCw3iDSuFbI6S3VjExXZwFQ,44635
+texttools/tools/sync_tools.py,sha256=2cqcosMYR6LHuYw32WFR-drvqQ-t7Q9_2rUBDOeYzho,44441
+hamtaa_texttools-1.1.21.dist-info/METADATA,sha256=lExdE6uMFSs_wqUSElOyktjpHpZx4RY-cUH6azF-IYA,10183
+hamtaa_texttools-1.1.21.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hamtaa_texttools-1.1.21.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
+hamtaa_texttools-1.1.21.dist-info/RECORD,,
texttools/batch/batch_config.py
CHANGED
@@ -1,7 +1,20 @@
 from dataclasses import dataclass
 from collections.abc import Callable
 
-
+
+def export_data(data) -> list[dict[str, str]]:
+    """
+    Produces a structure of the following form from an initial data structure:
+    [{"id": str, "text": str},...]
+    """
+    return data
+
+
+def import_data(data) -> object:
+    """
+    Takes the output and adds and aggregates it to the original structure.
+    """
+    return data
 
 
 @dataclass
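The two new hooks above ship as identity placeholders. A pipeline that stores its corpus as a dict keyed by id could override them along these lines (a minimal sketch matching only the documented signatures; how BatchConfig wires the hooks in is not visible in this diff):

```python
def export_data(data) -> list[dict[str, str]]:
    # Flatten {id: {"text": ...}} into the documented [{"id": ..., "text": ...}] shape
    return [{"id": key, "text": row["text"]} for key, row in data.items()]


def import_data(data) -> object:
    # Fold batch results back into a dict keyed by id for downstream joins
    return {row["id"]: row for row in data}
```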
texttools/batch/{internals/batch_manager.py → batch_manager.py}
RENAMED

@@ -1,7 +1,7 @@
 import json
 import uuid
 from pathlib import Path
-from typing import …
+from typing import Type, TypeVar
 import logging
 
 from pydantic import BaseModel

@@ -31,7 +31,7 @@ class BatchManager:
         prompt_template: str,
         state_dir: Path = Path(".batch_jobs"),
         custom_json_schema_obj_str: dict | None = None,
-        **client_kwargs: …
+        **client_kwargs: object,
     ):
         self._client = client
         self._model = model

@@ -51,7 +51,7 @@ class BatchManager:
     def _state_file(self, job_name: str) -> Path:
         return self._state_dir / f"{job_name}.json"
 
-    def _load_state(self, job_name: str) -> list[dict[str, …
+    def _load_state(self, job_name: str) -> list[dict[str, object]]:
         """
         Loads the state (job information) from the state file for the given job name.
         Returns an empty list if the state file does not exist.

@@ -62,7 +62,7 @@ class BatchManager:
             return json.load(f)
         return []
 
-    def _save_state(self, job_name: str, jobs: list[dict[str, …
+    def _save_state(self, job_name: str, jobs: list[dict[str, object]]) -> None:
         """
         Saves the job state to the state file for the given job name.
         """

@@ -77,11 +77,11 @@ class BatchManager:
         if path.exists():
             path.unlink()
 
-    def _build_task(self, text: str, idx: str) -> dict[str, …
+    def _build_task(self, text: str, idx: str) -> dict[str, object]:
         """
         Builds a single task dictionary for the batch job, including the prompt, model, and response format configuration.
         """
-        response_format_config: dict[str, …
+        response_format_config: dict[str, object]
 
         if self._custom_json_schema_obj_str:
             response_format_config = {
texttools/batch/batch_runner.py
CHANGED
@@ -2,16 +2,16 @@ import json
 import os
 import time
 from pathlib import Path
-from typing import …
+from typing import Type, TypeVar
 import logging
 
 from dotenv import load_dotenv
 from openai import OpenAI
 from pydantic import BaseModel
 
-from texttools.batch.…
+from texttools.batch.batch_manager import BatchManager
 from texttools.batch.batch_config import BatchConfig
-from texttools.internals.models import …
+from texttools.internals.models import Str
 from texttools.internals.exceptions import TextToolsError, ConfigurationError
 
 # Base Model type for output models

@@ -26,7 +26,7 @@ class BatchJobRunner:
     """
 
     def __init__(
-        self, config: BatchConfig = BatchConfig(), output_model: Type[T] = …
+        self, config: BatchConfig = BatchConfig(), output_model: Type[T] = Str
     ):
         try:
             self._config = config

@@ -38,7 +38,7 @@ class BatchJobRunner:
         self._output_model = output_model
         self._manager = self._init_manager()
         self._data = self._load_data()
-        self._parts: list[list[dict[str, …
+        self._parts: list[list[dict[str, object]]] = []
         # Map part index to job name
         self._part_idx_to_job_name: dict[int, str] = {}
         # Track retry attempts per part

@@ -130,8 +130,8 @@ class BatchJobRunner:
 
     def _save_results(
         self,
-        output_data: list[dict[str, …
-        log: list[…
+        output_data: list[dict[str, object]] | dict[str, object],
+        log: list[object],
         part_idx: int,
     ):
         part_suffix = f"_part_{part_idx + 1}" if len(self._parts) > 1 else ""
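Given the constructor shown above, a run could be kicked off along these lines; BatchConfig's fields and the runner's public method are not visible in this diff, so `run()` is an assumed entry point for illustration:

```python
from texttools.batch.batch_runner import BatchJobRunner
from texttools.batch.batch_config import BatchConfig
from texttools.internals.models import Str

# Str is the default output model, per the signature above
runner = BatchJobRunner(config=BatchConfig(), output_model=Str)
runner.run()  # assumed: split data into parts, submit, poll, save results
```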
texttools/internals/async_operator.py
CHANGED

@@ -1,13 +1,11 @@
-from typing import …
+from typing import TypeVar, Type
 from collections.abc import Callable
-import logging
 
 from openai import AsyncOpenAI
 from pydantic import BaseModel
 
-from texttools.internals.models import …
+from texttools.internals.models import OperatorOutput
 from texttools.internals.operator_utils import OperatorUtils
-from texttools.internals.formatters import Formatter
 from texttools.internals.prompt_loader import PromptLoader
 from texttools.internals.exceptions import (
     TextToolsError,
@@ -19,35 +17,23 @@ from texttools.internals.exceptions import (
 # Base Model type for output models
 T = TypeVar("T", bound=BaseModel)
 
-logger = logging.getLogger("texttools.async_operator")
-
 
 class AsyncOperator:
     """
-    Core engine for running text-processing operations with an LLM
-
-    It wires together:
-    - `PromptLoader` → loads YAML prompt templates.
-    - `UserMergeFormatter` → applies formatting to messages (e.g., merging).
-    - AsyncOpenAI client → executes completions/parsed completions.
+    Core engine for running text-processing operations with an LLM.
     """
 
     def __init__(self, client: AsyncOpenAI, model: str):
         self._client = client
         self._model = model
 
-    async def …
-        """
-        Calls OpenAI API for analysis using the configured prompt template.
-        Returns the analyzed content as a string.
-        """
+    async def _analyze_completion(self, analyze_prompt: str, temperature: float) -> str:
         try:
-            analyze_prompt = prompt_configs["analyze_template"]
-
             if not analyze_prompt:
                 raise PromptError("Analyze template is empty")
 
-            analyze_message = …
+            analyze_message = OperatorUtils.build_user_message(analyze_prompt)
+
             completion = await self._client.chat.completions.create(
                 model=self._model,
                 messages=analyze_message,
@@ -62,7 +48,7 @@ class AsyncOperator:
             if not analysis:
                 raise LLMError("Empty analysis response")
 
-            return analysis
+            return analysis
 
         except Exception as e:
             if isinstance(e, (PromptError, LLMError)):
@@ -71,21 +57,23 @@ class AsyncOperator:
 
     async def _parse_completion(
         self,
-        …
+        main_prompt: str,
         output_model: Type[T],
         temperature: float,
-        logprobs: bool…
-        top_logprobs: int…
-        priority: int…
-    ) -> tuple[T, …
+        logprobs: bool,
+        top_logprobs: int,
+        priority: int,
+    ) -> tuple[T, object]:
         """
         Parses a chat completion using OpenAI's structured output format.
         Returns both the parsed object and the raw completion for logprobs.
         """
         try:
+            main_message = OperatorUtils.build_user_message(main_prompt)
+
             request_kwargs = {
                 "model": self._model,
-                "messages": …
+                "messages": main_message,
                 "response_format": output_model,
                 "temperature": temperature,
             }
@@ -93,8 +81,10 @@ class AsyncOperator:
             if logprobs:
                 request_kwargs["logprobs"] = True
                 request_kwargs["top_logprobs"] = top_logprobs
+
             if priority:
                 request_kwargs["extra_body"] = {"priority": priority}
+
             completion = await self._client.beta.chat.completions.parse(
                 **request_kwargs
             )
@@ -123,25 +113,22 @@ class AsyncOperator:
         user_prompt: str | None,
         temperature: float,
         logprobs: bool,
-        top_logprobs: int…
-        validator: Callable[[…
+        top_logprobs: int,
+        validator: Callable[[object], bool] | None,
         max_validation_retries: int | None,
+        priority: int,
         # Internal parameters
         prompt_file: str,
         output_model: Type[T],
         mode: str | None,
-        priority: int | None = 0,
         **extra_kwargs,
-    ) -> …
+    ) -> OperatorOutput:
         """
-        Execute the LLM pipeline with the given input text. (…
+        Execute the LLM pipeline with the given input text. (Sync)
         """
         try:
             prompt_loader = PromptLoader()
-            formatter = Formatter()
-            output = ToolOutput()
 
-            # Prompt configs contain two keys: main_template and analyze template, both are string
             prompt_configs = prompt_loader.load(
                 prompt_file=prompt_file,
                 text=text.strip(),
@@ -149,47 +136,32 @@ class AsyncOperator:
                 **extra_kwargs,
             )
 
-            …
+            main_prompt = ""
+            analysis = ""
 
             if with_analysis:
-                analysis = await self.…
-
-                OperatorUtils.build_user_message(
-                    f"Based on this analysis: {analysis}"
-                )
+                analysis = await self._analyze_completion(
+                    prompt_configs["analyze_template"], temperature
                 )
+                main_prompt += f"Based on this analysis:\n{analysis}\n"
 
             if output_lang:
-                …
-                OperatorUtils.build_user_message(
-                    f"Respond only in the {output_lang} language."
-                )
-                )
+                main_prompt += f"Respond only in the {output_lang} language.\n"
 
             if user_prompt:
-                …
-                OperatorUtils.build_user_message(
-                    f"Consider this instruction {user_prompt}"
-                )
-                )
-
-            messages.append(
-                OperatorUtils.build_user_message(prompt_configs["main_template"])
-            )
+                main_prompt += f"Consider this instruction {user_prompt}\n"
 
-            …
+            main_prompt += prompt_configs["main_template"]
 
             if logprobs and (not isinstance(top_logprobs, int) or top_logprobs < 2):
                 raise ValueError("top_logprobs should be an integer greater than 1")
 
             parsed, completion = await self._parse_completion(
-                …
+                main_prompt, output_model, temperature, logprobs, top_logprobs, priority
             )
 
-            output.result = parsed.result
-
             # Retry logic if validation fails
-            if validator and not validator(…
+            if validator and not validator(parsed.result):
                 if (
                     not isinstance(max_validation_retries, int)
                     or max_validation_retries < 1
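Taken together, the concatenations above mean a fully-flagged call sends a single user message shaped like this (placeholder values, shown only to make the ordering concrete):

```python
main_prompt = (
    "Based on this analysis:\n<analysis text>\n"    # only if with_analysis
    "Respond only in the French language.\n"        # only if output_lang
    "Consider this instruction <user_prompt>\n"     # only if user_prompt
    "<main_template rendered with the input text>"  # always appended last
)
```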
@@ -199,17 +171,13 @@ class AsyncOperator:
                 )
 
                 succeeded = False
-                for …
-                    …
-                        f"Validation failed, retrying for the {attempt + 1} time."
-                    )
-
-                    # Generate new temperature for retry
+                for _ in range(max_validation_retries):
+                    # Generate a new temperature to retry
                     retry_temperature = OperatorUtils.get_retry_temp(temperature)
 
                     try:
                         parsed, completion = await self._parse_completion(
-                            …
+                            main_prompt,
                             output_model,
                             retry_temperature,
                             logprobs,
@@ -217,30 +185,26 @@ class AsyncOperator:
                             priority=priority,
                         )
 
-                        output.result = parsed.result
-
                         # Check if retry was successful
-                        if validator(…
+                        if validator(parsed.result):
                             succeeded = True
                             break
 
-                    except LLMError…
-                        …
+                    except LLMError:
+                        pass
 
                 if not succeeded:
-                    raise ValidationError(…
-                    …
-                    …
-                    …
-                    …
-                    …
-                    …
-                    …
-                    …
-            output.process = prompt_file[:-5]
+                    raise ValidationError("Validation failed after all retries")
+
+            operator_output = OperatorOutput(
+                result=parsed.result,
+                analysis=analysis if with_analysis else None,
+                logprobs=OperatorUtils.extract_logprobs(completion)
+                if logprobs
+                else None,
+            )
 
-            return …
+            return operator_output
 
         except (PromptError, LLMError, ValidationError):
             raise