hamtaa-texttools 2.2.0__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/PKG-INFO +15 -3
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/README.md +13 -2
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/PKG-INFO +15 -3
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/requires.txt +1 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/pyproject.toml +2 -1
- hamtaa_texttools-2.3.0/texttools/__init__.py +4 -0
- hamtaa_texttools-2.3.0/texttools/core/__init__.py +34 -0
- hamtaa_texttools-2.3.0/texttools/core/internal_models.py +123 -0
- hamtaa_texttools-2.3.0/texttools/core/operators/__init__.py +4 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/operators/async_operator.py +11 -3
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/operators/sync_operator.py +9 -3
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/utils.py +33 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/models.py +4 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/to_question.yaml +0 -2
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/translate.yaml +2 -2
- hamtaa_texttools-2.3.0/texttools/tools/__init__.py +5 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/tools/async_tools.py +69 -18
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/tools/sync_tools.py +69 -18
- hamtaa_texttools-2.2.0/texttools/__init__.py +0 -6
- hamtaa_texttools-2.2.0/texttools/core/__init__.py +0 -0
- hamtaa_texttools-2.2.0/texttools/core/internal_models.py +0 -71
- hamtaa_texttools-2.2.0/texttools/core/operators/__init__.py +0 -0
- hamtaa_texttools-2.2.0/texttools/tools/__init__.py +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/LICENSE +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/SOURCES.txt +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/dependency_links.txt +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/top_level.txt +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/setup.cfg +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/tests/test_category_tree.py +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/tests/test_to_chunks.py +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/exceptions.py +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/augment.yaml +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/categorize.yaml +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/extract_entities.yaml +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/extract_keywords.yaml +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/is_fact.yaml +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/is_question.yaml +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/merge_questions.yaml +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/propositionize.yaml +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/run_custom.yaml +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/summarize.yaml +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/py.typed +0 -0
- {hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/tools/batch_tools.py +0 -0
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 2.2.0
+Version: 2.3.0
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -17,6 +17,7 @@ License-File: LICENSE
 Requires-Dist: dotenv>=0.9.9
 Requires-Dist: openai>=1.97.1
 Requires-Dist: pydantic>=2.0.0
+Requires-Dist: pytest>=9.0.2
 Requires-Dist: pyyaml>=6.0
 Dynamic: license-file

@@ -108,20 +109,31 @@ pip install -U hamtaa-texttools
 ## 🧩 ToolOutput

 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+
 - **`result: Any`**
 - **`analysis: str`**
 - **`logprobs: list`**
 - **`errors: list[str]`**
-- **`ToolOutputMetadata`**
+- **`ToolOutputMetadata`**
     - **`tool_name: str`**
+    - **`processed_by: str`**
     - **`processed_at: datetime`**
     - **`execution_time: float`**
+    - **`token_usage: TokenUsage`**
+        - **`completion_usage: CompletionUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**
+        - **`analyze_usage: AnalyzeUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**

 - Serialize output to JSON using the `to_json()` method.
 - Verify operation success with the `is_successful()` method.
 - Convert output to a dictionary with the `to_dict()` method.

-**Note:** For BatchTheTool: Each method returns a list[ToolOutput] containing results for all input texts.
+**Note:** For BatchTheTool: Each method returns a `list[ToolOutput]` containing results for all input texts.

 ---

{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/README.md
@@ -86,20 +86,31 @@ pip install -U hamtaa-texttools
 ## 🧩 ToolOutput

 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+
 - **`result: Any`**
 - **`analysis: str`**
 - **`logprobs: list`**
 - **`errors: list[str]`**
-- **`ToolOutputMetadata`**
+- **`ToolOutputMetadata`**
     - **`tool_name: str`**
+    - **`processed_by: str`**
     - **`processed_at: datetime`**
     - **`execution_time: float`**
+    - **`token_usage: TokenUsage`**
+        - **`completion_usage: CompletionUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**
+        - **`analyze_usage: AnalyzeUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**

 - Serialize output to JSON using the `to_json()` method.
 - Verify operation success with the `is_successful()` method.
 - Convert output to a dictionary with the `to_dict()` method.

-**Note:** For BatchTheTool: Each method returns a list[ToolOutput] containing results for all input texts.
+**Note:** For BatchTheTool: Each method returns a `list[ToolOutput]` containing results for all input texts.

 ---

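The README hunk above documents the enriched `ToolOutput`/`ToolOutputMetadata` surface added in 2.3.0. A minimal consumption sketch (editorial illustration, not taken from the package docs; the `TheTool` import path and constructor, the `is_question()` call shape, and the `metadata` attribute name are assumptions based only on this diff):

```python
# Hedged sketch: reading the 2.3.0 ToolOutput fields documented above.
from openai import OpenAI
from texttools import TheTool  # import path assumed from the package layout

client = OpenAI()  # reads OPENAI_API_KEY from the environment
tool = TheTool(client=client, model="gpt-4o-mini")  # model name is a placeholder

output = tool.is_question("Is water wet?")  # hypothetical call shape

if output.is_successful():                 # documented: no entries in output.errors
    print(output.result)                   # tool-specific payload
    meta = output.metadata                 # attribute name assumed (ToolOutputMetadata)
    print(meta.processed_by)               # model that produced the result (new in 2.3.0)
    print(meta.execution_time)             # seconds, measured with perf_counter()
    print(meta.token_usage.total_tokens)   # aggregated usage (new in 2.3.0)
    print(output.to_json())                # documented serialization helper
```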
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/hamtaa_texttools.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hamtaa-texttools
-Version: 2.2.0
+Version: 2.3.0
 Summary: A high-level NLP toolkit built on top of modern LLMs.
 Author-email: Tohidi <the.mohammad.tohidi@gmail.com>, Erfan Moosavi <erfanmoosavi84@gmail.com>, Montazer <montazerh82@gmail.com>, Givechi <mohamad.m.givechi@gmail.com>, Zareshahi <a.zareshahi1377@gmail.com>
 Maintainer-email: Erfan Moosavi <erfanmoosavi84@gmail.com>, Tohidi <the.mohammad.tohidi@gmail.com>
@@ -17,6 +17,7 @@ License-File: LICENSE
 Requires-Dist: dotenv>=0.9.9
 Requires-Dist: openai>=1.97.1
 Requires-Dist: pydantic>=2.0.0
+Requires-Dist: pytest>=9.0.2
 Requires-Dist: pyyaml>=6.0
 Dynamic: license-file

@@ -108,20 +109,31 @@ pip install -U hamtaa-texttools
 ## 🧩 ToolOutput

 Every tool of `TextTools` returns a `ToolOutput` object which is a BaseModel with attributes:
+
 - **`result: Any`**
 - **`analysis: str`**
 - **`logprobs: list`**
 - **`errors: list[str]`**
-- **`ToolOutputMetadata`**
+- **`ToolOutputMetadata`**
     - **`tool_name: str`**
+    - **`processed_by: str`**
     - **`processed_at: datetime`**
     - **`execution_time: float`**
+    - **`token_usage: TokenUsage`**
+        - **`completion_usage: CompletionUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**
+        - **`analyze_usage: AnalyzeUsage`**
+            - **`prompt_tokens: int`**
+            - **`completion_tokens: int`**
+            - **`total_tokens: int`**

 - Serialize output to JSON using the `to_json()` method.
 - Verify operation success with the `is_successful()` method.
 - Convert output to a dictionary with the `to_dict()` method.

-**Note:** For BatchTheTool: Each method returns a list[ToolOutput] containing results for all input texts.
+**Note:** For BatchTheTool: Each method returns a `list[ToolOutput]` containing results for all input texts.

 ---

{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "hamtaa-texttools"
-version = "2.2.0"
+version = "2.3.0"
 authors = [
     {name = "Tohidi", email = "the.mohammad.tohidi@gmail.com"},
     {name = "Erfan Moosavi", email = "erfanmoosavi84@gmail.com"},
@@ -24,6 +24,7 @@ dependencies = [
     "dotenv>=0.9.9",
     "openai>=1.97.1",
     "pydantic>=2.0.0",
+    "pytest>=9.0.2",
     "pyyaml>=6.0",
 ]
 keywords = ["nlp", "llm", "text-processing", "openai"]
hamtaa_texttools-2.3.0/texttools/core/__init__.py
@@ -0,0 +1,34 @@
+from .exceptions import LLMError, PromptError, TextToolsError, ValidationError
+from .internal_models import (
+    Bool,
+    ListDictStrStr,
+    ListStr,
+    ReasonListStr,
+    Str,
+    TokenUsage,
+    create_dynamic_model,
+)
+from .operators import AsyncOperator, Operator
+from .utils import OperatorUtils, TheToolUtils
+
+__all__ = [
+    # Exceptions
+    "LLMError",
+    "PromptError",
+    "TextToolsError",
+    "ValidationError",
+    # Internal models
+    "Bool",
+    "ListDictStrStr",
+    "ListStr",
+    "ReasonListStr",
+    "Str",
+    "TokenUsage",
+    "create_dynamic_model",
+    # Operators
+    "AsyncOperator",
+    "Operator",
+    # Utils
+    "OperatorUtils",
+    "TheToolUtils",
+]
hamtaa_texttools-2.3.0/texttools/core/internal_models.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field, create_model
+
+
+class CompletionUsage(BaseModel):
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+
+
+class AnalyzeUsage(BaseModel):
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+
+
+class TokenUsage(BaseModel):
+    completion_usage: CompletionUsage = CompletionUsage()
+    analyze_usage: AnalyzeUsage = AnalyzeUsage()
+    total_tokens: int = 0
+
+    def __add__(self, other: TokenUsage) -> TokenUsage:
+        new_completion_usage = CompletionUsage(
+            prompt_tokens=self.completion_usage.prompt_tokens
+            + other.completion_usage.prompt_tokens,
+            completion_tokens=self.completion_usage.completion_tokens
+            + other.completion_usage.completion_tokens,
+            total_tokens=self.completion_usage.total_tokens
+            + other.completion_usage.total_tokens,
+        )
+        new_analyze_usage = AnalyzeUsage(
+            prompt_tokens=self.analyze_usage.prompt_tokens
+            + other.analyze_usage.prompt_tokens,
+            completion_tokens=self.analyze_usage.completion_tokens
+            + other.analyze_usage.completion_tokens,
+            total_tokens=self.analyze_usage.total_tokens
+            + other.analyze_usage.total_tokens,
+        )
+        total_tokens = (
+            new_completion_usage.total_tokens + new_analyze_usage.total_tokens
+        )
+
+        return TokenUsage(
+            completion_usage=new_completion_usage,
+            analyze_usage=new_analyze_usage,
+            total_tokens=total_tokens,
+        )
+
+
+class OperatorOutput(BaseModel):
+    result: Any
+    analysis: str | None
+    logprobs: list[dict[str, Any]] | None
+    token_usage: TokenUsage | None = None
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
+    analysis_tokens: int | None = None
+    total_tokens: int | None = None
+
+
+class Str(BaseModel):
+    result: str = Field(
+        ..., description="The output string", json_schema_extra={"example": "text"}
+    )
+
+
+class Bool(BaseModel):
+    result: bool = Field(
+        ...,
+        description="Boolean indicating the output state",
+        json_schema_extra={"example": True},
+    )
+
+
+class ListStr(BaseModel):
+    result: list[str] = Field(
+        ...,
+        description="The output list of strings",
+        json_schema_extra={"example": ["text_1", "text_2", "text_3"]},
+    )
+
+
+class ListDictStrStr(BaseModel):
+    result: list[dict[str, str]] = Field(
+        ...,
+        description="List of dictionaries containing string key-value pairs",
+        json_schema_extra={
+            "example": [
+                {"text": "Mohammad", "type": "PER"},
+                {"text": "Iran", "type": "LOC"},
+            ]
+        },
+    )
+
+
+class ReasonListStr(BaseModel):
+    reason: str = Field(..., description="Thinking process that led to the output")
+    result: list[str] = Field(
+        ...,
+        description="The output list of strings",
+        json_schema_extra={"example": ["text_1", "text_2", "text_3"]},
+    )
+
+
+# Create CategorizerOutput with dynamic categories
+def create_dynamic_model(allowed_values: list[str]) -> type[BaseModel]:
+    literal_type = Literal[*allowed_values]
+
+    CategorizerOutput = create_model(
+        "CategorizerOutput",
+        reason=(
+            str,
+            Field(
+                ..., description="Explanation of why the input belongs to the category"
+            ),
+        ),
+        result=(literal_type, Field(..., description="Predicted category label")),
+    )
+
+    return CategorizerOutput
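The overloaded `__add__` in the new `TokenUsage` model above is what lets the tools accumulate usage across several LLM calls (see the `token_usage += ...` lines in the chunked-translation and category-tree hunks further down). A minimal sketch of that accumulation, using only the models defined in this new file (the import path follows the package layout shown in this diff):

```python
from texttools.core.internal_models import AnalyzeUsage, CompletionUsage, TokenUsage

# Usage reported for two hypothetical LLM calls (values are illustrative).
first = TokenUsage(
    completion_usage=CompletionUsage(prompt_tokens=120, completion_tokens=30, total_tokens=150),
    analyze_usage=AnalyzeUsage(),  # no analysis pass on this call
    total_tokens=150,
)
second = TokenUsage(
    completion_usage=CompletionUsage(prompt_tokens=80, completion_tokens=20, total_tokens=100),
    analyze_usage=AnalyzeUsage(prompt_tokens=40, completion_tokens=10, total_tokens=50),
    total_tokens=150,
)

combined = first + second                      # dispatches to TokenUsage.__add__
print(combined.completion_usage.total_tokens)  # 250
print(combined.analyze_usage.total_tokens)     # 50
print(combined.total_tokens)                   # 300 (completion + analyze totals)
```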
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/operators/async_operator.py
RENAMED
@@ -18,7 +18,9 @@ class AsyncOperator:
         self._client = client
         self._model = model

-    async def _analyze_completion(
+    async def _analyze_completion(
+        self, analyze_message: list[dict[str, str]]
+    ) -> tuple[str, Any]:
         try:
             completion = await self._client.chat.completions.create(
                 model=self._model,
@@ -33,7 +35,7 @@ class AsyncOperator:
             if not analysis:
                 raise LLMError("Empty analysis response")

-            return analysis
+            return analysis, completion

         except Exception as e:
             if isinstance(e, (PromptError, LLMError)):
@@ -116,12 +118,15 @@ class AsyncOperator:
         )

         analysis: str | None = None
+        analyze_completion: Any = None

         if with_analysis:
             analyze_message = OperatorUtils.build_message(
                 prompt_configs["analyze_template"]
             )
-            analysis = await self._analyze_completion(
+            analysis, analyze_completion = await self._analyze_completion(
+                analyze_message
+            )

         main_prompt = OperatorUtils.build_main_prompt(
             prompt_configs["main_template"], analysis, output_lang, user_prompt
@@ -176,6 +181,9 @@ class AsyncOperator:
             logprobs=OperatorUtils.extract_logprobs(completion)
             if logprobs
             else None,
+            token_usage=OperatorUtils.extract_token_usage(
+                completion, analyze_completion
+            ),
         )

         return operator_output
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/operators/sync_operator.py
@@ -18,7 +18,9 @@ class Operator:
         self._client = client
         self._model = model

-    def _analyze_completion(
+    def _analyze_completion(
+        self, analyze_message: list[dict[str, str]]
+    ) -> tuple[str, Any]:
         try:
             completion = self._client.chat.completions.create(
                 model=self._model,
@@ -33,7 +35,7 @@ class Operator:
             if not analysis:
                 raise LLMError("Empty analysis response")

-            return analysis
+            return analysis, completion

         except Exception as e:
             if isinstance(e, (PromptError, LLMError)):
@@ -114,12 +116,13 @@ class Operator:
         )

         analysis: str | None = None
+        analyze_completion: Any = None

         if with_analysis:
             analyze_message = OperatorUtils.build_message(
                 prompt_configs["analyze_template"]
             )
-            analysis = self._analyze_completion(analyze_message)
+            analysis, analyze_completion = self._analyze_completion(analyze_message)

         main_prompt = OperatorUtils.build_main_prompt(
             prompt_configs["main_template"], analysis, output_lang, user_prompt
@@ -174,6 +177,9 @@ class Operator:
             logprobs=OperatorUtils.extract_logprobs(completion)
             if logprobs
             else None,
+            token_usage=OperatorUtils.extract_token_usage(
+                completion, analyze_completion
+            ),
         )

         return operator_output
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/core/utils.py
@@ -9,6 +9,7 @@ from typing import Any
 import yaml

 from .exceptions import PromptError
+from .internal_models import AnalyzeUsage, CompletionUsage, TokenUsage


 class OperatorUtils:
@@ -148,6 +149,38 @@
         new_temp = base_temp + random.choice([-1, 1]) * random.uniform(0.1, 0.9)
         return max(0.0, min(new_temp, 1.5))

+    @staticmethod
+    def extract_token_usage(completion: Any, analyze_completion: Any) -> TokenUsage:
+        completion_usage = completion.usage
+        analyze_usage = analyze_completion.usage if analyze_completion else None
+
+        completion_usage_model = CompletionUsage(
+            prompt_tokens=getattr(completion_usage, "prompt_tokens", 00),
+            completion_tokens=getattr(completion_usage, "completion_tokens", 00),
+            total_tokens=getattr(completion_usage, "total_tokens", 00),
+        )
+        analyze_usage_model = AnalyzeUsage(
+            prompt_tokens=getattr(analyze_usage, "prompt_tokens", 0),
+            completion_tokens=getattr(analyze_usage, "completion_tokens", 0),
+            total_tokens=getattr(analyze_usage, "total_tokens", 0),
+        )
+        total_analyze_tokens = (
+            analyze_usage_model.prompt_tokens + analyze_usage_model.completion_tokens
+            if analyze_completion
+            else 0
+        )
+        total_tokens = (
+            completion_usage_model.prompt_tokens
+            + completion_usage_model.completion_tokens
+            + total_analyze_tokens
+        )
+
+        return TokenUsage(
+            completion_usage=completion_usage_model,
+            analyze_usage=analyze_usage_model,
+            total_tokens=total_tokens,
+        )
+

 class TheToolUtils:
     """
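A short sketch of how the new `OperatorUtils.extract_token_usage` above maps raw completions into `TokenUsage` (editorial illustration, assuming the package is installed; `SimpleNamespace` stands in for the openai `ChatCompletion` objects, whose `.usage` carries the same prompt/completion/total token fields):

```python
from types import SimpleNamespace

from texttools.core.utils import OperatorUtils  # import path per the layout in this diff

# Stand-ins for a main completion and an optional analysis completion.
main = SimpleNamespace(usage=SimpleNamespace(prompt_tokens=200, completion_tokens=50, total_tokens=250))
analyze = SimpleNamespace(usage=SimpleNamespace(prompt_tokens=90, completion_tokens=10, total_tokens=100))

usage = OperatorUtils.extract_token_usage(main, analyze)
print(usage.completion_usage.total_tokens)  # 250
print(usage.analyze_usage.total_tokens)     # 100
print(usage.total_tokens)                   # 350 (= 200 + 50 + 90 + 10)

# When with_analysis is off, the operators pass analyze_completion=None and the
# analyze counters stay at their 0 defaults.
no_analysis = OperatorUtils.extract_token_usage(main, None)
print(no_analysis.total_tokens)             # 250
```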
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/models.py
@@ -5,11 +5,15 @@ from typing import Any

 from pydantic import BaseModel, Field

+from .core import TokenUsage
+

 class ToolOutputMetadata(BaseModel):
     tool_name: str
+    processed_by: str | None = None
     processed_at: datetime = Field(default_factory=datetime.now)
     execution_time: float | None = None
+    token_usage: TokenUsage | None = None


 class ToolOutput(BaseModel):
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/to_question.yaml
@@ -7,7 +7,6 @@ main_template:
   and must not mention any verbs like this, that, he or she in the question.

   There is a `reason` key, fill that up with a summerized version of your thoughts.
-  The `reason` must be less than 20 words.
   Don't forget to fill the reason.

   Respond only in JSON format:
@@ -23,7 +22,6 @@ main_template:
   and must not mention any verbs like this, that, he or she in the question.

   There is a `reason` key, fill that up with a summerized version of your thoughts.
-  The `reason` must be less than 20 words.
   Don't forget to fill the reason.

   Respond only in JSON format:
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/prompts/translate.yaml
@@ -3,9 +3,9 @@ main_template: |
   Output only the translated text.

   Respond only in JSON format:
-  {{"result": "
+  {{"result": "translated_text"}}

-  Don't translate proper
+  Don't translate proper names, only transliterate them to {target_lang}

   Translate the following text to {target_lang}:
   {text}
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/tools/async_tools.py
@@ -5,17 +5,21 @@ from typing import Any, Literal

 from openai import AsyncOpenAI

-from ..core
-from ..core.internal_models import (
+from ..core import (
+    AsyncOperator,
     Bool,
     ListDictStrStr,
     ListStr,
+    LLMError,
+    PromptError,
     ReasonListStr,
     Str,
+    TextToolsError,
+    TheToolUtils,
+    TokenUsage,
+    ValidationError,
     create_dynamic_model,
 )
-from ..core.operators.async_operator import AsyncOperator
-from ..core.utils import TheToolUtils
 from ..models import CategoryTree, ToolOutput, ToolOutputMetadata


@@ -29,6 +33,7 @@ class AsyncTheTool:
         self._operator = AsyncOperator(client=client, model=model)
         self.logger = logging.getLogger(self.__class__.__name__)
         self.raise_on_error = raise_on_error
+        self.model = model

     async def categorize(
         self,
@@ -91,7 +96,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -106,6 +114,7 @@ class AsyncTheTool:
             final_categories = []
             analysis = ""
             logprobs_list = []
+            token_usage = TokenUsage()

             for _ in range(levels):
                 if not parent_node.children:
@@ -149,9 +158,13 @@ class AsyncTheTool:
                     analysis += level_operator_output.analysis
                 if logprobs:
                     logprobs_list.extend(level_operator_output.logprobs)
+                token_usage += level_operator_output.token_usage

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=token_usage,
             )
             tool_output = ToolOutput(
                 result=final_categories,
@@ -237,7 +250,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -321,7 +337,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -400,7 +419,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -486,7 +508,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -570,7 +595,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -653,7 +681,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -734,7 +765,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -802,6 +836,7 @@ class AsyncTheTool:
             translation = ""
             analysis = ""
             logprobs_list = []
+            token_usage = TokenUsage()

             for chunk in chunks:
                 chunk_operator_output = await TheToolUtils.run_with_timeout(
@@ -832,9 +867,13 @@ class AsyncTheTool:
                     analysis += chunk_operator_output.analysis
                 if logprobs:
                     logprobs_list.extend(chunk_operator_output.logprobs)
+                token_usage += chunk_operator_output.token_usage

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=token_usage,
             )
             tool_output = ToolOutput(
                 result=translation,
@@ -867,7 +906,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -950,7 +992,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -1036,7 +1081,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -1121,7 +1169,10 @@ class AsyncTheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
{hamtaa_texttools-2.2.0 → hamtaa_texttools-2.3.0}/texttools/tools/sync_tools.py
@@ -5,17 +5,21 @@ from typing import Any, Literal

 from openai import OpenAI

-from ..core
-from ..core.internal_models import (
+from ..core import (
     Bool,
     ListDictStrStr,
     ListStr,
+    LLMError,
+    Operator,
+    PromptError,
     ReasonListStr,
     Str,
+    TextToolsError,
+    TheToolUtils,
+    TokenUsage,
+    ValidationError,
     create_dynamic_model,
 )
-from ..core.operators.sync_operator import Operator
-from ..core.utils import TheToolUtils
 from ..models import CategoryTree, ToolOutput, ToolOutputMetadata


@@ -29,6 +33,7 @@ class TheTool:
         self._operator = Operator(client=client, model=model)
         self.logger = logging.getLogger(self.__class__.__name__)
         self.raise_on_error = raise_on_error
+        self.model = model

     def categorize(
         self,
@@ -86,7 +91,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -101,6 +109,7 @@ class TheTool:
             final_categories = []
             analysis = ""
             logprobs_list = []
+            token_usage = TokenUsage()

             for _ in range(levels):
                 if not parent_node.children:
@@ -141,9 +150,13 @@ class TheTool:
                     analysis += level_operator_output.analysis
                 if logprobs:
                     logprobs_list.extend(level_operator_output.logprobs)
+                token_usage += level_operator_output.token_usage

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=token_usage,
             )
             tool_output = ToolOutput(
                 result=final_categories,
@@ -224,7 +237,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -303,7 +319,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -377,7 +396,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -458,7 +480,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -537,7 +562,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -615,7 +643,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -691,7 +722,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -757,6 +791,7 @@ class TheTool:
             translation = ""
             analysis = ""
             logprobs_list = []
+            token_usage = TokenUsage()

             for chunk in chunks:
                 chunk_operator_output = self._operator.run(
@@ -784,9 +819,13 @@ class TheTool:
                     analysis += chunk_operator_output.analysis
                 if logprobs:
                     logprobs_list.extend(chunk_operator_output.logprobs)
+                token_usage += chunk_operator_output.token_usage

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=token_usage,
             )
             tool_output = ToolOutput(
                 result=translation,
@@ -816,7 +855,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -894,7 +936,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -975,7 +1020,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
@@ -1055,7 +1103,10 @@ class TheTool:
             )

             metadata = ToolOutputMetadata(
-                tool_name=tool_name,
+                tool_name=tool_name,
+                execution_time=perf_counter() - start,
+                processed_by=self.model,
+                token_usage=operator_output.token_usage,
             )
             tool_output = ToolOutput(
                 result=operator_output.result,
hamtaa_texttools-2.2.0/texttools/core/internal_models.py
@@ -1,71 +0,0 @@
-from typing import Any, Literal
-
-from pydantic import BaseModel, Field, create_model
-
-
-class OperatorOutput(BaseModel):
-    result: Any
-    analysis: str | None
-    logprobs: list[dict[str, Any]] | None
-
-
-class Str(BaseModel):
-    result: str = Field(
-        ..., description="The output string", json_schema_extra={"example": "text"}
-    )
-
-
-class Bool(BaseModel):
-    result: bool = Field(
-        ...,
-        description="Boolean indicating the output state",
-        json_schema_extra={"example": True},
-    )
-
-
-class ListStr(BaseModel):
-    result: list[str] = Field(
-        ...,
-        description="The output list of strings",
-        json_schema_extra={"example": ["text_1", "text_2", "text_3"]},
-    )
-
-
-class ListDictStrStr(BaseModel):
-    result: list[dict[str, str]] = Field(
-        ...,
-        description="List of dictionaries containing string key-value pairs",
-        json_schema_extra={
-            "example": [
-                {"text": "Mohammad", "type": "PER"},
-                {"text": "Iran", "type": "LOC"},
-            ]
-        },
-    )
-
-
-class ReasonListStr(BaseModel):
-    reason: str = Field(..., description="Thinking process that led to the output")
-    result: list[str] = Field(
-        ...,
-        description="The output list of strings",
-        json_schema_extra={"example": ["text_1", "text_2", "text_3"]},
-    )
-
-
-# Create CategorizerOutput with dynamic categories
-def create_dynamic_model(allowed_values: list[str]) -> type[BaseModel]:
-    literal_type = Literal[*allowed_values]
-
-    CategorizerOutput = create_model(
-        "CategorizerOutput",
-        reason=(
-            str,
-            Field(
-                ..., description="Explanation of why the input belongs to the category"
-            ),
-        ),
-        result=(literal_type, Field(..., description="Predicted category label")),
-    )
-
-    return CategorizerOutput
The remaining renamed files listed above (LICENSE, the egg-info SOURCES.txt, dependency_links.txt and top_level.txt, setup.cfg, the tests, texttools/core/exceptions.py, the unchanged prompt YAML files, texttools/py.typed, and texttools/tools/batch_tools.py) have no content changes.