aidial-adapter-anthropic 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aidial_adapter_anthropic/_utils/json.py +116 -0
- aidial_adapter_anthropic/_utils/list.py +84 -0
- aidial_adapter_anthropic/_utils/pydantic.py +6 -0
- aidial_adapter_anthropic/_utils/resource.py +54 -0
- aidial_adapter_anthropic/_utils/text.py +4 -0
- aidial_adapter_anthropic/adapter/__init__.py +4 -0
- aidial_adapter_anthropic/adapter/_base.py +95 -0
- aidial_adapter_anthropic/adapter/_claude/adapter.py +549 -0
- aidial_adapter_anthropic/adapter/_claude/blocks.py +128 -0
- aidial_adapter_anthropic/adapter/_claude/citations.py +63 -0
- aidial_adapter_anthropic/adapter/_claude/config.py +39 -0
- aidial_adapter_anthropic/adapter/_claude/converters.py +303 -0
- aidial_adapter_anthropic/adapter/_claude/params.py +25 -0
- aidial_adapter_anthropic/adapter/_claude/state.py +45 -0
- aidial_adapter_anthropic/adapter/_claude/tokenizer/__init__.py +10 -0
- aidial_adapter_anthropic/adapter/_claude/tokenizer/anthropic.py +57 -0
- aidial_adapter_anthropic/adapter/_claude/tokenizer/approximate.py +260 -0
- aidial_adapter_anthropic/adapter/_claude/tokenizer/base.py +26 -0
- aidial_adapter_anthropic/adapter/_claude/tools.py +98 -0
- aidial_adapter_anthropic/adapter/_decorator/base.py +53 -0
- aidial_adapter_anthropic/adapter/_decorator/preprocess.py +63 -0
- aidial_adapter_anthropic/adapter/_decorator/replicator.py +32 -0
- aidial_adapter_anthropic/adapter/_errors.py +71 -0
- aidial_adapter_anthropic/adapter/_tokenize.py +12 -0
- aidial_adapter_anthropic/adapter/_truncate_prompt.py +168 -0
- aidial_adapter_anthropic/adapter/claude.py +17 -0
- aidial_adapter_anthropic/dial/_attachments.py +238 -0
- aidial_adapter_anthropic/dial/_lazy_stage.py +40 -0
- aidial_adapter_anthropic/dial/_message.py +341 -0
- aidial_adapter_anthropic/dial/consumer.py +235 -0
- aidial_adapter_anthropic/dial/request.py +170 -0
- aidial_adapter_anthropic/dial/resource.py +189 -0
- aidial_adapter_anthropic/dial/storage.py +138 -0
- aidial_adapter_anthropic/dial/token_usage.py +19 -0
- aidial_adapter_anthropic/dial/tools.py +180 -0
- aidial_adapter_anthropic-0.1.0.dist-info/LICENSE +202 -0
- aidial_adapter_anthropic-0.1.0.dist-info/METADATA +121 -0
- aidial_adapter_anthropic-0.1.0.dist-info/RECORD +39 -0
- aidial_adapter_anthropic-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""
|
|
2
|
+
An attempt to approximate the tokenizer for Claude models.
|
|
3
|
+
|
|
4
|
+
This tokenizer doesn't provide the precise token count,
|
|
5
|
+
because Anthropic doesn't provide the exact tokenization algorithm.
|
|
6
|
+
|
|
7
|
+
This tokenizer provides an *overestimation* of the request token count.
|
|
8
|
+
We need to be conservative, since the tokenizer is used in the prompt
|
|
9
|
+
truncation algorithm. So we are choosing to be unable to pack the request with tokens
|
|
10
|
+
as tightly as possible over making an additional chat completion request,
|
|
11
|
+
which is going to fail with a token overflow error.
|
|
12
|
+
|
|
13
|
+
1. For the text parts of request we count every byte in their UTF-8 encoding.
|
|
14
|
+
Note that the official Claude 2 tokenizer couldn't be used
|
|
15
|
+
for anything more than a very rough estimate:
|
|
16
|
+
https://github.com/anthropics/anthropic-sdk-python/blob/246a2978694b584429d4bbd5b44245ff8eac2ac2/src/anthropic/_client.py#L270-L283
|
|
17
|
+
|
|
18
|
+
2. For the image parts we use the official approximation:
|
|
19
|
+
> tokens = (width px * height px)/750
|
|
20
|
+
https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
|
|
21
|
+
|
|
22
|
+
3. For the tool usage we use the official approximation:
|
|
23
|
+
https://docs.anthropic.com/en/docs/build-with-claude/tool-use#pricing
|
|
24
|
+
a. tool-related components of the request are serialized to strings and tokenized as such,
|
|
25
|
+
b. the hidden tool-enabling system prompt is accounted as per the documentation.
|
|
26
|
+
|
|
27
|
+
TODO: use the official tokenizer:
|
|
28
|
+
https://docs.anthropic.com/en/docs/build-with-claude/token-counting
|
|
29
|
+
once it's supported in Bedrock:
|
|
30
|
+
https://github.com/anthropics/anthropic-sdk-python/blob/599f2b9a9501b8c98fb3132043c3ec71e3026f84/src/anthropic/lib/bedrock/_client.py#L61-L62
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
import base64
|
|
34
|
+
import io
|
|
35
|
+
import json
|
|
36
|
+
import logging
|
|
37
|
+
import math
|
|
38
|
+
from typing import List, Literal, Tuple, assert_never
|
|
39
|
+
|
|
40
|
+
from anthropic._types import Base64FileInput
|
|
41
|
+
from anthropic.types.beta import (
|
|
42
|
+
BetaBashCodeExecutionToolResultBlock as BashCodeExecutionToolResultBlock,
|
|
43
|
+
)
|
|
44
|
+
from anthropic.types.beta import (
|
|
45
|
+
BetaCodeExecutionToolResultBlock as CodeExecutionToolResultBlock,
|
|
46
|
+
)
|
|
47
|
+
from anthropic.types.beta import (
|
|
48
|
+
BetaContainerUploadBlock as ContainerUploadBlock,
|
|
49
|
+
)
|
|
50
|
+
from anthropic.types.beta import BetaContentBlockParam as ContentBlockParam
|
|
51
|
+
from anthropic.types.beta import BetaMCPToolResultBlock as MCPToolResultBlock
|
|
52
|
+
from anthropic.types.beta import BetaMCPToolUseBlock as MCPToolUseBlock
|
|
53
|
+
from anthropic.types.beta import BetaMessageParam as ClaudeMessage
|
|
54
|
+
from anthropic.types.beta import (
|
|
55
|
+
BetaRedactedThinkingBlock as RedactedThinkingBlock,
|
|
56
|
+
)
|
|
57
|
+
from anthropic.types.beta import BetaServerToolUseBlock as ServerToolUseBlock
|
|
58
|
+
from anthropic.types.beta import BetaTextBlock as TextBlock
|
|
59
|
+
from anthropic.types.beta import (
|
|
60
|
+
BetaTextEditorCodeExecutionToolResultBlock as TextEditorCodeExecutionToolResultBlock,
|
|
61
|
+
)
|
|
62
|
+
from anthropic.types.beta import BetaThinkingBlock as ThinkingBlock
|
|
63
|
+
from anthropic.types.beta import BetaToolParam as ToolParam
|
|
64
|
+
from anthropic.types.beta import (
|
|
65
|
+
BetaToolResultBlockParam as ToolResultBlockParam,
|
|
66
|
+
)
|
|
67
|
+
from anthropic.types.beta import (
|
|
68
|
+
BetaToolSearchToolResultBlock as ToolSearchToolResultBlock,
|
|
69
|
+
)
|
|
70
|
+
from anthropic.types.beta import BetaToolUseBlock as ToolUseBlock
|
|
71
|
+
from anthropic.types.beta import (
|
|
72
|
+
BetaWebFetchToolResultBlock as WebFetchToolResultBlock,
|
|
73
|
+
)
|
|
74
|
+
from anthropic.types.beta import (
|
|
75
|
+
BetaWebSearchToolResultBlock as WebSearchToolResultBlock,
|
|
76
|
+
)
|
|
77
|
+
from anthropic.types.beta.beta_image_block_param import Source
|
|
78
|
+
from anthropic.types.beta.beta_tool_result_block_param import (
|
|
79
|
+
Content as ToolResultBlockParamContent,
|
|
80
|
+
)
|
|
81
|
+
from PIL import Image
|
|
82
|
+
|
|
83
|
+
from aidial_adapter_anthropic.adapter._claude.params import ClaudeParameters
|
|
84
|
+
from aidial_adapter_anthropic.adapter._tokenize import default_tokenize_string
|
|
85
|
+
|
|
86
|
+
_log = logging.getLogger(__name__)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class ApproximateTokenizer:
    """
    Approximate (conservative) token counter for Claude requests.

    Counting rules (see the module docstring for the rationale):
      * text fragments are counted as one token per UTF-8 byte (an
        overestimation on purpose),
      * base64 images use the official ``(width * height) / 750`` formula,
      * tool definitions, tool calls and tool results are serialized to
        JSON and tokenized as text,
      * the hidden tool-enabling system prompt is accounted with a flat
        per-request estimate.
    """

    def tokenize_text(self, text: str) -> int:
        """Count tokens of a plain text fragment (UTF-8 byte count)."""
        return default_tokenize_string(text)

    def _get_image_size(
        self, image_data: str | Base64FileInput
    ) -> Tuple[int, int]:
        """
        Return (width, height) of a base64-encoded image.

        Falls back to 1000x1000 on any failure, which overestimates most
        real-world images and thus keeps the overall estimate conservative.
        """
        try:
            if not isinstance(image_data, str):
                raise ValueError("Images as files aren't yet supported.")

            image_bytes = base64.b64decode(image_data)
            with Image.open(io.BytesIO(image_bytes)) as img:
                return img.size
        except Exception:
            _log.exception("Cannot compute image size, assuming 1000x1000")
            return 1000, 1000

    def _tokenize_image(self, source: Source) -> int:
        """
        Count tokens of an image block via the official approximation:
        https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
        """
        match source["type"]:
            case "url" | "file":
                # The image bytes aren't available locally, so the size
                # (and hence the token count) cannot be estimated here.
                return 0
            case "base64":
                width, height = self._get_image_size(source["data"])
                return math.ceil((width * height) / 750.0)
            case _:
                assert_never(source)

    def _tokenize_tool_use(self, id: str, input: object, name: str) -> int:
        """Count tokens of a tool call: its id, name and JSON-serialized input."""
        return self.tokenize_text(f"{id} {name} {json.dumps(input)}")

    def _tokenize_tool_result(self, message: ToolResultBlockParam) -> int:
        """Count tokens of a tool result: its tool_use_id plus its content."""
        tokens: int = self.tokenize_text(message["tool_use_id"])
        if (content := message.get("content")) is not None:
            if isinstance(content, str):
                tokens += self.tokenize_text(content)
            else:
                for sub_message in content:
                    tokens += self._tokenize_sub_message(sub_message)
        return tokens

    def _tokenize_sub_message(
        self,
        message: ContentBlockParam | ToolResultBlockParamContent,
    ) -> int:
        """
        Count tokens of a single content block.

        Request-side blocks are plain dicts (TypedDicts) and are dispatched
        on their "type" tag; response-side blocks are pydantic models and
        are dispatched via class patterns. Block kinds with no sensible
        local estimate are counted as zero.
        """
        if isinstance(message, dict):
            match message["type"]:
                case "text":
                    return self.tokenize_text(message["text"])
                case "image":
                    return self._tokenize_image(message["source"])
                case "tool_use":
                    return self._tokenize_tool_use(
                        message["id"], message["input"], message["name"]
                    )
                case "tool_result":
                    return self._tokenize_tool_result(message)
                case "document":
                    # No dedicated formula — serialize the whole block.
                    return self.tokenize_text(json.dumps(message))
                case "thinking":
                    return self.tokenize_text(message["thinking"])
                case "redacted_thinking":
                    return self.tokenize_text(message["data"])
                case "server_tool_use":
                    return self.tokenize_text(json.dumps(message["input"]))
                case "web_search_tool_result":
                    return self.tokenize_text(json.dumps(message["content"]))
                case (
                    "search_result"
                    | "code_execution_tool_result"
                    | "mcp_tool_use"
                    | "mcp_tool_result"
                    | "container_upload"
                    | "bash_code_execution_tool_result"
                    | "text_editor_code_execution_tool_result"
                    | "web_fetch_tool_result"
                    | "tool_search_tool_result"
                    | "tool_reference"
                ):
                    return 0
                case _:
                    assert_never(message["type"])
        else:
            match message:
                case TextBlock():
                    return self.tokenize_text(message.text)
                case ToolUseBlock():
                    return self._tokenize_tool_use(
                        message.id, message.input, message.name
                    )
                case ThinkingBlock(thinking=thinking):
                    return self.tokenize_text(thinking)
                case RedactedThinkingBlock(data=data):
                    return self.tokenize_text(data)
                case ServerToolUseBlock(input=input):
                    return self.tokenize_text(json.dumps(input))
                case WebSearchToolResultBlock(content=content):
                    return self.tokenize_text(json.dumps(content))
                case (
                    CodeExecutionToolResultBlock()
                    | MCPToolUseBlock()
                    | MCPToolResultBlock()
                    | ContainerUploadBlock()
                    | BashCodeExecutionToolResultBlock()
                    | TextEditorCodeExecutionToolResultBlock()
                    | WebFetchToolResultBlock()
                    | ToolSearchToolResultBlock()
                ):
                    return 0
                case _:
                    assert_never(message)

    def _tokenize_message(self, message: ClaudeMessage) -> int:
        """Count tokens of a single message's content (string or block list)."""
        tokens: int = 0
        content = message["content"]

        match content:
            case str():
                tokens += self.tokenize_text(content)
            case _:
                for item in content:
                    tokens += self._tokenize_sub_message(item)

        return tokens

    def _tokenize_messages(self, messages: List[ClaudeMessage]) -> int:
        """Count tokens of the message list, including per-message overhead."""
        # A rough estimation of the per-message structural overhead
        per_message_tokens = 5

        tokens: int = 0
        for message in messages:
            tokens += self._tokenize_message(message) + per_message_tokens
        return tokens

    def _tokenize_tool_param(self, tool: ToolParam) -> int:
        """Count tokens of a tool definition by serializing it to JSON."""
        return self.tokenize_text(json.dumps(tool))

    def tokenize_tool_system_message(
        self,
        tool_choice: Literal["none", "auto", "any", "tool"],
    ) -> int:
        """
        Estimate the size of the hidden tool-enabling system prompt
        injected by Anthropic when tools are present.
        """
        # Different models have different pricing for the tool use:
        # https://platform.claude.com/docs/en/agents-and-tools/tool-use/overview#pricing
        # Here we provide a default for Claude Opus 3
        return 530 if tool_choice in ("auto", "none") else 281

    async def tokenize(
        self, params: ClaudeParameters, messages: List[ClaudeMessage]
    ) -> int:
        """
        Approximate the total prompt token count for a Claude request:
        system prompt + tool definitions (with the hidden tool system
        prompt) + messages.
        """
        tokens: int = 0

        if system := params["system"]:
            if isinstance(system, str):
                tokens += self.tokenize_text(system)
            else:
                for item in system:
                    tokens += self._tokenize_sub_message(item)

        if tools := params["tools"]:
            # When tool_choice is omitted, the API behaves as "auto".
            if tool_choice := params["tool_choice"]:
                choice = tool_choice["type"]
            else:
                choice = "auto"

            tokens += self.tokenize_tool_system_message(choice)

            for tool in tools:
                tokens += self._tokenize_tool_param(tool)

        tokens += self._tokenize_messages(messages)

        return tokens
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import List, Protocol, Set, Tuple, runtime_checkable
|
|
2
|
+
|
|
3
|
+
from anthropic.types.beta import BetaMessageParam as ClaudeMessage
|
|
4
|
+
|
|
5
|
+
from aidial_adapter_anthropic.adapter._claude.params import ClaudeParameters
|
|
6
|
+
from aidial_adapter_anthropic.dial._attachments import WithResources
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@runtime_checkable
class ClaudeTokenizer(Protocol):
    """
    Structural interface of a Claude tokenizer: counts tokens either of a
    bare text fragment or of a whole request (parameters + messages).
    """

    def tokenize_text(self, text: str) -> int: ...

    async def tokenize(
        self, params: ClaudeParameters, messages: List[ClaudeMessage]
    ) -> int: ...
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def create_tokenizer(tokenizer: ClaudeTokenizer, params: ClaudeParameters):
    """
    Bind *tokenizer* and *params* into an async callable that counts the
    tokens of a list of (message-with-resources, index set) pairs,
    ignoring the index sets and the attached resources.
    """

    async def _count_tokens(
        messages: List[Tuple[WithResources[ClaudeMessage], Set[int]]],
    ) -> int:
        payloads = [with_resources.payload for with_resources, _ in messages]
        return await tokenizer.tokenize(params, payloads)

    return _count_tokens
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from typing import assert_never
|
|
4
|
+
|
|
5
|
+
from aidial_sdk.chat_completion import FunctionCall, ToolCall
|
|
6
|
+
from anthropic.types.beta import BetaToolUseBlock as ToolUseBlock
|
|
7
|
+
|
|
8
|
+
from aidial_adapter_anthropic.adapter._errors import ValidationError
|
|
9
|
+
from aidial_adapter_anthropic.dial._message import (
|
|
10
|
+
AIFunctionCallMessage,
|
|
11
|
+
AIRegularMessage,
|
|
12
|
+
AIToolCallMessage,
|
|
13
|
+
BaseMessage,
|
|
14
|
+
HumanFunctionResultMessage,
|
|
15
|
+
HumanRegularMessage,
|
|
16
|
+
HumanToolResultMessage,
|
|
17
|
+
SystemMessage,
|
|
18
|
+
ToolMessage,
|
|
19
|
+
)
|
|
20
|
+
from aidial_adapter_anthropic.dial.consumer import Consumer, ToolUseMessage
|
|
21
|
+
from aidial_adapter_anthropic.dial.tools import ToolsMode
|
|
22
|
+
|
|
23
|
+
_log = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def to_dial_function_call(block: ToolUseBlock, streaming: bool) -> FunctionCall:
    """
    Convert a Claude tool-use block into a DIAL function call.

    In streaming mode the arguments start out empty (they are filled in
    incrementally); otherwise the input is serialized to JSON at once.
    """
    if streaming:
        serialized_arguments = ""
    else:
        serialized_arguments = json.dumps(block.input)
    return FunctionCall(name=block.name, arguments=serialized_arguments)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def to_dial_tool_call(block: ToolUseBlock, streaming: bool) -> ToolCall:
    """Convert a Claude tool-use block into a DIAL tool call."""
    function_call = to_dial_function_call(block, streaming)
    return ToolCall(id=block.id, type="function", function=function_call)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def process_tools_block(
|
|
40
|
+
consumer: Consumer,
|
|
41
|
+
block: ToolUseBlock,
|
|
42
|
+
tools_mode: ToolsMode | None,
|
|
43
|
+
*,
|
|
44
|
+
streaming: bool,
|
|
45
|
+
) -> ToolUseMessage | None:
|
|
46
|
+
match tools_mode:
|
|
47
|
+
case ToolsMode.TOOLS:
|
|
48
|
+
return consumer.create_function_tool_call(
|
|
49
|
+
to_dial_tool_call(block, streaming)
|
|
50
|
+
)
|
|
51
|
+
case ToolsMode.FUNCTIONS:
|
|
52
|
+
if consumer.has_function_call:
|
|
53
|
+
_log.warning(
|
|
54
|
+
"The model generated more than one tool call. "
|
|
55
|
+
"Only the first one will be taken in to account."
|
|
56
|
+
)
|
|
57
|
+
return None
|
|
58
|
+
else:
|
|
59
|
+
return consumer.create_function_call(
|
|
60
|
+
to_dial_function_call(block, streaming)
|
|
61
|
+
)
|
|
62
|
+
case None:
|
|
63
|
+
raise ValidationError(
|
|
64
|
+
"A model has called a tool, but no tools were given to the model in the first place."
|
|
65
|
+
)
|
|
66
|
+
case _:
|
|
67
|
+
assert_never(tools_mode)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def function_to_tool_messages(
    message: BaseMessage | ToolMessage,
) -> BaseMessage | HumanToolResultMessage | AIToolCallMessage:
    """
    Convert legacy function-call messages into their tool-call
    equivalents; all other message kinds pass through unchanged.
    """
    match message:
        case (
            SystemMessage()
            | HumanRegularMessage()
            | AIRegularMessage()
            | HumanToolResultMessage()
            | AIToolCallMessage()
        ):
            return message
        case AIFunctionCallMessage():
            # Function calls carry no dedicated id, so the function name
            # is reused as the tool call id (matching the result branch).
            return AIToolCallMessage(
                content=message.content,
                calls=[
                    ToolCall(
                        id=message.call.name,
                        type="function",
                        function=message.call,
                    )
                ],
            )
        case HumanFunctionResultMessage():
            # Symmetrically, the function name stands in for the id.
            return HumanToolResultMessage(
                id=message.name, content=message.content
            )
        case _:
            assert_never(message)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from typing import Callable, List
|
|
2
|
+
|
|
3
|
+
from aidial_sdk.chat_completion import Message
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from aidial_adapter_anthropic.adapter._base import ChatCompletionAdapter
|
|
7
|
+
from aidial_adapter_anthropic.adapter._truncate_prompt import DiscardedMessages
|
|
8
|
+
from aidial_adapter_anthropic.dial.consumer import Consumer
|
|
9
|
+
from aidial_adapter_anthropic.dial.request import ModelParameters
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ChatCompletionDecorator(ChatCompletionAdapter):
    """
    Base class for chat completion decorators.

    Forwards every operation verbatim to the wrapped `adapter`;
    subclasses override only the methods whose behavior they alter.
    """

    # The wrapped adapter all calls are delegated to.
    adapter: ChatCompletionAdapter

    async def chat(
        self,
        consumer: Consumer,
        params: ModelParameters,
        messages: List[Message],
    ) -> None:
        await self.adapter.chat(consumer, params, messages)

    async def configuration(self) -> type[BaseModel]:
        return await self.adapter.configuration()

    async def count_prompt_tokens(
        self, params: ModelParameters, messages: List[Message]
    ) -> int:
        return await self.adapter.count_prompt_tokens(params, messages)

    async def count_completion_tokens(self, string: str) -> int:
        return await self.adapter.count_completion_tokens(string)

    async def compute_discarded_messages(
        self, params: ModelParameters, messages: List[Message]
    ) -> DiscardedMessages | None:
        return await self.adapter.compute_discarded_messages(params, messages)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# A transformer wraps one `ChatCompletionAdapter` into another,
# typically by applying a decorator; see `compose_decorators`.
ChatCompletionTransformer = Callable[
    [ChatCompletionAdapter], ChatCompletionAdapter
]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def compose_decorators(
    *decorators: ChatCompletionTransformer,
) -> ChatCompletionTransformer:
    """
    Compose transformers into a single one.

    The transformers are applied right-to-left, so the first listed
    decorator becomes the outermost wrapper around the adapter.
    """

    def compose(adapter: ChatCompletionAdapter) -> ChatCompletionAdapter:
        wrapped = adapter
        for transform in reversed(decorators):
            wrapped = transform(wrapped)
        return wrapped

    return compose
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from typing import Callable, List
|
|
2
|
+
|
|
3
|
+
from aidial_sdk.chat_completion import Message
|
|
4
|
+
|
|
5
|
+
from aidial_adapter_anthropic._utils.list import ListProjection
|
|
6
|
+
from aidial_adapter_anthropic.adapter._decorator.base import (
|
|
7
|
+
ChatCompletionDecorator,
|
|
8
|
+
ChatCompletionTransformer,
|
|
9
|
+
)
|
|
10
|
+
from aidial_adapter_anthropic.adapter._truncate_prompt import DiscardedMessages
|
|
11
|
+
from aidial_adapter_anthropic.dial.consumer import Consumer
|
|
12
|
+
from aidial_adapter_anthropic.dial.request import ModelParameters
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def preprocess_messages_decorator(
    on_messages: Callable[[List[Message]], ListProjection[Message]],
) -> ChatCompletionTransformer:
    """Create a transformer applying `on_messages` before delegating."""

    def _decorate(adapter):
        return PreprocessMessagesDecorator(
            on_messages=on_messages, adapter=adapter
        )

    return _decorate
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class PreprocessMessagesDecorator(ChatCompletionDecorator):
    """
    Applies `on_messages` to the incoming messages before delegating to
    the wrapped adapter and translates discarded message indices back to
    the original list via the returned `ListProjection`.
    """

    # Preprocessing step; the resulting projection records how indices
    # of the new messages map onto indices of the original ones.
    on_messages: Callable[[List[Message]], ListProjection[Message]]

    async def chat(
        self,
        consumer: Consumer,
        params: ModelParameters,
        messages: List[Message],
    ) -> None:
        new_messages = self.on_messages(messages)
        await self.adapter.chat(consumer, params, new_messages.raw_list)
        # Remap the discarded indices reported by the inner adapter back
        # to indices into the original (unpreprocessed) message list.
        if (
            discarded_messages := consumer.get_discarded_messages()
        ) is not None:
            discarded_messages = list(
                new_messages.to_original_indices(discarded_messages)
            )
            consumer.set_discarded_messages(discarded_messages)

    async def count_prompt_tokens(
        self, params: ModelParameters, messages: List[Message]
    ) -> int:
        new_messages = self.on_messages(messages)
        return await self.adapter.count_prompt_tokens(
            params, new_messages.raw_list
        )

    async def compute_discarded_messages(
        self, params: ModelParameters, messages: List[Message]
    ) -> DiscardedMessages | None:
        new_messages = self.on_messages(messages)
        discarded_messages = await self.adapter.compute_discarded_messages(
            params, new_messages.raw_list
        )

        # Same remapping as in `chat`: report indices with respect to
        # the original message list.
        if discarded_messages is not None:
            discarded_messages = list(
                new_messages.to_original_indices(discarded_messages)
            )

        return discarded_messages
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from aidial_sdk.chat_completion import Message
|
|
5
|
+
|
|
6
|
+
from aidial_adapter_anthropic.adapter._decorator.base import (
|
|
7
|
+
ChatCompletionDecorator,
|
|
8
|
+
ChatCompletionTransformer,
|
|
9
|
+
)
|
|
10
|
+
from aidial_adapter_anthropic.dial.consumer import Consumer
|
|
11
|
+
from aidial_adapter_anthropic.dial.request import ModelParameters
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def replicator_decorator() -> ChatCompletionTransformer:
    """Create a transformer wrapping an adapter into a `ReplicatorDecorator`."""

    def _decorate(adapter):
        return ReplicatorDecorator(adapter=adapter)

    return _decorate
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ReplicatorDecorator(ChatCompletionDecorator):
    """
    Emulates `n > 1` completions by running `n` independent
    single-completion requests concurrently, each reporting into its own
    forked consumer. Presumably the wrapped adapter supports only a
    single completion per request — TODO confirm.
    """

    async def chat(
        self,
        consumer: Consumer,
        params: ModelParameters,
        messages: List[Message],
    ) -> None:
        # Each replicated request asks for exactly one completion.
        params1 = params.copy()
        params1.n = 1

        async def _chat(root_consumer: Consumer):
            # NOTE: the inner `consumer` deliberately shadows the outer
            # one — each replica writes into its own forked consumer.
            with root_consumer.fork() as consumer:
                await self.adapter.chat(consumer, params1, messages)

        await asyncio.gather(*(_chat(consumer) for _ in range(params.n)))
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from aidial_sdk.chat_completion import Choice
|
|
4
|
+
from aidial_sdk.exceptions import HTTPException as DialException
|
|
5
|
+
from aidial_sdk.exceptions import RequestValidationError
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class UserError(Exception):
    """
    The user errors are aimed at a DIAL chat user.
    So whenever an exceptional situation arises that could be handled by a chat user themselves,
    we should raise a UserError with an `error_message` explaining the error and
    an optional `usage_message` to help the user understand how to use the application correctly:

    * `error_message` is what the chat user will be shown as an error message,
    * `usage_message` is reported in a `Usage` dialog stage to educate the chat user.

    A typical example of a user error is validation of supported input data attachments.
    The chat user has full control over the list of attachments, so they can fix the issue themselves.
    """

    # The message shown to the chat user as the error.
    error_message: str
    # Optional guidance reported in a dedicated "Usage" stage.
    usage_message: Optional[str]

    def __init__(self, error_message: str, usage_message: Optional[str] = None):
        self.error_message = error_message
        self.usage_message = usage_message
        super().__init__(self.error_message)

    async def report_usage(self, choice: Choice) -> None:
        """Publish the usage guidance (if any) as a "Usage" stage of the choice."""
        if self.usage_message is not None:
            with choice.create_stage("Usage") as stage:
                stage.append_content(self.usage_message)

    def to_dial_exception(self) -> DialException:
        """Convert into a DIAL exception carrying a user-visible display message."""
        return RequestValidationError(
            message=self.error_message,
            display_message=self.error_message,
            code="invalid_argument",
        )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ValidationError(Exception):
    """
    The validation errors are aimed at a DIAL API client (e.g. DIAL application developer).
    They report in which way the request to the application is invalid.

    Typically the validation errors are raised when the request is not semantically valid but syntactically well-formed.

    For example, an application doesn't support tools/functions feature, but the request contains it.
    It's of no use to report such an error to a chat user, because they can't fix it themselves in the chat.
    But the DIAL application developer who has a finer control over the request can fix the issue by modifying the request.
    """

    # Developer-facing description of the request defect.
    message: str

    def __init__(self, message: str):
        self.message = message
        super().__init__(self.message)

    def to_dial_exception(self) -> DialException:
        """Convert into a DIAL exception without a user-visible display message."""
        return RequestValidationError(
            message=self.message,
            code="invalid_argument",
        )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# The third category of errors is everything else, including standard Python exceptions, like ValueError or KeyError.
|
|
69
|
+
# These kind of errors are internal to the DIAL application and thus highlight bugs in the application code itself.
|
|
70
|
+
# Neither the chat user nor the DIAL application developer can fix the issue, because there is nothing wrong with the request.
|
|
71
|
+
# Thus, such errors are simply reported as internal server errors with HTTP code 500.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
def default_tokenize_string(string: str) -> int:
    """
    Estimate the token count of *string* as the length of its UTF-8 encoding.

    The byte count serves as a proxy for the token count for models that
    provide no tokenization facilities. Any such estimator must:

    1. Be allowed to overestimate — truncating the chat history more than
       strictly necessary is acceptable.
    2. Never underestimate — leaving the chat history intact when truncation
       was actually required is incorrect.
    """
    encoded = string.encode("utf-8")
    return len(encoded)
|