aidial-adapter-anthropic 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. aidial_adapter_anthropic/_utils/json.py +116 -0
  2. aidial_adapter_anthropic/_utils/list.py +84 -0
  3. aidial_adapter_anthropic/_utils/pydantic.py +6 -0
  4. aidial_adapter_anthropic/_utils/resource.py +54 -0
  5. aidial_adapter_anthropic/_utils/text.py +4 -0
  6. aidial_adapter_anthropic/adapter/__init__.py +4 -0
  7. aidial_adapter_anthropic/adapter/_base.py +95 -0
  8. aidial_adapter_anthropic/adapter/_claude/adapter.py +549 -0
  9. aidial_adapter_anthropic/adapter/_claude/blocks.py +128 -0
  10. aidial_adapter_anthropic/adapter/_claude/citations.py +63 -0
  11. aidial_adapter_anthropic/adapter/_claude/config.py +39 -0
  12. aidial_adapter_anthropic/adapter/_claude/converters.py +303 -0
  13. aidial_adapter_anthropic/adapter/_claude/params.py +25 -0
  14. aidial_adapter_anthropic/adapter/_claude/state.py +45 -0
  15. aidial_adapter_anthropic/adapter/_claude/tokenizer/__init__.py +10 -0
  16. aidial_adapter_anthropic/adapter/_claude/tokenizer/anthropic.py +57 -0
  17. aidial_adapter_anthropic/adapter/_claude/tokenizer/approximate.py +260 -0
  18. aidial_adapter_anthropic/adapter/_claude/tokenizer/base.py +26 -0
  19. aidial_adapter_anthropic/adapter/_claude/tools.py +98 -0
  20. aidial_adapter_anthropic/adapter/_decorator/base.py +53 -0
  21. aidial_adapter_anthropic/adapter/_decorator/preprocess.py +63 -0
  22. aidial_adapter_anthropic/adapter/_decorator/replicator.py +32 -0
  23. aidial_adapter_anthropic/adapter/_errors.py +71 -0
  24. aidial_adapter_anthropic/adapter/_tokenize.py +12 -0
  25. aidial_adapter_anthropic/adapter/_truncate_prompt.py +168 -0
  26. aidial_adapter_anthropic/adapter/claude.py +17 -0
  27. aidial_adapter_anthropic/dial/_attachments.py +238 -0
  28. aidial_adapter_anthropic/dial/_lazy_stage.py +40 -0
  29. aidial_adapter_anthropic/dial/_message.py +341 -0
  30. aidial_adapter_anthropic/dial/consumer.py +235 -0
  31. aidial_adapter_anthropic/dial/request.py +170 -0
  32. aidial_adapter_anthropic/dial/resource.py +189 -0
  33. aidial_adapter_anthropic/dial/storage.py +138 -0
  34. aidial_adapter_anthropic/dial/token_usage.py +19 -0
  35. aidial_adapter_anthropic/dial/tools.py +180 -0
  36. aidial_adapter_anthropic-0.1.0.dist-info/LICENSE +202 -0
  37. aidial_adapter_anthropic-0.1.0.dist-info/METADATA +121 -0
  38. aidial_adapter_anthropic-0.1.0.dist-info/RECORD +39 -0
  39. aidial_adapter_anthropic-0.1.0.dist-info/WHEEL +4 -0
aidial_adapter_anthropic/adapter/_claude/tokenizer/approximate.py
@@ -0,0 +1,260 @@
+ """
+ An attempt to approximate the tokenizer for Claude models.
+
+ This tokenizer doesn't provide the precise token count,
+ because Anthropic doesn't provide the exact tokenization algorithm.
+
+ This tokenizer provides an *overestimation* of the request token count.
+ We need to be conservative, since the tokenizer is used in the prompt
+ truncation algorithm. So we prefer to pack the request with tokens
+ less tightly than possible over making an additional chat completion request,
+ which would fail with a token overflow error.
+
+ 1. For the text parts of the request we count every byte in their UTF-8 encoding.
+ Note that the official Claude 2 tokenizer can't be used
+ for anything more than a very rough estimate:
+ https://github.com/anthropics/anthropic-sdk-python/blob/246a2978694b584429d4bbd5b44245ff8eac2ac2/src/anthropic/_client.py#L270-L283
+
+ 2. For the image parts we use the official approximation:
+ > tokens = (width px * height px)/750
+ https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
+
+ 3. For the tool usage we use the official approximation:
+ https://docs.anthropic.com/en/docs/build-with-claude/tool-use#pricing
+ a. tool-related components of the request are serialized to strings and tokenized as such,
+ b. the hidden tool-enabling system prompt is accounted for as per the documentation.
+
+ TODO: use the official tokenizer:
+ https://docs.anthropic.com/en/docs/build-with-claude/token-counting
+ once it's supported in Bedrock:
+ https://github.com/anthropics/anthropic-sdk-python/blob/599f2b9a9501b8c98fb3132043c3ec71e3026f84/src/anthropic/lib/bedrock/_client.py#L61-L62
+ """
+
+ import base64
+ import io
+ import json
+ import logging
+ import math
+ from typing import List, Literal, Tuple, assert_never
+
+ from anthropic._types import Base64FileInput
+ from anthropic.types.beta import (
+     BetaBashCodeExecutionToolResultBlock as BashCodeExecutionToolResultBlock,
+ )
+ from anthropic.types.beta import (
+     BetaCodeExecutionToolResultBlock as CodeExecutionToolResultBlock,
+ )
+ from anthropic.types.beta import (
+     BetaContainerUploadBlock as ContainerUploadBlock,
+ )
+ from anthropic.types.beta import BetaContentBlockParam as ContentBlockParam
+ from anthropic.types.beta import BetaMCPToolResultBlock as MCPToolResultBlock
+ from anthropic.types.beta import BetaMCPToolUseBlock as MCPToolUseBlock
+ from anthropic.types.beta import BetaMessageParam as ClaudeMessage
+ from anthropic.types.beta import (
+     BetaRedactedThinkingBlock as RedactedThinkingBlock,
+ )
+ from anthropic.types.beta import BetaServerToolUseBlock as ServerToolUseBlock
+ from anthropic.types.beta import BetaTextBlock as TextBlock
+ from anthropic.types.beta import (
+     BetaTextEditorCodeExecutionToolResultBlock as TextEditorCodeExecutionToolResultBlock,
+ )
+ from anthropic.types.beta import BetaThinkingBlock as ThinkingBlock
+ from anthropic.types.beta import BetaToolParam as ToolParam
+ from anthropic.types.beta import (
+     BetaToolResultBlockParam as ToolResultBlockParam,
+ )
+ from anthropic.types.beta import (
+     BetaToolSearchToolResultBlock as ToolSearchToolResultBlock,
+ )
+ from anthropic.types.beta import BetaToolUseBlock as ToolUseBlock
+ from anthropic.types.beta import (
+     BetaWebFetchToolResultBlock as WebFetchToolResultBlock,
+ )
+ from anthropic.types.beta import (
+     BetaWebSearchToolResultBlock as WebSearchToolResultBlock,
+ )
+ from anthropic.types.beta.beta_image_block_param import Source
+ from anthropic.types.beta.beta_tool_result_block_param import (
+     Content as ToolResultBlockParamContent,
+ )
+ from PIL import Image
+
+ from aidial_adapter_anthropic.adapter._claude.params import ClaudeParameters
+ from aidial_adapter_anthropic.adapter._tokenize import default_tokenize_string
+
+ _log = logging.getLogger(__name__)
+
+
+ class ApproximateTokenizer:
+     def tokenize_text(self, text: str) -> int:
+         return default_tokenize_string(text)
+
+     def _get_image_size(
+         self, image_data: str | Base64FileInput
+     ) -> Tuple[int, int]:
+         try:
+             if not isinstance(image_data, str):
+                 raise ValueError("Images as files aren't yet supported.")
+
+             image_bytes = base64.b64decode(image_data)
+             with Image.open(io.BytesIO(image_bytes)) as img:
+                 return img.size
+         except Exception:
+             _log.exception("Cannot compute image size, assuming 1000x1000")
+             return 1000, 1000
+
+     def _tokenize_image(self, source: Source) -> int:
+         match source["type"]:
+             case "url" | "file":
+                 return 0
+             case "base64":
+                 width, height = self._get_image_size(source["data"])
+                 return math.ceil((width * height) / 750.0)
+             case _:
+                 assert_never(source)
+
+     def _tokenize_tool_use(self, id: str, input: object, name: str) -> int:
+         return self.tokenize_text(f"{id} {name} {json.dumps(input)}")
+
+     def _tokenize_tool_result(self, message: ToolResultBlockParam) -> int:
+         tokens: int = self.tokenize_text(message["tool_use_id"])
+         if (content := message.get("content")) is not None:
+             if isinstance(content, str):
+                 tokens += self.tokenize_text(content)
+             else:
+                 for sub_message in content:
+                     tokens += self._tokenize_sub_message(sub_message)
+         return tokens
+
+     def _tokenize_sub_message(
+         self,
+         message: ContentBlockParam | ToolResultBlockParamContent,
+     ) -> int:
+         if isinstance(message, dict):
+             match message["type"]:
+                 case "text":
+                     return self.tokenize_text(message["text"])
+                 case "image":
+                     return self._tokenize_image(message["source"])
+                 case "tool_use":
+                     return self._tokenize_tool_use(
+                         message["id"], message["input"], message["name"]
+                     )
+                 case "tool_result":
+                     return self._tokenize_tool_result(message)
+                 case "document":
+                     return self.tokenize_text(json.dumps(message))
+                 case "thinking":
+                     return self.tokenize_text(message["thinking"])
+                 case "redacted_thinking":
+                     return self.tokenize_text(message["data"])
+                 case "server_tool_use":
+                     return self.tokenize_text(json.dumps(message["input"]))
+                 case "web_search_tool_result":
+                     return self.tokenize_text(json.dumps(message["content"]))
+                 case (
+                     "search_result"
+                     | "code_execution_tool_result"
+                     | "mcp_tool_use"
+                     | "mcp_tool_result"
+                     | "container_upload"
+                     | "bash_code_execution_tool_result"
+                     | "text_editor_code_execution_tool_result"
+                     | "web_fetch_tool_result"
+                     | "tool_search_tool_result"
+                     | "tool_reference"
+                 ):
+                     return 0
+                 case _:
+                     assert_never(message["type"])
+         else:
+             match message:
+                 case TextBlock():
+                     return self.tokenize_text(message.text)
+                 case ToolUseBlock():
+                     return self._tokenize_tool_use(
+                         message.id, message.input, message.name
+                     )
+                 case ThinkingBlock(thinking=thinking):
+                     return self.tokenize_text(thinking)
+                 case RedactedThinkingBlock(data=data):
+                     return self.tokenize_text(data)
+                 case ServerToolUseBlock(input=input):
+                     return self.tokenize_text(json.dumps(input))
+                 case WebSearchToolResultBlock(content=content):
+                     return self.tokenize_text(json.dumps(content))
+                 case (
+                     CodeExecutionToolResultBlock()
+                     | MCPToolUseBlock()
+                     | MCPToolResultBlock()
+                     | ContainerUploadBlock()
+                     | BashCodeExecutionToolResultBlock()
+                     | TextEditorCodeExecutionToolResultBlock()
+                     | WebFetchToolResultBlock()
+                     | ToolSearchToolResultBlock()
+                 ):
+                     return 0
+                 case _:
+                     assert_never(message)
+
+     def _tokenize_message(self, message: ClaudeMessage) -> int:
+         tokens: int = 0
+         content = message["content"]
+
+         match content:
+             case str():
+                 tokens += self.tokenize_text(content)
+             case _:
+                 for item in content:
+                     tokens += self._tokenize_sub_message(item)
+
+         return tokens
+
+     def _tokenize_messages(self, messages: List[ClaudeMessage]) -> int:
+         # A rough estimation
+         per_message_tokens = 5
+
+         tokens: int = 0
+         for message in messages:
+             tokens += self._tokenize_message(message) + per_message_tokens
+         return tokens
+
+     def _tokenize_tool_param(self, tool: ToolParam) -> int:
+         return self.tokenize_text(json.dumps(tool))
+
+     def tokenize_tool_system_message(
+         self,
+         tool_choice: Literal["none", "auto", "any", "tool"],
+     ) -> int:
+         # Different models have different pricing for tool use:
+         # https://platform.claude.com/docs/en/agents-and-tools/tool-use/overview#pricing
+         # Here we provide a default for Claude Opus 3
+         return 530 if tool_choice in ("auto", "none") else 281
+
+     async def tokenize(
+         self, params: ClaudeParameters, messages: List[ClaudeMessage]
+     ) -> int:
+         tokens: int = 0
+
+         if system := params["system"]:
+             if isinstance(system, str):
+                 tokens += self.tokenize_text(system)
+             else:
+                 for item in system:
+                     tokens += self._tokenize_sub_message(item)
+
+         if tools := params["tools"]:
+             if tool_choice := params["tool_choice"]:
+                 choice = tool_choice["type"]
+             else:
+                 choice = "auto"
+
+             tokens += self.tokenize_tool_system_message(choice)
+
+             for tool in tools:
+                 tokens += self._tokenize_tool_param(tool)
+
+         tokens += self._tokenize_messages(messages)
+
+         return tokens
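
For illustration, a minimal sketch of how the estimator above can be driven; the `params` literal is hypothetical and only assumes what `tokenize` itself reads (the `system`, `tools` and `tool_choice` keys of `ClaudeParameters`). Per the rules above, plain text costs its UTF-8 byte length, and a 1000x1000 base64 image would cost ceil(1000 * 1000 / 750) = 1334 tokens.

import asyncio

from aidial_adapter_anthropic.adapter._claude.tokenizer.approximate import (
    ApproximateTokenizer,
)

tokenizer = ApproximateTokenizer()
assert tokenizer.tokenize_text("hello") == 5  # 5 UTF-8 bytes

# Hypothetical request: a plain system prompt, one user message, no tools.
params = {"system": "You are a helpful assistant.", "tools": None, "tool_choice": None}
messages = [{"role": "user", "content": "hello"}]

total = asyncio.run(tokenizer.tokenize(params, messages))  # type: ignore[arg-type]
# total == 38: 28 (system prompt bytes) + 5 (text bytes) + 5 (per-message overhead)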
aidial_adapter_anthropic/adapter/_claude/tokenizer/base.py
@@ -0,0 +1,26 @@
+ from typing import List, Protocol, Set, Tuple, runtime_checkable
+
+ from anthropic.types.beta import BetaMessageParam as ClaudeMessage
+
+ from aidial_adapter_anthropic.adapter._claude.params import ClaudeParameters
+ from aidial_adapter_anthropic.dial._attachments import WithResources
+
+
+ @runtime_checkable
+ class ClaudeTokenizer(Protocol):
+     def tokenize_text(self, text: str) -> int: ...
+
+     async def tokenize(
+         self, params: ClaudeParameters, messages: List[ClaudeMessage]
+     ) -> int: ...
+
+
+ def create_tokenizer(tokenizer: ClaudeTokenizer, params: ClaudeParameters):
+     async def _tokenize(
+         messages: List[Tuple[WithResources[ClaudeMessage], Set[int]]],
+     ) -> int:
+         return await tokenizer.tokenize(
+             params, [msg[0].payload for msg in messages]
+         )
+
+     return _tokenize
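
Because `ClaudeTokenizer` is a runtime-checkable `Protocol`, any object with structurally matching methods satisfies it; no subclassing is required. A sketch with a hypothetical character-based tokenizer (not part of the package):

from typing import List

from anthropic.types.beta import BetaMessageParam as ClaudeMessage

from aidial_adapter_anthropic.adapter._claude.tokenizer.base import ClaudeTokenizer

class FixedRateTokenizer:
    """Hypothetical: roughly 4 characters per token, rounded up."""

    def tokenize_text(self, text: str) -> int:
        return -(-len(text) // 4)  # ceiling division

    async def tokenize(self, params, messages: List[ClaudeMessage]) -> int:
        # Crude: stringify each message's content and count it as text.
        return sum(self.tokenize_text(str(m["content"])) for m in messages)

assert isinstance(FixedRateTokenizer(), ClaudeTokenizer)  # structural check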
aidial_adapter_anthropic/adapter/_claude/tools.py
@@ -0,0 +1,98 @@
+ import json
+ import logging
+ from typing import assert_never
+
+ from aidial_sdk.chat_completion import FunctionCall, ToolCall
+ from anthropic.types.beta import BetaToolUseBlock as ToolUseBlock
+
+ from aidial_adapter_anthropic.adapter._errors import ValidationError
+ from aidial_adapter_anthropic.dial._message import (
+     AIFunctionCallMessage,
+     AIRegularMessage,
+     AIToolCallMessage,
+     BaseMessage,
+     HumanFunctionResultMessage,
+     HumanRegularMessage,
+     HumanToolResultMessage,
+     SystemMessage,
+     ToolMessage,
+ )
+ from aidial_adapter_anthropic.dial.consumer import Consumer, ToolUseMessage
+ from aidial_adapter_anthropic.dial.tools import ToolsMode
+
+ _log = logging.getLogger(__name__)
+
+
+ def to_dial_function_call(block: ToolUseBlock, streaming: bool) -> FunctionCall:
+     arguments = "" if streaming else json.dumps(block.input)
+     return FunctionCall(name=block.name, arguments=arguments)
+
+
+ def to_dial_tool_call(block: ToolUseBlock, streaming: bool) -> ToolCall:
+     return ToolCall(
+         id=block.id,
+         type="function",
+         function=to_dial_function_call(block, streaming),
+     )
+
+
+ def process_tools_block(
+     consumer: Consumer,
+     block: ToolUseBlock,
+     tools_mode: ToolsMode | None,
+     *,
+     streaming: bool,
+ ) -> ToolUseMessage | None:
+     match tools_mode:
+         case ToolsMode.TOOLS:
+             return consumer.create_function_tool_call(
+                 to_dial_tool_call(block, streaming)
+             )
+         case ToolsMode.FUNCTIONS:
+             if consumer.has_function_call:
+                 _log.warning(
+                     "The model generated more than one tool call. "
+                     "Only the first one will be taken into account."
+                 )
+                 return None
+             else:
+                 return consumer.create_function_call(
+                     to_dial_function_call(block, streaming)
+                 )
+         case None:
+             raise ValidationError(
+                 "The model has called a tool, but no tools were given to the model in the first place."
+             )
+         case _:
+             assert_never(tools_mode)
+
+
+ def function_to_tool_messages(
+     message: BaseMessage | ToolMessage,
+ ) -> BaseMessage | HumanToolResultMessage | AIToolCallMessage:
+     match message:
+         case (
+             SystemMessage()
+             | HumanRegularMessage()
+             | AIRegularMessage()
+             | HumanToolResultMessage()
+             | AIToolCallMessage()
+         ):
+             return message
+         case AIFunctionCallMessage():
+             return AIToolCallMessage(
+                 content=message.content,
+                 calls=[
+                     ToolCall(
+                         id=message.call.name,
+                         type="function",
+                         function=message.call,
+                     )
+                 ],
+             )
+         case HumanFunctionResultMessage():
+             return HumanToolResultMessage(
+                 id=message.name, content=message.content
+             )
+         case _:
+             assert_never(message)
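
A quick sketch of the conversion helpers above: in non-streaming mode the tool arguments are serialized eagerly, while in streaming mode they start out empty and are expected to be filled in by subsequent stream deltas (the block values below are made up):

import json

from anthropic.types.beta import BetaToolUseBlock as ToolUseBlock

from aidial_adapter_anthropic.adapter._claude.tools import to_dial_tool_call

block = ToolUseBlock(
    type="tool_use",
    id="toolu_01",  # hypothetical tool-use id
    name="get_weather",
    input={"city": "Paris"},
)

call = to_dial_tool_call(block, streaming=False)
assert call.function.arguments == json.dumps({"city": "Paris"})

call = to_dial_tool_call(block, streaming=True)
assert call.function.arguments == ""  # arguments arrive later via deltas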
aidial_adapter_anthropic/adapter/_decorator/base.py
@@ -0,0 +1,53 @@
+ from typing import Callable, List
+
+ from aidial_sdk.chat_completion import Message
+ from pydantic import BaseModel
+
+ from aidial_adapter_anthropic.adapter._base import ChatCompletionAdapter
+ from aidial_adapter_anthropic.adapter._truncate_prompt import DiscardedMessages
+ from aidial_adapter_anthropic.dial.consumer import Consumer
+ from aidial_adapter_anthropic.dial.request import ModelParameters
+
+
+ class ChatCompletionDecorator(ChatCompletionAdapter):
+     adapter: ChatCompletionAdapter
+
+     async def chat(
+         self,
+         consumer: Consumer,
+         params: ModelParameters,
+         messages: List[Message],
+     ) -> None:
+         await self.adapter.chat(consumer, params, messages)
+
+     async def configuration(self) -> type[BaseModel]:
+         return await self.adapter.configuration()
+
+     async def count_prompt_tokens(
+         self, params: ModelParameters, messages: List[Message]
+     ) -> int:
+         return await self.adapter.count_prompt_tokens(params, messages)
+
+     async def count_completion_tokens(self, string: str) -> int:
+         return await self.adapter.count_completion_tokens(string)
+
+     async def compute_discarded_messages(
+         self, params: ModelParameters, messages: List[Message]
+     ) -> DiscardedMessages | None:
+         return await self.adapter.compute_discarded_messages(params, messages)
+
+
+ ChatCompletionTransformer = Callable[
+     [ChatCompletionAdapter], ChatCompletionAdapter
+ ]
+
+
+ def compose_decorators(
+     *decorators: ChatCompletionTransformer,
+ ) -> ChatCompletionTransformer:
+     def compose(adapter: ChatCompletionAdapter) -> ChatCompletionAdapter:
+         for decorator in reversed(decorators):
+             adapter = decorator(adapter)
+         return adapter
+
+     return compose
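
Note the ordering implied by `reversed`: the last transformer in the argument list is applied to the adapter first, so the first one ends up as the outermost wrapper. A hedged usage sketch (`normalize_messages` and `base_adapter` are hypothetical stand-ins):

from aidial_adapter_anthropic.adapter._decorator.base import compose_decorators
from aidial_adapter_anthropic.adapter._decorator.preprocess import (
    preprocess_messages_decorator,
)
from aidial_adapter_anthropic.adapter._decorator.replicator import (
    replicator_decorator,
)

composed = compose_decorators(
    preprocess_messages_decorator(normalize_messages),  # outermost wrapper
    replicator_decorator(),  # applied first, sits closest to the adapter
)
adapter = composed(base_adapter)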
aidial_adapter_anthropic/adapter/_decorator/preprocess.py
@@ -0,0 +1,63 @@
+ from typing import Callable, List
+
+ from aidial_sdk.chat_completion import Message
+
+ from aidial_adapter_anthropic._utils.list import ListProjection
+ from aidial_adapter_anthropic.adapter._decorator.base import (
+     ChatCompletionDecorator,
+     ChatCompletionTransformer,
+ )
+ from aidial_adapter_anthropic.adapter._truncate_prompt import DiscardedMessages
+ from aidial_adapter_anthropic.dial.consumer import Consumer
+ from aidial_adapter_anthropic.dial.request import ModelParameters
+
+
+ def preprocess_messages_decorator(
+     on_messages: Callable[[List[Message]], ListProjection[Message]],
+ ) -> ChatCompletionTransformer:
+     return lambda adapter: PreprocessMessagesDecorator(
+         on_messages=on_messages, adapter=adapter
+     )
+
+
+ class PreprocessMessagesDecorator(ChatCompletionDecorator):
+     on_messages: Callable[[List[Message]], ListProjection[Message]]
+
+     async def chat(
+         self,
+         consumer: Consumer,
+         params: ModelParameters,
+         messages: List[Message],
+     ) -> None:
+         new_messages = self.on_messages(messages)
+         await self.adapter.chat(consumer, params, new_messages.raw_list)
+         if (
+             discarded_messages := consumer.get_discarded_messages()
+         ) is not None:
+             discarded_messages = list(
+                 new_messages.to_original_indices(discarded_messages)
+             )
+             consumer.set_discarded_messages(discarded_messages)
+
+     async def count_prompt_tokens(
+         self, params: ModelParameters, messages: List[Message]
+     ) -> int:
+         new_messages = self.on_messages(messages)
+         return await self.adapter.count_prompt_tokens(
+             params, new_messages.raw_list
+         )
+
+     async def compute_discarded_messages(
+         self, params: ModelParameters, messages: List[Message]
+     ) -> DiscardedMessages | None:
+         new_messages = self.on_messages(messages)
+         discarded_messages = await self.adapter.compute_discarded_messages(
+             params, new_messages.raw_list
+         )
+
+         if discarded_messages is not None:
+             discarded_messages = list(
+                 new_messages.to_original_indices(discarded_messages)
+             )
+
+         return discarded_messages
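
The subtle part above is the index bookkeeping: truncation runs against the preprocessed list, so discarded indices must be mapped back to positions in the caller's original list. A self-contained sketch of that idea (not the package's `ListProjection`, which lives in `_utils/list.py`):

from typing import List

# Suppose preprocessing dropped original messages 1 and 3;
# then preprocessed index i maps back through this table:
origin = [0, 2, 4, 5]  # origin[i] = original index of preprocessed message i

def to_original_indices(discarded: List[int]) -> List[int]:
    return [origin[i] for i in discarded]

# The adapter discarded preprocessed messages 0 and 1,
# i.e. original messages 0 and 2:
assert to_original_indices([0, 1]) == [0, 2]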
aidial_adapter_anthropic/adapter/_decorator/replicator.py
@@ -0,0 +1,32 @@
+ import asyncio
+ from typing import List
+
+ from aidial_sdk.chat_completion import Message
+
+ from aidial_adapter_anthropic.adapter._decorator.base import (
+     ChatCompletionDecorator,
+     ChatCompletionTransformer,
+ )
+ from aidial_adapter_anthropic.dial.consumer import Consumer
+ from aidial_adapter_anthropic.dial.request import ModelParameters
+
+
+ def replicator_decorator() -> ChatCompletionTransformer:
+     return lambda adapter: ReplicatorDecorator(adapter=adapter)
+
+
+ class ReplicatorDecorator(ChatCompletionDecorator):
+     async def chat(
+         self,
+         consumer: Consumer,
+         params: ModelParameters,
+         messages: List[Message],
+     ) -> None:
+         params1 = params.copy()
+         params1.n = 1
+
+         async def _chat(root_consumer: Consumer):
+             with root_consumer.fork() as consumer:
+                 await self.adapter.chat(consumer, params1, messages)
+
+         await asyncio.gather(*(_chat(consumer) for _ in range(params.n)))
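
The replicator thus turns one n-choice request into n concurrent single-choice requests, each writing into its own fork of the consumer. A minimal standalone sketch of the fan-out pattern it relies on:

import asyncio

async def generate_one(i: int) -> str:
    # Stand-in for a single adapter.chat(...) call with n=1.
    await asyncio.sleep(0)
    return f"choice #{i}"

async def generate_n(n: int) -> list[str]:
    # One task per requested choice, as in ReplicatorDecorator.chat.
    return list(await asyncio.gather(*(generate_one(i) for i in range(n))))

assert asyncio.run(generate_n(3)) == ["choice #0", "choice #1", "choice #2"]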
aidial_adapter_anthropic/adapter/_errors.py
@@ -0,0 +1,71 @@
+ from typing import Optional
+
+ from aidial_sdk.chat_completion import Choice
+ from aidial_sdk.exceptions import HTTPException as DialException
+ from aidial_sdk.exceptions import RequestValidationError
+
+
+ class UserError(Exception):
+     """
+     User errors are aimed at a DIAL chat user.
+     So whenever an exceptional situation arises that could be handled by the chat user themselves,
+     we should raise a UserError with an `error_message` explaining the error and
+     an optional `usage_message` to help the user understand how to use the application correctly:
+
+     * `error_message` is what the chat user will be shown as an error message,
+     * `usage_message` is reported in a `Usage` dialog stage to educate the chat user.
+
+     A typical example of a user error is validation of supported input data attachments.
+     The chat user has full control over the list of attachments, so they can fix the issue themselves.
+     """
+
+     error_message: str
+     usage_message: Optional[str]
+
+     def __init__(self, error_message: str, usage_message: Optional[str] = None):
+         self.error_message = error_message
+         self.usage_message = usage_message
+         super().__init__(self.error_message)
+
+     async def report_usage(self, choice: Choice) -> None:
+         if self.usage_message is not None:
+             with choice.create_stage("Usage") as stage:
+                 stage.append_content(self.usage_message)
+
+     def to_dial_exception(self) -> DialException:
+         return RequestValidationError(
+             message=self.error_message,
+             display_message=self.error_message,
+             code="invalid_argument",
+         )
+
+
+ class ValidationError(Exception):
+     """
+     Validation errors are aimed at a DIAL API client (e.g. a DIAL application developer).
+     They report in which way the request to the application is invalid.
+
+     Typically, validation errors are raised when the request is syntactically well-formed but not semantically valid.
+
+     For example, an application doesn't support the tools/functions feature, but the request contains it.
+     It's of no use to report such an error to a chat user, because they can't fix it themselves in the chat.
+     But the DIAL application developer, who has finer control over the request, can fix the issue by modifying it.
+     """
+
+     message: str
+
+     def __init__(self, message: str):
+         self.message = message
+         super().__init__(self.message)
+
+     def to_dial_exception(self) -> DialException:
+         return RequestValidationError(
+             message=self.message,
+             code="invalid_argument",
+         )
+
+
+ # The third category of errors is everything else, including standard Python exceptions like ValueError or KeyError.
+ # These kinds of errors are internal to the DIAL application and thus highlight bugs in the application code itself.
+ # Neither the chat user nor the DIAL application developer can fix the issue, because there is nothing wrong with the request.
+ # Thus, such errors are simply reported as internal server errors with HTTP code 500.
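
A sketch of how an adapter might raise and surface a `UserError`; the validation rule and content types are made up, and `choice` is assumed to be an open `aidial_sdk` `Choice`:

from aidial_sdk.chat_completion import Choice

from aidial_adapter_anthropic.adapter._errors import UserError

async def validate_attachment(choice: Choice, content_type: str) -> None:
    try:
        if content_type not in ("image/png", "image/jpeg"):
            raise UserError(
                error_message=f"Unsupported attachment type: {content_type}",
                usage_message="Attach PNG or JPEG images.",
            )
    except UserError as e:
        await e.report_usage(choice)  # shows the "Usage" stage to the chat user
        raise e.to_dial_exception()  # surfaces as a DIAL request validation error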
aidial_adapter_anthropic/adapter/_tokenize.py
@@ -0,0 +1,12 @@
+ def default_tokenize_string(string: str) -> int:
+     """
+     The number of bytes is a proxy for the number of tokens for
+     models which do not provide any means to count tokens.
+
+     Any token number estimator should satisfy the following requirements:
+     1. Overestimation of the number of tokens is allowed.
+     It's ok to truncate the chat history more than necessary.
+     2. Underestimation of the number of tokens is prohibited.
+     It's wrong to leave the chat history as is when truncation was actually required.
+     """
+     return len(string.encode("utf-8"))
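
A few worked values for the byte-count heuristic; non-ASCII text costs more than its character count, which keeps the estimate on the safe, over-counting side:

from aidial_adapter_anthropic.adapter._tokenize import default_tokenize_string

assert default_tokenize_string("hello") == 5  # 5 ASCII chars -> 5 bytes
assert default_tokenize_string("héllo") == 6  # 'é' is 2 bytes in UTF-8
assert default_tokenize_string("日本語") == 9  # 3 chars x 3 bytes each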