livekit-plugins-anthropic 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- livekit/plugins/anthropic/llm.py +115 -29
- livekit/plugins/anthropic/version.py +1 -1
- {livekit_plugins_anthropic-0.2.10.dist-info → livekit_plugins_anthropic-0.2.12.dist-info}/METADATA +1 -1
- livekit_plugins_anthropic-0.2.12.dist-info/RECORD +10 -0
- {livekit_plugins_anthropic-0.2.10.dist-info → livekit_plugins_anthropic-0.2.12.dist-info}/WHEEL +1 -1
- livekit_plugins_anthropic-0.2.10.dist-info/RECORD +0 -10
- {livekit_plugins_anthropic-0.2.10.dist-info → livekit_plugins_anthropic-0.2.12.dist-info}/top_level.txt +0 -0
livekit/plugins/anthropic/llm.py
CHANGED
@@ -53,6 +53,8 @@ from .models import (
     ChatModels,
 )
 
+CACHE_CONTROL_EPHEMERAL = anthropic.types.CacheControlEphemeralParam(type="ephemeral")
+
 
 @dataclass
 class LLMOptions:
@@ -61,6 +63,8 @@ class LLMOptions:
     temperature: float | None
     parallel_tool_calls: bool | None
     tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] | None
+    caching: Literal["ephemeral"] | None = None
+    """If set to "ephemeral", the system prompt, tools, and chat history will be cached."""
 
 
 class LLM(llm.LLM):
@@ -75,12 +79,23 @@ class LLM(llm.LLM):
         temperature: float | None = None,
         parallel_tool_calls: bool | None = None,
         tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] = "auto",
+        caching: Literal["ephemeral"] | None = None,
     ) -> None:
         """
         Create a new instance of Anthropic LLM.
 
         ``api_key`` must be set to your Anthropic API key, either using the argument or by setting
         the ``ANTHROPIC_API_KEY`` environmental variable.
+
+        model (str | ChatModels): The model to use. Defaults to "claude-3-5-sonnet-20241022".
+        api_key (str | None): The Anthropic API key. Defaults to the ANTHROPIC_API_KEY environment variable.
+        base_url (str | None): The base URL for the Anthropic API. Defaults to None.
+        user (str | None): The user for the Anthropic API. Defaults to None.
+        client (anthropic.AsyncClient | None): The Anthropic client to use. Defaults to None.
+        temperature (float | None): The temperature for the Anthropic API. Defaults to None.
+        parallel_tool_calls (bool | None): Whether to parallelize tool calls. Defaults to None.
+        tool_choice (Union[ToolChoice, Literal["auto", "required", "none"]] | None): The tool choice for the Anthropic API. Defaults to "auto".
+        caching (Literal["ephemeral"] | None): If set to "ephemeral", caching will be enabled for the system prompt, tools, and chat history.
         """
 
         super().__init__(
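For reference, a minimal usage sketch of the new option (not part of the diff itself); it assumes ANTHROPIC_API_KEY is set in the environment:

# Hypothetical usage sketch: enable ephemeral prompt caching on the plugin LLM.
from livekit.plugins.anthropic import LLM

llm_instance = LLM(
    model="claude-3-5-sonnet-20241022",
    caching="ephemeral",  # caches system prompt, tools, and chat history
)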
@@ -101,6 +116,7 @@ class LLM(llm.LLM):
             temperature=temperature,
             parallel_tool_calls=parallel_tool_calls,
             tool_choice=tool_choice,
+            caching=caching,
         )
         self._client = client or anthropic.AsyncClient(
             api_key=api_key,
@@ -138,12 +154,24 @@ class LLM(llm.LLM):
         opts: dict[str, Any] = dict()
         if fnc_ctx and len(fnc_ctx.ai_functions) > 0:
             fncs_desc: list[anthropic.types.ToolParam] = []
-            for fnc in fnc_ctx.ai_functions.values():
-                fncs_desc.append(_build_function_description(fnc))
+            for i, fnc in enumerate(fnc_ctx.ai_functions.values()):
+                # caching last tool will cache all the tools if caching is enabled
+                cache_ctrl = (
+                    CACHE_CONTROL_EPHEMERAL
+                    if (i == len(fnc_ctx.ai_functions) - 1)
+                    and self._opts.caching == "ephemeral"
+                    else None
+                )
+                fncs_desc.append(
+                    _build_function_description(
+                        fnc,
+                        cache_ctrl=cache_ctrl,
+                    )
+                )
 
             opts["tools"] = fncs_desc
             if tool_choice is not None:
-                anthropic_tool_choice: dict[str, Any] = {"type": "auto"}
+                anthropic_tool_choice: dict[str, Any] | None = {"type": "auto"}
                 if isinstance(tool_choice, ToolChoice):
                     if tool_choice.type == "function":
                         anthropic_tool_choice = {
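The loop relies on how Anthropic prompt caching works: a cache_control breakpoint on the last tool covers the entire tool prefix, so only one marker is needed. A standalone sketch against the anthropic SDK (the tool definitions are illustrative, and ANTHROPIC_API_KEY is assumed to be set):

import anthropic

client = anthropic.Anthropic()
tools: list[dict] = [
    {"name": "get_weather", "description": "Look up weather", "input_schema": {"type": "object", "properties": {}}},
    {"name": "get_time", "description": "Look up local time", "input_schema": {"type": "object", "properties": {}}},
]
# A breakpoint on the last tool caches all tool definitions above it.
tools[-1]["cache_control"] = {"type": "ephemeral"}

response = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=1024,
    tools=tools,
    messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
)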
@@ -153,17 +181,29 @@ class LLM(llm.LLM):
                 elif isinstance(tool_choice, str):
                     if tool_choice == "required":
                         anthropic_tool_choice = {"type": "any"}
-                if parallel_tool_calls is False:
-                    anthropic_tool_choice["disable_parallel_tool_use"] = True
-                opts["tool_choice"] = anthropic_tool_choice
+                    elif tool_choice == "none":
+                        opts["tools"] = []
+                        anthropic_tool_choice = None
+                if anthropic_tool_choice is not None:
+                    if parallel_tool_calls is False:
+                        anthropic_tool_choice["disable_parallel_tool_use"] = True
+                    opts["tool_choice"] = anthropic_tool_choice
+
+        latest_system_message: anthropic.types.TextBlockParam | None = (
+            _latest_system_message(chat_ctx, caching=self._opts.caching)
+        )
+        if latest_system_message:
+            opts["system"] = [latest_system_message]
 
-        latest_system_message = _latest_system_message(chat_ctx)
-        anthropic_ctx = _build_anthropic_context(chat_ctx.messages, id(self))
+        anthropic_ctx = _build_anthropic_context(
+            chat_ctx.messages,
+            id(self),
+            caching=self._opts.caching,
+        )
         collaped_anthropic_ctx = _merge_messages(anthropic_ctx)
 
         stream = self._client.messages.create(
             max_tokens=opts.get("max_tokens", 1024),
-            system=latest_system_message,
             messages=collaped_anthropic_ctx,
             model=self._opts.model,
             temperature=temperature or anthropic.NOT_GIVEN,
@@ -209,6 +249,8 @@ class LLMStream(llm.LLMStream):
         self._request_id: str = ""
         self._ignoring_cot = False  # ignore chain of thought
         self._input_tokens = 0
+        self._cache_creation_tokens = 0
+        self._cache_read_tokens = 0
         self._output_tokens = 0
 
     async def _run(self) -> None:
@@ -230,7 +272,12 @@ class LLMStream(llm.LLMStream):
                     usage=llm.CompletionUsage(
                         completion_tokens=self._output_tokens,
                         prompt_tokens=self._input_tokens,
-                        total_tokens=self._input_tokens + self._output_tokens,
+                        total_tokens=self._input_tokens
+                        + self._output_tokens
+                        + self._cache_creation_tokens
+                        + self._cache_read_tokens,
+                        cache_creation_input_tokens=self._cache_creation_tokens,
+                        cache_read_input_tokens=self._cache_read_tokens,
                     ),
                 )
             )
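Anthropic reports cached prompt tokens separately from input_tokens, so a grand total has to sum all four counters. A hedged sketch of the same arithmetic (names mirror the diff; the helper function itself is illustrative):

def total_tokens(
    input_tokens: int,
    output_tokens: int,
    cache_creation_tokens: int,
    cache_read_tokens: int,
) -> int:
    # input_tokens excludes tokens written to or read back from the prompt
    # cache, hence the explicit four-way sum used in the diff above.
    return input_tokens + output_tokens + cache_creation_tokens + cache_read_tokens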
@@ -253,6 +300,12 @@ class LLMStream(llm.LLMStream):
             self._request_id = event.message.id
             self._input_tokens = event.message.usage.input_tokens
             self._output_tokens = event.message.usage.output_tokens
+            if event.message.usage.cache_creation_input_tokens:
+                self._cache_creation_tokens = (
+                    event.message.usage.cache_creation_input_tokens
+                )
+            if event.message.usage.cache_read_input_tokens:
+                self._cache_read_tokens = event.message.usage.cache_read_input_tokens
         elif event.type == "message_delta":
             self._output_tokens += event.usage.output_tokens
         elif event.type == "content_block_start":
@@ -317,7 +370,9 @@ class LLMStream(llm.LLMStream):
         return None
 
 
-def _latest_system_message(chat_ctx: llm.ChatContext) -> str:
+def _latest_system_message(
+    chat_ctx: llm.ChatContext, caching: Literal["ephemeral"] | None = None
+) -> anthropic.types.TextBlockParam | None:
     latest_system_message: llm.ChatMessage | None = None
     for m in chat_ctx.messages:
         if m.role == "system":
@@ -332,7 +387,14 @@ def _latest_system_message(chat_ctx: llm.ChatContext) -> str:
     latest_system_str = " ".join(
         [c for c in latest_system_message.content if isinstance(c, str)]
     )
-    return latest_system_str
+    if latest_system_str:
+        system_text_block = anthropic.types.TextBlockParam(
+            text=latest_system_str,
+            type="text",
+            cache_control=CACHE_CONTROL_EPHEMERAL if caching == "ephemeral" else None,
+        )
+        return system_text_block
+    return None
 
 
 def _merge_messages(
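An illustrative equivalent of the new return value, assuming the anthropic SDK: the system prompt is now passed as a list of text blocks, and the block carries a cache breakpoint only when caching is enabled.

import anthropic

system_block = anthropic.types.TextBlockParam(
    type="text",
    text="You are a helpful voice assistant.",  # stand-in system prompt
    cache_control={"type": "ephemeral"},  # None when caching is disabled
)
# Passed to the API as: client.messages.create(..., system=[system_block], ...)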
@@ -362,18 +424,29 @@ def _merge_messages(
 
 
 def _build_anthropic_context(
-    chat_ctx: List[llm.ChatMessage], cache_key: Any
+    chat_ctx: List[llm.ChatMessage],
+    cache_key: Any,
+    caching: Literal["ephemeral"] | None,
 ) -> List[anthropic.types.MessageParam]:
     result: List[anthropic.types.MessageParam] = []
-    for msg in chat_ctx:
-        a_msg = _build_anthropic_message(msg, cache_key)
+    for i, msg in enumerate(chat_ctx):
+        # caching last message will cache whole chat history if caching is enabled
+        cache_ctrl = (
+            CACHE_CONTROL_EPHEMERAL
+            if ((i == len(chat_ctx) - 1) and caching == "ephemeral")
+            else None
+        )
+        a_msg = _build_anthropic_message(msg, cache_key, cache_ctrl=cache_ctrl)
+
         if a_msg:
             result.append(a_msg)
     return result
 
 
 def _build_anthropic_message(
-    msg: llm.ChatMessage, cache_key: Any
+    msg: llm.ChatMessage,
+    cache_key: Any,
+    cache_ctrl: anthropic.types.CacheControlEphemeralParam | None,
 ) -> anthropic.types.MessageParam | None:
     if msg.role == "user" or msg.role == "assistant":
         a_msg: anthropic.types.MessageParam = {
@@ -386,22 +459,27 @@ def _build_anthropic_message(
         # add content if provided
         if isinstance(msg.content, str) and msg.content:
             a_msg["content"].append(
                 anthropic.types.TextBlockParam(
                     text=msg.content,
                     type="text",
+                    cache_control=cache_ctrl,
                 )
             )
         elif isinstance(msg.content, list):
             for cnt in msg.content:
                 if isinstance(cnt, str) and cnt:
-                    content: anthropic.types.TextBlockParam = anthropic.types.TextBlockParam(
-                        text=cnt,
-                        type="text",
+                    content: anthropic.types.TextBlockParam = (
+                        anthropic.types.TextBlockParam(
+                            text=cnt,
+                            type="text",
+                            cache_control=cache_ctrl,
+                        )
                     )
                     a_content.append(content)
                 elif isinstance(cnt, llm.ChatImage):
-                    a_content.append(_build_anthropic_image_content(cnt, cache_key))
+                    a_content.append(
+                        _build_anthropic_image_content(cnt, cache_key, cache_ctrl)
+                    )
         if msg.tool_calls is not None:
             for fnc in msg.tool_calls:
                 tool_use = anthropic.types.ToolUseBlockParam(
@@ -409,6 +487,7 @@ def _build_anthropic_message(
                     type="tool_use",
                     name=fnc.function_info.name,
                     input=fnc.arguments,
+                    cache_control=cache_ctrl,
                 )
                 a_content.append(tool_use)
 
@@ -427,6 +506,7 @@ def _build_anthropic_message(
             type="tool_result",
             content=msg.content,
             is_error=msg.tool_exception is not None,
+            cache_control=cache_ctrl,
         )
         return {
             "role": "user",
@@ -437,7 +517,9 @@ def _build_anthropic_message(
 
 
 def _build_anthropic_image_content(
-    image: llm.ChatImage, cache_key: Any
+    image: llm.ChatImage,
+    cache_key: Any,
+    cache_ctrl: anthropic.types.CacheControlEphemeralParam | None,
 ) -> anthropic.types.ImageBlockParam:
     if isinstance(image.image, str):  # image is a URL
         if not image.image.startswith("data:"):
@@ -463,6 +545,7 @@ def _build_anthropic_image_content(
                         media_type,
                     ),
                 },
+                "cache_control": cache_ctrl,
             }
         except (ValueError, IndexError) as e:
             raise ValueError(
@@ -490,6 +573,7 @@ def _build_anthropic_image_content(
                 "data": image._cache[cache_key],
                 "media_type": "image/jpeg",
             },
+            "cache_control": cache_ctrl,
         }
 
     raise ValueError(
@@ -499,6 +583,7 @@ def _build_anthropic_image_content(
 
 def _build_function_description(
     fnc_info: llm.function_context.FunctionInfo,
+    cache_ctrl: anthropic.types.CacheControlEphemeralParam | None,
 ) -> anthropic.types.ToolParam:
     def build_schema_field(arg_info: llm.function_context.FunctionArgInfo):
         def type2str(t: type) -> str:
@@ -520,7 +605,7 @@ def _build_function_description(
         if arg_info.description:
             p["description"] = arg_info.description
 
-
+        _, inner_th = _is_optional_type(arg_info.type)
 
         if get_origin(inner_th) is list:
             inner_type = get_args(inner_th)[0]
@@ -542,8 +627,9 @@ def _build_function_description(
     for arg_info in fnc_info.arguments.values():
         input_schema[arg_info.name] = build_schema_field(arg_info)
 
-    return anthropic.types.ToolParam(
-        name=fnc_info.name,
-        description=fnc_info.description,
-        input_schema=input_schema,
-    )
+    return anthropic.types.ToolParam(
+        name=fnc_info.name,
+        description=fnc_info.description,
+        input_schema=input_schema,
+        cache_control=cache_ctrl,
+    )
livekit_plugins_anthropic-0.2.12.dist-info/RECORD
ADDED
@@ -0,0 +1,10 @@
+livekit/plugins/anthropic/__init__.py,sha256=1WCyNEaR6qBsX54qJQM0SeY-QHIucww16PLXcSnMqRo,1175
+livekit/plugins/anthropic/llm.py,sha256=dtIA1qWxMPWFxG4QbAeQ-xztmJZxRxBzYxqLFty59dA,23374
+livekit/plugins/anthropic/log.py,sha256=fG1pYSY88AnT738gZrmzF9FO4l4BdGENj3VKHMQB3Yo,72
+livekit/plugins/anthropic/models.py,sha256=wyTr2nl6SL4ylN6s4mHJcqtmgV2mjJysZo89FknWdhI,213
+livekit/plugins/anthropic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/anthropic/version.py,sha256=L9v54yZpBzq0Hizz-thIscGPt87Ydvf01GZclaT0Yuw,601
+livekit_plugins_anthropic-0.2.12.dist-info/METADATA,sha256=ZVjHKrbkK6a81xnO_oH-6-8gBrX_R_esQqJb15s_lhI,1481
+livekit_plugins_anthropic-0.2.12.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+livekit_plugins_anthropic-0.2.12.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_anthropic-0.2.12.dist-info/RECORD,,
livekit_plugins_anthropic-0.2.10.dist-info/RECORD
DELETED
@@ -1,10 +0,0 @@
-livekit/plugins/anthropic/__init__.py,sha256=1WCyNEaR6qBsX54qJQM0SeY-QHIucww16PLXcSnMqRo,1175
-livekit/plugins/anthropic/llm.py,sha256=xRayMqq_FXFGZwskpqcINKVCbXL-754Jh2CJMWvxJnY,19261
-livekit/plugins/anthropic/log.py,sha256=fG1pYSY88AnT738gZrmzF9FO4l4BdGENj3VKHMQB3Yo,72
-livekit/plugins/anthropic/models.py,sha256=wyTr2nl6SL4ylN6s4mHJcqtmgV2mjJysZo89FknWdhI,213
-livekit/plugins/anthropic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/anthropic/version.py,sha256=vT0ONEJ_8wtOHcTDpZ96O0mQtrdNrO2FOuqDfAOF_bI,601
-livekit_plugins_anthropic-0.2.10.dist-info/METADATA,sha256=8DF-B_YzfayulYZMpOB2_smktfH8s9gKXXXeccz5J4U,1481
-livekit_plugins_anthropic-0.2.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-livekit_plugins_anthropic-0.2.10.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_anthropic-0.2.10.dist-info/RECORD,,
{livekit_plugins_anthropic-0.2.10.dist-info → livekit_plugins_anthropic-0.2.12.dist-info}/top_level.txt
File without changes