livekit-plugins-anthropic 0.2.10__py3-none-any.whl → 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
livekit/plugins/anthropic/llm.py

@@ -53,6 +53,8 @@ from .models import (
     ChatModels,
 )
 
+CACHE_CONTROL_EPHEMERAL = anthropic.types.CacheControlEphemeralParam(type="ephemeral")
+
 
 @dataclass
 class LLMOptions:
@@ -61,6 +63,8 @@ class LLMOptions:
     temperature: float | None
     parallel_tool_calls: bool | None
     tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] | None
+    caching: Literal["ephemeral"] | None = None
+    """If set to "ephemeral", the system prompt, tools, and chat history will be cached."""
 
 
 class LLM(llm.LLM):
@@ -75,12 +79,23 @@ class LLM(llm.LLM):
         temperature: float | None = None,
         parallel_tool_calls: bool | None = None,
         tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] = "auto",
+        caching: Literal["ephemeral"] | None = None,
     ) -> None:
         """
         Create a new instance of Anthropic LLM.
 
         ``api_key`` must be set to your Anthropic API key, either using the argument or by setting
         the ``ANTHROPIC_API_KEY`` environmental variable.
+
+        model (str | ChatModels): The model to use. Defaults to "claude-3-5-sonnet-20241022".
+        api_key (str | None): The Anthropic API key. Defaults to the ANTHROPIC_API_KEY environment variable.
+        base_url (str | None): The base URL for the Anthropic API. Defaults to None.
+        user (str | None): The user for the Anthropic API. Defaults to None.
+        client (anthropic.AsyncClient | None): The Anthropic client to use. Defaults to None.
+        temperature (float | None): The temperature for the Anthropic API. Defaults to None.
+        parallel_tool_calls (bool | None): Whether to parallelize tool calls. Defaults to None.
+        tool_choice (Union[ToolChoice, Literal["auto", "required", "none"]] | None): The tool choice for the Anthropic API. Defaults to "auto".
+        caching (Literal["ephemeral"] | None): If set to "ephemeral", caching will be enabled for the system prompt, tools, and chat history.
         """
 
         super().__init__(
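
The new `caching` keyword is the whole public surface of this feature. A minimal usage sketch (agent wiring omitted; `LLM` is the class shown in this diff):

```python
# Minimal sketch: opting in to ephemeral prompt caching on the plugin's LLM.
# The surrounding agent setup is omitted.
from livekit.plugins.anthropic import LLM

assistant_llm = LLM(
    model="claude-3-5-sonnet-20241022",
    caching="ephemeral",  # system prompt, tools, and chat history get cache_control markers
)
```
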
@@ -101,6 +116,7 @@ class LLM(llm.LLM):
             temperature=temperature,
             parallel_tool_calls=parallel_tool_calls,
             tool_choice=tool_choice,
+            caching=caching,
         )
         self._client = client or anthropic.AsyncClient(
             api_key=api_key,
@@ -138,12 +154,24 @@ class LLM(llm.LLM):
         opts: dict[str, Any] = dict()
         if fnc_ctx and len(fnc_ctx.ai_functions) > 0:
             fncs_desc: list[anthropic.types.ToolParam] = []
-            for fnc in fnc_ctx.ai_functions.values():
-                fncs_desc.append(_build_function_description(fnc))
+            for i, fnc in enumerate(fnc_ctx.ai_functions.values()):
+                # caching last tool will cache all the tools if caching is enabled
+                cache_ctrl = (
+                    CACHE_CONTROL_EPHEMERAL
+                    if (i == len(fnc_ctx.ai_functions) - 1)
+                    and self._opts.caching == "ephemeral"
+                    else None
+                )
+                fncs_desc.append(
+                    _build_function_description(
+                        fnc,
+                        cache_ctrl=cache_ctrl,
+                    )
+                )
 
             opts["tools"] = fncs_desc
             if tool_choice is not None:
-                anthropic_tool_choice: dict[str, Any] = {"type": "auto"}
+                anthropic_tool_choice: dict[str, Any] | None = {"type": "auto"}
                 if isinstance(tool_choice, ToolChoice):
                     if tool_choice.type == "function":
                         anthropic_tool_choice = {
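
The loop above relies on how Anthropic prompt caching works: a `cache_control` marker caches the entire request prefix up to and including the marked block, so tagging only the last tool caches the whole tool list. A standalone sketch of that rule against the raw SDK types (the tool definitions are hypothetical placeholders):

```python
# Sketch of the prefix-caching rule the loop depends on: cache_control on
# the LAST tool makes the whole tools array part of the cached prefix.
import anthropic

CACHE_CONTROL_EPHEMERAL = anthropic.types.CacheControlEphemeralParam(type="ephemeral")

tools: list[anthropic.types.ToolParam] = [
    {"name": "get_weather", "description": "Look up weather", "input_schema": {"type": "object"}},
    {"name": "hang_up", "description": "End the call", "input_schema": {"type": "object"}},
]
tools[-1]["cache_control"] = CACHE_CONTROL_EPHEMERAL  # earlier tools ride along for free
```
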
@@ -153,17 +181,29 @@
                 elif isinstance(tool_choice, str):
                     if tool_choice == "required":
                         anthropic_tool_choice = {"type": "any"}
-                if parallel_tool_calls is not None and parallel_tool_calls is False:
-                    anthropic_tool_choice["disable_parallel_tool_use"] = True
-                opts["tool_choice"] = anthropic_tool_choice
+                    elif tool_choice == "none":
+                        opts["tools"] = []
+                        anthropic_tool_choice = None
+                if anthropic_tool_choice is not None:
+                    if parallel_tool_calls is False:
+                        anthropic_tool_choice["disable_parallel_tool_use"] = True
+                    opts["tool_choice"] = anthropic_tool_choice
+
+        latest_system_message: anthropic.types.TextBlockParam | None = (
+            _latest_system_message(chat_ctx, caching=self._opts.caching)
+        )
+        if latest_system_message:
+            opts["system"] = [latest_system_message]
 
-        latest_system_message = _latest_system_message(chat_ctx)
-        anthropic_ctx = _build_anthropic_context(chat_ctx.messages, id(self))
+        anthropic_ctx = _build_anthropic_context(
+            chat_ctx.messages,
+            id(self),
+            caching=self._opts.caching,
+        )
         collaped_anthropic_ctx = _merge_messages(anthropic_ctx)
 
         stream = self._client.messages.create(
             max_tokens=opts.get("max_tokens", 1024),
-            system=latest_system_message,
             messages=collaped_anthropic_ctx,
             model=self._opts.model,
             temperature=temperature or anthropic.NOT_GIVEN,
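
The rewritten branch also gives `tool_choice="none"` a real meaning: the tools array is emptied and no `tool_choice` is sent at all. A condensed restatement of the mapping (the `ToolChoice` function-pinning branch is cut off in this hunk, so it is omitted here as well):

```python
# Behavior summary of the branch above, not plugin code.
def map_tool_choice(tool_choice: str) -> dict | None:
    if tool_choice == "required":
        return {"type": "any"}  # Anthropic's "must call some tool"
    if tool_choice == "none":
        return None  # caller also clears opts["tools"] and sends no tool_choice
    return {"type": "auto"}
```
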
@@ -209,6 +249,8 @@ class LLMStream(llm.LLMStream):
         self._request_id: str = ""
         self._ignoring_cot = False  # ignore chain of thought
         self._input_tokens = 0
+        self._cache_creation_tokens = 0
+        self._cache_read_tokens = 0
         self._output_tokens = 0
 
     async def _run(self) -> None:
@@ -230,7 +272,12 @@
                     usage=llm.CompletionUsage(
                         completion_tokens=self._output_tokens,
                         prompt_tokens=self._input_tokens,
-                        total_tokens=self._input_tokens + self._output_tokens,
+                        total_tokens=self._input_tokens
+                        + self._output_tokens
+                        + self._cache_creation_tokens
+                        + self._cache_read_tokens,
+                        cache_creation_input_tokens=self._cache_creation_tokens,
+                        cache_read_input_tokens=self._cache_read_tokens,
                     ),
                 )
             )
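
Folding the two cache counters into `total_tokens` matters because Anthropic bills the three input buckets at different rates. A rough cost sketch; the base rate and the 1.25x/0.1x multipliers are illustrative assumptions, not pricing taken from this diff:

```python
# Rough cost arithmetic over the three input buckets surfaced above.
# base_rate and both multipliers are assumptions for illustration only.
def estimate_input_cost(
    input_tokens: int,
    cache_creation_tokens: int,
    cache_read_tokens: int,
    base_rate: float = 3.0e-6,  # assumed $ per uncached input token
) -> float:
    return (
        input_tokens * base_rate
        + cache_creation_tokens * base_rate * 1.25  # cache writes at a premium
        + cache_read_tokens * base_rate * 0.10  # cache reads at a steep discount
    )
```
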
@@ -253,6 +300,12 @@
             self._request_id = event.message.id
             self._input_tokens = event.message.usage.input_tokens
             self._output_tokens = event.message.usage.output_tokens
+            if event.message.usage.cache_creation_input_tokens:
+                self._cache_creation_tokens = (
+                    event.message.usage.cache_creation_input_tokens
+                )
+            if event.message.usage.cache_read_input_tokens:
+                self._cache_read_tokens = event.message.usage.cache_read_input_tokens
         elif event.type == "message_delta":
             self._output_tokens += event.usage.output_tokens
         elif event.type == "content_block_start":
@@ -317,7 +370,9 @@
         return None
 
 
-def _latest_system_message(chat_ctx: llm.ChatContext) -> str:
+def _latest_system_message(
+    chat_ctx: llm.ChatContext, caching: Literal["ephemeral"] | None = None
+) -> anthropic.types.TextBlockParam | None:
     latest_system_message: llm.ChatMessage | None = None
     for m in chat_ctx.messages:
         if m.role == "system":
@@ -332,7 +387,14 @@ def _latest_system_message(chat_ctx: llm.ChatContext) -> str:
     latest_system_str = " ".join(
         [c for c in latest_system_message.content if isinstance(c, str)]
     )
-    return latest_system_str
+    if latest_system_str:
+        system_text_block = anthropic.types.TextBlockParam(
+            text=latest_system_str,
+            type="text",
+            cache_control=CACHE_CONTROL_EPHEMERAL if caching == "ephemeral" else None,
+        )
+        return system_text_block
+    return None
 
 
 def _merge_messages(
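
The practical effect of returning a `TextBlockParam` instead of a bare string is that `system` now goes on the wire as a list of blocks, which is the form that can carry `cache_control`. Roughly what `opts["system"]` contains when caching is on (prompt text shortened):

```python
# Approximate serialized form of opts["system"] with caching == "ephemeral".
system_blocks = [
    {
        "type": "text",
        "text": "You are a helpful voice assistant. ...",
        "cache_control": {"type": "ephemeral"},
    }
]
```
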
@@ -362,18 +424,29 @@
 
 
 def _build_anthropic_context(
-    chat_ctx: List[llm.ChatMessage], cache_key: Any
+    chat_ctx: List[llm.ChatMessage],
+    cache_key: Any,
+    caching: Literal["ephemeral"] | None,
 ) -> List[anthropic.types.MessageParam]:
     result: List[anthropic.types.MessageParam] = []
-    for msg in chat_ctx:
-        a_msg = _build_anthropic_message(msg, cache_key, chat_ctx)
+    for i, msg in enumerate(chat_ctx):
+        # caching last message will cache whole chat history if caching is enabled
+        cache_ctrl = (
+            CACHE_CONTROL_EPHEMERAL
+            if ((i == len(chat_ctx) - 1) and caching == "ephemeral")
+            else None
+        )
+        a_msg = _build_anthropic_message(msg, cache_key, cache_ctrl=cache_ctrl)
+
         if a_msg:
             result.append(a_msg)
     return result
 
 
 def _build_anthropic_message(
-    msg: llm.ChatMessage, cache_key: Any, chat_ctx: List[llm.ChatMessage]
+    msg: llm.ChatMessage,
+    cache_key: Any,
+    cache_ctrl: anthropic.types.CacheControlEphemeralParam | None,
 ) -> anthropic.types.MessageParam | None:
     if msg.role == "user" or msg.role == "assistant":
         a_msg: anthropic.types.MessageParam = {
@@ -386,22 +459,27 @@ def _build_anthropic_message(
         # add content if provided
         if isinstance(msg.content, str) and msg.content:
             a_msg["content"].append(
-                anthropic.types.TextBlock(
+                anthropic.types.TextBlockParam(
                     text=msg.content,
                     type="text",
+                    cache_control=cache_ctrl,
                 )
             )
         elif isinstance(msg.content, list):
             for cnt in msg.content:
                 if isinstance(cnt, str) and cnt:
-                    content: anthropic.types.TextBlock = anthropic.types.TextBlock(
-                        text=cnt,
-                        type="text",
+                    content: anthropic.types.TextBlockParam = (
+                        anthropic.types.TextBlockParam(
+                            text=cnt,
+                            type="text",
+                            cache_control=cache_ctrl,
+                        )
                     )
                     a_content.append(content)
                 elif isinstance(cnt, llm.ChatImage):
-                    a_content.append(_build_anthropic_image_content(cnt, cache_key))
-
+                    a_content.append(
+                        _build_anthropic_image_content(cnt, cache_key, cache_ctrl)
+                    )
         if msg.tool_calls is not None:
             for fnc in msg.tool_calls:
                 tool_use = anthropic.types.ToolUseBlockParam(
@@ -409,6 +487,7 @@
                     type="tool_use",
                     name=fnc.function_info.name,
                     input=fnc.arguments,
+                    cache_control=cache_ctrl,
                 )
                 a_content.append(tool_use)
 
@@ -427,6 +506,7 @@
             type="tool_result",
             content=msg.content,
             is_error=msg.tool_exception is not None,
+            cache_control=cache_ctrl,
         )
         return {
             "role": "user",
@@ -437,7 +517,9 @@
 
 
 def _build_anthropic_image_content(
-    image: llm.ChatImage, cache_key: Any
+    image: llm.ChatImage,
+    cache_key: Any,
+    cache_ctrl: anthropic.types.CacheControlEphemeralParam | None,
 ) -> anthropic.types.ImageBlockParam:
     if isinstance(image.image, str):  # image is a URL
         if not image.image.startswith("data:"):
@@ -463,6 +545,7 @@
                         media_type,
                     ),
                 },
+                "cache_control": cache_ctrl,
            }
        except (ValueError, IndexError) as e:
            raise ValueError(
@@ -490,6 +573,7 @@
                "data": image._cache[cache_key],
                "media_type": "image/jpeg",
            },
+           "cache_control": cache_ctrl,
        }
 
    raise ValueError(
@@ -499,6 +583,7 @@
 
 def _build_function_description(
     fnc_info: llm.function_context.FunctionInfo,
+    cache_ctrl: anthropic.types.CacheControlEphemeralParam | None,
 ) -> anthropic.types.ToolParam:
     def build_schema_field(arg_info: llm.function_context.FunctionArgInfo):
         def type2str(t: type) -> str:
@@ -520,7 +605,7 @@
         if arg_info.description:
             p["description"] = arg_info.description
 
-        is_optional, inner_th = _is_optional_type(arg_info.type)
+        _, inner_th = _is_optional_type(arg_info.type)
 
         if get_origin(inner_th) is list:
             inner_type = get_args(inner_th)[0]
@@ -542,8 +627,9 @@
     for arg_info in fnc_info.arguments.values():
         input_schema[arg_info.name] = build_schema_field(arg_info)
 
-    return {
-        "name": fnc_info.name,
-        "description": fnc_info.description,
-        "input_schema": input_schema,
-    }
+    return anthropic.types.ToolParam(
+        name=fnc_info.name,
+        description=fnc_info.description,
+        input_schema=input_schema,
+        cache_control=cache_ctrl,
+    )
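
Taken together, `_build_function_description` now returns a real `ToolParam`, and only the final tool in the list carries the cache marker. A sketch of the value produced for that last tool when caching is enabled (name and schema are hypothetical):

```python
# Hypothetical result of the rewritten helper for the LAST tool with
# caching enabled; every earlier tool gets cache_control=None instead.
last_tool = {
    "name": "get_weather",
    "description": "Look up the current weather for a location.",
    "input_schema": {
        "type": "object",
        "properties": {"location": {"type": "string"}},
    },
    "cache_control": {"type": "ephemeral"},
}
```
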
livekit/plugins/anthropic/version.py

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.2.10"
+__version__ = "0.2.12"
livekit_plugins_anthropic-0.2.12.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: livekit-plugins-anthropic
-Version: 0.2.10
+Version: 0.2.12
 Summary: Agent Framework plugin for services from Anthropic
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
livekit_plugins_anthropic-0.2.12.dist-info/RECORD

@@ -0,0 +1,10 @@
+livekit/plugins/anthropic/__init__.py,sha256=1WCyNEaR6qBsX54qJQM0SeY-QHIucww16PLXcSnMqRo,1175
+livekit/plugins/anthropic/llm.py,sha256=dtIA1qWxMPWFxG4QbAeQ-xztmJZxRxBzYxqLFty59dA,23374
+livekit/plugins/anthropic/log.py,sha256=fG1pYSY88AnT738gZrmzF9FO4l4BdGENj3VKHMQB3Yo,72
+livekit/plugins/anthropic/models.py,sha256=wyTr2nl6SL4ylN6s4mHJcqtmgV2mjJysZo89FknWdhI,213
+livekit/plugins/anthropic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/anthropic/version.py,sha256=L9v54yZpBzq0Hizz-thIscGPt87Ydvf01GZclaT0Yuw,601
+livekit_plugins_anthropic-0.2.12.dist-info/METADATA,sha256=ZVjHKrbkK6a81xnO_oH-6-8gBrX_R_esQqJb15s_lhI,1481
+livekit_plugins_anthropic-0.2.12.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+livekit_plugins_anthropic-0.2.12.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_anthropic-0.2.12.dist-info/RECORD,,
livekit_plugins_anthropic-0.2.12.dist-info/WHEEL

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.0)
+Generator: setuptools (75.8.2)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
livekit_plugins_anthropic-0.2.10.dist-info/RECORD (removed)

@@ -1,10 +0,0 @@
-livekit/plugins/anthropic/__init__.py,sha256=1WCyNEaR6qBsX54qJQM0SeY-QHIucww16PLXcSnMqRo,1175
-livekit/plugins/anthropic/llm.py,sha256=xRayMqq_FXFGZwskpqcINKVCbXL-754Jh2CJMWvxJnY,19261
-livekit/plugins/anthropic/log.py,sha256=fG1pYSY88AnT738gZrmzF9FO4l4BdGENj3VKHMQB3Yo,72
-livekit/plugins/anthropic/models.py,sha256=wyTr2nl6SL4ylN6s4mHJcqtmgV2mjJysZo89FknWdhI,213
-livekit/plugins/anthropic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/anthropic/version.py,sha256=vT0ONEJ_8wtOHcTDpZ96O0mQtrdNrO2FOuqDfAOF_bI,601
-livekit_plugins_anthropic-0.2.10.dist-info/METADATA,sha256=8DF-B_YzfayulYZMpOB2_smktfH8s9gKXXXeccz5J4U,1481
-livekit_plugins_anthropic-0.2.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-livekit_plugins_anthropic-0.2.10.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_anthropic-0.2.10.dist-info/RECORD,,