arize-phoenix 10.14.0__py3-none-any.whl → 11.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of arize-phoenix might be problematic.

Files changed (84)
  1. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/METADATA +3 -2
  2. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/RECORD +82 -50
  3. phoenix/config.py +5 -2
  4. phoenix/datetime_utils.py +8 -1
  5. phoenix/db/bulk_inserter.py +40 -1
  6. phoenix/db/facilitator.py +263 -4
  7. phoenix/db/insertion/helpers.py +15 -0
  8. phoenix/db/insertion/span.py +3 -1
  9. phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
  10. phoenix/db/models.py +267 -9
  11. phoenix/db/types/model_provider.py +1 -0
  12. phoenix/db/types/token_price_customization.py +29 -0
  13. phoenix/server/api/context.py +38 -4
  14. phoenix/server/api/dataloaders/__init__.py +41 -5
  15. phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
  16. phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
  17. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
  18. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
  19. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
  20. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
  21. phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
  22. phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +58 -0
  23. phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
  24. phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
  25. phoenix/server/api/dataloaders/span_cost_summary_by_project.py +140 -0
  26. phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
  27. phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
  28. phoenix/server/api/dataloaders/span_costs.py +35 -0
  29. phoenix/server/api/dataloaders/types.py +29 -0
  30. phoenix/server/api/helpers/playground_clients.py +562 -12
  31. phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
  32. phoenix/server/api/helpers/prompts/models.py +67 -0
  33. phoenix/server/api/input_types/GenerativeModelInput.py +2 -0
  34. phoenix/server/api/input_types/ProjectSessionSort.py +3 -0
  35. phoenix/server/api/input_types/SpanSort.py +17 -0
  36. phoenix/server/api/mutations/__init__.py +2 -0
  37. phoenix/server/api/mutations/chat_mutations.py +17 -0
  38. phoenix/server/api/mutations/model_mutations.py +208 -0
  39. phoenix/server/api/queries.py +82 -41
  40. phoenix/server/api/routers/v1/traces.py +11 -4
  41. phoenix/server/api/subscriptions.py +36 -2
  42. phoenix/server/api/types/CostBreakdown.py +15 -0
  43. phoenix/server/api/types/Experiment.py +59 -1
  44. phoenix/server/api/types/ExperimentRun.py +58 -4
  45. phoenix/server/api/types/GenerativeModel.py +143 -2
  46. phoenix/server/api/types/GenerativeProvider.py +33 -20
  47. phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
  48. phoenix/server/api/types/ModelInterface.py +11 -0
  49. phoenix/server/api/types/PlaygroundModel.py +10 -0
  50. phoenix/server/api/types/Project.py +42 -0
  51. phoenix/server/api/types/ProjectSession.py +44 -0
  52. phoenix/server/api/types/Span.py +137 -0
  53. phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
  54. phoenix/server/api/types/SpanCostSummary.py +10 -0
  55. phoenix/server/api/types/TokenPrice.py +16 -0
  56. phoenix/server/api/types/TokenUsage.py +3 -3
  57. phoenix/server/api/types/Trace.py +41 -0
  58. phoenix/server/app.py +59 -0
  59. phoenix/server/cost_tracking/cost_details_calculator.py +190 -0
  60. phoenix/server/cost_tracking/cost_model_lookup.py +151 -0
  61. phoenix/server/cost_tracking/helpers.py +68 -0
  62. phoenix/server/cost_tracking/model_cost_manifest.json +59 -329
  63. phoenix/server/cost_tracking/regex_specificity.py +397 -0
  64. phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
  65. phoenix/server/daemons/__init__.py +0 -0
  66. phoenix/server/daemons/generative_model_store.py +51 -0
  67. phoenix/server/daemons/span_cost_calculator.py +103 -0
  68. phoenix/server/dml_event_handler.py +1 -0
  69. phoenix/server/static/.vite/manifest.json +36 -36
  70. phoenix/server/static/assets/components-BnK9kodr.js +5055 -0
  71. phoenix/server/static/assets/{index-qiubV_74.js → index-S3YKLmbo.js} +13 -13
  72. phoenix/server/static/assets/{pages-C4V07ozl.js → pages-BW6PBHZb.js} +809 -417
  73. phoenix/server/static/assets/{vendor-Bfsiga8H.js → vendor-DqQvHbPa.js} +147 -147
  74. phoenix/server/static/assets/{vendor-arizeai-CQOWsrzm.js → vendor-arizeai-CLX44PFA.js} +1 -1
  75. phoenix/server/static/assets/{vendor-codemirror-CrcGVhB2.js → vendor-codemirror-Du3XyJnB.js} +1 -1
  76. phoenix/server/static/assets/{vendor-recharts-Yyg3G-Rq.js → vendor-recharts-B2PJDrnX.js} +25 -25
  77. phoenix/server/static/assets/{vendor-shiki-OPjag7Hm.js → vendor-shiki-CNbrFjf9.js} +1 -1
  78. phoenix/version.py +1 -1
  79. phoenix/server/cost_tracking/cost_lookup.py +0 -255
  80. phoenix/server/static/assets/components-CUUWyAMo.js +0 -4509
  81. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/WHEEL +0 -0
  82. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/entry_points.txt +0 -0
  83. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/licenses/IP_NOTICE +0 -0
  84. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -463,6 +463,35 @@ class OpenAIBaseStreamingClient(PlaygroundStreamingClient):
         yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
         yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens

+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details is not None:
+            prompt_details = usage.prompt_tokens_details
+            if (
+                hasattr(prompt_details, "cached_tokens")
+                and prompt_details.cached_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, prompt_details.cached_tokens
+            if hasattr(prompt_details, "audio_tokens") and prompt_details.audio_tokens is not None:
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, prompt_details.audio_tokens
+
+        if (
+            hasattr(usage, "completion_tokens_details")
+            and usage.completion_tokens_details is not None
+        ):
+            completion_details = usage.completion_tokens_details
+            if (
+                hasattr(completion_details, "reasoning_tokens")
+                and completion_details.reasoning_tokens is not None
+            ):
+                yield (
+                    LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
+                    completion_details.reasoning_tokens,
+                )
+            if (
+                hasattr(completion_details, "audio_tokens")
+                and completion_details.audio_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, completion_details.audio_tokens
+

 def _get_credential_value(
     credentials: Optional[list[PlaygroundClientCredential]], env_var_name: str
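
Note: the new detail yields above use duck-typed access to the OpenAI SDK's usage object. A minimal sketch of the shape being probed, with SimpleNamespace standing in for the SDK's models and purely illustrative values:

from types import SimpleNamespace

# Stand-in for the usage object the generator above receives; real clients get
# openai.types.CompletionUsage, but only these duck-typed fields are accessed.
usage = SimpleNamespace(
    prompt_tokens=120,
    completion_tokens=48,
    total_tokens=168,
    prompt_tokens_details=SimpleNamespace(cached_tokens=100, audio_tokens=None),
    completion_tokens_details=SimpleNamespace(reasoning_tokens=32, audio_tokens=None),
)
# With these values the new branches yield only the cache-read and reasoning counts;
# the hasattr/None guards mean SDKs or providers that omit the detail objects add nothing.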
@@ -597,6 +626,465 @@ class OllamaStreamingClient(OpenAIBaseStreamingClient):
         self._attributes[LLM_SYSTEM] = OpenInferenceLLMSystemValues.OPENAI.value


+@register_llm_client(
+    provider_key=GenerativeProviderKey.AWS,
+    model_names=[
+        PROVIDER_DEFAULT,
+        "anthropic.claude-3-5-sonnet-20240620-v1:0",
+        "anthropic.claude-3-7-sonnet-20250219-v1:0",
+        "anthropic.claude-3-haiku-20240307-v1:0",
+        "anthropic.claude-3-5-sonnet-20241022-v2:0",
+        "anthropic.claude-3-5-haiku-20241022-v1:0",
+        "anthropic.claude-opus-4-20250514-v1:0",
+        "anthropic.claude-sonnet-4-20250514-v1:0",
+        "amazon.titan-embed-text-v2:0",
+        "amazon.nova-pro-v1:0",
+        "amazon.nova-premier-v1:0:8k",
+        "amazon.nova-premier-v1:0:20k",
+        "amazon.nova-premier-v1:0:1000k",
+        "amazon.nova-premier-v1:0:mm",
+        "amazon.nova-premier-v1:0",
+        "amazon.nova-lite-v1:0",
+        "amazon.nova-micro-v1:0",
+        "deepseek.r1-v1:0",
+        "mistral.pixtral-large-2502-v1:0",
+        "meta.llama3-1-8b-instruct-v1:0:128k",
+        "meta.llama3-1-8b-instruct-v1:0",
+        "meta.llama3-1-70b-instruct-v1:0:128k",
+        "meta.llama3-1-70b-instruct-v1:0",
+        "meta.llama3-1-405b-instruct-v1:0",
+        "meta.llama3-2-11b-instruct-v1:0",
+        "meta.llama3-2-90b-instruct-v1:0",
+        "meta.llama3-2-1b-instruct-v1:0",
+        "meta.llama3-2-3b-instruct-v1:0",
+        "meta.llama3-3-70b-instruct-v1:0",
+        "meta.llama4-scout-17b-instruct-v1:0",
+        "meta.llama4-maverick-17b-instruct-v1:0",
+    ],
+)
+class BedrockStreamingClient(PlaygroundStreamingClient):
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
+    ) -> None:
+        import boto3  # type: ignore[import-untyped]
+
+        super().__init__(model=model, credentials=credentials)
+        self.region = model.region or "us-east-1"
+        self.api = "converse"
+        self.aws_access_key_id = _get_credential_value(credentials, "AWS_ACCESS_KEY_ID") or getenv(
+            "AWS_ACCESS_KEY_ID"
+        )
+        self.aws_secret_access_key = _get_credential_value(
+            credentials, "AWS_SECRET_ACCESS_KEY"
+        ) or getenv("AWS_SECRET_ACCESS_KEY")
+        self.aws_session_token = _get_credential_value(credentials, "AWS_SESSION_TOKEN") or getenv(
+            "AWS_SESSION_TOKEN"
+        )
+        self.model_name = model.name
+        self.client = boto3.client(
+            service_name="bedrock-runtime",
+            region_name="us-east-1",  # match the default region in the UI
+            aws_access_key_id=self.aws_access_key_id,
+            aws_secret_access_key=self.aws_secret_access_key,
+            aws_session_token=self.aws_session_token,
+        )
+
+        self._attributes[LLM_PROVIDER] = "aws"
+        self._attributes[LLM_SYSTEM] = "aws"
+
+    @classmethod
+    def dependencies(cls) -> list[Dependency]:
+        return [Dependency(name="boto3")]
+
+    @classmethod
+    def supported_invocation_parameters(cls) -> list[InvocationParameter]:
+        return [
+            IntInvocationParameter(
+                invocation_name="max_tokens",
+                canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
+                label="Max Tokens",
+                default_value=1024,
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="temperature",
+                canonical_name=CanonicalParameterName.TEMPERATURE,
+                label="Temperature",
+                default_value=1.0,
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="top_p",
+                canonical_name=CanonicalParameterName.TOP_P,
+                label="Top P",
+                default_value=1.0,
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            JSONInvocationParameter(
+                invocation_name="tool_choice",
+                label="Tool Choice",
+                canonical_name=CanonicalParameterName.TOOL_CHOICE,
+            ),
+        ]
+
+    async def chat_completion_create(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        **invocation_parameters: Any,
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        import boto3
+
+        if (
+            self.client.meta.region_name != self.region
+        ):  # override the region if it's different from the default
+            self.client = boto3.client(
+                "bedrock-runtime",
+                region_name=self.region,
+                aws_access_key_id=self.aws_access_key_id,
+                aws_secret_access_key=self.aws_secret_access_key,
+                aws_session_token=self.aws_session_token,
+            )
+        if self.api == "invoke":
+            async for chunk in self._handle_invoke_api(messages, tools, invocation_parameters):
+                yield chunk
+        else:
+            async for chunk in self._handle_converse_api(messages, tools, invocation_parameters):
+                yield chunk
+
+    async def _handle_converse_api(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        invocation_parameters: dict[str, Any],
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        """
+        Handle the converse API.
+        """
+        # Build messages in Converse API format
+        converse_messages = self._build_converse_messages(messages)
+
+        # Build the request parameters for Converse API
+        converse_params: dict[str, Any] = {
+            "modelId": f"us.{self.model_name}",
+            "messages": converse_messages,
+            "inferenceConfig": {
+                "maxTokens": invocation_parameters["max_tokens"],
+                "temperature": invocation_parameters["temperature"],
+                "topP": invocation_parameters["top_p"],
+            },
+        }
+
+        # Add system prompt if available
+        system_prompt = self._extract_system_prompt(messages)
+        if system_prompt:
+            converse_params["system"] = [{"text": system_prompt}]
+
+        # Add tools if provided
+        if tools:
+            converse_params["toolConfig"] = {"tools": tools}
+            if (
+                "tool_choice" in invocation_parameters
+                and invocation_parameters["tool_choice"]["type"] != "none"
+            ):
+                converse_params["toolConfig"]["toolChoice"] = {}
+
+                if invocation_parameters["tool_choice"]["type"] == "auto":
+                    converse_params["toolConfig"]["toolChoice"]["auto"] = {}
+                elif invocation_parameters["tool_choice"]["type"] == "any":
+                    converse_params["toolConfig"]["toolChoice"]["any"] = {}
+                else:
+                    converse_params["toolConfig"]["toolChoice"]["tool"] = {
+                        "name": invocation_parameters["tool_choice"]["name"],
+                    }
+
+        # Make the streaming API call
+        response = self.client.converse_stream(**converse_params)
+
+        # Track active tool calls
+        active_tool_calls = {}  # contentBlockIndex -> {id, name, arguments_buffer}
+
+        # Process the event stream
+        event_stream = response.get("stream")
+
+        for event in event_stream:
+            # Handle content block start events
+            if "contentBlockStart" in event:
+                content_block_start = event["contentBlockStart"]
+                start_event = content_block_start.get("start", {})
+                block_index = content_block_start.get(
+                    "contentBlockIndex", 0
+                )  # Get the actual index
+
+                if "toolUse" in start_event:
+                    tool_use = start_event["toolUse"]
+                    active_tool_calls[block_index] = {  # Use the actual block index
+                        "id": tool_use.get("toolUseId"),
+                        "name": tool_use.get("name"),
+                        "arguments_buffer": "",
+                    }
+
+                    # Yield initial tool call chunk
+                    yield ToolCallChunk(
+                        id=tool_use.get("toolUseId"),
+                        function=FunctionCallChunk(
+                            name=tool_use.get("name"),
+                            arguments="",
+                        ),
+                    )
+
+            # Handle content block delta events
+            elif "contentBlockDelta" in event:
+                content_delta = event["contentBlockDelta"]
+                delta = content_delta.get("delta", {})
+                delta_index = content_delta.get("contentBlockIndex", 0)
+
+                # Handle text delta
+                if "text" in delta:
+                    yield TextChunk(content=delta["text"])
+
+                # Handle tool use delta
+                elif "toolUse" in delta:
+                    tool_delta = delta["toolUse"]
+                    if "input" in tool_delta and delta_index in active_tool_calls:
+                        # Accumulate tool arguments
+                        json_chunk = tool_delta["input"]
+                        active_tool_calls[delta_index]["arguments_buffer"] += json_chunk
+
+                        # Yield incremental argument update
+                        yield ToolCallChunk(
+                            id=active_tool_calls[delta_index]["id"],
+                            function=FunctionCallChunk(
+                                name=active_tool_calls[delta_index]["name"],
+                                arguments=json_chunk,
+                            ),
+                        )
+
+            # Handle content block stop events
+            elif "contentBlockStop" in event:
+                stop_index = event["contentBlockStop"].get("contentBlockIndex", 0)
+                if stop_index in active_tool_calls:
+                    del active_tool_calls[stop_index]
+
+            elif "metadata" in event:
+                self._attributes.update(
+                    {
+                        LLM_TOKEN_COUNT_PROMPT: event.get("metadata")
+                        .get("usage", {})
+                        .get("inputTokens", 0)
+                    }
+                )
+
+                self._attributes.update(
+                    {
+                        LLM_TOKEN_COUNT_COMPLETION: event.get("metadata")
+                        .get("usage", {})
+                        .get("outputTokens", 0)
+                    }
+                )
+
+                self._attributes.update(
+                    {
+                        LLM_TOKEN_COUNT_TOTAL: event.get("metadata")
+                        .get("usage", {})
+                        .get("totalTokens", 0)
+                    }
+                )
+
+    async def _handle_invoke_api(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        invocation_parameters: dict[str, Any],
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        if "anthropic" not in self.model_name:
+            raise ValueError("Invoke API is only supported for Anthropic models")
+
+        bedrock_messages, system_prompt = self._build_bedrock_messages(messages)
+        bedrock_params = {
+            "anthropic_version": "bedrock-2023-05-31",
+            "max_tokens": invocation_parameters["max_tokens"],
+            "messages": bedrock_messages,
+            "system": system_prompt,
+            "temperature": invocation_parameters["temperature"],
+            "top_p": invocation_parameters["top_p"],
+            "tools": tools,
+        }
+
+        response = self.client.invoke_model_with_response_stream(
+            modelId=f"us.{self.model_name}",  # or another Claude model
+            contentType="application/json",
+            accept="application/json",
+            body=json.dumps(bedrock_params),
+            trace="ENABLED_FULL",
+        )
+
+        # The response['body'] is an EventStream object
+        event_stream = response["body"]
+
+        # Track active tool calls and their accumulating arguments
+        active_tool_calls: dict[int, dict[str, Any]] = {}  # index -> {id, name, arguments_buffer}
+
+        for event in event_stream:
+            if "chunk" in event:
+                chunk_data = json.loads(event["chunk"]["bytes"].decode("utf-8"))
+
+                # Handle text content
+                if chunk_data.get("type") == "content_block_delta":
+                    delta = chunk_data.get("delta", {})
+                    index = chunk_data.get("index", 0)
+
+                    if delta.get("type") == "text_delta" and "text" in delta:
+                        yield TextChunk(content=delta["text"])
+
+                    elif delta.get("type") == "input_json_delta":
+                        # Accumulate tool arguments
+                        if index in active_tool_calls:
+                            active_tool_calls[index]["arguments_buffer"] += delta.get(
+                                "partial_json", ""
+                            )
+                            # Yield incremental argument update
+                            yield ToolCallChunk(
+                                id=active_tool_calls[index]["id"],
+                                function=FunctionCallChunk(
+                                    name=active_tool_calls[index]["name"],
+                                    arguments=delta.get("partial_json", ""),
+                                ),
+                            )
+
+                # Handle tool call start
+                elif chunk_data.get("type") == "content_block_start":
+                    content_block = chunk_data.get("content_block", {})
+                    index = chunk_data.get("index", 0)
+
+                    if content_block.get("type") == "tool_use":
+                        # Initialize tool call tracking
+                        active_tool_calls[index] = {
+                            "id": content_block.get("id"),
+                            "name": content_block.get("name"),
+                            "arguments_buffer": "",
+                        }
+
+                        # Yield initial tool call chunk
+                        yield ToolCallChunk(
+                            id=content_block.get("id"),
+                            function=FunctionCallChunk(
+                                name=content_block.get("name"),
+                                arguments="",  # Start with empty, will be filled by deltas
+                            ),
+                        )
+
+                # Handle content block stop (tool call complete)
+                elif chunk_data.get("type") == "content_block_stop":
+                    index = chunk_data.get("index", 0)
+                    if index in active_tool_calls:
+                        # Tool call is complete, clean up
+                        del active_tool_calls[index]
+
+                elif chunk_data.get("type") == "message_stop":
+                    self._attributes.update(
+                        {
+                            LLM_TOKEN_COUNT_COMPLETION: chunk_data.get(
+                                "amazon-bedrock-invocationMetrics", {}
+                            ).get("outputTokenCount", 0)
+                        }
+                    )
+
+                    self._attributes.update(
+                        {
+                            LLM_TOKEN_COUNT_PROMPT: chunk_data.get(
+                                "amazon-bedrock-invocationMetrics", {}
+                            ).get("inputTokenCount", 0)
+                        }
+                    )
+
+    def _build_bedrock_messages(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+    ) -> tuple[list[dict[str, Any]], str]:
+        bedrock_messages = []
+        system_prompt = ""
+        for role, content, _, _ in messages:
+            if role == ChatCompletionMessageRole.USER:
+                bedrock_messages.append(
+                    {
+                        "role": "user",
+                        "content": content,
+                    }
+                )
+            elif role == ChatCompletionMessageRole.AI:
+                bedrock_messages.append(
+                    {
+                        "role": "assistant",
+                        "content": content,
+                    }
+                )
+            elif role == ChatCompletionMessageRole.SYSTEM:
+                system_prompt += content + "\n"
+        return bedrock_messages, system_prompt
+
+    def _extract_system_prompt(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+    ) -> str:
+        """Extract system prompt from messages."""
+        system_prompts = []
+        for role, content, _, _ in messages:
+            if role == ChatCompletionMessageRole.SYSTEM:
+                system_prompts.append(content)
+        return "\n".join(system_prompts)
+
+    def _build_converse_messages(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+    ) -> list[dict[str, Any]]:
+        """Convert messages to Converse API format."""
+        converse_messages: list[dict[str, Any]] = []
+        for role, content, _id, tool_calls in messages:
+            if role == ChatCompletionMessageRole.USER:
+                converse_messages.append({"role": "user", "content": [{"text": content}]})
+            elif role == ChatCompletionMessageRole.TOOL:
+                converse_messages.append(
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "toolResult": {
+                                    "toolUseId": _id,
+                                    "content": [{"json": json.loads(content)}],
+                                }
+                            }
+                        ],
+                    }
+                )
+
+            elif role == ChatCompletionMessageRole.AI:
+                # Handle assistant messages with potential tool calls
+                message: dict[str, Any] = {"role": "assistant", "content": []}
+                if content:
+                    message["content"].append({"text": content})
+                if tool_calls:
+                    for tool_call in tool_calls:
+                        message["content"].append(tool_call)
+                converse_messages.append(message)
+        return converse_messages
+
+
 @register_llm_client(
     provider_key=GenerativeProviderKey.OPENAI,
     model_names=[
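
For orientation, the new BedrockStreamingClient above iterates the event stream returned by boto3's bedrock-runtime converse_stream call. A rough, illustrative sketch of the event shapes its loop reacts to (field names follow the Bedrock Converse API as used in the diff; the values are made up):

# Illustrative events as read by the loop in _handle_converse_api above.
events = [
    {"contentBlockStart": {"contentBlockIndex": 0,
                           "start": {"toolUse": {"toolUseId": "t1", "name": "get_weather"}}}},
    {"contentBlockDelta": {"contentBlockIndex": 0,
                           "delta": {"toolUse": {"input": '{"city": "Paris"}'}}}},
    {"contentBlockStop": {"contentBlockIndex": 0}},
    {"metadata": {"usage": {"inputTokens": 12, "outputTokens": 5, "totalTokens": 17}}},
]
# The client emits ToolCallChunk/TextChunk objects for the block events and records
# the usage numbers from the final metadata event as span token-count attributes.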
@@ -656,13 +1144,20 @@ class OpenAIStreamingClient(OpenAIBaseStreamingClient):
     provider_key=GenerativeProviderKey.OPENAI,
     model_names=[
         "o1",
+        "o1-pro",
         "o1-2024-12-17",
+        "o1-pro-2025-03-19",
         "o1-mini",
         "o1-mini-2024-09-12",
         "o1-preview",
         "o1-preview-2024-09-12",
+        "o3",
+        "o3-pro",
+        "o3-2025-04-16",
         "o3-mini",
         "o3-mini-2025-01-31",
+        "o4-mini",
+        "o4-mini-2025-04-16",
     ],
 )
 class OpenAIReasoningStreamingClient(OpenAIStreamingClient):
@@ -799,6 +1294,35 @@ class OpenAIReasoningStreamingClient(OpenAIStreamingClient):
         yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
         yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens

+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details is not None:
+            prompt_details = usage.prompt_tokens_details
+            if (
+                hasattr(prompt_details, "cached_tokens")
+                and prompt_details.cached_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, prompt_details.cached_tokens
+            if hasattr(prompt_details, "audio_tokens") and prompt_details.audio_tokens is not None:
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, prompt_details.audio_tokens
+
+        if (
+            hasattr(usage, "completion_tokens_details")
+            and usage.completion_tokens_details is not None
+        ):
+            completion_details = usage.completion_tokens_details
+            if (
+                hasattr(completion_details, "reasoning_tokens")
+                and completion_details.reasoning_tokens is not None
+            ):
+                yield (
+                    LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
+                    completion_details.reasoning_tokens,
+                )
+            if (
+                hasattr(completion_details, "audio_tokens")
+                and completion_details.audio_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, completion_details.audio_tokens
+

 @register_llm_client(
     provider_key=GenerativeProviderKey.AZURE_OPENAI,
@@ -856,12 +1380,6 @@ class AzureOpenAIStreamingClient(OpenAIBaseStreamingClient):
     provider_key=GenerativeProviderKey.ANTHROPIC,
     model_names=[
         PROVIDER_DEFAULT,
-        "claude-sonnet-4-0",
-        "claude-sonnet-4-20250514",
-        "claude-opus-4-0",
-        "claude-opus-4-20250514",
-        "claude-3-7-sonnet-latest",
-        "claude-3-7-sonnet-20250219",
         "claude-3-5-sonnet-latest",
         "claude-3-5-haiku-latest",
         "claude-3-5-sonnet-20241022",
@@ -962,15 +1480,34 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
         async with await throttled_stream(**anthropic_params) as stream:
             async for event in stream:
                 if isinstance(event, anthropic_types.RawMessageStartEvent):
-                    self._attributes.update(
-                        {LLM_TOKEN_COUNT_PROMPT: event.message.usage.input_tokens}
-                    )
+                    usage = event.message.usage
+
+                    token_counts: dict[str, Any] = {}
+                    if prompt_tokens := (
+                        (usage.input_tokens or 0)
+                        + (getattr(usage, "cache_creation_input_tokens", 0) or 0)
+                        + (getattr(usage, "cache_read_input_tokens", 0) or 0)
+                    ):
+                        token_counts[LLM_TOKEN_COUNT_PROMPT] = prompt_tokens
+                    if cache_creation_tokens := getattr(usage, "cache_creation_input_tokens", None):
+                        if cache_creation_tokens is not None:
+                            token_counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] = (
+                                cache_creation_tokens
+                            )
+                    self._attributes.update(token_counts)
                 elif isinstance(event, anthropic_streaming.TextEvent):
                     yield TextChunk(content=event.text)
                 elif isinstance(event, anthropic_streaming.MessageStopEvent):
-                    self._attributes.update(
-                        {LLM_TOKEN_COUNT_COMPLETION: event.message.usage.output_tokens}
-                    )
+                    usage = event.message.usage
+                    output_token_counts: dict[str, Any] = {}
+                    if usage.output_tokens:
+                        output_token_counts[LLM_TOKEN_COUNT_COMPLETION] = usage.output_tokens
+                    if cache_read_tokens := getattr(usage, "cache_read_input_tokens", None):
+                        if cache_read_tokens is not None:
+                            output_token_counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] = (
+                                cache_read_tokens
+                            )
+                    self._attributes.update(output_token_counts)
                 elif (
                     isinstance(event, anthropic_streaming.ContentBlockStopEvent)
                     and event.content_block.type == "tool_use"
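
One behavioral consequence of the rewrite above: the recorded prompt count now includes cache writes and cache reads, not just uncached input tokens. A tiny worked example with made-up numbers, assuming the Anthropic usage object exposes the cache fields:

# Hypothetical values from a RawMessageStartEvent's usage (illustrative only).
input_tokens = 40                 # uncached prompt tokens
cache_creation_input_tokens = 10  # tokens written to the prompt cache
cache_read_input_tokens = 200     # tokens served from the prompt cache

# Mirrors the walrus expression above: the span's prompt token count is the sum,
# with the cache-write portion also recorded under its own detail attribute.
prompt_tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens
assert prompt_tokens == 250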
@@ -1055,6 +1592,10 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
 @register_llm_client(
     provider_key=GenerativeProviderKey.ANTHROPIC,
     model_names=[
+        "claude-sonnet-4-0",
+        "claude-sonnet-4-20250514",
+        "claude-opus-4-0",
+        "claude-opus-4-20250514",
         "claude-3-7-sonnet-latest",
         "claude-3-7-sonnet-20250219",
     ],
@@ -1239,6 +1780,15 @@ LLM_SYSTEM = SpanAttributes.LLM_SYSTEM
 LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
 LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
 LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
+LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ
+LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE = (
+    SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE
+)
+LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO
+LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING = (
+    SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING
+)
+LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO


 class _HttpxClient(wrapt.ObjectProxy):  # type: ignore