arize-phoenix 10.14.0__py3-none-any.whl → 11.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of arize-phoenix has been flagged as potentially problematic.
- {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/METADATA +3 -2
- {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/RECORD +82 -50
- phoenix/config.py +5 -2
- phoenix/datetime_utils.py +8 -1
- phoenix/db/bulk_inserter.py +40 -1
- phoenix/db/facilitator.py +263 -4
- phoenix/db/insertion/helpers.py +15 -0
- phoenix/db/insertion/span.py +3 -1
- phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
- phoenix/db/models.py +267 -9
- phoenix/db/types/model_provider.py +1 -0
- phoenix/db/types/token_price_customization.py +29 -0
- phoenix/server/api/context.py +38 -4
- phoenix/server/api/dataloaders/__init__.py +41 -5
- phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
- phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
- phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +58 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project.py +140 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
- phoenix/server/api/dataloaders/span_costs.py +35 -0
- phoenix/server/api/dataloaders/types.py +29 -0
- phoenix/server/api/helpers/playground_clients.py +562 -12
- phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
- phoenix/server/api/helpers/prompts/models.py +67 -0
- phoenix/server/api/input_types/GenerativeModelInput.py +2 -0
- phoenix/server/api/input_types/ProjectSessionSort.py +3 -0
- phoenix/server/api/input_types/SpanSort.py +17 -0
- phoenix/server/api/mutations/__init__.py +2 -0
- phoenix/server/api/mutations/chat_mutations.py +17 -0
- phoenix/server/api/mutations/model_mutations.py +208 -0
- phoenix/server/api/queries.py +82 -41
- phoenix/server/api/routers/v1/traces.py +11 -4
- phoenix/server/api/subscriptions.py +36 -2
- phoenix/server/api/types/CostBreakdown.py +15 -0
- phoenix/server/api/types/Experiment.py +59 -1
- phoenix/server/api/types/ExperimentRun.py +58 -4
- phoenix/server/api/types/GenerativeModel.py +143 -2
- phoenix/server/api/types/GenerativeProvider.py +33 -20
- phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
- phoenix/server/api/types/ModelInterface.py +11 -0
- phoenix/server/api/types/PlaygroundModel.py +10 -0
- phoenix/server/api/types/Project.py +42 -0
- phoenix/server/api/types/ProjectSession.py +44 -0
- phoenix/server/api/types/Span.py +137 -0
- phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
- phoenix/server/api/types/SpanCostSummary.py +10 -0
- phoenix/server/api/types/TokenPrice.py +16 -0
- phoenix/server/api/types/TokenUsage.py +3 -3
- phoenix/server/api/types/Trace.py +41 -0
- phoenix/server/app.py +59 -0
- phoenix/server/cost_tracking/cost_details_calculator.py +190 -0
- phoenix/server/cost_tracking/cost_model_lookup.py +151 -0
- phoenix/server/cost_tracking/helpers.py +68 -0
- phoenix/server/cost_tracking/model_cost_manifest.json +59 -329
- phoenix/server/cost_tracking/regex_specificity.py +397 -0
- phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
- phoenix/server/daemons/__init__.py +0 -0
- phoenix/server/daemons/generative_model_store.py +51 -0
- phoenix/server/daemons/span_cost_calculator.py +103 -0
- phoenix/server/dml_event_handler.py +1 -0
- phoenix/server/static/.vite/manifest.json +36 -36
- phoenix/server/static/assets/components-BnK9kodr.js +5055 -0
- phoenix/server/static/assets/{index-qiubV_74.js → index-S3YKLmbo.js} +13 -13
- phoenix/server/static/assets/{pages-C4V07ozl.js → pages-BW6PBHZb.js} +809 -417
- phoenix/server/static/assets/{vendor-Bfsiga8H.js → vendor-DqQvHbPa.js} +147 -147
- phoenix/server/static/assets/{vendor-arizeai-CQOWsrzm.js → vendor-arizeai-CLX44PFA.js} +1 -1
- phoenix/server/static/assets/{vendor-codemirror-CrcGVhB2.js → vendor-codemirror-Du3XyJnB.js} +1 -1
- phoenix/server/static/assets/{vendor-recharts-Yyg3G-Rq.js → vendor-recharts-B2PJDrnX.js} +25 -25
- phoenix/server/static/assets/{vendor-shiki-OPjag7Hm.js → vendor-shiki-CNbrFjf9.js} +1 -1
- phoenix/version.py +1 -1
- phoenix/server/cost_tracking/cost_lookup.py +0 -255
- phoenix/server/static/assets/components-CUUWyAMo.js +0 -4509
- {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/licenses/LICENSE +0 -0
phoenix/server/api/helpers/playground_clients.py

@@ -463,6 +463,35 @@ class OpenAIBaseStreamingClient(PlaygroundStreamingClient):
             yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
             yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens
 
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details is not None:
+            prompt_details = usage.prompt_tokens_details
+            if (
+                hasattr(prompt_details, "cached_tokens")
+                and prompt_details.cached_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, prompt_details.cached_tokens
+            if hasattr(prompt_details, "audio_tokens") and prompt_details.audio_tokens is not None:
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, prompt_details.audio_tokens
+
+        if (
+            hasattr(usage, "completion_tokens_details")
+            and usage.completion_tokens_details is not None
+        ):
+            completion_details = usage.completion_tokens_details
+            if (
+                hasattr(completion_details, "reasoning_tokens")
+                and completion_details.reasoning_tokens is not None
+            ):
+                yield (
+                    LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
+                    completion_details.reasoning_tokens,
+                )
+            if (
+                hasattr(completion_details, "audio_tokens")
+                and completion_details.audio_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, completion_details.audio_tokens
+
 
 def _get_credential_value(
     credentials: Optional[list[PlaygroundClientCredential]], env_var_name: str
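For readers skimming the hunk above: the hasattr/None guards exist because `prompt_tokens_details` and `completion_tokens_details` only appear on newer OpenAI SDK versions, and can still be None when present. A minimal, self-contained sketch of the same extraction pattern, using stand-in classes (`FakeUsage` and `FakePromptDetails` are illustrative, not part of Phoenix) and literal attribute strings in place of the `SpanAttributes` constants:

```python
from dataclasses import dataclass
from typing import Any, Iterator, Optional, Tuple


@dataclass
class FakePromptDetails:
    # Stand-in for the OpenAI SDK's prompt_tokens_details (illustrative)
    cached_tokens: Optional[int] = None
    audio_tokens: Optional[int] = None


@dataclass
class FakeUsage:
    # Stand-in for the OpenAI SDK's usage object (illustrative)
    completion_tokens: int = 0
    total_tokens: int = 0
    prompt_tokens_details: Optional[FakePromptDetails] = None


def extract_prompt_details(usage: Any) -> Iterator[Tuple[str, int]]:
    # Same hasattr/None guards as the hunk above: older SDK versions may
    # lack these fields entirely, or populate them with None.
    if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details is not None:
        details = usage.prompt_tokens_details
        if getattr(details, "cached_tokens", None) is not None:
            yield "llm.token_count.prompt_details.cache_read", details.cached_tokens
        if getattr(details, "audio_tokens", None) is not None:
            yield "llm.token_count.prompt_details.audio", details.audio_tokens


usage = FakeUsage(prompt_tokens_details=FakePromptDetails(cached_tokens=75))
print(dict(extract_prompt_details(usage)))
# {'llm.token_count.prompt_details.cache_read': 75}
```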
@@ -597,6 +626,465 @@ class OllamaStreamingClient(OpenAIBaseStreamingClient):
         self._attributes[LLM_SYSTEM] = OpenInferenceLLMSystemValues.OPENAI.value
 
 
+@register_llm_client(
+    provider_key=GenerativeProviderKey.AWS,
+    model_names=[
+        PROVIDER_DEFAULT,
+        "anthropic.claude-3-5-sonnet-20240620-v1:0",
+        "anthropic.claude-3-7-sonnet-20250219-v1:0",
+        "anthropic.claude-3-haiku-20240307-v1:0",
+        "anthropic.claude-3-5-sonnet-20241022-v2:0",
+        "anthropic.claude-3-5-haiku-20241022-v1:0",
+        "anthropic.claude-opus-4-20250514-v1:0",
+        "anthropic.claude-sonnet-4-20250514-v1:0",
+        "amazon.titan-embed-text-v2:0",
+        "amazon.nova-pro-v1:0",
+        "amazon.nova-premier-v1:0:8k",
+        "amazon.nova-premier-v1:0:20k",
+        "amazon.nova-premier-v1:0:1000k",
+        "amazon.nova-premier-v1:0:mm",
+        "amazon.nova-premier-v1:0",
+        "amazon.nova-lite-v1:0",
+        "amazon.nova-micro-v1:0",
+        "deepseek.r1-v1:0",
+        "mistral.pixtral-large-2502-v1:0",
+        "meta.llama3-1-8b-instruct-v1:0:128k",
+        "meta.llama3-1-8b-instruct-v1:0",
+        "meta.llama3-1-70b-instruct-v1:0:128k",
+        "meta.llama3-1-70b-instruct-v1:0",
+        "meta.llama3-1-405b-instruct-v1:0",
+        "meta.llama3-2-11b-instruct-v1:0",
+        "meta.llama3-2-90b-instruct-v1:0",
+        "meta.llama3-2-1b-instruct-v1:0",
+        "meta.llama3-2-3b-instruct-v1:0",
+        "meta.llama3-3-70b-instruct-v1:0",
+        "meta.llama4-scout-17b-instruct-v1:0",
+        "meta.llama4-maverick-17b-instruct-v1:0",
+    ],
+)
+class BedrockStreamingClient(PlaygroundStreamingClient):
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
+    ) -> None:
+        import boto3  # type: ignore[import-untyped]
+
+        super().__init__(model=model, credentials=credentials)
+        self.region = model.region or "us-east-1"
+        self.api = "converse"
+        self.aws_access_key_id = _get_credential_value(credentials, "AWS_ACCESS_KEY_ID") or getenv(
+            "AWS_ACCESS_KEY_ID"
+        )
+        self.aws_secret_access_key = _get_credential_value(
+            credentials, "AWS_SECRET_ACCESS_KEY"
+        ) or getenv("AWS_SECRET_ACCESS_KEY")
+        self.aws_session_token = _get_credential_value(credentials, "AWS_SESSION_TOKEN") or getenv(
+            "AWS_SESSION_TOKEN"
+        )
+        self.model_name = model.name
+        self.client = boto3.client(
+            service_name="bedrock-runtime",
+            region_name="us-east-1",  # match the default region in the UI
+            aws_access_key_id=self.aws_access_key_id,
+            aws_secret_access_key=self.aws_secret_access_key,
+            aws_session_token=self.aws_session_token,
+        )
+
+        self._attributes[LLM_PROVIDER] = "aws"
+        self._attributes[LLM_SYSTEM] = "aws"
+
+    @classmethod
+    def dependencies(cls) -> list[Dependency]:
+        return [Dependency(name="boto3")]
+
+    @classmethod
+    def supported_invocation_parameters(cls) -> list[InvocationParameter]:
+        return [
+            IntInvocationParameter(
+                invocation_name="max_tokens",
+                canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
+                label="Max Tokens",
+                default_value=1024,
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="temperature",
+                canonical_name=CanonicalParameterName.TEMPERATURE,
+                label="Temperature",
+                default_value=1.0,
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="top_p",
+                canonical_name=CanonicalParameterName.TOP_P,
+                label="Top P",
+                default_value=1.0,
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            JSONInvocationParameter(
+                invocation_name="tool_choice",
+                label="Tool Choice",
+                canonical_name=CanonicalParameterName.TOOL_CHOICE,
+            ),
+        ]
+
+    async def chat_completion_create(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        **invocation_parameters: Any,
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        import boto3
+
+        if (
+            self.client.meta.region_name != self.region
+        ):  # override the region if it's different from the default
+            self.client = boto3.client(
+                "bedrock-runtime",
+                region_name=self.region,
+                aws_access_key_id=self.aws_access_key_id,
+                aws_secret_access_key=self.aws_secret_access_key,
+                aws_session_token=self.aws_session_token,
+            )
+        if self.api == "invoke":
+            async for chunk in self._handle_invoke_api(messages, tools, invocation_parameters):
+                yield chunk
+        else:
+            async for chunk in self._handle_converse_api(messages, tools, invocation_parameters):
+                yield chunk
+
+    async def _handle_converse_api(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        invocation_parameters: dict[str, Any],
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        """
+        Handle the converse API.
+        """
+        # Build messages in Converse API format
+        converse_messages = self._build_converse_messages(messages)
+
+        # Build the request parameters for Converse API
+        converse_params: dict[str, Any] = {
+            "modelId": f"us.{self.model_name}",
+            "messages": converse_messages,
+            "inferenceConfig": {
+                "maxTokens": invocation_parameters["max_tokens"],
+                "temperature": invocation_parameters["temperature"],
+                "topP": invocation_parameters["top_p"],
+            },
+        }
+
+        # Add system prompt if available
+        system_prompt = self._extract_system_prompt(messages)
+        if system_prompt:
+            converse_params["system"] = [{"text": system_prompt}]
+
+        # Add tools if provided
+        if tools:
+            converse_params["toolConfig"] = {"tools": tools}
+            if (
+                "tool_choice" in invocation_parameters
+                and invocation_parameters["tool_choice"]["type"] != "none"
+            ):
+                converse_params["toolConfig"]["toolChoice"] = {}
+
+                if invocation_parameters["tool_choice"]["type"] == "auto":
+                    converse_params["toolConfig"]["toolChoice"]["auto"] = {}
+                elif invocation_parameters["tool_choice"]["type"] == "any":
+                    converse_params["toolConfig"]["toolChoice"]["any"] = {}
+                else:
+                    converse_params["toolConfig"]["toolChoice"]["tool"] = {
+                        "name": invocation_parameters["tool_choice"]["name"],
+                    }
+
+        # Make the streaming API call
+        response = self.client.converse_stream(**converse_params)
+
+        # Track active tool calls
+        active_tool_calls = {}  # contentBlockIndex -> {id, name, arguments_buffer}
+
+        # Process the event stream
+        event_stream = response.get("stream")
+
+        for event in event_stream:
+            # Handle content block start events
+            if "contentBlockStart" in event:
+                content_block_start = event["contentBlockStart"]
+                start_event = content_block_start.get("start", {})
+                block_index = content_block_start.get(
+                    "contentBlockIndex", 0
+                )  # Get the actual index
+
+                if "toolUse" in start_event:
+                    tool_use = start_event["toolUse"]
+                    active_tool_calls[block_index] = {  # Use the actual block index
+                        "id": tool_use.get("toolUseId"),
+                        "name": tool_use.get("name"),
+                        "arguments_buffer": "",
+                    }
+
+                    # Yield initial tool call chunk
+                    yield ToolCallChunk(
+                        id=tool_use.get("toolUseId"),
+                        function=FunctionCallChunk(
+                            name=tool_use.get("name"),
+                            arguments="",
+                        ),
+                    )
+
+            # Handle content block delta events
+            elif "contentBlockDelta" in event:
+                content_delta = event["contentBlockDelta"]
+                delta = content_delta.get("delta", {})
+                delta_index = content_delta.get("contentBlockIndex", 0)
+
+                # Handle text delta
+                if "text" in delta:
+                    yield TextChunk(content=delta["text"])
+
+                # Handle tool use delta
+                elif "toolUse" in delta:
+                    tool_delta = delta["toolUse"]
+                    if "input" in tool_delta and delta_index in active_tool_calls:
+                        # Accumulate tool arguments
+                        json_chunk = tool_delta["input"]
+                        active_tool_calls[delta_index]["arguments_buffer"] += json_chunk
+
+                        # Yield incremental argument update
+                        yield ToolCallChunk(
+                            id=active_tool_calls[delta_index]["id"],
+                            function=FunctionCallChunk(
+                                name=active_tool_calls[delta_index]["name"],
+                                arguments=json_chunk,
+                            ),
+                        )
+
+            # Handle content block stop events
+            elif "contentBlockStop" in event:
+                stop_index = event["contentBlockStop"].get("contentBlockIndex", 0)
+                if stop_index in active_tool_calls:
+                    del active_tool_calls[stop_index]
+
+            elif "metadata" in event:
+                self._attributes.update(
+                    {
+                        LLM_TOKEN_COUNT_PROMPT: event.get("metadata")
+                        .get("usage", {})
+                        .get("inputTokens", 0)
+                    }
+                )
+
+                self._attributes.update(
+                    {
+                        LLM_TOKEN_COUNT_COMPLETION: event.get("metadata")
+                        .get("usage", {})
+                        .get("outputTokens", 0)
+                    }
+                )
+
+                self._attributes.update(
+                    {
+                        LLM_TOKEN_COUNT_TOTAL: event.get("metadata")
+                        .get("usage", {})
+                        .get("totalTokens", 0)
+                    }
+                )
+
+    async def _handle_invoke_api(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        invocation_parameters: dict[str, Any],
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        if "anthropic" not in self.model_name:
+            raise ValueError("Invoke API is only supported for Anthropic models")
+
+        bedrock_messages, system_prompt = self._build_bedrock_messages(messages)
+        bedrock_params = {
+            "anthropic_version": "bedrock-2023-05-31",
+            "max_tokens": invocation_parameters["max_tokens"],
+            "messages": bedrock_messages,
+            "system": system_prompt,
+            "temperature": invocation_parameters["temperature"],
+            "top_p": invocation_parameters["top_p"],
+            "tools": tools,
+        }
+
+        response = self.client.invoke_model_with_response_stream(
+            modelId=f"us.{self.model_name}",  # or another Claude model
+            contentType="application/json",
+            accept="application/json",
+            body=json.dumps(bedrock_params),
+            trace="ENABLED_FULL",
+        )
+
+        # The response['body'] is an EventStream object
+        event_stream = response["body"]
+
+        # Track active tool calls and their accumulating arguments
+        active_tool_calls: dict[int, dict[str, Any]] = {}  # index -> {id, name, arguments_buffer}
+
+        for event in event_stream:
+            if "chunk" in event:
+                chunk_data = json.loads(event["chunk"]["bytes"].decode("utf-8"))
+
+                # Handle text content
+                if chunk_data.get("type") == "content_block_delta":
+                    delta = chunk_data.get("delta", {})
+                    index = chunk_data.get("index", 0)
+
+                    if delta.get("type") == "text_delta" and "text" in delta:
+                        yield TextChunk(content=delta["text"])
+
+                    elif delta.get("type") == "input_json_delta":
+                        # Accumulate tool arguments
+                        if index in active_tool_calls:
+                            active_tool_calls[index]["arguments_buffer"] += delta.get(
+                                "partial_json", ""
+                            )
+                            # Yield incremental argument update
+                            yield ToolCallChunk(
+                                id=active_tool_calls[index]["id"],
+                                function=FunctionCallChunk(
+                                    name=active_tool_calls[index]["name"],
+                                    arguments=delta.get("partial_json", ""),
+                                ),
+                            )
+
+                # Handle tool call start
+                elif chunk_data.get("type") == "content_block_start":
+                    content_block = chunk_data.get("content_block", {})
+                    index = chunk_data.get("index", 0)
+
+                    if content_block.get("type") == "tool_use":
+                        # Initialize tool call tracking
+                        active_tool_calls[index] = {
+                            "id": content_block.get("id"),
+                            "name": content_block.get("name"),
+                            "arguments_buffer": "",
+                        }
+
+                        # Yield initial tool call chunk
+                        yield ToolCallChunk(
+                            id=content_block.get("id"),
+                            function=FunctionCallChunk(
+                                name=content_block.get("name"),
+                                arguments="",  # Start with empty, will be filled by deltas
+                            ),
+                        )
+
+                # Handle content block stop (tool call complete)
+                elif chunk_data.get("type") == "content_block_stop":
+                    index = chunk_data.get("index", 0)
+                    if index in active_tool_calls:
+                        # Tool call is complete, clean up
+                        del active_tool_calls[index]
+
+                elif chunk_data.get("type") == "message_stop":
+                    self._attributes.update(
+                        {
+                            LLM_TOKEN_COUNT_COMPLETION: chunk_data.get(
+                                "amazon-bedrock-invocationMetrics", {}
+                            ).get("outputTokenCount", 0)
+                        }
+                    )
+
+                    self._attributes.update(
+                        {
+                            LLM_TOKEN_COUNT_PROMPT: chunk_data.get(
+                                "amazon-bedrock-invocationMetrics", {}
+                            ).get("inputTokenCount", 0)
+                        }
+                    )
+
+    def _build_bedrock_messages(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+    ) -> tuple[list[dict[str, Any]], str]:
+        bedrock_messages = []
+        system_prompt = ""
+        for role, content, _, _ in messages:
+            if role == ChatCompletionMessageRole.USER:
+                bedrock_messages.append(
+                    {
+                        "role": "user",
+                        "content": content,
+                    }
+                )
+            elif role == ChatCompletionMessageRole.AI:
+                bedrock_messages.append(
+                    {
+                        "role": "assistant",
+                        "content": content,
+                    }
+                )
+            elif role == ChatCompletionMessageRole.SYSTEM:
+                system_prompt += content + "\n"
+        return bedrock_messages, system_prompt
+
+    def _extract_system_prompt(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+    ) -> str:
+        """Extract system prompt from messages."""
+        system_prompts = []
+        for role, content, _, _ in messages:
+            if role == ChatCompletionMessageRole.SYSTEM:
+                system_prompts.append(content)
+        return "\n".join(system_prompts)
+
+    def _build_converse_messages(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+    ) -> list[dict[str, Any]]:
+        """Convert messages to Converse API format."""
+        converse_messages: list[dict[str, Any]] = []
+        for role, content, _id, tool_calls in messages:
+            if role == ChatCompletionMessageRole.USER:
+                converse_messages.append({"role": "user", "content": [{"text": content}]})
+            elif role == ChatCompletionMessageRole.TOOL:
+                converse_messages.append(
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "toolResult": {
+                                    "toolUseId": _id,
+                                    "content": [{"json": json.loads(content)}],
+                                }
+                            }
+                        ],
+                    }
+                )
+
+            elif role == ChatCompletionMessageRole.AI:
+                # Handle assistant messages with potential tool calls
+                message: dict[str, Any] = {"role": "assistant", "content": []}
+                if content:
+                    message["content"].append({"text": content})
+                if tool_calls:
+                    for tool_call in tool_calls:
+                        message["content"].append(tool_call)
+                converse_messages.append(message)
+        return converse_messages
+
+
 @register_llm_client(
     provider_key=GenerativeProviderKey.OPENAI,
     model_names=[
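The Converse path in the new client boils down to a single `converse_stream` call plus an event loop over the returned stream. A minimal standalone sketch of that loop against the real boto3 API (the model ID and prompt are illustrative; assumes AWS credentials and Bedrock model access are already configured):

```python
import boto3  # pip install boto3

# Minimal sketch of the Bedrock Converse streaming loop the new client wraps.
client = boto3.client("bedrock-runtime", region_name="us-east-1")
response = client.converse_stream(
    modelId="us.anthropic.claude-3-5-sonnet-20240620-v1:0",  # illustrative
    messages=[{"role": "user", "content": [{"text": "Say hello."}]}],
    inferenceConfig={"maxTokens": 256, "temperature": 1.0, "topP": 1.0},
)
for event in response["stream"]:
    if "contentBlockDelta" in event:
        # Text arrives incrementally in delta events
        print(event["contentBlockDelta"]["delta"].get("text", ""), end="")
    elif "metadata" in event:
        # Token usage arrives in the final metadata event,
        # which is where the diff reads inputTokens/outputTokens/totalTokens
        print("\nusage:", event["metadata"].get("usage", {}))
```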
@@ -656,13 +1144,20 @@ class OpenAIStreamingClient(OpenAIBaseStreamingClient):
     provider_key=GenerativeProviderKey.OPENAI,
     model_names=[
         "o1",
+        "o1-pro",
         "o1-2024-12-17",
+        "o1-pro-2025-03-19",
         "o1-mini",
         "o1-mini-2024-09-12",
         "o1-preview",
         "o1-preview-2024-09-12",
+        "o3",
+        "o3-pro",
+        "o3-2025-04-16",
         "o3-mini",
         "o3-mini-2025-01-31",
+        "o4-mini",
+        "o4-mini-2025-04-16",
     ],
 )
 class OpenAIReasoningStreamingClient(OpenAIStreamingClient):
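The `register_llm_client` decorator used throughout these hunks is defined elsewhere in playground_clients.py and is not shown in this diff. A hypothetical sketch of how such a decorator-based (provider, model) registry typically works — the names and structure here are assumptions, not Phoenix's actual implementation:

```python
from typing import Callable, Type

# Hypothetical registry keyed by (provider, model name); Phoenix's real
# register_llm_client may differ in shape and behavior.
_REGISTRY: dict[tuple[str, str], Type] = {}


def register_llm_client(provider_key: str, model_names: list[str]) -> Callable[[Type], Type]:
    def decorator(cls: Type) -> Type:
        for name in model_names:
            _REGISTRY[(provider_key, name)] = cls
        return cls  # the class is returned unchanged, only recorded

    return decorator


@register_llm_client(provider_key="openai", model_names=["o3", "o4-mini"])
class ReasoningClient:
    pass


assert _REGISTRY[("openai", "o3")] is ReasoningClient
```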
@@ -799,6 +1294,35 @@ class OpenAIReasoningStreamingClient(OpenAIStreamingClient):
             yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
             yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens
 
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details is not None:
+            prompt_details = usage.prompt_tokens_details
+            if (
+                hasattr(prompt_details, "cached_tokens")
+                and prompt_details.cached_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, prompt_details.cached_tokens
+            if hasattr(prompt_details, "audio_tokens") and prompt_details.audio_tokens is not None:
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, prompt_details.audio_tokens
+
+        if (
+            hasattr(usage, "completion_tokens_details")
+            and usage.completion_tokens_details is not None
+        ):
+            completion_details = usage.completion_tokens_details
+            if (
+                hasattr(completion_details, "reasoning_tokens")
+                and completion_details.reasoning_tokens is not None
+            ):
+                yield (
+                    LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
+                    completion_details.reasoning_tokens,
+                )
+            if (
+                hasattr(completion_details, "audio_tokens")
+                and completion_details.audio_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, completion_details.audio_tokens
+
 
 @register_llm_client(
     provider_key=GenerativeProviderKey.AZURE_OPENAI,
@@ -856,12 +1380,6 @@ class AzureOpenAIStreamingClient(OpenAIBaseStreamingClient):
     provider_key=GenerativeProviderKey.ANTHROPIC,
     model_names=[
         PROVIDER_DEFAULT,
-        "claude-sonnet-4-0",
-        "claude-sonnet-4-20250514",
-        "claude-opus-4-0",
-        "claude-opus-4-20250514",
-        "claude-3-7-sonnet-latest",
-        "claude-3-7-sonnet-20250219",
         "claude-3-5-sonnet-latest",
         "claude-3-5-haiku-latest",
         "claude-3-5-sonnet-20241022",
@@ -962,15 +1480,34 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
         async with await throttled_stream(**anthropic_params) as stream:
             async for event in stream:
                 if isinstance(event, anthropic_types.RawMessageStartEvent):
-
-
-
+                    usage = event.message.usage
+
+                    token_counts: dict[str, Any] = {}
+                    if prompt_tokens := (
+                        (usage.input_tokens or 0)
+                        + (getattr(usage, "cache_creation_input_tokens", 0) or 0)
+                        + (getattr(usage, "cache_read_input_tokens", 0) or 0)
+                    ):
+                        token_counts[LLM_TOKEN_COUNT_PROMPT] = prompt_tokens
+                    if cache_creation_tokens := getattr(usage, "cache_creation_input_tokens", None):
+                        if cache_creation_tokens is not None:
+                            token_counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] = (
+                                cache_creation_tokens
+                            )
+                    self._attributes.update(token_counts)
                 elif isinstance(event, anthropic_streaming.TextEvent):
                     yield TextChunk(content=event.text)
                 elif isinstance(event, anthropic_streaming.MessageStopEvent):
-
-
-
+                    usage = event.message.usage
+                    output_token_counts: dict[str, Any] = {}
+                    if usage.output_tokens:
+                        output_token_counts[LLM_TOKEN_COUNT_COMPLETION] = usage.output_tokens
+                    if cache_read_tokens := getattr(usage, "cache_read_input_tokens", None):
+                        if cache_read_tokens is not None:
+                            output_token_counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] = (
+                                cache_read_tokens
+                            )
+                    self._attributes.update(output_token_counts)
                 elif (
                     isinstance(event, anthropic_streaming.ContentBlockStopEvent)
                     and event.content_block.type == "tool_use"
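Note how the hunk above computes the prompt total: Anthropic reports cache writes and cache reads separately from `input_tokens`, so the client sums all three before recording the prompt count. A small worked sketch of that arithmetic (the `FakeAnthropicUsage` stand-in is illustrative, not the SDK's actual type):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeAnthropicUsage:
    # Stand-in for anthropic's Usage object (illustrative)
    input_tokens: int
    cache_creation_input_tokens: Optional[int] = None
    cache_read_input_tokens: Optional[int] = None


def prompt_total(usage: FakeAnthropicUsage) -> int:
    # Anthropic reports cache writes/reads separately from input_tokens,
    # so the full prompt count is the sum of all three.
    return (
        (usage.input_tokens or 0)
        + (getattr(usage, "cache_creation_input_tokens", 0) or 0)
        + (getattr(usage, "cache_read_input_tokens", 0) or 0)
    )


print(prompt_total(FakeAnthropicUsage(12, cache_creation_input_tokens=1024)))  # 1036
```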
@@ -1055,6 +1592,10 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
 @register_llm_client(
     provider_key=GenerativeProviderKey.ANTHROPIC,
     model_names=[
+        "claude-sonnet-4-0",
+        "claude-sonnet-4-20250514",
+        "claude-opus-4-0",
+        "claude-opus-4-20250514",
         "claude-3-7-sonnet-latest",
         "claude-3-7-sonnet-20250219",
     ],
@@ -1239,6 +1780,15 @@ LLM_SYSTEM = SpanAttributes.LLM_SYSTEM
 LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
 LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
 LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
+LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ
+LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE = (
+    SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE
+)
+LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO
+LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING = (
+    SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING
+)
+LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO
 
 
 class _HttpxClient(wrapt.ObjectProxy):  # type: ignore