agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/models/groq/groq.py
CHANGED
|
@@ -1,65 +1,32 @@
|
|
|
1
|
+
from collections.abc import AsyncIterator
|
|
1
2
|
from dataclasses import dataclass
|
|
2
3
|
from os import getenv
|
|
3
|
-
from typing import Any, Dict, Iterator, List, Optional, Union
|
|
4
|
+
from typing import Any, Dict, Iterator, List, Optional, Type, Union
|
|
4
5
|
|
|
5
6
|
import httpx
|
|
7
|
+
from pydantic import BaseModel
|
|
6
8
|
|
|
7
|
-
from agno.
|
|
9
|
+
from agno.exceptions import ModelAuthenticationError, ModelProviderError
|
|
8
10
|
from agno.models.base import Model
|
|
9
11
|
from agno.models.message import Message
|
|
10
|
-
from agno.models.
|
|
11
|
-
from agno.
|
|
12
|
-
from agno.
|
|
13
|
-
from agno.utils.
|
|
12
|
+
from agno.models.metrics import Metrics
|
|
13
|
+
from agno.models.response import ModelResponse
|
|
14
|
+
from agno.run.agent import RunOutput
|
|
15
|
+
from agno.utils.http import get_default_async_client, get_default_sync_client
|
|
16
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
17
|
+
from agno.utils.openai import images_to_message
|
|
14
18
|
|
|
15
19
|
try:
|
|
20
|
+
from groq import APIError, APIResponseValidationError, APIStatusError
|
|
16
21
|
from groq import AsyncGroq as AsyncGroqClient
|
|
17
22
|
from groq import Groq as GroqClient
|
|
18
|
-
from groq.types.chat import ChatCompletion
|
|
23
|
+
from groq.types.chat import ChatCompletion
|
|
19
24
|
from groq.types.chat.chat_completion_chunk import ChatCompletionChunk, ChoiceDelta, ChoiceDeltaToolCall
|
|
20
25
|
from groq.types.completion_usage import CompletionUsage
|
|
21
|
-
except
|
|
26
|
+
except ImportError:
|
|
22
27
|
raise ImportError("`groq` not installed. Please install using `pip install groq`")
|
|
23
28
|
|
|
24
29
|
|
|
25
|
-
@dataclass
|
|
26
|
-
class Metrics(BaseMetrics):
|
|
27
|
-
completion_time: Optional[float] = None
|
|
28
|
-
prompt_time: Optional[float] = None
|
|
29
|
-
queue_time: Optional[float] = None
|
|
30
|
-
total_time: Optional[float] = None
|
|
31
|
-
|
|
32
|
-
def log(self):
|
|
33
|
-
metric_lines = []
|
|
34
|
-
if self.time_to_first_token is not None:
|
|
35
|
-
metric_lines.append(f"* Time to first token: {self.time_to_first_token:.4f}s")
|
|
36
|
-
metric_lines.extend(
|
|
37
|
-
[
|
|
38
|
-
f"* Time to generate response: {self.response_timer.elapsed:.4f}s",
|
|
39
|
-
f"* Tokens per second: {self.output_tokens / self.response_timer.elapsed:.4f} tokens/s",
|
|
40
|
-
f"* Input tokens: {self.input_tokens or self.prompt_tokens}",
|
|
41
|
-
f"* Output tokens: {self.output_tokens or self.completion_tokens}",
|
|
42
|
-
f"* Total tokens: {self.total_tokens}",
|
|
43
|
-
]
|
|
44
|
-
)
|
|
45
|
-
if self.completion_time is not None:
|
|
46
|
-
metric_lines.append(f"* Completion time: {self.completion_time:.4f}s")
|
|
47
|
-
if self.prompt_time is not None:
|
|
48
|
-
metric_lines.append(f"* Prompt time: {self.prompt_time:.4f}s")
|
|
49
|
-
if self.queue_time is not None:
|
|
50
|
-
metric_lines.append(f"* Queue time: {self.queue_time:.4f}s")
|
|
51
|
-
if self.total_time is not None:
|
|
52
|
-
metric_lines.append(f"* Total time: {self.total_time:.4f}s")
|
|
53
|
-
|
|
54
|
-
self._log(metric_lines=metric_lines)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
@dataclass
|
|
58
|
-
class StreamData:
|
|
59
|
-
response_content: str = ""
|
|
60
|
-
response_tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
|
|
61
|
-
|
|
62
|
-
|
|
63
30
|
@dataclass
|
|
64
31
|
class Groq(Model):
|
|
65
32
|
"""
|
|
@@ -68,7 +35,7 @@ class Groq(Model):
|
|
|
68
35
|
For more information, see: https://console.groq.com/docs/libraries
|
|
69
36
|
"""
|
|
70
37
|
|
|
71
|
-
id: str = "
|
|
38
|
+
id: str = "llama-3.3-70b-versatile"
|
|
72
39
|
name: str = "Groq"
|
|
73
40
|
provider: str = "Groq"
|
|
74
41
|
|
|
@@ -78,7 +45,6 @@ class Groq(Model):
|
|
|
78
45
|
logprobs: Optional[bool] = None
|
|
79
46
|
max_tokens: Optional[int] = None
|
|
80
47
|
presence_penalty: Optional[float] = None
|
|
81
|
-
response_format: Optional[Dict[str, Any]] = None
|
|
82
48
|
seed: Optional[int] = None
|
|
83
49
|
stop: Optional[Union[str, List[str]]] = None
|
|
84
50
|
temperature: Optional[float] = None
|
|
@@ -96,115 +62,134 @@ class Groq(Model):
|
|
|
96
62
|
max_retries: Optional[int] = None
|
|
97
63
|
default_headers: Optional[Any] = None
|
|
98
64
|
default_query: Optional[Any] = None
|
|
99
|
-
http_client: Optional[httpx.Client] = None
|
|
65
|
+
http_client: Optional[Union[httpx.Client, httpx.AsyncClient]] = None
|
|
100
66
|
client_params: Optional[Dict[str, Any]] = None
|
|
101
67
|
|
|
102
68
|
# Groq clients
|
|
103
69
|
client: Optional[GroqClient] = None
|
|
104
70
|
async_client: Optional[AsyncGroqClient] = None
|
|
105
71
|
|
|
106
|
-
def
|
|
107
|
-
|
|
72
|
+
def _get_client_params(self) -> Dict[str, Any]:
|
|
73
|
+
# Fetch API key from env if not already set
|
|
108
74
|
if not self.api_key:
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
75
|
+
self.api_key = getenv("GROQ_API_KEY")
|
|
76
|
+
if not self.api_key:
|
|
77
|
+
raise ModelAuthenticationError(
|
|
78
|
+
message="GROQ_API_KEY not set. Please set the GROQ_API_KEY environment variable.",
|
|
79
|
+
model_name=self.name,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Define base client params
|
|
83
|
+
base_params = {
|
|
84
|
+
"api_key": self.api_key,
|
|
85
|
+
"base_url": self.base_url,
|
|
86
|
+
"timeout": self.timeout,
|
|
87
|
+
"max_retries": self.max_retries,
|
|
88
|
+
"default_headers": self.default_headers,
|
|
89
|
+
"default_query": self.default_query,
|
|
90
|
+
}
|
|
91
|
+
# Create client_params dict with non-None values
|
|
92
|
+
client_params = {k: v for k, v in base_params.items() if v is not None}
|
|
93
|
+
# Add additional client params if provided
|
|
124
94
|
if self.client_params:
|
|
125
95
|
client_params.update(self.client_params)
|
|
126
96
|
return client_params
|
|
127
97
|
|
|
128
98
|
def get_client(self) -> GroqClient:
|
|
129
99
|
"""
|
|
130
|
-
Returns a Groq client.
|
|
100
|
+
Returns a Groq client. Caches the client to avoid recreating it on every request.
|
|
131
101
|
|
|
132
102
|
Returns:
|
|
133
103
|
GroqClient: An instance of the Groq client.
|
|
134
104
|
"""
|
|
135
|
-
if self.client:
|
|
105
|
+
if self.client and not self.client.is_closed():
|
|
136
106
|
return self.client
|
|
137
107
|
|
|
138
|
-
client_params: Dict[str, Any] = self.
|
|
108
|
+
client_params: Dict[str, Any] = self._get_client_params()
|
|
139
109
|
if self.http_client is not None:
|
|
140
|
-
|
|
141
|
-
|
|
110
|
+
if isinstance(self.http_client, httpx.Client):
|
|
111
|
+
client_params["http_client"] = self.http_client
|
|
112
|
+
else:
|
|
113
|
+
log_warning("http_client is not an instance of httpx.Client. Using default global httpx.Client.")
|
|
114
|
+
# Use global sync client when user http_client is invalid
|
|
115
|
+
client_params["http_client"] = get_default_sync_client()
|
|
116
|
+
else:
|
|
117
|
+
# Use global sync client when no custom http_client is provided
|
|
118
|
+
client_params["http_client"] = get_default_sync_client()
|
|
119
|
+
|
|
120
|
+
self.client = GroqClient(**client_params)
|
|
121
|
+
return self.client
|
|
142
122
|
|
|
143
123
|
def get_async_client(self) -> AsyncGroqClient:
|
|
144
124
|
"""
|
|
145
|
-
Returns an asynchronous Groq client.
|
|
125
|
+
Returns an asynchronous Groq client. Caches the client to avoid recreating it on every request.
|
|
146
126
|
|
|
147
127
|
Returns:
|
|
148
128
|
AsyncGroqClient: An instance of the asynchronous Groq client.
|
|
149
129
|
"""
|
|
150
|
-
if self.async_client:
|
|
130
|
+
if self.async_client and not self.async_client.is_closed():
|
|
151
131
|
return self.async_client
|
|
152
132
|
|
|
153
|
-
client_params: Dict[str, Any] = self.
|
|
133
|
+
client_params: Dict[str, Any] = self._get_client_params()
|
|
154
134
|
if self.http_client:
|
|
155
|
-
|
|
135
|
+
if isinstance(self.http_client, httpx.AsyncClient):
|
|
136
|
+
client_params["http_client"] = self.http_client
|
|
137
|
+
else:
|
|
138
|
+
log_warning(
|
|
139
|
+
"http_client is not an instance of httpx.AsyncClient. Using default global httpx.AsyncClient."
|
|
140
|
+
)
|
|
141
|
+
# Use global async client when user http_client is invalid
|
|
142
|
+
client_params["http_client"] = get_default_async_client()
|
|
156
143
|
else:
|
|
157
|
-
#
|
|
158
|
-
client_params["http_client"] =
|
|
159
|
-
limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
|
|
160
|
-
)
|
|
161
|
-
return AsyncGroqClient(**client_params)
|
|
144
|
+
# Use global async client when no custom http_client is provided
|
|
145
|
+
client_params["http_client"] = get_default_async_client()
|
|
162
146
|
|
|
163
|
-
|
|
164
|
-
|
|
147
|
+
# Create and cache the client
|
|
148
|
+
self.async_client = AsyncGroqClient(**client_params)
|
|
149
|
+
return self.async_client
|
|
150
|
+
|
|
151
|
+
def get_request_params(
|
|
152
|
+
self,
|
|
153
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
154
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
155
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
156
|
+
) -> Dict[str, Any]:
|
|
165
157
|
"""
|
|
166
158
|
Returns keyword arguments for API requests.
|
|
167
159
|
|
|
168
160
|
Returns:
|
|
169
161
|
Dict[str, Any]: A dictionary of keyword arguments for API requests.
|
|
170
162
|
"""
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
if self.extra_headers:
|
|
197
|
-
request_params["extra_headers"] = self.extra_headers
|
|
198
|
-
if self.extra_query:
|
|
199
|
-
request_params["extra_query"] = self.extra_query
|
|
200
|
-
if self.tools:
|
|
201
|
-
request_params["tools"] = self.tools
|
|
202
|
-
if self.tool_choice is None:
|
|
203
|
-
request_params["tool_choice"] = "auto"
|
|
204
|
-
else:
|
|
205
|
-
request_params["tool_choice"] = self.tool_choice
|
|
163
|
+
# Define base request parameters
|
|
164
|
+
base_params = {
|
|
165
|
+
"frequency_penalty": self.frequency_penalty,
|
|
166
|
+
"logit_bias": self.logit_bias,
|
|
167
|
+
"logprobs": self.logprobs,
|
|
168
|
+
"max_tokens": self.max_tokens,
|
|
169
|
+
"presence_penalty": self.presence_penalty,
|
|
170
|
+
"response_format": response_format,
|
|
171
|
+
"seed": self.seed,
|
|
172
|
+
"stop": self.stop,
|
|
173
|
+
"temperature": self.temperature,
|
|
174
|
+
"top_logprobs": self.top_logprobs,
|
|
175
|
+
"top_p": self.top_p,
|
|
176
|
+
"user": self.user,
|
|
177
|
+
"extra_headers": self.extra_headers,
|
|
178
|
+
"extra_query": self.extra_query,
|
|
179
|
+
}
|
|
180
|
+
# Filter out None values
|
|
181
|
+
request_params = {k: v for k, v in base_params.items() if v is not None}
|
|
182
|
+
# Add tools
|
|
183
|
+
if tools is not None:
|
|
184
|
+
request_params["tools"] = tools
|
|
185
|
+
if tool_choice is not None:
|
|
186
|
+
request_params["tool_choice"] = tool_choice
|
|
187
|
+
# Add additional request params if provided
|
|
206
188
|
if self.request_params:
|
|
207
189
|
request_params.update(self.request_params)
|
|
190
|
+
|
|
191
|
+
if request_params:
|
|
192
|
+
log_debug(f"Calling {self.provider} with request parameters: {request_params}", log_level=2)
|
|
208
193
|
return request_params
|
|
209
194
|
|
|
210
195
|
def to_dict(self) -> Dict[str, Any]:
|
|
@@ -214,15 +199,14 @@ class Groq(Model):
|
|
|
214
199
|
Returns:
|
|
215
200
|
Dict[str, Any]: The dictionary representation of the model.
|
|
216
201
|
"""
|
|
217
|
-
|
|
218
|
-
|
|
202
|
+
model_dict = super().to_dict()
|
|
203
|
+
model_dict.update(
|
|
219
204
|
{
|
|
220
205
|
"frequency_penalty": self.frequency_penalty,
|
|
221
206
|
"logit_bias": self.logit_bias,
|
|
222
207
|
"logprobs": self.logprobs,
|
|
223
208
|
"max_tokens": self.max_tokens,
|
|
224
209
|
"presence_penalty": self.presence_penalty,
|
|
225
|
-
"response_format": self.response_format,
|
|
226
210
|
"seed": self.seed,
|
|
227
211
|
"stop": self.stop,
|
|
228
212
|
"temperature": self.temperature,
|
|
@@ -231,631 +215,372 @@ class Groq(Model):
|
|
|
231
215
|
"user": self.user,
|
|
232
216
|
"extra_headers": self.extra_headers,
|
|
233
217
|
"extra_query": self.extra_query,
|
|
234
|
-
"tools": self.tools,
|
|
235
|
-
"tool_choice": self.tool_choice
|
|
236
|
-
if (self.tools is not None and self.tool_choice is not None)
|
|
237
|
-
else "auto",
|
|
238
218
|
}
|
|
239
219
|
)
|
|
240
|
-
cleaned_dict = {k: v for k, v in
|
|
220
|
+
cleaned_dict = {k: v for k, v in model_dict.items() if v is not None}
|
|
241
221
|
return cleaned_dict
|
|
242
222
|
|
|
243
|
-
def format_message(
|
|
223
|
+
def format_message(
|
|
224
|
+
self,
|
|
225
|
+
message: Message,
|
|
226
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
227
|
+
compress_tool_results: bool = False,
|
|
228
|
+
) -> Dict[str, Any]:
|
|
244
229
|
"""
|
|
245
|
-
Format a message into the format expected by
|
|
230
|
+
Format a message into the format expected by Groq.
|
|
246
231
|
|
|
247
232
|
Args:
|
|
248
233
|
message (Message): The message to format.
|
|
234
|
+
response_format: Optional response format specification.
|
|
235
|
+
compress_tool_results: Whether to compress tool results.
|
|
249
236
|
|
|
250
237
|
Returns:
|
|
251
238
|
Dict[str, Any]: The formatted message.
|
|
252
239
|
"""
|
|
253
|
-
if
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
# message = self.add_audio_to_message(message=message, audio=message.audio)
|
|
240
|
+
# Use compressed content for tool messages if compression is active
|
|
241
|
+
if message.role == "tool":
|
|
242
|
+
content = message.get_content(use_compressed_content=compress_tool_results)
|
|
243
|
+
else:
|
|
244
|
+
content = message.content
|
|
259
245
|
|
|
260
|
-
|
|
246
|
+
message_dict: Dict[str, Any] = {
|
|
247
|
+
"role": message.role,
|
|
248
|
+
"content": content,
|
|
249
|
+
"name": message.name,
|
|
250
|
+
"tool_call_id": message.tool_call_id,
|
|
251
|
+
"tool_calls": message.tool_calls,
|
|
252
|
+
}
|
|
253
|
+
message_dict = {k: v for k, v in message_dict.items() if v is not None}
|
|
261
254
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
255
|
+
if (
|
|
256
|
+
message.role == "system"
|
|
257
|
+
and isinstance(message.content, str)
|
|
258
|
+
and response_format is not None
|
|
259
|
+
and isinstance(response_format, dict)
|
|
260
|
+
and response_format.get("type") == "json_object"
|
|
261
|
+
):
|
|
262
|
+
# This is required by Groq to ensure the model outputs in the correct format
|
|
263
|
+
message.content += "\n\nYour output should be in JSON format."
|
|
265
264
|
|
|
266
|
-
|
|
267
|
-
|
|
265
|
+
if message.images is not None and len(message.images) > 0:
|
|
266
|
+
# Ignore non-string message content
|
|
267
|
+
# because we assume that the images/audio are already added to the message
|
|
268
|
+
if isinstance(message.content, str):
|
|
269
|
+
message_dict["content"] = [{"type": "text", "text": message.content}]
|
|
270
|
+
message_dict["content"].extend(images_to_message(images=message.images))
|
|
268
271
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
"""
|
|
272
|
-
return self.get_client().chat.completions.create(
|
|
273
|
-
model=self.id,
|
|
274
|
-
messages=[self.format_message(m) for m in messages], # type: ignore
|
|
275
|
-
**self.request_kwargs,
|
|
276
|
-
)
|
|
272
|
+
if message.files is not None and len(message.files) > 0:
|
|
273
|
+
log_warning("File input is currently unsupported.")
|
|
277
274
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
Sends an asynchronous chat completion request to the Groq API.
|
|
275
|
+
if message.audio is not None and len(message.audio) > 0:
|
|
276
|
+
log_warning("Audio input is currently unsupported.")
|
|
281
277
|
|
|
282
|
-
|
|
283
|
-
|
|
278
|
+
if message.videos is not None and len(message.videos) > 0:
|
|
279
|
+
log_warning("Video input is currently unsupported.")
|
|
284
280
|
|
|
285
|
-
|
|
286
|
-
ChatCompletion: The chat completion response from the API.
|
|
287
|
-
"""
|
|
288
|
-
return await self.get_async_client().chat.completions.create(
|
|
289
|
-
model=self.id,
|
|
290
|
-
messages=[self.format_message(m) for m in messages], # type: ignore
|
|
291
|
-
**self.request_kwargs,
|
|
292
|
-
)
|
|
281
|
+
return message_dict
|
|
293
282
|
|
|
294
|
-
def
|
|
283
|
+
def invoke(
|
|
284
|
+
self,
|
|
285
|
+
messages: List[Message],
|
|
286
|
+
assistant_message: Message,
|
|
287
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
288
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
289
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
290
|
+
run_response: Optional[RunOutput] = None,
|
|
291
|
+
compress_tool_results: bool = False,
|
|
292
|
+
) -> ModelResponse:
|
|
295
293
|
"""
|
|
296
|
-
Send a
|
|
297
|
-
|
|
298
|
-
Args:
|
|
299
|
-
messages (List[Message]): A list of messages to send to the model.
|
|
300
|
-
|
|
301
|
-
Returns:
|
|
302
|
-
Iterator[ChatCompletionChunk]: An iterator of chat completion chunks.
|
|
294
|
+
Send a chat completion request to the Groq API.
|
|
303
295
|
"""
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
stream=True,
|
|
308
|
-
**self.request_kwargs,
|
|
309
|
-
)
|
|
296
|
+
try:
|
|
297
|
+
if run_response and run_response.metrics:
|
|
298
|
+
run_response.metrics.set_time_to_first_token()
|
|
310
299
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
300
|
+
assistant_message.metrics.start_timer()
|
|
301
|
+
provider_response = self.get_client().chat.completions.create(
|
|
302
|
+
model=self.id,
|
|
303
|
+
messages=[self.format_message(m, response_format, compress_tool_results) for m in messages], # type: ignore
|
|
304
|
+
**self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
|
|
305
|
+
)
|
|
306
|
+
assistant_message.metrics.stop_timer()
|
|
314
307
|
|
|
315
|
-
|
|
316
|
-
messages (List[Message]): A list of messages to send to the model.
|
|
308
|
+
model_response = self._parse_provider_response(provider_response, response_format=response_format)
|
|
317
309
|
|
|
318
|
-
|
|
319
|
-
Any: An asynchronous iterator of chat completion chunks.
|
|
320
|
-
"""
|
|
321
|
-
async_stream = await self.get_async_client().chat.completions.create(
|
|
322
|
-
model=self.id,
|
|
323
|
-
messages=[self.format_message(m) for m in messages], # type: ignore
|
|
324
|
-
stream=True,
|
|
325
|
-
**self.request_kwargs,
|
|
326
|
-
)
|
|
327
|
-
async for chunk in async_stream: # type: ignore
|
|
328
|
-
yield chunk
|
|
310
|
+
return model_response
|
|
329
311
|
|
|
330
|
-
|
|
312
|
+
except (APIResponseValidationError, APIStatusError) as e:
|
|
313
|
+
log_error(f"Error calling Groq API: {str(e)}")
|
|
314
|
+
raise ModelProviderError(
|
|
315
|
+
message=e.response.text, status_code=e.response.status_code, model_name=self.name, model_id=self.id
|
|
316
|
+
) from e
|
|
317
|
+
except APIError as e:
|
|
318
|
+
log_error(f"Error calling Groq API: {str(e)}")
|
|
319
|
+
raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
320
|
+
except Exception as e:
|
|
321
|
+
log_error(f"Unexpected error calling Groq API: {str(e)}")
|
|
322
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
323
|
+
|
|
324
|
+
async def ainvoke(
|
|
331
325
|
self,
|
|
332
|
-
assistant_message: Message,
|
|
333
326
|
messages: List[Message],
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
327
|
+
assistant_message: Message,
|
|
328
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
329
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
330
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
331
|
+
run_response: Optional[RunOutput] = None,
|
|
332
|
+
compress_tool_results: bool = False,
|
|
333
|
+
) -> ModelResponse:
|
|
337
334
|
"""
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
Args:
|
|
341
|
-
assistant_message (Message): The assistant message.
|
|
342
|
-
messages (List[Message]): The list of messages.
|
|
343
|
-
model_response (ModelResponse): The model response.
|
|
344
|
-
tool_role (str): The role of the tool call. Defaults to "tool".
|
|
345
|
-
|
|
346
|
-
Returns:
|
|
347
|
-
Optional[ModelResponse]: The model response after handling tool calls.
|
|
335
|
+
Sends an asynchronous chat completion request to the Groq API.
|
|
348
336
|
"""
|
|
349
|
-
|
|
350
|
-
if
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
messages.append(
|
|
361
|
-
Message(
|
|
362
|
-
role="tool",
|
|
363
|
-
tool_call_id=_tool_call_id,
|
|
364
|
-
content="Could not find function to call.",
|
|
365
|
-
)
|
|
366
|
-
)
|
|
367
|
-
continue
|
|
368
|
-
if _function_call.error is not None:
|
|
369
|
-
messages.append(
|
|
370
|
-
Message(
|
|
371
|
-
role="tool",
|
|
372
|
-
tool_call_id=_tool_call_id,
|
|
373
|
-
content=_function_call.error,
|
|
374
|
-
)
|
|
375
|
-
)
|
|
376
|
-
continue
|
|
377
|
-
function_calls_to_run.append(_function_call)
|
|
378
|
-
|
|
379
|
-
if self.show_tool_calls:
|
|
380
|
-
model_response.content += "\nRunning:"
|
|
381
|
-
for _f in function_calls_to_run:
|
|
382
|
-
model_response.content += f"\n - {_f.get_call_str()}"
|
|
383
|
-
model_response.content += "\n\n"
|
|
384
|
-
|
|
385
|
-
for function_call_response in self.run_function_calls(
|
|
386
|
-
function_calls=function_calls_to_run, function_call_results=function_call_results, tool_role=tool_role
|
|
387
|
-
):
|
|
388
|
-
if (
|
|
389
|
-
function_call_response.event == ModelResponseEvent.tool_call_completed.value
|
|
390
|
-
and function_call_response.tool_calls is not None
|
|
391
|
-
):
|
|
392
|
-
model_response.tool_calls.extend(function_call_response.tool_calls)
|
|
337
|
+
try:
|
|
338
|
+
if run_response and run_response.metrics:
|
|
339
|
+
run_response.metrics.set_time_to_first_token()
|
|
340
|
+
|
|
341
|
+
assistant_message.metrics.start_timer()
|
|
342
|
+
response = await self.get_async_client().chat.completions.create(
|
|
343
|
+
model=self.id,
|
|
344
|
+
messages=[self.format_message(m, response_format, compress_tool_results) for m in messages], # type: ignore
|
|
345
|
+
**self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
|
|
346
|
+
)
|
|
347
|
+
assistant_message.metrics.stop_timer()
|
|
393
348
|
|
|
394
|
-
|
|
395
|
-
messages.extend(function_call_results)
|
|
349
|
+
model_response = self._parse_provider_response(response, response_format=response_format)
|
|
396
350
|
|
|
397
351
|
return model_response
|
|
398
|
-
return None
|
|
399
|
-
|
|
400
|
-
def update_usage_metrics(
|
|
401
|
-
self, assistant_message: Message, metrics: Metrics, response_usage: Optional[CompletionUsage]
|
|
402
|
-
) -> None:
|
|
403
|
-
"""
|
|
404
|
-
Update the usage metrics for the assistant message and the model.
|
|
405
352
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
if prompt_tokens is not None:
|
|
420
|
-
metrics.input_tokens = prompt_tokens
|
|
421
|
-
metrics.prompt_tokens = prompt_tokens
|
|
422
|
-
assistant_message.metrics["input_tokens"] = prompt_tokens
|
|
423
|
-
assistant_message.metrics["prompt_tokens"] = prompt_tokens
|
|
424
|
-
self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + prompt_tokens
|
|
425
|
-
self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + prompt_tokens
|
|
426
|
-
if completion_tokens is not None:
|
|
427
|
-
metrics.output_tokens = completion_tokens
|
|
428
|
-
metrics.completion_tokens = completion_tokens
|
|
429
|
-
assistant_message.metrics["output_tokens"] = completion_tokens
|
|
430
|
-
assistant_message.metrics["completion_tokens"] = completion_tokens
|
|
431
|
-
self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + completion_tokens
|
|
432
|
-
self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + completion_tokens
|
|
433
|
-
if total_tokens is not None:
|
|
434
|
-
metrics.total_tokens = total_tokens
|
|
435
|
-
assistant_message.metrics["total_tokens"] = total_tokens
|
|
436
|
-
self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + total_tokens
|
|
437
|
-
if response_usage.completion_time is not None:
|
|
438
|
-
metrics.completion_time = response_usage.completion_time
|
|
439
|
-
assistant_message.metrics["completion_time"] = response_usage.completion_time
|
|
440
|
-
self.metrics["completion_time"] = (
|
|
441
|
-
self.metrics.get("completion_time", 0) + response_usage.completion_time
|
|
442
|
-
)
|
|
443
|
-
if response_usage.prompt_time is not None:
|
|
444
|
-
metrics.prompt_time = response_usage.prompt_time
|
|
445
|
-
assistant_message.metrics["prompt_time"] = response_usage.prompt_time
|
|
446
|
-
self.metrics["prompt_time"] = self.metrics.get("prompt_time", 0) + response_usage.prompt_time
|
|
447
|
-
if response_usage.queue_time is not None:
|
|
448
|
-
metrics.queue_time = response_usage.queue_time
|
|
449
|
-
assistant_message.metrics["queue_time"] = response_usage.queue_time
|
|
450
|
-
self.metrics["queue_time"] = self.metrics.get("queue_time", 0) + response_usage.queue_time
|
|
451
|
-
if response_usage.total_time is not None:
|
|
452
|
-
metrics.total_time = response_usage.total_time
|
|
453
|
-
assistant_message.metrics["total_time"] = response_usage.total_time
|
|
454
|
-
self.metrics["total_time"] = self.metrics.get("total_time", 0) + response_usage.total_time
|
|
455
|
-
|
|
456
|
-
def create_assistant_message(
|
|
353
|
+
except (APIResponseValidationError, APIStatusError) as e:
|
|
354
|
+
log_error(f"Error calling Groq API: {str(e)}")
|
|
355
|
+
raise ModelProviderError(
|
|
356
|
+
message=e.response.text, status_code=e.response.status_code, model_name=self.name, model_id=self.id
|
|
357
|
+
) from e
|
|
358
|
+
except APIError as e:
|
|
359
|
+
log_error(f"Error calling Groq API: {str(e)}")
|
|
360
|
+
raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
361
|
+
except Exception as e:
|
|
362
|
+
log_error(f"Unexpected error calling Groq API: {str(e)}")
|
|
363
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
364
|
+
|
|
365
|
+
def invoke_stream(
|
|
457
366
|
self,
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
367
|
+
messages: List[Message],
|
|
368
|
+
assistant_message: Message,
|
|
369
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
370
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
371
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
372
|
+
run_response: Optional[RunOutput] = None,
|
|
373
|
+
compress_tool_results: bool = False,
|
|
374
|
+
) -> Iterator[ModelResponse]:
|
|
462
375
|
"""
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
Args:
|
|
466
|
-
response_message (ChatCompletionMessage): The response message.
|
|
467
|
-
metrics (Metrics): The metrics.
|
|
468
|
-
response_usage (Optional[CompletionUsage]): The response usage.
|
|
469
|
-
|
|
470
|
-
Returns:
|
|
471
|
-
Message: The assistant message.
|
|
376
|
+
Send a streaming chat completion request to the Groq API.
|
|
472
377
|
"""
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
)
|
|
477
|
-
if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
|
|
478
|
-
try:
|
|
479
|
-
assistant_message.tool_calls = [t.model_dump() for t in response_message.tool_calls]
|
|
480
|
-
except Exception as e:
|
|
481
|
-
logger.warning(f"Error processing tool calls: {e}")
|
|
378
|
+
try:
|
|
379
|
+
if run_response and run_response.metrics:
|
|
380
|
+
run_response.metrics.set_time_to_first_token()
|
|
482
381
|
|
|
483
|
-
|
|
484
|
-
self.update_usage_metrics(assistant_message, metrics, response_usage)
|
|
485
|
-
return assistant_message
|
|
382
|
+
assistant_message.metrics.start_timer()
|
|
486
383
|
|
|
487
|
-
|
|
384
|
+
for chunk in self.get_client().chat.completions.create(
|
|
385
|
+
model=self.id,
|
|
386
|
+
messages=[self.format_message(m, response_format, compress_tool_results) for m in messages], # type: ignore
|
|
387
|
+
stream=True,
|
|
388
|
+
**self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
|
|
389
|
+
):
|
|
390
|
+
yield self._parse_provider_response_delta(chunk) # type: ignore
|
|
391
|
+
|
|
392
|
+
assistant_message.metrics.stop_timer()
|
|
393
|
+
|
|
394
|
+
except (APIResponseValidationError, APIStatusError) as e:
|
|
395
|
+
log_error(f"Error calling Groq API: {str(e)}")
|
|
396
|
+
raise ModelProviderError(
|
|
397
|
+
message=e.response.text, status_code=e.response.status_code, model_name=self.name, model_id=self.id
|
|
398
|
+
) from e
|
|
399
|
+
except APIError as e:
|
|
400
|
+
log_error(f"Error calling Groq API: {str(e)}")
|
|
401
|
+
raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
402
|
+
except Exception as e:
|
|
403
|
+
log_error(f"Unexpected error calling Groq API: {str(e)}")
|
|
404
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
405
|
+
|
|
406
|
+
async def ainvoke_stream(
|
|
407
|
+
self,
|
|
408
|
+
messages: List[Message],
|
|
409
|
+
assistant_message: Message,
|
|
410
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
411
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
412
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
413
|
+
run_response: Optional[RunOutput] = None,
|
|
414
|
+
compress_tool_results: bool = False,
|
|
415
|
+
) -> AsyncIterator[ModelResponse]:
|
|
488
416
|
"""
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
Args:
|
|
492
|
-
messages (List[Message]): A list of messages.
|
|
493
|
-
|
|
494
|
-
Returns:
|
|
495
|
-
ModelResponse: The model response.
|
|
417
|
+
Sends an asynchronous streaming chat completion request to the Groq API.
|
|
496
418
|
"""
|
|
497
|
-
logger.debug("---------- Groq Response Start ----------")
|
|
498
|
-
self._log_messages(messages)
|
|
499
|
-
model_response = ModelResponse()
|
|
500
|
-
metrics = Metrics()
|
|
501
419
|
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
metrics.stop_response_timer()
|
|
420
|
+
try:
|
|
421
|
+
if run_response and run_response.metrics:
|
|
422
|
+
run_response.metrics.set_time_to_first_token()
|
|
506
423
|
|
|
507
|
-
|
|
508
|
-
response_message: ChatCompletionMessage = response.choices[0].message
|
|
509
|
-
response_usage: Optional[CompletionUsage] = response.usage
|
|
424
|
+
assistant_message.metrics.start_timer()
|
|
510
425
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
426
|
+
async_stream = await self.get_async_client().chat.completions.create(
|
|
427
|
+
model=self.id,
|
|
428
|
+
messages=[self.format_message(m, response_format, compress_tool_results) for m in messages], # type: ignore
|
|
429
|
+
stream=True,
|
|
430
|
+
**self.get_request_params(response_format=response_format, tools=tools, tool_choice=tool_choice),
|
|
431
|
+
)
|
|
432
|
+
async for chunk in async_stream: # type: ignore
|
|
433
|
+
yield self._parse_provider_response_delta(chunk) # type: ignore
|
|
515
434
|
|
|
516
|
-
|
|
517
|
-
messages.append(assistant_message)
|
|
435
|
+
assistant_message.metrics.stop_timer()
|
|
518
436
|
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
437
|
+
except (APIResponseValidationError, APIStatusError) as e:
|
|
438
|
+
log_error(f"Error calling Groq API: {str(e)}")
|
|
439
|
+
raise ModelProviderError(
|
|
440
|
+
message=e.response.text, status_code=e.response.status_code, model_name=self.name, model_id=self.id
|
|
441
|
+
) from e
|
|
442
|
+
except APIError as e:
|
|
443
|
+
log_error(f"Error calling Groq API: {str(e)}")
|
|
444
|
+
raise ModelProviderError(message=e.message, model_name=self.name, model_id=self.id) from e
|
|
445
|
+
except Exception as e:
|
|
446
|
+
log_error(f"Unexpected error calling Groq API: {str(e)}")
|
|
447
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
522
448
|
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
449
|
+
# Override base method
|
|
450
|
+
@staticmethod
|
|
451
|
+
def parse_tool_calls(tool_calls_data: List[ChoiceDeltaToolCall]) -> List[Dict[str, Any]]:
|
|
452
|
+
"""
|
|
453
|
+
Build tool calls from streamed tool call data.
|
|
527
454
|
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
if (
|
|
531
|
-
self.handle_tool_calls(
|
|
532
|
-
assistant_message=assistant_message,
|
|
533
|
-
messages=messages,
|
|
534
|
-
model_response=model_response,
|
|
535
|
-
tool_role=tool_role,
|
|
536
|
-
)
|
|
537
|
-
is not None
|
|
538
|
-
):
|
|
539
|
-
return self.handle_post_tool_call_messages(messages=messages, model_response=model_response)
|
|
540
|
-
logger.debug("---------- Groq Response End ----------")
|
|
541
|
-
return model_response
|
|
455
|
+
Args:
|
|
456
|
+
tool_calls_data (List[ChoiceDeltaToolCall]): The tool call data to build from.
|
|
542
457
|
|
|
543
|
-
|
|
458
|
+
Returns:
|
|
459
|
+
List[Dict[str, Any]]: The built tool calls.
|
|
544
460
|
"""
|
|
545
|
-
|
|
461
|
+
tool_calls: List[Dict[str, Any]] = []
|
|
462
|
+
for _tool_call in tool_calls_data:
|
|
463
|
+
_index = _tool_call.index
|
|
464
|
+
_tool_call_id = _tool_call.id
|
|
465
|
+
_tool_call_type = _tool_call.type
|
|
466
|
+
_function_name = _tool_call.function.name if _tool_call.function else None
|
|
467
|
+
_function_arguments = _tool_call.function.arguments if _tool_call.function else None
|
|
468
|
+
|
|
469
|
+
if len(tool_calls) <= _index:
|
|
470
|
+
tool_calls.extend([{}] * (_index - len(tool_calls) + 1))
|
|
471
|
+
tool_call_entry = tool_calls[_index]
|
|
472
|
+
if not tool_call_entry:
|
|
473
|
+
tool_call_entry["id"] = _tool_call_id
|
|
474
|
+
tool_call_entry["type"] = _tool_call_type
|
|
475
|
+
tool_call_entry["function"] = {
|
|
476
|
+
"name": _function_name or "",
|
|
477
|
+
"arguments": _function_arguments or "",
|
|
478
|
+
}
|
|
479
|
+
else:
|
|
480
|
+
if _function_name:
|
|
481
|
+
tool_call_entry["function"]["name"] += _function_name
|
|
482
|
+
if _function_arguments:
|
|
483
|
+
tool_call_entry["function"]["arguments"] += _function_arguments
|
|
484
|
+
if _tool_call_id:
|
|
485
|
+
tool_call_entry["id"] = _tool_call_id
|
|
486
|
+
if _tool_call_type:
|
|
487
|
+
tool_call_entry["type"] = _tool_call_type
|
|
488
|
+
return tool_calls
|
|
489
|
+
|
|
490
|
+
def _parse_provider_response(self, response: ChatCompletion, **kwargs) -> ModelResponse:
|
|
491
|
+
"""
|
|
492
|
+
Parse the Groq response into a ModelResponse.
|
|
546
493
|
|
|
547
494
|
Args:
|
|
548
|
-
|
|
495
|
+
response: Raw response from Groq
|
|
549
496
|
|
|
550
497
|
Returns:
|
|
551
|
-
ModelResponse:
|
|
498
|
+
ModelResponse: Parsed response data
|
|
552
499
|
"""
|
|
553
|
-
logger.debug("---------- Groq Async Response Start ----------")
|
|
554
|
-
self._log_messages(messages)
|
|
555
500
|
model_response = ModelResponse()
|
|
556
|
-
metrics = Metrics()
|
|
557
501
|
|
|
558
|
-
#
|
|
559
|
-
|
|
560
|
-
response: ChatCompletion = await self.ainvoke(messages=messages)
|
|
561
|
-
metrics.stop_response_timer()
|
|
502
|
+
# Get response message
|
|
503
|
+
response_message = response.choices[0].message
|
|
562
504
|
|
|
563
|
-
#
|
|
564
|
-
response_message
|
|
565
|
-
|
|
505
|
+
# Add role
|
|
506
|
+
if response_message.role is not None:
|
|
507
|
+
model_response.role = response_message.role
|
|
566
508
|
|
|
567
|
-
#
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
)
|
|
509
|
+
# Add content
|
|
510
|
+
if response_message.content is not None:
|
|
511
|
+
model_response.content = response_message.content
|
|
571
512
|
|
|
572
|
-
#
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
513
|
+
# Add tool calls
|
|
514
|
+
if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
|
|
515
|
+
try:
|
|
516
|
+
model_response.tool_calls = [t.model_dump() for t in response_message.tool_calls]
|
|
517
|
+
except Exception as e:
|
|
518
|
+
log_warning(f"Error processing tool calls: {e}")
|
|
578
519
|
|
|
579
|
-
#
|
|
580
|
-
if
|
|
581
|
-
|
|
582
|
-
model_response.content = assistant_message.get_content_string()
|
|
520
|
+
# Add usage metrics if present
|
|
521
|
+
if response.usage is not None:
|
|
522
|
+
model_response.response_usage = self._get_metrics(response.usage)
|
|
583
523
|
|
|
584
|
-
# -*- Handle tool calls
|
|
585
|
-
tool_role = "tool"
|
|
586
|
-
if (
|
|
587
|
-
self.handle_tool_calls(
|
|
588
|
-
assistant_message=assistant_message,
|
|
589
|
-
messages=messages,
|
|
590
|
-
model_response=model_response,
|
|
591
|
-
tool_role=tool_role,
|
|
592
|
-
)
|
|
593
|
-
is not None
|
|
594
|
-
):
|
|
595
|
-
return await self.ahandle_post_tool_call_messages(messages=messages, model_response=model_response)
|
|
596
|
-
|
|
597
|
-
logger.debug("---------- Groq Async Response End ----------")
|
|
598
524
|
return model_response
|
|
599
525
|
|
|
600
|
-
def
|
|
601
|
-
"""
|
|
602
|
-
Update the usage metrics for the assistant message and the model.
|
|
603
|
-
|
|
604
|
-
Args:
|
|
605
|
-
assistant_message (Message): The assistant message.
|
|
606
|
-
metrics (Metrics): The metrics.
|
|
526
|
+
def _parse_provider_response_delta(self, response: ChatCompletionChunk) -> ModelResponse:
|
|
607
527
|
"""
|
|
608
|
-
|
|
609
|
-
assistant_message.metrics["time"] = metrics.response_timer.elapsed
|
|
610
|
-
self.metrics.setdefault("response_times", []).append(metrics.response_timer.elapsed)
|
|
611
|
-
|
|
612
|
-
if metrics.time_to_first_token is not None:
|
|
613
|
-
assistant_message.metrics["time_to_first_token"] = metrics.time_to_first_token
|
|
614
|
-
self.metrics.setdefault("time_to_first_token", []).append(metrics.time_to_first_token)
|
|
615
|
-
|
|
616
|
-
if metrics.input_tokens is not None:
|
|
617
|
-
assistant_message.metrics["input_tokens"] = metrics.input_tokens
|
|
618
|
-
self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + metrics.input_tokens
|
|
619
|
-
if metrics.output_tokens is not None:
|
|
620
|
-
assistant_message.metrics["output_tokens"] = metrics.output_tokens
|
|
621
|
-
self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + metrics.output_tokens
|
|
622
|
-
if metrics.prompt_tokens is not None:
|
|
623
|
-
assistant_message.metrics["prompt_tokens"] = metrics.prompt_tokens
|
|
624
|
-
self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + metrics.prompt_tokens
|
|
625
|
-
if metrics.completion_tokens is not None:
|
|
626
|
-
assistant_message.metrics["completion_tokens"] = metrics.completion_tokens
|
|
627
|
-
self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + metrics.completion_tokens
|
|
628
|
-
if metrics.total_tokens is not None:
|
|
629
|
-
assistant_message.metrics["total_tokens"] = metrics.total_tokens
|
|
630
|
-
self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + metrics.total_tokens
|
|
631
|
-
if metrics.completion_time is not None:
|
|
632
|
-
assistant_message.metrics["completion_time"] = metrics.completion_time
|
|
633
|
-
self.metrics["completion_time"] = self.metrics.get("completion_time", 0) + metrics.completion_time
|
|
634
|
-
if metrics.prompt_time is not None:
|
|
635
|
-
assistant_message.metrics["prompt_time"] = metrics.prompt_time
|
|
636
|
-
self.metrics["prompt_time"] = self.metrics.get("prompt_time", 0) + metrics.prompt_time
|
|
637
|
-
if metrics.queue_time is not None:
|
|
638
|
-
assistant_message.metrics["queue_time"] = metrics.queue_time
|
|
639
|
-
self.metrics["queue_time"] = self.metrics.get("queue_time", 0) + metrics.queue_time
|
|
640
|
-
if metrics.total_time is not None:
|
|
641
|
-
assistant_message.metrics["total_time"] = metrics.total_time
|
|
642
|
-
self.metrics["total_time"] = self.metrics.get("total_time", 0) + metrics.total_time
|
|
643
|
-
|
|
644
|
-
def add_response_usage_to_metrics(self, metrics: Metrics, response_usage: CompletionUsage):
|
|
645
|
-
metrics.input_tokens = response_usage.prompt_tokens
|
|
646
|
-
metrics.prompt_tokens = response_usage.prompt_tokens
|
|
647
|
-
metrics.output_tokens = response_usage.completion_tokens
|
|
648
|
-
metrics.completion_tokens = response_usage.completion_tokens
|
|
649
|
-
metrics.total_tokens = response_usage.total_tokens
|
|
650
|
-
metrics.completion_time = response_usage.completion_time
|
|
651
|
-
metrics.prompt_time = response_usage.prompt_time
|
|
652
|
-
metrics.queue_time = response_usage.queue_time
|
|
653
|
-
metrics.total_time = response_usage.total_time
|
|
654
|
-
|
|
655
|
-
def handle_stream_tool_calls(
|
|
656
|
-
self,
|
|
657
|
-
assistant_message: Message,
|
|
658
|
-
messages: List[Message],
|
|
659
|
-
tool_role: str = "tool",
|
|
660
|
-
) -> Iterator[ModelResponse]:
|
|
661
|
-
"""
|
|
662
|
-
Handle tool calls for response stream.
|
|
528
|
+
Parse the Groq streaming response into ModelResponse objects.
|
|
663
529
|
|
|
664
530
|
Args:
|
|
665
|
-
|
|
666
|
-
messages (List[Message]): The list of messages.
|
|
667
|
-
tool_role (str): The role of the tool call. Defaults to "tool".
|
|
531
|
+
response: Raw response chunk from Groq
|
|
668
532
|
|
|
669
533
|
Returns:
|
|
670
|
-
|
|
534
|
+
ModelResponse: Iterator of parsed response data
|
|
671
535
|
"""
|
|
672
|
-
|
|
673
|
-
function_calls_to_run: List[FunctionCall] = []
|
|
674
|
-
function_call_results: List[Message] = []
|
|
675
|
-
for tool_call in assistant_message.tool_calls:
|
|
676
|
-
_tool_call_id = tool_call.get("id")
|
|
677
|
-
_function_call = get_function_call_for_tool_call(tool_call, self._functions)
|
|
678
|
-
if _function_call is None:
|
|
679
|
-
messages.append(
|
|
680
|
-
Message(
|
|
681
|
-
role=tool_role,
|
|
682
|
-
tool_call_id=_tool_call_id,
|
|
683
|
-
content="Could not find function to call.",
|
|
684
|
-
)
|
|
685
|
-
)
|
|
686
|
-
continue
|
|
687
|
-
if _function_call.error is not None:
|
|
688
|
-
messages.append(
|
|
689
|
-
Message(
|
|
690
|
-
role=tool_role,
|
|
691
|
-
tool_call_id=_tool_call_id,
|
|
692
|
-
content=_function_call.error,
|
|
693
|
-
)
|
|
694
|
-
)
|
|
695
|
-
continue
|
|
696
|
-
function_calls_to_run.append(_function_call)
|
|
697
|
-
|
|
698
|
-
if self.show_tool_calls:
|
|
699
|
-
yield ModelResponse(content="\nRunning:")
|
|
700
|
-
for _f in function_calls_to_run:
|
|
701
|
-
yield ModelResponse(content=f"\n - {_f.get_call_str()}")
|
|
702
|
-
yield ModelResponse(content="\n\n")
|
|
703
|
-
|
|
704
|
-
for function_call_response in self.run_function_calls(
|
|
705
|
-
function_calls=function_calls_to_run, function_call_results=function_call_results, tool_role=tool_role
|
|
706
|
-
):
|
|
707
|
-
yield function_call_response
|
|
708
|
-
|
|
709
|
-
if len(function_call_results) > 0:
|
|
710
|
-
messages.extend(function_call_results)
|
|
536
|
+
model_response = ModelResponse()
|
|
711
537
|
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
Generate a streaming response from Groq.
|
|
538
|
+
if len(response.choices) > 0:
|
|
539
|
+
choice_delta: ChoiceDelta = response.choices[0].delta
|
|
715
540
|
|
|
716
|
-
|
|
717
|
-
|
|
541
|
+
if choice_delta:
|
|
542
|
+
# Add content
|
|
543
|
+
if choice_delta.content is not None:
|
|
544
|
+
model_response.content = choice_delta.content
|
|
718
545
|
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
logger.debug("---------- Groq Response Start ----------")
|
|
723
|
-
self._log_messages(messages)
|
|
724
|
-
stream_data: StreamData = StreamData()
|
|
725
|
-
metrics: Metrics = Metrics()
|
|
726
|
-
|
|
727
|
-
# -*- Generate response
|
|
728
|
-
metrics.start_response_timer()
|
|
729
|
-
for response in self.invoke_stream(messages=messages):
|
|
730
|
-
if len(response.choices) > 0:
|
|
731
|
-
metrics.completion_tokens += 1
|
|
732
|
-
if metrics.completion_tokens == 1:
|
|
733
|
-
metrics.time_to_first_token = metrics.response_timer.elapsed
|
|
734
|
-
|
|
735
|
-
response_delta: ChoiceDelta = response.choices[0].delta
|
|
736
|
-
response_content: Optional[str] = response_delta.content
|
|
737
|
-
response_tool_calls: Optional[List[ChoiceDeltaToolCall]] = response_delta.tool_calls
|
|
738
|
-
|
|
739
|
-
if response_content is not None:
|
|
740
|
-
stream_data.response_content += response_content
|
|
741
|
-
yield ModelResponse(content=response_content)
|
|
742
|
-
|
|
743
|
-
if response_tool_calls is not None:
|
|
744
|
-
if stream_data.response_tool_calls is None:
|
|
745
|
-
stream_data.response_tool_calls = []
|
|
746
|
-
stream_data.response_tool_calls.extend(response_tool_calls)
|
|
747
|
-
|
|
748
|
-
if response.usage is not None:
|
|
749
|
-
self.add_response_usage_to_metrics(metrics=metrics, response_usage=response.usage)
|
|
750
|
-
metrics.stop_response_timer()
|
|
751
|
-
|
|
752
|
-
# -*- Create assistant message
|
|
753
|
-
assistant_message = Message(role="assistant")
|
|
754
|
-
if stream_data.response_content != "":
|
|
755
|
-
assistant_message.content = stream_data.response_content
|
|
756
|
-
|
|
757
|
-
if stream_data.response_tool_calls is not None:
|
|
758
|
-
_tool_calls = self.build_tool_calls(stream_data.response_tool_calls)
|
|
759
|
-
if len(_tool_calls) > 0:
|
|
760
|
-
assistant_message.tool_calls = _tool_calls
|
|
761
|
-
|
|
762
|
-
# -*- Update usage metrics
|
|
763
|
-
self.update_stream_metrics(assistant_message=assistant_message, metrics=metrics)
|
|
764
|
-
|
|
765
|
-
# -*- Add assistant message to messages
|
|
766
|
-
messages.append(assistant_message)
|
|
767
|
-
|
|
768
|
-
# -*- Log response and metrics
|
|
769
|
-
assistant_message.log()
|
|
770
|
-
metrics.log()
|
|
771
|
-
|
|
772
|
-
# -*- Handle tool calls
|
|
773
|
-
if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
|
|
774
|
-
tool_role = "tool"
|
|
775
|
-
yield from self.handle_stream_tool_calls(
|
|
776
|
-
assistant_message=assistant_message, messages=messages, tool_role=tool_role
|
|
777
|
-
)
|
|
778
|
-
yield from self.handle_post_tool_call_messages_stream(messages=messages)
|
|
779
|
-
logger.debug("---------- Groq Response End ----------")
|
|
780
|
-
|
|
781
|
-
async def aresponse_stream(self, messages: List[Message]) -> Any:
|
|
782
|
-
"""
|
|
783
|
-
Generate an asynchronous streaming response from Groq.
|
|
546
|
+
# Add tool calls
|
|
547
|
+
if choice_delta.tool_calls is not None:
|
|
548
|
+
model_response.tool_calls = choice_delta.tool_calls # type: ignore
|
|
784
549
|
|
|
785
|
-
|
|
786
|
-
|
|
550
|
+
# Add usage metrics if present
|
|
551
|
+
if response.x_groq is not None and response.x_groq.usage is not None:
|
|
552
|
+
model_response.response_usage = self._get_metrics(response.x_groq.usage)
|
|
787
553
|
|
|
788
|
-
|
|
789
|
-
Any: An asynchronous iterator of model responses.
|
|
790
|
-
"""
|
|
791
|
-
logger.debug("---------- Groq Async Response Start ----------")
|
|
792
|
-
self._log_messages(messages)
|
|
793
|
-
stream_data: StreamData = StreamData()
|
|
794
|
-
metrics: Metrics = Metrics()
|
|
795
|
-
|
|
796
|
-
# -*- Generate response
|
|
797
|
-
metrics.start_response_timer()
|
|
798
|
-
async for response in self.ainvoke_stream(messages=messages):
|
|
799
|
-
if len(response.choices) > 0:
|
|
800
|
-
metrics.completion_tokens += 1
|
|
801
|
-
if metrics.completion_tokens == 1:
|
|
802
|
-
metrics.time_to_first_token = metrics.response_timer.elapsed
|
|
803
|
-
|
|
804
|
-
response_delta: ChoiceDelta = response.choices[0].delta
|
|
805
|
-
response_content = response_delta.content
|
|
806
|
-
response_tool_calls = response_delta.tool_calls
|
|
807
|
-
|
|
808
|
-
if response_content is not None:
|
|
809
|
-
stream_data.response_content += response_content
|
|
810
|
-
yield ModelResponse(content=response_content)
|
|
811
|
-
|
|
812
|
-
if response_tool_calls is not None:
|
|
813
|
-
if stream_data.response_tool_calls is None:
|
|
814
|
-
stream_data.response_tool_calls = []
|
|
815
|
-
stream_data.response_tool_calls.extend(response_tool_calls)
|
|
816
|
-
|
|
817
|
-
if response.usage is not None:
|
|
818
|
-
self.add_response_usage_to_metrics(metrics=metrics, response_usage=response.usage)
|
|
819
|
-
metrics.stop_response_timer()
|
|
820
|
-
|
|
821
|
-
# -*- Create assistant message
|
|
822
|
-
assistant_message = Message(role="assistant")
|
|
823
|
-
if stream_data.response_content != "":
|
|
824
|
-
assistant_message.content = stream_data.response_content
|
|
825
|
-
|
|
826
|
-
if stream_data.response_tool_calls is not None:
|
|
827
|
-
_tool_calls = self.build_tool_calls(stream_data.response_tool_calls)
|
|
828
|
-
if len(_tool_calls) > 0:
|
|
829
|
-
assistant_message.tool_calls = _tool_calls
|
|
830
|
-
|
|
831
|
-
self.update_stream_metrics(assistant_message=assistant_message, metrics=metrics)
|
|
832
|
-
|
|
833
|
-
# -*- Add assistant message to messages
|
|
834
|
-
messages.append(assistant_message)
|
|
835
|
-
|
|
836
|
-
# -*- Log response and metrics
|
|
837
|
-
assistant_message.log()
|
|
838
|
-
metrics.log()
|
|
839
|
-
|
|
840
|
-
# -*- Handle tool calls
|
|
841
|
-
if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
|
|
842
|
-
tool_role = "tool"
|
|
843
|
-
for tool_call_response in self.handle_stream_tool_calls(
|
|
844
|
-
assistant_message=assistant_message, messages=messages, tool_role=tool_role
|
|
845
|
-
):
|
|
846
|
-
yield tool_call_response
|
|
847
|
-
async for post_tool_call_response in self.ahandle_post_tool_call_messages_stream(messages=messages):
|
|
848
|
-
yield post_tool_call_response
|
|
849
|
-
logger.debug("---------- Groq Async Response End ----------")
|
|
554
|
+
return model_response
|
|
850
555
|
|
|
851
|
-
def
|
|
556
|
+
def _get_metrics(self, response_usage: CompletionUsage) -> Metrics:
|
|
852
557
|
"""
|
|
853
|
-
|
|
558
|
+
Parse the given Groq usage into an Agno Metrics object.
|
|
854
559
|
|
|
855
560
|
Args:
|
|
856
|
-
|
|
561
|
+
response_usage: Usage data from Groq
|
|
857
562
|
|
|
858
563
|
Returns:
|
|
859
|
-
|
|
564
|
+
Metrics: Parsed metrics data
|
|
860
565
|
"""
|
|
861
|
-
|
|
566
|
+
metrics = Metrics()
|
|
567
|
+
|
|
568
|
+
metrics.input_tokens = response_usage.prompt_tokens or 0
|
|
569
|
+
metrics.output_tokens = response_usage.completion_tokens or 0
|
|
570
|
+
metrics.total_tokens = metrics.input_tokens + metrics.output_tokens
|
|
571
|
+
|
|
572
|
+
# Additional time metrics offered by Groq
|
|
573
|
+
if completion_time := response_usage.completion_time:
|
|
574
|
+
metrics.provider_metrics = metrics.provider_metrics or {}
|
|
575
|
+
metrics.provider_metrics["completion_time"] = completion_time
|
|
576
|
+
if prompt_time := response_usage.prompt_time:
|
|
577
|
+
metrics.provider_metrics = metrics.provider_metrics or {}
|
|
578
|
+
metrics.provider_metrics["prompt_time"] = prompt_time
|
|
579
|
+
if queue_time := response_usage.queue_time:
|
|
580
|
+
metrics.provider_metrics = metrics.provider_metrics or {}
|
|
581
|
+
metrics.provider_metrics["queue_time"] = queue_time
|
|
582
|
+
if total_time := response_usage.total_time:
|
|
583
|
+
metrics.provider_metrics = metrics.provider_metrics or {}
|
|
584
|
+
metrics.provider_metrics["total_time"] = total_time
|
|
585
|
+
|
|
586
|
+
return metrics
|