agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
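The rename entries above encode a sweeping reorganization: `document`, `embedder`, and `reranker` moved under `agno.knowledge`, `playground` gave way to `agno.os`, and several toolkits were renamed (`arxiv_toolkit.py → arxiv.py`, `twitter.py → x.py`). A minimal import-migration sketch implied by those paths — the module paths come from the rename entries, while the imported class names are illustrative assumptions:

```python
# agno 0.1.2 (old paths, shown for contrast):
# from agno.embedder.openai import OpenAIEmbedder
# from agno.document.chunking.fixed import FixedSizeChunking
# from agno.reranker.cohere import CohereReranker

# agno 2.3.13 -- the same components now live under agno.knowledge:
from agno.knowledge.embedder.openai import OpenAIEmbedder    # embedder/ -> knowledge/embedder/
from agno.knowledge.chunking.fixed import FixedSizeChunking  # document/chunking/ -> knowledge/chunking/
from agno.knowledge.reranker.cohere import CohereReranker    # reranker/ -> knowledge/reranker/
```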
agno/models/huggingface/huggingface.py
@@ -1,70 +1,42 @@
-
+import json
+from collections.abc import AsyncIterator
+from dataclasses import asdict, dataclass
 from os import getenv
-from typing import Any, Dict, Iterator, List, Optional, Union
+from typing import Any, Dict, Iterator, List, Optional, Type, Union
 
 import httpx
+from huggingface_hub import ChatCompletionInputStreamOptions
 from pydantic import BaseModel
 
-from agno.
+from agno.exceptions import ModelProviderError
+from agno.models.base import Model
 from agno.models.message import Message
+from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse
-from agno.
-from agno.utils.log import
-from agno.utils.tools import get_function_call_for_tool_call
+from agno.run.agent import RunOutput
+from agno.utils.log import log_debug, log_error, log_warning
 
 try:
     from huggingface_hub import (
         AsyncInferenceClient,
         ChatCompletionOutput,
         ChatCompletionOutputMessage,
-        ChatCompletionOutputUsage,
         ChatCompletionStreamOutput,
         ChatCompletionStreamOutputDelta,
         ChatCompletionStreamOutputDeltaToolCall,
         InferenceClient,
     )
-
+    from huggingface_hub.errors import InferenceTimeoutError
+except ImportError:
     raise ImportError("`huggingface_hub` not installed. Please install using `pip install huggingface_hub`")
 
 
-@dataclass
-class StreamData:
-    response_content: str = ""
-    response_tool_calls: Optional[List[ChatCompletionStreamOutputDeltaToolCall]] = None
-
-
 @dataclass
 class HuggingFace(Model):
     """
     A class for interacting with HuggingFace Hub Inference models.
 
-
-        id (str): The id of the HuggingFace model to use. Default is "meta-llama/Meta-Llama-3-8B-Instruct".
-        name (str): The name of this chat model instance. Default is "HuggingFace".
-        provider (str): The provider of the model. Default is "HuggingFace".
-        store (Optional[bool]): Whether or not to store the output of this chat completion request for use in the model distillation or evals products.
-        frequency_penalty (Optional[float]): Penalizes new tokens based on their frequency in the text so far.
-        logit_bias (Optional[Any]): Modifies the likelihood of specified tokens appearing in the completion.
-        logprobs (Optional[bool]): Include the log probabilities on the logprobs most likely tokens.
-        max_tokens (Optional[int]): The maximum number of tokens to generate in the chat completion.
-        presence_penalty (Optional[float]): Penalizes new tokens based on whether they appear in the text so far.
-        response_format (Optional[Any]): An object specifying the format that the model must output.
-        seed (Optional[int]): A seed for deterministic sampling.
-        stop (Optional[Union[str, List[str]]]): Up to 4 sequences where the API will stop generating further tokens.
-        temperature (Optional[float]): Controls randomness in the model's output.
-        top_logprobs (Optional[int]): How many log probability results to return per token.
-        top_p (Optional[float]): Controls diversity via nucleus sampling.
-        request_params (Optional[Dict[str, Any]]): Additional parameters to include in the request.
-        api_key (Optional[str]): The Access Token for authenticating with HuggingFace.
-        base_url (Optional[Union[str, httpx.URL]]): The base URL for API requests.
-        timeout (Optional[float]): The timeout for API requests.
-        max_retries (Optional[int]): The maximum number of retries for failed requests.
-        default_headers (Optional[Any]): Default headers to include in all requests.
-        default_query (Optional[Any]): Default query parameters to include in all requests.
-        http_client (Optional[httpx.Client]): An optional pre-configured HTTP client.
-        client_params (Optional[Dict[str, Any]]): Additional parameters for client configuration.
-        client (Optional[InferenceClient]): The HuggingFace Hub Inference client instance.
-        async_client (Optional[AsyncInferenceClient]): The asynchronous HuggingFace Hub client instance.
+    For more information, see: https://huggingface.co/docs/api-inference/index
     """
 
     id: str = "meta-llama/Meta-Llama-3-8B-Instruct"
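The import hunk above replaces the 0.1.2 logger and tool-call helpers with 2.3.13's typed error surface: `ModelProviderError` plus granular `log_debug`/`log_error`/`log_warning` functions. A sketch of the failure contract this gives callers, using only the constructor fields visible at the raise sites later in this diff (`message`, `model_name`, `model_id`); the wrapper function is illustrative, not part of agno:

```python
from agno.exceptions import ModelProviderError  # new import in 2.3.13, per the hunk above


def call_model_safely(model, messages, assistant_message):
    # Illustrative wrapper: timeouts and unexpected provider failures now both
    # surface as a single typed exception instead of raw huggingface_hub errors.
    try:
        return model.invoke(messages=messages, assistant_message=assistant_message)
    except ModelProviderError as e:
        print(f"provider failure: {e}")  # model_name/model_id are attached at the raise sites
        return None
```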
@@ -78,7 +50,6 @@ class HuggingFace(Model):
     logprobs: Optional[bool] = None
     max_tokens: Optional[int] = None
     presence_penalty: Optional[float] = None
-    response_format: Optional[Any] = None
     seed: Optional[int] = None
     stop: Optional[Union[str, List[str]]] = None
     temperature: Optional[float] = None
@@ -93,7 +64,6 @@ class HuggingFace(Model):
     max_retries: Optional[int] = None
     default_headers: Optional[Any] = None
     default_query: Optional[Any] = None
-    http_client: Optional[httpx.Client] = None
     client_params: Optional[Dict[str, Any]] = None
 
     # HuggingFace Hub Inference clients
@@ -103,7 +73,7 @@ class HuggingFace(Model):
     def get_client_params(self) -> Dict[str, Any]:
         self.api_key = self.api_key or getenv("HF_TOKEN")
         if not self.api_key:
-
+            log_error("HF_TOKEN not set. Please set the HF_TOKEN environment variable.")
 
         _client_params: Dict[str, Any] = {}
         if self.api_key is not None:
@@ -133,9 +103,8 @@ class HuggingFace(Model):
             return self.client
 
         _client_params: Dict[str, Any] = self.get_client_params()
-
-
-        return InferenceClient(**_client_params)
+        self.client = InferenceClient(**_client_params)
+        return self.client
 
     def get_async_client(self) -> AsyncInferenceClient:
         """
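The two hunks above change the client accessors from building a fresh `InferenceClient` on every call to caching one on the instance, so repeated requests reuse a single client and its connection pool. The pattern, reduced to a standalone sketch (`ExpensiveClient` and `Holder` are illustrative names, not agno classes):

```python
from typing import Optional


class ExpensiveClient:
    """Stand-in for InferenceClient: assume construction is costly."""


class Holder:
    def __init__(self) -> None:
        self.client: Optional[ExpensiveClient] = None

    def get_client(self) -> ExpensiveClient:
        # 0.1.2 behavior: return ExpensiveClient() on every call (no reuse).
        # 2.3.13 behavior: construct once, memoize on self, return the cache.
        if self.client is not None:
            return self.client
        self.client = ExpensiveClient()
        return self.client


holder = Holder()
assert holder.get_client() is holder.get_client()  # one client, reused across calls
```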
@@ -148,18 +117,12 @@ class HuggingFace(Model):
             return self.async_client
 
         _client_params: Dict[str, Any] = self.get_client_params()
+        self.async_client = AsyncInferenceClient(**_client_params)
+        return self.async_client
 
-
-
-
-        # Create a new async HTTP client with custom limits
-        _client_params["http_client"] = httpx.AsyncClient(
-            limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
-        )
-        return AsyncInferenceClient(**_client_params)
-
-    @property
-    def request_kwargs(self) -> Dict[str, Any]:
+    def get_request_params(
+        self, tools: Optional[List[Dict[str, Any]]] = None, tool_choice: Optional[Union[str, Dict[str, Any]]] = None
+    ) -> Dict[str, Any]:
         """
         Returns keyword arguments for inference model client requests.
 
@@ -179,8 +142,6 @@ class HuggingFace(Model):
             _request_params["max_tokens"] = self.max_tokens
         if self.presence_penalty is not None:
             _request_params["presence_penalty"] = self.presence_penalty
-        if self.response_format is not None:
-            _request_params["response_format"] = self.response_format
         if self.seed is not None:
             _request_params["seed"] = self.seed
         if self.stop is not None:
@@ -191,14 +152,17 @@ class HuggingFace(Model):
             _request_params["top_logprobs"] = self.top_logprobs
         if self.top_p is not None:
             _request_params["top_p"] = self.top_p
-        if
-            _request_params["tools"] =
-            if
+        if tools is not None:
+            _request_params["tools"] = tools
+            if tool_choice is None:
                 _request_params["tool_choice"] = "auto"
             else:
-                _request_params["tool_choice"] =
+                _request_params["tool_choice"] = tool_choice
         if self.request_params is not None:
             _request_params.update(self.request_params)
+
+        if _request_params:
+            log_debug(f"Calling {self.provider} with request parameters: {_request_params}", log_level=2)
         return _request_params
 
     def to_dict(self) -> Dict[str, Any]:
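With this change, `request_kwargs` stops being a property that reads `self.tools`/`self.tool_choice` and becomes `get_request_params(tools=..., tool_choice=...)`, so tool definitions travel with each call. A hypothetical call shape, assuming an OpenAI-style function schema (the `get_weather` tool is illustrative, not part of agno):

```python
from agno.models.huggingface import HuggingFace  # module path per the file list above

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # illustrative tool definition
            "description": "Get the weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

model = HuggingFace(id="meta-llama/Meta-Llama-3-8B-Instruct", temperature=0.2)
params = model.get_request_params(tools=tools, tool_choice=None)
# params now carries temperature plus tools and tool_choice="auto",
# where 0.1.2 read self.tools / self.tool_choice off the instance.
```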
@@ -217,558 +181,322 @@ class HuggingFace(Model):
|
|
|
217
181
|
"logprobs": self.logprobs,
|
|
218
182
|
"max_tokens": self.max_tokens,
|
|
219
183
|
"presence_penalty": self.presence_penalty,
|
|
220
|
-
"response_format": self.response_format,
|
|
221
184
|
"seed": self.seed,
|
|
222
185
|
"stop": self.stop,
|
|
223
186
|
"temperature": self.temperature,
|
|
224
187
|
"top_logprobs": self.top_logprobs,
|
|
225
188
|
"top_p": self.top_p,
|
|
226
|
-
"tools": self.tools,
|
|
227
|
-
"tool_choice": self.tool_choice
|
|
228
|
-
if (self.tools is not None and self.tool_choice is not None)
|
|
229
|
-
else "auto",
|
|
230
189
|
}
|
|
231
190
|
)
|
|
232
191
|
cleaned_dict = {k: v for k, v in _dict.items() if v is not None}
|
|
233
192
|
return cleaned_dict
|
|
234
193
|
|
|
235
|
-
def
|
|
194
|
+
def _format_message(self, message: Message, compress_tool_results: bool = False) -> Dict[str, Any]:
|
|
236
195
|
"""
|
|
237
|
-
|
|
196
|
+
Format a message into the format expected by HuggingFace.
|
|
238
197
|
|
|
239
198
|
Args:
|
|
240
|
-
|
|
199
|
+
message (Message): The message to format.
|
|
200
|
+
compress_tool_results: Whether to compress tool results.
|
|
241
201
|
|
|
242
202
|
Returns:
|
|
243
|
-
|
|
203
|
+
Dict[str, Any]: The formatted message.
|
|
244
204
|
"""
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
205
|
+
# Use compressed content for tool messages if compression is active
|
|
206
|
+
if message.role == "tool":
|
|
207
|
+
content = message.get_content(use_compressed_content=compress_tool_results)
|
|
208
|
+
else:
|
|
209
|
+
content = message.content if message.content is not None else ""
|
|
250
210
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
211
|
+
message_dict: Dict[str, Any] = {
|
|
212
|
+
"role": message.role,
|
|
213
|
+
"content": content,
|
|
214
|
+
"name": message.name or message.tool_name,
|
|
215
|
+
"tool_call_id": message.tool_call_id,
|
|
216
|
+
"tool_calls": message.tool_calls,
|
|
217
|
+
}
|
|
254
218
|
|
|
255
|
-
|
|
256
|
-
messages (List[Message]): A list of messages to send to the model.
|
|
219
|
+
message_dict = {k: v for k, v in message_dict.items() if v is not None}
|
|
257
220
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
"""
|
|
261
|
-
return await self.get_async_client().chat.completions.create(
|
|
262
|
-
model=self.id,
|
|
263
|
-
messages=[m.to_dict() for m in messages],
|
|
264
|
-
**self.request_kwargs,
|
|
265
|
-
)
|
|
221
|
+
if message.tool_calls is None or len(message.tool_calls) == 0:
|
|
222
|
+
message_dict["tool_calls"] = None
|
|
266
223
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
Send a streaming chat completion request to the HuggingFace API.
|
|
224
|
+
if message.audio is not None and len(message.audio) > 0:
|
|
225
|
+
log_warning("Audio input is currently unsupported.")
|
|
270
226
|
|
|
271
|
-
|
|
272
|
-
|
|
227
|
+
if message.files is not None and len(message.files) > 0:
|
|
228
|
+
log_warning("File input is currently unsupported.")
|
|
273
229
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
"""
|
|
277
|
-
yield from self.get_client().chat.completions.create(
|
|
278
|
-
model=self.id,
|
|
279
|
-
messages=[m.to_dict() for m in messages], # type: ignore
|
|
280
|
-
stream=True,
|
|
281
|
-
stream_options={"include_usage": True},
|
|
282
|
-
**self.request_kwargs,
|
|
283
|
-
) # type: ignore
|
|
284
|
-
|
|
285
|
-
async def ainvoke_stream(self, messages: List[Message]) -> Any:
|
|
286
|
-
"""
|
|
287
|
-
Sends an asynchronous streaming chat completion request to the HuggingFace API.
|
|
230
|
+
if message.images is not None and len(message.images) > 0:
|
|
231
|
+
log_warning("Image input is currently unsupported.")
|
|
288
232
|
|
|
289
|
-
|
|
290
|
-
|
|
233
|
+
if message.videos is not None and len(message.videos) > 0:
|
|
234
|
+
log_warning("Video input is currently unsupported.")
|
|
291
235
|
|
|
292
|
-
|
|
293
|
-
Any: An asynchronous iterator of chat completion chunks.
|
|
294
|
-
"""
|
|
295
|
-
async_stream = await self.get_async_client().chat.completions.create(
|
|
296
|
-
model=self.id,
|
|
297
|
-
messages=[m.to_dict() for m in messages],
|
|
298
|
-
stream=True,
|
|
299
|
-
stream_options={"include_usage": True},
|
|
300
|
-
**self.request_kwargs,
|
|
301
|
-
)
|
|
302
|
-
async for chunk in async_stream: # type: ignore
|
|
303
|
-
yield chunk
|
|
236
|
+
return message_dict
|
|
304
237
|
|
|
305
|
-
def
|
|
306
|
-
self,
|
|
307
|
-
|
|
238
|
+
def invoke(
|
|
239
|
+
self,
|
|
240
|
+
messages: List[Message],
|
|
241
|
+
assistant_message: Message,
|
|
242
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
243
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
244
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
245
|
+
run_response: Optional[RunOutput] = None,
|
|
246
|
+
compress_tool_results: bool = False,
|
|
247
|
+
) -> ModelResponse:
|
|
308
248
|
"""
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
Args:
|
|
312
|
-
assistant_message (Message): The assistant message.
|
|
313
|
-
messages (List[Message]): The list of messages.
|
|
314
|
-
model_response (ModelResponse): The model response.
|
|
315
|
-
|
|
316
|
-
Returns:
|
|
317
|
-
Optional[ModelResponse]: The model response after handling tool calls.
|
|
249
|
+
Send a chat completion request to the HuggingFace Hub.
|
|
318
250
|
"""
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
role="tool",
|
|
331
|
-
tool_call_id=_tool_call_id,
|
|
332
|
-
content="Could not find function to call.",
|
|
333
|
-
)
|
|
334
|
-
)
|
|
335
|
-
continue
|
|
336
|
-
if _function_call.error is not None:
|
|
337
|
-
messages.append(
|
|
338
|
-
Message(
|
|
339
|
-
role="tool",
|
|
340
|
-
tool_call_id=_tool_call_id,
|
|
341
|
-
content=_function_call.error,
|
|
342
|
-
)
|
|
343
|
-
)
|
|
344
|
-
continue
|
|
345
|
-
function_calls_to_run.append(_function_call)
|
|
346
|
-
|
|
347
|
-
if self.show_tool_calls:
|
|
348
|
-
model_response.content += "\nRunning:"
|
|
349
|
-
for _f in function_calls_to_run:
|
|
350
|
-
model_response.content += f"\n - {_f.get_call_str()}"
|
|
351
|
-
model_response.content += "\n\n"
|
|
352
|
-
|
|
353
|
-
for _ in self.run_function_calls(
|
|
354
|
-
function_calls=function_calls_to_run, function_call_results=function_call_results, tool_role=tool_role
|
|
355
|
-
):
|
|
356
|
-
pass
|
|
357
|
-
|
|
358
|
-
if len(function_call_results) > 0:
|
|
359
|
-
messages.extend(function_call_results)
|
|
251
|
+
try:
|
|
252
|
+
if run_response and run_response.metrics:
|
|
253
|
+
run_response.metrics.set_time_to_first_token()
|
|
254
|
+
|
|
255
|
+
assistant_message.metrics.start_timer()
|
|
256
|
+
provider_response = self.get_client().chat.completions.create(
|
|
257
|
+
model=self.id,
|
|
258
|
+
messages=[self._format_message(m, compress_tool_results) for m in messages],
|
|
259
|
+
**self.get_request_params(tools=tools, tool_choice=tool_choice),
|
|
260
|
+
)
|
|
261
|
+
assistant_message.metrics.stop_timer()
|
|
360
262
|
|
|
361
|
-
return
|
|
362
|
-
return None
|
|
263
|
+
return self._parse_provider_response(provider_response, response_format=response_format)
|
|
363
264
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
265
|
+
except InferenceTimeoutError as e:
|
|
266
|
+
log_error(f"Error invoking HuggingFace model: {e}")
|
|
267
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
268
|
+
except Exception as e:
|
|
269
|
+
log_error(f"Unexpected error invoking HuggingFace model: {e}")
|
|
270
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
369
271
|
|
|
370
|
-
|
|
371
|
-
assistant_message (Message): The assistant message.
|
|
372
|
-
metrics (Metrics): The metrics.
|
|
373
|
-
response_usage (Optional[CompletionUsage]): The response usage.
|
|
374
|
-
"""
|
|
375
|
-
# Update time taken to generate response
|
|
376
|
-
assistant_message.metrics["time"] = metrics.response_timer.elapsed
|
|
377
|
-
self.metrics.setdefault("response_times", []).append(metrics.response_timer.elapsed)
|
|
378
|
-
if response_usage:
|
|
379
|
-
prompt_tokens = response_usage.prompt_tokens
|
|
380
|
-
completion_tokens = response_usage.completion_tokens
|
|
381
|
-
total_tokens = response_usage.total_tokens
|
|
382
|
-
|
|
383
|
-
if prompt_tokens is not None:
|
|
384
|
-
metrics.input_tokens = prompt_tokens
|
|
385
|
-
metrics.prompt_tokens = prompt_tokens
|
|
386
|
-
assistant_message.metrics["input_tokens"] = prompt_tokens
|
|
387
|
-
assistant_message.metrics["prompt_tokens"] = prompt_tokens
|
|
388
|
-
self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + prompt_tokens
|
|
389
|
-
self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + prompt_tokens
|
|
390
|
-
if completion_tokens is not None:
|
|
391
|
-
metrics.output_tokens = completion_tokens
|
|
392
|
-
metrics.completion_tokens = completion_tokens
|
|
393
|
-
assistant_message.metrics["output_tokens"] = completion_tokens
|
|
394
|
-
assistant_message.metrics["completion_tokens"] = completion_tokens
|
|
395
|
-
self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + completion_tokens
|
|
396
|
-
self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + completion_tokens
|
|
397
|
-
if total_tokens is not None:
|
|
398
|
-
metrics.total_tokens = total_tokens
|
|
399
|
-
assistant_message.metrics["total_tokens"] = total_tokens
|
|
400
|
-
self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + total_tokens
|
|
401
|
-
if response_usage.prompt_tokens_details is not None:
|
|
402
|
-
if isinstance(response_usage.prompt_tokens_details, dict):
|
|
403
|
-
metrics.prompt_tokens_details = response_usage.prompt_tokens_details
|
|
404
|
-
elif isinstance(response_usage.prompt_tokens_details, BaseModel):
|
|
405
|
-
metrics.prompt_tokens_details = response_usage.prompt_tokens_details.model_dump(exclude_none=True)
|
|
406
|
-
assistant_message.metrics["prompt_tokens_details"] = metrics.prompt_tokens_details
|
|
407
|
-
if metrics.prompt_tokens_details is not None:
|
|
408
|
-
for k, v in metrics.prompt_tokens_details.items():
|
|
409
|
-
self.metrics.get("prompt_tokens_details", {}).get(k, 0) + v
|
|
410
|
-
if response_usage.completion_tokens_details is not None:
|
|
411
|
-
if isinstance(response_usage.completion_tokens_details, dict):
|
|
412
|
-
metrics.completion_tokens_details = response_usage.completion_tokens_details
|
|
413
|
-
elif isinstance(response_usage.completion_tokens_details, BaseModel):
|
|
414
|
-
metrics.completion_tokens_details = response_usage.completion_tokens_details.model_dump(
|
|
415
|
-
exclude_none=True
|
|
416
|
-
)
|
|
417
|
-
assistant_message.metrics["completion_tokens_details"] = metrics.completion_tokens_details
|
|
418
|
-
if metrics.completion_tokens_details is not None:
|
|
419
|
-
for k, v in metrics.completion_tokens_details.items():
|
|
420
|
-
self.metrics.get("completion_tokens_details", {}).get(k, 0) + v
|
|
421
|
-
|
|
422
|
-
def _create_assistant_message(
|
|
272
|
+
async def ainvoke(
|
|
423
273
|
self,
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
274
|
+
messages: List[Message],
|
|
275
|
+
assistant_message: Message,
|
|
276
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
277
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
278
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
279
|
+
run_response: Optional[RunOutput] = None,
|
|
280
|
+
compress_tool_results: bool = False,
|
|
281
|
+
) -> ModelResponse:
|
|
428
282
|
"""
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
Args:
|
|
432
|
-
response_message (ChatCompletionMessage): The response message.
|
|
433
|
-
metrics (Metrics): The metrics.
|
|
434
|
-
response_usage (Optional[CompletionUsage]): The response usage.
|
|
435
|
-
|
|
436
|
-
Returns:
|
|
437
|
-
Message: The assistant message.
|
|
283
|
+
Sends an asynchronous chat completion request to the HuggingFace Hub Inference.
|
|
438
284
|
"""
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
285
|
+
try:
|
|
286
|
+
if run_response and run_response.metrics:
|
|
287
|
+
run_response.metrics.set_time_to_first_token()
|
|
288
|
+
|
|
289
|
+
assistant_message.metrics.start_timer()
|
|
290
|
+
provider_response = await self.get_async_client().chat.completions.create(
|
|
291
|
+
model=self.id,
|
|
292
|
+
messages=[self._format_message(m, compress_tool_results) for m in messages],
|
|
293
|
+
**self.get_request_params(tools=tools, tool_choice=tool_choice),
|
|
294
|
+
)
|
|
295
|
+
assistant_message.metrics.stop_timer()
|
|
445
296
|
|
|
446
|
-
|
|
297
|
+
return self._parse_provider_response(provider_response, response_format=response_format)
|
|
447
298
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
299
|
+
except InferenceTimeoutError as e:
|
|
300
|
+
log_error(f"Error invoking HuggingFace model: {e}")
|
|
301
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
302
|
+
except Exception as e:
|
|
303
|
+
log_error(f"Unexpected error invoking HuggingFace model: {e}")
|
|
304
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
454
305
|
|
|
455
|
-
|
|
456
|
-
|
|
306
|
+
def invoke_stream(
|
|
307
|
+
self,
|
|
308
|
+
messages: List[Message],
|
|
309
|
+
assistant_message: Message,
|
|
310
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
311
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
312
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
313
|
+
run_response: Optional[RunOutput] = None,
|
|
314
|
+
compress_tool_results: bool = False,
|
|
315
|
+
) -> Iterator[ModelResponse]:
|
|
457
316
|
"""
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
317
|
+
Send a streaming chat completion request to the HuggingFace API.
|
|
318
|
+
"""
|
|
319
|
+
try:
|
|
320
|
+
if run_response and run_response.metrics:
|
|
321
|
+
run_response.metrics.set_time_to_first_token()
|
|
462
322
|
|
|
463
|
-
|
|
464
|
-
metrics.start_response_timer()
|
|
465
|
-
response: Union[ChatCompletionOutput] = self.invoke(messages=messages)
|
|
466
|
-
metrics.stop_response_timer()
|
|
323
|
+
assistant_message.metrics.start_timer()
|
|
467
324
|
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
325
|
+
stream = self.get_client().chat.completions.create(
|
|
326
|
+
model=self.id,
|
|
327
|
+
messages=[self._format_message(m, compress_tool_results) for m in messages],
|
|
328
|
+
stream=True,
|
|
329
|
+
stream_options=ChatCompletionInputStreamOptions(include_usage=True), # type: ignore
|
|
330
|
+
**self.get_request_params(tools=tools, tool_choice=tool_choice),
|
|
331
|
+
)
|
|
471
332
|
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
response_message=response_message, metrics=metrics, response_usage=response_usage
|
|
475
|
-
)
|
|
333
|
+
for chunk in stream:
|
|
334
|
+
yield self._parse_provider_response_delta(chunk)
|
|
476
335
|
|
|
477
|
-
|
|
478
|
-
messages.append(assistant_message)
|
|
336
|
+
assistant_message.metrics.stop_timer()
|
|
479
337
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
338
|
+
except InferenceTimeoutError as e:
|
|
339
|
+
log_error(f"Error invoking HuggingFace model: {e}")
|
|
340
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
341
|
+
except Exception as e:
|
|
342
|
+
log_error(f"Unexpected error invoking HuggingFace model: {e}")
|
|
343
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
483
344
|
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
345
|
+
async def ainvoke_stream(
|
|
346
|
+
self,
|
|
347
|
+
messages: List[Message],
|
|
348
|
+
assistant_message: Message,
|
|
349
|
+
response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
|
|
350
|
+
tools: Optional[List[Dict[str, Any]]] = None,
|
|
351
|
+
tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
|
|
352
|
+
run_response: Optional[RunOutput] = None,
|
|
353
|
+
compress_tool_results: bool = False,
|
|
354
|
+
) -> AsyncIterator[Any]:
|
|
355
|
+
"""
|
|
356
|
+
Sends an asynchronous streaming chat completion request to the HuggingFace API.
|
|
357
|
+
"""
|
|
358
|
+
try:
|
|
359
|
+
if run_response and run_response.metrics:
|
|
360
|
+
run_response.metrics.set_time_to_first_token()
|
|
361
|
+
|
|
362
|
+
assistant_message.metrics.start_timer()
|
|
363
|
+
provider_response = await self.get_async_client().chat.completions.create(
|
|
364
|
+
model=self.id,
|
|
365
|
+
messages=[self._format_message(m, compress_tool_results) for m in messages],
|
|
366
|
+
stream=True,
|
|
367
|
+
stream_options=ChatCompletionInputStreamOptions(include_usage=True), # type: ignore
|
|
368
|
+
**self.get_request_params(tools=tools, tool_choice=tool_choice),
|
|
369
|
+
)
|
|
492
370
|
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
model_response.content = assistant_message.get_content_string()
|
|
371
|
+
async for chunk in provider_response:
|
|
372
|
+
yield self._parse_provider_response_delta(chunk)
|
|
496
373
|
|
|
497
|
-
|
|
498
|
-
|
|
374
|
+
assistant_message.metrics.stop_timer()
|
|
375
|
+
|
|
376
|
+
except InferenceTimeoutError as e:
|
|
377
|
+
log_error(f"Error invoking HuggingFace model: {e}")
|
|
378
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
379
|
+
except Exception as e:
|
|
380
|
+
log_error(f"Unexpected error invoking HuggingFace model: {e}")
|
|
381
|
+
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
|
|
499
382
|
|
|
500
|
-
|
|
383
|
+
# Override base method
|
|
384
|
+
@staticmethod
|
|
385
|
+
def parse_tool_calls(tool_calls_data: List[ChatCompletionStreamOutputDeltaToolCall]) -> List[Dict[str, Any]]:
|
|
501
386
|
"""
|
|
502
|
-
|
|
387
|
+
Build tool calls from streamed tool call data.
|
|
503
388
|
|
|
504
389
|
Args:
|
|
505
|
-
|
|
390
|
+
tool_calls_data (List[ChatCompletionStreamOutputDeltaToolCall]): The tool call data to build from.
|
|
506
391
|
|
|
507
392
|
Returns:
|
|
508
|
-
|
|
393
|
+
List[Dict[str, Any]]: The built tool calls.
|
|
394
|
+
"""
|
|
395
|
+
tool_calls: List[Dict[str, Any]] = []
|
|
396
|
+
for tool_call in tool_calls_data:
|
|
397
|
+
_tool_call = tool_call[0]
|
|
398
|
+
_index = _tool_call.index
|
|
399
|
+
_tool_call_id = _tool_call.id
|
|
400
|
+
_tool_call_type = _tool_call.type
|
|
401
|
+
_function_name = _tool_call.function.name if _tool_call.function else None
|
|
402
|
+
_function_arguments = _tool_call.function.arguments if _tool_call.function else None
|
|
403
|
+
|
|
404
|
+
if len(tool_calls) <= _index:
|
|
405
|
+
tool_calls.extend([{}] * (_index - len(tool_calls) + 1))
|
|
406
|
+
tool_call_entry = tool_calls[_index]
|
|
407
|
+
if not tool_call_entry:
|
|
408
|
+
tool_call_entry["id"] = _tool_call_id
|
|
409
|
+
tool_call_entry["type"] = _tool_call_type
|
|
410
|
+
tool_call_entry["function"] = {
|
|
411
|
+
"name": _function_name or "",
|
|
412
|
+
"arguments": _function_arguments or "",
|
|
413
|
+
}
|
|
414
|
+
else:
|
|
415
|
+
if _function_name:
|
|
416
|
+
tool_call_entry["function"]["name"] += _function_name
|
|
417
|
+
if _function_arguments:
|
|
418
|
+
tool_call_entry["function"]["arguments"] += _function_arguments
|
|
419
|
+
if _tool_call_id:
|
|
420
|
+
tool_call_entry["id"] = _tool_call_id
|
|
421
|
+
if _tool_call_type:
|
|
422
|
+
tool_call_entry["type"] = _tool_call_type
|
|
423
|
+
return tool_calls
+
+    def _parse_provider_response(
+        self,
+        response: ChatCompletionOutput,
+        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> ModelResponse:
+        """
+        Parse the provider response into a ModelResponse.
         """
-        logger.debug("---------- HuggingFace Async Response Start ----------")
-        self._log_messages(messages)
         model_response = ModelResponse()
-        metrics = Metrics()
 
-        # -*- Generate response
-        metrics.start_response_timer()
-        response: Union[ChatCompletionOutput] = await self.ainvoke(messages=messages)
-        metrics.stop_response_timer()
-
-        # -*- Parse response
         response_message: ChatCompletionOutputMessage = response.choices[0].message
-        response_usage: Optional[ChatCompletionOutputUsage] = response.usage
 
-
+        model_response.role = response_message.role
+        if response_message.content is not None:
+            model_response.content = response_message.content
+
+        if response_message.tool_calls is not None and len(response_message.tool_calls) > 0:
+            model_response.tool_calls = [asdict(t) for t in response_message.tool_calls]
+            for tool_call in model_response.tool_calls:
+                if isinstance(tool_call["function"]["arguments"], dict):
+                    tool_call["function"]["arguments"] = json.dumps(tool_call["function"]["arguments"])
+
         try:
             if (
-
-                and
-                and issubclass(
+                response_format is not None
+                and isinstance(response_format, type)
+                and issubclass(response_format, BaseModel)
             ):
                 parsed_object = response_message.parsed  # type: ignore
                 if parsed_object is not None:
                     model_response.parsed = parsed_object
         except Exception as e:
-
-
-        # -*- Create assistant message
-        assistant_message = self._create_assistant_message(
-            response_message=response_message, metrics=metrics, response_usage=response_usage
-        )
-
-        # -*- Add assistant message to messages
-        messages.append(assistant_message)
+            log_warning(f"Error retrieving structured outputs: {e}")
 
-
-
-        metrics.log()
+        if response.usage is not None:
+            model_response.response_usage = self._get_metrics(response)
 
-        # -*- Handle tool calls
-        if self._handle_tool_calls(assistant_message, messages, model_response):
-            response_after_tool_calls = await self.aresponse(messages=messages)
-            if response_after_tool_calls.content is not None:
-                if model_response.content is None:
-                    model_response.content = ""
-                model_response.content += response_after_tool_calls.content
-            return model_response
-
-        # -*- Update model response
-        if assistant_message.content is not None:
-            model_response.content = assistant_message.get_content_string()
-
-        logger.debug("---------- HuggingFace Async Response End ----------")
         return model_response
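
The `asdict`/`json.dumps` step above normalizes tool-call arguments to a JSON string regardless of whether the provider returned them as a dict. A minimal stdlib-only sketch, with hypothetical dataclasses standing in for the provider types:

```python
import json
from dataclasses import asdict, dataclass
from typing import Any

@dataclass
class _Function:
    # Hypothetical stand-in: `arguments` may arrive as a dict or a JSON string.
    name: str
    arguments: Any

@dataclass
class _ToolCall:
    id: str
    type: str
    function: _Function

raw = _ToolCall(id="call_1", type="function", function=_Function("add", {"a": 1, "b": 2}))
tool_call = asdict(raw)  # nested dataclasses become nested dicts
if isinstance(tool_call["function"]["arguments"], dict):
    # Normalize to the JSON-string form the rest of the pipeline expects.
    tool_call["function"]["arguments"] = json.dumps(tool_call["function"]["arguments"])
print(tool_call["function"]["arguments"])  # '{"a": 1, "b": 2}' as a string
```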
 
-    def
+    def _parse_provider_response_delta(self, response_delta: ChatCompletionStreamOutput) -> ModelResponse:
         """
-
-
-        Args:
-            assistant_message (Message): The assistant message.
-            metrics (Metrics): The metrics.
-        """
-        # Update time taken to generate response
-        assistant_message.metrics["time"] = metrics.response_timer.elapsed
-        self.metrics.setdefault("response_times", []).append(metrics.response_timer.elapsed)
-
-        if metrics.time_to_first_token is not None:
-            assistant_message.metrics["time_to_first_token"] = metrics.time_to_first_token
-            self.metrics.setdefault("time_to_first_token", []).append(metrics.time_to_first_token)
-
-        if metrics.input_tokens is not None:
-            assistant_message.metrics["input_tokens"] = metrics.input_tokens
-            self.metrics["input_tokens"] = self.metrics.get("input_tokens", 0) + metrics.input_tokens
-        if metrics.output_tokens is not None:
-            assistant_message.metrics["output_tokens"] = metrics.output_tokens
-            self.metrics["output_tokens"] = self.metrics.get("output_tokens", 0) + metrics.output_tokens
-        if metrics.prompt_tokens is not None:
-            assistant_message.metrics["prompt_tokens"] = metrics.prompt_tokens
-            self.metrics["prompt_tokens"] = self.metrics.get("prompt_tokens", 0) + metrics.prompt_tokens
-        if metrics.completion_tokens is not None:
-            assistant_message.metrics["completion_tokens"] = metrics.completion_tokens
-            self.metrics["completion_tokens"] = self.metrics.get("completion_tokens", 0) + metrics.completion_tokens
-        if metrics.total_tokens is not None:
-            assistant_message.metrics["total_tokens"] = metrics.total_tokens
-            self.metrics["total_tokens"] = self.metrics.get("total_tokens", 0) + metrics.total_tokens
-        if metrics.prompt_tokens_details is not None:
-            assistant_message.metrics["prompt_tokens_details"] = metrics.prompt_tokens_details
-            for k, v in metrics.prompt_tokens_details.items():
-                self.metrics.get("prompt_tokens_details", {}).get(k, 0) + v
-        if metrics.completion_tokens_details is not None:
-            assistant_message.metrics["completion_tokens_details"] = metrics.completion_tokens_details
-            for k, v in metrics.completion_tokens_details.items():
-                self.metrics.get("completion_tokens_details", {}).get(k, 0) + v
-
-    def _handle_stream_tool_calls(
-        self,
-        assistant_message: Message,
-        messages: List[Message],
-    ) -> Iterator[ModelResponse]:
+        Parse the provider response delta into a ModelResponse.
         """
-
+        model_response = ModelResponse()
+        if response_delta.choices and len(response_delta.choices) > 0:
+            response_delta_message: ChatCompletionStreamOutputDelta = response_delta.choices[0].delta
 
-
-            assistant_message (Message): The assistant message.
-            messages (List[Message]): The list of messages.
+            model_response.role = response_delta_message.role
 
-
-
-
-
-
-
-        function_call_results: List[Message] = []
-        for tool_call in assistant_message.tool_calls:
-            _tool_call_id = tool_call.get("id")
-            _function_call = get_function_call_for_tool_call(tool_call, self._functions)
-            if _function_call is None:
-                messages.append(
-                    Message(
-                        role=tool_role,
-                        tool_call_id=_tool_call_id,
-                        content="Could not find function to call.",
-                    )
-                )
-                continue
-            if _function_call.error is not None:
-                messages.append(
-                    Message(
-                        role=tool_role,
-                        tool_call_id=_tool_call_id,
-                        content=_function_call.error,
-                    )
-                )
-                continue
-            function_calls_to_run.append(_function_call)
-
-        if self.show_tool_calls:
-            yield ModelResponse(content="\nRunning:")
-            for _f in function_calls_to_run:
-                yield ModelResponse(content=f"\n - {_f.get_call_str()}")
-            yield ModelResponse(content="\n\n")
-
-        for intermediate_model_response in self.run_function_calls(
-            function_calls=function_calls_to_run, function_call_results=function_call_results, tool_role=tool_role
-        ):
-            yield intermediate_model_response
+            if response_delta_message.content is not None:
+                model_response.content = response_delta_message.content
+            if response_delta_message.tool_calls is not None and len(response_delta_message.tool_calls) > 0:
+                model_response.tool_calls = [response_delta_message.tool_calls]  # type: ignore
+        if response_delta.usage is not None:
+            model_response.response_usage = self._get_metrics(response_delta)
 
-
-        messages.extend(function_call_results)
+        return model_response
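
Each streamed chunk becomes one `ModelResponse` carrying at most a content fragment, a tool-call fragment, and (on the final chunk, when `include_usage` is set) usage. A sketch of stitching such deltas back together, assuming only the `content`/`tool_calls`/`response_usage` attributes seen in this hunk:

```python
from types import SimpleNamespace
from typing import Any, List, Optional, Tuple

def accumulate(deltas) -> Tuple[str, List[Any], Optional[Any]]:
    # Stitch streamed ModelResponse deltas back into one message.
    text_parts: List[str] = []
    tool_call_fragments: List[Any] = []
    usage: Optional[Any] = None
    for delta in deltas:
        if delta.content:
            text_parts.append(delta.content)
        if delta.tool_calls:
            tool_call_fragments.extend(delta.tool_calls)
        if delta.response_usage is not None:
            usage = delta.response_usage  # typically only on the final chunk
    return "".join(text_parts), tool_call_fragments, usage

# Illustrative deltas built with SimpleNamespace stand-ins:
deltas = [
    SimpleNamespace(content="Hel", tool_calls=None, response_usage=None),
    SimpleNamespace(content="lo", tool_calls=None, response_usage=None),
]
print(accumulate(deltas)[0])  # "Hello"
```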
 
-    def
+    def _get_metrics(self, response: Union[ChatCompletionOutput, ChatCompletionStreamOutput]) -> Metrics:
         """
-
+        Parse the given HuggingFace-specific usage into an Agno Metrics object.
 
         Args:
-
+            response: The HuggingFace response to parse.
 
         Returns:
-
-        """
-        logger.debug("---------- HuggingFace Response Start ----------")
-        self._log_messages(messages)
-        stream_data: StreamData = StreamData()
-
-        # -*- Generate response
-        for response in self.invoke_stream(messages=messages):
-            if len(response.choices) > 0:
-                # metrics.completion_tokens += 1
-
-                response_delta: ChatCompletionStreamOutputDelta = response.choices[0].delta
-                response_content: Optional[str] = response_delta.content
-                response_tool_calls: Optional[List[ChatCompletionStreamOutputDeltaToolCall]] = response_delta.tool_calls
-
-                if response_content is not None:
-                    stream_data.response_content += response_content
-                    yield ModelResponse(content=response_content)
-
-                if response_tool_calls is not None:
-                    if stream_data.response_tool_calls is None:
-                        stream_data.response_tool_calls = []
-                    stream_data.response_tool_calls.extend(response_tool_calls)
-
-        # -*- Create assistant message
-        assistant_message = Message(role="assistant")
-        if stream_data.response_content != "":
-            assistant_message.content = stream_data.response_content
-
-        if stream_data.response_tool_calls is not None:
-            _tool_calls = self._build_tool_calls(stream_data.response_tool_calls)
-            if len(_tool_calls) > 0:
-                assistant_message.tool_calls = _tool_calls
-
-        # -*- Add assistant message to messages
-        messages.append(assistant_message)
-
-        # -*- Handle tool calls
-        if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
-            yield from self._handle_stream_tool_calls(assistant_message, messages)
-            yield from self.response_stream(messages=messages)
-        logger.debug("---------- HuggingFace Response End ----------")
-
-    async def aresponse_stream(self, messages: List[Message]) -> Any:
+            Metrics: Parsed metrics data
         """
-
+        metrics = Metrics()
 
-
-
+        if not response.usage:
+            return metrics
 
-
-
-
-
-
-        stream_data: StreamData = StreamData()
-        metrics: Metrics = Metrics()
-
-        # -*- Generate response
-        metrics.start_response_timer()
-        async for response in self.ainvoke_stream(messages=messages):
-            if len(response.choices) > 0:
-                metrics.completion_tokens += 1
-                if metrics.completion_tokens == 1:
-                    metrics.time_to_first_token = metrics.response_timer.elapsed
-
-                response_delta: ChatCompletionStreamOutputDelta = response.choices[0].delta
-                response_content = response_delta.content
-                response_tool_calls = response_delta.tool_calls
-
-                if response_content is not None:
-                    stream_data.response_content += response_content
-                    yield ModelResponse(content=response_content)
-
-                if response_tool_calls is not None:
-                    if stream_data.response_tool_calls is None:
-                        stream_data.response_tool_calls = []
-                    stream_data.response_tool_calls.extend(response_tool_calls)
-        metrics.stop_response_timer()
-
-        # -*- Create assistant message
-        assistant_message = Message(role="assistant")
-        if stream_data.response_content != "":
-            assistant_message.content = stream_data.response_content
-
-        if stream_data.response_tool_calls is not None:
-            _tool_calls = self._build_tool_calls(stream_data.response_tool_calls)
-            if len(_tool_calls) > 0:
-                assistant_message.tool_calls = _tool_calls
-
-        self._update_stream_metrics(assistant_message=assistant_message, metrics=metrics)
-
-        # -*- Add assistant message to messages
-        messages.append(assistant_message)
-
-        # -*- Log response and metrics
-        assistant_message.log()
-        metrics.log()
-
-        # -*- Handle tool calls
-        if assistant_message.tool_calls is not None and len(assistant_message.tool_calls) > 0:
-            for model_response in self._handle_stream_tool_calls(assistant_message, messages):
-                yield model_response
-            async for model_response in self.aresponse_stream(messages=messages):
-                yield model_response
-        logger.debug("---------- HuggingFace Hub Async Response End ----------")
+        metrics.input_tokens = response.usage.prompt_tokens or 0
+        metrics.output_tokens = response.usage.completion_tokens or 0
+        metrics.total_tokens = metrics.input_tokens + metrics.output_tokens
+
+        return metrics
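
`_get_metrics` maps provider usage onto Agno's `Metrics`, treating absent counts as zero. A tiny sketch of that mapping, with a `SimpleNamespace` stand-in for the usage object:

```python
from types import SimpleNamespace
from typing import Optional, Tuple

def usage_to_counts(usage: Optional[SimpleNamespace]) -> Tuple[int, int, int]:
    # Mirror the hunk above: missing usage or missing counts become 0,
    # and the total is derived rather than trusted from the provider.
    input_tokens = (usage.prompt_tokens or 0) if usage else 0
    output_tokens = (usage.completion_tokens or 0) if usage else 0
    return input_tokens, output_tokens, input_tokens + output_tokens

print(usage_to_counts(SimpleNamespace(prompt_tokens=42, completion_tokens=None)))  # (42, 0, 42)
print(usage_to_counts(None))  # (0, 0, 0)
```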