agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import uuid
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import IO, Any, List, Optional, Union
|
|
5
|
+
|
|
6
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
7
|
+
from agno.knowledge.document.base import Document
|
|
8
|
+
from agno.knowledge.reader.base import Reader
|
|
9
|
+
from agno.knowledge.types import ContentType
|
|
10
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
11
|
+
|
|
12
|
+
# Chunker used by every MarkdownReader that is not handed an explicit strategy.
DEFAULT_CHUNKER_STRATEGY: ChunkingStrategy

# Prefer the Markdown-aware chunker. If importing it (or constructing it)
# raises ImportError, fall back to fixed-size chunking and record that the
# Markdown chunker is unavailable so it is not advertised as supported.
try:
    from agno.knowledge.chunking.markdown import MarkdownChunking

    DEFAULT_CHUNKER_STRATEGY, MARKDOWN_CHUNKER_AVAILABLE = MarkdownChunking(), True
except ImportError:
    from agno.knowledge.chunking.fixed import FixedSizeChunking

    DEFAULT_CHUNKER_STRATEGY, MARKDOWN_CHUNKER_AVAILABLE = FixedSizeChunking(), False
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class MarkdownReader(Reader):
|
|
28
|
+
"""Reader for Markdown files"""
|
|
29
|
+
|
|
30
|
+
@classmethod
|
|
31
|
+
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
32
|
+
"""Get the list of supported chunking strategies for Markdown readers."""
|
|
33
|
+
strategies = [
|
|
34
|
+
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
35
|
+
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
36
|
+
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
37
|
+
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
38
|
+
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
# Only include MarkdownChunking if it's available
|
|
42
|
+
if MARKDOWN_CHUNKER_AVAILABLE:
|
|
43
|
+
strategies.insert(0, ChunkingStrategyType.MARKDOWN_CHUNKER)
|
|
44
|
+
|
|
45
|
+
return strategies
|
|
46
|
+
|
|
47
|
+
@classmethod
|
|
48
|
+
def get_supported_content_types(self) -> List[ContentType]:
|
|
49
|
+
return [ContentType.MARKDOWN]
|
|
50
|
+
|
|
51
|
+
def __init__(
|
|
52
|
+
self,
|
|
53
|
+
chunking_strategy: Optional[ChunkingStrategy] = None,
|
|
54
|
+
name: Optional[str] = None,
|
|
55
|
+
description: Optional[str] = None,
|
|
56
|
+
) -> None:
|
|
57
|
+
# Use the default chunking strategy if none provided
|
|
58
|
+
if chunking_strategy is None:
|
|
59
|
+
chunking_strategy = DEFAULT_CHUNKER_STRATEGY
|
|
60
|
+
|
|
61
|
+
super().__init__(chunking_strategy=chunking_strategy, name=name, description=description)
|
|
62
|
+
|
|
63
|
+
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
64
|
+
try:
|
|
65
|
+
if isinstance(file, Path):
|
|
66
|
+
if not file.exists():
|
|
67
|
+
raise FileNotFoundError(f"Could not find file: {file}")
|
|
68
|
+
log_debug(f"Reading: {file}")
|
|
69
|
+
file_name = name or file.stem
|
|
70
|
+
file_contents = file.read_text(encoding=self.encoding or "utf-8")
|
|
71
|
+
else:
|
|
72
|
+
log_debug(f"Reading uploaded file: {file.name}")
|
|
73
|
+
file_name = name or file.name.split(".")[0]
|
|
74
|
+
file.seek(0)
|
|
75
|
+
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
76
|
+
|
|
77
|
+
documents = [Document(name=file_name, id=str(uuid.uuid4()), content=file_contents)]
|
|
78
|
+
if self.chunk:
|
|
79
|
+
chunked_documents = []
|
|
80
|
+
for document in documents:
|
|
81
|
+
chunked_documents.extend(self.chunk_document(document))
|
|
82
|
+
return chunked_documents
|
|
83
|
+
return documents
|
|
84
|
+
except Exception as e:
|
|
85
|
+
log_error(f"Error reading: {file}: {e}")
|
|
86
|
+
return []
|
|
87
|
+
|
|
88
|
+
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
    """Asynchronously read a text file into a document, or into chunks when chunking is enabled.

    Paths are read through ``aiofiles`` when that package is importable and
    fall back to a blocking ``Path.read_text`` otherwise. File-like objects are
    always read with their own (blocking) ``read``.

    Args:
        file: Either a filesystem path or an open file-like object that
            exposes ``name``, ``seek`` and ``read``.
        name: Optional document name. When omitted, the path stem (for a
            ``Path``) or the part of ``file.name`` before the first dot is used.

    Returns:
        A list with the document, or the result of ``_async_chunk_document``
        when ``self.chunk`` is truthy. Any failure is logged and an empty list
        is returned instead of raising.
    """
    try:
        encoding = self.encoding or "utf-8"
        if isinstance(file, Path):
            if not file.exists():
                raise FileNotFoundError(f"Could not find file: {file}")

            log_debug(f"Reading asynchronously: {file}")
            file_name = name or file.stem

            try:
                import aiofiles

                async with aiofiles.open(file, "r", encoding=encoding) as f:
                    file_contents = await f.read()
            except ImportError:
                log_warning("aiofiles not installed, using synchronous file I/O")
                file_contents = file.read_text(encoding=encoding)
        else:
            log_debug(f"Reading uploaded file asynchronously: {file.name}")
            file_name = name or file.name.split(".")[0]
            # Rewind so a stream that was already consumed is read from the start.
            file.seek(0)
            raw_contents = file.read()
            # Text-mode streams already yield ``str``; only binary payloads need decoding.
            file_contents = raw_contents if isinstance(raw_contents, str) else raw_contents.decode(encoding)

        document = Document(
            name=file_name,
            id=str(uuid.uuid4()),
            content=file_contents,
        )

        if self.chunk:
            return await self._async_chunk_document(document)
        return [document]
    except Exception as e:
        # Best-effort reader: report the problem and let the caller carry on.
        log_error(f"Error reading asynchronously: {file}: {e}")
        return []
|
|
123
|
+
|
|
124
|
+
async def _async_chunk_document(self, document: Document) -> List[Document]:
    """Split ``document`` into chunks using the reader's ``chunk_document``.

    The chunking itself is synchronous; this coroutine only exists so the
    async read path can ``await`` it.

    Args:
        document: The document to split.

    Returns:
        The chunks as a list. ``[document]`` is returned unchanged when
        chunking is disabled, the document is falsy, or chunking yields nothing.
    """
    if not self.chunk or not document:
        return [document]

    chunked_documents = self.chunk_document(document)
    if not chunked_documents:
        return [document]

    # No per-chunk async work is needed, so the chunks are returned directly
    # rather than being routed through no-op coroutines.
    return list(chunked_documents)
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import re
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import IO, Any, List, Optional, Tuple, Union
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
|
|
7
|
+
from agno.knowledge.chunking.document import DocumentChunking
|
|
8
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
9
|
+
from agno.knowledge.document.base import Document
|
|
10
|
+
from agno.knowledge.reader.base import Reader
|
|
11
|
+
from agno.knowledge.types import ContentType
|
|
12
|
+
from agno.utils.log import log_debug, log_error
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
from pypdf import PdfReader as DocumentReader # noqa: F401
|
|
16
|
+
from pypdf.errors import PdfStreamError
|
|
17
|
+
except ImportError:
|
|
18
|
+
raise ImportError("`pypdf` not installed. Please install it via `pip install pypdf`.")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
PAGE_START_NUMBERING_FORMAT_DEFAULT = "<start page {page_nr}>"
|
|
22
|
+
PAGE_END_NUMBERING_FORMAT_DEFAULT = "<end page {page_nr}>"
|
|
23
|
+
PAGE_NUMBERING_CORRECTNESS_RATIO_FOR_REMOVAL = 0.4
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _ocr_reader(page: Any) -> str:
    """Run OCR over every image embedded in a PDF page and return the recognized text.

    Args:
        page: A single PDF page object exposing ``images``; each image must
            expose its raw bytes as ``data``.

    Returns:
        The recognized text fragments joined with newlines. An empty string is
        returned when the page has no images or nothing was recognized.

    Raises:
        ImportError: If ``rapidocr_onnxruntime`` is not installed.
    """
    try:
        import rapidocr_onnxruntime as rapidocr
    except ImportError:
        raise ImportError(
            "`rapidocr_onnxruntime` not installed. Please install it via `pip install rapidocr_onnxruntime`."
        )
    ocr = rapidocr.RapidOCR()
    images_text_list: List[str] = []

    for image_object in page.images:
        # The engine returns a pair; only the first element (the results) is used.
        ocr_result, _ = ocr(image_object.data)
        if ocr_result:
            # Index 1 of each result entry is taken as the recognized text.
            images_text_list.extend(item[1] for item in ocr_result)

    return "\n".join(images_text_list)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def _async_ocr_reader(page: Any) -> str:
    """Coroutine variant of the page OCR helper: recognize text in a page's images.

    Args:
        page: A single PDF page object exposing ``images``; each image must
            expose its raw bytes as ``data``.

    Returns:
        The recognized text fragments joined with newlines, in image order. An
        empty string is returned when the page has no images or nothing was
        recognized.

    Raises:
        ImportError: If ``rapidocr_onnxruntime` is not installed.
    """
    try:
        import rapidocr_onnxruntime as rapidocr
    except ImportError:
        raise ImportError(
            "`rapidocr_onnxruntime` not installed. Please install it via `pip install rapidocr_onnxruntime`."
        )
    ocr = rapidocr.RapidOCR()

    async def process_image(image_data: bytes) -> List[str]:
        # NOTE: the OCR call below is synchronous and never awaits, so each
        # image is processed to completion before the next coroutine runs.
        ocr_result, _ = ocr(image_data)
        return [item[1] for item in ocr_result] if ocr_result else []

    # gather() preserves the order of the inputs, so text stays in image order.
    images_results = await asyncio.gather(*(process_image(image.data) for image in page.images))

    return "\n".join(text for result in images_results for text in result)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _clean_page_numbers(
    page_content_list: List[str],
    extra_content: Optional[List[str]] = None,
    page_start_numbering_format: str = PAGE_START_NUMBERING_FORMAT_DEFAULT,
    page_end_numbering_format: str = PAGE_END_NUMBERING_FORMAT_DEFAULT,
) -> Tuple[List[str], Optional[int]]:
    """Identify and remove or reformat page numbers in a list of PDF page contents.

    The most consistent sequential numbering is detected and, when it is
    consistent enough, the raw numbers are stripped and replaced by the
    configured start/end markers.

    Args:
        page_content_list: One string per PDF page. The list is not modified.
        extra_content: Optional strings appended after each page's main content
            (for example text recognized in images). Must be empty/omitted or
            have the same length as ``page_content_list``.
        page_start_numbering_format: Format string prepended to each page, with
            ``{page_nr}`` as the placeholder for the page number. Defaults to
            ``PAGE_START_NUMBERING_FORMAT_DEFAULT``; pass an empty string to omit it.
        page_end_numbering_format: Format string appended to each page, with
            ``{page_nr}`` as the placeholder for the page number. Defaults to
            ``PAGE_END_NUMBERING_FORMAT_DEFAULT``; pass an empty string to omit it.

    Returns:
        A tuple ``(pages, shift)``: the new page contents, and the detected
        numbering shift (one of -2, -1, 0, 1, 2) or ``None`` when no reliable
        numbering was found.

    Notes:
        - Candidate numbers are digits at the very start or very end of a page.
        - Detection is skipped when no candidate is 5 or lower.
        - Numbers are only processed when at least
          ``PAGE_NUMBERING_CORRECTNESS_RATIO_FOR_REMOVAL`` of the pages match
          the best sequence.
    """
    extra_content = extra_content or []
    assert len(extra_content) == 0 or len(extra_content) == len(page_content_list), (
        "Please provide an equally sized list of extra content if provided."
    )

    # Regex to match potential page numbers at the start or end of a string
    page_number_regex = re.compile(r"^\s*(\d+)\s*|\s*(\d+)\s*$")

    def find_page_number(content: str) -> Optional[int]:
        match = page_number_regex.search(content)
        if match:
            return int(match.group(1) or match.group(2))
        return None

    page_numbers = [find_page_number(content) for content in page_content_list]
    if all(x is None or x > 5 for x in page_numbers):
        # This approach won't work reliably for higher page numbers.
        if not extra_content:
            return list(page_content_list), None
        return [f"\n{content}\n{extra}" for content, extra in zip(page_content_list, extra_content)], None

    # Possible range shifts to detect page numbering
    range_shifts = [-2, -1, 0, 1, 2]
    best_match, best_correct_count, best_shift = _identify_best_page_sequence(page_numbers, range_shifts)

    # Bail out unless enough pages carry the expected sequential number.
    if not best_match or best_correct_count / len(page_numbers) < PAGE_NUMBERING_CORRECTNESS_RATIO_FOR_REMOVAL:
        return list(page_content_list), None

    cleaned_pages: List[str] = []
    for index, (content, expected_number) in enumerate(zip(page_content_list, best_match)):
        # Strip the expected number from the start and/or end of the page.
        content = re.sub(rf"^\s*{expected_number}\s*|\s*{expected_number}\s*$", "", content)

        page_start = (
            page_start_numbering_format.format(page_nr=expected_number) + "\n" if page_start_numbering_format else ""
        )
        page_end = "\n" + page_end_numbering_format.format(page_nr=expected_number) if page_end_numbering_format else ""
        extra_info = "\n" + extra_content[index] if extra_content else ""

        # Add formatted page numbering if configured.
        cleaned_pages.append(page_start + content + extra_info + page_end)

    return cleaned_pages, best_shift
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _identify_best_page_sequence(
    page_numbers: List[Optional[int]], range_shifts: List[int]
) -> Tuple[Optional[List[int]], int, Optional[int]]:
    """Find the numbering shift whose expected sequence matches the most pages.

    For each shift, page ``i`` is expected to carry the number ``i + shift``.

    Args:
        page_numbers: The number found on each page, or ``None`` where no
            candidate number was found.
        range_shifts: Candidate shifts to try, in priority order; on a tie the
            earlier shift wins.

    Returns:
        A tuple ``(best_match, best_correct_count, best_shift)``: the expected
        number sequence of the winning shift, how many pages matched it, and
        the shift itself. ``(None, 0, None)`` is returned when nothing matched.
    """
    best_match: Optional[List[int]] = None
    best_shift: Optional[int] = None
    best_correct_count = 0

    for shift in range_shifts:
        expected_numbers = [index + shift for index in range(len(page_numbers))]

        correct_count = 0
        for actual, expected in zip(page_numbers, expected_numbers):
            if actual is None:
                # A page without a candidate number can never match.
                continue
            actual_text, expected_text = str(actual), str(expected)
            # Accept an exact hit, or the expected number at either end of an
            # incorrectly merged number such as 25, where 2 is the page number
            # and 5 is part of the PDF content.
            if actual == expected or actual_text.startswith(expected_text) or actual_text.endswith(expected_text):
                correct_count += 1

        if correct_count > best_correct_count:
            best_correct_count = correct_count
            best_match = expected_numbers
            best_shift = shift

    return best_match, best_correct_count, best_shift
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class BasePDFReader(Reader):
    """Shared logic for PDF readers: naming, decryption, page clean-up and document creation."""

    def __init__(
        self,
        split_on_pages: bool = True,
        page_start_numbering_format: Optional[str] = None,
        page_end_numbering_format: Optional[str] = None,
        password: Optional[str] = None,
        # NOTE(review): this default instance is created once, when the class is
        # defined, and is therefore shared by every reader that relies on it.
        chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(chunk_size=5000),
        **kwargs,
    ):
        """Configure the reader.

        Args:
            split_on_pages: Create one document per page when True, otherwise a
                single document for the whole PDF.
            page_start_numbering_format: Marker prepended to each page; ``None``
                selects ``PAGE_START_NUMBERING_FORMAT_DEFAULT``.
            page_end_numbering_format: Marker appended to each page; ``None``
                selects ``PAGE_END_NUMBERING_FORMAT_DEFAULT``.
            password: Password used for encrypted PDFs when none is passed per call.
            chunking_strategy: Strategy forwarded to the base reader.
            **kwargs: Forwarded unchanged to the base reader.
        """
        if page_start_numbering_format is None:
            page_start_numbering_format = PAGE_START_NUMBERING_FORMAT_DEFAULT
        if page_end_numbering_format is None:
            page_end_numbering_format = PAGE_END_NUMBERING_FORMAT_DEFAULT

        self.split_on_pages = split_on_pages
        self.page_start_numbering_format = page_start_numbering_format
        self.page_end_numbering_format = page_end_numbering_format
        self.password = password

        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for PDF readers."""
        return [
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
        ]

    def _build_chunked_documents(self, documents: List[Document]) -> List[Document]:
        """Chunk every document and return all chunks as one flat list."""
        chunked_documents: List[Document] = []
        for document in documents:
            chunked_documents.extend(self.chunk_document(document))
        return chunked_documents

    def _get_doc_name(self, pdf_source: Union[str, Path, IO[Any]], name: Optional[str] = None) -> str:
        """Determines the document name from the source or a provided name."""
        try:
            if name:
                return name
            if isinstance(pdf_source, str):
                # Last "/"-separated segment, cut at the first dot, spaces replaced.
                return pdf_source.split("/")[-1].split(".")[0].replace(" ", "_")
            # Anything else is expected to expose a .name attribute (paths and
            # named file-like objects do).
            return pdf_source.name.split(".")[0]
        except Exception:
            # No usable name could be derived from the source.
            return name or "pdf"

    def _decrypt_pdf(self, doc_reader: DocumentReader, doc_name: str, password: Optional[str] = None) -> bool:
        """Decrypt ``doc_reader`` if it is encrypted.

        Args:
            doc_reader: The opened PDF reader.
            doc_name: Name used in log messages.
            password: Password for this call; falls back to ``self.password``.

        Returns:
            True when the PDF is readable (not encrypted, or decrypted
            successfully); False otherwise. Failures are logged, never raised.
        """
        if not doc_reader.is_encrypted:
            return True

        # Use provided password or fall back to instance password
        pdf_password = password or self.password
        if not pdf_password:
            log_error(f'PDF file "{doc_name}" is password protected but no password provided')
            return False

        try:
            decrypted_pdf = doc_reader.decrypt(pdf_password)
            if decrypted_pdf:
                log_debug(f'Successfully decrypted PDF file "{doc_name}" with user password')
                return True
            else:
                log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
                return False
        except Exception as e:
            log_error(f'Error decrypting PDF file "{doc_name}": {e}')
            return False

    def _create_documents(
        self,
        pdf_content: List[str],
        doc_name: str,
        use_uuid_for_id: bool,
        page_number_shift: Optional[int],
    ) -> List[Document]:
        """Turn cleaned page contents into documents, chunked when ``self.chunk`` is set.

        Args:
            pdf_content: One cleaned string per page.
            doc_name: Name given to every created document.
            use_uuid_for_id: Use a random UUID as document id instead of an id
                derived from ``doc_name``.
            page_number_shift: Number assigned to the first page; ``None``
                means numbering starts at 1.
        """
        if self.split_on_pages:
            shift = page_number_shift if page_number_shift is not None else 1
            documents: List[Document] = []
            for page_number, page_content in enumerate(pdf_content, start=shift):
                documents.append(
                    Document(
                        name=doc_name,
                        id=(str(uuid4()) if use_uuid_for_id else f"{doc_name}_{page_number}"),
                        meta_data={"page": page_number},
                        content=page_content,
                    )
                )
        else:
            pdf_content_str = "\n".join(pdf_content)
            document = Document(
                name=doc_name,
                id=str(uuid4()) if use_uuid_for_id else doc_name,
                meta_data={},
                content=pdf_content_str,
            )
            documents = [document]

        if self.chunk:
            return self._build_chunked_documents(documents)
        return documents

    def _pdf_reader_to_documents(
        self,
        doc_reader: DocumentReader,
        doc_name: str,
        read_images: bool = False,
        use_uuid_for_id: bool = False,
    ) -> List[Document]:
        """Extract text (and optionally OCR text from images) from every page and build documents."""
        pdf_content: List[str] = []
        pdf_images_text: List[str] = []
        for page in doc_reader.pages:
            pdf_content.append(page.extract_text())
            if read_images:
                pdf_images_text.append(_ocr_reader(page))

        pdf_content, shift = _clean_page_numbers(
            page_content_list=pdf_content,
            extra_content=pdf_images_text,
            page_start_numbering_format=self.page_start_numbering_format,
            page_end_numbering_format=self.page_end_numbering_format,
        )
        return self._create_documents(pdf_content, doc_name, use_uuid_for_id, shift)

    async def _async_pdf_reader_to_documents(
        self,
        doc_reader: DocumentReader,
        doc_name: str,
        read_images: bool = False,
        use_uuid_for_id: bool = False,
    ) -> List[Document]:
        """Coroutine variant of ``_pdf_reader_to_documents``; produces the same page contents."""

        async def _read_pdf_page(page, read_images) -> Tuple[str, str]:
            # We tried "asyncio.to_thread(page.extract_text)", but it maintains state internally, which leads to issues.
            page_text = page.extract_text()

            if read_images:
                pdf_images_text = await _async_ocr_reader(page)
            else:
                pdf_images_text = ""

            return page_text, pdf_images_text

        # gather() keeps the results in page order. Text extraction itself is
        # synchronous, so pages are not actually extracted concurrently.
        pdf_content: List[Tuple[str, str]] = await asyncio.gather(
            *[_read_pdf_page(page, read_images) for page in doc_reader.pages]
        )

        pdf_content_clean, shift = _clean_page_numbers(
            page_content_list=[x[0] for x in pdf_content],
            # Only pass image text when it was requested. A list of empty
            # strings would still count as extra content and add stray
            # newlines that the synchronous path does not produce.
            extra_content=[x[1] for x in pdf_content] if read_images else [],
            page_start_numbering_format=self.page_start_numbering_format,
            page_end_numbering_format=self.page_end_numbering_format,
        )

        return self._create_documents(pdf_content_clean, doc_name, use_uuid_for_id, shift)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class PDFReader(BasePDFReader):
    """Reader for PDF files"""

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the content types this reader handles."""
        return [ContentType.PDF]

    def _open_pdf(
        self,
        pdf: Optional[Union[str, Path, IO[Any]]],
        name: Optional[str],
        password: Optional[str],
    ) -> Optional[Tuple[DocumentReader, str]]:
        """Open and, if needed, decrypt ``pdf``.

        Returns:
            ``(reader, document_name)`` when the PDF is ready to be read, or
            ``None`` when no source was given, the stream is not a valid PDF,
            or decryption failed. Each failure is logged.
        """
        if pdf is None:
            log_error("No pdf provided")
            return None

        doc_name = self._get_doc_name(pdf, name)
        log_debug(f"Reading: {doc_name}")

        try:
            pdf_reader = DocumentReader(pdf)
        except PdfStreamError as e:
            log_error(f"Error reading PDF: {e}")
            return None

        # Handle PDF decryption
        if not self._decrypt_pdf(pdf_reader, doc_name, password):
            return None

        return pdf_reader, doc_name

    def read(
        self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
    ) -> List[Document]:
        """Read a PDF and return its text as documents.

        Args:
            pdf: Path, path string or file-like object holding the PDF.
            name: Optional document name; derived from ``pdf`` when omitted.
            password: Password for encrypted PDFs; falls back to the instance password.

        Returns:
            The created documents, or an empty list when the PDF could not be
            opened or decrypted.
        """
        opened = self._open_pdf(pdf, name, password)
        if opened is None:
            return []

        # Read and chunk
        pdf_reader, doc_name = opened
        return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)

    async def async_read(
        self,
        pdf: Optional[Union[str, Path, IO[Any]]] = None,
        name: Optional[str] = None,
        password: Optional[str] = None,
    ) -> List[Document]:
        """Coroutine variant of ``read``; same arguments and return value."""
        opened = self._open_pdf(pdf, name, password)
        if opened is None:
            return []

        # Read and chunk.
        pdf_reader, doc_name = opened
        return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
class PDFImageReader(BasePDFReader):
    """Reader for PDF files with text and images extraction"""

    def _prepare_reader(
        self,
        pdf: Union[str, Path, IO[Any]],
        name: Optional[str],
        password: Optional[str],
    ) -> Optional[Tuple[DocumentReader, str]]:
        """Validate, open and decrypt ``pdf``.

        Returns:
            ``(reader, document_name)`` when the PDF is ready to be read, or
            ``None`` when the stream is not a valid PDF or decryption failed.

        Raises:
            ValueError: If ``pdf`` is falsy.
        """
        if not pdf:
            raise ValueError("No pdf provided")

        doc_name = self._get_doc_name(pdf, name)
        log_debug(f"Reading: {doc_name}")

        try:
            opened_reader = DocumentReader(pdf)
        except PdfStreamError as e:
            log_error(f"Error reading PDF: {e}")
            return None

        # An encrypted PDF that cannot be unlocked yields no documents.
        if self._decrypt_pdf(opened_reader, doc_name, password):
            return opened_reader, doc_name
        return None

    def read(
        self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
    ) -> List[Document]:
        """Read a PDF, including OCR text from its images, and return documents.

        Returns an empty list when the PDF could not be opened or decrypted.
        Raises ``ValueError`` when ``pdf`` is falsy.
        """
        prepared = self._prepare_reader(pdf, name, password)
        if prepared is None:
            return []

        opened_reader, doc_name = prepared
        return self._pdf_reader_to_documents(opened_reader, doc_name, read_images=True, use_uuid_for_id=True)

    async def async_read(
        self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
    ) -> List[Document]:
        """Coroutine variant of ``read``; same arguments, return value and errors."""
        prepared = self._prepare_reader(pdf, name, password)
        if prepared is None:
            return []

        opened_reader, doc_name = prepared
        return await self._async_pdf_reader_to_documents(
            opened_reader, doc_name, read_images=True, use_uuid_for_id=True
        )
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import IO, Any, List, Optional, Union
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
from agno.knowledge.chunking.document import DocumentChunking
|
|
7
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
8
|
+
from agno.knowledge.document.base import Document
|
|
9
|
+
from agno.knowledge.reader.base import Reader
|
|
10
|
+
from agno.knowledge.types import ContentType
|
|
11
|
+
from agno.utils.log import log_debug, log_error
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
from pptx import Presentation # type: ignore
|
|
15
|
+
except ImportError:
|
|
16
|
+
raise ImportError("The `python-pptx` package is not installed. Please install it via `pip install python-pptx`.")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PPTXReader(Reader):
    """Reader for PPTX files"""

    # NOTE(review): the default DocumentChunking() below is created once, when
    # the class is defined, and is shared by every reader that relies on it.
    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(), **kwargs):
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for PPTX readers."""
        return [
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the content types this reader handles."""
        return [ContentType.PPTX]

    @staticmethod
    def _slide_text(slide_number: int, slide: Any) -> str:
        """Render one slide as a "Slide N:" header followed by its non-empty shape texts."""
        text_content = [
            shape.text.strip() for shape in slide.shapes if hasattr(shape, "text") and shape.text.strip()
        ]
        body = "\n".join(text_content) if text_content else "(No text content)"
        return f"Slide {slide_number}:\n{body}"

    def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
        """Read a pptx file and return a list of documents

        Args:
            file: Path to the presentation, or a file-like object holding it.
            name: Optional document name. When omitted, the path stem or the
                part of the file object's ``name`` before the first dot is
                used, with ``"pptx_file"`` as the last resort.

        Returns:
            One document holding the text of all slides, or its chunks when
            ``self.chunk`` is truthy. Any failure is logged and an empty list
            is returned instead of raising.
        """
        try:
            if isinstance(file, Path):
                if not file.exists():
                    raise FileNotFoundError(f"Could not find file: {file}")
                log_debug(f"Reading: {file}")
                presentation = Presentation(str(file))
                doc_name = name or file.stem
            else:
                log_debug(f"Reading uploaded file: {getattr(file, 'name', 'pptx_file')}")
                presentation = Presentation(file)
                # Tolerate file objects whose name is missing, None or not a string.
                doc_name = name or str(getattr(file, "name", None) or "pptx_file").split(".")[0]

            # Extract text from all slides
            slide_texts = [
                self._slide_text(slide_number, slide)
                for slide_number, slide in enumerate(presentation.slides, 1)
            ]

            document = Document(
                name=doc_name,
                id=str(uuid4()),
                content="\n\n".join(slide_texts),
            )

            if self.chunk:
                return list(self.chunk_document(document))
            return [document]

        except Exception as e:
            # Best-effort reader: report the problem and let the caller carry on.
            log_error(f"Error reading file: {e}")
            return []

    async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
        """Asynchronously read a pptx file and return a list of documents

        The synchronous ``read`` is run in a worker thread via ``asyncio.to_thread``.
        """
        try:
            return await asyncio.to_thread(self.read, file, name)
        except Exception as e:
            log_error(f"Error reading file asynchronously: {e}")
            return []
|