ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
from typing import Union, List
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import PurePath
|
|
5
|
+
from ..stores.models import Document
|
|
6
|
+
from .basevideo import BaseVideoLoader
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def split_text(text, max_length):
    """Split text into chunks of at most ``max_length`` characters.

    The text is first cut on blank lines (``"\\n\\n"``) into paragraphs.
    A paragraph longer than ``max_length`` is cut into sentences (after
    ``.``, ``!`` or ``?`` followed by spaces), and a sentence that is still
    too long is cut on whitespace into words. Words are never broken, so a
    single word longer than ``max_length`` is emitted as its own
    (over-long) chunk.

    Pieces are then packed greedily: paragraphs are re-joined with
    ``"\\n\\n"`` and sentences/words with a single space.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk.

    Returns:
        A list of non-empty, whitespace-stripped chunks, in original order.
    """
    # Flatten the text into (piece, separator) pairs, where the separator is
    # what goes *before* the piece when it is appended to a non-empty chunk.
    pieces = []
    for paragraph in text.split('\n\n'):
        if len(paragraph) <= max_length:
            pieces.append((paragraph, "\n\n"))
            continue
        # Paragraph is too large: fall back to sentences.
        for sentence in re.split(r'(?<=[.!?]) +', paragraph):
            if len(sentence) <= max_length:
                pieces.append((sentence, " "))
            else:
                # Sentence is still too large (e.g. a transcript without
                # punctuation): fall back to words so the limit is honoured.
                pieces.extend((word, " ") for word in sentence.split())

    chunks = []
    current_chunk = ""
    for piece, separator in pieces:
        if not current_chunk:
            # Never prefix an empty chunk with a separator, and never flush
            # it: that is what produced empty "" chunks before.
            current_chunk = piece
        elif len(current_chunk) + len(separator) + len(piece) > max_length:
            stripped = current_chunk.strip()
            if stripped:
                chunks.append(stripped)
            current_chunk = piece
        else:
            current_chunk += separator + piece

    # Add any remaining text to chunks
    stripped = current_chunk.strip()
    if stripped:
        chunks.append(stripped)

    return chunks
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class VideoLocalLoader(BaseVideoLoader):
    """
    Generating Video transcripts from local Videos.

    For each video file the loader extracts the audio track, transcribes it,
    and returns a list of ``Document`` objects: the plain transcript (split
    into chunks when very long), a summary, a VTT rendering, one document
    per dialog block and, when diarization is enabled, an SRT rendering.
    """
    extensions: List[str] = ['.mp4', '.webm']

    def __init__(
        self,
        *args,
        source: List[Union[str, PurePath]] = None,
        tokenizer: Union[str, Callable] = None,
        text_splitter: Union[str, Callable] = None,
        source_type: str = 'video',
        **kwargs
    ):
        """Configure the loader.

        Args:
            source: Video file(s) to load.
            tokenizer: Forwarded to the base loader.
            text_splitter: Forwarded to the base loader.
            source_type: Forwarded to the base loader.
            **kwargs: Forwarded to the base loader; the keys
                ``extract_frames``, ``seconds_per_frame``,
                ``compress_speed`` and ``speed_factor`` are also read here.
        """
        super().__init__(
            source=source,
            tokenizer=tokenizer,
            text_splitter=text_splitter,
            source_type=source_type,
            **kwargs
        )
        # NOTE(review): these options are popped *after* kwargs was already
        # forwarded to the base class, so the base class sees them too --
        # confirm that is intended.
        self.extract_frames: bool = kwargs.pop('extract_frames', False)
        self.seconds_per_frame: int = kwargs.pop('seconds_per_frame', 1)
        self.compress_speed: bool = kwargs.pop('compress_speed', False)
        self.speed_factor: float = kwargs.pop('speed_factor', 1.5)

    async def _load(self, path: Union[str, PurePath, List[PurePath]], **kwargs) -> List[Document]:
        """Transcribe one local video and build its documents.

        Side files are written next to the video, sharing its stem:
        ``.wav`` (audio), ``.txt`` (transcript), ``.summary``, ``.vtt`` and
        ``.srt`` paths are derived with ``with_suffix``.

        Args:
            path: Path of the video file. A ``str`` is converted to a
                ``pathlib.Path``.

        Returns:
            The list of generated ``Document`` objects.

        Raises:
            Exception: Any error raised while extracting or transcribing the
                audio is printed and re-raised unchanged.
        """
        if isinstance(path, str):
            # The annotation allows str, but the code below needs a path
            # object (.name, .stem, .with_suffix).
            from pathlib import Path
            path = Path(path)
        metadata = {
            "url": f"{path}",
            "source": f"{path}",
            "filename": f"{path.name}",
            "question": '',
            "answer": '',
            'type': 'video_transcript',
            "source_type": self._source_type,
            "data": {},
            "summary": '',
            "document_meta": {
                "language": self._language,
                "topic_tags": ""
            }
        }
        documents = []
        transcript_path = path.with_suffix('.txt')
        vtt_path = path.with_suffix('.vtt')
        srt_path = path.with_suffix(".srt")
        summary_path = path.with_suffix('.summary')
        audio_path = path.with_suffix('.wav')
        # second: extract audio from File
        try:
            self.extract_audio(
                path,
                audio_path,
                compress_speed=self.compress_speed,
                speed_factor=self.speed_factor
            )
        except Exception as exc:
            print(f"Error extracting audio from video: {exc}")
            raise
        transcript = ''
        try:
            # ensure a clean 16k Hz mono wav file for whisper
            wav_path = self.ensure_wav_16k_mono(audio_path)
            # get the Whisper parser
            transcript_whisper = self.get_whisperx_transcript(wav_path)
            transcript = transcript_whisper.get('text', '') if transcript_whisper else ''
        except Exception as exc:
            print(f"Error transcribing audio from video: {exc}")
            raise
        # diarization:
        if self._diarization:
            if (srt := self.audio_to_srt(
                audio_path=wav_path,
                asr=transcript_whisper,
                output_srt_path=srt_path,
                max_gap_s=0.5,
                max_chars=90,
                max_duration_s=0.9,
            )):
                doc = Document(
                    page_content=srt,
                    metadata={
                        "source": f"{srt_path}",
                        "url": f"{srt_path.name}",
                        "filename": f"{srt_path}",
                        "origin": f"{path}",
                        'type': 'srt_transcript',
                        "source_type": 'AUDIO',
                    }
                )
                # The SRT document used to be built and then discarded.
                documents.append(doc)
        # Summarize the transcript
        if transcript:
            # first: extract summary, saving summary as a document:
            summary = await self.summary_from_text(transcript)
            # The summary is checked for truthiness below, so tolerate a
            # None result here instead of crashing on .encode().
            self.saving_file(summary_path, (summary or '').encode('utf-8'))
            # second: saving transcript to a file:
            self.saving_file(transcript_path, transcript.encode('utf-8'))
            # Create Three Documents:
            # one is for transcript
            # split document only if size > 65.534
            if len(transcript) > 65534:
                # Split transcript into chunks
                transcript_chunks = split_text(transcript, 32767)
            else:
                transcript_chunks = [transcript]
            documents.extend(
                Document(page_content=chunk, metadata=metadata)
                for chunk in transcript_chunks
            )
            # second is Summary
            if summary:
                _meta = {
                    **metadata,
                    "type": 'video summary'
                }
                doc = Document(
                    page_content=summary,
                    metadata=_meta
                )
                # The summary document used to be built and then discarded.
                documents.append(doc)
        # Third is VTT:
        # NOTE(review): the original nesting of this branch could not be
        # recovered from the flattened source; it is placed at method level
        # (independent of `transcript`) -- confirm.
        if transcript_whisper:
            # VTT version:
            vtt_text = self.transcript_to_vtt(transcript_whisper, vtt_path)
            _meta = {
                **metadata,
                "type": 'video subte vtt'
            }
            if len(vtt_text) > 65535:
                vtt_chunks = split_text(vtt_text, 65535)
            else:
                vtt_chunks = [vtt_text]
            documents.extend(
                Document(page_content=chunk, metadata=_meta)
                for chunk in vtt_chunks
            )
            # Saving every dialog chunk as a separate document
            dialogs = self.transcript_to_blocks(transcript_whisper)
            docs = []
            for chunk in dialogs:
                start_time = chunk['start_time']
                _meta = {
                    "source": f"{path.name}: min. {start_time}",
                    "type": "video dialog",
                    "document_meta": {
                        "start": f"{start_time}",
                        "end": f"{chunk['end_time']}",
                        "id": f"{chunk['id']}",
                        "language": self._language,
                        "title": f"{path.stem}",
                        "topic_tags": ""
                    }
                }
                # Dialog-specific keys override the shared video metadata.
                _info = {**metadata, **_meta}
                doc = Document(
                    page_content=chunk['text'],
                    metadata=_info
                )
                docs.append(doc)
            documents.extend(docs)
        return documents

    async def load_video(self, url: str, video_title: str, transcript: str) -> list:
        """Placeholder with no implementation; returns ``None``."""
        pass
|
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
from typing import Union, List, Optional
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
import re
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import PurePath, Path
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from ..stores.models import Document
|
|
8
|
+
from .basevideo import BaseVideoLoader
|
|
9
|
+
from ..clients.google import GoogleGenAIClient
|
|
10
|
+
from ..models.google import GoogleModel
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def split_text(text, max_length):
    """Split text into chunks of at most ``max_length`` characters.

    The text is first cut on blank lines (``"\\n\\n"``) into paragraphs.
    A paragraph longer than ``max_length`` is cut into sentences (after
    ``.``, ``!`` or ``?`` followed by spaces), and a sentence that is still
    too long is cut on whitespace into words. Words are never broken, so a
    single word longer than ``max_length`` is emitted as its own
    (over-long) chunk.

    Pieces are then packed greedily: paragraphs are re-joined with
    ``"\\n\\n"`` and sentences/words with a single space.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk.

    Returns:
        A list of non-empty, whitespace-stripped chunks, in original order.
    """
    # Flatten the text into (piece, separator) pairs, where the separator is
    # what goes *before* the piece when it is appended to a non-empty chunk.
    pieces = []
    for paragraph in text.split('\n\n'):
        if len(paragraph) <= max_length:
            pieces.append((paragraph, "\n\n"))
            continue
        # Paragraph is too large: fall back to sentences.
        for sentence in re.split(r'(?<=[.!?]) +', paragraph):
            if len(sentence) <= max_length:
                pieces.append((sentence, " "))
            else:
                # Sentence is still too large (e.g. a transcript without
                # punctuation): fall back to words so the limit is honoured.
                pieces.extend((word, " ") for word in sentence.split())

    chunks = []
    current_chunk = ""
    for piece, separator in pieces:
        if not current_chunk:
            # Never prefix an empty chunk with a separator, and never flush
            # it: that is what produced empty "" chunks before.
            current_chunk = piece
        elif len(current_chunk) + len(separator) + len(piece) > max_length:
            stripped = current_chunk.strip()
            if stripped:
                chunks.append(stripped)
            current_chunk = piece
        else:
            current_chunk += separator + piece

    # Add any remaining text to chunks
    stripped = current_chunk.strip()
    if stripped:
        chunks.append(stripped)

    return chunks
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def extract_scenes_from_response(response_text: str) -> List[dict]:
    """
    Extract structured scenes from the AI response.

    Two strategies are tried in order:

    1. JSON: the span from the first ``{`` to the last ``}`` is parsed; if it
       is an object whose ``scenes`` value is a list of objects, that list is
       returned unchanged.
    2. Text: the response is split on ``Scene N`` / ``Step N`` markers
       (case-insensitive). For each marker, double-quoted text is collected
       as spoken text and the remainder becomes the instructions.

    If neither strategy finds anything, a single scene holding the whole
    response is returned.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        A list of scene dictionaries. Scenes built by the text strategy have
        the keys ``scene_number``, ``instructions``, ``spoken_text``,
        ``content`` and ``timestamp``; scenes taken from JSON keep whatever
        keys the model produced.
    """
    scenes = []

    # Try to extract JSON from the response
    try:
        # Greedy match: first '{' through last '}' in the response.
        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
        if json_match:
            json_data = json.loads(json_match.group())
            candidate = json_data.get('scenes') if isinstance(json_data, dict) else None
            # Only trust the JSON payload when it has the shape callers
            # iterate over (a list of dicts); otherwise fall back to text
            # parsing instead of returning None / a string / scalars.
            if isinstance(candidate, list) and all(
                isinstance(item, dict) for item in candidate
            ):
                return candidate
    except json.JSONDecodeError:
        # Not valid JSON: fall through to the text-based parser.
        pass

    # Fallback: Parse text-based scenes
    # Look for scene markers like "Scene 1:", "Step 1:", etc.
    scene_pattern = r'(?:Scene|Step)\s*(\d+)[:.]?\s*(.*?)(?=(?:Scene|Step)\s*\d+|$)'
    matches = re.findall(scene_pattern, response_text, re.DOTALL | re.IGNORECASE)

    for i, (scene_num, content) in enumerate(matches):
        # Extract quoted text (spoken text)
        quotes = re.findall(r'"([^"]*)"', content)

        # Extract instructions (non-quoted text), collapsing whitespace runs
        instructions = re.sub(r'"[^"]*"', '', content).strip()
        instructions = re.sub(r'\s+', ' ', instructions)

        scene_data = {
            'scene_number': int(scene_num) if scene_num.isdigit() else i + 1,
            'instructions': instructions,
            'spoken_text': ' '.join(quotes) if quotes else '',
            'content': content.strip(),
            'timestamp': f"Scene {scene_num}" if scene_num else f"Scene {i + 1}"
        }
        scenes.append(scene_data)

    # If no scenes found, create one scene with all content
    if not scenes:
        scenes.append({
            'scene_number': 1,
            'instructions': response_text,
            'spoken_text': '',
            'content': response_text,
            'timestamp': 'Full Video'
        })

    return scenes
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class VideoUnderstandingLoader(BaseVideoLoader):
    """
    Video analysis loader using Google GenAI for understanding video content.

    Extracts step-by-step instructions and spoken text from training videos.

    For every video file the loader asks the model for an analysis, stores
    the raw answer next to the video (``<name>.ai_analysis.txt``) and turns
    it into several documents: the full analysis (chunked when very long),
    one document per scene, and two summaries (instructions only / spoken
    text only). Any failure is reported as a single
    ``video_analysis_error`` document instead of being raised to the caller.
    """
    extensions: List[str] = ['.mp4', '.webm', '.avi', '.mov', '.mkv']

    def __init__(
        self,
        source: Optional[Union[str, Path, List[Union[str, Path]]]] = None,
        *,
        tokenizer: Optional[Union[str, Callable]] = None,
        text_splitter: Optional[Union[str, Callable]] = None,
        source_type: str = 'video_understanding',
        model: Union[str, GoogleModel] = GoogleModel.GEMINI_2_5_FLASH_IMAGE_PREVIEW,
        temperature: float = 0.2,
        prompt: Optional[str] = None,
        custom_instructions: Optional[str] = None,
        **kwargs
    ):
        """Configure the loader.

        Args:
            source: Video file(s) to load; forwarded to the base loader.
            tokenizer: Forwarded to the base loader.
            text_splitter: Forwarded to the base loader.
            source_type: Forwarded to the base loader.
            model: Google model (enum member or name) used for the analysis.
            temperature: Sampling temperature passed to the model.
            prompt: Overrides ``default_prompt`` when given.
            custom_instructions: Overrides ``default_instructions`` when given.
            **kwargs: Forwarded to the base loader.
        """
        super().__init__(
            source,
            tokenizer=tokenizer,
            text_splitter=text_splitter,
            source_type=source_type,
            **kwargs
        )

        # Google GenAI configuration
        self.model = model
        self.temperature = temperature
        # Created lazily by _get_google_client().
        self.google_client = None

        # Custom prompts
        self.prompt = prompt
        self.custom_instructions = custom_instructions

        # Default prompt for video analysis
        self.default_prompt = """
Analyze the video and extract step-by-step instructions for employees to follow, and the spoken text into quotation marks, related to the training content shown in this video.
"""

        # Default instruction for video analysis
        self.default_instructions = """
Video Analysis Instructions:
1. Videos are training materials for employees to learn how to use Workday.
2. There are several step-by-step processes shown in the video, with screenshots and spoken text.
3. Break down the video into distinct scenes based on changes in visuals or context.
4. For each scene, extract all step-by-step instructions, including any spoken text in quotation marks.
5. Place each caption into an object with the timecode of the caption in the video.
"""

    async def _get_google_client(self) -> GoogleGenAIClient:
        """Get or create Google GenAI client."""
        if self.google_client is None:
            self.google_client = GoogleGenAIClient(model=self.model)
        return self.google_client

    async def _analyze_video_with_ai(self, video_path: Path) -> str:
        """Analyze video using Google GenAI.

        Args:
            video_path: Path of the video file to analyze.

        Returns:
            The ``output`` attribute of the client response when present,
            otherwise the response converted to ``str``.

        Raises:
            Exception: Any error raised while creating the client or calling
                the model is logged and re-raised, so that the caller can
                report a proper error document rather than indexing an error
                message as if it were the analysis.
        """
        try:
            client = await self._get_google_client()

            # Use custom prompt or default
            prompt = self.prompt or self.default_prompt
            instructions = self.custom_instructions or self.default_instructions

            async with client as ai_client:
                self.logger.info(f"Analyzing video with Google GenAI: {video_path.name}")

                response = await ai_client.video_understanding(
                    video=video_path,
                    prompt=prompt,
                    prompt_instruction=instructions,
                    temperature=self.temperature,
                    stateless=True
                )

            return response.output if hasattr(response, 'output') else str(response)

        except Exception as e:
            self.logger.error(f"Error analyzing video with AI: {e}")
            raise

    @staticmethod
    def _scene_text(scene: dict, key: str) -> str:
        """Return ``scene[key]`` as text; missing/None becomes ''."""
        value = scene.get(key)
        if value is None:
            return ''
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            # e.g. a JSON array of steps: one entry per line.
            return "\n".join(str(item) for item in value)
        return str(value)

    async def _load(self, path: Union[str, PurePath, List[PurePath]], **kwargs) -> List[Document]:
        """Load and analyze video file.

        Args:
            path: A video path, or a list of video paths (each one is loaded
                in turn and the results are concatenated).
            **kwargs: Only forwarded when recursing over a list of paths.

        Returns:
            The generated documents. An empty list when the file does not
            exist; a single ``video_analysis_error`` document when the
            analysis or post-processing fails.
        """
        if isinstance(path, (list, tuple)):
            collected = []
            for item in path:
                collected.extend(await self._load(item, **kwargs))
            return collected

        if isinstance(path, (str, PurePath)):
            path = Path(path)
        if not path.exists():
            self.logger.error(f"Video file not found: {path}")
            return []

        self.logger.info(f"Processing video: {path.name}")

        # Base metadata
        base_metadata = {
            "url": f"file://{path}",
            "source": str(path),
            "filename": path.name,
            "type": "video_understanding",
            "source_type": self._source_type,
            "category": self.category,
            "created_at": datetime.now().strftime("%Y-%m-%d, %H:%M:%S"),
            "document_meta": {
                "language": self._language,
                "model_used": str(self.model.value if hasattr(self.model, 'value') else self.model),
                "analysis_type": "video_understanding",
                "video_title": path.stem
            }
        }

        documents = []

        try:
            # Analyze video with Google GenAI (raises on failure, handled below)
            ai_response = await self._analyze_video_with_ai(path)

            # Save AI response to file
            response_path = path.with_suffix('.ai_analysis.txt')
            self.saving_file(response_path, ai_response.encode('utf-8'))

            # Extract scenes from AI response; keep only well-formed entries
            # so that a malformed model payload cannot break the loops below.
            scenes = extract_scenes_from_response(ai_response)
            if not isinstance(scenes, list):
                scenes = []
            scenes = [scene for scene in scenes if isinstance(scene, dict)]

            # Create main analysis document
            main_doc_metadata = {
                **base_metadata,
                "type": "video_analysis_full",
                "document_meta": {
                    **base_metadata["document_meta"],
                    "total_scenes": len(scenes),
                    "analysis_timestamp": datetime.now().isoformat()
                }
            }

            # Split if too long
            if len(ai_response) > 65534:
                chunks = split_text(ai_response, 32767)
                for i, chunk in enumerate(chunks):
                    chunk_metadata = {
                        **main_doc_metadata,
                        "type": "video_analysis_chunk",
                        "document_meta": {
                            **main_doc_metadata["document_meta"],
                            "chunk_number": i + 1,
                            "total_chunks": len(chunks)
                        }
                    }
                    documents.append(
                        Document(
                            page_content=chunk,
                            metadata=chunk_metadata
                        )
                    )
            else:
                documents.append(
                    Document(
                        page_content=ai_response,
                        metadata=main_doc_metadata
                    )
                )

            # Collected while building scene documents, for the summaries.
            all_instructions = []
            all_spoken = []

            # Create individual scene documents
            for scene in scenes:
                instructions = self._scene_text(scene, 'instructions')
                spoken_text = self._scene_text(scene, 'spoken_text')

                scene_metadata = {
                    **base_metadata,
                    "type": "video_scene",
                    "source": f"{path.name}: {scene.get('timestamp', 'Scene')}",
                    "document_meta": {
                        **base_metadata["document_meta"],
                        "scene_number": scene.get('scene_number', 1),
                        "timestamp": scene.get('timestamp', ''),
                        "has_spoken_text": bool(spoken_text.strip()),
                        "has_instructions": bool(instructions.strip())
                    }
                }

                # Create content combining instructions and spoken text
                content_parts = []

                if instructions:
                    content_parts.append(f"INSTRUCTIONS:\n{instructions}")
                    all_instructions.append(
                        f"Scene {scene.get('scene_number', '')}: {instructions}"
                    )

                if spoken_text:
                    content_parts.append(f"SPOKEN TEXT:\n\"{spoken_text}\"")
                    all_spoken.append(
                        f"Scene {scene.get('scene_number', '')}: \"{spoken_text}\""
                    )

                if content_parts:
                    scene_content = "\n\n".join(content_parts)
                else:
                    # Neither field present: fall back to the raw content.
                    scene_content = self._scene_text(scene, 'content')

                if scene_content.strip():
                    documents.append(
                        Document(
                            page_content=scene_content,
                            metadata=scene_metadata
                        )
                    )

            # Instructions summary document
            if all_instructions:
                instructions_metadata = {
                    **base_metadata,
                    "type": "video_instructions_summary",
                    "document_meta": {
                        **base_metadata["document_meta"],
                        "content_type": "instructions_only",
                        "scene_count": len(all_instructions)
                    }
                }

                instructions_content = "STEP-BY-STEP INSTRUCTIONS:\n\n" + "\n\n".join(all_instructions)

                documents.append(
                    Document(
                        page_content=instructions_content,
                        metadata=instructions_metadata
                    )
                )

            # Spoken text summary document
            if all_spoken:
                spoken_metadata = {
                    **base_metadata,
                    "type": "video_spoken_summary",
                    "document_meta": {
                        **base_metadata["document_meta"],
                        "content_type": "spoken_text_only",
                        "scene_count": len(all_spoken)
                    }
                }

                spoken_content = "SPOKEN TEXT TRANSCRIPT:\n\n" + "\n\n".join(all_spoken)

                documents.append(
                    Document(
                        page_content=spoken_content,
                        metadata=spoken_metadata
                    )
                )

            self.logger.info(f"Generated {len(documents)} documents from video analysis")

        except Exception as e:
            self.logger.error(f"Error processing video {path}: {e}")
            # Create error document
            error_metadata = {
                **base_metadata,
                "type": "video_analysis_error",
                "document_meta": {
                    **base_metadata["document_meta"],
                    "error": str(e),
                    "error_timestamp": datetime.now().isoformat()
                }
            }

            documents.append(
                Document(
                    page_content=f"Error analyzing video {path.name}: {str(e)}",
                    metadata=error_metadata
                )
            )

        return documents

    async def load_video(self, url: str, video_title: str, transcript: str) -> list:
        """
        Required abstract method implementation.

        This method is not used in our implementation but required by BaseVideoLoader.
        The video analysis is performed by ``_load`` instead, so this always
        returns an empty list.
        """
        return []

    async def close(self):
        """Clean up resources by delegating to the base class ``clear_cuda()``."""
        super().clear_cuda()
|