ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
|
@@ -0,0 +1,1103 @@
|
|
|
1
|
+
from collections.abc import Mapping, Sequence
|
|
2
|
+
from typing import Any, Dict, Optional, List, Tuple
|
|
3
|
+
import re
|
|
4
|
+
import plistlib
|
|
5
|
+
import struct
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from io import BytesIO
|
|
8
|
+
from PIL import Image, ExifTags, PngImagePlugin
|
|
9
|
+
from PIL.ExifTags import TAGS, GPSTAGS, IFD
|
|
10
|
+
from PIL import TiffImagePlugin
|
|
11
|
+
from PIL.TiffImagePlugin import IFDRational
|
|
12
|
+
from libxmp import XMPFiles, consts
|
|
13
|
+
from pillow_heif import register_heif_opener
|
|
14
|
+
from .abstract import ImagePlugin
|
|
15
|
+
import base64
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
register_heif_opener() # Runs at import time: registers pillow_heif's opener so PIL can read HEIF images
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _json_safe(obj):
    """Return a copy of ``obj`` reduced to JSON-friendly values.

    Conversions are applied recursively through mappings and sequences:

    * ``IFDRational`` -> ``float``
    * ``bytes`` / ``bytearray`` -> ``str`` (undecodable bytes are replaced),
      with NUL characters removed
    * ``str`` -> the same text with NUL characters removed
    * mapping -> ``dict`` keeping the original keys, with converted values
    * any other sequence (list, tuple, ...) -> ``list`` of converted items

    Every other value is returned unchanged, so the result is only fully
    JSON-serialisable when the remaining leaves (and the mapping keys)
    already are.
    """
    if isinstance(obj, IFDRational):
        return float(obj)

    if isinstance(obj, (bytes, bytearray)):
        # bytearray is decoded here as well; otherwise it would fall through
        # to the final return and reach the JSON encoder as raw binary.
        return obj.decode(errors="replace").replace('\x00', '')

    if isinstance(obj, str):
        # Plain strings may carry embedded NULs too; strip them.
        return obj.replace('\x00', '')

    if isinstance(obj, Mapping):
        return {k: _json_safe(v) for k, v in obj.items()}

    # str / bytes / bytearray were handled above, so any Sequence reaching
    # this point is a genuine container.
    if isinstance(obj, Sequence):
        return [_json_safe(v) for v in obj]

    return obj
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _make_serialisable(val):
    """Coerce a single tag value into a plain Python value.

    An ``IFDRational`` becomes a ``float`` and ``bytes`` become ``str``
    (undecodable bytes are replaced). Any other value is returned unchanged.
    Unlike ``_json_safe`` this does not recurse and does not strip NULs.
    """
    if isinstance(val, IFDRational):
        converted = float(val)
    elif isinstance(val, bytes):
        converted = val.decode(errors="replace")
    else:
        converted = val
    return converted
|
|
50
|
+
|
|
51
|
+
def get_xmp_modify_date(image, path: Optional[str] = None) -> str | None:
|
|
52
|
+
# 1) Try to grab the raw XMP packet from the JPEG APP1 segment
|
|
53
|
+
raw_xmp = image.info.get("XML:com.adobe.xmp")
|
|
54
|
+
if raw_xmp:
|
|
55
|
+
# 2) Feed it to XMPFiles via a buffer
|
|
56
|
+
xmpfile = XMPFiles(buffer=raw_xmp)
|
|
57
|
+
else:
|
|
58
|
+
# fallback: let XMPFiles pull directly from the file
|
|
59
|
+
# xmpfile = XMPFiles(file_path=path)
|
|
60
|
+
return None
|
|
61
|
+
|
|
62
|
+
xmp = xmpfile.get_xmp()
|
|
63
|
+
if not xmp:
|
|
64
|
+
return None
|
|
65
|
+
|
|
66
|
+
# 3) Common XMP namespaces & properties for modification history:
|
|
67
|
+
# - consts.XMP_NS_XMP / "ModifyDate"
|
|
68
|
+
modify = xmp.get_property(consts.XMP_NS_XMP, "ModifyDate")
|
|
69
|
+
|
|
70
|
+
xmpfile.close_file()
|
|
71
|
+
|
|
72
|
+
return modify
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class EXIFPlugin(ImagePlugin):
|
|
76
|
+
"""
|
|
77
|
+
EXIFPlugin is a plugin for extracting EXIF data from images.
|
|
78
|
+
It extends the ImagePlugin class and implements the analyze method to extract EXIF data.
|
|
79
|
+
"""
|
|
80
|
+
column_name: str = "exif_data"
|
|
81
|
+
|
|
82
|
+
def __init__(self, *args, **kwargs):
    """Initialise the plugin.

    Reads the optional ``extract_geoloc`` keyword (default ``False``) and
    stores it on the instance, then forwards every positional and keyword
    argument, unchanged, to the parent initialiser.
    """
    geoloc_requested = kwargs.get("extract_geoloc", False)
    self.extract_geoloc: bool = geoloc_requested
    super().__init__(*args, **kwargs)
|
|
85
|
+
|
|
86
|
+
def convert_to_degrees(self, value: "tuple[IFDRational, ...]"):
    """
    Convert a (degrees, minutes, seconds) coordinate into decimal degrees.

    Args:
        value: Sequence with at least three components. Each component may
            be a rational object exposing ``num``/``den`` or
            ``numerator``/``denominator`` (IFDRational, Fraction, int, ...),
            anything accepted by ``float()``, or ``None`` (counted as 0).

    Returns:
        ``deg + min / 60 + sec / 3600`` as a float, or ``None`` when
        ``value`` is empty, has fewer than three components, or a component
        cannot be converted. A rational with a zero denominator contributes
        0.0 instead of raising or producing NaN.
    """
    rational_attrs = (("num", "den"), ("numerator", "denominator"))

    def to_float(r):
        if r is None:
            return 0.0
        # Both attribute spellings are checked: IFDRational exposes
        # numerator/denominator, so testing only num/den would skip the
        # zero-denominator guard and let float() return NaN.
        for num_name, den_name in rational_attrs:
            if hasattr(r, num_name) and hasattr(r, den_name):
                den = getattr(r, den_name)
                if den == 0:
                    return 0.0
                return float(getattr(r, num_name)) / float(den)
        return float(r)

    try:
        if not value or len(value) < 3:
            return None

        d = to_float(value[0])
        m = to_float(value[1])
        s = to_float(value[2])
        return d + (m / 60.0) + (s / 3600.0)
    except Exception:
        # Deliberate best effort: malformed GPS data must not abort the
        # whole EXIF extraction.
        return None
|
|
111
|
+
|
|
112
|
+
def extract_gps_datetime(self, exif: dict):
    """
    Extract GPS coordinates and a capture timestamp from a string-keyed
    EXIF dict.

    The timestamp is built from ``GPSDateStamp`` + ``GPSTimeStamp`` when both
    are present and parseable; otherwise ``DateTimeOriginal`` and then
    ``DateTime`` are tried. Coordinates are only produced when latitude,
    longitude and both hemisphere references are all present.

    Args:
        exif: EXIF data keyed by tag name. ``GPSInfo``, when present, is
            expected to be a mapping keyed by GPS tag name; any other value
            is treated as "no GPS data".

    Returns:
        A dict:
        {
            "datetime": ISO 8601 string, the raw EXIF string if it could not
                        be parsed, or None,
            "date": ISO date string or None,
            "latitude": decimal degrees (negative for "S") or None,
            "longitude": decimal degrees (negative for "W") or None
        }
    """
    gps = exif.get("GPSInfo") or {}
    if not isinstance(gps, Mapping):
        # GPSInfo may arrive undecoded (for instance as an integer pointer
        # rather than a dict); calling .get() on it would raise.
        gps = {}

    # 1) Latitude / longitude, only when the full set of four tags exists.
    latitude = longitude = None
    lat_tuple = gps.get("GPSLatitude")
    lat_ref = gps.get("GPSLatitudeRef")
    lon_tuple = gps.get("GPSLongitude")
    lon_ref = gps.get("GPSLongitudeRef")

    if lat_tuple and lat_ref and lon_tuple and lon_ref:
        lat_dd = self.convert_to_degrees(lat_tuple)
        lon_dd = self.convert_to_degrees(lon_tuple)

        if lat_dd is not None:
            # Southern hemisphere is expressed as a negative latitude.
            latitude = -lat_dd if str(lat_ref).upper() == "S" else lat_dd

        if lon_dd is not None:
            # Western hemisphere is expressed as a negative longitude.
            longitude = -lon_dd if str(lon_ref).upper() == "W" else lon_dd

    # 2) Preferred timestamp source: GPSDateStamp + GPSTimeStamp.
    datetime_str = None
    date_str = None
    date_stamp = gps.get("GPSDateStamp")  # e.g. "2025:03:18"
    time_stamp = gps.get("GPSTimeStamp")  # e.g. (23.0, 57.0, 50.0)

    if date_stamp and time_stamp:
        try:
            # Components may be floats or rationals; truncate to whole units.
            hours = int(time_stamp[0])
            minutes = int(time_stamp[1])
            seconds = int(time_stamp[2])
            dt = datetime.strptime(date_stamp, "%Y:%m:%d").replace(
                hour=hours, minute=minutes, second=seconds
            )
            datetime_str = dt.isoformat()
            date_str = dt.date().isoformat()
        except Exception:
            # Any malformed GPS stamp falls through to the EXIF date tags.
            datetime_str = None
            date_str = None

    # 3) Fallback: DateTimeOriginal, then DateTime.
    if not datetime_str:
        datetime_str = exif.get("DateTimeOriginal") or exif.get("DateTime") or None
        if datetime_str:
            try:
                dt = datetime.strptime(datetime_str, "%Y:%m:%d %H:%M:%S")
                datetime_str = dt.isoformat()
                date_str = dt.date().isoformat()
            except ValueError:
                # Not in "YYYY:MM:DD HH:MM:SS" form: keep the raw value.
                pass
            except TypeError:
                # Not a string at all: report no datetime.
                datetime_str = None

    return {
        "datetime": datetime_str,
        "date": date_str,
        "latitude": latitude,
        "longitude": longitude
    }
|
|
191
|
+
|
|
192
|
+
async def extract_iptc_data(self, image) -> dict:
    """
    Extract IPTC metadata from an image.

    Sources are consulted in this order; the format-specific fallbacks
    (APP13, TIFF tags, PNG text) only run while nothing has been found yet:

    1. ``image.info['photoshop']``, else ``image.info['iptc']``
    2. ``image.info['IPTCDigest']``
    3. a JPEG ``APP13`` segment containing ``Photoshop 3.0`` in ``image.applist``
    4. TIFF tags 33723, 700 and 34377 in ``image.tag_v2``
    5. PNG text chunks whose key starts with ``IPTC`` or is the XMP key
    6. an ``IPTCDigest="..."`` attribute inside the XMP packet in
       ``image.info`` (always checked; overrides an earlier digest)

    Args:
        image: The PIL Image object.
    Returns:
        Dictionary of IPTC data passed through ``_json_safe``, or an empty
        dict when nothing was found or any error occurred (the error is
        logged, not raised).
    """
    try:
        iptc_data = {}
        info = image.info

        # 1) Photoshop resource block, or a raw/decoded 'iptc' entry.
        if 'photoshop' in info:
            iptc_data = self._parse_photoshop_data(info['photoshop'])
        elif 'iptc' in info:
            iptc = info['iptc']
            if isinstance(iptc, bytes):
                iptc_data.update(self._parse_iptc_data(iptc))
            elif isinstance(iptc, dict):
                iptc_data.update(iptc)

        # 2) Digest stored directly in image.info.
        if 'IPTCDigest' in info:
            iptc_data['IPTCDigest'] = info['IPTCDigest']

        # 3) JPEG: first APP13 segment carrying a Photoshop block.
        if not iptc_data and hasattr(image, 'applist'):
            for segment, content in image.applist:
                if segment == 'APP13' and b'Photoshop 3.0' in content:
                    iptc_data = self._parse_photoshop_data(content)
                    break

        # 4) TIFF: tag 33723 holds the IPTC record set.
        if not iptc_data and hasattr(image, 'tag_v2'):
            if 33723 in image.tag_v2:
                iptc_raw = image.tag_v2[33723]
                if isinstance(iptc_raw, bytes):
                    iptc_data.update(self._parse_iptc_data(iptc_raw))

            # Also keep the raw value of every IPTC-related tag present.
            iptc_related_tags = [700, 33723, 34377]
            for tag in iptc_related_tags:
                if tag in image.tag_v2:
                    tag_name = TAGS.get(tag, f"Tag_{tag}")
                    iptc_data[tag_name] = _make_serialisable(image.tag_v2[tag])

        # 5) PNG: iTXt / tEXt chunks. A key of exactly 'IPTCDigest' is
        #    already matched by the 'IPTC' prefix test, so no separate
        #    branch is needed for it.
        if not iptc_data and hasattr(image, 'text'):
            for key, value in image.text.items():
                if key.startswith('IPTC') or key == 'XML:com.adobe.xmp':
                    iptc_data[key] = value

        # 6) XMP packet in any format: pull the digest attribute if present.
        if 'XML:com.adobe.xmp' in info:
            xmp_data = info['XML:com.adobe.xmp']
            if isinstance(xmp_data, str) and 'IPTCDigest' in xmp_data:
                match = re.search(r'IPTCDigest="([^"]+)"', xmp_data)
                if match:
                    iptc_data['IPTCDigest'] = match.group(1)

        return _json_safe(iptc_data) if iptc_data else {}
    except Exception as e:
        # Best effort: metadata problems must not fail the caller.
        self.logger.error(f'Error extracting IPTC data: {e}')
        return {}
|
|
268
|
+
|
|
269
|
+
def _parse_photoshop_data(self, data) -> dict:
|
|
270
|
+
"""
|
|
271
|
+
Parse Photoshop data block to extract IPTC metadata.
|
|
272
|
+
|
|
273
|
+
Args:
|
|
274
|
+
data: Raw Photoshop data (bytes or dict) from APP13 segment.
|
|
275
|
+
Returns:
|
|
276
|
+
Dictionary of extracted IPTC data.
|
|
277
|
+
"""
|
|
278
|
+
iptc_data = {}
|
|
279
|
+
try:
|
|
280
|
+
# Handle the case where data is already a dictionary
|
|
281
|
+
if isinstance(data, dict):
|
|
282
|
+
# If it's a dictionary, check for IPTCDigest key directly
|
|
283
|
+
if 'IPTCDigest' in data:
|
|
284
|
+
iptc_data['IPTCDigest'] = data['IPTCDigest']
|
|
285
|
+
|
|
286
|
+
# Check for IPTC data
|
|
287
|
+
if 'IPTC' in data or 1028 in data: # 1028 (0x0404) is the IPTC identifier
|
|
288
|
+
iptc_block = data.get('IPTC', data.get(1028, b''))
|
|
289
|
+
if isinstance(iptc_block, bytes):
|
|
290
|
+
iptc_records = self._parse_iptc_data(iptc_block)
|
|
291
|
+
iptc_data.update(iptc_records)
|
|
292
|
+
|
|
293
|
+
return iptc_data
|
|
294
|
+
|
|
295
|
+
# If it's bytes, proceed with the original implementation
|
|
296
|
+
if not isinstance(data, bytes):
|
|
297
|
+
self.logger.debug(f"Expected bytes for Photoshop data, got {type(data)}")
|
|
298
|
+
return {}
|
|
299
|
+
|
|
300
|
+
# Find Photoshop resource markers
|
|
301
|
+
offset = data.find(b'8BIM')
|
|
302
|
+
if offset < 0:
|
|
303
|
+
return {}
|
|
304
|
+
|
|
305
|
+
io_data = BytesIO(data)
|
|
306
|
+
io_data.seek(offset)
|
|
307
|
+
|
|
308
|
+
while True:
|
|
309
|
+
# Try to read a Photoshop resource block
|
|
310
|
+
try:
|
|
311
|
+
signature = io_data.read(4)
|
|
312
|
+
if signature != b'8BIM':
|
|
313
|
+
break
|
|
314
|
+
|
|
315
|
+
# Resource identifier (2 bytes)
|
|
316
|
+
resource_id = int.from_bytes(io_data.read(2), byteorder='big')
|
|
317
|
+
|
|
318
|
+
# Skip name: Pascal string padded to even length
|
|
319
|
+
name_len = io_data.read(1)[0]
|
|
320
|
+
name_bytes_to_read = name_len + (1 if name_len % 2 == 0 else 0)
|
|
321
|
+
io_data.read(name_bytes_to_read)
|
|
322
|
+
|
|
323
|
+
# Resource data
|
|
324
|
+
size = int.from_bytes(io_data.read(4), byteorder='big')
|
|
325
|
+
padded_size = size + (1 if size % 2 == 1 else 0)
|
|
326
|
+
|
|
327
|
+
resource_data = io_data.read(padded_size)[:size] # Trim padding if present
|
|
328
|
+
|
|
329
|
+
# Process specific resource types
|
|
330
|
+
if resource_id == 0x0404: # IPTC-NAA record (0x0404)
|
|
331
|
+
iptc_records = self._parse_iptc_data(resource_data)
|
|
332
|
+
iptc_data.update(iptc_records)
|
|
333
|
+
elif resource_id == 0x040F: # IPTCDigest (0x040F)
|
|
334
|
+
iptc_data['IPTCDigest'] = resource_data.hex()
|
|
335
|
+
elif resource_id == 0x0425: # EXIF data (1045)
|
|
336
|
+
# Already handled by the EXIF extraction but could process here if needed
|
|
337
|
+
pass
|
|
338
|
+
|
|
339
|
+
except Exception as e:
|
|
340
|
+
self.logger.debug(f"Error parsing Photoshop resource block: {e}")
|
|
341
|
+
break
|
|
342
|
+
|
|
343
|
+
return iptc_data
|
|
344
|
+
except Exception as e:
|
|
345
|
+
self.logger.debug(f"Error parsing Photoshop data: {e}")
|
|
346
|
+
return {}
|
|
347
|
+
|
|
348
|
+
def _parse_iptc_data(self, data: bytes) -> dict:
|
|
349
|
+
"""
|
|
350
|
+
Parse raw IPTC data bytes.
|
|
351
|
+
|
|
352
|
+
Args:
|
|
353
|
+
data: Raw IPTC data bytes.
|
|
354
|
+
Returns:
|
|
355
|
+
Dictionary of extracted IPTC fields.
|
|
356
|
+
"""
|
|
357
|
+
iptc_data = {}
|
|
358
|
+
try:
|
|
359
|
+
# IPTC marker (0x1C) followed by record number (1 byte) and dataset number (1 byte)
|
|
360
|
+
i = 0
|
|
361
|
+
while i < len(data):
|
|
362
|
+
# Look for IPTC marker
|
|
363
|
+
if i + 4 <= len(data) and data[i] == 0x1C:
|
|
364
|
+
record = data[i + 1]
|
|
365
|
+
dataset = data[i + 2]
|
|
366
|
+
|
|
367
|
+
# Length of the data field (can be 1, 2, or 4 bytes)
|
|
368
|
+
if data[i + 3] & 0x80: # Check if the high bit is set
|
|
369
|
+
# Extended length - 4 bytes
|
|
370
|
+
if i + 8 <= len(data):
|
|
371
|
+
length = int.from_bytes(data[i + 4:i + 8], byteorder='big')
|
|
372
|
+
i += 8
|
|
373
|
+
else:
|
|
374
|
+
break
|
|
375
|
+
else:
|
|
376
|
+
# Standard length - 1 byte
|
|
377
|
+
length = data[i + 3]
|
|
378
|
+
i += 4
|
|
379
|
+
|
|
380
|
+
# Check if we have enough data
|
|
381
|
+
if i + length <= len(data):
|
|
382
|
+
field_data = data[i:i + length]
|
|
383
|
+
|
|
384
|
+
# Convert to string if possible
|
|
385
|
+
try:
|
|
386
|
+
field_value = field_data.decode('utf-8', errors='replace')
|
|
387
|
+
except UnicodeDecodeError:
|
|
388
|
+
field_value = field_data.hex()
|
|
389
|
+
|
|
390
|
+
# Map record:dataset to meaningful names - simplified example
|
|
391
|
+
key = f"{record}:{dataset}"
|
|
392
|
+
# Known IPTC fields
|
|
393
|
+
iptc_fields = {
|
|
394
|
+
"2:5": "ObjectName",
|
|
395
|
+
"2:25": "Keywords",
|
|
396
|
+
"2:80": "By-line",
|
|
397
|
+
"2:105": "Headline",
|
|
398
|
+
"2:110": "Credit",
|
|
399
|
+
"2:115": "Source",
|
|
400
|
+
"2:120": "Caption-Abstract",
|
|
401
|
+
"2:122": "Writer-Editor",
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
field_name = iptc_fields.get(key, f"IPTC_{key}")
|
|
405
|
+
iptc_data[field_name] = field_value
|
|
406
|
+
|
|
407
|
+
i += length
|
|
408
|
+
else:
|
|
409
|
+
break
|
|
410
|
+
else:
|
|
411
|
+
i += 1
|
|
412
|
+
|
|
413
|
+
return iptc_data
|
|
414
|
+
except Exception as e:
|
|
415
|
+
self.logger.debug(f"Error parsing IPTC data: {e}")
|
|
416
|
+
return {}
|
|
417
|
+
|
|
418
|
+
def _extract_apple_gps_from_mime(self, mime_data: bytes, exif_data: Dict) -> None:
|
|
419
|
+
"""
|
|
420
|
+
Extract GPS data from Apple's MIME metadata in HEIF files.
|
|
421
|
+
|
|
422
|
+
Args:
|
|
423
|
+
mime_data: MIME metadata bytes
|
|
424
|
+
exif_data: Dictionary to update with GPS data
|
|
425
|
+
"""
|
|
426
|
+
try:
|
|
427
|
+
# Apple stores GPS in a complex binary format
|
|
428
|
+
# We'll search for specific patterns indicating GPS data
|
|
429
|
+
# Look for patterns that might indicate GPS coordinates
|
|
430
|
+
# Apple often stores these as 8-byte IEEE-754 double-precision values
|
|
431
|
+
lat_pattern = re.compile(b'CNTH.{4,32}?lat[a-z]*', re.DOTALL)
|
|
432
|
+
lon_pattern = re.compile(b'CNTH.{4,32}?lon[a-z]*', re.DOTALL)
|
|
433
|
+
|
|
434
|
+
lat_match = lat_pattern.search(mime_data)
|
|
435
|
+
lon_match = lon_pattern.search(mime_data)
|
|
436
|
+
|
|
437
|
+
if lat_match and lon_match:
|
|
438
|
+
# Try to find the 8-byte double values after the identifiers
|
|
439
|
+
lat_pos = lat_match.end()
|
|
440
|
+
lon_pos = lon_match.end()
|
|
441
|
+
|
|
442
|
+
# Ensure we have enough bytes to extract the doubles
|
|
443
|
+
if len(mime_data) >= lat_pos + 8 and len(mime_data) >= lon_pos + 8:
|
|
444
|
+
try:
|
|
445
|
+
latitude = struct.unpack('>d', mime_data[lat_pos:lat_pos + 8])[0]
|
|
446
|
+
longitude = struct.unpack('>d', mime_data[lon_pos:lon_pos + 8])[0]
|
|
447
|
+
|
|
448
|
+
# Only use if values seem reasonable
|
|
449
|
+
if -90 <= latitude <= 90 and -180 <= longitude <= 180:
|
|
450
|
+
if "GPSInfo" not in exif_data:
|
|
451
|
+
exif_data["GPSInfo"] = {}
|
|
452
|
+
|
|
453
|
+
exif_data["GPSInfo"]["GPSLatitude"] = (latitude, 0, 0)
|
|
454
|
+
exif_data["GPSInfo"]["GPSLongitude"] = (longitude, 0, 0)
|
|
455
|
+
exif_data["GPSInfo"]["GPSLatitudeRef"] = "N" if latitude >= 0 else "S"
|
|
456
|
+
exif_data["GPSInfo"]["GPSLongitudeRef"] = "E" if longitude >= 0 else "W"
|
|
457
|
+
except Exception:
|
|
458
|
+
# Silently fail if unpacking doesn't work
|
|
459
|
+
pass
|
|
460
|
+
except Exception as e:
|
|
461
|
+
self.logger.debug(f"Error extracting GPS from Apple MIME data: {e}")
|
|
462
|
+
|
|
463
|
+
def _extract_gps_from_apple_makernote(self, maker_note: Any) -> Optional[Dict]:
|
|
464
|
+
"""
|
|
465
|
+
Extract GPS data from Apple's MakerNote field.
|
|
466
|
+
|
|
467
|
+
Fixed version that properly handles Apple's MakerNote structure and
|
|
468
|
+
looks for actual GPS coordinates rather than test values.
|
|
469
|
+
"""
|
|
470
|
+
try:
|
|
471
|
+
# 1) Ensure we have raw bytes
|
|
472
|
+
if isinstance(maker_note, bytes):
|
|
473
|
+
data_bytes = maker_note
|
|
474
|
+
elif isinstance(maker_note, str):
|
|
475
|
+
data_bytes = maker_note.encode("latin-1", errors="ignore")
|
|
476
|
+
else:
|
|
477
|
+
return None
|
|
478
|
+
|
|
479
|
+
# 2) Find and properly parse binary plists
|
|
480
|
+
gps_data = self._parse_apple_plists_for_gps(data_bytes)
|
|
481
|
+
if gps_data:
|
|
482
|
+
return gps_data
|
|
483
|
+
|
|
484
|
+
# 3) Try parsing as TIFF-style MakerNote first
|
|
485
|
+
gps_data = self._parse_tiff_makernote_gps(data_bytes)
|
|
486
|
+
if gps_data:
|
|
487
|
+
return gps_data
|
|
488
|
+
|
|
489
|
+
# 4) Enhanced fallback with better coordinate detection
|
|
490
|
+
gps_data = self._enhanced_regex_gps_search(data_bytes)
|
|
491
|
+
if gps_data:
|
|
492
|
+
return gps_data
|
|
493
|
+
|
|
494
|
+
return None
|
|
495
|
+
|
|
496
|
+
except Exception as e:
|
|
497
|
+
if hasattr(self, 'logger'):
|
|
498
|
+
self.logger.warning(f"Error extracting GPS from Apple MakerNote: {e}")
|
|
499
|
+
return None
|
|
500
|
+
|
|
501
|
+
def _parse_apple_plists_for_gps(self, data_bytes: bytes) -> Optional[Dict]:
|
|
502
|
+
"""Parse binary plists properly with length headers"""
|
|
503
|
+
bplist_marker = b"bplist00"
|
|
504
|
+
offset = 0
|
|
505
|
+
|
|
506
|
+
while True:
|
|
507
|
+
idx = data_bytes.find(bplist_marker, offset)
|
|
508
|
+
if idx < 0:
|
|
509
|
+
break
|
|
510
|
+
|
|
511
|
+
try:
|
|
512
|
+
# Parse the plist properly by reading its length
|
|
513
|
+
plist_data = self._extract_single_plist(data_bytes, idx)
|
|
514
|
+
if not plist_data:
|
|
515
|
+
offset = idx + len(bplist_marker)
|
|
516
|
+
continue
|
|
517
|
+
|
|
518
|
+
parsed = plistlib.loads(plist_data)
|
|
519
|
+
coords = self._find_gps_in_plist(parsed)
|
|
520
|
+
if coords:
|
|
521
|
+
return coords
|
|
522
|
+
|
|
523
|
+
except Exception:
|
|
524
|
+
pass
|
|
525
|
+
|
|
526
|
+
offset = idx + len(bplist_marker)
|
|
527
|
+
|
|
528
|
+
return None
|
|
529
|
+
|
|
530
|
+
def _extract_single_plist(self, data: bytes, start_idx: int) -> Optional[bytes]:
|
|
531
|
+
"""Extract a single binary plist with proper length calculation"""
|
|
532
|
+
try:
|
|
533
|
+
# Binary plist format: 8-byte header + data + trailer
|
|
534
|
+
if start_idx + 8 >= len(data):
|
|
535
|
+
return None
|
|
536
|
+
|
|
537
|
+
# Try different approaches to find plist end
|
|
538
|
+
# Method 1: Look for next bplist or end of data
|
|
539
|
+
next_bplist = data.find(b"bplist00", start_idx + 8)
|
|
540
|
+
if next_bplist > 0:
|
|
541
|
+
candidate = data[start_idx:next_bplist]
|
|
542
|
+
else:
|
|
543
|
+
# Try parsing increasingly larger chunks
|
|
544
|
+
for size in [32, 64, 128, 256, 512, 1024, 2048]:
|
|
545
|
+
if start_idx + size > len(data):
|
|
546
|
+
candidate = data[start_idx:]
|
|
547
|
+
break
|
|
548
|
+
candidate = data[start_idx:start_idx + size]
|
|
549
|
+
try:
|
|
550
|
+
plistlib.loads(candidate)
|
|
551
|
+
return candidate
|
|
552
|
+
except Exception:
|
|
553
|
+
continue
|
|
554
|
+
candidate = data[start_idx:]
|
|
555
|
+
|
|
556
|
+
# Validate by trying to parse
|
|
557
|
+
try:
|
|
558
|
+
plistlib.loads(candidate)
|
|
559
|
+
return candidate
|
|
560
|
+
except Exception:
|
|
561
|
+
return None
|
|
562
|
+
|
|
563
|
+
except Exception:
|
|
564
|
+
return None
|
|
565
|
+
|
|
566
|
+
def _find_gps_in_plist(self, obj: Any, path: str = "") -> Optional[Dict]:
|
|
567
|
+
"""
|
|
568
|
+
Enhanced GPS coordinate finder that looks for various GPS-related keys
|
|
569
|
+
and validates coordinate ranges more strictly
|
|
570
|
+
"""
|
|
571
|
+
# Common GPS key patterns in Apple plists
|
|
572
|
+
gps_lat_keys = [
|
|
573
|
+
"Latitude", "latitude", "lat", "GPSLatitude",
|
|
574
|
+
"Location.Latitude", "coordinates.latitude"
|
|
575
|
+
]
|
|
576
|
+
gps_lon_keys = [
|
|
577
|
+
"Longitude", "longitude", "lon", "lng", "GPSLongitude",
|
|
578
|
+
"Location.Longitude", "coordinates.longitude"
|
|
579
|
+
]
|
|
580
|
+
|
|
581
|
+
if isinstance(obj, dict):
|
|
582
|
+
# Direct GPS coordinate check
|
|
583
|
+
lat_val = None
|
|
584
|
+
lon_val = None
|
|
585
|
+
|
|
586
|
+
# Look for latitude
|
|
587
|
+
for lat_key in gps_lat_keys:
|
|
588
|
+
if lat_key in obj:
|
|
589
|
+
try:
|
|
590
|
+
lat_val = float(obj[lat_key])
|
|
591
|
+
break
|
|
592
|
+
except Exception:
|
|
593
|
+
continue
|
|
594
|
+
|
|
595
|
+
# Look for longitude
|
|
596
|
+
for lon_key in gps_lon_keys:
|
|
597
|
+
if lon_key in obj:
|
|
598
|
+
try:
|
|
599
|
+
lon_val = float(obj[lon_key])
|
|
600
|
+
break
|
|
601
|
+
except Exception:
|
|
602
|
+
continue
|
|
603
|
+
|
|
604
|
+
# Validate coordinates
|
|
605
|
+
if lat_val is not None and lon_val is not None:
|
|
606
|
+
if self._are_valid_coordinates(lat_val, lon_val):
|
|
607
|
+
return {"latitude": lat_val, "longitude": lon_val}
|
|
608
|
+
|
|
609
|
+
# Look for nested coordinate structures
|
|
610
|
+
for key, value in obj.items():
|
|
611
|
+
if any(term in key.lower() for term in ["location", "gps", "coord", "position"]):
|
|
612
|
+
result = self._find_gps_in_plist(value, f"{path}.{key}")
|
|
613
|
+
if result:
|
|
614
|
+
return result
|
|
615
|
+
|
|
616
|
+
# Recurse into all values
|
|
617
|
+
for key, value in obj.items():
|
|
618
|
+
result = self._find_gps_in_plist(value, f"{path}.{key}")
|
|
619
|
+
if result:
|
|
620
|
+
return result
|
|
621
|
+
|
|
622
|
+
elif isinstance(obj, (list, tuple)):
|
|
623
|
+
for i, item in enumerate(obj):
|
|
624
|
+
result = self._find_gps_in_plist(item, f"{path}[{i}]")
|
|
625
|
+
if result:
|
|
626
|
+
return result
|
|
627
|
+
|
|
628
|
+
return None
|
|
629
|
+
|
|
630
|
+
def _are_valid_coordinates(self, lat: float, lon: float) -> bool:
|
|
631
|
+
"""
|
|
632
|
+
Enhanced coordinate validation that rejects obvious test/dummy values
|
|
633
|
+
"""
|
|
634
|
+
# Basic range check
|
|
635
|
+
if not (-90 <= lat <= 90 and -180 <= lon <= 180):
|
|
636
|
+
return False
|
|
637
|
+
|
|
638
|
+
# Reject obvious test values
|
|
639
|
+
test_values = [
|
|
640
|
+
0.0, 1.0, 2.0, 2.1, 2.2, 3.0, 4.0, 5.0, 10.0, -1.0, -2.0, 123.0, 123.456, 90.0, 180.0
|
|
641
|
+
]
|
|
642
|
+
|
|
643
|
+
if lat in test_values and lon in test_values:
|
|
644
|
+
return False
|
|
645
|
+
|
|
646
|
+
# Reject coordinates that are too close to (0,0) unless specifically valid
|
|
647
|
+
if abs(lat) < 0.01 and abs(lon) < 0.01:
|
|
648
|
+
return False
|
|
649
|
+
|
|
650
|
+
# Reject coordinates where both values are the same (likely test data)
|
|
651
|
+
if lat == lon:
|
|
652
|
+
return False
|
|
653
|
+
|
|
654
|
+
# Additional validation: check for reasonable precision
|
|
655
|
+
# Real GPS coordinates usually have more precision
|
|
656
|
+
lat_str = str(lat)
|
|
657
|
+
lon_str = str(lon)
|
|
658
|
+
|
|
659
|
+
# If both coordinates have very low precision, they might be test values
|
|
660
|
+
if '.' in lat_str and '.' in lon_str:
|
|
661
|
+
lat_decimals = len(lat_str.split('.')[1])
|
|
662
|
+
lon_decimals = len(lon_str.split('.')[1])
|
|
663
|
+
if lat_decimals <= 1 and lon_decimals <= 1 and abs(lat) < 10 and abs(lon) < 10:
|
|
664
|
+
return False
|
|
665
|
+
|
|
666
|
+
return True
|
|
667
|
+
|
|
668
|
+
def _parse_tiff_makernote_gps(self, data_bytes: bytes) -> Optional[Dict]:
|
|
669
|
+
"""
|
|
670
|
+
Parse Apple's TIFF-style MakerNote entries for GPS data
|
|
671
|
+
"""
|
|
672
|
+
try:
|
|
673
|
+
# Look for TIFF structure in the MakerNote
|
|
674
|
+
if len(data_bytes) < 12:
|
|
675
|
+
return None
|
|
676
|
+
|
|
677
|
+
# Check for TIFF byte order marks
|
|
678
|
+
if data_bytes[:2] in [b'II', b'MM']:
|
|
679
|
+
return self._parse_tiff_entries(data_bytes)
|
|
680
|
+
|
|
681
|
+
# Apple MakerNote often starts with "Apple iOS" followed by TIFF data
|
|
682
|
+
apple_marker = data_bytes.find(b'Apple iOS')
|
|
683
|
+
if apple_marker >= 0:
|
|
684
|
+
tiff_start = apple_marker + 9 # Length of "Apple iOS"
|
|
685
|
+
if tiff_start < len(data_bytes):
|
|
686
|
+
return self._parse_tiff_entries(data_bytes[tiff_start:])
|
|
687
|
+
|
|
688
|
+
return None
|
|
689
|
+
|
|
690
|
+
except Exception:
|
|
691
|
+
return None
|
|
692
|
+
|
|
693
|
+
def _parse_tiff_entries(self, data: bytes) -> Optional[Dict]:
|
|
694
|
+
"""Parse TIFF-style directory entries looking for GPS tags"""
|
|
695
|
+
try:
|
|
696
|
+
if len(data) < 8:
|
|
697
|
+
return None
|
|
698
|
+
|
|
699
|
+
# Determine byte order
|
|
700
|
+
if data[:2] == b'II':
|
|
701
|
+
endian = '<' # Little endian
|
|
702
|
+
elif data[:2] == b'MM':
|
|
703
|
+
endian = '>' # Big endian
|
|
704
|
+
else:
|
|
705
|
+
return None
|
|
706
|
+
|
|
707
|
+
# Skip to first IFD
|
|
708
|
+
offset = struct.unpack(f'{endian}I', data[4:8])[0]
|
|
709
|
+
if offset >= len(data):
|
|
710
|
+
return None
|
|
711
|
+
|
|
712
|
+
# Read number of directory entries
|
|
713
|
+
if offset + 2 >= len(data):
|
|
714
|
+
return None
|
|
715
|
+
|
|
716
|
+
num_entries = struct.unpack(f'{endian}H', data[offset:offset + 2])[0]
|
|
717
|
+
offset += 2
|
|
718
|
+
|
|
719
|
+
# Parse each entry
|
|
720
|
+
for i in range(min(num_entries, 100)): # Limit to prevent infinite loops
|
|
721
|
+
if offset + 12 > len(data):
|
|
722
|
+
break
|
|
723
|
+
|
|
724
|
+
entry = data[offset:offset + 12]
|
|
725
|
+
tag, type_id, count, value_offset = struct.unpack(f'{endian}HHII', entry)
|
|
726
|
+
|
|
727
|
+
# Look for GPS-related tags (these are hypothetical Apple GPS tags)
|
|
728
|
+
if tag in [0x0001, 0x0002, 0x0003, 0x0004]: # Common GPS tag IDs
|
|
729
|
+
# This would need more specific implementation based on Apple's actual tags
|
|
730
|
+
pass
|
|
731
|
+
|
|
732
|
+
offset += 12
|
|
733
|
+
|
|
734
|
+
return None
|
|
735
|
+
|
|
736
|
+
except Exception:
|
|
737
|
+
return None
|
|
738
|
+
|
|
739
|
+
def _enhanced_regex_gps_search(self, data_bytes: bytes) -> Optional[Dict]:
|
|
740
|
+
"""
|
|
741
|
+
Enhanced regex search that's more discriminating about coordinate patterns
|
|
742
|
+
"""
|
|
743
|
+
try:
|
|
744
|
+
# Try UTF-8 first, then latin-1
|
|
745
|
+
for encoding in ['utf-8', 'latin-1']:
|
|
746
|
+
try:
|
|
747
|
+
text = data_bytes.decode(encoding, errors='ignore')
|
|
748
|
+
break
|
|
749
|
+
except UnicodeDecodeError:
|
|
750
|
+
continue
|
|
751
|
+
else:
|
|
752
|
+
return None
|
|
753
|
+
|
|
754
|
+
# Look for coordinate patterns in various formats
|
|
755
|
+
patterns = [
|
|
756
|
+
# Decimal degrees with high precision
|
|
757
|
+
r'(?:lat|latitude)[:\s=]*([+-]?\d{1,2}\.\d{4,})',
|
|
758
|
+
r'(?:lon|lng|longitude)[:\s=]*([+-]?\d{1,3}\.\d{4,})',
|
|
759
|
+
# Coordinates in JSON-like structures
|
|
760
|
+
r'"(?:lat|latitude)"\s*:\s*([+-]?\d+\.\d+)',
|
|
761
|
+
r'"(?:lon|lng|longitude)"\s*:\s*([+-]?\d+\.\d+)',
|
|
762
|
+
# Coordinates with more context
|
|
763
|
+
r'(?:coordinate|position|location)[^0-9]*([+-]?\d+\.\d{4,})[^0-9]*([+-]?\d+\.\d{4,})'
|
|
764
|
+
]
|
|
765
|
+
|
|
766
|
+
for pattern in patterns:
|
|
767
|
+
matches = re.finditer(pattern, text, re.IGNORECASE)
|
|
768
|
+
for match in matches:
|
|
769
|
+
try:
|
|
770
|
+
if len(match.groups()) == 2:
|
|
771
|
+
lat, lon = float(match.group(1)), float(match.group(2))
|
|
772
|
+
else:
|
|
773
|
+
# Look for the next coordinate nearby
|
|
774
|
+
coord = float(match.group(1))
|
|
775
|
+
# This needs more sophisticated logic
|
|
776
|
+
continue
|
|
777
|
+
|
|
778
|
+
if self._are_valid_coordinates(lat, lon):
|
|
779
|
+
return {"latitude": lat, "longitude": lon}
|
|
780
|
+
except Exception:
|
|
781
|
+
continue
|
|
782
|
+
|
|
783
|
+
return None
|
|
784
|
+
|
|
785
|
+
except Exception:
|
|
786
|
+
return None
|
|
787
|
+
|
|
788
|
+
async def extract_exif_heif(self, heif_image) -> Optional[Dict]:
    """
    Extract EXIF data from a HEIF/HEIC image object.

    Each entry of ``heif_image.metadata`` is inspected: 'Exif' entries are
    opened through PIL and their tags decoded, 'mime' entries containing a
    b'CNTH' marker are scanned for Apple GPS data. When geolocation
    extraction is enabled, a 'gps_info' entry is derived from GPSInfo, with
    the Apple MakerNote as fallback.

    Args:
        heif_image: HEIF image object exposing a ``metadata`` iterable whose
            items have ``type`` and ``data`` attributes.
    Returns:
        Dictionary of EXIF data, or None if nothing was extracted or an
        error occurred.
    """
    try:
        exif_data = {}

        for metadata in heif_image.metadata or []:
            kind = metadata.type

            if kind == 'Exif':
                exif_bytes = metadata.data
                if exif_bytes and len(exif_bytes) > 8:
                    exif_stream = BytesIO(exif_bytes)
                    try:
                        # Intended to skip an 8-byte header before the TIFF data.
                        # NOTE(review): Pillow's Image.open appears to rewind the
                        # stream before sniffing the format, which would make this
                        # seek ineffective - confirm against the Pillow version in use.
                        exif_stream.seek(8)  # Skip the Exif\0\0 header
                        exif_image = Image.open(exif_stream)
                        # NOTE(review): _getexif is a private PIL method that not
                        # every image plugin provides - verify for embedded TIFF.
                        exif_info = exif_image._getexif() or {}

                        gps_info = {}
                        for tag, value in exif_info.items():
                            decoded = TAGS.get(tag, tag)
                            if decoded != "GPSInfo":
                                exif_data[decoded] = _make_serialisable(value)
                                continue
                            # GPSInfo is a nested mapping of numeric sub-tags.
                            for t in value:
                                gps_info[GPSTAGS.get(t, t)] = value[t]
                            exif_data["GPSInfo"] = gps_info
                    except Exception as e:
                        self.logger.debug(f"Error processing HEIF EXIF data: {e}")

            elif kind == 'mime':
                # Apple HEIF files may keep GPS in a 'CNTH' container here.
                try:
                    mime_data = metadata.data
                    if b'CNTH' in mime_data:
                        self._extract_apple_gps_from_mime(mime_data, exif_data)
                except Exception as e:
                    self.logger.debug(f"Error processing Apple MIME metadata: {e}")

        if self.extract_geoloc:
            # Preferred source: the standard GPSInfo block.
            if "GPSInfo" in exif_data:
                gps_datetime = self.extract_gps_datetime(exif_data)
                if gps_datetime.get("latitude") is not None and gps_datetime.get("longitude") is not None:
                    exif_data['gps_info'] = gps_datetime

            # Fallback: Apple's MakerNote, only while no usable latitude exists.
            has_usable_gps = (
                'gps_info' in exif_data
                and exif_data['gps_info'].get('latitude') is not None
            )
            if not has_usable_gps and 'MakerNote' in exif_data:
                apple_gps = self._extract_gps_from_apple_makernote(exif_data['MakerNote'])
                if apple_gps:
                    taken_at = exif_data.get("DateTimeOriginal") or exif_data.get("DateTime")
                    exif_data['gps_info'] = {
                        "datetime": taken_at,
                        "latitude": apple_gps.get("latitude"),
                        "longitude": apple_gps.get("longitude")
                    }

        if not exif_data:
            return None
        return _json_safe(exif_data)

    except Exception as e:
        self.logger.error(f'Error extracting HEIF EXIF data: {e}')
        return None
|
|
871
|
+
|
|
872
|
+
async def extract_exif_data(self, image) -> dict:
    """
    Extract EXIF data from the image file object.

    Depending on what the image object offers, tags are read from
    ``image.getexif()``, from the TIFF ``tag_v2`` / ``tag`` attributes, or
    from ``image.info``. When geolocation extraction is enabled, a
    'gps_info' entry is derived from, in order of preference, the GPSInfo
    block, the Apple MakerNote and XMP GPS attributes; a later source is
    consulted only while no 'gps_info' has been found. IPTC data found in
    ``image.info`` is merged in as well.

    Args:
        image: The PIL Image object.
    Returns:
        Dictionary of EXIF data or empty dict if no EXIF data exists.
    """
    exif = {}
    # Check Modify Date (if any):
    try:
        modify_date = get_xmp_modify_date(image)
        if modify_date:
            exif["ModifyDate"] = modify_date
    except Exception as e:
        self.logger.debug(f"Error getting XMP ModifyDate: {e}")

    if hasattr(image, 'getexif'):
        try:
            if exif_data := image.getexif():
                for tag_id, value in exif_data.items():
                    tag_name = ExifTags.TAGS.get(tag_id, tag_id)
                    if isinstance(tag_name, (int, float)):
                        # Skip tags that have no symbolic name
                        continue
                    if tag_name == "GPSInfo":
                        # Either the value already is the dict of numeric
                        # sub-tags, or it must be resolved through the GPS IFD.
                        if isinstance(value, dict):
                            gps_raw = value
                        else:
                            gps_raw = exif_data.get_ifd(IFD.GPSInfo) or {}
                        exif["GPSInfo"] = {
                            GPSTAGS.get(sub_id, sub_id): sub_val
                            for sub_id, sub_val in gps_raw.items()
                        }
                    elif tag_name == "UserComment" and isinstance(value, str):
                        try:
                            # Try to decode base64 UserComment
                            exif[tag_name] = base64.b64decode(value).decode('utf-8', errors='replace')
                        except Exception:
                            # If decoding fails, use original value
                            exif[tag_name] = _make_serialisable(value)
                    else:
                        exif[tag_name] = _make_serialisable(value)

                # Aperture, shutter, flash, lens, tz offset, etc
                for key, val in exif_data.get_ifd(0x8769).items():
                    # .get() instead of [] so that one unknown tag id cannot
                    # raise KeyError and abort the rest of the extraction.
                    sub_name = ExifTags.TAGS.get(key)
                    if sub_name is None:
                        continue
                    exif[sub_name] = _make_serialisable(val)

                for ifd_id in IFD:
                    try:
                        ifd = exif_data.get_ifd(ifd_id)
                        resolve = GPSTAGS if ifd_id == IFD.GPSInfo else TAGS
                        for k, v in ifd.items():
                            tag = resolve.get(k, k)
                            if isinstance(tag, int):
                                continue
                            try:
                                exif[tag] = _make_serialisable(v)
                            except Exception:
                                exif[tag] = v
                    except KeyError:
                        pass
        except Exception as e:
            self.logger.warning(
                f'Error extracting EXIF data: {e}'
            )

    elif hasattr(image, 'tag') and hasattr(image, 'tag_v2'):
        # For TIFF images which store data in tag and tag_v2 attributes
        # Extract from tag_v2 first (more detailed)
        gps_info = {}
        try:
            for tag, value in image.tag_v2.items():
                tag_name = TAGS.get(tag, tag)
                if isinstance(tag_name, int):
                    # Skip tags that have no symbolic name
                    continue
                if tag_name == "GPSInfo":
                    # For TIFF images, GPS data might be in a nested IFD
                    if isinstance(value, dict):
                        for gps_tag, gps_value in value.items():
                            gps_info[GPSTAGS.get(gps_tag, gps_tag)] = gps_value
                        exif["GPSInfo"] = gps_info
                else:
                    exif[tag_name] = _make_serialisable(value)
        except Exception as e:
            self.logger.debug(f'Error extracting TIFF EXIF data: {e}')

        # Fall back to tag if nothing has been collected so far
        if not exif and hasattr(image, 'tag'):
            try:
                for tag, value in image.tag.items():
                    exif[TAGS.get(tag, tag)] = _make_serialisable(value)
            except Exception as e:
                self.logger.debug(f'Error extracting TIFF TAG data: {e}')

    else:
        # For other formats, try to extract directly from image.info
        try:
            for key, value in image.info.items():
                if isinstance(key, int):
                    continue
                if key.startswith('exif'):
                    # Some formats store EXIF data with keys like 'exif' or 'exif_ifd'
                    if isinstance(value, dict):
                        exif.update(value)
                    elif isinstance(value, bytes):
                        # Try to parse bytes as EXIF data
                        exif_stream = BytesIO(value)
                        try:
                            exif_image = TiffImagePlugin.TiffImageFile(exif_stream)
                            if hasattr(exif_image, 'tag_v2'):
                                for tag, val in exif_image.tag_v2.items():
                                    exif[TAGS.get(tag, tag)] = _make_serialisable(val)
                        except Exception as e:
                            self.logger.warning(f"Error parsing EXIF bytes: {e}")
                else:
                    # Add other metadata
                    exif[key] = _make_serialisable(value)
        except Exception as e:
            self.logger.warning(f'Unable to extract EXIF from from image.info: {e}')

    # Extract GPS datetime if available
    if self.extract_geoloc and "GPSInfo" in exif:
        try:
            if gps_datetime := self.extract_gps_datetime(exif):
                exif['gps_info'] = gps_datetime
        except Exception as e:
            self.logger.warning(
                f"Error extracting GPS datetime: {e}"
            )

    # Still no GPS: check the MakerNote, which might contain GPS data.
    if self.extract_geoloc and "MakerNote" in exif and not exif.get('gps_info'):
        if gps_info := self._extract_gps_from_apple_makernote(exif["MakerNote"]):
            # Logged instead of printed; the coordinates themselves are
            # deliberately kept out of the log.
            self.logger.debug("GPS coordinates recovered from Apple MakerNote")
            exif['gps_info'] = gps_info

    # Still no GPS: check XMP metadata. XMP must not override GPS data
    # that an earlier, more specific source already provided.
    if (
        self.extract_geoloc
        and not exif.get('gps_info')
        and "XML:com.adobe.xmp" in image.info
    ):
        try:
            xmp_data = image.info["XML:com.adobe.xmp"]
            if isinstance(xmp_data, str):
                # Simple pattern matching for GPS in XMP
                lat_match = re.search(r'GPSLatitude="([^"]+)"', xmp_data)
                lon_match = re.search(r'GPSLongitude="([^"]+)"', xmp_data)
                if lat_match and lon_match:
                    # NOTE(review): only plain decimal values are handled;
                    # other notations raise ValueError and are logged below.
                    latitude = float(lat_match.group(1))
                    longitude = float(lon_match.group(1))
                    exif['gps_info'] = {
                        "latitude": latitude,
                        "longitude": longitude
                    }
        except Exception as e:
            self.logger.warning(f"Error extracting GPS from XMP: {e}")

    # The IPTC digest is not location data: keep it under its own key
    # instead of overwriting 'gps_info' with it.
    if self.extract_geoloc and "IPTCDigest" in image.info:
        exif['IPTCDigest'] = image.info["IPTCDigest"]

    # Merge any IPTC block stored in image.info
    if self.extract_geoloc and "IPTC" in image.info:
        iptc_data = self._parse_photoshop_data(image.info["IPTC"])
        if iptc_data:
            exif.update(iptc_data)

    return _json_safe(exif) if exif else {}
|
|
1060
|
+
|
|
1061
|
+
async def analyze(self, image: Optional[Image.Image] = None, heif: Any = None, **kwargs) -> dict:
    """
    Extract EXIF data from the given image.

    Sources are merged into one dictionary in this order: HEIF EXIF, PIL
    EXIF, then IPTC data read from the PIL image. On key collisions a later
    source overwrites an earlier one. A failure in one source is logged and
    does not prevent the others from being processed.

    NOTE(review): the original inline comments say HEIF data should take
    priority over PIL data, but with this merge order PIL values win on
    collisions - confirm which precedence is intended.

    :param image: PIL Image object (optional)
    :param heif: HEIF image object (optional)
    :return: Dictionary containing EXIF data ({} on unexpected errors)
    """
    try:
        merged = {}

        # HEIF metadata goes in first.
        if heif is not None:
            try:
                if heif_exif := await self.extract_exif_heif(heif):
                    merged.update(heif_exif)
            except Exception as e:
                self.logger.error(f"Error extracting EXIF from HEIF image: {e}")

        # Then everything that can be read from the PIL image.
        if image is not None:
            try:
                if pil_exif := await self.extract_exif_data(image):
                    merged.update(pil_exif)
            except Exception as e:
                self.logger.error(f"Error extracting EXIF from PIL image: {e}")

            try:
                if pil_iptc := await self.extract_iptc_data(image):
                    merged.update(pil_iptc)
            except Exception as e:
                self.logger.error(
                    f"Error extracting IPTC data from PIL image: {e}"
                )

        return merged
    except Exception as e:
        self.logger.error(f"Error in EXIF analysis: {str(e)}")
        return {}
|