slidesherlock 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/README.md +239 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/env.py +103 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/script.py.mako +24 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/001_initial_schema.py +78 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/002_add_job_status_and_artifact_fields.py +60 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/003_evidence_index_tables.py +112 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/004_add_requested_language.py +23 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/005_add_job_config_json.py +23 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/__init__.py +1 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/database.py +17 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/main.py +504 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/models.py +180 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/package.json +27 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/requirements.txt +6 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/schemas.py +62 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/src/db.ts +98 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/src/index.ts +123 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/tests/__init__.py +1 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/tests/test_api.py +64 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/tsconfig.json +17 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/worker.py +82 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/package.json +30 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/requirements.txt +6 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/src/db.ts +118 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/src/index.ts +62 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/src/init-minio.ts +30 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/src/pipeline.ts +379 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/tsconfig.json +17 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/worker.py +22 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/__init__.py +1 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/alignment.py +113 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/audio_config.py +49 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/audio_prepare.py +245 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/audio_processor.py +114 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/composer.py +462 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/diagram_understand.py +723 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/doctor.py +148 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/evidence_index.py +531 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/explain_plan.py +107 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/image_classifier.py +282 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/image_extract.py +174 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/image_understand.py +516 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/jest.config.js +14 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/llm_provider.py +250 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/merge_engine.py +275 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/narration_blueprint.py +285 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/narration_source.py +234 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/native_graph.py +323 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/notes_config.py +115 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/ocr.py +133 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/overlay_renderer.py +321 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/package.json +23 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/photo_understand.py +236 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/pipeline.py +332 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/ppt_parser.py +250 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/presets.py +79 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/rag.py +86 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/script_context.py +151 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/script_generator.py +141 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/slide_caption_fallback.py +180 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/diagram.ts +193 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/index.ts +7 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/script.ts +136 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/storage.ts +87 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/tests/diagram.test.ts +133 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/tests/timeline.test.ts +155 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/tests/verifier.test.ts +110 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/timeline.ts +112 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/verifier.ts +316 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/__init__.py +22 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/audio.py +112 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/evidence.py +96 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/graph.py +134 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/ingest.py +152 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/render.py +265 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/script.py +160 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/translate.py +128 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/verify.py +144 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/video.py +405 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/storage.py +89 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/subtitle_generator.py +71 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/__init__.py +1 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_audio_prepare.py +299 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_diagram_understand.py +142 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_doctor.py +32 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_image_classifier.py +42 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_image_evidence_integration.py +90 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_image_extract.py +45 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_image_understand.py +109 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_merge_engine.py +166 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_narration_blueprint.py +119 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_narration_source.py +246 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_notes_config.py +103 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_on_screen_notes.py +92 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_photo_understand.py +31 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_presets.py +63 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_script_context.py +89 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_script_plan.py +121 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_slide_caption_fallback.py +99 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_subtitle_generator.py +44 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_timeline_alignment.py +70 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_translation.py +58 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_variants.py +32 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_verifier.py +263 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_video_config.py +44 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_vision_day3_integration.py +232 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_vision_provider_openai.py +144 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/timeline_builder.py +286 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/translation.py +183 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/translator_provider.py +67 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/translator_provider_llm.py +39 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tsconfig.json +18 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tts_provider.py +191 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/variants.py +33 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/verifier.py +643 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/video_config.py +88 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/vision_config.py +65 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/vision_graph.py +150 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/vision_provider.py +246 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/vision_provider_openai.py +414 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/package.json +20 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/src/index.ts +7 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/src/llm.ts +62 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/src/ocr.ts +47 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/src/tts.ts +45 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/tsconfig.json +18 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/schemas/package.json +20 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/schemas/src/index.ts +246 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/schemas/tsconfig.json +18 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/requirements.txt +49 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/scripts/create_sample_connectors_ppt.py +117 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/scripts/evidence_index_migrate.sh +33 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/scripts/run_demo.py +83 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/scripts/slidesherlock_cli.py +83 -0
- slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/scripts/verify_evidence_id_stable.py +144 -0
- slidesherlock-1.1.0/.gitignore +52 -0
- slidesherlock-1.1.0/CITATION.cff +23 -0
- slidesherlock-1.1.0/CONTRIBUTING.md +96 -0
- slidesherlock-1.1.0/LICENSE +191 -0
- slidesherlock-1.1.0/PKG-INFO +444 -0
- slidesherlock-1.1.0/README.md +392 -0
- slidesherlock-1.1.0/SECURITY.md +31 -0
- slidesherlock-1.1.0/alembic/env.py +103 -0
- slidesherlock-1.1.0/alembic/script.py.mako +24 -0
- slidesherlock-1.1.0/alembic/versions/001_initial_schema.py +78 -0
- slidesherlock-1.1.0/alembic/versions/002_add_job_status_and_artifact_fields.py +60 -0
- slidesherlock-1.1.0/alembic/versions/003_evidence_index_tables.py +112 -0
- slidesherlock-1.1.0/alembic/versions/004_add_requested_language.py +23 -0
- slidesherlock-1.1.0/alembic/versions/005_add_job_config_json.py +23 -0
- slidesherlock-1.1.0/apps/__init__.py +0 -0
- slidesherlock-1.1.0/apps/api/__init__.py +1 -0
- slidesherlock-1.1.0/apps/api/database.py +90 -0
- slidesherlock-1.1.0/apps/api/main.py +847 -0
- slidesherlock-1.1.0/apps/api/models.py +174 -0
- slidesherlock-1.1.0/apps/api/requirements.txt +6 -0
- slidesherlock-1.1.0/apps/api/schemas.py +62 -0
- slidesherlock-1.1.0/apps/api/tests/__init__.py +1 -0
- slidesherlock-1.1.0/apps/api/tests/test_api.py +64 -0
- slidesherlock-1.1.0/apps/api/tests/test_database_init.py +65 -0
- slidesherlock-1.1.0/apps/api/worker.py +79 -0
- slidesherlock-1.1.0/apps/web/index.html +14 -0
- slidesherlock-1.1.0/apps/web/package.json +29 -0
- slidesherlock-1.1.0/apps/web/pnpm-lock.yaml +1716 -0
- slidesherlock-1.1.0/apps/web/postcss.config.js +6 -0
- slidesherlock-1.1.0/apps/web/src/App.tsx +27 -0
- slidesherlock-1.1.0/apps/web/src/api/client.ts +136 -0
- slidesherlock-1.1.0/apps/web/src/api/mock.ts +256 -0
- slidesherlock-1.1.0/apps/web/src/components/ActivityFeed.tsx +199 -0
- slidesherlock-1.1.0/apps/web/src/components/DropZone.tsx +174 -0
- slidesherlock-1.1.0/apps/web/src/components/EvidenceTrail.tsx +103 -0
- slidesherlock-1.1.0/apps/web/src/components/FocusPanel.tsx +208 -0
- slidesherlock-1.1.0/apps/web/src/components/GlowButton.tsx +57 -0
- slidesherlock-1.1.0/apps/web/src/components/Layout.tsx +47 -0
- slidesherlock-1.1.0/apps/web/src/components/MetricBar.tsx +83 -0
- slidesherlock-1.1.0/apps/web/src/components/PipelineTrack.tsx +167 -0
- slidesherlock-1.1.0/apps/web/src/components/PresetCard.tsx +89 -0
- slidesherlock-1.1.0/apps/web/src/components/ProgressBar.tsx +43 -0
- slidesherlock-1.1.0/apps/web/src/components/StageCard.tsx +156 -0
- slidesherlock-1.1.0/apps/web/src/components/ThemeToggle.tsx +34 -0
- slidesherlock-1.1.0/apps/web/src/components/VideoPlayer.tsx +328 -0
- slidesherlock-1.1.0/apps/web/src/config/stages.ts +87 -0
- slidesherlock-1.1.0/apps/web/src/contexts/ThemeContext.tsx +64 -0
- slidesherlock-1.1.0/apps/web/src/index.css +172 -0
- slidesherlock-1.1.0/apps/web/src/main.tsx +10 -0
- slidesherlock-1.1.0/apps/web/src/pages/ProgressPage.tsx +161 -0
- slidesherlock-1.1.0/apps/web/src/pages/ResultPage.tsx +249 -0
- slidesherlock-1.1.0/apps/web/src/pages/UploadPage.tsx +253 -0
- slidesherlock-1.1.0/apps/web/src/utils/confetti.ts +20 -0
- slidesherlock-1.1.0/apps/web/tailwind.config.ts +83 -0
- slidesherlock-1.1.0/apps/web/tsconfig.json +22 -0
- slidesherlock-1.1.0/apps/web/tsconfig.node.json +23 -0
- slidesherlock-1.1.0/apps/web/vite.config.ts +19 -0
- slidesherlock-1.1.0/apps/worker/package.json +30 -0
- slidesherlock-1.1.0/apps/worker/requirements.txt +6 -0
- slidesherlock-1.1.0/apps/worker/tsconfig.json +17 -0
- slidesherlock-1.1.0/apps/worker/worker.py +22 -0
- slidesherlock-1.1.0/fonts/README.md +42 -0
- slidesherlock-1.1.0/packages/core/__init__.py +1 -0
- slidesherlock-1.1.0/packages/core/alignment.py +123 -0
- slidesherlock-1.1.0/packages/core/audio_config.py +52 -0
- slidesherlock-1.1.0/packages/core/audio_prepare.py +333 -0
- slidesherlock-1.1.0/packages/core/audio_processor.py +160 -0
- slidesherlock-1.1.0/packages/core/composer.py +829 -0
- slidesherlock-1.1.0/packages/core/diagram_understand.py +869 -0
- slidesherlock-1.1.0/packages/core/doctor.py +166 -0
- slidesherlock-1.1.0/packages/core/evidence_index.py +574 -0
- slidesherlock-1.1.0/packages/core/evidence_report.py +191 -0
- slidesherlock-1.1.0/packages/core/explain_plan.py +116 -0
- slidesherlock-1.1.0/packages/core/image_classifier.py +304 -0
- slidesherlock-1.1.0/packages/core/image_extract.py +178 -0
- slidesherlock-1.1.0/packages/core/image_understand.py +532 -0
- slidesherlock-1.1.0/packages/core/llm_backend.py +144 -0
- slidesherlock-1.1.0/packages/core/llm_config.py +191 -0
- slidesherlock-1.1.0/packages/core/llm_provider.py +278 -0
- slidesherlock-1.1.0/packages/core/llm_provider_openai.py +214 -0
- slidesherlock-1.1.0/packages/core/merge_engine.py +278 -0
- slidesherlock-1.1.0/packages/core/narration_blueprint.py +295 -0
- slidesherlock-1.1.0/packages/core/narration_rewriter.py +122 -0
- slidesherlock-1.1.0/packages/core/narration_source.py +238 -0
- slidesherlock-1.1.0/packages/core/native_graph.py +356 -0
- slidesherlock-1.1.0/packages/core/notes_config.py +128 -0
- slidesherlock-1.1.0/packages/core/ocr.py +140 -0
- slidesherlock-1.1.0/packages/core/overlay_renderer.py +412 -0
- slidesherlock-1.1.0/packages/core/photo_understand.py +260 -0
- slidesherlock-1.1.0/packages/core/pipeline.py +455 -0
- slidesherlock-1.1.0/packages/core/ppt_parser.py +250 -0
- slidesherlock-1.1.0/packages/core/presets.py +79 -0
- slidesherlock-1.1.0/packages/core/rag.py +86 -0
- slidesherlock-1.1.0/packages/core/script_context.py +165 -0
- slidesherlock-1.1.0/packages/core/script_generator.py +150 -0
- slidesherlock-1.1.0/packages/core/slide_caption_fallback.py +181 -0
- slidesherlock-1.1.0/packages/core/stages/__init__.py +22 -0
- slidesherlock-1.1.0/packages/core/stages/audio.py +139 -0
- slidesherlock-1.1.0/packages/core/stages/evidence.py +104 -0
- slidesherlock-1.1.0/packages/core/stages/graph.py +175 -0
- slidesherlock-1.1.0/packages/core/stages/ingest.py +162 -0
- slidesherlock-1.1.0/packages/core/stages/narrate.py +296 -0
- slidesherlock-1.1.0/packages/core/stages/render.py +277 -0
- slidesherlock-1.1.0/packages/core/stages/script.py +178 -0
- slidesherlock-1.1.0/packages/core/stages/translate.py +138 -0
- slidesherlock-1.1.0/packages/core/stages/verify.py +173 -0
- slidesherlock-1.1.0/packages/core/stages/video.py +588 -0
- slidesherlock-1.1.0/packages/core/storage.py +93 -0
- slidesherlock-1.1.0/packages/core/storage_backend.py +79 -0
- slidesherlock-1.1.0/packages/core/storage_local.py +61 -0
- slidesherlock-1.1.0/packages/core/subtitle_generator.py +94 -0
- slidesherlock-1.1.0/packages/core/tests/__init__.py +1 -0
- slidesherlock-1.1.0/packages/core/tests/test_audio_prepare.py +310 -0
- slidesherlock-1.1.0/packages/core/tests/test_diagram_understand.py +141 -0
- slidesherlock-1.1.0/packages/core/tests/test_doctor.py +32 -0
- slidesherlock-1.1.0/packages/core/tests/test_image_classifier.py +43 -0
- slidesherlock-1.1.0/packages/core/tests/test_image_evidence_integration.py +90 -0
- slidesherlock-1.1.0/packages/core/tests/test_image_extract.py +43 -0
- slidesherlock-1.1.0/packages/core/tests/test_image_understand.py +123 -0
- slidesherlock-1.1.0/packages/core/tests/test_merge_engine.py +195 -0
- slidesherlock-1.1.0/packages/core/tests/test_narration_blueprint.py +119 -0
- slidesherlock-1.1.0/packages/core/tests/test_narration_source.py +264 -0
- slidesherlock-1.1.0/packages/core/tests/test_notes_config.py +117 -0
- slidesherlock-1.1.0/packages/core/tests/test_on_screen_notes.py +93 -0
- slidesherlock-1.1.0/packages/core/tests/test_photo_understand.py +31 -0
- slidesherlock-1.1.0/packages/core/tests/test_presets.py +63 -0
- slidesherlock-1.1.0/packages/core/tests/test_script_context.py +121 -0
- slidesherlock-1.1.0/packages/core/tests/test_script_plan.py +148 -0
- slidesherlock-1.1.0/packages/core/tests/test_slide_caption_fallback.py +108 -0
- slidesherlock-1.1.0/packages/core/tests/test_storage_backend_registry.py +77 -0
- slidesherlock-1.1.0/packages/core/tests/test_subtitle_generator.py +44 -0
- slidesherlock-1.1.0/packages/core/tests/test_timeline_alignment.py +78 -0
- slidesherlock-1.1.0/packages/core/tests/test_translation.py +71 -0
- slidesherlock-1.1.0/packages/core/tests/test_variants.py +32 -0
- slidesherlock-1.1.0/packages/core/tests/test_verifier.py +358 -0
- slidesherlock-1.1.0/packages/core/tests/test_video_config.py +46 -0
- slidesherlock-1.1.0/packages/core/tests/test_video_encoder.py +54 -0
- slidesherlock-1.1.0/packages/core/tests/test_vision_day3_integration.py +247 -0
- slidesherlock-1.1.0/packages/core/tests/test_vision_provider_openai.py +154 -0
- slidesherlock-1.1.0/packages/core/timeline_builder.py +295 -0
- slidesherlock-1.1.0/packages/core/translation.py +201 -0
- slidesherlock-1.1.0/packages/core/translator_provider.py +97 -0
- slidesherlock-1.1.0/packages/core/translator_provider_llm.py +39 -0
- slidesherlock-1.1.0/packages/core/tts_provider.py +427 -0
- slidesherlock-1.1.0/packages/core/variants.py +46 -0
- slidesherlock-1.1.0/packages/core/verifier.py +676 -0
- slidesherlock-1.1.0/packages/core/video_config.py +105 -0
- slidesherlock-1.1.0/packages/core/video_encoder.py +98 -0
- slidesherlock-1.1.0/packages/core/vision_config.py +65 -0
- slidesherlock-1.1.0/packages/core/vision_graph.py +171 -0
- slidesherlock-1.1.0/packages/core/vision_provider.py +279 -0
- slidesherlock-1.1.0/packages/core/vision_provider_llm.py +27 -0
- slidesherlock-1.1.0/packages/core/vision_provider_openai.py +467 -0
- slidesherlock-1.1.0/pyproject.toml +100 -0
- slidesherlock-1.1.0/requirements.txt +50 -0
- slidesherlock-1.1.0/scripts/__init__.py +0 -0
- slidesherlock-1.1.0/scripts/batch_run.py +452 -0
- slidesherlock-1.1.0/scripts/check_minio.sh +75 -0
- slidesherlock-1.1.0/scripts/create_sample_connectors_ppt.py +126 -0
- slidesherlock-1.1.0/scripts/evidence_index_migrate.sh +33 -0
- slidesherlock-1.1.0/scripts/run_demo.py +100 -0
- slidesherlock-1.1.0/scripts/slidesherlock_cli.py +727 -0
- slidesherlock-1.1.0/scripts/test_api.sh +94 -0
- slidesherlock-1.1.0/scripts/test_api_connectors.sh +111 -0
- slidesherlock-1.1.0/scripts/test_render.sh +156 -0
- slidesherlock-1.1.0/scripts/verify_evidence_id_stable.py +155 -0
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
# SlideSherlock
|
|
2
|
+
|
|
3
|
+
Turn a PPTX into a narrated explainer video with visual guidance (highlight/trace/zoom), while preventing hallucinations using an Evidence Index + Verifier.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
SlideSherlock is built with an **artifact-first pipeline** where every stage writes outputs to S3/MinIO with stable paths and updates Postgres status. For a full pipeline diagram, stage breakdown, and **recommended next steps** (testing, production hardening, features, deployment), see **[ARCHITECTURE.md](ARCHITECTURE.md)**.
|
|
8
|
+
|
|
9
|
+
### Key Components
|
|
10
|
+
|
|
11
|
+
1. **Evidence Index**: Tracks all evidence with `evidence_id` + `source_ref` (bbox / ppt shape id / page+char offsets)
|
|
12
|
+
2. **Diagram Understanding**:
|
|
13
|
+
- `G_native` from PPT objects (shapes/connectors/groups)
|
|
14
|
+
- Optional `G_vision` from PNG + OCR
|
|
15
|
+
- Merged to `G_unified` with provenance (NATIVE/VISION/BOTH) + confidence + NEEDS_REVIEW flags
|
|
16
|
+
3. **Script Generation**: Produces segments with `claim_id`, `evidence_ids`, `entity_ids`
|
|
17
|
+
4. **Verifier**: Enforces grounding by assigning each script segment a PASS, REWRITE, or REMOVE verdict, and loops until no REWRITE verdicts remain
|
|
18
|
+
5. **Timeline Builder**: Generates HIGHLIGHT/TRACE/ZOOM actions mapped to entity geometry
|
|
19
|
+
6. **Renderer**: Generates overlays and composes final video with FFmpeg
|
|
20
|
+
|
|
21
|
+
## Repository Structure
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
/slidesherlock
|
|
25
|
+
/apps
|
|
26
|
+
/api - REST API for job submission and status
|
|
27
|
+
/worker - Pipeline worker that processes jobs
|
|
28
|
+
/packages
|
|
29
|
+
/core - Core business logic (diagram, script, verifier, timeline)
|
|
30
|
+
/providers - LLM/TTS/OCR provider interfaces
|
|
31
|
+
/schemas - TypeScript schemas and types
|
|
32
|
+
/infra
|
|
33
|
+
/docker - Docker configurations
|
|
34
|
+
docker-compose.yml
|
|
35
|
+
Makefile
|
|
36
|
+
README.md
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Prerequisites
|
|
40
|
+
|
|
41
|
+
### System Dependencies
|
|
42
|
+
|
|
43
|
+
- **Python 3.11+** (3.12 recommended)
|
|
44
|
+
- **Docker and Docker Compose** (for PostgreSQL, Redis, MinIO)
|
|
45
|
+
- **LibreOffice** (for PPTX to PDF conversion)
|
|
46
|
+
- macOS: `brew install --cask libreoffice`
|
|
47
|
+
- Ubuntu/Debian: `sudo apt-get install libreoffice`
|
|
48
|
+
- CentOS/RHEL: `sudo yum install libreoffice`
|
|
49
|
+
- **Poppler** (for PDF to PNG conversion, required by pdf2image)
|
|
50
|
+
- macOS: `brew install poppler`
|
|
51
|
+
- Ubuntu/Debian: `sudo apt-get install poppler-utils`
|
|
52
|
+
- CentOS/RHEL: `sudo yum install poppler-utils`
|
|
53
|
+
- **FFmpeg** (for composing the final video in the rendering stage)
|
|
54
|
+
|
|
55
|
+
### Python Dependencies
|
|
56
|
+
|
|
57
|
+
All Python dependencies are consolidated in `requirements.txt` and will be installed automatically with `make setup`.
|
|
58
|
+
|
|
59
|
+
## Quick Start
|
|
60
|
+
|
|
61
|
+
1. **Start infrastructure services:**
|
|
62
|
+
```bash
|
|
63
|
+
make up
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
2. **Install dependencies:**
|
|
67
|
+
```bash
|
|
68
|
+
make install
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
3. **Build all packages:**
|
|
72
|
+
```bash
|
|
73
|
+
make build
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
4. **Initialize database:**
|
|
77
|
+
```bash
|
|
78
|
+
make migrate
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
5. **Start the API server (in one terminal):**
|
|
82
|
+
```bash
|
|
83
|
+
make api
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
6. **Start the worker (in another terminal):**
|
|
87
|
+
```bash
|
|
88
|
+
make worker
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Usage
|
|
92
|
+
|
|
93
|
+
### Submit a Job
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
curl -X POST http://localhost:3000/jobs \
|
|
97
|
+
-F "file=@presentation.pptx"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Response:
|
|
101
|
+
```json
|
|
102
|
+
{
|
|
103
|
+
"job_id": "uuid-here",
|
|
104
|
+
"status": "PENDING",
|
|
105
|
+
"message": "Job submitted successfully"
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Check Job Status
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
curl http://localhost:3000/jobs/{job_id}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Get Artifacts
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# Get script
|
|
119
|
+
curl http://localhost:3000/jobs/{job_id}/artifacts/script
|
|
120
|
+
|
|
121
|
+
# Get verify report
|
|
122
|
+
curl http://localhost:3000/jobs/{job_id}/artifacts/verify_report
|
|
123
|
+
|
|
124
|
+
# Get timeline
|
|
125
|
+
curl http://localhost:3000/jobs/{job_id}/artifacts/timeline
|
|
126
|
+
|
|
127
|
+
# Get final video
|
|
128
|
+
curl http://localhost:3000/jobs/{job_id}/artifacts/final_video -o output.mp4
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Configuration
|
|
132
|
+
|
|
133
|
+
### Environment Variables
|
|
134
|
+
|
|
135
|
+
**API Server:**
|
|
136
|
+
- `PORT` - API server port (default: 3000)
|
|
137
|
+
- `DATABASE_URL` - PostgreSQL connection string
|
|
138
|
+
- `REDIS_URL` - Redis connection string
|
|
139
|
+
- `MINIO_ENDPOINT` - MinIO endpoint URL
|
|
140
|
+
- `MINIO_ACCESS_KEY` - MinIO access key
|
|
141
|
+
- `MINIO_SECRET_KEY` - MinIO secret key
|
|
142
|
+
- `MINIO_BUCKET` - MinIO bucket name
|
|
143
|
+
|
|
144
|
+
**Worker:**
|
|
145
|
+
- `DATABASE_URL` - PostgreSQL connection string
|
|
146
|
+
- `REDIS_URL` - Redis connection string
|
|
147
|
+
- `MINIO_ENDPOINT` - MinIO endpoint URL
|
|
148
|
+
- `MINIO_ACCESS_KEY` - MinIO access key
|
|
149
|
+
- `MINIO_SECRET_KEY` - MinIO secret key
|
|
150
|
+
- `MINIO_BUCKET` - MinIO bucket name
|
|
151
|
+
- `OPENAI_API_KEY` - OpenAI API key (optional; see [Secrets](#secrets-never-commit) below)
|
|
152
|
+
- `USE_SYSTEM_TTS` - Set to "true" to use system TTS (macOS only)
|
|
153
|
+
|
|
154
|
+
### Secrets (never commit)
|
|
155
|
+
|
|
156
|
+
API keys and secrets must **not** be committed. Use environment variables or a local `.env` file.
|
|
157
|
+
|
|
158
|
+
1. **Copy the example file** (no secrets inside):
|
|
159
|
+
```bash
|
|
160
|
+
cp .env.example .env
|
|
161
|
+
```
|
|
162
|
+
2. **Edit `.env`** and set only the keys you need (e.g. `OPENAI_API_KEY=sk-...`).
|
|
163
|
+
3. **`.env` is in `.gitignore`** – it will not be pushed to GitHub.
|
|
164
|
+
|
|
165
|
+
**OpenAI vision (optional):** For real diagram/photo understanding, set `OPENAI_API_KEY` in `.env` and `VISION_PROVIDER=openai`. If the key is not set, the pipeline uses the stub vision provider (generic captions) and logs that the key is missing.
|
|
166
|
+
|
|
167
|
+
- **Enable:** `VISION_PROVIDER=openai` (and optionally `VISION_EXTRACTOR_PROVIDER=openai`).
|
|
168
|
+
- **Config (env):** `OPENAI_VISION_MODEL` (default `gpt-4o`), `OPENAI_VISION_TEMPERATURE` (default `0`), `OPENAI_VISION_TIMEOUT_SECONDS` (default `60`).
|
|
169
|
+
- **Caching:** Results are cached in MinIO by image hash + model + lang + prompt version to avoid repeat API charges. Set `VISION_CACHE_ENABLED=false` to disable; cache path is `VISION_CACHE_PREFIX` (default `jobs/{job_id}/cache/vision/`).
|
|
170
|
+
- **Costs:** Each uncached image uses one vision API call, so keep caching enabled during development and re-runs to avoid repeat charges. Outputs are validated and stored as evidence (used by the script and verifier in later pipeline stages).
|
|
171
|
+
|
|
172
|
+
## Pipeline Stages
|
|
173
|
+
|
|
174
|
+
1. **EXTRACTING**: Extract content from PPTX, convert slides to PNG
|
|
175
|
+
2. **DIAGRAM_ANALYSIS**: Build evidence index and analyze diagram structure
|
|
176
|
+
3. **SCRIPT_GENERATION**: Generate narrated script with evidence grounding
|
|
177
|
+
4. **VERIFICATION**: Verify and rewrite script until all segments pass
|
|
178
|
+
5. **TIMELINE_BUILDING**: Generate timeline with visual actions
|
|
179
|
+
6. **RENDERING**: Generate overlays and compose final video
|
|
180
|
+
7. **COMPLETED**: Job finished successfully
|
|
181
|
+
|
|
182
|
+
## Artifact Paths
|
|
183
|
+
|
|
184
|
+
All artifacts are stored in MinIO with the following structure:
|
|
185
|
+
|
|
186
|
+
```
|
|
187
|
+
jobs/{job_id}/
|
|
188
|
+
input.pptx
|
|
189
|
+
extracted.json
|
|
190
|
+
slides/
|
|
191
|
+
slide-0.png
|
|
192
|
+
slide-1.png
|
|
193
|
+
evidence/
|
|
194
|
+
index.json
|
|
195
|
+
graph/
|
|
196
|
+
native.json
|
|
197
|
+
vision.json (optional)
|
|
198
|
+
unified.json
|
|
199
|
+
script.json
|
|
200
|
+
verify_report.json
|
|
201
|
+
coverage.json
|
|
202
|
+
timeline/
|
|
203
|
+
timeline.json
|
|
204
|
+
overlays/
|
|
205
|
+
{action_id}.png
|
|
206
|
+
audio/
|
|
207
|
+
{segment_id}.wav
|
|
208
|
+
final.mp4
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Testing
|
|
212
|
+
|
|
213
|
+
Run tests:
|
|
214
|
+
```bash
|
|
215
|
+
make test
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Development
|
|
219
|
+
|
|
220
|
+
### No-Provider Mode
|
|
221
|
+
|
|
222
|
+
By default, SlideSherlock runs in "no-provider mode" where:
|
|
223
|
+
- LLM provider returns placeholder responses
|
|
224
|
+
- TTS provider returns empty audio
|
|
225
|
+
- OCR provider returns empty results
|
|
226
|
+
|
|
227
|
+
This allows testing the pipeline structure without external API dependencies.
|
|
228
|
+
|
|
229
|
+
### Adding Providers
|
|
230
|
+
|
|
231
|
+
To use real providers:
|
|
232
|
+
|
|
233
|
+
1. **OpenAI LLM**: Set `OPENAI_API_KEY` environment variable
|
|
234
|
+
2. **System TTS**: Set `USE_SYSTEM_TTS=true` (macOS only)
|
|
235
|
+
3. **OCR**: Implement a real OCR provider in `packages/providers/src/ocr.ts`
|
|
236
|
+
|
|
237
|
+
## License
|
|
238
|
+
|
|
239
|
+
MIT
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from logging.config import fileConfig
|
|
2
|
+
from sqlalchemy import engine_from_config
|
|
3
|
+
from sqlalchemy import pool
|
|
4
|
+
from alembic import context
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
# Add project root to path
|
|
9
|
+
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
|
10
|
+
sys.path.insert(0, project_root)
|
|
11
|
+
|
|
12
|
+
# Import using absolute imports from the apps.api package
|
|
13
|
+
from apps.api.database import Base # noqa: E402
|
|
14
|
+
|
|
15
|
+
# Import models so Alembic can detect them for autogenerate
|
|
16
|
+
from apps.api.models import ( # noqa: E402, F401
|
|
17
|
+
Project,
|
|
18
|
+
Job,
|
|
19
|
+
Artifact,
|
|
20
|
+
Slide,
|
|
21
|
+
Source,
|
|
22
|
+
EvidenceItem,
|
|
23
|
+
SourceRef,
|
|
24
|
+
ClaimLink,
|
|
25
|
+
EntityLink,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# this is the Alembic Config object, which provides
|
|
29
|
+
# access to the values within the .ini file in use.
|
|
30
|
+
config = context.config
|
|
31
|
+
|
|
32
|
+
# Interpret the config file for Python logging.
|
|
33
|
+
# This line sets up loggers basically.
|
|
34
|
+
if config.config_file_name is not None:
|
|
35
|
+
fileConfig(config.config_file_name)
|
|
36
|
+
|
|
37
|
+
# add your model's MetaData object here
|
|
38
|
+
# for 'autogenerate' support
|
|
39
|
+
target_metadata = Base.metadata
|
|
40
|
+
|
|
41
|
+
# other values from the config, defined by the needs of env.py,
|
|
42
|
+
# can be acquired:
|
|
43
|
+
# my_important_option = config.get_main_option("my_important_option")
|
|
44
|
+
# ... etc.
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_url():
    """Return the database URL that Alembic should use.

    The ``DATABASE_URL`` environment variable takes precedence; when it is
    not set, the hard-coded PostgreSQL URL below is returned instead.
    """
    fallback_url = (
        "postgresql://slidesherlock:slidesherlock@localhost:5433/slidesherlock"
    )
    return os.environ.get("DATABASE_URL", fallback_url)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured with a URL only, so no Engine is created and
    no DBAPI needs to be available. Calls to context.execute() emit the
    given string to the script output.
    """
    offline_options = {
        "url": get_url(),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an Engine from the Alembic ini section (with ``sqlalchemy.url``
    overridden by ``get_url()``), opens a connection, and runs the
    migrations inside a transaction on that connection.
    """
    # get_section() yields None when the ini section is missing; fall back
    # to an empty dict so the URL assignment below cannot fail on None.
    configuration = config.get_section(config.config_ini_section) or {}
    configuration["sqlalchemy.url"] = get_url()

    # NullPool: no connections are kept pooled after the migration run.
    connectable = engine_from_config(
        configuration,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
if context.is_offline_mode():
|
|
101
|
+
run_migrations_offline()
|
|
102
|
+
else:
|
|
103
|
+
run_migrations_online()
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""${message}
|
|
2
|
+
|
|
3
|
+
Revision ID: ${up_revision}
|
|
4
|
+
Revises: ${down_revision | comma,n}
|
|
5
|
+
Create Date: ${create_date}
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
from alembic import op
|
|
9
|
+
import sqlalchemy as sa
|
|
10
|
+
${imports if imports else ""}
|
|
11
|
+
|
|
12
|
+
# revision identifiers, used by Alembic.
|
|
13
|
+
revision = ${repr(up_revision)}
|
|
14
|
+
down_revision = ${repr(down_revision)}
|
|
15
|
+
branch_labels = ${repr(branch_labels)}
|
|
16
|
+
depends_on = ${repr(depends_on)}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def upgrade() -> None:
|
|
20
|
+
${upgrades if upgrades else "pass"}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def downgrade() -> None:
|
|
24
|
+
${downgrades if downgrades else "pass"}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Initial schema: project, job, artifact
|
|
2
|
+
|
|
3
|
+
Revision ID: 001
|
|
4
|
+
Revises:
|
|
5
|
+
Create Date: 2024-01-01 00:00:00.000000
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from alembic import op
|
|
10
|
+
import sqlalchemy as sa
|
|
11
|
+
|
|
12
|
+
# revision identifiers, used by Alembic.
|
|
13
|
+
revision = "001"
|
|
14
|
+
down_revision = None
|
|
15
|
+
branch_labels = None
|
|
16
|
+
depends_on = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def upgrade() -> None:
    """Create the initial ``projects``, ``jobs`` and ``artifacts`` tables.

    Tables are created parent-first so that each foreign key refers to a
    table that already exists: ``jobs`` references ``projects``, and
    ``artifacts`` references both ``projects`` and ``jobs``.
    """
    # Create projects table
    op.create_table(
        "projects",
        sa.Column("project_id", sa.String(), nullable=False),
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("description", sa.Text(), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.PrimaryKeyConstraint("project_id"),
    )

    # Create jobs table
    op.create_table(
        "jobs",
        sa.Column("job_id", sa.String(), nullable=False),
        sa.Column("project_id", sa.String(), nullable=False),
        # The named enum "jobstatus" is dropped explicitly in downgrade().
        sa.Column(
            "status",
            sa.Enum("PENDING", "PROCESSING", "DONE", "FAILED", name="jobstatus"),
            nullable=False,
        ),
        sa.Column("input_file_path", sa.String(), nullable=True),
        sa.Column("error_message", sa.Text(), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(
            ["project_id"],
            ["projects.project_id"],
        ),
        sa.PrimaryKeyConstraint("job_id"),
    )

    # Create artifacts table
    op.create_table(
        "artifacts",
        sa.Column("artifact_id", sa.String(), nullable=False),
        sa.Column("project_id", sa.String(), nullable=False),
        # job_id is nullable, so an artifact row may exist without a job.
        sa.Column("job_id", sa.String(), nullable=True),
        sa.Column("artifact_type", sa.String(), nullable=False),
        sa.Column("storage_path", sa.String(), nullable=False),
        sa.Column("metadata_json", sa.Text(), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(
            ["project_id"],
            ["projects.project_id"],
        ),
        sa.ForeignKeyConstraint(
            ["job_id"],
            ["jobs.job_id"],
        ),
        sa.PrimaryKeyConstraint("artifact_id"),
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def downgrade() -> None:
    """Drop the initial tables and then the ``jobstatus`` enum type."""
    # Children first: artifacts references jobs, jobs references projects.
    for table_name in ("artifacts", "jobs", "projects"):
        op.drop_table(table_name)

    # The enum type is removed only after the table using it is gone.
    op.execute("DROP TYPE IF EXISTS jobstatus")
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Add QUEUED and RUNNING job status, add sha256 and size_bytes to artifacts
|
|
2
|
+
|
|
3
|
+
Revision ID: 002
|
|
4
|
+
Revises: 001
|
|
5
|
+
Create Date: 2024-01-27 12:00:00.000000
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from alembic import op
|
|
10
|
+
import sqlalchemy as sa
|
|
11
|
+
|
|
12
|
+
# revision identifiers, used by Alembic.
|
|
13
|
+
revision = "002"
|
|
14
|
+
down_revision = "001"
|
|
15
|
+
branch_labels = None
|
|
16
|
+
depends_on = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def upgrade() -> None:
    """Add QUEUED/RUNNING to ``jobstatus`` and sha256/size_bytes to artifacts.

    The enum labels are added with PostgreSQL's native
    ``ALTER TYPE ... ADD VALUE IF NOT EXISTS`` so the step is idempotent
    without a hand-written ``pg_enum`` lookup, and the type name is resolved
    through the normal search path rather than an unqualified ``pg_type``
    scan that breaks when two schemas define a ``jobstatus`` type.
    """
    # NOTE(review): on PostgreSQL versions before 12, ADD VALUE cannot run
    # inside a transaction block; if such servers must be supported, wrap
    # this loop in ``op.get_context().autocommit_block()`` - confirm the
    # target server version.
    for status_label in ("QUEUED", "RUNNING"):
        op.execute(f"ALTER TYPE jobstatus ADD VALUE IF NOT EXISTS '{status_label}'")

    # Add sha256 and size_bytes columns to artifacts table
    op.add_column("artifacts", sa.Column("sha256", sa.String(), nullable=True))
    # NOTE(review): size_bytes is created as a string column; presumably this
    # mirrors the ORM model - confirm before changing the type.
    op.add_column("artifacts", sa.Column("size_bytes", sa.String(), nullable=True))
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def downgrade() -> None:
    """Drop the artifact columns added by this revision.

    The QUEUED and RUNNING enum labels are intentionally left in place:
    PostgreSQL does not support removing enum values easily, so they remain
    on the type but won't be used.
    """
    # Remove columns from artifacts table, in reverse order of creation.
    for column_name in ("size_bytes", "sha256"):
        op.drop_column("artifacts", column_name)
|
slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/003_evidence_index_tables.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Evidence index tables: slides, sources, evidence_items, source_refs, claim_links, entity_links
|
|
2
|
+
|
|
3
|
+
Revision ID: 003
|
|
4
|
+
Revises: 002
|
|
5
|
+
Create Date: 2024-01-28 12:00:00.000000
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from alembic import op
|
|
10
|
+
import sqlalchemy as sa
|
|
11
|
+
|
|
12
|
+
revision = "003"
|
|
13
|
+
down_revision = "002"
|
|
14
|
+
branch_labels = None
|
|
15
|
+
depends_on = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def upgrade() -> None:
    """Create the evidence index tables.

    Creation order follows the foreign-key dependencies: ``slides`` first,
    then ``sources`` (references slides), then ``evidence_items``
    (references slides and sources), and finally the three tables that
    reference ``evidence_items``: ``source_refs``, ``claim_links`` and
    ``entity_links``.
    """
    # One row per slide of a job; optionally linked to a PNG artifact.
    op.create_table(
        "slides",
        sa.Column("slide_id", sa.String(), nullable=False),
        sa.Column("job_id", sa.String(), nullable=False),
        sa.Column("slide_index", sa.Integer(), nullable=False),
        sa.Column("slide_title", sa.Text(), nullable=True),
        sa.Column("png_artifact_id", sa.String(), nullable=True),
        sa.Column("pptx_ref", sa.String(), nullable=True),
        sa.ForeignKeyConstraint(["job_id"], ["jobs.job_id"]),
        sa.ForeignKeyConstraint(["png_artifact_id"], ["artifacts.artifact_id"]),
        sa.PrimaryKeyConstraint("slide_id"),
    )

    # A source belongs to a job and may point at an artifact and/or a slide.
    op.create_table(
        "sources",
        sa.Column("source_id", sa.String(), nullable=False),
        sa.Column("job_id", sa.String(), nullable=False),
        sa.Column("type", sa.String(), nullable=False),
        sa.Column("artifact_id", sa.String(), nullable=True),
        sa.Column("slide_id", sa.String(), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(["job_id"], ["jobs.job_id"]),
        sa.ForeignKeyConstraint(["artifact_id"], ["artifacts.artifact_id"]),
        sa.ForeignKeyConstraint(["slide_id"], ["slides.slide_id"]),
        sa.PrimaryKeyConstraint("source_id"),
    )

    # An evidence item always has a job and a source; the slide is optional.
    op.create_table(
        "evidence_items",
        sa.Column("evidence_id", sa.String(), nullable=False),
        sa.Column("job_id", sa.String(), nullable=False),
        sa.Column("slide_id", sa.String(), nullable=True),
        sa.Column("source_id", sa.String(), nullable=False),
        sa.Column("kind", sa.String(), nullable=False),
        sa.Column("content", sa.Text(), nullable=False),
        sa.Column("content_hash", sa.String(), nullable=True),
        sa.Column("confidence", sa.Float(), nullable=True),
        sa.Column("language", sa.String(), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(["job_id"], ["jobs.job_id"]),
        sa.ForeignKeyConstraint(["slide_id"], ["slides.slide_id"]),
        sa.ForeignKeyConstraint(["source_id"], ["sources.source_id"]),
        sa.PrimaryKeyConstraint("evidence_id"),
    )

    # Locator for an evidence item. Only ref_id, evidence_id and ref_type are
    # required; every locator field (shape/paragraph/run indices, bounding
    # box, page, character range, URL) is nullable.
    op.create_table(
        "source_refs",
        sa.Column("ref_id", sa.String(), nullable=False),
        sa.Column("evidence_id", sa.String(), nullable=False),
        sa.Column("ref_type", sa.String(), nullable=False),
        sa.Column("slide_index", sa.Integer(), nullable=True),
        sa.Column("ppt_shape_id", sa.String(), nullable=True),
        sa.Column("ppt_paragraph_ix", sa.Integer(), nullable=True),
        sa.Column("ppt_run_ix", sa.Integer(), nullable=True),
        sa.Column("bbox_x", sa.Float(), nullable=True),
        sa.Column("bbox_y", sa.Float(), nullable=True),
        sa.Column("bbox_w", sa.Float(), nullable=True),
        sa.Column("bbox_h", sa.Float(), nullable=True),
        sa.Column("page_num", sa.Integer(), nullable=True),
        sa.Column("char_start", sa.Integer(), nullable=True),
        sa.Column("char_end", sa.Integer(), nullable=True),
        sa.Column("url", sa.Text(), nullable=True),
        sa.ForeignKeyConstraint(["evidence_id"], ["evidence_items.evidence_id"]),
        sa.PrimaryKeyConstraint("ref_id"),
    )

    # Links a claim_id to an evidence item. claim_id has no foreign key here.
    op.create_table(
        "claim_links",
        sa.Column("claim_link_id", sa.String(), nullable=False),
        sa.Column("claim_id", sa.String(), nullable=False),
        sa.Column("evidence_id", sa.String(), nullable=False),
        sa.Column("weight", sa.Float(), nullable=True),
        sa.ForeignKeyConstraint(["evidence_id"], ["evidence_items.evidence_id"]),
        sa.PrimaryKeyConstraint("claim_link_id"),
    )

    # Links an entity_id to an evidence item. entity_id has no foreign key here.
    op.create_table(
        "entity_links",
        sa.Column("entity_link_id", sa.String(), nullable=False),
        sa.Column("entity_id", sa.String(), nullable=False),
        sa.Column("evidence_id", sa.String(), nullable=False),
        sa.Column("role", sa.String(), nullable=True),
        sa.ForeignKeyConstraint(["evidence_id"], ["evidence_items.evidence_id"]),
        sa.PrimaryKeyConstraint("entity_link_id"),
    )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def downgrade() -> None:
    """Drop the evidence index tables in reverse order of their creation."""
    # Referencing tables go first so no foreign key is left dangling.
    tables_in_drop_order = (
        "entity_links",
        "claim_links",
        "source_refs",
        "evidence_items",
        "sources",
        "slides",
    )
    for table_name in tables_in_drop_order:
        op.drop_table(table_name)
|
slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/004_add_requested_language.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Add requested_language to jobs
|
|
2
|
+
|
|
3
|
+
Revision ID: 004
|
|
4
|
+
Revises: 003
|
|
5
|
+
Create Date: 2024-01-31 12:00:00.000000
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from alembic import op
|
|
10
|
+
import sqlalchemy as sa
|
|
11
|
+
|
|
12
|
+
revision = "004"
|
|
13
|
+
down_revision = "003"
|
|
14
|
+
branch_labels = None
|
|
15
|
+
depends_on = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def upgrade() -> None:
    """Add the nullable ``requested_language`` string column to ``jobs``."""
    requested_language = sa.Column("requested_language", sa.String(), nullable=True)
    op.add_column("jobs", requested_language)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def downgrade() -> None:
    """Remove the ``requested_language`` column from ``jobs``."""
    table_name, column_name = "jobs", "requested_language"
    op.drop_column(table_name, column_name)
|
slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/005_add_job_config_json.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Add config_json to jobs (vision config, etc.)
|
|
2
|
+
|
|
3
|
+
Revision ID: 005
|
|
4
|
+
Revises: 004
|
|
5
|
+
Create Date: 2024-01-31 14:00:00.000000
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from alembic import op
|
|
10
|
+
import sqlalchemy as sa
|
|
11
|
+
|
|
12
|
+
revision = "005"
|
|
13
|
+
down_revision = "004"
|
|
14
|
+
branch_labels = None
|
|
15
|
+
depends_on = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def upgrade() -> None:
    """Add the nullable ``config_json`` text column to ``jobs``."""
    config_json = sa.Column("config_json", sa.Text(), nullable=True)
    op.add_column("jobs", config_json)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def downgrade() -> None:
    """Remove the ``config_json`` column from ``jobs``."""
    table_name, column_name = "jobs", "config_json"
    op.drop_column(table_name, column_name)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# API package
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from sqlalchemy import create_engine
|
|
2
|
+
from sqlalchemy.ext.declarative import declarative_base
|
|
3
|
+
from sqlalchemy.orm import sessionmaker
|
|
4
|
+
import os
|
|
5
|
+
from dotenv import load_dotenv
|
|
6
|
+
|
|
7
|
+
# Load environment variables via python-dotenv before DATABASE_URL is read.
load_dotenv()

# Connection string: DATABASE_URL from the environment, otherwise the same
# default PostgreSQL URL that alembic/env.py falls back to.
DATABASE_URL = os.getenv(
    "DATABASE_URL",
    "postgresql://slidesherlock:slidesherlock@localhost:5433/slidesherlock",
)

# Module-level engine and session factory, created at import time.
engine = create_engine(DATABASE_URL)
# Sessions from this factory neither autocommit nor autoflush.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base; alembic/env.py uses Base.metadata as its target metadata.
Base = declarative_base()
|