slidesherlock 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301)
  1. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/README.md +239 -0
  2. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/env.py +103 -0
  3. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/script.py.mako +24 -0
  4. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/001_initial_schema.py +78 -0
  5. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/002_add_job_status_and_artifact_fields.py +60 -0
  6. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/003_evidence_index_tables.py +112 -0
  7. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/004_add_requested_language.py +23 -0
  8. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/alembic/versions/005_add_job_config_json.py +23 -0
  9. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/__init__.py +1 -0
  10. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/database.py +17 -0
  11. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/main.py +504 -0
  12. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/models.py +180 -0
  13. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/package.json +27 -0
  14. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/requirements.txt +6 -0
  15. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/schemas.py +62 -0
  16. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/src/db.ts +98 -0
  17. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/src/index.ts +123 -0
  18. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/tests/__init__.py +1 -0
  19. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/tests/test_api.py +64 -0
  20. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/tsconfig.json +17 -0
  21. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/api/worker.py +82 -0
  22. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/package.json +30 -0
  23. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/requirements.txt +6 -0
  24. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/src/db.ts +118 -0
  25. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/src/index.ts +62 -0
  26. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/src/init-minio.ts +30 -0
  27. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/src/pipeline.ts +379 -0
  28. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/tsconfig.json +17 -0
  29. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/apps/worker/worker.py +22 -0
  30. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/__init__.py +1 -0
  31. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/alignment.py +113 -0
  32. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/audio_config.py +49 -0
  33. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/audio_prepare.py +245 -0
  34. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/audio_processor.py +114 -0
  35. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/composer.py +462 -0
  36. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/diagram_understand.py +723 -0
  37. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/doctor.py +148 -0
  38. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/evidence_index.py +531 -0
  39. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/explain_plan.py +107 -0
  40. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/image_classifier.py +282 -0
  41. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/image_extract.py +174 -0
  42. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/image_understand.py +516 -0
  43. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/jest.config.js +14 -0
  44. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/llm_provider.py +250 -0
  45. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/merge_engine.py +275 -0
  46. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/narration_blueprint.py +285 -0
  47. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/narration_source.py +234 -0
  48. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/native_graph.py +323 -0
  49. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/notes_config.py +115 -0
  50. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/ocr.py +133 -0
  51. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/overlay_renderer.py +321 -0
  52. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/package.json +23 -0
  53. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/photo_understand.py +236 -0
  54. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/pipeline.py +332 -0
  55. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/ppt_parser.py +250 -0
  56. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/presets.py +79 -0
  57. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/rag.py +86 -0
  58. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/script_context.py +151 -0
  59. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/script_generator.py +141 -0
  60. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/slide_caption_fallback.py +180 -0
  61. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/diagram.ts +193 -0
  62. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/index.ts +7 -0
  63. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/script.ts +136 -0
  64. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/storage.ts +87 -0
  65. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/tests/diagram.test.ts +133 -0
  66. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/tests/timeline.test.ts +155 -0
  67. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/tests/verifier.test.ts +110 -0
  68. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/timeline.ts +112 -0
  69. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/src/verifier.ts +316 -0
  70. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/__init__.py +22 -0
  71. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/audio.py +112 -0
  72. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/evidence.py +96 -0
  73. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/graph.py +134 -0
  74. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/ingest.py +152 -0
  75. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/render.py +265 -0
  76. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/script.py +160 -0
  77. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/translate.py +128 -0
  78. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/verify.py +144 -0
  79. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/stages/video.py +405 -0
  80. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/storage.py +89 -0
  81. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/subtitle_generator.py +71 -0
  82. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/__init__.py +1 -0
  83. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_audio_prepare.py +299 -0
  84. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_diagram_understand.py +142 -0
  85. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_doctor.py +32 -0
  86. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_image_classifier.py +42 -0
  87. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_image_evidence_integration.py +90 -0
  88. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_image_extract.py +45 -0
  89. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_image_understand.py +109 -0
  90. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_merge_engine.py +166 -0
  91. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_narration_blueprint.py +119 -0
  92. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_narration_source.py +246 -0
  93. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_notes_config.py +103 -0
  94. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_on_screen_notes.py +92 -0
  95. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_photo_understand.py +31 -0
  96. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_presets.py +63 -0
  97. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_script_context.py +89 -0
  98. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_script_plan.py +121 -0
  99. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_slide_caption_fallback.py +99 -0
  100. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_subtitle_generator.py +44 -0
  101. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_timeline_alignment.py +70 -0
  102. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_translation.py +58 -0
  103. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_variants.py +32 -0
  104. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_verifier.py +263 -0
  105. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_video_config.py +44 -0
  106. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_vision_day3_integration.py +232 -0
  107. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tests/test_vision_provider_openai.py +144 -0
  108. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/timeline_builder.py +286 -0
  109. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/translation.py +183 -0
  110. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/translator_provider.py +67 -0
  111. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/translator_provider_llm.py +39 -0
  112. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tsconfig.json +18 -0
  113. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/tts_provider.py +191 -0
  114. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/variants.py +33 -0
  115. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/verifier.py +643 -0
  116. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/video_config.py +88 -0
  117. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/vision_config.py +65 -0
  118. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/vision_graph.py +150 -0
  119. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/vision_provider.py +246 -0
  120. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/core/vision_provider_openai.py +414 -0
  121. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/package.json +20 -0
  122. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/src/index.ts +7 -0
  123. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/src/llm.ts +62 -0
  124. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/src/ocr.ts +47 -0
  125. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/src/tts.ts +45 -0
  126. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/providers/tsconfig.json +18 -0
  127. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/schemas/package.json +20 -0
  128. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/schemas/src/index.ts +246 -0
  129. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/packages/schemas/tsconfig.json +18 -0
  130. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/requirements.txt +49 -0
  131. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/scripts/create_sample_connectors_ppt.py +117 -0
  132. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/scripts/evidence_index_migrate.sh +33 -0
  133. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/scripts/run_demo.py +83 -0
  134. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/scripts/slidesherlock_cli.py +83 -0
  135. slidesherlock-1.1.0/.claude/worktrees/agent-a53fd5f5/scripts/verify_evidence_id_stable.py +144 -0
  136. slidesherlock-1.1.0/.gitignore +52 -0
  137. slidesherlock-1.1.0/CITATION.cff +23 -0
  138. slidesherlock-1.1.0/CONTRIBUTING.md +96 -0
  139. slidesherlock-1.1.0/LICENSE +191 -0
  140. slidesherlock-1.1.0/PKG-INFO +444 -0
  141. slidesherlock-1.1.0/README.md +392 -0
  142. slidesherlock-1.1.0/SECURITY.md +31 -0
  143. slidesherlock-1.1.0/alembic/env.py +103 -0
  144. slidesherlock-1.1.0/alembic/script.py.mako +24 -0
  145. slidesherlock-1.1.0/alembic/versions/001_initial_schema.py +78 -0
  146. slidesherlock-1.1.0/alembic/versions/002_add_job_status_and_artifact_fields.py +60 -0
  147. slidesherlock-1.1.0/alembic/versions/003_evidence_index_tables.py +112 -0
  148. slidesherlock-1.1.0/alembic/versions/004_add_requested_language.py +23 -0
  149. slidesherlock-1.1.0/alembic/versions/005_add_job_config_json.py +23 -0
  150. slidesherlock-1.1.0/apps/__init__.py +0 -0
  151. slidesherlock-1.1.0/apps/api/__init__.py +1 -0
  152. slidesherlock-1.1.0/apps/api/database.py +90 -0
  153. slidesherlock-1.1.0/apps/api/main.py +847 -0
  154. slidesherlock-1.1.0/apps/api/models.py +174 -0
  155. slidesherlock-1.1.0/apps/api/requirements.txt +6 -0
  156. slidesherlock-1.1.0/apps/api/schemas.py +62 -0
  157. slidesherlock-1.1.0/apps/api/tests/__init__.py +1 -0
  158. slidesherlock-1.1.0/apps/api/tests/test_api.py +64 -0
  159. slidesherlock-1.1.0/apps/api/tests/test_database_init.py +65 -0
  160. slidesherlock-1.1.0/apps/api/worker.py +79 -0
  161. slidesherlock-1.1.0/apps/web/index.html +14 -0
  162. slidesherlock-1.1.0/apps/web/package.json +29 -0
  163. slidesherlock-1.1.0/apps/web/pnpm-lock.yaml +1716 -0
  164. slidesherlock-1.1.0/apps/web/postcss.config.js +6 -0
  165. slidesherlock-1.1.0/apps/web/src/App.tsx +27 -0
  166. slidesherlock-1.1.0/apps/web/src/api/client.ts +136 -0
  167. slidesherlock-1.1.0/apps/web/src/api/mock.ts +256 -0
  168. slidesherlock-1.1.0/apps/web/src/components/ActivityFeed.tsx +199 -0
  169. slidesherlock-1.1.0/apps/web/src/components/DropZone.tsx +174 -0
  170. slidesherlock-1.1.0/apps/web/src/components/EvidenceTrail.tsx +103 -0
  171. slidesherlock-1.1.0/apps/web/src/components/FocusPanel.tsx +208 -0
  172. slidesherlock-1.1.0/apps/web/src/components/GlowButton.tsx +57 -0
  173. slidesherlock-1.1.0/apps/web/src/components/Layout.tsx +47 -0
  174. slidesherlock-1.1.0/apps/web/src/components/MetricBar.tsx +83 -0
  175. slidesherlock-1.1.0/apps/web/src/components/PipelineTrack.tsx +167 -0
  176. slidesherlock-1.1.0/apps/web/src/components/PresetCard.tsx +89 -0
  177. slidesherlock-1.1.0/apps/web/src/components/ProgressBar.tsx +43 -0
  178. slidesherlock-1.1.0/apps/web/src/components/StageCard.tsx +156 -0
  179. slidesherlock-1.1.0/apps/web/src/components/ThemeToggle.tsx +34 -0
  180. slidesherlock-1.1.0/apps/web/src/components/VideoPlayer.tsx +328 -0
  181. slidesherlock-1.1.0/apps/web/src/config/stages.ts +87 -0
  182. slidesherlock-1.1.0/apps/web/src/contexts/ThemeContext.tsx +64 -0
  183. slidesherlock-1.1.0/apps/web/src/index.css +172 -0
  184. slidesherlock-1.1.0/apps/web/src/main.tsx +10 -0
  185. slidesherlock-1.1.0/apps/web/src/pages/ProgressPage.tsx +161 -0
  186. slidesherlock-1.1.0/apps/web/src/pages/ResultPage.tsx +249 -0
  187. slidesherlock-1.1.0/apps/web/src/pages/UploadPage.tsx +253 -0
  188. slidesherlock-1.1.0/apps/web/src/utils/confetti.ts +20 -0
  189. slidesherlock-1.1.0/apps/web/tailwind.config.ts +83 -0
  190. slidesherlock-1.1.0/apps/web/tsconfig.json +22 -0
  191. slidesherlock-1.1.0/apps/web/tsconfig.node.json +23 -0
  192. slidesherlock-1.1.0/apps/web/vite.config.ts +19 -0
  193. slidesherlock-1.1.0/apps/worker/package.json +30 -0
  194. slidesherlock-1.1.0/apps/worker/requirements.txt +6 -0
  195. slidesherlock-1.1.0/apps/worker/tsconfig.json +17 -0
  196. slidesherlock-1.1.0/apps/worker/worker.py +22 -0
  197. slidesherlock-1.1.0/fonts/README.md +42 -0
  198. slidesherlock-1.1.0/packages/core/__init__.py +1 -0
  199. slidesherlock-1.1.0/packages/core/alignment.py +123 -0
  200. slidesherlock-1.1.0/packages/core/audio_config.py +52 -0
  201. slidesherlock-1.1.0/packages/core/audio_prepare.py +333 -0
  202. slidesherlock-1.1.0/packages/core/audio_processor.py +160 -0
  203. slidesherlock-1.1.0/packages/core/composer.py +829 -0
  204. slidesherlock-1.1.0/packages/core/diagram_understand.py +869 -0
  205. slidesherlock-1.1.0/packages/core/doctor.py +166 -0
  206. slidesherlock-1.1.0/packages/core/evidence_index.py +574 -0
  207. slidesherlock-1.1.0/packages/core/evidence_report.py +191 -0
  208. slidesherlock-1.1.0/packages/core/explain_plan.py +116 -0
  209. slidesherlock-1.1.0/packages/core/image_classifier.py +304 -0
  210. slidesherlock-1.1.0/packages/core/image_extract.py +178 -0
  211. slidesherlock-1.1.0/packages/core/image_understand.py +532 -0
  212. slidesherlock-1.1.0/packages/core/llm_backend.py +144 -0
  213. slidesherlock-1.1.0/packages/core/llm_config.py +191 -0
  214. slidesherlock-1.1.0/packages/core/llm_provider.py +278 -0
  215. slidesherlock-1.1.0/packages/core/llm_provider_openai.py +214 -0
  216. slidesherlock-1.1.0/packages/core/merge_engine.py +278 -0
  217. slidesherlock-1.1.0/packages/core/narration_blueprint.py +295 -0
  218. slidesherlock-1.1.0/packages/core/narration_rewriter.py +122 -0
  219. slidesherlock-1.1.0/packages/core/narration_source.py +238 -0
  220. slidesherlock-1.1.0/packages/core/native_graph.py +356 -0
  221. slidesherlock-1.1.0/packages/core/notes_config.py +128 -0
  222. slidesherlock-1.1.0/packages/core/ocr.py +140 -0
  223. slidesherlock-1.1.0/packages/core/overlay_renderer.py +412 -0
  224. slidesherlock-1.1.0/packages/core/photo_understand.py +260 -0
  225. slidesherlock-1.1.0/packages/core/pipeline.py +455 -0
  226. slidesherlock-1.1.0/packages/core/ppt_parser.py +250 -0
  227. slidesherlock-1.1.0/packages/core/presets.py +79 -0
  228. slidesherlock-1.1.0/packages/core/rag.py +86 -0
  229. slidesherlock-1.1.0/packages/core/script_context.py +165 -0
  230. slidesherlock-1.1.0/packages/core/script_generator.py +150 -0
  231. slidesherlock-1.1.0/packages/core/slide_caption_fallback.py +181 -0
  232. slidesherlock-1.1.0/packages/core/stages/__init__.py +22 -0
  233. slidesherlock-1.1.0/packages/core/stages/audio.py +139 -0
  234. slidesherlock-1.1.0/packages/core/stages/evidence.py +104 -0
  235. slidesherlock-1.1.0/packages/core/stages/graph.py +175 -0
  236. slidesherlock-1.1.0/packages/core/stages/ingest.py +162 -0
  237. slidesherlock-1.1.0/packages/core/stages/narrate.py +296 -0
  238. slidesherlock-1.1.0/packages/core/stages/render.py +277 -0
  239. slidesherlock-1.1.0/packages/core/stages/script.py +178 -0
  240. slidesherlock-1.1.0/packages/core/stages/translate.py +138 -0
  241. slidesherlock-1.1.0/packages/core/stages/verify.py +173 -0
  242. slidesherlock-1.1.0/packages/core/stages/video.py +588 -0
  243. slidesherlock-1.1.0/packages/core/storage.py +93 -0
  244. slidesherlock-1.1.0/packages/core/storage_backend.py +79 -0
  245. slidesherlock-1.1.0/packages/core/storage_local.py +61 -0
  246. slidesherlock-1.1.0/packages/core/subtitle_generator.py +94 -0
  247. slidesherlock-1.1.0/packages/core/tests/__init__.py +1 -0
  248. slidesherlock-1.1.0/packages/core/tests/test_audio_prepare.py +310 -0
  249. slidesherlock-1.1.0/packages/core/tests/test_diagram_understand.py +141 -0
  250. slidesherlock-1.1.0/packages/core/tests/test_doctor.py +32 -0
  251. slidesherlock-1.1.0/packages/core/tests/test_image_classifier.py +43 -0
  252. slidesherlock-1.1.0/packages/core/tests/test_image_evidence_integration.py +90 -0
  253. slidesherlock-1.1.0/packages/core/tests/test_image_extract.py +43 -0
  254. slidesherlock-1.1.0/packages/core/tests/test_image_understand.py +123 -0
  255. slidesherlock-1.1.0/packages/core/tests/test_merge_engine.py +195 -0
  256. slidesherlock-1.1.0/packages/core/tests/test_narration_blueprint.py +119 -0
  257. slidesherlock-1.1.0/packages/core/tests/test_narration_source.py +264 -0
  258. slidesherlock-1.1.0/packages/core/tests/test_notes_config.py +117 -0
  259. slidesherlock-1.1.0/packages/core/tests/test_on_screen_notes.py +93 -0
  260. slidesherlock-1.1.0/packages/core/tests/test_photo_understand.py +31 -0
  261. slidesherlock-1.1.0/packages/core/tests/test_presets.py +63 -0
  262. slidesherlock-1.1.0/packages/core/tests/test_script_context.py +121 -0
  263. slidesherlock-1.1.0/packages/core/tests/test_script_plan.py +148 -0
  264. slidesherlock-1.1.0/packages/core/tests/test_slide_caption_fallback.py +108 -0
  265. slidesherlock-1.1.0/packages/core/tests/test_storage_backend_registry.py +77 -0
  266. slidesherlock-1.1.0/packages/core/tests/test_subtitle_generator.py +44 -0
  267. slidesherlock-1.1.0/packages/core/tests/test_timeline_alignment.py +78 -0
  268. slidesherlock-1.1.0/packages/core/tests/test_translation.py +71 -0
  269. slidesherlock-1.1.0/packages/core/tests/test_variants.py +32 -0
  270. slidesherlock-1.1.0/packages/core/tests/test_verifier.py +358 -0
  271. slidesherlock-1.1.0/packages/core/tests/test_video_config.py +46 -0
  272. slidesherlock-1.1.0/packages/core/tests/test_video_encoder.py +54 -0
  273. slidesherlock-1.1.0/packages/core/tests/test_vision_day3_integration.py +247 -0
  274. slidesherlock-1.1.0/packages/core/tests/test_vision_provider_openai.py +154 -0
  275. slidesherlock-1.1.0/packages/core/timeline_builder.py +295 -0
  276. slidesherlock-1.1.0/packages/core/translation.py +201 -0
  277. slidesherlock-1.1.0/packages/core/translator_provider.py +97 -0
  278. slidesherlock-1.1.0/packages/core/translator_provider_llm.py +39 -0
  279. slidesherlock-1.1.0/packages/core/tts_provider.py +427 -0
  280. slidesherlock-1.1.0/packages/core/variants.py +46 -0
  281. slidesherlock-1.1.0/packages/core/verifier.py +676 -0
  282. slidesherlock-1.1.0/packages/core/video_config.py +105 -0
  283. slidesherlock-1.1.0/packages/core/video_encoder.py +98 -0
  284. slidesherlock-1.1.0/packages/core/vision_config.py +65 -0
  285. slidesherlock-1.1.0/packages/core/vision_graph.py +171 -0
  286. slidesherlock-1.1.0/packages/core/vision_provider.py +279 -0
  287. slidesherlock-1.1.0/packages/core/vision_provider_llm.py +27 -0
  288. slidesherlock-1.1.0/packages/core/vision_provider_openai.py +467 -0
  289. slidesherlock-1.1.0/pyproject.toml +100 -0
  290. slidesherlock-1.1.0/requirements.txt +50 -0
  291. slidesherlock-1.1.0/scripts/__init__.py +0 -0
  292. slidesherlock-1.1.0/scripts/batch_run.py +452 -0
  293. slidesherlock-1.1.0/scripts/check_minio.sh +75 -0
  294. slidesherlock-1.1.0/scripts/create_sample_connectors_ppt.py +126 -0
  295. slidesherlock-1.1.0/scripts/evidence_index_migrate.sh +33 -0
  296. slidesherlock-1.1.0/scripts/run_demo.py +100 -0
  297. slidesherlock-1.1.0/scripts/slidesherlock_cli.py +727 -0
  298. slidesherlock-1.1.0/scripts/test_api.sh +94 -0
  299. slidesherlock-1.1.0/scripts/test_api_connectors.sh +111 -0
  300. slidesherlock-1.1.0/scripts/test_render.sh +156 -0
  301. slidesherlock-1.1.0/scripts/verify_evidence_id_stable.py +155 -0
@@ -0,0 +1,239 @@
1
+ # SlideSherlock
2
+
3
+ Turn a PPTX into a narrated explainer video with visual guidance (highlight/trace/zoom), while preventing hallucinations using an Evidence Index + Verifier.
4
+
5
+ ## Architecture
6
+
7
+ SlideSherlock is built with an **artifact-first pipeline** where every stage writes outputs to S3/MinIO with stable paths and updates Postgres status. For a full pipeline diagram, stage breakdown, and **recommended next steps** (testing, production hardening, features, deployment), see **[ARCHITECTURE.md](ARCHITECTURE.md)**.
8
+
9
+ ### Key Components
10
+
11
+ 1. **Evidence Index**: Tracks all evidence with `evidence_id` + `source_ref` (bbox / ppt shape id / page+char offsets)
12
+ 2. **Diagram Understanding**:
13
+ - `G_native` from PPT objects (shapes/connectors/groups)
14
+ - Optional `G_vision` from PNG + OCR
15
+ - Merged to `G_unified` with provenance (NATIVE/VISION/BOTH) + confidence + NEEDS_REVIEW flags
16
+ 3. **Script Generation**: Produces segments with `claim_id`, `evidence_ids`, `entity_ids`
17
+ 4. **Verifier**: Enforces grounding by giving each script segment a PASS/REWRITE/REMOVE verdict, looping until no REWRITE verdicts remain
18
+ 5. **Timeline Builder**: Generates HIGHLIGHT/TRACE/ZOOM actions mapped to entity geometry
19
+ 6. **Renderer**: Generates overlays and composes final video with FFmpeg
20
+
21
+ ## Repository Structure
22
+
23
+ ```
24
+ /slidesherlock
25
+ /apps
26
+ /api - REST API for job submission and status
27
+ /worker - Pipeline worker that processes jobs
28
+ /packages
29
+ /core - Core business logic (diagram, script, verifier, timeline)
30
+ /providers - LLM/TTS/OCR provider interfaces
31
+ /schemas - TypeScript schemas and types
32
+ /infra
33
+ /docker - Docker configurations
34
+ docker-compose.yml
35
+ Makefile
36
+ README.md
37
+ ```
38
+
39
+ ## Prerequisites
40
+
41
+ ### System Dependencies
42
+
43
+ - **Python 3.11+** (3.12 recommended)
44
+ - **Docker and Docker Compose** (for PostgreSQL, Redis, MinIO)
45
+ - **LibreOffice** (for PPTX to PDF conversion)
46
+ - macOS: `brew install --cask libreoffice`
47
+ - Ubuntu/Debian: `sudo apt-get install libreoffice`
48
+ - CentOS/RHEL: `sudo yum install libreoffice`
49
+ - **Poppler** (for PDF to PNG conversion, required by pdf2image)
50
+ - macOS: `brew install poppler`
51
+ - Ubuntu/Debian: `sudo apt-get install poppler-utils`
52
+ - CentOS/RHEL: `sudo yum install poppler-utils`
53
+ - **FFmpeg** (for video composition in the RENDERING stage)
54
+
55
+ ### Python Dependencies
56
+
57
+ All Python dependencies are consolidated in `requirements.txt` and will be installed automatically with `make setup`.
58
+
59
+ ## Quick Start
60
+
61
+ 1. **Start infrastructure services:**
62
+ ```bash
63
+ make up
64
+ ```
65
+
66
+ 2. **Install dependencies:**
67
+ ```bash
68
+ make install
69
+ ```
70
+
71
+ 3. **Build all packages:**
72
+ ```bash
73
+ make build
74
+ ```
75
+
76
+ 4. **Initialize database:**
77
+ ```bash
78
+ make migrate
79
+ ```
80
+
81
+ 5. **Start the API server (in one terminal):**
82
+ ```bash
83
+ make api
84
+ ```
85
+
86
+ 6. **Start the worker (in another terminal):**
87
+ ```bash
88
+ make worker
89
+ ```
90
+
91
+ ## Usage
92
+
93
+ ### Submit a Job
94
+
95
+ ```bash
96
+ curl -X POST http://localhost:3000/jobs \
97
+ -F "file=@presentation.pptx"
98
+ ```
99
+
100
+ Response:
101
+ ```json
102
+ {
103
+ "job_id": "uuid-here",
104
+ "status": "PENDING",
105
+ "message": "Job submitted successfully"
106
+ }
107
+ ```
108
+
109
+ ### Check Job Status
110
+
111
+ ```bash
112
+ curl http://localhost:3000/jobs/{job_id}
113
+ ```
114
+
115
+ ### Get Artifacts
116
+
117
+ ```bash
118
+ # Get script
119
+ curl http://localhost:3000/jobs/{job_id}/artifacts/script
120
+
121
+ # Get verify report
122
+ curl http://localhost:3000/jobs/{job_id}/artifacts/verify_report
123
+
124
+ # Get timeline
125
+ curl http://localhost:3000/jobs/{job_id}/artifacts/timeline
126
+
127
+ # Get final video
128
+ curl http://localhost:3000/jobs/{job_id}/artifacts/final_video -o output.mp4
129
+ ```
130
+
131
+ ## Configuration
132
+
133
+ ### Environment Variables
134
+
135
+ **API Server:**
136
+ - `PORT` - API server port (default: 3000)
137
+ - `DATABASE_URL` - PostgreSQL connection string
138
+ - `REDIS_URL` - Redis connection string
139
+ - `MINIO_ENDPOINT` - MinIO endpoint URL
140
+ - `MINIO_ACCESS_KEY` - MinIO access key
141
+ - `MINIO_SECRET_KEY` - MinIO secret key
142
+ - `MINIO_BUCKET` - MinIO bucket name
143
+
144
+ **Worker:**
145
+ - `DATABASE_URL` - PostgreSQL connection string
146
+ - `REDIS_URL` - Redis connection string
147
+ - `MINIO_ENDPOINT` - MinIO endpoint URL
148
+ - `MINIO_ACCESS_KEY` - MinIO access key
149
+ - `MINIO_SECRET_KEY` - MinIO secret key
150
+ - `MINIO_BUCKET` - MinIO bucket name
151
+ - `OPENAI_API_KEY` - OpenAI API key (optional; see [Secrets](#secrets-never-commit) below)
152
+ - `USE_SYSTEM_TTS` - Set to "true" to use system TTS (macOS only)
153
+
154
+ ### Secrets (never commit)
155
+
156
+ API keys and secrets must **not** be committed. Use environment variables or a local `.env` file.
157
+
158
+ 1. **Copy the example file** (no secrets inside):
159
+ ```bash
160
+ cp .env.example .env
161
+ ```
162
+ 2. **Edit `.env`** and set only the keys you need (e.g. `OPENAI_API_KEY=sk-...`).
163
+ 3. **`.env` is in `.gitignore`** – it will not be pushed to GitHub.
164
+
165
+ **OpenAI vision (optional):** For real diagram/photo understanding, set `OPENAI_API_KEY` in `.env` and `VISION_PROVIDER=openai`. If the key is not set, the pipeline uses the stub vision provider (generic captions) and logs that the key is missing.
166
+
167
+ - **Enable:** `VISION_PROVIDER=openai` (and optionally `VISION_EXTRACTOR_PROVIDER=openai`).
168
+ - **Config (env):** `OPENAI_VISION_MODEL` (default `gpt-4o`), `OPENAI_VISION_TEMPERATURE` (default `0`), `OPENAI_VISION_TIMEOUT_SECONDS` (default `60`).
169
+ - **Caching:** Results are cached in MinIO by image hash + model + lang + prompt version to avoid repeat API charges. Set `VISION_CACHE_ENABLED=false` to disable; cache path is `VISION_CACHE_PREFIX` (default `jobs/{job_id}/cache/vision/`).
170
+ - **Costs:** Each uncached image uses one vision API call; keep caching enabled (it is on unless `VISION_CACHE_ENABLED=false`) during development and re-runs. Outputs are validated and stored as evidence (used by script/verifier in later pipeline stages).
171
+
172
+ ## Pipeline Stages
173
+
174
+ 1. **EXTRACTING**: Extract content from PPTX, convert slides to PNG
175
+ 2. **DIAGRAM_ANALYSIS**: Build evidence index and analyze diagram structure
176
+ 3. **SCRIPT_GENERATION**: Generate narrated script with evidence grounding
177
+ 4. **VERIFICATION**: Verify and rewrite script until all segments pass
178
+ 5. **TIMELINE_BUILDING**: Generate timeline with visual actions
179
+ 6. **RENDERING**: Generate overlays and compose final video
180
+ 7. **COMPLETED**: Job finished successfully
181
+
182
+ ## Artifact Paths
183
+
184
+ All artifacts are stored in MinIO with the following structure:
185
+
186
+ ```
187
+ jobs/{job_id}/
188
+ input.pptx
189
+ extracted.json
190
+ slides/
191
+ slide-0.png
192
+ slide-1.png
193
+ evidence/
194
+ index.json
195
+ graph/
196
+ native.json
197
+ vision.json (optional)
198
+ unified.json
199
+ script.json
200
+ verify_report.json
201
+ coverage.json
202
+ timeline/
203
+ timeline.json
204
+ overlays/
205
+ {action_id}.png
206
+ audio/
207
+ {segment_id}.wav
208
+ final.mp4
209
+ ```
210
+
211
+ ## Testing
212
+
213
+ Run tests:
214
+ ```bash
215
+ make test
216
+ ```
217
+
218
+ ## Development
219
+
220
+ ### No-Provider Mode
221
+
222
+ By default, SlideSherlock runs in "no-provider mode" where:
223
+ - LLM provider returns placeholder responses
224
+ - TTS provider returns empty audio
225
+ - OCR provider returns empty results
226
+
227
+ This allows testing the pipeline structure without external API dependencies.
228
+
229
+ ### Adding Providers
230
+
231
+ To use real providers:
232
+
233
+ 1. **OpenAI LLM**: Set the `OPENAI_API_KEY` environment variable
234
+ 2. **System TTS**: Set `USE_SYSTEM_TTS=true` (macOS only)
235
+ 3. **OCR**: Implement a real OCR provider in `packages/providers/src/ocr.ts`
236
+
237
+ ## License
238
+
239
+ MIT
@@ -0,0 +1,103 @@
1
+ from logging.config import fileConfig
2
+ from sqlalchemy import engine_from_config
3
+ from sqlalchemy import pool
4
+ from alembic import context
5
+ import os
6
+ import sys
7
+
8
+ # Add project root to path
9
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
10
+ sys.path.insert(0, project_root)
11
+
12
+ # Import using absolute imports from the apps.api package
13
+ from apps.api.database import Base # noqa: E402
14
+
15
+ # Import models so Alembic can detect them for autogenerate
16
+ from apps.api.models import ( # noqa: E402, F401
17
+ Project,
18
+ Job,
19
+ Artifact,
20
+ Slide,
21
+ Source,
22
+ EvidenceItem,
23
+ SourceRef,
24
+ ClaimLink,
25
+ EntityLink,
26
+ )
27
+
28
+ # this is the Alembic Config object, which provides
29
+ # access to the values within the .ini file in use.
30
+ config = context.config
31
+
32
+ # Interpret the config file for Python logging.
33
+ # This line sets up loggers basically.
34
+ if config.config_file_name is not None:
35
+ fileConfig(config.config_file_name)
36
+
37
+ # add your model's MetaData object here
38
+ # for 'autogenerate' support
39
+ target_metadata = Base.metadata
40
+
41
+ # other values from the config, defined by the needs of env.py,
42
+ # can be acquired:
43
+ # my_important_option = config.get_main_option("my_important_option")
44
+ # ... etc.
45
+
46
+
47
def get_url():
    """Return the database URL that migrations should run against.

    The ``DATABASE_URL`` environment variable is used when it is set;
    otherwise the local development PostgreSQL URL is returned.
    """
    fallback_url = "postgresql://slidesherlock:slidesherlock@localhost:5433/slidesherlock"
    return os.environ.get("DATABASE_URL", fallback_url)
52
+
53
+
54
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured from a URL alone, so no Engine is created and
    no DBAPI driver needs to be available. Calls to context.execute() emit
    the given string to the script output instead of hitting a database.
    """
    offline_options = {
        "url": get_url(),
        "target_metadata": target_metadata,
        # Render bound parameters inline so the emitted SQL is self-contained.
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
76
+
77
+
78
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an Engine from the active Alembic ini section, with
    ``sqlalchemy.url`` overridden by the value of ``get_url()``, then runs the
    migrations over a live connection inside a transaction.
    """
    # get_section() returns its default when the ini section is absent; pass an
    # empty dict so the URL override below cannot fail with a TypeError on None.
    configuration = config.get_section(config.config_ini_section, {})
    configuration["sqlalchemy.url"] = get_url()
    connectable = engine_from_config(
        configuration,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()
98
+
99
+
100
# Pick the runner that matches the mode Alembic reports for this invocation.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
@@ -0,0 +1,24 @@
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+ ${imports if imports else ""}
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = ${repr(up_revision)}
14
+ down_revision = ${repr(down_revision)}
15
+ branch_labels = ${repr(branch_labels)}
16
+ depends_on = ${repr(depends_on)}
17
+
18
+
19
+ def upgrade() -> None:
20
+ ${upgrades if upgrades else "pass"}
21
+
22
+
23
+ def downgrade() -> None:
24
+ ${downgrades if downgrades else "pass"}
@@ -0,0 +1,78 @@
1
+ """Initial schema: project, job, artifact
2
+
3
+ Revision ID: 001
4
+ Revises:
5
+ Create Date: 2024-01-01 00:00:00.000000
6
+
7
+ """
8
+
9
+ from alembic import op
10
+ import sqlalchemy as sa
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = "001"
14
+ down_revision = None
15
+ branch_labels = None
16
+ depends_on = None
17
+
18
+
19
def upgrade() -> None:
    """Create the initial ``projects``, ``jobs`` and ``artifacts`` tables.

    Parents are created before children so that each foreign key points at a
    table that already exists.
    """

    def required(name, column_type):
        # NOT NULL column.
        return sa.Column(name, column_type, nullable=False)

    def optional(name, column_type):
        # Nullable column.
        return sa.Column(name, column_type, nullable=True)

    def project_fk():
        # Build a fresh constraint object for every table that references projects.
        return sa.ForeignKeyConstraint(["project_id"], ["projects.project_id"])

    # projects: root table
    op.create_table(
        "projects",
        required("project_id", sa.String()),
        required("name", sa.String()),
        optional("description", sa.Text()),
        required("created_at", sa.DateTime()),
        required("updated_at", sa.DateTime()),
        sa.PrimaryKeyConstraint("project_id"),
    )

    # jobs: belongs to a project; status is backed by the "jobstatus" enum
    job_status = sa.Enum("PENDING", "PROCESSING", "DONE", "FAILED", name="jobstatus")
    op.create_table(
        "jobs",
        required("job_id", sa.String()),
        required("project_id", sa.String()),
        required("status", job_status),
        optional("input_file_path", sa.String()),
        optional("error_message", sa.Text()),
        required("created_at", sa.DateTime()),
        required("updated_at", sa.DateTime()),
        project_fk(),
        sa.PrimaryKeyConstraint("job_id"),
    )

    # artifacts: belongs to a project and, optionally, to a job
    op.create_table(
        "artifacts",
        required("artifact_id", sa.String()),
        required("project_id", sa.String()),
        optional("job_id", sa.String()),
        required("artifact_type", sa.String()),
        required("storage_path", sa.String()),
        optional("metadata_json", sa.Text()),
        required("created_at", sa.DateTime()),
        project_fk(),
        sa.ForeignKeyConstraint(["job_id"], ["jobs.job_id"]),
        sa.PrimaryKeyConstraint("artifact_id"),
    )
72
+
73
+
74
def downgrade() -> None:
    """Drop the initial tables, children first, then the ``jobstatus`` enum type."""
    for table_name in ("artifacts", "jobs", "projects"):
        op.drop_table(table_name)

    # The enum type is a separate database object and is dropped explicitly.
    op.execute("DROP TYPE IF EXISTS jobstatus")
@@ -0,0 +1,60 @@
1
+ """Add QUEUED and RUNNING job status, add sha256 and size_bytes to artifacts
2
+
3
+ Revision ID: 002
4
+ Revises: 001
5
+ Create Date: 2024-01-27 12:00:00.000000
6
+
7
+ """
8
+
9
+ from alembic import op
10
+ import sqlalchemy as sa
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = "002"
14
+ down_revision = "001"
15
+ branch_labels = None
16
+ depends_on = None
17
+
18
+
19
def upgrade() -> None:
    """Add QUEUED/RUNNING to ``jobstatus`` and sha256/size_bytes to ``artifacts``."""
    # On PostgreSQL releases before 12, ALTER TYPE ... ADD VALUE cannot run
    # inside a transaction block or from a DO block, so the enum changes are
    # issued in an autocommit block. ADD VALUE IF NOT EXISTS (available since
    # PostgreSQL 9.3) keeps the statements safe to re-run.
    with op.get_context().autocommit_block():
        op.execute("ALTER TYPE jobstatus ADD VALUE IF NOT EXISTS 'QUEUED'")
        op.execute("ALTER TYPE jobstatus ADD VALUE IF NOT EXISTS 'RUNNING'")

    # Add sha256 and size_bytes columns to artifacts table.
    # NOTE(review): size_bytes is declared as a string column; confirm this
    # matches the ORM model before changing the type.
    op.add_column("artifacts", sa.Column("sha256", sa.String(), nullable=True))
    op.add_column("artifacts", sa.Column("size_bytes", sa.String(), nullable=True))
52
+
53
+
54
def downgrade() -> None:
    """Drop the ``size_bytes`` and ``sha256`` columns from ``artifacts``.

    The QUEUED and RUNNING enum values are left in place: PostgreSQL doesn't
    support removing enum values easily, so they remain but won't be used.
    """
    for column_name in ("size_bytes", "sha256"):
        op.drop_column("artifacts", column_name)
@@ -0,0 +1,112 @@
1
+ """Evidence index tables: slides, sources, evidence_items, source_refs, claim_links, entity_links
2
+
3
+ Revision ID: 003
4
+ Revises: 002
5
+ Create Date: 2024-01-28 12:00:00.000000
6
+
7
+ """
8
+
9
+ from alembic import op
10
+ import sqlalchemy as sa
11
+
12
+ revision = "003"
13
+ down_revision = "002"
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
def upgrade() -> None:
    """Create the evidence-index tables.

    Creation order follows the foreign-key chain: ``slides``, ``sources``,
    ``evidence_items``, then the three tables that reference evidence items
    (``source_refs``, ``claim_links``, ``entity_links``).
    """

    def required(name, column_type):
        # NOT NULL column.
        return sa.Column(name, column_type, nullable=False)

    def optional(name, column_type):
        # Nullable column.
        return sa.Column(name, column_type, nullable=True)

    def fk(column_name, target):
        # Single-column foreign key; a fresh constraint object per table.
        return sa.ForeignKeyConstraint([column_name], [target])

    op.create_table(
        "slides",
        required("slide_id", sa.String()),
        required("job_id", sa.String()),
        required("slide_index", sa.Integer()),
        optional("slide_title", sa.Text()),
        optional("png_artifact_id", sa.String()),
        optional("pptx_ref", sa.String()),
        fk("job_id", "jobs.job_id"),
        fk("png_artifact_id", "artifacts.artifact_id"),
        sa.PrimaryKeyConstraint("slide_id"),
    )

    op.create_table(
        "sources",
        required("source_id", sa.String()),
        required("job_id", sa.String()),
        required("type", sa.String()),
        optional("artifact_id", sa.String()),
        optional("slide_id", sa.String()),
        required("created_at", sa.DateTime()),
        fk("job_id", "jobs.job_id"),
        fk("artifact_id", "artifacts.artifact_id"),
        fk("slide_id", "slides.slide_id"),
        sa.PrimaryKeyConstraint("source_id"),
    )

    op.create_table(
        "evidence_items",
        required("evidence_id", sa.String()),
        required("job_id", sa.String()),
        optional("slide_id", sa.String()),
        required("source_id", sa.String()),
        required("kind", sa.String()),
        required("content", sa.Text()),
        optional("content_hash", sa.String()),
        optional("confidence", sa.Float()),
        optional("language", sa.String()),
        required("created_at", sa.DateTime()),
        fk("job_id", "jobs.job_id"),
        fk("slide_id", "slides.slide_id"),
        fk("source_id", "sources.source_id"),
        sa.PrimaryKeyConstraint("evidence_id"),
    )

    op.create_table(
        "source_refs",
        required("ref_id", sa.String()),
        required("evidence_id", sa.String()),
        required("ref_type", sa.String()),
        optional("slide_index", sa.Integer()),
        optional("ppt_shape_id", sa.String()),
        optional("ppt_paragraph_ix", sa.Integer()),
        optional("ppt_run_ix", sa.Integer()),
        # bbox_x, bbox_y, bbox_w, bbox_h — in that order
        *(optional(f"bbox_{part}", sa.Float()) for part in ("x", "y", "w", "h")),
        optional("page_num", sa.Integer()),
        optional("char_start", sa.Integer()),
        optional("char_end", sa.Integer()),
        optional("url", sa.Text()),
        fk("evidence_id", "evidence_items.evidence_id"),
        sa.PrimaryKeyConstraint("ref_id"),
    )

    op.create_table(
        "claim_links",
        required("claim_link_id", sa.String()),
        required("claim_id", sa.String()),
        required("evidence_id", sa.String()),
        optional("weight", sa.Float()),
        fk("evidence_id", "evidence_items.evidence_id"),
        sa.PrimaryKeyConstraint("claim_link_id"),
    )

    op.create_table(
        "entity_links",
        required("entity_link_id", sa.String()),
        required("entity_id", sa.String()),
        required("evidence_id", sa.String()),
        optional("role", sa.String()),
        fk("evidence_id", "evidence_items.evidence_id"),
        sa.PrimaryKeyConstraint("entity_link_id"),
    )
104
+
105
+
106
def downgrade() -> None:
    """Drop the evidence-index tables, dependents before the tables they reference."""
    tables_in_drop_order = (
        "entity_links",
        "claim_links",
        "source_refs",
        "evidence_items",
        "sources",
        "slides",
    )
    for table_name in tables_in_drop_order:
        op.drop_table(table_name)
@@ -0,0 +1,23 @@
1
+ """Add requested_language to jobs
2
+
3
+ Revision ID: 004
4
+ Revises: 003
5
+ Create Date: 2024-01-31 12:00:00.000000
6
+
7
+ """
8
+
9
+ from alembic import op
10
+ import sqlalchemy as sa
11
+
12
+ revision = "004"
13
+ down_revision = "003"
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
def upgrade() -> None:
    """Add the nullable ``requested_language`` string column to ``jobs``."""
    requested_language = sa.Column("requested_language", sa.String(), nullable=True)
    op.add_column("jobs", requested_language)
20
+
21
+
22
def downgrade() -> None:
    """Remove the ``requested_language`` column from ``jobs``."""
    table_name, column_name = "jobs", "requested_language"
    op.drop_column(table_name, column_name)
@@ -0,0 +1,23 @@
1
+ """Add config_json to jobs (vision config, etc.)
2
+
3
+ Revision ID: 005
4
+ Revises: 004
5
+ Create Date: 2024-01-31 14:00:00.000000
6
+
7
+ """
8
+
9
+ from alembic import op
10
+ import sqlalchemy as sa
11
+
12
+ revision = "005"
13
+ down_revision = "004"
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
def upgrade() -> None:
    """Add the nullable ``config_json`` text column to ``jobs``."""
    config_json = sa.Column("config_json", sa.Text(), nullable=True)
    op.add_column("jobs", config_json)
20
+
21
+
22
def downgrade() -> None:
    """Remove the ``config_json`` column from ``jobs``."""
    table_name, column_name = "jobs", "config_json"
    op.drop_column(table_name, column_name)
@@ -0,0 +1,17 @@
1
+ from sqlalchemy import create_engine
2
+ from sqlalchemy.ext.declarative import declarative_base
3
+ from sqlalchemy.orm import sessionmaker
4
+ import os
5
+ from dotenv import load_dotenv
6
+
7
# Must run before DATABASE_URL is read below so values supplied through
# python-dotenv are visible to os.environ.
load_dotenv()

# Connection string for the application database; falls back to the local
# development PostgreSQL instance when DATABASE_URL is not set.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://slidesherlock:slidesherlock@localhost:5433/slidesherlock",
)

# Declarative base class that the ORM models inherit from.
Base = declarative_base()

# Shared engine plus a session factory bound to it; sessions neither
# autoflush nor autocommit.
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)