@steipete/summarize 0.11.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -1
- package/README.md +63 -17
- package/dist/cli.js +1 -1
- package/dist/esm/cache-keys.js +75 -0
- package/dist/esm/cache-keys.js.map +1 -0
- package/dist/esm/cache-slides-cleanup.js +47 -0
- package/dist/esm/cache-slides-cleanup.js.map +1 -0
- package/dist/esm/cache.js +14 -91
- package/dist/esm/cache.js.map +1 -1
- package/dist/esm/config/env.js +49 -0
- package/dist/esm/config/env.js.map +1 -0
- package/dist/esm/config/model.js +193 -0
- package/dist/esm/config/model.js.map +1 -0
- package/dist/esm/config/parse-helpers.js +50 -0
- package/dist/esm/config/parse-helpers.js.map +1 -0
- package/dist/esm/config/read.js +83 -0
- package/dist/esm/config/read.js.map +1 -0
- package/dist/esm/config/sections.js +438 -0
- package/dist/esm/config/sections.js.map +1 -0
- package/dist/esm/config/types.js +2 -0
- package/dist/esm/config/types.js.map +1 -0
- package/dist/esm/config.js +24 -807
- package/dist/esm/config.js.map +1 -1
- package/dist/esm/content/asset.js +2 -2
- package/dist/esm/content/asset.js.map +1 -1
- package/dist/esm/daemon/agent-model.js +235 -0
- package/dist/esm/daemon/agent-model.js.map +1 -0
- package/dist/esm/daemon/agent-request.js +87 -0
- package/dist/esm/daemon/agent-request.js.map +1 -0
- package/dist/esm/daemon/agent.js +42 -243
- package/dist/esm/daemon/agent.js.map +1 -1
- package/dist/esm/daemon/chat.js +69 -8
- package/dist/esm/daemon/chat.js.map +1 -1
- package/dist/esm/daemon/cli.js +21 -4
- package/dist/esm/daemon/cli.js.map +1 -1
- package/dist/esm/daemon/config.js +65 -9
- package/dist/esm/daemon/config.js.map +1 -1
- package/dist/esm/daemon/env-snapshot.js +4 -0
- package/dist/esm/daemon/env-snapshot.js.map +1 -1
- package/dist/esm/daemon/flow-context.js +8 -1
- package/dist/esm/daemon/flow-context.js.map +1 -1
- package/dist/esm/daemon/models.js +16 -0
- package/dist/esm/daemon/models.js.map +1 -1
- package/dist/esm/daemon/process-registry.js.map +1 -1
- package/dist/esm/daemon/server-admin-routes.js +134 -0
- package/dist/esm/daemon/server-admin-routes.js.map +1 -0
- package/dist/esm/daemon/server-agent-route.js +104 -0
- package/dist/esm/daemon/server-agent-route.js.map +1 -0
- package/dist/esm/daemon/server-http.js +89 -0
- package/dist/esm/daemon/server-http.js.map +1 -0
- package/dist/esm/daemon/server-session-routes.js +209 -0
- package/dist/esm/daemon/server-session-routes.js.map +1 -0
- package/dist/esm/daemon/server-session.js +118 -0
- package/dist/esm/daemon/server-session.js.map +1 -0
- package/dist/esm/daemon/server-sse.js +28 -0
- package/dist/esm/daemon/server-sse.js.map +1 -0
- package/dist/esm/daemon/server-summarize-execution.js +357 -0
- package/dist/esm/daemon/server-summarize-execution.js.map +1 -0
- package/dist/esm/daemon/server-summarize-request.js +119 -0
- package/dist/esm/daemon/server-summarize-request.js.map +1 -0
- package/dist/esm/daemon/server.js +72 -1121
- package/dist/esm/daemon/server.js.map +1 -1
- package/dist/esm/daemon/summarize-progress.js +1 -1
- package/dist/esm/daemon/summarize-progress.js.map +1 -1
- package/dist/esm/daemon/summarize.js.map +1 -1
- package/dist/esm/llm/cli-exec.js +75 -0
- package/dist/esm/llm/cli-exec.js.map +1 -0
- package/dist/esm/llm/cli-provider-output.js +191 -0
- package/dist/esm/llm/cli-provider-output.js.map +1 -0
- package/dist/esm/llm/cli.js +3 -212
- package/dist/esm/llm/cli.js.map +1 -1
- package/dist/esm/llm/generate-text-document.js +109 -0
- package/dist/esm/llm/generate-text-document.js.map +1 -0
- package/dist/esm/llm/generate-text-shared.js +102 -0
- package/dist/esm/llm/generate-text-shared.js.map +1 -0
- package/dist/esm/llm/generate-text-stream.js +258 -0
- package/dist/esm/llm/generate-text-stream.js.map +1 -0
- package/dist/esm/llm/generate-text.js +145 -480
- package/dist/esm/llm/generate-text.js.map +1 -1
- package/dist/esm/llm/model-id.js +21 -20
- package/dist/esm/llm/model-id.js.map +1 -1
- package/dist/esm/llm/provider-capabilities.js +2 -0
- package/dist/esm/llm/provider-capabilities.js.map +1 -0
- package/dist/esm/llm/provider-profile.js +142 -0
- package/dist/esm/llm/provider-profile.js.map +1 -0
- package/dist/esm/llm/providers/google.js +42 -5
- package/dist/esm/llm/providers/google.js.map +1 -1
- package/dist/esm/llm/providers/models.js +13 -0
- package/dist/esm/llm/providers/models.js.map +1 -1
- package/dist/esm/llm/providers/openai.js.map +1 -1
- package/dist/esm/llm/transcript-to-markdown.js.map +1 -1
- package/dist/esm/model-auto-cli.js +89 -0
- package/dist/esm/model-auto-cli.js.map +1 -0
- package/dist/esm/model-auto-rules.js +86 -0
- package/dist/esm/model-auto-rules.js.map +1 -0
- package/dist/esm/model-auto.js +10 -245
- package/dist/esm/model-auto.js.map +1 -1
- package/dist/esm/model-spec.js +23 -17
- package/dist/esm/model-spec.js.map +1 -1
- package/dist/esm/refresh-free.js +1 -1
- package/dist/esm/refresh-free.js.map +1 -1
- package/dist/esm/run/attachments.js +1 -1
- package/dist/esm/run/attachments.js.map +1 -1
- package/dist/esm/run/bird/exec.js +23 -0
- package/dist/esm/run/bird/exec.js.map +1 -0
- package/dist/esm/run/bird/media.js +171 -0
- package/dist/esm/run/bird/media.js.map +1 -0
- package/dist/esm/run/bird/parse.js +82 -0
- package/dist/esm/run/bird/parse.js.map +1 -0
- package/dist/esm/run/bird/types.js +2 -0
- package/dist/esm/run/bird/types.js.map +1 -0
- package/dist/esm/run/bird.js +86 -144
- package/dist/esm/run/bird.js.map +1 -1
- package/dist/esm/run/cache-state.js.map +1 -1
- package/dist/esm/run/constants.js +2 -1
- package/dist/esm/run/constants.js.map +1 -1
- package/dist/esm/run/env.js +3 -0
- package/dist/esm/run/env.js.map +1 -1
- package/dist/esm/run/finish-line-labels.js +76 -0
- package/dist/esm/run/finish-line-labels.js.map +1 -0
- package/dist/esm/run/finish-line-lengths.js +96 -0
- package/dist/esm/run/finish-line-lengths.js.map +1 -0
- package/dist/esm/run/finish-line.js +3 -169
- package/dist/esm/run/finish-line.js.map +1 -1
- package/dist/esm/run/flows/asset/extract.js.map +1 -1
- package/dist/esm/run/flows/asset/input.js +1 -1
- package/dist/esm/run/flows/asset/input.js.map +1 -1
- package/dist/esm/run/flows/asset/media.js +19 -10
- package/dist/esm/run/flows/asset/media.js.map +1 -1
- package/dist/esm/run/flows/asset/output.js.map +1 -1
- package/dist/esm/run/flows/asset/preprocess.js.map +1 -1
- package/dist/esm/run/flows/asset/summary-attempts.js +109 -0
- package/dist/esm/run/flows/asset/summary-attempts.js.map +1 -0
- package/dist/esm/run/flows/asset/summary.js +19 -107
- package/dist/esm/run/flows/asset/summary.js.map +1 -1
- package/dist/esm/run/flows/url/extract.js +7 -4
- package/dist/esm/run/flows/url/extract.js.map +1 -1
- package/dist/esm/run/flows/url/flow-progress.js +119 -0
- package/dist/esm/run/flows/url/flow-progress.js.map +1 -0
- package/dist/esm/run/flows/url/flow.js +22 -93
- package/dist/esm/run/flows/url/flow.js.map +1 -1
- package/dist/esm/run/flows/url/markdown.js +21 -3
- package/dist/esm/run/flows/url/markdown.js.map +1 -1
- package/dist/esm/run/flows/url/progress-status.js +56 -0
- package/dist/esm/run/flows/url/progress-status.js.map +1 -0
- package/dist/esm/run/flows/url/slides-output-render.js +78 -0
- package/dist/esm/run/flows/url/slides-output-render.js.map +1 -0
- package/dist/esm/run/flows/url/slides-output-state.js +86 -0
- package/dist/esm/run/flows/url/slides-output-state.js.map +1 -0
- package/dist/esm/run/flows/url/slides-output-stream.js +271 -0
- package/dist/esm/run/flows/url/slides-output-stream.js.map +1 -0
- package/dist/esm/run/flows/url/slides-output.js +29 -422
- package/dist/esm/run/flows/url/slides-output.js.map +1 -1
- package/dist/esm/run/flows/url/slides-text-markdown.js +431 -0
- package/dist/esm/run/flows/url/slides-text-markdown.js.map +1 -0
- package/dist/esm/run/flows/url/slides-text-transcript.js +199 -0
- package/dist/esm/run/flows/url/slides-text-transcript.js.map +1 -0
- package/dist/esm/run/flows/url/slides-text-types.js +2 -0
- package/dist/esm/run/flows/url/slides-text-types.js.map +1 -0
- package/dist/esm/run/flows/url/slides-text.js +2 -627
- package/dist/esm/run/flows/url/slides-text.js.map +1 -1
- package/dist/esm/run/flows/url/summary-finish.js +34 -0
- package/dist/esm/run/flows/url/summary-finish.js.map +1 -0
- package/dist/esm/run/flows/url/summary-json.js +32 -0
- package/dist/esm/run/flows/url/summary-json.js.map +1 -0
- package/dist/esm/run/flows/url/summary-prompt.js +147 -0
- package/dist/esm/run/flows/url/summary-prompt.js.map +1 -0
- package/dist/esm/run/flows/url/summary-resolution.js +320 -0
- package/dist/esm/run/flows/url/summary-resolution.js.map +1 -0
- package/dist/esm/run/flows/url/summary-timestamps.js +136 -0
- package/dist/esm/run/flows/url/summary-timestamps.js.map +1 -0
- package/dist/esm/run/flows/url/summary.js +49 -543
- package/dist/esm/run/flows/url/summary.js.map +1 -1
- package/dist/esm/run/help.js +9 -3
- package/dist/esm/run/help.js.map +1 -1
- package/dist/esm/run/markdown-transforms.js +89 -0
- package/dist/esm/run/markdown-transforms.js.map +1 -0
- package/dist/esm/run/markdown.js +1 -96
- package/dist/esm/run/markdown.js.map +1 -1
- package/dist/esm/run/run-env.js +28 -7
- package/dist/esm/run/run-env.js.map +1 -1
- package/dist/esm/run/run-settings-parse.js +73 -0
- package/dist/esm/run/run-settings-parse.js.map +1 -0
- package/dist/esm/run/run-settings.js +1 -72
- package/dist/esm/run/run-settings.js.map +1 -1
- package/dist/esm/run/runner-contexts.js +116 -0
- package/dist/esm/run/runner-contexts.js.map +1 -0
- package/dist/esm/run/runner-execution.js +62 -0
- package/dist/esm/run/runner-execution.js.map +1 -0
- package/dist/esm/run/runner-flags.js +97 -0
- package/dist/esm/run/runner-flags.js.map +1 -0
- package/dist/esm/run/runner-setup.js +109 -0
- package/dist/esm/run/runner-setup.js.map +1 -0
- package/dist/esm/run/runner-slides.js +38 -0
- package/dist/esm/run/runner-slides.js.map +1 -0
- package/dist/esm/run/runner.js +99 -390
- package/dist/esm/run/runner.js.map +1 -1
- package/dist/esm/run/slides-render.js +5 -2
- package/dist/esm/run/slides-render.js.map +1 -1
- package/dist/esm/run/stdin-temp-file.js +1 -1
- package/dist/esm/run/stdin-temp-file.js.map +1 -1
- package/dist/esm/run/streaming.js +1 -0
- package/dist/esm/run/streaming.js.map +1 -1
- package/dist/esm/run/summary-engine.js +26 -10
- package/dist/esm/run/summary-engine.js.map +1 -1
- package/dist/esm/run/summary-llm.js +2 -1
- package/dist/esm/run/summary-llm.js.map +1 -1
- package/dist/esm/run/terminal.js +4 -1
- package/dist/esm/run/terminal.js.map +1 -1
- package/dist/esm/run/transcriber-cli.js +1 -1
- package/dist/esm/run/transcriber-cli.js.map +1 -1
- package/dist/esm/slides/download.js +242 -0
- package/dist/esm/slides/download.js.map +1 -0
- package/dist/esm/slides/extract-finalize.js +98 -0
- package/dist/esm/slides/extract-finalize.js.map +1 -0
- package/dist/esm/slides/extract.js +64 -1621
- package/dist/esm/slides/extract.js.map +1 -1
- package/dist/esm/slides/frame-extraction.js +372 -0
- package/dist/esm/slides/frame-extraction.js.map +1 -0
- package/dist/esm/slides/ingest.js +167 -0
- package/dist/esm/slides/ingest.js.map +1 -0
- package/dist/esm/slides/ocr.js +91 -0
- package/dist/esm/slides/ocr.js.map +1 -0
- package/dist/esm/slides/process.js +218 -0
- package/dist/esm/slides/process.js.map +1 -0
- package/dist/esm/slides/scene-detection.js +387 -0
- package/dist/esm/slides/scene-detection.js.map +1 -0
- package/dist/esm/slides/source-id.js +42 -0
- package/dist/esm/slides/source-id.js.map +1 -0
- package/dist/esm/tty/progress/fetch-html.js.map +1 -1
- package/dist/esm/tty/progress/transcript.js +21 -8
- package/dist/esm/tty/progress/transcript.js.map +1 -1
- package/dist/esm/tty/spinner.js +8 -2
- package/dist/esm/tty/spinner.js.map +1 -1
- package/dist/esm/tty/website-progress.js +5 -3
- package/dist/esm/tty/website-progress.js.map +1 -1
- package/dist/esm/version.js +1 -1
- package/dist/types/cache-keys.d.ts +44 -0
- package/dist/types/cache-slides-cleanup.d.ts +1 -0
- package/dist/types/cache.d.ts +1 -9
- package/dist/types/config/env.d.ts +6 -0
- package/dist/types/config/model.d.ts +3 -0
- package/dist/types/config/parse-helpers.d.ts +7 -0
- package/dist/types/config/read.d.ts +2 -0
- package/dist/types/config/sections.d.ts +33 -0
- package/dist/types/config/types.d.ts +230 -0
- package/dist/types/config.d.ts +3 -209
- package/dist/types/costs.d.ts +1 -1
- package/dist/types/daemon/agent-model.d.ts +40 -0
- package/dist/types/daemon/agent-request.d.ts +14 -0
- package/dist/types/daemon/chat.d.ts +3 -1
- package/dist/types/daemon/config.d.ts +13 -2
- package/dist/types/daemon/env-snapshot.d.ts +1 -1
- package/dist/types/daemon/flow-context.d.ts +1 -1
- package/dist/types/daemon/models.d.ts +1 -0
- package/dist/types/daemon/server-admin-routes.d.ts +22 -0
- package/dist/types/daemon/server-agent-route.d.ts +9 -0
- package/dist/types/daemon/server-http.d.ts +10 -0
- package/dist/types/daemon/server-session-routes.d.ts +11 -0
- package/dist/types/daemon/server-session.d.ts +52 -0
- package/dist/types/daemon/server-sse.d.ts +12 -0
- package/dist/types/daemon/server-summarize-execution.d.ts +70 -0
- package/dist/types/daemon/server-summarize-request.d.ts +36 -0
- package/dist/types/daemon/server.d.ts +3 -4
- package/dist/types/daemon/summarize.d.ts +1 -1
- package/dist/types/llm/cli-exec.d.ts +13 -0
- package/dist/types/llm/cli-provider-output.d.ts +16 -0
- package/dist/types/llm/generate-text-document.d.ts +34 -0
- package/dist/types/llm/generate-text-shared.d.ts +25 -0
- package/dist/types/llm/generate-text-stream.d.ts +26 -0
- package/dist/types/llm/generate-text.d.ts +6 -26
- package/dist/types/llm/html-to-markdown.d.ts +1 -1
- package/dist/types/llm/model-id.d.ts +1 -1
- package/dist/types/llm/provider-capabilities.d.ts +2 -0
- package/dist/types/llm/provider-profile.d.ts +31 -0
- package/dist/types/llm/providers/google.d.ts +6 -0
- package/dist/types/llm/providers/models.d.ts +5 -0
- package/dist/types/llm/transcript-to-markdown.d.ts +1 -1
- package/dist/types/model-auto-cli.d.ts +15 -0
- package/dist/types/model-auto-rules.d.ts +7 -0
- package/dist/types/model-auto.d.ts +5 -7
- package/dist/types/model-spec.d.ts +2 -2
- package/dist/types/run/attachments.d.ts +2 -2
- package/dist/types/run/bird/exec.d.ts +1 -0
- package/dist/types/run/bird/media.d.ts +3 -0
- package/dist/types/run/bird/parse.d.ts +3 -0
- package/dist/types/run/bird/types.d.ts +18 -0
- package/dist/types/run/bird.d.ts +12 -17
- package/dist/types/run/cache-state.d.ts +1 -1
- package/dist/types/run/constants.d.ts +2 -1
- package/dist/types/run/env.d.ts +1 -0
- package/dist/types/run/finish-line-labels.d.ts +29 -0
- package/dist/types/run/finish-line-lengths.d.ts +23 -0
- package/dist/types/run/finish-line.d.ts +2 -52
- package/dist/types/run/flows/asset/extract.d.ts +1 -1
- package/dist/types/run/flows/asset/input.d.ts +1 -1
- package/dist/types/run/flows/asset/preprocess.d.ts +1 -1
- package/dist/types/run/flows/asset/summary-attempts.d.ts +24 -0
- package/dist/types/run/flows/asset/summary.d.ts +6 -2
- package/dist/types/run/flows/url/flow-progress.d.ts +41 -0
- package/dist/types/run/flows/url/markdown.d.ts +2 -2
- package/dist/types/run/flows/url/progress-status.d.ts +16 -0
- package/dist/types/run/flows/url/slides-output-render.d.ts +43 -0
- package/dist/types/run/flows/url/slides-output-state.d.ts +21 -0
- package/dist/types/run/flows/url/slides-output-stream.d.ts +18 -0
- package/dist/types/run/flows/url/slides-output.d.ts +2 -17
- package/dist/types/run/flows/url/slides-text-markdown.d.ts +46 -0
- package/dist/types/run/flows/url/slides-text-transcript.d.ts +36 -0
- package/dist/types/run/flows/url/slides-text-types.d.ts +8 -0
- package/dist/types/run/flows/url/slides-text.d.ts +3 -87
- package/dist/types/run/flows/url/summary-finish.d.ts +16 -0
- package/dist/types/run/flows/url/summary-json.d.ts +51 -0
- package/dist/types/run/flows/url/summary-prompt.d.ts +22 -0
- package/dist/types/run/flows/url/summary-resolution.d.ts +31 -0
- package/dist/types/run/flows/url/summary-timestamps.d.ts +11 -0
- package/dist/types/run/flows/url/types.d.ts +4 -0
- package/dist/types/run/markdown-transforms.d.ts +3 -0
- package/dist/types/run/run-context.d.ts +4 -0
- package/dist/types/run/run-env.d.ts +4 -0
- package/dist/types/run/run-settings-parse.d.ts +5 -0
- package/dist/types/run/runner-contexts.d.ts +62 -0
- package/dist/types/run/runner-execution.d.ts +57 -0
- package/dist/types/run/runner-flags.d.ts +41 -0
- package/dist/types/run/runner-setup.d.ts +21 -0
- package/dist/types/run/runner-slides.d.ts +8 -0
- package/dist/types/run/streaming.d.ts +1 -1
- package/dist/types/run/summary-engine.d.ts +8 -4
- package/dist/types/run/summary-llm.d.ts +4 -3
- package/dist/types/run/terminal.d.ts +2 -0
- package/dist/types/run/types.d.ts +2 -2
- package/dist/types/slides/download.d.ts +29 -0
- package/dist/types/slides/extract-finalize.d.ts +57 -0
- package/dist/types/slides/extract.d.ts +1 -7
- package/dist/types/slides/frame-extraction.d.ts +38 -0
- package/dist/types/slides/ingest.d.ts +47 -0
- package/dist/types/slides/ocr.d.ts +5 -0
- package/dist/types/slides/process.d.ts +22 -0
- package/dist/types/slides/scene-detection.d.ts +75 -0
- package/dist/types/slides/source-id.d.ts +2 -0
- package/dist/types/version.d.ts +1 -1
- package/docs/_config.yml +1 -0
- package/docs/agent.md +3 -2
- package/docs/assets/site.css +134 -2
- package/docs/cache.md +2 -1
- package/docs/chrome-extension.md +12 -4
- package/docs/cli.md +2 -2
- package/docs/config.md +11 -4
- package/docs/index.html +5 -0
- package/docs/llm.md +5 -2
- package/docs/manual-tests.md +3 -0
- package/docs/media.md +3 -1
- package/docs/model-auto.md +2 -2
- package/docs/model-provider-resolution.md +57 -0
- package/docs/site/index.html +5 -0
- package/docs/slides-rendering-flow.md +46 -0
- package/docs/slides.md +5 -5
- package/docs/smoketest.md +1 -1
- package/docs/transcript-provider-flow.md +66 -0
- package/docs/website.md +1 -0
- package/docs/youtube.md +4 -2
- package/package.json +11 -11
|
@@ -1,20 +1,23 @@
|
|
|
1
|
-
import { createHash, randomUUID } from "node:crypto";
|
|
2
1
|
import { promises as fs } from "node:fs";
|
|
3
|
-
import { tmpdir } from "node:os";
|
|
4
2
|
import path from "node:path";
|
|
5
3
|
import { extractYouTubeVideoId, isDirectMediaUrl, isYouTubeUrl } from "../content/index.js";
|
|
6
|
-
import { spawnTracked } from "../processes.js";
|
|
7
4
|
import { resolveExecutableInPath } from "../run/env.js";
|
|
8
|
-
import {
|
|
9
|
-
|
|
5
|
+
import { buildSlidesMediaCacheKey, downloadRemoteVideo, downloadYoutubeVideo, formatBytes, resolveYoutubeStreamUrl, } from "./download.js";
|
|
6
|
+
import { buildSlideTimeline, buildSlidesChunkMeta, emitFinalSlides, emitPlaceholderSlides, renameSlidesWithTimestamps, SLIDES_PROGRESS, writeSlidesJson, } from "./extract-finalize.js";
|
|
7
|
+
import { detectSlideTimestamps, extractFramesAtTimestamps } from "./frame-extraction.js";
|
|
8
|
+
import { prepareSlidesInput } from "./ingest.js";
|
|
9
|
+
import { runOcrOnSlides } from "./ocr.js";
|
|
10
|
+
import { adjustTimestampWithinSegment, applyMaxSlidesFilter, applyMinDurationFilter, buildIntervalTimestamps, buildSceneSegments, clamp, filterTimestampsByMinDuration, findSceneSegment, mergeTimestamps, selectTimestampTargets, } from "./scene-detection.js";
|
|
11
|
+
import { buildDirectSourceId, buildYoutubeSourceId } from "./source-id.js";
|
|
12
|
+
import { readSlidesCacheIfValid, resolveSlidesDir, } from "./store.js";
|
|
10
13
|
const slidesLocks = new Map();
|
|
11
14
|
const YT_DLP_TIMEOUT_MS = 300_000;
|
|
12
|
-
const TESSERACT_TIMEOUT_MS = 120_000;
|
|
13
15
|
const DEFAULT_SLIDES_WORKERS = 8;
|
|
14
16
|
const DEFAULT_SLIDES_SAMPLE_COUNT = 8;
|
|
15
17
|
// Prefer broadly-decodable H.264/MP4 for ffmpeg stability.
|
|
16
18
|
// (Some "bestvideo" picks AV1 which can fail on certain ffmpeg builds / hwaccel setups.)
|
|
17
19
|
const DEFAULT_YT_DLP_FORMAT_EXTRACT = "bestvideo[height<=720][vcodec^=avc1][ext=mp4]/best[height<=720][vcodec^=avc1][ext=mp4]/bestvideo[height<=720][ext=mp4]/best[height<=720]";
|
|
20
|
+
export { parseShowinfoTimestamp, resolveExtractedTimestamp } from "./scene-detection.js";
|
|
18
21
|
function createSlidesLogger(logger) {
|
|
19
22
|
const logSlides = (message) => {
|
|
20
23
|
if (!logger)
|
|
@@ -55,26 +58,6 @@ function resolveSlidesStreamFallback(env) {
|
|
|
55
58
|
const raw = env.SLIDES_EXTRACT_STREAM?.trim().toLowerCase();
|
|
56
59
|
return raw === "1" || raw === "true" || raw === "yes";
|
|
57
60
|
}
|
|
58
|
-
function buildYtDlpCookiesArgs(cookiesFromBrowser) {
|
|
59
|
-
const value = typeof cookiesFromBrowser === "string" ? cookiesFromBrowser.trim() : "";
|
|
60
|
-
return value.length > 0 ? ["--cookies-from-browser", value] : [];
|
|
61
|
-
}
|
|
62
|
-
function buildSlidesMediaCacheKey(url) {
|
|
63
|
-
return `${url}#summarize-slides`;
|
|
64
|
-
}
|
|
65
|
-
function formatBytes(bytes) {
|
|
66
|
-
if (!Number.isFinite(bytes) || bytes <= 0)
|
|
67
|
-
return "0B";
|
|
68
|
-
const units = ["B", "KB", "MB", "GB", "TB"];
|
|
69
|
-
let value = bytes;
|
|
70
|
-
let unit = units[0] ?? "B";
|
|
71
|
-
for (let i = 1; i < units.length && value >= 1024; i += 1) {
|
|
72
|
-
value /= 1024;
|
|
73
|
-
unit = units[i] ?? unit;
|
|
74
|
-
}
|
|
75
|
-
const rounded = value >= 100 ? Math.round(value) : Math.round(value * 10) / 10;
|
|
76
|
-
return `${rounded}${unit}`;
|
|
77
|
-
}
|
|
78
61
|
function resolveToolPath(binary, env, explicitEnvKey) {
|
|
79
62
|
const explicit = explicitEnvKey && typeof env[explicitEnvKey] === "string" ? env[explicitEnvKey]?.trim() : "";
|
|
80
63
|
if (explicit)
|
|
@@ -188,169 +171,32 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
188
171
|
}
|
|
189
172
|
const ocrEnabled = Boolean(settings.ocr && tesseractPath);
|
|
190
173
|
const ocrAvailable = Boolean(tesseractPath ?? resolveToolPath("tesseract", env, "TESSERACT_PATH"));
|
|
191
|
-
const P_PREPARE = 2;
|
|
192
|
-
const P_FETCH_VIDEO = 6;
|
|
193
|
-
const P_DOWNLOAD_VIDEO = 35;
|
|
194
|
-
const P_DETECT_SCENES = 60;
|
|
195
|
-
const P_EXTRACT_FRAMES = 90;
|
|
196
|
-
const P_OCR = 99;
|
|
197
|
-
const P_FINAL = 100;
|
|
198
174
|
{
|
|
199
175
|
const prepareStartedAt = Date.now();
|
|
200
176
|
await prepareSlidesDir(slidesDir);
|
|
201
177
|
logSlidesTiming("prepare output dir", prepareStartedAt);
|
|
202
178
|
}
|
|
203
|
-
reportSlidesProgress?.("preparing source",
|
|
204
|
-
const
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
reportSlidesProgress
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
const format = resolveSlidesYtDlpExtractFormat(env);
|
|
222
|
-
reportSlidesProgress?.("downloading video", P_FETCH_VIDEO);
|
|
223
|
-
const downloadStartedAt = Date.now();
|
|
224
|
-
try {
|
|
225
|
-
const downloaded = await downloadYoutubeVideo({
|
|
226
|
-
ytDlpPath: ytDlp,
|
|
227
|
-
url: source.url,
|
|
228
|
-
timeoutMs,
|
|
229
|
-
format,
|
|
230
|
-
cookiesFromBrowser: ytDlpCookiesFromBrowser,
|
|
231
|
-
onProgress: (percent, detail) => {
|
|
232
|
-
const ratio = clamp(percent / 100, 0, 1);
|
|
233
|
-
const mapped = P_FETCH_VIDEO + ratio * (P_DOWNLOAD_VIDEO - P_FETCH_VIDEO);
|
|
234
|
-
reportSlidesProgress?.("downloading video", mapped, detail);
|
|
235
|
-
},
|
|
236
|
-
});
|
|
237
|
-
const cached = mediaCacheKey
|
|
238
|
-
? await mediaCache?.put({
|
|
239
|
-
url: mediaCacheKey,
|
|
240
|
-
filePath: downloaded.filePath,
|
|
241
|
-
filename: path.basename(downloaded.filePath),
|
|
242
|
-
})
|
|
243
|
-
: null;
|
|
244
|
-
inputPath = cached?.filePath ?? downloaded.filePath;
|
|
245
|
-
inputCleanup = downloaded.cleanup;
|
|
246
|
-
logSlidesTiming(`yt-dlp download (detect+extract, format=${format})`, downloadStartedAt);
|
|
247
|
-
}
|
|
248
|
-
catch (error) {
|
|
249
|
-
if (!allowStreamFallback) {
|
|
250
|
-
throw error;
|
|
251
|
-
}
|
|
252
|
-
warnings.push(`Failed to download video; falling back to stream URL: ${String(error)}`);
|
|
253
|
-
reportSlidesProgress?.("fetching video", P_FETCH_VIDEO);
|
|
254
|
-
const streamStartedAt = Date.now();
|
|
255
|
-
const streamUrl = await resolveYoutubeStreamUrl({
|
|
256
|
-
ytDlpPath: ytDlp,
|
|
257
|
-
url: source.url,
|
|
258
|
-
format,
|
|
259
|
-
timeoutMs,
|
|
260
|
-
cookiesFromBrowser: ytDlpCookiesFromBrowser,
|
|
261
|
-
});
|
|
262
|
-
inputPath = streamUrl;
|
|
263
|
-
logSlidesTiming(`yt-dlp stream url (detect+extract, format=${format})`, streamStartedAt);
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
else if (source.kind === "direct") {
|
|
267
|
-
const shouldUseYtDlp = !isDirectMediaUrl(source.url);
|
|
268
|
-
if (shouldUseYtDlp) {
|
|
269
|
-
if (!ytDlpPath) {
|
|
270
|
-
throw new Error("Slides for remote videos require yt-dlp (set YT_DLP_PATH or install yt-dlp).");
|
|
271
|
-
}
|
|
272
|
-
const ytDlp = ytDlpPath;
|
|
273
|
-
const format = resolveSlidesYtDlpExtractFormat(env);
|
|
274
|
-
reportSlidesProgress?.("downloading video", P_FETCH_VIDEO);
|
|
275
|
-
const downloadStartedAt = Date.now();
|
|
276
|
-
try {
|
|
277
|
-
const downloaded = await downloadYoutubeVideo({
|
|
278
|
-
ytDlpPath: ytDlp,
|
|
279
|
-
url: source.url,
|
|
280
|
-
timeoutMs,
|
|
281
|
-
format,
|
|
282
|
-
cookiesFromBrowser: ytDlpCookiesFromBrowser,
|
|
283
|
-
onProgress: (percent, detail) => {
|
|
284
|
-
const ratio = clamp(percent / 100, 0, 1);
|
|
285
|
-
const mapped = P_FETCH_VIDEO + ratio * (P_DOWNLOAD_VIDEO - P_FETCH_VIDEO);
|
|
286
|
-
reportSlidesProgress?.("downloading video", mapped, detail);
|
|
287
|
-
},
|
|
288
|
-
});
|
|
289
|
-
const cached = mediaCacheKey
|
|
290
|
-
? await mediaCache?.put({
|
|
291
|
-
url: mediaCacheKey,
|
|
292
|
-
filePath: downloaded.filePath,
|
|
293
|
-
filename: path.basename(downloaded.filePath),
|
|
294
|
-
})
|
|
295
|
-
: null;
|
|
296
|
-
inputPath = cached?.filePath ?? downloaded.filePath;
|
|
297
|
-
inputCleanup = downloaded.cleanup;
|
|
298
|
-
logSlidesTiming(`yt-dlp download (direct source, format=${format})`, downloadStartedAt);
|
|
299
|
-
}
|
|
300
|
-
catch (error) {
|
|
301
|
-
if (!allowStreamFallback) {
|
|
302
|
-
throw error;
|
|
303
|
-
}
|
|
304
|
-
warnings.push(`Failed to download video; falling back to stream URL: ${String(error)}`);
|
|
305
|
-
reportSlidesProgress?.("fetching video", P_FETCH_VIDEO);
|
|
306
|
-
const streamStartedAt = Date.now();
|
|
307
|
-
const streamUrl = await resolveYoutubeStreamUrl({
|
|
308
|
-
ytDlpPath: ytDlp,
|
|
309
|
-
url: source.url,
|
|
310
|
-
format,
|
|
311
|
-
timeoutMs,
|
|
312
|
-
cookiesFromBrowser: ytDlpCookiesFromBrowser,
|
|
313
|
-
});
|
|
314
|
-
inputPath = streamUrl;
|
|
315
|
-
logSlidesTiming(`yt-dlp stream url (direct source, format=${format})`, streamStartedAt);
|
|
316
|
-
}
|
|
317
|
-
}
|
|
318
|
-
else {
|
|
319
|
-
reportSlidesProgress?.("downloading video", P_FETCH_VIDEO);
|
|
320
|
-
const downloadStartedAt = Date.now();
|
|
321
|
-
try {
|
|
322
|
-
const downloaded = await downloadRemoteVideo({
|
|
323
|
-
url: source.url,
|
|
324
|
-
timeoutMs,
|
|
325
|
-
onProgress: (percent, detail) => {
|
|
326
|
-
const ratio = clamp(percent / 100, 0, 1);
|
|
327
|
-
const mapped = P_FETCH_VIDEO + ratio * (P_DOWNLOAD_VIDEO - P_FETCH_VIDEO);
|
|
328
|
-
reportSlidesProgress?.("downloading video", mapped, detail);
|
|
329
|
-
},
|
|
330
|
-
});
|
|
331
|
-
const cached = mediaCacheKey
|
|
332
|
-
? await mediaCache?.put({
|
|
333
|
-
url: mediaCacheKey,
|
|
334
|
-
filePath: downloaded.filePath,
|
|
335
|
-
filename: path.basename(downloaded.filePath),
|
|
336
|
-
})
|
|
337
|
-
: null;
|
|
338
|
-
inputPath = cached?.filePath ?? downloaded.filePath;
|
|
339
|
-
inputCleanup = downloaded.cleanup;
|
|
340
|
-
logSlidesTiming("download direct video (detect+extract)", downloadStartedAt);
|
|
341
|
-
}
|
|
342
|
-
catch (error) {
|
|
343
|
-
if (!allowStreamFallback) {
|
|
344
|
-
throw error;
|
|
345
|
-
}
|
|
346
|
-
warnings.push(`Failed to download video; falling back to stream URL: ${String(error)}`);
|
|
347
|
-
inputPath = source.url;
|
|
348
|
-
}
|
|
349
|
-
}
|
|
350
|
-
}
|
|
179
|
+
reportSlidesProgress?.("preparing source", SLIDES_PROGRESS.PREPARE);
|
|
180
|
+
const { inputPath, inputCleanup, warnings: ingestWarnings, } = await prepareSlidesInput({
|
|
181
|
+
source,
|
|
182
|
+
mediaCache,
|
|
183
|
+
timeoutMs,
|
|
184
|
+
ytDlpPath,
|
|
185
|
+
ytDlpCookiesFromBrowser,
|
|
186
|
+
resolveSlidesYtDlpExtractFormat: () => resolveSlidesYtDlpExtractFormat(env),
|
|
187
|
+
resolveSlidesStreamFallback: () => resolveSlidesStreamFallback(env),
|
|
188
|
+
buildSlidesMediaCacheKey,
|
|
189
|
+
formatBytes,
|
|
190
|
+
reportSlidesProgress,
|
|
191
|
+
logSlidesTiming,
|
|
192
|
+
downloadYoutubeVideo,
|
|
193
|
+
downloadRemoteVideo,
|
|
194
|
+
resolveYoutubeStreamUrl,
|
|
195
|
+
});
|
|
196
|
+
warnings.push(...ingestWarnings);
|
|
351
197
|
try {
|
|
352
198
|
const ffmpegStartedAt = Date.now();
|
|
353
|
-
reportSlidesProgress?.("detecting scenes",
|
|
199
|
+
reportSlidesProgress?.("detecting scenes", SLIDES_PROGRESS.FETCH_VIDEO + 2);
|
|
354
200
|
const detection = await detectSlideTimestamps({
|
|
355
201
|
ffmpegPath: ffmpegBinary,
|
|
356
202
|
ffprobePath: ffprobeBinary,
|
|
@@ -364,13 +210,15 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
364
210
|
sampleCount: resolveSlidesSampleCount(env),
|
|
365
211
|
onSegmentProgress: (completed, total) => {
|
|
366
212
|
const ratio = total > 0 ? completed / total : 0;
|
|
367
|
-
const mapped =
|
|
213
|
+
const mapped = SLIDES_PROGRESS.FETCH_VIDEO +
|
|
214
|
+
2 +
|
|
215
|
+
ratio * (SLIDES_PROGRESS.DETECT_SCENES - (SLIDES_PROGRESS.FETCH_VIDEO + 2));
|
|
368
216
|
reportSlidesProgress?.("detecting scenes", mapped, total > 0 ? `(${completed}/${total})` : undefined);
|
|
369
217
|
},
|
|
370
218
|
logSlides,
|
|
371
219
|
logSlidesTiming,
|
|
372
220
|
});
|
|
373
|
-
reportSlidesProgress?.("detecting scenes",
|
|
221
|
+
reportSlidesProgress?.("detecting scenes", SLIDES_PROGRESS.DETECT_SCENES);
|
|
374
222
|
logSlidesTiming("ffmpeg scene-detect", ffmpegStartedAt);
|
|
375
223
|
const interval = buildIntervalTimestamps({
|
|
376
224
|
durationSeconds: detection.durationSeconds,
|
|
@@ -395,13 +243,13 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
395
243
|
const segment = findSceneSegment(sceneSegments, timestamp);
|
|
396
244
|
const adjusted = adjustTimestampWithinSegment(timestamp, segment);
|
|
397
245
|
return { index: index + 1, timestamp: adjusted, imagePath: "", segment };
|
|
398
|
-
}), settings.maxSlides, warnings)
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
246
|
+
}), settings.maxSlides, warnings, (imagePath) => {
|
|
247
|
+
void fs.rm(imagePath, { force: true }).catch(() => { });
|
|
248
|
+
});
|
|
249
|
+
const chunkMeta = buildSlidesChunkMeta({ slidesDir, source, ocrAvailable });
|
|
250
|
+
const timelineSlides = buildSlideTimeline({
|
|
251
|
+
source,
|
|
403
252
|
slidesDir,
|
|
404
|
-
slidesDirId: buildSlidesDirId(slidesDir),
|
|
405
253
|
sceneThreshold: settings.sceneThreshold,
|
|
406
254
|
autoTuneThreshold: settings.autoTuneThreshold,
|
|
407
255
|
autoTune: detection.autoTune,
|
|
@@ -409,28 +257,21 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
409
257
|
minSlideDuration: settings.minDurationSeconds,
|
|
410
258
|
ocrRequested: settings.ocr,
|
|
411
259
|
ocrAvailable,
|
|
412
|
-
slides: trimmed.map(({ segment: _segment, ...slide }) => slide),
|
|
413
260
|
warnings,
|
|
414
|
-
|
|
261
|
+
slides: trimmed,
|
|
262
|
+
});
|
|
415
263
|
hooks?.onSlidesTimeline?.(timelineSlides);
|
|
416
264
|
// Emit placeholders immediately so the UI can render the slide list while frames are still extracting.
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
sourceKind: source.kind,
|
|
423
|
-
ocrAvailable,
|
|
424
|
-
};
|
|
425
|
-
for (const slide of trimmed) {
|
|
426
|
-
const { segment: _segment, ...payload } = slide;
|
|
427
|
-
hooks.onSlideChunk({ slide: { ...payload, imagePath: "" }, meta });
|
|
428
|
-
}
|
|
429
|
-
}
|
|
265
|
+
emitPlaceholderSlides({
|
|
266
|
+
slides: trimmed,
|
|
267
|
+
meta: chunkMeta,
|
|
268
|
+
onSlideChunk: hooks?.onSlideChunk,
|
|
269
|
+
});
|
|
430
270
|
const formatProgressCount = (completed, total) => total > 0 ? `(${completed}/${total})` : "";
|
|
431
271
|
const reportFrameProgress = (completed, total) => {
|
|
432
272
|
const ratio = total > 0 ? completed / total : 0;
|
|
433
|
-
reportSlidesProgress?.("extracting frames",
|
|
273
|
+
reportSlidesProgress?.("extracting frames", SLIDES_PROGRESS.DETECT_SCENES +
|
|
274
|
+
ratio * (SLIDES_PROGRESS.EXTRACT_FRAMES - SLIDES_PROGRESS.DETECT_SCENES), formatProgressCount(completed, total));
|
|
434
275
|
};
|
|
435
276
|
reportFrameProgress(0, trimmed.length);
|
|
436
277
|
const onSlideChunk = hooks?.onSlideChunk;
|
|
@@ -448,13 +289,7 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
448
289
|
onSlide: onSlideChunk
|
|
449
290
|
? (slide) => onSlideChunk({
|
|
450
291
|
slide,
|
|
451
|
-
meta:
|
|
452
|
-
slidesDir,
|
|
453
|
-
sourceUrl: source.url,
|
|
454
|
-
sourceId: source.sourceId,
|
|
455
|
-
sourceKind: source.kind,
|
|
456
|
-
ocrAvailable,
|
|
457
|
-
},
|
|
292
|
+
meta: chunkMeta,
|
|
458
293
|
})
|
|
459
294
|
: null,
|
|
460
295
|
logSlides,
|
|
@@ -466,7 +301,9 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
466
301
|
if (trimmed.length > 0 && typeof extractElapsedMs === "number") {
|
|
467
302
|
logSlides?.(`extract frames avgMsPerFrame=${Math.round(extractElapsedMs / trimmed.length)}`);
|
|
468
303
|
}
|
|
469
|
-
const rawSlides = applyMinDurationFilter(extractedSlides, settings.minDurationSeconds, warnings)
|
|
304
|
+
const rawSlides = applyMinDurationFilter(extractedSlides, settings.minDurationSeconds, warnings, (imagePath) => {
|
|
305
|
+
void fs.rm(imagePath, { force: true }).catch(() => { });
|
|
306
|
+
});
|
|
470
307
|
const renameStartedAt = Date.now();
|
|
471
308
|
const renamedSlides = await renameSlidesWithTimestamps(rawSlides, slidesDir);
|
|
472
309
|
logSlidesTiming?.("rename slides", renameStartedAt);
|
|
@@ -477,10 +314,10 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
477
314
|
if (ocrEnabled && tesseractPath) {
|
|
478
315
|
const ocrStartedAt = Date.now();
|
|
479
316
|
logSlides?.(`ocr start count=${renamedSlides.length} mode=parallel workers=${workers}`);
|
|
480
|
-
const ocrStartPercent =
|
|
317
|
+
const ocrStartPercent = SLIDES_PROGRESS.OCR - 3;
|
|
481
318
|
const reportOcrProgress = (completed, total) => {
|
|
482
319
|
const ratio = total > 0 ? completed / total : 0;
|
|
483
|
-
reportSlidesProgress?.("running OCR", ocrStartPercent + ratio * (
|
|
320
|
+
reportSlidesProgress?.("running OCR", ocrStartPercent + ratio * (SLIDES_PROGRESS.OCR - ocrStartPercent), formatProgressCount(completed, total));
|
|
484
321
|
};
|
|
485
322
|
reportOcrProgress(0, renamedSlides.length);
|
|
486
323
|
slidesWithOcr = await runOcrOnSlides(renamedSlides, tesseractPath, workers, reportOcrProgress);
|
|
@@ -489,27 +326,15 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
489
326
|
logSlides?.(`ocr avgMsPerSlide=${Math.round(elapsedMs / renamedSlides.length)}`);
|
|
490
327
|
}
|
|
491
328
|
}
|
|
492
|
-
reportSlidesProgress?.("finalizing",
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
sourceId: source.sourceId,
|
|
501
|
-
sourceKind: source.kind,
|
|
502
|
-
ocrAvailable,
|
|
503
|
-
},
|
|
504
|
-
});
|
|
505
|
-
}
|
|
506
|
-
}
|
|
507
|
-
const result = {
|
|
508
|
-
sourceUrl: source.url,
|
|
509
|
-
sourceKind: source.kind,
|
|
510
|
-
sourceId: source.sourceId,
|
|
329
|
+
reportSlidesProgress?.("finalizing", SLIDES_PROGRESS.FINAL - 1);
|
|
330
|
+
emitFinalSlides({
|
|
331
|
+
slides: slidesWithOcr,
|
|
332
|
+
meta: chunkMeta,
|
|
333
|
+
onSlideChunk: hooks?.onSlideChunk,
|
|
334
|
+
});
|
|
335
|
+
const result = buildSlideTimeline({
|
|
336
|
+
source,
|
|
511
337
|
slidesDir,
|
|
512
|
-
slidesDirId: buildSlidesDirId(slidesDir),
|
|
513
338
|
sceneThreshold: settings.sceneThreshold,
|
|
514
339
|
autoTuneThreshold: settings.autoTuneThreshold,
|
|
515
340
|
autoTune: detection.autoTune,
|
|
@@ -517,11 +342,11 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
517
342
|
minSlideDuration: settings.minDurationSeconds,
|
|
518
343
|
ocrRequested: settings.ocr,
|
|
519
344
|
ocrAvailable,
|
|
520
|
-
slides: slidesWithOcr,
|
|
521
345
|
warnings,
|
|
522
|
-
|
|
346
|
+
slides: slidesWithOcr,
|
|
347
|
+
});
|
|
523
348
|
await writeSlidesJson(result, slidesDir);
|
|
524
|
-
reportSlidesProgress?.("finalizing",
|
|
349
|
+
reportSlidesProgress?.("finalizing", SLIDES_PROGRESS.FINAL);
|
|
525
350
|
logSlidesTiming("slides total", totalStartedAt);
|
|
526
351
|
return result;
|
|
527
352
|
}
|
|
@@ -534,35 +359,6 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
534
359
|
hooks?.onSlidesProgress?.("Slides: queued");
|
|
535
360
|
});
|
|
536
361
|
}
|
|
537
|
-
export function parseShowinfoTimestamp(line) {
|
|
538
|
-
if (!line.includes("showinfo"))
|
|
539
|
-
return null;
|
|
540
|
-
const match = /pts_time:(\d+\.?\d*)/.exec(line);
|
|
541
|
-
if (!match)
|
|
542
|
-
return null;
|
|
543
|
-
const ts = Number(match[1]);
|
|
544
|
-
if (!Number.isFinite(ts))
|
|
545
|
-
return null;
|
|
546
|
-
return ts;
|
|
547
|
-
}
|
|
548
|
-
export function resolveExtractedTimestamp({ requested, actual, seekBase, }) {
|
|
549
|
-
if (!Number.isFinite(requested))
|
|
550
|
-
return 0;
|
|
551
|
-
if (actual == null || !Number.isFinite(actual) || actual < 0)
|
|
552
|
-
return requested;
|
|
553
|
-
const base = typeof seekBase === "number" && Number.isFinite(seekBase) && seekBase > 0 ? seekBase : null;
|
|
554
|
-
if (!base) {
|
|
555
|
-
// With -ss before -i, showinfo PTS resets near 0. Treat small values as offsets.
|
|
556
|
-
if (actual <= 5)
|
|
557
|
-
return requested + actual;
|
|
558
|
-
return actual;
|
|
559
|
-
}
|
|
560
|
-
const candidateRelative = base + actual;
|
|
561
|
-
const candidateAbsolute = actual;
|
|
562
|
-
const relativeDelta = Math.abs(candidateRelative - requested);
|
|
563
|
-
const absoluteDelta = Math.abs(candidateAbsolute - requested);
|
|
564
|
-
return relativeDelta <= absoluteDelta ? candidateRelative : candidateAbsolute;
|
|
565
|
-
}
|
|
566
362
|
async function prepareSlidesDir(slidesDir) {
|
|
567
363
|
await fs.mkdir(slidesDir, { recursive: true });
|
|
568
364
|
const entries = await fs.readdir(slidesDir);
|
|
@@ -575,1176 +371,6 @@ async function prepareSlidesDir(slidesDir) {
|
|
|
575
371
|
}
|
|
576
372
|
}));
|
|
577
373
|
}
|
|
578
|
-
async function downloadYoutubeVideo({ ytDlpPath, url, timeoutMs, format, cookiesFromBrowser, onProgress, }) {
|
|
579
|
-
const dir = await fs.mkdtemp(path.join(tmpdir(), `summarize-slides-${randomUUID()}-`));
|
|
580
|
-
const outputTemplate = path.join(dir, "video.%(ext)s");
|
|
581
|
-
const progressTemplate = "progress:%(progress.downloaded_bytes)s|%(progress.total_bytes)s|%(progress.total_bytes_estimate)s";
|
|
582
|
-
const args = [
|
|
583
|
-
"-f",
|
|
584
|
-
format,
|
|
585
|
-
"--no-playlist",
|
|
586
|
-
"--no-warnings",
|
|
587
|
-
"--concurrent-fragments",
|
|
588
|
-
"4",
|
|
589
|
-
...buildYtDlpCookiesArgs(cookiesFromBrowser),
|
|
590
|
-
...(onProgress ? ["--progress", "--newline", "--progress-template", progressTemplate] : []),
|
|
591
|
-
"-o",
|
|
592
|
-
outputTemplate,
|
|
593
|
-
url,
|
|
594
|
-
];
|
|
595
|
-
await runProcess({
|
|
596
|
-
command: ytDlpPath,
|
|
597
|
-
args,
|
|
598
|
-
timeoutMs: Math.max(timeoutMs, YT_DLP_TIMEOUT_MS),
|
|
599
|
-
errorLabel: "yt-dlp",
|
|
600
|
-
onStderrLine: (line, handle) => {
|
|
601
|
-
if (!onProgress)
|
|
602
|
-
return;
|
|
603
|
-
const trimmed = line.trim();
|
|
604
|
-
if (trimmed.startsWith("progress:")) {
|
|
605
|
-
const payload = trimmed.slice("progress:".length);
|
|
606
|
-
const [downloadedRaw, totalRaw, estimateRaw] = payload.split("|");
|
|
607
|
-
const downloaded = Number.parseFloat(downloadedRaw);
|
|
608
|
-
if (!Number.isFinite(downloaded) || downloaded < 0)
|
|
609
|
-
return;
|
|
610
|
-
const totalCandidate = Number.parseFloat(totalRaw);
|
|
611
|
-
const estimateCandidate = Number.parseFloat(estimateRaw);
|
|
612
|
-
const totalBytes = Number.isFinite(totalCandidate) && totalCandidate > 0
|
|
613
|
-
? totalCandidate
|
|
614
|
-
: Number.isFinite(estimateCandidate) && estimateCandidate > 0
|
|
615
|
-
? estimateCandidate
|
|
616
|
-
: null;
|
|
617
|
-
if (!totalBytes || totalBytes <= 0)
|
|
618
|
-
return;
|
|
619
|
-
const percent = Math.max(0, Math.min(100, Math.round((downloaded / totalBytes) * 100)));
|
|
620
|
-
const detail = `(${formatBytes(downloaded)}/${formatBytes(totalBytes)})`;
|
|
621
|
-
onProgress(percent, detail);
|
|
622
|
-
handle?.setProgress(percent, detail);
|
|
623
|
-
return;
|
|
624
|
-
}
|
|
625
|
-
if (!trimmed.startsWith("[download]"))
|
|
626
|
-
return;
|
|
627
|
-
const percentMatch = trimmed.match(/\b(\d{1,3}(?:\.\d+)?)%\b/);
|
|
628
|
-
if (!percentMatch)
|
|
629
|
-
return;
|
|
630
|
-
const percent = Number(percentMatch[1]);
|
|
631
|
-
if (!Number.isFinite(percent) || percent < 0 || percent > 100)
|
|
632
|
-
return;
|
|
633
|
-
const etaMatch = trimmed.match(/\bETA\s+(\S+)\b/);
|
|
634
|
-
const speedMatch = trimmed.match(/\bat\s+(\S+)\b/);
|
|
635
|
-
const detailParts = [
|
|
636
|
-
speedMatch?.[1] ? `at ${speedMatch[1]}` : null,
|
|
637
|
-
etaMatch?.[1] ? `ETA ${etaMatch[1]}` : null,
|
|
638
|
-
].filter(Boolean);
|
|
639
|
-
const detail = detailParts.length ? detailParts.join(" ") : undefined;
|
|
640
|
-
onProgress(percent, detail);
|
|
641
|
-
handle?.setProgress(percent, detail ?? null);
|
|
642
|
-
},
|
|
643
|
-
onStdoutLine: onProgress
|
|
644
|
-
? (line, handle) => {
|
|
645
|
-
if (!line.trim().startsWith("progress:"))
|
|
646
|
-
return;
|
|
647
|
-
const payload = line.trim().slice("progress:".length);
|
|
648
|
-
const [downloadedRaw, totalRaw, estimateRaw] = payload.split("|");
|
|
649
|
-
const downloaded = Number.parseFloat(downloadedRaw);
|
|
650
|
-
if (!Number.isFinite(downloaded) || downloaded < 0)
|
|
651
|
-
return;
|
|
652
|
-
const totalCandidate = Number.parseFloat(totalRaw);
|
|
653
|
-
const estimateCandidate = Number.parseFloat(estimateRaw);
|
|
654
|
-
const totalBytes = Number.isFinite(totalCandidate) && totalCandidate > 0
|
|
655
|
-
? totalCandidate
|
|
656
|
-
: Number.isFinite(estimateCandidate) && estimateCandidate > 0
|
|
657
|
-
? estimateCandidate
|
|
658
|
-
: null;
|
|
659
|
-
if (!totalBytes || totalBytes <= 0)
|
|
660
|
-
return;
|
|
661
|
-
const percent = Math.max(0, Math.min(100, Math.round((downloaded / totalBytes) * 100)));
|
|
662
|
-
const detail = `(${formatBytes(downloaded)}/${formatBytes(totalBytes)})`;
|
|
663
|
-
onProgress(percent, detail);
|
|
664
|
-
handle?.setProgress(percent, detail);
|
|
665
|
-
}
|
|
666
|
-
: undefined,
|
|
667
|
-
});
|
|
668
|
-
const files = await fs.readdir(dir);
|
|
669
|
-
const candidates = [];
|
|
670
|
-
for (const entry of files) {
|
|
671
|
-
if (entry.endsWith(".part") || entry.endsWith(".ytdl"))
|
|
672
|
-
continue;
|
|
673
|
-
const filePath = path.join(dir, entry);
|
|
674
|
-
const stat = await fs.stat(filePath).catch(() => null);
|
|
675
|
-
if (stat?.isFile()) {
|
|
676
|
-
candidates.push({ filePath, size: stat.size });
|
|
677
|
-
}
|
|
678
|
-
}
|
|
679
|
-
if (candidates.length === 0) {
|
|
680
|
-
await fs.rm(dir, { recursive: true, force: true });
|
|
681
|
-
throw new Error("yt-dlp completed but no video file was downloaded.");
|
|
682
|
-
}
|
|
683
|
-
candidates.sort((a, b) => b.size - a.size);
|
|
684
|
-
const filePath = candidates[0].filePath;
|
|
685
|
-
return {
|
|
686
|
-
filePath,
|
|
687
|
-
cleanup: async () => {
|
|
688
|
-
await fs.rm(dir, { recursive: true, force: true });
|
|
689
|
-
},
|
|
690
|
-
};
|
|
691
|
-
}
|
|
692
|
-
async function downloadRemoteVideo({ url, timeoutMs, onProgress, }) {
|
|
693
|
-
const dir = await fs.mkdtemp(path.join(tmpdir(), `summarize-slides-${randomUUID()}-`));
|
|
694
|
-
let suffix = ".bin";
|
|
695
|
-
try {
|
|
696
|
-
const parsed = new URL(url);
|
|
697
|
-
const ext = path.extname(parsed.pathname);
|
|
698
|
-
if (ext)
|
|
699
|
-
suffix = ext;
|
|
700
|
-
}
|
|
701
|
-
catch {
|
|
702
|
-
// ignore
|
|
703
|
-
}
|
|
704
|
-
const filePath = path.join(dir, `video${suffix}`);
|
|
705
|
-
const controller = new AbortController();
|
|
706
|
-
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
|
707
|
-
try {
|
|
708
|
-
const res = await fetch(url, { signal: controller.signal });
|
|
709
|
-
if (!res.ok) {
|
|
710
|
-
throw new Error(`Download failed: ${res.status} ${res.statusText}`);
|
|
711
|
-
}
|
|
712
|
-
const totalRaw = res.headers.get("content-length");
|
|
713
|
-
const total = totalRaw ? Number(totalRaw) : 0;
|
|
714
|
-
const hasTotal = Number.isFinite(total) && total > 0;
|
|
715
|
-
const reader = res.body?.getReader();
|
|
716
|
-
if (!reader) {
|
|
717
|
-
throw new Error("Download failed: missing response body");
|
|
718
|
-
}
|
|
719
|
-
const handle = await fs.open(filePath, "w");
|
|
720
|
-
let downloaded = 0;
|
|
721
|
-
let lastPercent = -1;
|
|
722
|
-
let lastReportedBytes = 0;
|
|
723
|
-
const reportProgress = () => {
|
|
724
|
-
if (!onProgress)
|
|
725
|
-
return;
|
|
726
|
-
if (hasTotal) {
|
|
727
|
-
const percent = Math.max(0, Math.min(100, Math.round((downloaded / total) * 100)));
|
|
728
|
-
if (percent === lastPercent)
|
|
729
|
-
return;
|
|
730
|
-
lastPercent = percent;
|
|
731
|
-
const detail = `(${formatBytes(downloaded)}/${formatBytes(total)})`;
|
|
732
|
-
onProgress(percent, detail);
|
|
733
|
-
return;
|
|
734
|
-
}
|
|
735
|
-
if (downloaded - lastReportedBytes < 2 * 1024 * 1024)
|
|
736
|
-
return;
|
|
737
|
-
lastReportedBytes = downloaded;
|
|
738
|
-
onProgress(0, `(${formatBytes(downloaded)})`);
|
|
739
|
-
};
|
|
740
|
-
try {
|
|
741
|
-
while (true) {
|
|
742
|
-
const { done, value } = await reader.read();
|
|
743
|
-
if (done)
|
|
744
|
-
break;
|
|
745
|
-
if (!value)
|
|
746
|
-
continue;
|
|
747
|
-
await handle.write(value);
|
|
748
|
-
downloaded += value.byteLength;
|
|
749
|
-
reportProgress();
|
|
750
|
-
}
|
|
751
|
-
}
|
|
752
|
-
finally {
|
|
753
|
-
await handle.close();
|
|
754
|
-
}
|
|
755
|
-
if (hasTotal) {
|
|
756
|
-
onProgress?.(100, `(${formatBytes(downloaded)}/${formatBytes(total)})`);
|
|
757
|
-
}
|
|
758
|
-
return {
|
|
759
|
-
filePath,
|
|
760
|
-
cleanup: async () => {
|
|
761
|
-
await fs.rm(dir, { recursive: true, force: true });
|
|
762
|
-
},
|
|
763
|
-
};
|
|
764
|
-
}
|
|
765
|
-
catch (error) {
|
|
766
|
-
await fs.rm(dir, { recursive: true, force: true }).catch(() => null);
|
|
767
|
-
throw error;
|
|
768
|
-
}
|
|
769
|
-
finally {
|
|
770
|
-
clearTimeout(timeout);
|
|
771
|
-
}
|
|
772
|
-
}
|
|
773
|
-
async function resolveYoutubeStreamUrl({ ytDlpPath, url, timeoutMs, format, cookiesFromBrowser, }) {
|
|
774
|
-
const args = ["-f", format, ...buildYtDlpCookiesArgs(cookiesFromBrowser), "-g", url];
|
|
775
|
-
const output = await runProcessCapture({
|
|
776
|
-
command: ytDlpPath,
|
|
777
|
-
args,
|
|
778
|
-
timeoutMs: Math.max(timeoutMs, YT_DLP_TIMEOUT_MS),
|
|
779
|
-
errorLabel: "yt-dlp",
|
|
780
|
-
});
|
|
781
|
-
const lines = output
|
|
782
|
-
.split("\n")
|
|
783
|
-
.map((line) => line.trim())
|
|
784
|
-
.filter(Boolean);
|
|
785
|
-
if (lines.length === 0) {
|
|
786
|
-
throw new Error("yt-dlp did not return a stream URL.");
|
|
787
|
-
}
|
|
788
|
-
return lines[0];
|
|
789
|
-
}
|
|
790
|
-
async function detectSlideTimestamps({ ffmpegPath, ffprobePath, inputPath, sceneThreshold, autoTuneThreshold, env, timeoutMs, warnings, workers, sampleCount, onSegmentProgress, logSlides, logSlidesTiming, }) {
|
|
791
|
-
const probeStartedAt = Date.now();
|
|
792
|
-
const videoInfo = await probeVideoInfo({
|
|
793
|
-
ffprobePath,
|
|
794
|
-
env,
|
|
795
|
-
inputPath,
|
|
796
|
-
timeoutMs,
|
|
797
|
-
});
|
|
798
|
-
logSlidesTiming?.("ffprobe video info", probeStartedAt);
|
|
799
|
-
const calibration = await calibrateSceneThreshold({
|
|
800
|
-
ffmpegPath,
|
|
801
|
-
inputPath,
|
|
802
|
-
durationSeconds: videoInfo.durationSeconds,
|
|
803
|
-
sampleCount,
|
|
804
|
-
timeoutMs,
|
|
805
|
-
logSlides,
|
|
806
|
-
});
|
|
807
|
-
const baseThreshold = sceneThreshold;
|
|
808
|
-
const calibratedThreshold = calibration.threshold;
|
|
809
|
-
const chosenThreshold = autoTuneThreshold ? calibratedThreshold : baseThreshold;
|
|
810
|
-
if (autoTuneThreshold && chosenThreshold !== baseThreshold) {
|
|
811
|
-
warnings.push(`Auto-tuned scene threshold from ${baseThreshold} to ${chosenThreshold}`);
|
|
812
|
-
}
|
|
813
|
-
const segments = buildSegments(videoInfo.durationSeconds, workers);
|
|
814
|
-
const detectStartedAt = Date.now();
|
|
815
|
-
let effectiveThreshold = chosenThreshold;
|
|
816
|
-
let timestamps = await detectSceneTimestamps({
|
|
817
|
-
ffmpegPath,
|
|
818
|
-
inputPath,
|
|
819
|
-
threshold: effectiveThreshold,
|
|
820
|
-
timeoutMs,
|
|
821
|
-
segments,
|
|
822
|
-
workers,
|
|
823
|
-
onSegmentProgress,
|
|
824
|
-
});
|
|
825
|
-
logSlidesTiming?.(`scene detection base (threshold=${effectiveThreshold}, segments=${segments.length})`, detectStartedAt);
|
|
826
|
-
if (timestamps.length === 0) {
|
|
827
|
-
const fallbackThreshold = Math.max(0.05, roundThreshold(effectiveThreshold * 0.5));
|
|
828
|
-
if (fallbackThreshold !== effectiveThreshold) {
|
|
829
|
-
const retryStartedAt = Date.now();
|
|
830
|
-
timestamps = await detectSceneTimestamps({
|
|
831
|
-
ffmpegPath,
|
|
832
|
-
inputPath,
|
|
833
|
-
threshold: fallbackThreshold,
|
|
834
|
-
timeoutMs,
|
|
835
|
-
segments,
|
|
836
|
-
workers,
|
|
837
|
-
onSegmentProgress,
|
|
838
|
-
});
|
|
839
|
-
logSlidesTiming?.(`scene detection retry (threshold=${fallbackThreshold}, segments=${segments.length})`, retryStartedAt);
|
|
840
|
-
warnings.push(`Scene detection retry used lower threshold ${fallbackThreshold} after zero detections`);
|
|
841
|
-
if (timestamps.length > 0) {
|
|
842
|
-
effectiveThreshold = fallbackThreshold;
|
|
843
|
-
}
|
|
844
|
-
}
|
|
845
|
-
}
|
|
846
|
-
const autoTune = autoTuneThreshold
|
|
847
|
-
? {
|
|
848
|
-
enabled: true,
|
|
849
|
-
chosenThreshold: timestamps.length > 0 ? effectiveThreshold : baseThreshold,
|
|
850
|
-
confidence: calibration.confidence,
|
|
851
|
-
strategy: "hash",
|
|
852
|
-
}
|
|
853
|
-
: {
|
|
854
|
-
enabled: false,
|
|
855
|
-
chosenThreshold: baseThreshold,
|
|
856
|
-
confidence: 0,
|
|
857
|
-
strategy: "none",
|
|
858
|
-
};
|
|
859
|
-
return { timestamps, autoTune, durationSeconds: videoInfo.durationSeconds };
|
|
860
|
-
}
|
|
861
|
-
async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, timestamps, segments, durationSeconds, timeoutMs, workers, onProgress, onStatus, onSlide, logSlides, logSlidesTiming, }) {
|
|
862
|
-
const FRAME_ADJUST_RANGE_SECONDS = 10;
|
|
863
|
-
const FRAME_ADJUST_STEP_SECONDS = 2;
|
|
864
|
-
const FRAME_MIN_BRIGHTNESS = 0.24;
|
|
865
|
-
const FRAME_MIN_CONTRAST = 0.16;
|
|
866
|
-
const SEEK_PAD_SECONDS = 8;
|
|
867
|
-
const clampTimestamp = (value) => {
|
|
868
|
-
const upper = typeof durationSeconds === "number" && Number.isFinite(durationSeconds) && durationSeconds > 0
|
|
869
|
-
? Math.max(0, durationSeconds - 0.1)
|
|
870
|
-
: Number.POSITIVE_INFINITY;
|
|
871
|
-
return clamp(value, 0, upper);
|
|
872
|
-
};
|
|
873
|
-
const resolveSegmentBounds = (segment) => {
|
|
874
|
-
if (!segment)
|
|
875
|
-
return null;
|
|
876
|
-
const start = Math.max(0, segment.start);
|
|
877
|
-
const end = typeof segment.end === "number" && Number.isFinite(segment.end) ? segment.end : null;
|
|
878
|
-
if (end != null && end <= start)
|
|
879
|
-
return null;
|
|
880
|
-
return { start, end };
|
|
881
|
-
};
|
|
882
|
-
const resolveSegmentPadding = (segment) => {
|
|
883
|
-
if (!segment || segment.end == null)
|
|
884
|
-
return 0;
|
|
885
|
-
const duration = Math.max(0, segment.end - segment.start);
|
|
886
|
-
if (duration <= 0)
|
|
887
|
-
return 0;
|
|
888
|
-
return Math.min(1.5, Math.max(0.2, duration * 0.08));
|
|
889
|
-
};
|
|
890
|
-
const parseSignalstats = (line, stats) => {
|
|
891
|
-
if (!line.includes("lavfi.signalstats."))
|
|
892
|
-
return;
|
|
893
|
-
const match = line.match(/lavfi\.signalstats\.(YMIN|YMAX|YAVG)=(\d+(?:\.\d+)?)/);
|
|
894
|
-
if (!match)
|
|
895
|
-
return;
|
|
896
|
-
const value = Number(match[2]);
|
|
897
|
-
if (!Number.isFinite(value))
|
|
898
|
-
return;
|
|
899
|
-
if (match[1] === "YMIN")
|
|
900
|
-
stats.ymin = value;
|
|
901
|
-
if (match[1] === "YMAX")
|
|
902
|
-
stats.ymax = value;
|
|
903
|
-
if (match[1] === "YAVG")
|
|
904
|
-
stats.yavg = value;
|
|
905
|
-
};
|
|
906
|
-
const toQuality = (stats) => {
|
|
907
|
-
if (stats.ymin == null || stats.ymax == null || stats.yavg == null)
|
|
908
|
-
return null;
|
|
909
|
-
const brightness = clamp(stats.yavg / 255, 0, 1);
|
|
910
|
-
const contrast = clamp((stats.ymax - stats.ymin) / 255, 0, 1);
|
|
911
|
-
return { brightness, contrast };
|
|
912
|
-
};
|
|
913
|
-
const scoreQuality = (quality, deltaSeconds) => {
|
|
914
|
-
const penalty = Math.min(1, Math.abs(deltaSeconds) / FRAME_ADJUST_RANGE_SECONDS) * 0.05;
|
|
915
|
-
// Prefer brighter frames (dark-but-contrasty thumbnails are still unpleasant).
|
|
916
|
-
return quality.brightness * 0.55 + quality.contrast * 0.45 - penalty;
|
|
917
|
-
};
|
|
918
|
-
const extractFrame = async (timestamp, outputPath, opts) => {
|
|
919
|
-
const stats = { ymin: null, ymax: null, yavg: null };
|
|
920
|
-
let actualTimestamp = null;
|
|
921
|
-
const effectiveTimeoutMs = typeof opts?.timeoutMs === "number" && Number.isFinite(opts.timeoutMs) && opts.timeoutMs > 0
|
|
922
|
-
? opts.timeoutMs
|
|
923
|
-
: timeoutMs;
|
|
924
|
-
const seekBase = Math.max(0, timestamp - SEEK_PAD_SECONDS);
|
|
925
|
-
const seekOffset = Math.max(0, timestamp - seekBase);
|
|
926
|
-
const args = [
|
|
927
|
-
"-hide_banner",
|
|
928
|
-
...(seekBase > 0 ? ["-ss", String(seekBase)] : []),
|
|
929
|
-
"-i",
|
|
930
|
-
inputPath,
|
|
931
|
-
...(seekOffset > 0 ? ["-ss", String(seekOffset)] : []),
|
|
932
|
-
"-vf",
|
|
933
|
-
"signalstats,showinfo,metadata=print",
|
|
934
|
-
"-vframes",
|
|
935
|
-
"1",
|
|
936
|
-
"-q:v",
|
|
937
|
-
"2",
|
|
938
|
-
"-an",
|
|
939
|
-
"-sn",
|
|
940
|
-
"-update",
|
|
941
|
-
"1",
|
|
942
|
-
outputPath,
|
|
943
|
-
];
|
|
944
|
-
await runProcess({
|
|
945
|
-
command: ffmpegPath,
|
|
946
|
-
args,
|
|
947
|
-
timeoutMs: effectiveTimeoutMs,
|
|
948
|
-
errorLabel: "ffmpeg",
|
|
949
|
-
onStderrLine: (line) => {
|
|
950
|
-
if (actualTimestamp == null) {
|
|
951
|
-
const parsed = parseShowinfoTimestamp(line);
|
|
952
|
-
if (parsed != null)
|
|
953
|
-
actualTimestamp = parsed;
|
|
954
|
-
}
|
|
955
|
-
parseSignalstats(line, stats);
|
|
956
|
-
},
|
|
957
|
-
});
|
|
958
|
-
const stat = await fs.stat(outputPath).catch(() => null);
|
|
959
|
-
if (!stat?.isFile() || stat.size === 0) {
|
|
960
|
-
throw new Error(`ffmpeg produced no output frame at ${outputPath}`);
|
|
961
|
-
}
|
|
962
|
-
const quality = toQuality(stats);
|
|
963
|
-
return {
|
|
964
|
-
slide: { index: 0, timestamp, imagePath: outputPath },
|
|
965
|
-
quality,
|
|
966
|
-
actualTimestamp,
|
|
967
|
-
seekBase,
|
|
968
|
-
};
|
|
969
|
-
};
|
|
970
|
-
const slides = [];
|
|
971
|
-
const startedAt = Date.now();
|
|
972
|
-
const tasks = timestamps.map((timestamp, index) => async () => {
|
|
973
|
-
const segment = segments?.[index] ?? null;
|
|
974
|
-
const bounds = resolveSegmentBounds(segment);
|
|
975
|
-
const padding = resolveSegmentPadding(segment);
|
|
976
|
-
const clampedTimestamp = clampTimestamp(timestamp);
|
|
977
|
-
const safeTimestamp = bounds && bounds.end != null
|
|
978
|
-
? bounds.end - padding <= bounds.start + padding
|
|
979
|
-
? clampTimestamp(bounds.start + (bounds.end - bounds.start) * 0.5)
|
|
980
|
-
: clamp(clampedTimestamp, bounds.start + padding, bounds.end - padding)
|
|
981
|
-
: bounds
|
|
982
|
-
? Math.max(bounds.start + padding, clampedTimestamp)
|
|
983
|
-
: clampedTimestamp;
|
|
984
|
-
const outputPath = path.join(outputDir, `slide_${String(index + 1).padStart(4, "0")}.png`);
|
|
985
|
-
const extracted = await extractFrame(safeTimestamp, outputPath);
|
|
986
|
-
const resolvedTimestamp = resolveExtractedTimestamp({
|
|
987
|
-
requested: safeTimestamp,
|
|
988
|
-
actual: extracted.actualTimestamp,
|
|
989
|
-
seekBase: extracted.seekBase,
|
|
990
|
-
});
|
|
991
|
-
const delta = resolvedTimestamp - safeTimestamp;
|
|
992
|
-
if (Math.abs(delta) >= 0.25) {
|
|
993
|
-
const actualLabel = extracted.actualTimestamp != null && Number.isFinite(extracted.actualTimestamp)
|
|
994
|
-
? extracted.actualTimestamp.toFixed(2)
|
|
995
|
-
: "n/a";
|
|
996
|
-
logSlides?.(`frame pts slide=${index + 1} req=${safeTimestamp.toFixed(2)}s actual=${actualLabel}s base=${extracted.seekBase.toFixed(2)}s -> ${resolvedTimestamp.toFixed(2)}s delta=${delta.toFixed(2)}s`);
|
|
997
|
-
}
|
|
998
|
-
const imageVersion = Date.now();
|
|
999
|
-
onSlide?.({
|
|
1000
|
-
index: index + 1,
|
|
1001
|
-
timestamp: resolvedTimestamp,
|
|
1002
|
-
imagePath: outputPath,
|
|
1003
|
-
imageVersion,
|
|
1004
|
-
});
|
|
1005
|
-
return {
|
|
1006
|
-
index: index + 1,
|
|
1007
|
-
timestamp: resolvedTimestamp,
|
|
1008
|
-
requestedTimestamp: safeTimestamp,
|
|
1009
|
-
imagePath: outputPath,
|
|
1010
|
-
quality: extracted.quality,
|
|
1011
|
-
imageVersion,
|
|
1012
|
-
segment: bounds,
|
|
1013
|
-
};
|
|
1014
|
-
});
|
|
1015
|
-
const results = await runWithConcurrency(tasks, workers, onProgress ?? undefined);
|
|
1016
|
-
const ordered = results.filter(Boolean).sort((a, b) => a.index - b.index);
|
|
1017
|
-
const fixTasks = [];
|
|
1018
|
-
for (const frame of ordered) {
|
|
1019
|
-
slides.push({
|
|
1020
|
-
index: frame.index,
|
|
1021
|
-
timestamp: frame.timestamp,
|
|
1022
|
-
imagePath: frame.imagePath,
|
|
1023
|
-
imageVersion: frame.imageVersion,
|
|
1024
|
-
});
|
|
1025
|
-
const quality = frame.quality;
|
|
1026
|
-
if (!quality)
|
|
1027
|
-
continue;
|
|
1028
|
-
const shouldPreferBrighterFirstSlide = frame.index === 1 && frame.timestamp < 8;
|
|
1029
|
-
const needsAdjust = quality.brightness < FRAME_MIN_BRIGHTNESS ||
|
|
1030
|
-
quality.contrast < FRAME_MIN_CONTRAST ||
|
|
1031
|
-
(shouldPreferBrighterFirstSlide && (quality.brightness < 0.58 || quality.contrast < 0.2));
|
|
1032
|
-
if (!needsAdjust)
|
|
1033
|
-
continue;
|
|
1034
|
-
fixTasks.push(async () => {
|
|
1035
|
-
const bounds = resolveSegmentBounds(frame.segment ?? null);
|
|
1036
|
-
const padding = resolveSegmentPadding(frame.segment ?? null);
|
|
1037
|
-
const minTs = bounds
|
|
1038
|
-
? clampTimestamp(bounds.start + padding)
|
|
1039
|
-
: clampTimestamp(frame.timestamp - FRAME_ADJUST_RANGE_SECONDS);
|
|
1040
|
-
const maxTs = bounds && bounds.end != null
|
|
1041
|
-
? clampTimestamp(bounds.end - padding)
|
|
1042
|
-
: clampTimestamp(frame.timestamp + FRAME_ADJUST_RANGE_SECONDS);
|
|
1043
|
-
if (maxTs <= minTs)
|
|
1044
|
-
return;
|
|
1045
|
-
const baseTimestamp = clamp(frame.timestamp, minTs, maxTs);
|
|
1046
|
-
const maxRange = Math.min(FRAME_ADJUST_RANGE_SECONDS, maxTs - minTs);
|
|
1047
|
-
if (!Number.isFinite(maxRange) || maxRange < FRAME_ADJUST_STEP_SECONDS)
|
|
1048
|
-
return;
|
|
1049
|
-
const candidateOffsets = [];
|
|
1050
|
-
for (let offset = FRAME_ADJUST_STEP_SECONDS; offset <= maxRange; offset += FRAME_ADJUST_STEP_SECONDS) {
|
|
1051
|
-
candidateOffsets.push(offset, -offset);
|
|
1052
|
-
}
|
|
1053
|
-
let best = {
|
|
1054
|
-
timestamp: baseTimestamp,
|
|
1055
|
-
offsetSeconds: 0,
|
|
1056
|
-
quality,
|
|
1057
|
-
score: scoreQuality(quality, 0),
|
|
1058
|
-
};
|
|
1059
|
-
let selectedTimestamp = baseTimestamp;
|
|
1060
|
-
let didReplace = false;
|
|
1061
|
-
const minImproveDelta = shouldPreferBrighterFirstSlide ? 0.015 : 0.03;
|
|
1062
|
-
for (const offsetSeconds of candidateOffsets) {
|
|
1063
|
-
if (offsetSeconds === 0)
|
|
1064
|
-
continue;
|
|
1065
|
-
const candidateTimestamp = clamp(baseTimestamp + offsetSeconds, minTs, maxTs);
|
|
1066
|
-
if (Math.abs(candidateTimestamp - baseTimestamp) < 0.01)
|
|
1067
|
-
continue;
|
|
1068
|
-
const tempPath = path.join(outputDir, `slide_${String(frame.index).padStart(4, "0")}_alt.png`);
|
|
1069
|
-
try {
|
|
1070
|
-
const candidate = await extractFrame(candidateTimestamp, tempPath, {
|
|
1071
|
-
timeoutMs: Math.min(timeoutMs, 12_000),
|
|
1072
|
-
});
|
|
1073
|
-
if (!candidate.quality)
|
|
1074
|
-
continue;
|
|
1075
|
-
const resolvedCandidateTimestamp = resolveExtractedTimestamp({
|
|
1076
|
-
requested: candidateTimestamp,
|
|
1077
|
-
actual: candidate.actualTimestamp,
|
|
1078
|
-
seekBase: candidate.seekBase,
|
|
1079
|
-
});
|
|
1080
|
-
const score = scoreQuality(candidate.quality, offsetSeconds);
|
|
1081
|
-
if (score > best.score + minImproveDelta) {
|
|
1082
|
-
best = {
|
|
1083
|
-
timestamp: resolvedCandidateTimestamp,
|
|
1084
|
-
offsetSeconds,
|
|
1085
|
-
quality: candidate.quality,
|
|
1086
|
-
score,
|
|
1087
|
-
};
|
|
1088
|
-
try {
|
|
1089
|
-
await fs.rename(tempPath, frame.imagePath);
|
|
1090
|
-
}
|
|
1091
|
-
catch (err) {
|
|
1092
|
-
const code = err && typeof err === "object" && "code" in err ? String(err.code) : "";
|
|
1093
|
-
if (code === "EEXIST") {
|
|
1094
|
-
await fs.rm(frame.imagePath, { force: true }).catch(() => null);
|
|
1095
|
-
await fs.rename(tempPath, frame.imagePath);
|
|
1096
|
-
}
|
|
1097
|
-
else {
|
|
1098
|
-
throw err;
|
|
1099
|
-
}
|
|
1100
|
-
}
|
|
1101
|
-
didReplace = true;
|
|
1102
|
-
selectedTimestamp = resolvedCandidateTimestamp;
|
|
1103
|
-
}
|
|
1104
|
-
else {
|
|
1105
|
-
await fs.rm(tempPath, { force: true }).catch(() => null);
|
|
1106
|
-
}
|
|
1107
|
-
}
|
|
1108
|
-
catch {
|
|
1109
|
-
await fs.rm(tempPath, { force: true }).catch(() => null);
|
|
1110
|
-
}
|
|
1111
|
-
}
|
|
1112
|
-
if (!didReplace)
|
|
1113
|
-
return;
|
|
1114
|
-
const updatedVersion = Date.now();
|
|
1115
|
-
const slide = slides[frame.index - 1];
|
|
1116
|
-
if (slide) {
|
|
1117
|
-
slide.imageVersion = updatedVersion;
|
|
1118
|
-
slide.timestamp = selectedTimestamp;
|
|
1119
|
-
}
|
|
1120
|
-
if (selectedTimestamp !== frame.timestamp) {
|
|
1121
|
-
const offsetSeconds = (selectedTimestamp - frame.timestamp).toFixed(2);
|
|
1122
|
-
const baseBrightness = quality.brightness.toFixed(2);
|
|
1123
|
-
const baseContrast = quality.contrast.toFixed(2);
|
|
1124
|
-
const bestBrightness = best.quality?.brightness?.toFixed(2) ?? baseBrightness;
|
|
1125
|
-
const bestContrast = best.quality?.contrast?.toFixed(2) ?? baseContrast;
|
|
1126
|
-
logSlides?.(`thumbnail adjust slide=${frame.index} ts=${frame.timestamp.toFixed(2)}s -> ${selectedTimestamp.toFixed(2)}s offset=${offsetSeconds}s base=${baseBrightness}/${baseContrast} best=${bestBrightness}/${bestContrast}`);
|
|
1127
|
-
}
|
|
1128
|
-
onSlide?.({
|
|
1129
|
-
index: frame.index,
|
|
1130
|
-
timestamp: selectedTimestamp,
|
|
1131
|
-
imagePath: frame.imagePath,
|
|
1132
|
-
imageVersion: updatedVersion,
|
|
1133
|
-
});
|
|
1134
|
-
});
|
|
1135
|
-
}
|
|
1136
|
-
if (fixTasks.length > 0) {
|
|
1137
|
-
const fixStartedAt = Date.now();
|
|
1138
|
-
const THUMB_START = 90;
|
|
1139
|
-
const THUMB_END = 96;
|
|
1140
|
-
// Avoid UI "stuck" at a static percent while we do expensive refinement passes.
|
|
1141
|
-
onStatus?.(`Slides: improving thumbnails ${THUMB_START}%`);
|
|
1142
|
-
logSlides?.(`thumbnail adjust start count=${fixTasks.length} range=±${FRAME_ADJUST_RANGE_SECONDS}s step=${FRAME_ADJUST_STEP_SECONDS}s`);
|
|
1143
|
-
await runWithConcurrency(fixTasks, Math.min(4, workers), (completed, total) => {
|
|
1144
|
-
const ratio = total > 0 ? completed / total : 0;
|
|
1145
|
-
const percent = Math.round(THUMB_START + ratio * (THUMB_END - THUMB_START));
|
|
1146
|
-
onStatus?.(`Slides: improving thumbnails ${percent}%`);
|
|
1147
|
-
});
|
|
1148
|
-
onStatus?.(`Slides: improving thumbnails ${THUMB_END}%`);
|
|
1149
|
-
logSlidesTiming?.("thumbnail adjust done", fixStartedAt);
|
|
1150
|
-
}
|
|
1151
|
-
logSlidesTiming?.(`extract frame loop (count=${timestamps.length}, workers=${workers})`, startedAt);
|
|
1152
|
-
return slides;
|
|
1153
|
-
}
|
|
1154
|
-
/**
 * Restricts `value` to the range [`min`, `max`].
 *
 * The lower bound is tested first, so `min` is returned whenever `value` is
 * below it, even if the bounds are inverted. NaN fails both comparisons and is
 * returned unchanged.
 *
 * @param {number} value Number to restrict.
 * @param {number} min Lower bound.
 * @param {number} max Upper bound.
 * @returns {number} `min`, `max`, or `value` itself.
 */
function clamp(value, min, max) {
    return value < min ? min : value > max ? max : value;
}
|
|
1161
|
-
/**
 * Picks evenly spaced sample timestamps between 5% and 95% of the duration.
 *
 * @param {number|null|undefined} durationSeconds Video length in seconds.
 * @param {number} sampleCount Requested sample count; rounded and limited to 3..12.
 *   A value that is not a number falls back to the minimum of 3.
 * @returns {number[]} Ascending, non-negative timestamps in seconds, or `[0]`
 *   when the duration is missing or not positive.
 */
function buildCalibrationSampleTimestamps(durationSeconds, sampleCount) {
    if (!durationSeconds || durationSeconds <= 0)
        return [0];
    const rounded = Math.round(sampleCount);
    // The count is always at least 3, so no single-sample special case is needed.
    const count = Number.isNaN(rounded) ? 3 : Math.max(3, Math.min(12, rounded));
    const startRatio = 0.05;
    const endRatio = 0.95;
    // Stay 0.1s short of the end, but never below zero for clips shorter than 0.1s.
    const latest = Math.max(0, durationSeconds - 0.1);
    const step = (endRatio - startRatio) / (count - 1);
    return Array.from({ length: count }, (_, i) => {
        const ts = durationSeconds * (startRatio + step * i);
        return Math.min(latest, Math.max(0, ts));
    });
}
|
|
1178
|
-
/**
 * Summarises a list of numbers as median, 75th/90th percentile and maximum.
 *
 * Percentiles use a nearest-rank lookup on a sorted copy: the fractional
 * position `(length - 1) * p` is rounded and kept inside the array.
 *
 * @param {number[]} values Input numbers (not modified).
 * @returns {{ median: number, p75: number, p90: number, max: number }} All
 *   zeros when `values` is empty.
 */
function computeDiffStats(values) {
    const ordered = Array.from(values).sort((x, y) => x - y);
    const last = ordered.length - 1;
    if (last < 0) {
        return { median: 0, p75: 0, p90: 0, max: 0 };
    }
    const pick = (fraction) => {
        const idx = Math.min(last, Math.max(0, Math.round(last * fraction)));
        return ordered[idx] ?? 0;
    };
    return {
        median: pick(0.5),
        p75: pick(0.75),
        p90: pick(0.9),
        max: ordered[last] ?? 0,
    };
}
|
|
1190
|
-
/**
 * Rounds a number to two decimal places.
 *
 * @param {number} value Number to round.
 * @returns {number} `value` rounded to the nearest hundredth.
 */
function roundThreshold(value) {
    const hundredths = Math.round(value * 100);
    return hundredths / 100;
}
|
|
1193
|
-
/**
 * Estimates a scene-detection threshold by hashing sample frames and looking
 * at how much consecutive hashes differ.
 *
 * @param {object} options
 * @param {string} options.ffmpegPath Passed through to `hashFrameAtTimestamp`.
 * @param {string} options.inputPath Passed through to `hashFrameAtTimestamp`.
 * @param {number|null} options.durationSeconds Video length used to place samples.
 * @param {number} options.sampleCount Requested number of samples.
 * @param {number} options.timeoutMs Per-sample timeout.
 * @param {Function} [options.logSlides] Optional logger for the calibration summary.
 * @returns {Promise<{ threshold: number, confidence: number }>} Threshold in
 *   0.05..0.3 and confidence in 0..1; `{ threshold: 0.2, confidence: 0 }` when
 *   fewer than two sample timestamps are available.
 */
async function calibrateSceneThreshold({ ffmpegPath, inputPath, durationSeconds, sampleCount, timeoutMs, logSlides, }) {
    const timestamps = buildCalibrationSampleTimestamps(durationSeconds, sampleCount);
    if (timestamps.length < 2) {
        return { threshold: 0.2, confidence: 0 };
    }
    // Samples are hashed one after another; a falsy hash (failed sample) is skipped.
    const hashes = [];
    for (const timestamp of timestamps) {
        const frameHash = await hashFrameAtTimestamp({ ffmpegPath, inputPath, timestamp, timeoutMs });
        if (frameHash) {
            hashes.push(frameHash);
        }
    }
    // Distance between each hash and the one collected just before it.
    const diffs = hashes.slice(1).map((hash, i) => computeHashDistanceRatio(hashes[i], hash));
    const stats = computeDiffStats(diffs);
    const candidates = [stats.median * 0.15, stats.p75 * 0.2, stats.p90 * 0.25];
    let threshold = roundThreshold(Math.max(...candidates));
    if (stats.p75 >= 0.12) {
        threshold = Math.min(threshold, 0.05);
    }
    else if (stats.p90 < 0.05) {
        threshold = 0.05;
    }
    threshold = clamp(threshold, 0.05, 0.3);
    // With fewer than two diffs the percentile is not meaningful, so use the maximum.
    const confidenceBasis = diffs.length >= 2 ? stats.p75 : stats.max;
    const confidence = clamp(confidenceBasis / 0.25, 0, 1);
    logSlides?.(`calibration samples=${timestamps.length} diffs=${diffs.length} median=${stats.median.toFixed(3)} p75=${stats.p75.toFixed(3)} threshold=${threshold}`);
    return { threshold, confidence };
}
|
|
1230
|
-
/**
 * Splits a video into contiguous `{ start, duration }` segments.
 *
 * The segment count is the worker count (rounded, limited to 1..16) but never
 * more than one segment per started minute of video. The last segment takes
 * whatever remains so the segments add up to the full duration.
 *
 * @param {number|null|undefined} durationSeconds Video length in seconds.
 * @param {number} workers Desired parallelism.
 * @returns {{ start: number, duration: number }[]} A single segment starting at
 *   0 when the duration is missing/non-positive or `workers <= 1`.
 */
function buildSegments(durationSeconds, workers) {
    const unsplittable = !durationSeconds || durationSeconds <= 0 || workers <= 1;
    if (unsplittable) {
        return [{ start: 0, duration: durationSeconds ?? 0 }];
    }
    const workerCap = Math.max(1, Math.min(16, Math.round(workers)));
    const count = Math.min(workerCap, Math.ceil(durationSeconds / 60));
    const span = durationSeconds / count;
    return Array.from({ length: count }, (_, i) => {
        const start = i * span;
        const isLast = i === count - 1;
        return { start, duration: isLast ? durationSeconds - start : span };
    });
}
|
|
1246
|
-
/**
 * Runs ffmpeg's scene-change filter over each segment and collects the
 * timestamps reported on stderr.
 *
 * @param {object} options
 * @param {string} options.ffmpegPath Executable passed to `runProcess`.
 * @param {string} options.inputPath Input file passed to ffmpeg via `-i`.
 * @param {number} options.threshold Value interpolated into `gt(scene,...)`.
 * @param {number} options.timeoutMs Timeout; raised to at least `FFMPEG_TIMEOUT_FALLBACK_MS`.
 * @param {{ start: number, duration: number }[]} [options.segments] Segments to
 *   scan; a single whole-file pass is used when missing or empty.
 * @param {number} [options.workers] Concurrency; 1 when missing or not positive.
 * @param {Function} [options.onSegmentProgress] Progress callback handed to `runWithConcurrency`.
 * @returns {Promise<number[]>} All detected timestamps, sorted ascending.
 */
async function detectSceneTimestamps({ ffmpegPath, inputPath, threshold, timeoutMs, segments, workers, onSegmentProgress, }) {
    const filter = `select='gt(scene,${threshold})',showinfo`;
    const usedSegments = segments?.length > 0 ? segments : [{ start: 0, duration: 0 }];
    const concurrency = workers > 0 ? workers : 1;
    const scanSegment = async ({ start, duration }) => {
        // Only seek/limit when the segment has a positive duration; otherwise scan the whole input.
        const seekArgs = duration > 0 ? ["-ss", String(start), "-t", String(duration)] : [];
        const args = [
            "-hide_banner",
            ...seekArgs,
            "-i",
            inputPath,
            "-vf",
            filter,
            "-fps_mode",
            "vfr",
            "-an",
            "-sn",
            "-f",
            "null",
            "-",
        ];
        const found = [];
        await runProcess({
            command: ffmpegPath,
            args,
            timeoutMs: Math.max(timeoutMs, FFMPEG_TIMEOUT_FALLBACK_MS),
            errorLabel: "ffmpeg",
            onStderrLine: (line) => {
                const ts = parseShowinfoTimestamp(line);
                // Parsed timestamps are offset by the segment start before being collected.
                if (ts != null)
                    found.push(ts + start);
            },
        });
        return found;
    };
    const tasks = usedSegments.map((segment) => () => scanSegment(segment));
    const perSegment = await runWithConcurrency(tasks, concurrency, onSegmentProgress ?? undefined);
    return perSegment.flat().sort((a, b) => a - b);
}
|
|
1288
|
-
/**
 * Grabs one frame at `timestamp`, scaled to 32x32 grayscale, and turns its
 * first 1024 bytes into an average hash.
 *
 * @param {object} options
 * @param {string} options.ffmpegPath Executable passed to `runProcessCaptureBuffer`.
 * @param {string} options.inputPath Input file passed to ffmpeg via `-i`.
 * @param {number} options.timestamp Seek position passed to `-ss`.
 * @param {number} options.timeoutMs Process timeout.
 * @returns {Promise<Uint8Array|null>} The hash, or `null` when ffmpeg fails or
 *   returns fewer than 1024 bytes.
 */
async function hashFrameAtTimestamp({ ffmpegPath, inputPath, timestamp, timeoutMs, }) {
    const expectedBytes = 1024;
    const args = [
        "-hide_banner",
        "-ss",
        String(timestamp),
        "-i",
        inputPath,
        "-frames:v",
        "1",
        "-vf",
        "scale=32:32,format=gray",
        "-f",
        "rawvideo",
        "-pix_fmt",
        "gray",
        "-",
    ];
    try {
        const raw = await runProcessCaptureBuffer({
            command: ffmpegPath,
            args,
            timeoutMs,
            errorLabel: "ffmpeg",
        });
        return raw.length < expectedBytes ? null : buildAverageHash(raw.subarray(0, expectedBytes));
    }
    catch {
        // Any failure is reported as "no hash" rather than an error.
        return null;
    }
}
|
|
1322
|
-
/**
 * Builds an average hash: one 0/1 entry per input value, 1 when the value is
 * at or above the mean of all values.
 *
 * @param {ArrayLike<number>} pixels Sample values.
 * @returns {Uint8Array} Array of 0/1 flags with the same length as `pixels`.
 */
function buildAverageHash(pixels) {
    let total = 0;
    for (let i = 0; i < pixels.length; i += 1) {
        total += pixels[i];
    }
    const mean = total / pixels.length;
    return Uint8Array.from(pixels, (px) => (px >= mean ? 1 : 0));
}
|
|
1333
|
-
/**
 * Returns the fraction of positions at which two hashes differ, comparing only
 * the first `min(a.length, b.length)` entries.
 *
 * @param {ArrayLike<number>} a First hash.
 * @param {ArrayLike<number>} b Second hash.
 * @returns {number} Mismatch ratio in 0..1, or 0 when there is nothing to compare.
 */
function computeHashDistanceRatio(a, b) {
    const compared = Math.min(a.length, b.length);
    if (compared === 0) {
        return 0;
    }
    let mismatches = 0;
    for (let i = 0; i < compared; i += 1) {
        mismatches += a[i] === b[i] ? 0 : 1;
    }
    return mismatches / compared;
}
|
|
1342
|
-
/**
 * Reads duration and frame size from ffprobe's JSON output.
 *
 * Width and height come from the first video stream that reports them. The
 * duration is overwritten by every video stream with a positive duration, so
 * the last such stream wins; the container-level duration is used only when no
 * video stream supplies one.
 *
 * @param {object} options
 * @param {string} [options.ffprobePath] Explicit ffprobe binary; otherwise
 *   looked up with `resolveExecutableInPath("ffprobe", env)`.
 * @param {object} options.env Environment used for the lookup.
 * @param {string} options.inputPath File to probe.
 * @param {number} options.timeoutMs Timeout, capped at 30 seconds.
 * @returns {Promise<{ durationSeconds: number|null, width: number|null, height: number|null }>}
 *   All fields are `null` when ffprobe is unavailable, fails, or its output cannot be parsed.
 */
async function probeVideoInfo({ ffprobePath, env, inputPath, timeoutMs, }) {
    const unknown = () => ({ durationSeconds: null, width: null, height: null });
    const probeBin = ffprobePath ?? resolveExecutableInPath("ffprobe", env);
    if (!probeBin) {
        return unknown();
    }
    const positiveOrNull = (raw) => {
        const parsedNumber = Number(raw);
        return Number.isFinite(parsedNumber) && parsedNumber > 0 ? parsedNumber : null;
    };
    try {
        const output = await runProcessCapture({
            command: probeBin,
            args: ["-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", inputPath],
            timeoutMs: Math.min(timeoutMs, 30_000),
            errorLabel: "ffprobe",
        });
        const parsed = JSON.parse(output);
        const info = unknown();
        for (const stream of parsed.streams ?? []) {
            if (stream.codec_type !== "video") {
                continue;
            }
            if (info.width == null && typeof stream.width === "number") {
                info.width = stream.width;
            }
            if (info.height == null && typeof stream.height === "number") {
                info.height = stream.height;
            }
            info.durationSeconds = positiveOrNull(stream.duration) ?? info.durationSeconds;
        }
        info.durationSeconds ??= positiveOrNull(parsed.format?.duration);
        return info;
    }
    catch {
        return unknown();
    }
}
|
|
1380
|
-
/**
 * Spawns a tracked child process and streams its output line by line.
 *
 * stderr lines go to `onStderrLine`; stdout lines go to `onStdoutLine`, or to
 * `onStderrLine` when no stdout callback is given. Every line is also appended
 * to the tracking handle. Up to roughly 8 KiB of stderr is kept for the error
 * message.
 *
 * @param {object} options
 * @param {string} options.command Executable to spawn.
 * @param {string[]} options.args Arguments for the executable.
 * @param {number} options.timeoutMs Time after which the child is killed with SIGKILL.
 * @param {string} options.errorLabel Label used for tracking and in error messages.
 * @param {Function} [options.onStderrLine] Called with `(line, handle)` per stderr line.
 * @param {Function} [options.onStdoutLine] Called with `(line, handle)` per stdout line.
 * @returns {Promise<void>} Resolves on exit code 0.
 * @throws {Error} On spawn error, timeout (`"<label> timed out"`), or non-zero exit.
 */
async function runProcess({ command, args, timeoutMs, errorLabel, onStderrLine, onStdoutLine, }) {
    await new Promise((resolve, reject) => {
        const { proc, handle } = spawnTracked(command, args, {
            stdio: ["ignore", "pipe", "pipe"],
            label: errorLabel,
            kind: errorLabel,
            captureOutput: false,
        });
        const handleStdoutLine = onStdoutLine ?? onStderrLine;
        let stderr = "";
        let stderrBuffer = "";
        let stdoutBuffer = "";
        const flushLine = (line) => {
            if (onStderrLine)
                onStderrLine(line, handle);
            handle?.appendOutput("stderr", line);
            if (stderr.length < 8192) {
                stderr += line;
                if (!line.endsWith("\n"))
                    stderr += "\n";
            }
        };
        const flushStdoutLine = (line) => {
            if (handleStdoutLine)
                handleStdoutLine(line, handle);
            handle?.appendOutput("stdout", line);
        };
        if (proc.stderr) {
            proc.stderr.setEncoding("utf8");
            proc.stderr.on("data", (chunk) => {
                stderrBuffer += chunk;
                const lines = stderrBuffer.split(/\r?\n/);
                // The last piece may be an unfinished line; keep it for the next chunk.
                stderrBuffer = lines.pop() ?? "";
                for (const line of lines) {
                    if (line)
                        flushLine(line);
                }
            });
        }
        if (proc.stdout) {
            if (handleStdoutLine) {
                proc.stdout.setEncoding("utf8");
                proc.stdout.on("data", (chunk) => {
                    stdoutBuffer += chunk;
                    const lines = stdoutBuffer.split(/\r?\n/);
                    stdoutBuffer = lines.pop() ?? "";
                    for (const line of lines) {
                        if (line)
                            flushStdoutLine(line);
                    }
                });
            }
            else {
                // Nobody consumes stdout: keep the pipe flowing so a child that
                // writes a lot to stdout cannot stall on a full pipe buffer.
                proc.stdout.resume();
            }
        }
        const timeout = setTimeout(() => {
            proc.kill("SIGKILL");
            reject(new Error(`${errorLabel} timed out`));
        }, timeoutMs);
        proc.on("error", (error) => {
            clearTimeout(timeout);
            reject(error);
        });
        proc.on("close", (code) => {
            clearTimeout(timeout);
            // Flush whatever was left without a trailing newline.
            if (stderrBuffer.trim().length > 0) {
                flushLine(stderrBuffer.trim());
            }
            if (stdoutBuffer.trim().length > 0) {
                flushStdoutLine(stdoutBuffer.trim());
            }
            if (code === 0) {
                resolve();
                return;
            }
            const suffix = stderr.trim() ? `: ${stderr.trim()}` : "";
            reject(new Error(`${errorLabel} exited with code ${code}${suffix}`));
        });
    });
}
|
|
1458
|
-
/**
 * Drops slides that follow the previously kept slide by less than
 * `minDurationSeconds`, deletes their image files (fire-and-forget), and
 * renumbers the survivors from 1.
 *
 * @param {object[]} slides Slides with `timestamp` and `imagePath`.
 * @param {number} minDurationSeconds Minimum gap; `<= 0` disables filtering and
 *   returns `slides` itself.
 * @param {string[]} warnings Receives a message when any slide was dropped.
 * @returns {object[]} The kept slides as copies with a fresh 1-based `index`.
 */
function applyMinDurationFilter(slides, minDurationSeconds, warnings) {
    if (minDurationSeconds <= 0) {
        return slides;
    }
    const kept = [];
    let previousKept = Number.NEGATIVE_INFINITY;
    for (const slide of slides) {
        const farEnough = slide.timestamp - previousKept >= minDurationSeconds;
        if (!farEnough) {
            // Best-effort cleanup; the deletion is not awaited and failures are ignored.
            void fs.rm(slide.imagePath, { force: true }).catch(() => { });
            continue;
        }
        kept.push(slide);
        previousKept = slide.timestamp;
    }
    const dropped = slides.length - kept.length;
    if (dropped > 0) {
        warnings.push(`Filtered ${dropped} slides by min duration`);
    }
    return kept.map((slide, position) => ({ ...slide, index: position + 1 }));
}
|
|
1477
|
-
/**
 * Merges two timestamp lists into one ascending list, dropping non-finite
 * values and any timestamp closer than the minimum gap to the last one kept.
 *
 * The minimum gap is half of `minDurationSeconds`, but at least 0.1 seconds.
 *
 * @param {number[]} sceneTimestamps First list.
 * @param {number[]} intervalTimestamps Second list.
 * @param {number} minDurationSeconds Basis for the minimum gap.
 * @returns {number[]} Sorted, thinned timestamps.
 */
function mergeTimestamps(sceneTimestamps, intervalTimestamps, minDurationSeconds) {
    const ordered = [...sceneTimestamps, ...intervalTimestamps]
        .filter((value) => Number.isFinite(value))
        .sort((a, b) => a - b);
    const minGap = Math.max(0.1, minDurationSeconds * 0.5);
    const spaced = [];
    let previous;
    for (const ts of ordered) {
        const isFirst = spaced.length === 0;
        if (!isFirst && !(ts - previous >= minGap)) {
            continue;
        }
        spaced.push(ts);
        previous = ts;
    }
    return spaced;
}
|
|
1491
|
-
/**
 * Keeps only timestamps that are at least `minDurationSeconds` after the
 * previously kept one.
 *
 * @param {number[]} timestamps Input timestamps (not modified).
 * @param {number} minDurationSeconds Minimum gap. When `<= 0` the input is
 *   returned as a plain copy: not sorted and not filtered.
 * @returns {number[]} Otherwise the finite timestamps, sorted ascending and thinned.
 */
function filterTimestampsByMinDuration(timestamps, minDurationSeconds) {
    if (minDurationSeconds <= 0) {
        return timestamps.slice();
    }
    const ascending = timestamps
        .filter((value) => Number.isFinite(value))
        .sort((a, b) => a - b);
    const kept = [];
    let lastKept = Number.NEGATIVE_INFINITY;
    for (const ts of ascending) {
        if (!(ts - lastKept >= minDurationSeconds)) {
            continue;
        }
        kept.push(ts);
        lastKept = ts;
    }
    return kept;
}
|
|
1508
|
-
/**
 * Turns scene-change timestamps into consecutive `{ start, end }` segments
 * covering the video from 0 to `durationSeconds`.
 *
 * Negative and non-finite timestamps are ignored, and a timestamp within
 * 0.05s of the previous cut is treated as a duplicate.
 *
 * @param {number[]} sceneTimestamps Scene-change times in seconds.
 * @param {number|null|undefined} durationSeconds End of the last segment.
 * @returns {{ start: number, end: number|null }[]} `end` is `null` when the
 *   boundary is not a finite number greater than `start`.
 */
function buildSceneSegments(sceneTimestamps, durationSeconds) {
    const ascending = sceneTimestamps
        .filter((value) => Number.isFinite(value) && value >= 0)
        .sort((a, b) => a - b);
    const cuts = [];
    for (const ts of ascending) {
        if (cuts.length === 0 || ts - cuts[cuts.length - 1] > 0.05) {
            cuts.push(ts);
        }
    }
    // Boundaries are 0, every cut, then the duration; each adjacent pair is one segment.
    const bounds = [0, ...cuts, durationSeconds];
    return bounds.slice(0, -1).map((start, i) => {
        const rawEnd = bounds[i + 1];
        const usable = typeof rawEnd === "number" && Number.isFinite(rawEnd) && rawEnd > start;
        return { start, end: usable ? rawEnd : null };
    });
}
|
|
1530
|
-
/**
 * Finds the segment containing `timestamp` (start inclusive, end exclusive; a
 * `null`/`undefined` end is open-ended).
 *
 * @param {{ start: number, end: number|null }[]} segments Segments to search, in order.
 * @param {number} timestamp Time to locate.
 * @returns {object|null} The first matching segment, else the last segment,
 *   else `null` when the list is empty.
 */
function findSceneSegment(segments, timestamp) {
    const containing = segments.find(({ start, end }) => timestamp >= start && (end == null || timestamp < end));
    return containing ?? segments[segments.length - 1] ?? null;
}
|
|
1540
|
-
/**
 * Moves `timestamp` away from the edges of its scene segment.
 *
 * - No segment: the timestamp is returned unchanged.
 * - Segment without a usable end: the timestamp is only kept at or after the start.
 * - Otherwise a padding of 8% of the segment length (between 0.2s and 1.5s) is
 *   kept at both edges; segments too short for that use their midpoint.
 *
 * @param {number} timestamp Requested time in seconds.
 * @param {{ start: number, end: number|null }|null|undefined} segment Enclosing segment.
 * @returns {number} Adjusted timestamp.
 */
function adjustTimestampWithinSegment(timestamp, segment) {
    if (!segment) {
        return timestamp;
    }
    const { end } = segment;
    const start = Math.max(0, segment.start);
    const openEnded = end == null || !Number.isFinite(end) || end <= start;
    if (openEnded) {
        return Math.max(timestamp, start);
    }
    const length = Math.max(0, end - start);
    const padding = Math.min(1.5, Math.max(0.2, length * 0.08));
    return length <= padding * 2
        ? start + length * 0.5
        : clamp(timestamp, start + padding, end - padding);
}
|
|
1555
|
-
/**
 * For each target time, prefers the nearest scene change inside a window
 * around it, falling back to the target itself.
 *
 * The window is 35% of `intervalSeconds`, limited to 2..10 seconds. A scene
 * change is used only if it lies at least `minDurationSeconds` after the
 * previously chosen time; otherwise the plain target is used.
 *
 * @param {object} options
 * @param {number[]} options.targets Desired times; non-finite values are dropped.
 * @param {number[]} options.sceneTimestamps Candidate scene-change times.
 * @param {number} options.minDurationSeconds Minimum spacing for scene picks.
 * @param {number} options.intervalSeconds Spacing of the targets.
 * @returns {number[]} One chosen time per finite target, in ascending target order.
 */
function selectTimestampTargets({ targets, sceneTimestamps, minDurationSeconds, intervalSeconds, }) {
    const orderedTargets = targets
        .filter((value) => Number.isFinite(value))
        .sort((a, b) => a - b);
    if (orderedTargets.length === 0) {
        return [];
    }
    const scenes = filterTimestampsByMinDuration(sceneTimestamps, Math.max(0.1, minDurationSeconds * 0.25));
    const windowSeconds = Math.max(2, Math.min(10, intervalSeconds * 0.35));
    const picked = [];
    // Both lists are ascending, so the scan start only ever moves forward.
    let cursor = 0;
    for (const target of orderedTargets) {
        const windowStart = target - windowSeconds;
        const windowEnd = target + windowSeconds;
        while (cursor < scenes.length && scenes[cursor] < windowStart) {
            cursor += 1;
        }
        let nearest = null;
        let nearestGap = Number.POSITIVE_INFINITY;
        for (let i = cursor; i < scenes.length && !(scenes[i] > windowEnd); i += 1) {
            const gap = Math.abs(scenes[i] - target);
            if (gap < nearestGap) {
                nearest = scenes[i];
                nearestGap = gap;
            }
        }
        const candidate = nearest ?? target;
        const previous = picked.length > 0 ? picked[picked.length - 1] : Number.NEGATIVE_INFINITY;
        picked.push(candidate - previous >= minDurationSeconds ? candidate : target);
    }
    return picked;
}
|
|
1590
|
-
/**
 * Builds evenly spaced timestamps across the video.
 *
 * The target count is one per 180 seconds, limited to 6..20 and to
 * `maxSlides`; the spacing is never smaller than `minDurationSeconds`.
 *
 * @param {object} options
 * @param {number|null|undefined} options.durationSeconds Video length in seconds.
 * @param {number} options.minDurationSeconds Lower bound for the spacing.
 * @param {number} options.maxSlides Upper bound for the target count (at least 1 is used).
 * @returns {{ timestamps: number[], intervalSeconds: number }|null} Timestamps
 *   starting at 0 and strictly below the duration, or `null` when the duration
 *   or the resulting spacing is unusable.
 */
function buildIntervalTimestamps({ durationSeconds, minDurationSeconds, maxSlides, }) {
    if (!durationSeconds || durationSeconds <= 0)
        return null;
    const maxCount = Math.max(1, Math.floor(maxSlides));
    const desired = Math.min(20, Math.max(6, Math.round(durationSeconds / 180)));
    const targetCount = Math.min(maxCount, desired);
    const intervalSeconds = Math.max(minDurationSeconds, durationSeconds / targetCount);
    if (!Number.isFinite(intervalSeconds) || intervalSeconds <= 0)
        return null;
    // Derive each timestamp from its index instead of repeatedly adding the
    // interval: accumulated rounding error could otherwise leave the running sum
    // a hair below the duration and emit an extra timestamp at the very end.
    // The small epsilon absorbs rounding in the division itself.
    const count = Math.max(1, Math.ceil(durationSeconds / intervalSeconds - 1e-9));
    const timestamps = Array.from({ length: count }, (_, i) => i * intervalSeconds);
    return { timestamps, intervalSeconds };
}
|
|
1604
|
-
/**
 * Spawns a tracked child process and resolves with everything it wrote to
 * stdout as a UTF-8 string.
 *
 * Complete output lines are also forwarded to the tracking handle, and up to
 * roughly 8 KiB of stderr is kept for the error message.
 *
 * @param {object} options
 * @param {string} options.command Executable to spawn.
 * @param {string[]} options.args Arguments for the executable.
 * @param {number} options.timeoutMs Time after which the child is killed with SIGKILL.
 * @param {string} options.errorLabel Label used for tracking and in error messages.
 * @returns {Promise<string>} Captured stdout on exit code 0.
 * @throws {Error} On spawn error, timeout, or non-zero exit code.
 */
async function runProcessCapture({ command, args, timeoutMs, errorLabel, }) {
    return new Promise((resolve, reject) => {
        const { proc, handle } = spawnTracked(command, args, {
            stdio: ["ignore", "pipe", "pipe"],
            label: errorLabel,
            kind: errorLabel,
            captureOutput: false,
        });
        const captured = { stdout: "", stderr: "" };
        const pending = { stdout: "", stderr: "" };
        // Splits incoming text into lines, keeping the unfinished tail for the next chunk.
        const forwardLines = (channel, chunk) => {
            const parts = (pending[channel] + chunk).split(/\r?\n/);
            pending[channel] = parts.pop() ?? "";
            for (const part of parts) {
                if (part)
                    handle?.appendOutput(channel, part);
            }
        };
        const killTimer = setTimeout(() => {
            proc.kill("SIGKILL");
            reject(new Error(`${errorLabel} timed out`));
        }, timeoutMs);
        if (proc.stdout) {
            proc.stdout.setEncoding("utf8");
            proc.stdout.on("data", (chunk) => {
                captured.stdout += chunk;
                forwardLines("stdout", chunk);
            });
        }
        if (proc.stderr) {
            proc.stderr.setEncoding("utf8");
            proc.stderr.on("data", (chunk) => {
                if (captured.stderr.length < 8192) {
                    captured.stderr += chunk;
                }
                forwardLines("stderr", chunk);
            });
        }
        proc.on("error", (error) => {
            clearTimeout(killTimer);
            reject(error);
        });
        proc.on("close", (code) => {
            clearTimeout(killTimer);
            for (const channel of ["stdout", "stderr"]) {
                const rest = pending[channel].trim();
                if (rest)
                    handle?.appendOutput(channel, rest);
            }
            if (code !== 0) {
                const detail = captured.stderr.trim();
                reject(new Error(`${errorLabel} exited with code ${code}${detail ? `: ${detail}` : ""}`));
                return;
            }
            resolve(captured.stdout);
        });
    });
}
|
|
1667
|
-
/**
 * Spawns a tracked child process and resolves with its raw stdout bytes.
 *
 * stdout chunks are collected without decoding. Complete stderr lines are
 * forwarded to the tracking handle, and up to roughly 8 KiB of stderr is kept
 * for the error message.
 *
 * @param {object} options
 * @param {string} options.command Executable to spawn.
 * @param {string[]} options.args Arguments for the executable.
 * @param {number} options.timeoutMs Time after which the child is killed with SIGKILL.
 * @param {string} options.errorLabel Label used for tracking and in error messages.
 * @returns {Promise<Buffer>} Concatenated stdout on exit code 0.
 * @throws {Error} On spawn error, timeout, or non-zero exit code.
 */
async function runProcessCaptureBuffer({ command, args, timeoutMs, errorLabel, }) {
    return new Promise((resolve, reject) => {
        const { proc, handle } = spawnTracked(command, args, {
            stdio: ["ignore", "pipe", "pipe"],
            label: errorLabel,
            kind: errorLabel,
            captureOutput: false,
        });
        const stdoutChunks = [];
        let errorText = "";
        let errorTail = "";
        const killTimer = setTimeout(() => {
            proc.kill("SIGKILL");
            reject(new Error(`${errorLabel} timed out`));
        }, timeoutMs);
        proc.stdout?.on("data", (chunk) => {
            stdoutChunks.push(chunk);
        });
        if (proc.stderr) {
            proc.stderr.setEncoding("utf8");
            proc.stderr.on("data", (chunk) => {
                if (errorText.length < 8192) {
                    errorText += chunk;
                }
                const parts = (errorTail + chunk).split(/\r?\n/);
                // Keep the unfinished last line for the next chunk.
                errorTail = parts.pop() ?? "";
                for (const part of parts) {
                    if (part)
                        handle?.appendOutput("stderr", part);
                }
            });
        }
        proc.on("error", (error) => {
            clearTimeout(killTimer);
            reject(error);
        });
        proc.on("close", (code) => {
            clearTimeout(killTimer);
            const rest = errorTail.trim();
            if (rest)
                handle?.appendOutput("stderr", rest);
            if (code !== 0) {
                const detail = errorText.trim();
                reject(new Error(`${errorLabel} exited with code ${code}${detail ? `: ${detail}` : ""}`));
                return;
            }
            resolve(Buffer.concat(stdoutChunks));
        });
    });
}
|
|
1719
|
-
/**
 * Keeps at most `maxSlides` slides from the front of the list, deletes the
 * image files of the rest (fire-and-forget), and renumbers the kept slides
 * from 1.
 *
 * @param {object[]} slides Slides in order.
 * @param {number} maxSlides Limit; `<= 0` means no limit.
 * @param {string[]} warnings Receives a message when slides were trimmed.
 * @returns {object[]} `slides` itself when nothing is trimmed, otherwise
 *   renumbered copies of the first `maxSlides` slides.
 */
function applyMaxSlidesFilter(slides, maxSlides, warnings) {
    const withinLimit = maxSlides <= 0 || slides.length <= maxSlides;
    if (withinLimit) {
        return slides;
    }
    slides
        .slice(maxSlides)
        .filter((slide) => slide.imagePath)
        .forEach((slide) => {
            // Best-effort cleanup; the deletion is not awaited and failures are ignored.
            void fs.rm(slide.imagePath, { force: true }).catch(() => { });
        });
    warnings.push(`Trimmed slides to max ${maxSlides}`);
    return slides.slice(0, maxSlides).map((slide, position) => ({ ...slide, index: position + 1 }));
}
|
|
1732
|
-
/**
 * Renames each slide image to `slide_<index padded to 4>_<timestamp>s.png`
 * inside `slidesDir`.
 *
 * If the rename fails, the file is copied to the new name and the old file is
 * removed instead. Slides already at their target path are left alone.
 *
 * @param {object[]} slides Slides with `index`, `timestamp` and `imagePath`.
 * @param {string} slidesDir Directory that receives the renamed files.
 * @returns {Promise<object[]>} Copies of the slides with `imagePath` updated.
 */
async function renameSlidesWithTimestamps(slides, slidesDir) {
    const renamed = [];
    for (const slide of slides) {
        const timestampLabel = slide.timestamp.toFixed(2);
        const paddedIndex = slide.index.toString().padStart(4, "0");
        const nextPath = path.join(slidesDir, `slide_${paddedIndex}_${timestampLabel}s.png`);
        if (slide.imagePath !== nextPath) {
            try {
                await fs.rename(slide.imagePath, nextPath);
            }
            catch {
                // Fall back to copy + delete when a plain rename is not possible.
                await fs.copyFile(slide.imagePath, nextPath);
                await fs.rm(slide.imagePath, { force: true });
            }
        }
        renamed.push({ ...slide, imagePath: nextPath });
    }
    return renamed;
}
|
|
1748
374
|
async function withSlidesLock(key, fn, onWait) {
|
|
1749
375
|
const previous = slidesLocks.get(key) ?? null;
|
|
1750
376
|
if (previous && onWait)
|
|
@@ -1765,187 +391,4 @@ async function withSlidesLock(key, fn, onWait) {
|
|
|
1765
391
|
}
|
|
1766
392
|
}
|
|
1767
393
|
}
|
|
1768
|
-
/**
 * Runs async task functions with a bounded number of parallel workers.
 *
 * @param {Array<() => Promise<any>>} tasks Functions to call; each is started at most once.
 * @param {number} workers Desired parallelism, rounded and limited to 1..16.
 *   A value that is not a number falls back to a single worker.
 * @param {(completed: number, total: number) => void} [onProgress] Called
 *   after every task settles, whether it succeeded or failed.
 * @returns {Promise<any[]>} Results in the same order as `tasks`.
 * @throws Rejects with the first task error.
 */
async function runWithConcurrency(tasks, workers, onProgress) {
    if (tasks.length === 0)
        return [];
    const requested = Math.round(workers);
    // NaN would propagate through Math.min/Math.max and produce zero runners,
    // so no task would ever be started; use one worker instead.
    const concurrency = Number.isNaN(requested) ? 1 : Math.max(1, Math.min(16, requested));
    const results = new Array(tasks.length);
    const total = tasks.length;
    let completed = 0;
    let nextIndex = 0;
    const worker = async () => {
        while (true) {
            // Claiming an index is synchronous, so two workers never take the same task.
            const current = nextIndex;
            if (current >= tasks.length)
                return;
            nextIndex += 1;
            try {
                results[current] = await tasks[current]();
            }
            finally {
                completed += 1;
                onProgress?.(completed, total);
            }
        }
    };
    const runners = Array.from({ length: Math.min(concurrency, tasks.length) }, () => worker());
    await Promise.all(runners);
    return results;
}
|
|
1795
|
-
/**
 * Runs OCR on every slide image and attaches the cleaned text plus a
 * confidence estimate.
 *
 * A slide whose OCR fails gets empty text and confidence 0 instead of failing
 * the whole batch.
 *
 * @param {object[]} slides Slides with `imagePath` and `index`.
 * @param {string} tesseractPath Executable passed to `runTesseract`.
 * @param {number} workers Concurrency passed to `runWithConcurrency`.
 * @param {Function} [onProgress] Progress callback passed to `runWithConcurrency`.
 * @returns {Promise<object[]>} Slide copies with `ocrText` and `ocrConfidence`, sorted by `index`.
 */
async function runOcrOnSlides(slides, tesseractPath, workers, onProgress) {
    const recognise = async (slide) => {
        let ocrText = "";
        let ocrConfidence = 0;
        try {
            ocrText = cleanOcrText(await runTesseract(tesseractPath, slide.imagePath));
            ocrConfidence = estimateOcrConfidence(ocrText);
        }
        catch {
            ocrText = "";
            ocrConfidence = 0;
        }
        return { ...slide, ocrText, ocrConfidence };
    };
    const tasks = slides.map((slide) => () => recognise(slide));
    const results = await runWithConcurrency(tasks, workers, onProgress ?? undefined);
    return results.sort((a, b) => a.index - b.index);
}
|
|
1813
|
-
/**
 * Runs tesseract on one image and resolves with the text it prints to stdout.
 *
 * Complete stderr lines are forwarded to the tracking handle, and up to
 * roughly 8 KiB of stderr is kept for the error message. The child is killed
 * with SIGKILL after `TESSERACT_TIMEOUT_MS`.
 *
 * @param {string} tesseractPath Executable to spawn.
 * @param {string} imagePath Image passed as the first argument.
 * @returns {Promise<string>} Raw stdout text on exit code 0.
 * @throws {Error} On spawn error, timeout, or non-zero exit code.
 */
async function runTesseract(tesseractPath, imagePath) {
    return new Promise((resolve, reject) => {
        const { proc, handle } = spawnTracked(tesseractPath, [imagePath, "stdout", "--oem", "3", "--psm", "6"], {
            stdio: ["ignore", "pipe", "pipe"],
            label: "tesseract",
            kind: "tesseract",
            captureOutput: false,
        });
        let recognised = "";
        let errorText = "";
        let errorTail = "";
        const killTimer = setTimeout(() => {
            proc.kill("SIGKILL");
            reject(new Error("tesseract timed out"));
        }, TESSERACT_TIMEOUT_MS);
        if (proc.stdout) {
            proc.stdout.setEncoding("utf8");
            proc.stdout.on("data", (chunk) => {
                recognised += chunk;
            });
        }
        if (proc.stderr) {
            proc.stderr.setEncoding("utf8");
            proc.stderr.on("data", (chunk) => {
                if (errorText.length < 8192) {
                    errorText += chunk;
                }
                const parts = (errorTail + chunk).split(/\r?\n/);
                // Keep the unfinished last line for the next chunk.
                errorTail = parts.pop() ?? "";
                for (const part of parts) {
                    if (part)
                        handle?.appendOutput("stderr", part);
                }
            });
        }
        proc.on("error", (error) => {
            clearTimeout(killTimer);
            reject(error);
        });
        proc.on("close", (code) => {
            clearTimeout(killTimer);
            const rest = errorTail.trim();
            if (rest)
                handle?.appendOutput("stderr", rest);
            if (code !== 0) {
                const detail = errorText.trim();
                reject(new Error(`tesseract exited with code ${code}${detail ? `: ${detail}` : ""}`));
                return;
            }
            resolve(recognised);
        });
    });
}
|
|
1867
|
-
/**
 * Removes lines from OCR output that look like noise.
 *
 * Each line is trimmed and then dropped when it is shorter than two
 * characters, is longer than 20 characters without any space, or contains no
 * ASCII letter or digit.
 *
 * @param {string} text Raw OCR output.
 * @returns {string} Remaining lines joined with `\n`.
 */
function cleanOcrText(text) {
    const kept = [];
    for (const raw of text.split(/\r?\n/)) {
        const line = raw.trim();
        if (line.length < 2)
            continue;
        if (line.length > 20 && !line.includes(" "))
            continue;
        if (!/[a-z0-9]/i.test(line))
            continue;
        kept.push(line);
    }
    return kept.join("\n");
}
|
|
1876
|
-
/**
 * Rough OCR quality score: the share of ASCII letters and digits in the text.
 *
 * @param {string} text Cleaned OCR text.
 * @returns {number} Value in 0..1; 0 for empty or missing text.
 */
function estimateOcrConfidence(text) {
    if (!text) {
        return 0;
    }
    const total = text.length;
    if (total === 0) {
        return 0;
    }
    let alnum = 0;
    for (const char of text) {
        if (/[a-z0-9]/i.test(char)) {
            alnum += 1;
        }
    }
    return Math.min(1, alnum / total);
}
|
|
1885
|
-
/**
 * Writes a `slides.json` manifest describing an extraction result into
 * `slidesDir`.
 *
 * Each slide's `imagePath` is passed through `serializeSlideImagePath`, and
 * `slidesDirId` falls back to `buildSlidesDirId(slidesDir)` when the result
 * has none.
 *
 * @param {object} result Extraction result; see the payload fields below.
 * @param {string} slidesDir Directory that receives `slides.json`.
 * @returns {Promise<void>}
 */
async function writeSlidesJson(result, slidesDir) {
    const slidesDirId = result.slidesDirId ?? buildSlidesDirId(slidesDir);
    const slides = result.slides.map((slide) => ({
        ...slide,
        imagePath: serializeSlideImagePath(slidesDir, slide.imagePath),
    }));
    // Key order determines the layout of the written file.
    const payload = {
        sourceUrl: result.sourceUrl,
        sourceKind: result.sourceKind,
        sourceId: result.sourceId,
        slidesDir,
        slidesDirId,
        sceneThreshold: result.sceneThreshold,
        autoTuneThreshold: result.autoTuneThreshold,
        autoTune: result.autoTune,
        maxSlides: result.maxSlides,
        minSlideDuration: result.minSlideDuration,
        ocrRequested: result.ocrRequested,
        ocrAvailable: result.ocrAvailable,
        slideCount: result.slides.length,
        warnings: result.warnings,
        slides,
    };
    const manifestPath = path.join(slidesDir, "slides.json");
    await fs.writeFile(manifestPath, JSON.stringify(payload, null, 2), "utf8");
}
|
|
1909
|
-
/**
 * Builds an identifier for a direct video URL from its host, its file name
 * without extension, and the first 8 hex characters of the URL's SHA-1.
 *
 * A URL that cannot be parsed yields `video-<hash>`.
 *
 * @param {string} url Source URL.
 * @returns {string} `<host-slug>-<name-slug>-<hash>` with missing parts omitted.
 */
function buildDirectSourceId(url) {
    let parsed = null;
    try {
        parsed = new URL(url);
    }
    catch {
        parsed = null;
    }
    const hostSlug = resolveHostSlug(parsed);
    const rawName = parsed ? path.basename(parsed.pathname) : "video";
    // Strip a trailing ".ext"; an empty name falls back to "video".
    const base = rawName.replace(/\.[a-z0-9]+$/i, "").trim() || "video";
    const parts = [hostSlug, toSlug(base)].filter(Boolean);
    const hash = createHash("sha1").update(url).digest("hex").slice(0, 8);
    return parts.length > 0 ? `${parts.join("-")}-${hash}` : `video-${hash}`;
}
|
|
1926
|
-
/**
 * Builds the source identifier for a YouTube video.
 *
 * @param {string} videoId YouTube video id.
 * @returns {string} The id prefixed with `youtube-`.
 */
function buildYoutubeSourceId(videoId) {
    return "youtube-".concat(videoId);
}
|
|
1929
|
-
/**
 * Derives a short slug from a parsed URL's host name.
 *
 * Any host containing `youtube.com` or `youtu.be` maps to `"youtube"`; this is
 * a substring match, so unrelated hosts that merely contain those strings map
 * to it as well. Other hosts are slugified with `toSlug`.
 *
 * @param {URL|null|undefined} parsed Parsed URL.
 * @returns {string|null} Slug, or `null` when there is no host or the slug is empty.
 */
function resolveHostSlug(parsed) {
    const hostname = parsed?.hostname;
    if (!hostname) {
        return null;
    }
    const host = hostname.toLowerCase();
    const isYoutube = ["youtube.com", "youtu.be"].some((needle) => host.includes(needle));
    if (isYoutube) {
        return "youtube";
    }
    return toSlug(host) || null;
}
|
|
1939
|
-
/**
 * Converts text into a lowercase slug of `a-z`, `0-9` and single dashes, at
 * most 64 characters long and without leading or trailing dashes.
 *
 * @param {string} value Text to slugify.
 * @returns {string} The slug; empty when `value` has no letters or digits.
 */
function toSlug(value) {
    const maxLength = 64;
    const dashed = value.toLowerCase().replace(/[^a-z0-9]+/g, "-");
    const trimmed = dashed.replace(/^-+|-+$/g, "");
    if (trimmed.length <= maxLength) {
        return trimmed;
    }
    // Cutting can expose a dash at the end; strip it again.
    return trimmed.slice(0, maxLength).replace(/-+$/g, "");
}
|
|
1951
394
|
//# sourceMappingURL=extract.js.map
|