@steipete/summarize 0.10.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +80 -28
- package/README.md +115 -30
- package/dist/cli.js +1 -1
- package/dist/esm/cache.js +67 -65
- package/dist/esm/cache.js.map +1 -1
- package/dist/esm/cli-main.js +27 -27
- package/dist/esm/cli-main.js.map +1 -1
- package/dist/esm/cli.js +2 -2
- package/dist/esm/cli.js.map +1 -1
- package/dist/esm/config.js +310 -166
- package/dist/esm/config.js.map +1 -1
- package/dist/esm/content/asset.js +53 -50
- package/dist/esm/content/asset.js.map +1 -1
- package/dist/esm/content/index.js +1 -1
- package/dist/esm/content/index.js.map +1 -1
- package/dist/esm/costs.js +1 -1
- package/dist/esm/costs.js.map +1 -1
- package/dist/esm/daemon/agent.js +165 -164
- package/dist/esm/daemon/agent.js.map +1 -1
- package/dist/esm/daemon/auto-mode.js +3 -3
- package/dist/esm/daemon/auto-mode.js.map +1 -1
- package/dist/esm/daemon/chat.js +16 -14
- package/dist/esm/daemon/chat.js.map +1 -1
- package/dist/esm/daemon/cli-entrypoint.js +72 -0
- package/dist/esm/daemon/cli-entrypoint.js.map +1 -0
- package/dist/esm/daemon/cli.js +63 -87
- package/dist/esm/daemon/cli.js.map +1 -1
- package/dist/esm/daemon/config.js +15 -15
- package/dist/esm/daemon/config.js.map +1 -1
- package/dist/esm/daemon/constants.js +6 -6
- package/dist/esm/daemon/constants.js.map +1 -1
- package/dist/esm/daemon/env-merge.js.map +1 -1
- package/dist/esm/daemon/env-snapshot.js +36 -31
- package/dist/esm/daemon/env-snapshot.js.map +1 -1
- package/dist/esm/daemon/flow-context.js +59 -28
- package/dist/esm/daemon/flow-context.js.map +1 -1
- package/dist/esm/daemon/launchd.js +100 -55
- package/dist/esm/daemon/launchd.js.map +1 -1
- package/dist/esm/daemon/meta.js +5 -5
- package/dist/esm/daemon/meta.js.map +1 -1
- package/dist/esm/daemon/models.js +54 -31
- package/dist/esm/daemon/models.js.map +1 -1
- package/dist/esm/daemon/process-registry.js +15 -15
- package/dist/esm/daemon/process-registry.js.map +1 -1
- package/dist/esm/daemon/schtasks.js +42 -42
- package/dist/esm/daemon/schtasks.js.map +1 -1
- package/dist/esm/daemon/server.js +248 -244
- package/dist/esm/daemon/server.js.map +1 -1
- package/dist/esm/daemon/summarize-progress.js +11 -11
- package/dist/esm/daemon/summarize-progress.js.map +1 -1
- package/dist/esm/daemon/summarize.js +29 -29
- package/dist/esm/daemon/summarize.js.map +1 -1
- package/dist/esm/daemon/systemd.js +47 -47
- package/dist/esm/daemon/systemd.js.map +1 -1
- package/dist/esm/firecrawl.js +12 -12
- package/dist/esm/firecrawl.js.map +1 -1
- package/dist/esm/flags.js +32 -32
- package/dist/esm/flags.js.map +1 -1
- package/dist/esm/index.js +3 -3
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/language.js +1 -1
- package/dist/esm/language.js.map +1 -1
- package/dist/esm/llm/cli.js +128 -64
- package/dist/esm/llm/cli.js.map +1 -1
- package/dist/esm/llm/errors.js +1 -1
- package/dist/esm/llm/errors.js.map +1 -1
- package/dist/esm/llm/generate-text.js +107 -98
- package/dist/esm/llm/generate-text.js.map +1 -1
- package/dist/esm/llm/google-models.js +17 -17
- package/dist/esm/llm/google-models.js.map +1 -1
- package/dist/esm/llm/html-to-markdown.js +3 -3
- package/dist/esm/llm/html-to-markdown.js.map +1 -1
- package/dist/esm/llm/model-id.js +38 -16
- package/dist/esm/llm/model-id.js.map +1 -1
- package/dist/esm/llm/prompt.js +5 -5
- package/dist/esm/llm/prompt.js.map +1 -1
- package/dist/esm/llm/providers/anthropic.js +33 -33
- package/dist/esm/llm/providers/anthropic.js.map +1 -1
- package/dist/esm/llm/providers/google.js +19 -19
- package/dist/esm/llm/providers/google.js.map +1 -1
- package/dist/esm/llm/providers/models.js +30 -30
- package/dist/esm/llm/providers/models.js.map +1 -1
- package/dist/esm/llm/providers/openai.js +35 -34
- package/dist/esm/llm/providers/openai.js.map +1 -1
- package/dist/esm/llm/providers/shared.js +8 -8
- package/dist/esm/llm/providers/shared.js.map +1 -1
- package/dist/esm/llm/transcript-to-markdown.js +9 -5
- package/dist/esm/llm/transcript-to-markdown.js.map +1 -1
- package/dist/esm/llm/usage.js +18 -18
- package/dist/esm/llm/usage.js.map +1 -1
- package/dist/esm/logging/daemon.js +21 -21
- package/dist/esm/logging/daemon.js.map +1 -1
- package/dist/esm/logging/ring-file.js +5 -5
- package/dist/esm/logging/ring-file.js.map +1 -1
- package/dist/esm/markitdown.js +21 -19
- package/dist/esm/markitdown.js.map +1 -1
- package/dist/esm/media-cache.js +39 -39
- package/dist/esm/media-cache.js.map +1 -1
- package/dist/esm/model-auto.js +175 -106
- package/dist/esm/model-auto.js.map +1 -1
- package/dist/esm/model-spec.js +52 -42
- package/dist/esm/model-spec.js.map +1 -1
- package/dist/esm/pricing/litellm.js +4 -4
- package/dist/esm/pricing/litellm.js.map +1 -1
- package/dist/esm/processes.js +1 -1
- package/dist/esm/processes.js.map +1 -1
- package/dist/esm/prompts/index.js +1 -1
- package/dist/esm/prompts/index.js.map +1 -1
- package/dist/esm/refresh-free.js +81 -81
- package/dist/esm/refresh-free.js.map +1 -1
- package/dist/esm/run/attachments.js +47 -44
- package/dist/esm/run/attachments.js.map +1 -1
- package/dist/esm/run/bird.js +26 -26
- package/dist/esm/run/bird.js.map +1 -1
- package/dist/esm/run/cache-state.js +7 -7
- package/dist/esm/run/cache-state.js.map +1 -1
- package/dist/esm/run/cli-fallback-state.js +45 -0
- package/dist/esm/run/cli-fallback-state.js.map +1 -0
- package/dist/esm/run/cli-preflight.js +24 -24
- package/dist/esm/run/cli-preflight.js.map +1 -1
- package/dist/esm/run/constants.js +12 -12
- package/dist/esm/run/constants.js.map +1 -1
- package/dist/esm/run/cookies/twitter.js +47 -47
- package/dist/esm/run/cookies/twitter.js.map +1 -1
- package/dist/esm/run/env.js +21 -15
- package/dist/esm/run/env.js.map +1 -1
- package/dist/esm/run/fetch-with-timeout.js +4 -4
- package/dist/esm/run/fetch-with-timeout.js.map +1 -1
- package/dist/esm/run/finish-line.js +68 -68
- package/dist/esm/run/finish-line.js.map +1 -1
- package/dist/esm/run/flows/asset/extract.js +15 -15
- package/dist/esm/run/flows/asset/extract.js.map +1 -1
- package/dist/esm/run/flows/asset/input.js +47 -66
- package/dist/esm/run/flows/asset/input.js.map +1 -1
- package/dist/esm/run/flows/asset/media-policy.js +1 -1
- package/dist/esm/run/flows/asset/media-policy.js.map +1 -1
- package/dist/esm/run/flows/asset/media.js +49 -40
- package/dist/esm/run/flows/asset/media.js.map +1 -1
- package/dist/esm/run/flows/asset/output.js +12 -12
- package/dist/esm/run/flows/asset/output.js.map +1 -1
- package/dist/esm/run/flows/asset/preprocess.js +79 -44
- package/dist/esm/run/flows/asset/preprocess.js.map +1 -1
- package/dist/esm/run/flows/asset/summary.js +173 -106
- package/dist/esm/run/flows/asset/summary.js.map +1 -1
- package/dist/esm/run/flows/url/extract.js +26 -26
- package/dist/esm/run/flows/url/extract.js.map +1 -1
- package/dist/esm/run/flows/url/flow.js +104 -98
- package/dist/esm/run/flows/url/flow.js.map +1 -1
- package/dist/esm/run/flows/url/markdown.js +57 -57
- package/dist/esm/run/flows/url/markdown.js.map +1 -1
- package/dist/esm/run/flows/url/slides-output.js +61 -59
- package/dist/esm/run/flows/url/slides-output.js.map +1 -1
- package/dist/esm/run/flows/url/slides-text.js +85 -85
- package/dist/esm/run/flows/url/slides-text.js.map +1 -1
- package/dist/esm/run/flows/url/summary.js +174 -107
- package/dist/esm/run/flows/url/summary.js.map +1 -1
- package/dist/esm/run/format.js +10 -10
- package/dist/esm/run/format.js.map +1 -1
- package/dist/esm/run/help.js +141 -135
- package/dist/esm/run/help.js.map +1 -1
- package/dist/esm/run/logging.js +10 -10
- package/dist/esm/run/logging.js.map +1 -1
- package/dist/esm/run/markdown.js +12 -12
- package/dist/esm/run/markdown.js.map +1 -1
- package/dist/esm/run/media-cache-state.js +5 -5
- package/dist/esm/run/media-cache-state.js.map +1 -1
- package/dist/esm/run/model-attempts.js.map +1 -1
- package/dist/esm/run/openrouter.js +11 -11
- package/dist/esm/run/openrouter.js.map +1 -1
- package/dist/esm/run/progress.js +1 -1
- package/dist/esm/run/progress.js.map +1 -1
- package/dist/esm/run/run-config.js +16 -16
- package/dist/esm/run/run-config.js.map +1 -1
- package/dist/esm/run/run-context.js +2 -2
- package/dist/esm/run/run-context.js.map +1 -1
- package/dist/esm/run/run-env.js +55 -54
- package/dist/esm/run/run-env.js.map +1 -1
- package/dist/esm/run/run-input.js +3 -3
- package/dist/esm/run/run-input.js.map +1 -1
- package/dist/esm/run/run-metrics.js +16 -16
- package/dist/esm/run/run-metrics.js.map +1 -1
- package/dist/esm/run/run-models.js +28 -23
- package/dist/esm/run/run-models.js.map +1 -1
- package/dist/esm/run/run-output.js +3 -3
- package/dist/esm/run/run-output.js.map +1 -1
- package/dist/esm/run/run-settings.js +83 -34
- package/dist/esm/run/run-settings.js.map +1 -1
- package/dist/esm/run/run-stream.js +4 -4
- package/dist/esm/run/run-stream.js.map +1 -1
- package/dist/esm/run/runner.js +166 -127
- package/dist/esm/run/runner.js.map +1 -1
- package/dist/esm/run/slides-cli.js +43 -42
- package/dist/esm/run/slides-cli.js.map +1 -1
- package/dist/esm/run/slides-render.js +36 -36
- package/dist/esm/run/slides-render.js.map +1 -1
- package/dist/esm/run/stdin-temp-file.js +77 -0
- package/dist/esm/run/stdin-temp-file.js.map +1 -0
- package/dist/esm/run/stream-output.js +7 -7
- package/dist/esm/run/stream-output.js.map +1 -1
- package/dist/esm/run/streaming.js +16 -16
- package/dist/esm/run/streaming.js.map +1 -1
- package/dist/esm/run/summary-engine.js +57 -51
- package/dist/esm/run/summary-engine.js.map +1 -1
- package/dist/esm/run/summary-llm.js +3 -3
- package/dist/esm/run/summary-llm.js.map +1 -1
- package/dist/esm/run/terminal.js +4 -4
- package/dist/esm/run/terminal.js.map +1 -1
- package/dist/esm/run/tips.js +2 -2
- package/dist/esm/run/tips.js.map +1 -1
- package/dist/esm/run/transcriber-cli.js +49 -49
- package/dist/esm/run/transcriber-cli.js.map +1 -1
- package/dist/esm/run.js +1 -1
- package/dist/esm/run.js.map +1 -1
- package/dist/esm/shared/contracts.js +1 -1
- package/dist/esm/shared/contracts.js.map +1 -1
- package/dist/esm/shared/sse-events.js +16 -16
- package/dist/esm/shared/sse-events.js.map +1 -1
- package/dist/esm/shared/streaming-merge.js +3 -3
- package/dist/esm/shared/streaming-merge.js.map +1 -1
- package/dist/esm/slides/extract.js +258 -249
- package/dist/esm/slides/extract.js.map +1 -1
- package/dist/esm/slides/index.js +3 -3
- package/dist/esm/slides/index.js.map +1 -1
- package/dist/esm/slides/settings.js +14 -14
- package/dist/esm/slides/settings.js.map +1 -1
- package/dist/esm/slides/store.js +9 -9
- package/dist/esm/slides/store.js.map +1 -1
- package/dist/esm/tty/format.js +13 -13
- package/dist/esm/tty/format.js.map +1 -1
- package/dist/esm/tty/osc-progress.js +1 -1
- package/dist/esm/tty/osc-progress.js.map +1 -1
- package/dist/esm/tty/progress/fetch-html.js +14 -14
- package/dist/esm/tty/progress/fetch-html.js.map +1 -1
- package/dist/esm/tty/progress/transcript.js +70 -62
- package/dist/esm/tty/progress/transcript.js.map +1 -1
- package/dist/esm/tty/spinner.js +20 -9
- package/dist/esm/tty/spinner.js.map +1 -1
- package/dist/esm/tty/theme.js +92 -92
- package/dist/esm/tty/theme.js.map +1 -1
- package/dist/esm/tty/website-progress.js +32 -32
- package/dist/esm/tty/website-progress.js.map +1 -1
- package/dist/esm/version.js +29 -29
- package/dist/esm/version.js.map +1 -1
- package/dist/types/cache.d.ts +6 -6
- package/dist/types/config.d.ts +49 -7
- package/dist/types/content/asset.d.ts +8 -6
- package/dist/types/content/index.d.ts +1 -1
- package/dist/types/costs.d.ts +3 -3
- package/dist/types/daemon/agent.d.ts +1 -1
- package/dist/types/daemon/auto-mode.d.ts +3 -3
- package/dist/types/daemon/chat.d.ts +2 -2
- package/dist/types/daemon/cli-entrypoint.d.ts +2 -0
- package/dist/types/daemon/config.d.ts +2 -2
- package/dist/types/daemon/env-merge.d.ts +1 -1
- package/dist/types/daemon/env-snapshot.d.ts +1 -1
- package/dist/types/daemon/flow-context.d.ts +7 -7
- package/dist/types/daemon/launchd.d.ts +8 -0
- package/dist/types/daemon/models.d.ts +6 -2
- package/dist/types/daemon/process-registry.d.ts +5 -5
- package/dist/types/daemon/server.d.ts +2 -2
- package/dist/types/daemon/summarize-progress.d.ts +1 -1
- package/dist/types/daemon/summarize.d.ts +7 -7
- package/dist/types/firecrawl.d.ts +1 -1
- package/dist/types/flags.d.ts +11 -11
- package/dist/types/index.d.ts +4 -4
- package/dist/types/language.d.ts +1 -1
- package/dist/types/llm/attachments.d.ts +1 -1
- package/dist/types/llm/cli.d.ts +3 -3
- package/dist/types/llm/generate-text.d.ts +7 -7
- package/dist/types/llm/html-to-markdown.d.ts +3 -3
- package/dist/types/llm/model-id.d.ts +1 -1
- package/dist/types/llm/prompt.d.ts +2 -2
- package/dist/types/llm/providers/anthropic.d.ts +3 -3
- package/dist/types/llm/providers/google.d.ts +3 -3
- package/dist/types/llm/providers/models.d.ts +2 -2
- package/dist/types/llm/providers/openai.d.ts +4 -4
- package/dist/types/llm/providers/shared.d.ts +2 -2
- package/dist/types/llm/transcript-to-markdown.d.ts +4 -2
- package/dist/types/llm/usage.d.ts +1 -1
- package/dist/types/logging/daemon.d.ts +4 -4
- package/dist/types/markitdown.d.ts +1 -1
- package/dist/types/media-cache.d.ts +2 -2
- package/dist/types/model-auto.d.ts +14 -4
- package/dist/types/model-spec.d.ts +10 -10
- package/dist/types/pricing/litellm.d.ts +1 -1
- package/dist/types/processes.d.ts +1 -1
- package/dist/types/prompts/index.d.ts +1 -1
- package/dist/types/run/attachments.d.ts +7 -7
- package/dist/types/run/bird.d.ts +2 -2
- package/dist/types/run/cache-state.d.ts +2 -2
- package/dist/types/run/cli-fallback-state.d.ts +6 -0
- package/dist/types/run/constants.d.ts +1 -1
- package/dist/types/run/cookies/twitter.d.ts +1 -1
- package/dist/types/run/env.d.ts +1 -1
- package/dist/types/run/finish-line.d.ts +5 -5
- package/dist/types/run/flows/asset/extract.d.ts +4 -4
- package/dist/types/run/flows/asset/input.d.ts +9 -3
- package/dist/types/run/flows/asset/media.d.ts +1 -1
- package/dist/types/run/flows/asset/output.d.ts +5 -5
- package/dist/types/run/flows/asset/preprocess.d.ts +23 -17
- package/dist/types/run/flows/asset/summary.d.ts +19 -17
- package/dist/types/run/flows/url/extract.d.ts +1 -1
- package/dist/types/run/flows/url/flow.d.ts +1 -1
- package/dist/types/run/flows/url/markdown.d.ts +6 -6
- package/dist/types/run/flows/url/slides-output.d.ts +7 -7
- package/dist/types/run/flows/url/slides-text.d.ts +9 -9
- package/dist/types/run/flows/url/summary.d.ts +11 -11
- package/dist/types/run/flows/url/types.d.ts +26 -22
- package/dist/types/run/format.d.ts +3 -3
- package/dist/types/run/help.d.ts +1 -1
- package/dist/types/run/media-cache-state.d.ts +2 -2
- package/dist/types/run/model-attempts.d.ts +1 -1
- package/dist/types/run/run-config.d.ts +4 -4
- package/dist/types/run/run-context.d.ts +3 -1
- package/dist/types/run/run-env.d.ts +3 -1
- package/dist/types/run/run-input.d.ts +2 -2
- package/dist/types/run/run-metrics.d.ts +3 -3
- package/dist/types/run/run-models.d.ts +3 -2
- package/dist/types/run/run-output.d.ts +1 -1
- package/dist/types/run/run-settings.d.ts +15 -6
- package/dist/types/run/run-stream.d.ts +2 -2
- package/dist/types/run/runner.d.ts +3 -2
- package/dist/types/run/slides-render.d.ts +4 -4
- package/dist/types/run/stdin-temp-file.d.ts +9 -0
- package/dist/types/run/stream-output.d.ts +1 -1
- package/dist/types/run/streaming.d.ts +4 -4
- package/dist/types/run/summary-engine.d.ts +11 -11
- package/dist/types/run/summary-llm.d.ts +5 -5
- package/dist/types/run/types.d.ts +4 -4
- package/dist/types/run.d.ts +1 -1
- package/dist/types/shared/contracts.d.ts +2 -2
- package/dist/types/shared/sse-events.d.ts +9 -9
- package/dist/types/slides/extract.d.ts +5 -4
- package/dist/types/slides/index.d.ts +5 -5
- package/dist/types/slides/store.d.ts +2 -2
- package/dist/types/slides/types.d.ts +2 -2
- package/dist/types/tty/osc-progress.d.ts +5 -5
- package/dist/types/tty/progress/fetch-html.d.ts +3 -3
- package/dist/types/tty/progress/transcript.d.ts +3 -3
- package/dist/types/tty/spinner.d.ts +2 -2
- package/dist/types/tty/theme.d.ts +2 -2
- package/dist/types/tty/website-progress.d.ts +3 -3
- package/dist/types/version.d.ts +1 -1
- package/docs/agent.md +38 -4
- package/docs/assets/site.js +46 -46
- package/docs/chrome-extension.md +11 -5
- package/docs/cli.md +59 -13
- package/docs/config.md +59 -10
- package/docs/extract-only.md +2 -0
- package/docs/index.html +33 -14
- package/docs/llm.md +7 -4
- package/docs/media.md +5 -4
- package/docs/model-auto.md +3 -2
- package/docs/nvidia-onnx-transcription.md +3 -3
- package/docs/openai.md +1 -1
- package/docs/releasing.md +3 -0
- package/docs/site/404.html +4 -1
- package/docs/site/assets/site.js +46 -46
- package/docs/site/docs/chrome-extension.html +18 -6
- package/docs/site/docs/config.html +29 -8
- package/docs/site/docs/extract-only.html +16 -4
- package/docs/site/docs/firecrawl.html +12 -3
- package/docs/site/docs/index.html +35 -6
- package/docs/site/docs/llm.html +19 -5
- package/docs/site/docs/openai.html +18 -5
- package/docs/site/docs/website.html +29 -9
- package/docs/site/docs/youtube.html +12 -3
- package/docs/site/index.html +33 -14
- package/docs/slides.md +13 -5
- package/docs/smoketest.md +29 -20
- package/docs/timestamps.md +21 -0
- package/docs/website.md +2 -1
- package/docs/youtube.md +4 -0
- package/package.json +36 -35
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import { createHash, randomUUID } from
|
|
2
|
-
import { promises as fs } from
|
|
3
|
-
import { tmpdir } from
|
|
4
|
-
import path from
|
|
5
|
-
import { extractYouTubeVideoId, isDirectMediaUrl, isYouTubeUrl } from
|
|
6
|
-
import { spawnTracked } from
|
|
7
|
-
import { resolveExecutableInPath } from
|
|
8
|
-
import { buildSlidesDirId, readSlidesCacheIfValid, resolveSlidesDir, serializeSlideImagePath, } from
|
|
1
|
+
import { createHash, randomUUID } from "node:crypto";
|
|
2
|
+
import { promises as fs } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
import { extractYouTubeVideoId, isDirectMediaUrl, isYouTubeUrl } from "../content/index.js";
|
|
6
|
+
import { spawnTracked } from "../processes.js";
|
|
7
|
+
import { resolveExecutableInPath } from "../run/env.js";
|
|
8
|
+
import { buildSlidesDirId, readSlidesCacheIfValid, resolveSlidesDir, serializeSlideImagePath, } from "./store.js";
|
|
9
9
|
const FFMPEG_TIMEOUT_FALLBACK_MS = 300_000;
|
|
10
10
|
const slidesLocks = new Map();
|
|
11
11
|
const YT_DLP_TIMEOUT_MS = 300_000;
|
|
@@ -14,7 +14,7 @@ const DEFAULT_SLIDES_WORKERS = 8;
|
|
|
14
14
|
const DEFAULT_SLIDES_SAMPLE_COUNT = 8;
|
|
15
15
|
// Prefer broadly-decodable H.264/MP4 for ffmpeg stability.
|
|
16
16
|
// (Some "bestvideo" picks AV1 which can fail on certain ffmpeg builds / hwaccel setups.)
|
|
17
|
-
const DEFAULT_YT_DLP_FORMAT_EXTRACT =
|
|
17
|
+
const DEFAULT_YT_DLP_FORMAT_EXTRACT = "bestvideo[height<=720][vcodec^=avc1][ext=mp4]/best[height<=720][vcodec^=avc1][ext=mp4]/bestvideo[height<=720][ext=mp4]/best[height<=720]";
|
|
18
18
|
function createSlidesLogger(logger) {
|
|
19
19
|
const logSlides = (message) => {
|
|
20
20
|
if (!logger)
|
|
@@ -53,17 +53,21 @@ function resolveSlidesYtDlpExtractFormat(env) {
|
|
|
53
53
|
}
|
|
54
54
|
function resolveSlidesStreamFallback(env) {
|
|
55
55
|
const raw = env.SLIDES_EXTRACT_STREAM?.trim().toLowerCase();
|
|
56
|
-
return raw ===
|
|
56
|
+
return raw === "1" || raw === "true" || raw === "yes";
|
|
57
|
+
}
|
|
58
|
+
function buildYtDlpCookiesArgs(cookiesFromBrowser) {
|
|
59
|
+
const value = typeof cookiesFromBrowser === "string" ? cookiesFromBrowser.trim() : "";
|
|
60
|
+
return value.length > 0 ? ["--cookies-from-browser", value] : [];
|
|
57
61
|
}
|
|
58
62
|
function buildSlidesMediaCacheKey(url) {
|
|
59
63
|
return `${url}#summarize-slides`;
|
|
60
64
|
}
|
|
61
65
|
function formatBytes(bytes) {
|
|
62
66
|
if (!Number.isFinite(bytes) || bytes <= 0)
|
|
63
|
-
return
|
|
64
|
-
const units = [
|
|
67
|
+
return "0B";
|
|
68
|
+
const units = ["B", "KB", "MB", "GB", "TB"];
|
|
65
69
|
let value = bytes;
|
|
66
|
-
let unit = units[0] ??
|
|
70
|
+
let unit = units[0] ?? "B";
|
|
67
71
|
for (let i = 1; i < units.length && value >= 1024; i += 1) {
|
|
68
72
|
value /= 1024;
|
|
69
73
|
unit = units[i] ?? unit;
|
|
@@ -72,28 +76,28 @@ function formatBytes(bytes) {
|
|
|
72
76
|
return `${rounded}${unit}`;
|
|
73
77
|
}
|
|
74
78
|
function resolveToolPath(binary, env, explicitEnvKey) {
|
|
75
|
-
const explicit = explicitEnvKey && typeof env[explicitEnvKey] ===
|
|
79
|
+
const explicit = explicitEnvKey && typeof env[explicitEnvKey] === "string" ? env[explicitEnvKey]?.trim() : "";
|
|
76
80
|
if (explicit)
|
|
77
81
|
return resolveExecutableInPath(explicit, env);
|
|
78
82
|
return resolveExecutableInPath(binary, env);
|
|
79
83
|
}
|
|
80
84
|
export function resolveSlideSource({ url, extracted, }) {
|
|
81
85
|
const directUrl = extracted.video?.url ?? extracted.url;
|
|
82
|
-
const youtubeCandidate = extractYouTubeVideoId(extracted.video?.url ??
|
|
86
|
+
const youtubeCandidate = extractYouTubeVideoId(extracted.video?.url ?? "") ??
|
|
83
87
|
extractYouTubeVideoId(extracted.url) ??
|
|
84
88
|
extractYouTubeVideoId(url);
|
|
85
89
|
if (youtubeCandidate) {
|
|
86
90
|
return {
|
|
87
91
|
url: `https://www.youtube.com/watch?v=${youtubeCandidate}`,
|
|
88
|
-
kind:
|
|
92
|
+
kind: "youtube",
|
|
89
93
|
sourceId: buildYoutubeSourceId(youtubeCandidate),
|
|
90
94
|
};
|
|
91
95
|
}
|
|
92
|
-
if (extracted.video?.kind ===
|
|
96
|
+
if (extracted.video?.kind === "direct" || isDirectMediaUrl(directUrl) || isDirectMediaUrl(url)) {
|
|
93
97
|
const normalized = directUrl || url;
|
|
94
98
|
return {
|
|
95
99
|
url: normalized,
|
|
96
|
-
kind:
|
|
100
|
+
kind: "direct",
|
|
97
101
|
sourceId: buildDirectSourceId(normalized),
|
|
98
102
|
};
|
|
99
103
|
}
|
|
@@ -102,7 +106,7 @@ export function resolveSlideSource({ url, extracted, }) {
|
|
|
102
106
|
if (fallbackId) {
|
|
103
107
|
return {
|
|
104
108
|
url: `https://www.youtube.com/watch?v=${fallbackId}`,
|
|
105
|
-
kind:
|
|
109
|
+
kind: "youtube",
|
|
106
110
|
sourceId: buildYoutubeSourceId(fallbackId),
|
|
107
111
|
};
|
|
108
112
|
}
|
|
@@ -114,14 +118,14 @@ export function resolveSlideSourceFromUrl(url) {
|
|
|
114
118
|
if (youtubeCandidate) {
|
|
115
119
|
return {
|
|
116
120
|
url: `https://www.youtube.com/watch?v=${youtubeCandidate}`,
|
|
117
|
-
kind:
|
|
121
|
+
kind: "youtube",
|
|
118
122
|
sourceId: buildYoutubeSourceId(youtubeCandidate),
|
|
119
123
|
};
|
|
120
124
|
}
|
|
121
125
|
if (isDirectMediaUrl(url)) {
|
|
122
126
|
return {
|
|
123
127
|
url,
|
|
124
|
-
kind:
|
|
128
|
+
kind: "direct",
|
|
125
129
|
sourceId: buildDirectSourceId(url),
|
|
126
130
|
};
|
|
127
131
|
}
|
|
@@ -130,14 +134,14 @@ export function resolveSlideSourceFromUrl(url) {
|
|
|
130
134
|
if (fallbackId) {
|
|
131
135
|
return {
|
|
132
136
|
url: `https://www.youtube.com/watch?v=${fallbackId}`,
|
|
133
|
-
kind:
|
|
137
|
+
kind: "youtube",
|
|
134
138
|
sourceId: buildYoutubeSourceId(fallbackId),
|
|
135
139
|
};
|
|
136
140
|
}
|
|
137
141
|
}
|
|
138
142
|
return null;
|
|
139
143
|
}
|
|
140
|
-
export async function extractSlidesForSource({ source, settings, noCache = false, mediaCache = null, env, timeoutMs, ytDlpPath, ffmpegPath, tesseractPath, hooks, }) {
|
|
144
|
+
export async function extractSlidesForSource({ source, settings, noCache = false, mediaCache = null, env, timeoutMs, ytDlpPath, ytDlpCookiesFromBrowser, ffmpegPath, tesseractPath, hooks, }) {
|
|
141
145
|
const slidesDir = resolveSlidesDir(settings.outputDir, source.sourceId);
|
|
142
146
|
return withSlidesLock(slidesDir, async () => {
|
|
143
147
|
const { logSlides, logSlidesTiming } = createSlidesLogger(hooks?.onSlidesLog ?? null);
|
|
@@ -152,12 +156,12 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
152
156
|
const onSlidesProgress = hooks?.onSlidesProgress;
|
|
153
157
|
if (!onSlidesProgress)
|
|
154
158
|
return null;
|
|
155
|
-
let lastText =
|
|
159
|
+
let lastText = "";
|
|
156
160
|
let lastPercent = 0;
|
|
157
161
|
return (label, percent, detail) => {
|
|
158
162
|
const clamped = clamp(Math.round(percent), 0, 100);
|
|
159
163
|
const nextPercent = Math.max(lastPercent, clamped);
|
|
160
|
-
const suffix = detail ? ` ${detail}` :
|
|
164
|
+
const suffix = detail ? ` ${detail}` : "";
|
|
161
165
|
const text = `Slides: ${label}${suffix} ${nextPercent}%`;
|
|
162
166
|
if (text === lastText)
|
|
163
167
|
return;
|
|
@@ -170,20 +174,20 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
170
174
|
const workers = resolveSlidesWorkers(env);
|
|
171
175
|
const totalStartedAt = Date.now();
|
|
172
176
|
logSlides(`pipeline=ingest(sequential)->scene-detect(parallel:${workers})->extract-frames(parallel:${workers})->ocr(parallel:${workers})`);
|
|
173
|
-
const ffmpegBinary = ffmpegPath ?? resolveToolPath(
|
|
177
|
+
const ffmpegBinary = ffmpegPath ?? resolveToolPath("ffmpeg", env, "FFMPEG_PATH");
|
|
174
178
|
if (!ffmpegBinary) {
|
|
175
|
-
throw new Error(
|
|
179
|
+
throw new Error("Missing ffmpeg (install ffmpeg or add it to PATH).");
|
|
176
180
|
}
|
|
177
|
-
const ffprobeBinary = resolveToolPath(
|
|
181
|
+
const ffprobeBinary = resolveToolPath("ffprobe", env, "FFPROBE_PATH");
|
|
178
182
|
if (settings.ocr && !tesseractPath) {
|
|
179
|
-
const resolved = resolveToolPath(
|
|
183
|
+
const resolved = resolveToolPath("tesseract", env, "TESSERACT_PATH");
|
|
180
184
|
if (!resolved) {
|
|
181
|
-
throw new Error(
|
|
185
|
+
throw new Error("Missing tesseract OCR (install tesseract or skip --slides-ocr).");
|
|
182
186
|
}
|
|
183
187
|
tesseractPath = resolved;
|
|
184
188
|
}
|
|
185
189
|
const ocrEnabled = Boolean(settings.ocr && tesseractPath);
|
|
186
|
-
const ocrAvailable = Boolean(tesseractPath ?? resolveToolPath(
|
|
190
|
+
const ocrAvailable = Boolean(tesseractPath ?? resolveToolPath("tesseract", env, "TESSERACT_PATH"));
|
|
187
191
|
const P_PREPARE = 2;
|
|
188
192
|
const P_FETCH_VIDEO = 6;
|
|
189
193
|
const P_DOWNLOAD_VIDEO = 35;
|
|
@@ -194,9 +198,9 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
194
198
|
{
|
|
195
199
|
const prepareStartedAt = Date.now();
|
|
196
200
|
await prepareSlidesDir(slidesDir);
|
|
197
|
-
logSlidesTiming(
|
|
201
|
+
logSlidesTiming("prepare output dir", prepareStartedAt);
|
|
198
202
|
}
|
|
199
|
-
reportSlidesProgress?.(
|
|
203
|
+
reportSlidesProgress?.("preparing source", P_PREPARE);
|
|
200
204
|
const allowStreamFallback = resolveSlidesStreamFallback(env);
|
|
201
205
|
let inputPath = source.url;
|
|
202
206
|
let inputCleanup = null;
|
|
@@ -204,18 +208,18 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
204
208
|
const cachedMedia = mediaCacheKey ? await mediaCache?.get({ url: mediaCacheKey }) : null;
|
|
205
209
|
if (cachedMedia) {
|
|
206
210
|
inputPath = cachedMedia.filePath;
|
|
207
|
-
const detail = typeof cachedMedia.sizeBytes ===
|
|
211
|
+
const detail = typeof cachedMedia.sizeBytes === "number"
|
|
208
212
|
? `(${formatBytes(cachedMedia.sizeBytes)})`
|
|
209
213
|
: undefined;
|
|
210
|
-
reportSlidesProgress?.(
|
|
214
|
+
reportSlidesProgress?.("using cached video", P_DOWNLOAD_VIDEO, detail);
|
|
211
215
|
}
|
|
212
|
-
else if (source.kind ===
|
|
216
|
+
else if (source.kind === "youtube") {
|
|
213
217
|
if (!ytDlpPath) {
|
|
214
|
-
throw new Error(
|
|
218
|
+
throw new Error("Slides for YouTube require yt-dlp (set YT_DLP_PATH or install yt-dlp).");
|
|
215
219
|
}
|
|
216
220
|
const ytDlp = ytDlpPath;
|
|
217
221
|
const format = resolveSlidesYtDlpExtractFormat(env);
|
|
218
|
-
reportSlidesProgress?.(
|
|
222
|
+
reportSlidesProgress?.("downloading video", P_FETCH_VIDEO);
|
|
219
223
|
const downloadStartedAt = Date.now();
|
|
220
224
|
try {
|
|
221
225
|
const downloaded = await downloadYoutubeVideo({
|
|
@@ -223,10 +227,11 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
223
227
|
url: source.url,
|
|
224
228
|
timeoutMs,
|
|
225
229
|
format,
|
|
230
|
+
cookiesFromBrowser: ytDlpCookiesFromBrowser,
|
|
226
231
|
onProgress: (percent, detail) => {
|
|
227
232
|
const ratio = clamp(percent / 100, 0, 1);
|
|
228
233
|
const mapped = P_FETCH_VIDEO + ratio * (P_DOWNLOAD_VIDEO - P_FETCH_VIDEO);
|
|
229
|
-
reportSlidesProgress?.(
|
|
234
|
+
reportSlidesProgress?.("downloading video", mapped, detail);
|
|
230
235
|
},
|
|
231
236
|
});
|
|
232
237
|
const cached = mediaCacheKey
|
|
@@ -245,27 +250,28 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
245
250
|
throw error;
|
|
246
251
|
}
|
|
247
252
|
warnings.push(`Failed to download video; falling back to stream URL: ${String(error)}`);
|
|
248
|
-
reportSlidesProgress?.(
|
|
253
|
+
reportSlidesProgress?.("fetching video", P_FETCH_VIDEO);
|
|
249
254
|
const streamStartedAt = Date.now();
|
|
250
255
|
const streamUrl = await resolveYoutubeStreamUrl({
|
|
251
256
|
ytDlpPath: ytDlp,
|
|
252
257
|
url: source.url,
|
|
253
258
|
format,
|
|
254
259
|
timeoutMs,
|
|
260
|
+
cookiesFromBrowser: ytDlpCookiesFromBrowser,
|
|
255
261
|
});
|
|
256
262
|
inputPath = streamUrl;
|
|
257
263
|
logSlidesTiming(`yt-dlp stream url (detect+extract, format=${format})`, streamStartedAt);
|
|
258
264
|
}
|
|
259
265
|
}
|
|
260
|
-
else if (source.kind ===
|
|
266
|
+
else if (source.kind === "direct") {
|
|
261
267
|
const shouldUseYtDlp = !isDirectMediaUrl(source.url);
|
|
262
268
|
if (shouldUseYtDlp) {
|
|
263
269
|
if (!ytDlpPath) {
|
|
264
|
-
throw new Error(
|
|
270
|
+
throw new Error("Slides for remote videos require yt-dlp (set YT_DLP_PATH or install yt-dlp).");
|
|
265
271
|
}
|
|
266
272
|
const ytDlp = ytDlpPath;
|
|
267
273
|
const format = resolveSlidesYtDlpExtractFormat(env);
|
|
268
|
-
reportSlidesProgress?.(
|
|
274
|
+
reportSlidesProgress?.("downloading video", P_FETCH_VIDEO);
|
|
269
275
|
const downloadStartedAt = Date.now();
|
|
270
276
|
try {
|
|
271
277
|
const downloaded = await downloadYoutubeVideo({
|
|
@@ -273,10 +279,11 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
273
279
|
url: source.url,
|
|
274
280
|
timeoutMs,
|
|
275
281
|
format,
|
|
282
|
+
cookiesFromBrowser: ytDlpCookiesFromBrowser,
|
|
276
283
|
onProgress: (percent, detail) => {
|
|
277
284
|
const ratio = clamp(percent / 100, 0, 1);
|
|
278
285
|
const mapped = P_FETCH_VIDEO + ratio * (P_DOWNLOAD_VIDEO - P_FETCH_VIDEO);
|
|
279
|
-
reportSlidesProgress?.(
|
|
286
|
+
reportSlidesProgress?.("downloading video", mapped, detail);
|
|
280
287
|
},
|
|
281
288
|
});
|
|
282
289
|
const cached = mediaCacheKey
|
|
@@ -295,20 +302,21 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
295
302
|
throw error;
|
|
296
303
|
}
|
|
297
304
|
warnings.push(`Failed to download video; falling back to stream URL: ${String(error)}`);
|
|
298
|
-
reportSlidesProgress?.(
|
|
305
|
+
reportSlidesProgress?.("fetching video", P_FETCH_VIDEO);
|
|
299
306
|
const streamStartedAt = Date.now();
|
|
300
307
|
const streamUrl = await resolveYoutubeStreamUrl({
|
|
301
308
|
ytDlpPath: ytDlp,
|
|
302
309
|
url: source.url,
|
|
303
310
|
format,
|
|
304
311
|
timeoutMs,
|
|
312
|
+
cookiesFromBrowser: ytDlpCookiesFromBrowser,
|
|
305
313
|
});
|
|
306
314
|
inputPath = streamUrl;
|
|
307
315
|
logSlidesTiming(`yt-dlp stream url (direct source, format=${format})`, streamStartedAt);
|
|
308
316
|
}
|
|
309
317
|
}
|
|
310
318
|
else {
|
|
311
|
-
reportSlidesProgress?.(
|
|
319
|
+
reportSlidesProgress?.("downloading video", P_FETCH_VIDEO);
|
|
312
320
|
const downloadStartedAt = Date.now();
|
|
313
321
|
try {
|
|
314
322
|
const downloaded = await downloadRemoteVideo({
|
|
@@ -317,7 +325,7 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
317
325
|
onProgress: (percent, detail) => {
|
|
318
326
|
const ratio = clamp(percent / 100, 0, 1);
|
|
319
327
|
const mapped = P_FETCH_VIDEO + ratio * (P_DOWNLOAD_VIDEO - P_FETCH_VIDEO);
|
|
320
|
-
reportSlidesProgress?.(
|
|
328
|
+
reportSlidesProgress?.("downloading video", mapped, detail);
|
|
321
329
|
},
|
|
322
330
|
});
|
|
323
331
|
const cached = mediaCacheKey
|
|
@@ -329,7 +337,7 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
329
337
|
: null;
|
|
330
338
|
inputPath = cached?.filePath ?? downloaded.filePath;
|
|
331
339
|
inputCleanup = downloaded.cleanup;
|
|
332
|
-
logSlidesTiming(
|
|
340
|
+
logSlidesTiming("download direct video (detect+extract)", downloadStartedAt);
|
|
333
341
|
}
|
|
334
342
|
catch (error) {
|
|
335
343
|
if (!allowStreamFallback) {
|
|
@@ -342,7 +350,7 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
342
350
|
}
|
|
343
351
|
try {
|
|
344
352
|
const ffmpegStartedAt = Date.now();
|
|
345
|
-
reportSlidesProgress?.(
|
|
353
|
+
reportSlidesProgress?.("detecting scenes", P_FETCH_VIDEO + 2);
|
|
346
354
|
const detection = await detectSlideTimestamps({
|
|
347
355
|
ffmpegPath: ffmpegBinary,
|
|
348
356
|
ffprobePath: ffprobeBinary,
|
|
@@ -357,13 +365,13 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
357
365
|
onSegmentProgress: (completed, total) => {
|
|
358
366
|
const ratio = total > 0 ? completed / total : 0;
|
|
359
367
|
const mapped = P_FETCH_VIDEO + 2 + ratio * (P_DETECT_SCENES - (P_FETCH_VIDEO + 2));
|
|
360
|
-
reportSlidesProgress?.(
|
|
368
|
+
reportSlidesProgress?.("detecting scenes", mapped, total > 0 ? `(${completed}/${total})` : undefined);
|
|
361
369
|
},
|
|
362
370
|
logSlides,
|
|
363
371
|
logSlidesTiming,
|
|
364
372
|
});
|
|
365
|
-
reportSlidesProgress?.(
|
|
366
|
-
logSlidesTiming(
|
|
373
|
+
reportSlidesProgress?.("detecting scenes", P_DETECT_SCENES);
|
|
374
|
+
logSlidesTiming("ffmpeg scene-detect", ffmpegStartedAt);
|
|
367
375
|
const interval = buildIntervalTimestamps({
|
|
368
376
|
durationSeconds: detection.durationSeconds,
|
|
369
377
|
minDurationSeconds: settings.minDurationSeconds,
|
|
@@ -371,7 +379,7 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
371
379
|
});
|
|
372
380
|
const combined = mergeTimestamps(detection.timestamps, interval?.timestamps ?? [], settings.minDurationSeconds);
|
|
373
381
|
if (combined.length === 0) {
|
|
374
|
-
throw new Error(
|
|
382
|
+
throw new Error("No slides detected; try adjusting slide extraction settings.");
|
|
375
383
|
}
|
|
376
384
|
const sceneSegments = buildSceneSegments(detection.timestamps, detection.durationSeconds);
|
|
377
385
|
const selected = interval?.timestamps.length
|
|
@@ -386,7 +394,7 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
386
394
|
const trimmed = applyMaxSlidesFilter(spaced.map((timestamp, index) => {
|
|
387
395
|
const segment = findSceneSegment(sceneSegments, timestamp);
|
|
388
396
|
const adjusted = adjustTimestampWithinSegment(timestamp, segment);
|
|
389
|
-
return { index: index + 1, timestamp: adjusted, imagePath:
|
|
397
|
+
return { index: index + 1, timestamp: adjusted, imagePath: "", segment };
|
|
390
398
|
}), settings.maxSlides, warnings);
|
|
391
399
|
const timelineSlides = {
|
|
392
400
|
sourceUrl: source.url,
|
|
@@ -416,13 +424,13 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
416
424
|
};
|
|
417
425
|
for (const slide of trimmed) {
|
|
418
426
|
const { segment: _segment, ...payload } = slide;
|
|
419
|
-
hooks.onSlideChunk({ slide: { ...payload, imagePath:
|
|
427
|
+
hooks.onSlideChunk({ slide: { ...payload, imagePath: "" }, meta });
|
|
420
428
|
}
|
|
421
429
|
}
|
|
422
|
-
const formatProgressCount = (completed, total) => total > 0 ? `(${completed}/${total})` :
|
|
430
|
+
const formatProgressCount = (completed, total) => total > 0 ? `(${completed}/${total})` : "";
|
|
423
431
|
const reportFrameProgress = (completed, total) => {
|
|
424
432
|
const ratio = total > 0 ? completed / total : 0;
|
|
425
|
-
reportSlidesProgress?.(
|
|
433
|
+
reportSlidesProgress?.("extracting frames", P_DETECT_SCENES + ratio * (P_EXTRACT_FRAMES - P_DETECT_SCENES), formatProgressCount(completed, total));
|
|
426
434
|
};
|
|
427
435
|
reportFrameProgress(0, trimmed.length);
|
|
428
436
|
const onSlideChunk = hooks?.onSlideChunk;
|
|
@@ -455,15 +463,15 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
455
463
|
const extractFramesStartedAt = Date.now();
|
|
456
464
|
const extractedSlides = await extractFrames();
|
|
457
465
|
const extractElapsedMs = logSlidesTiming?.(`extract frames (count=${trimmed.length}, parallel=${workers})`, extractFramesStartedAt);
|
|
458
|
-
if (trimmed.length > 0 && typeof extractElapsedMs ===
|
|
466
|
+
if (trimmed.length > 0 && typeof extractElapsedMs === "number") {
|
|
459
467
|
logSlides?.(`extract frames avgMsPerFrame=${Math.round(extractElapsedMs / trimmed.length)}`);
|
|
460
468
|
}
|
|
461
469
|
const rawSlides = applyMinDurationFilter(extractedSlides, settings.minDurationSeconds, warnings);
|
|
462
470
|
const renameStartedAt = Date.now();
|
|
463
471
|
const renamedSlides = await renameSlidesWithTimestamps(rawSlides, slidesDir);
|
|
464
|
-
logSlidesTiming?.(
|
|
472
|
+
logSlidesTiming?.("rename slides", renameStartedAt);
|
|
465
473
|
if (renamedSlides.length === 0) {
|
|
466
|
-
throw new Error(
|
|
474
|
+
throw new Error("No slides extracted; try lowering --slides-scene-threshold.");
|
|
467
475
|
}
|
|
468
476
|
let slidesWithOcr = renamedSlides;
|
|
469
477
|
if (ocrEnabled && tesseractPath) {
|
|
@@ -472,16 +480,16 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
472
480
|
const ocrStartPercent = P_OCR - 3;
|
|
473
481
|
const reportOcrProgress = (completed, total) => {
|
|
474
482
|
const ratio = total > 0 ? completed / total : 0;
|
|
475
|
-
reportSlidesProgress?.(
|
|
483
|
+
reportSlidesProgress?.("running OCR", ocrStartPercent + ratio * (P_OCR - ocrStartPercent), formatProgressCount(completed, total));
|
|
476
484
|
};
|
|
477
485
|
reportOcrProgress(0, renamedSlides.length);
|
|
478
486
|
slidesWithOcr = await runOcrOnSlides(renamedSlides, tesseractPath, workers, reportOcrProgress);
|
|
479
|
-
const elapsedMs = logSlidesTiming?.(
|
|
480
|
-
if (renamedSlides.length > 0 && typeof elapsedMs ===
|
|
487
|
+
const elapsedMs = logSlidesTiming?.("ocr done", ocrStartedAt);
|
|
488
|
+
if (renamedSlides.length > 0 && typeof elapsedMs === "number") {
|
|
481
489
|
logSlides?.(`ocr avgMsPerSlide=${Math.round(elapsedMs / renamedSlides.length)}`);
|
|
482
490
|
}
|
|
483
491
|
}
|
|
484
|
-
reportSlidesProgress?.(
|
|
492
|
+
reportSlidesProgress?.("finalizing", P_FINAL - 1);
|
|
485
493
|
if (hooks?.onSlideChunk) {
|
|
486
494
|
for (const slide of slidesWithOcr) {
|
|
487
495
|
hooks.onSlideChunk({
|
|
@@ -513,8 +521,8 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
513
521
|
warnings,
|
|
514
522
|
};
|
|
515
523
|
await writeSlidesJson(result, slidesDir);
|
|
516
|
-
reportSlidesProgress?.(
|
|
517
|
-
logSlidesTiming(
|
|
524
|
+
reportSlidesProgress?.("finalizing", P_FINAL);
|
|
525
|
+
logSlidesTiming("slides total", totalStartedAt);
|
|
518
526
|
return result;
|
|
519
527
|
}
|
|
520
528
|
finally {
|
|
@@ -523,11 +531,11 @@ export async function extractSlidesForSource({ source, settings, noCache = false
|
|
|
523
531
|
}
|
|
524
532
|
}
|
|
525
533
|
}, () => {
|
|
526
|
-
hooks?.onSlidesProgress?.(
|
|
534
|
+
hooks?.onSlidesProgress?.("Slides: queued");
|
|
527
535
|
});
|
|
528
536
|
}
|
|
529
537
|
export function parseShowinfoTimestamp(line) {
|
|
530
|
-
if (!line.includes(
|
|
538
|
+
if (!line.includes("showinfo"))
|
|
531
539
|
return null;
|
|
532
540
|
const match = /pts_time:(\d+\.?\d*)/.exec(line);
|
|
533
541
|
if (!match)
|
|
@@ -542,7 +550,7 @@ export function resolveExtractedTimestamp({ requested, actual, seekBase, }) {
|
|
|
542
550
|
return 0;
|
|
543
551
|
if (actual == null || !Number.isFinite(actual) || actual < 0)
|
|
544
552
|
return requested;
|
|
545
|
-
const base = typeof seekBase ===
|
|
553
|
+
const base = typeof seekBase === "number" && Number.isFinite(seekBase) && seekBase > 0 ? seekBase : null;
|
|
546
554
|
if (!base) {
|
|
547
555
|
// With -ss before -i, showinfo PTS resets near 0. Treat small values as offsets.
|
|
548
556
|
if (actual <= 5)
|
|
@@ -559,27 +567,28 @@ async function prepareSlidesDir(slidesDir) {
|
|
|
559
567
|
await fs.mkdir(slidesDir, { recursive: true });
|
|
560
568
|
const entries = await fs.readdir(slidesDir);
|
|
561
569
|
await Promise.all(entries.map(async (entry) => {
|
|
562
|
-
if (entry.startsWith(
|
|
570
|
+
if (entry.startsWith("slide_") && entry.endsWith(".png")) {
|
|
563
571
|
await fs.rm(path.join(slidesDir, entry), { force: true });
|
|
564
572
|
}
|
|
565
|
-
if (entry ===
|
|
573
|
+
if (entry === "slides.json") {
|
|
566
574
|
await fs.rm(path.join(slidesDir, entry), { force: true });
|
|
567
575
|
}
|
|
568
576
|
}));
|
|
569
577
|
}
|
|
570
|
-
async function downloadYoutubeVideo({ ytDlpPath, url, timeoutMs, format, onProgress, }) {
|
|
578
|
+
async function downloadYoutubeVideo({ ytDlpPath, url, timeoutMs, format, cookiesFromBrowser, onProgress, }) {
|
|
571
579
|
const dir = await fs.mkdtemp(path.join(tmpdir(), `summarize-slides-${randomUUID()}-`));
|
|
572
|
-
const outputTemplate = path.join(dir,
|
|
573
|
-
const progressTemplate =
|
|
580
|
+
const outputTemplate = path.join(dir, "video.%(ext)s");
|
|
581
|
+
const progressTemplate = "progress:%(progress.downloaded_bytes)s|%(progress.total_bytes)s|%(progress.total_bytes_estimate)s";
|
|
574
582
|
const args = [
|
|
575
|
-
|
|
583
|
+
"-f",
|
|
576
584
|
format,
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
...(
|
|
582
|
-
|
|
585
|
+
"--no-playlist",
|
|
586
|
+
"--no-warnings",
|
|
587
|
+
"--concurrent-fragments",
|
|
588
|
+
"4",
|
|
589
|
+
...buildYtDlpCookiesArgs(cookiesFromBrowser),
|
|
590
|
+
...(onProgress ? ["--progress", "--newline", "--progress-template", progressTemplate] : []),
|
|
591
|
+
"-o",
|
|
583
592
|
outputTemplate,
|
|
584
593
|
url,
|
|
585
594
|
];
|
|
@@ -587,14 +596,14 @@ async function downloadYoutubeVideo({ ytDlpPath, url, timeoutMs, format, onProgr
|
|
|
587
596
|
command: ytDlpPath,
|
|
588
597
|
args,
|
|
589
598
|
timeoutMs: Math.max(timeoutMs, YT_DLP_TIMEOUT_MS),
|
|
590
|
-
errorLabel:
|
|
599
|
+
errorLabel: "yt-dlp",
|
|
591
600
|
onStderrLine: (line, handle) => {
|
|
592
601
|
if (!onProgress)
|
|
593
602
|
return;
|
|
594
603
|
const trimmed = line.trim();
|
|
595
|
-
if (trimmed.startsWith(
|
|
596
|
-
const payload = trimmed.slice(
|
|
597
|
-
const [downloadedRaw, totalRaw, estimateRaw] = payload.split(
|
|
604
|
+
if (trimmed.startsWith("progress:")) {
|
|
605
|
+
const payload = trimmed.slice("progress:".length);
|
|
606
|
+
const [downloadedRaw, totalRaw, estimateRaw] = payload.split("|");
|
|
598
607
|
const downloaded = Number.parseFloat(downloadedRaw);
|
|
599
608
|
if (!Number.isFinite(downloaded) || downloaded < 0)
|
|
600
609
|
return;
|
|
@@ -613,7 +622,7 @@ async function downloadYoutubeVideo({ ytDlpPath, url, timeoutMs, format, onProgr
|
|
|
613
622
|
handle?.setProgress(percent, detail);
|
|
614
623
|
return;
|
|
615
624
|
}
|
|
616
|
-
if (!trimmed.startsWith(
|
|
625
|
+
if (!trimmed.startsWith("[download]"))
|
|
617
626
|
return;
|
|
618
627
|
const percentMatch = trimmed.match(/\b(\d{1,3}(?:\.\d+)?)%\b/);
|
|
619
628
|
if (!percentMatch)
|
|
@@ -627,16 +636,16 @@ async function downloadYoutubeVideo({ ytDlpPath, url, timeoutMs, format, onProgr
|
|
|
627
636
|
speedMatch?.[1] ? `at ${speedMatch[1]}` : null,
|
|
628
637
|
etaMatch?.[1] ? `ETA ${etaMatch[1]}` : null,
|
|
629
638
|
].filter(Boolean);
|
|
630
|
-
const detail = detailParts.length ? detailParts.join(
|
|
639
|
+
const detail = detailParts.length ? detailParts.join(" ") : undefined;
|
|
631
640
|
onProgress(percent, detail);
|
|
632
641
|
handle?.setProgress(percent, detail ?? null);
|
|
633
642
|
},
|
|
634
643
|
onStdoutLine: onProgress
|
|
635
644
|
? (line, handle) => {
|
|
636
|
-
if (!line.trim().startsWith(
|
|
645
|
+
if (!line.trim().startsWith("progress:"))
|
|
637
646
|
return;
|
|
638
|
-
const payload = line.trim().slice(
|
|
639
|
-
const [downloadedRaw, totalRaw, estimateRaw] = payload.split(
|
|
647
|
+
const payload = line.trim().slice("progress:".length);
|
|
648
|
+
const [downloadedRaw, totalRaw, estimateRaw] = payload.split("|");
|
|
640
649
|
const downloaded = Number.parseFloat(downloadedRaw);
|
|
641
650
|
if (!Number.isFinite(downloaded) || downloaded < 0)
|
|
642
651
|
return;
|
|
@@ -659,7 +668,7 @@ async function downloadYoutubeVideo({ ytDlpPath, url, timeoutMs, format, onProgr
|
|
|
659
668
|
const files = await fs.readdir(dir);
|
|
660
669
|
const candidates = [];
|
|
661
670
|
for (const entry of files) {
|
|
662
|
-
if (entry.endsWith(
|
|
671
|
+
if (entry.endsWith(".part") || entry.endsWith(".ytdl"))
|
|
663
672
|
continue;
|
|
664
673
|
const filePath = path.join(dir, entry);
|
|
665
674
|
const stat = await fs.stat(filePath).catch(() => null);
|
|
@@ -669,7 +678,7 @@ async function downloadYoutubeVideo({ ytDlpPath, url, timeoutMs, format, onProgr
|
|
|
669
678
|
}
|
|
670
679
|
if (candidates.length === 0) {
|
|
671
680
|
await fs.rm(dir, { recursive: true, force: true });
|
|
672
|
-
throw new Error(
|
|
681
|
+
throw new Error("yt-dlp completed but no video file was downloaded.");
|
|
673
682
|
}
|
|
674
683
|
candidates.sort((a, b) => b.size - a.size);
|
|
675
684
|
const filePath = candidates[0].filePath;
|
|
@@ -682,7 +691,7 @@ async function downloadYoutubeVideo({ ytDlpPath, url, timeoutMs, format, onProgr
|
|
|
682
691
|
}
|
|
683
692
|
async function downloadRemoteVideo({ url, timeoutMs, onProgress, }) {
|
|
684
693
|
const dir = await fs.mkdtemp(path.join(tmpdir(), `summarize-slides-${randomUUID()}-`));
|
|
685
|
-
let suffix =
|
|
694
|
+
let suffix = ".bin";
|
|
686
695
|
try {
|
|
687
696
|
const parsed = new URL(url);
|
|
688
697
|
const ext = path.extname(parsed.pathname);
|
|
@@ -700,14 +709,14 @@ async function downloadRemoteVideo({ url, timeoutMs, onProgress, }) {
|
|
|
700
709
|
if (!res.ok) {
|
|
701
710
|
throw new Error(`Download failed: ${res.status} ${res.statusText}`);
|
|
702
711
|
}
|
|
703
|
-
const totalRaw = res.headers.get(
|
|
712
|
+
const totalRaw = res.headers.get("content-length");
|
|
704
713
|
const total = totalRaw ? Number(totalRaw) : 0;
|
|
705
714
|
const hasTotal = Number.isFinite(total) && total > 0;
|
|
706
715
|
const reader = res.body?.getReader();
|
|
707
716
|
if (!reader) {
|
|
708
|
-
throw new Error(
|
|
717
|
+
throw new Error("Download failed: missing response body");
|
|
709
718
|
}
|
|
710
|
-
const handle = await fs.open(filePath,
|
|
719
|
+
const handle = await fs.open(filePath, "w");
|
|
711
720
|
let downloaded = 0;
|
|
712
721
|
let lastPercent = -1;
|
|
713
722
|
let lastReportedBytes = 0;
|
|
@@ -761,20 +770,20 @@ async function downloadRemoteVideo({ url, timeoutMs, onProgress, }) {
|
|
|
761
770
|
clearTimeout(timeout);
|
|
762
771
|
}
|
|
763
772
|
}
|
|
764
|
-
async function resolveYoutubeStreamUrl({ ytDlpPath, url, timeoutMs, format, }) {
|
|
765
|
-
const args = [
|
|
773
|
+
async function resolveYoutubeStreamUrl({ ytDlpPath, url, timeoutMs, format, cookiesFromBrowser, }) {
|
|
774
|
+
const args = ["-f", format, ...buildYtDlpCookiesArgs(cookiesFromBrowser), "-g", url];
|
|
766
775
|
const output = await runProcessCapture({
|
|
767
776
|
command: ytDlpPath,
|
|
768
777
|
args,
|
|
769
778
|
timeoutMs: Math.max(timeoutMs, YT_DLP_TIMEOUT_MS),
|
|
770
|
-
errorLabel:
|
|
779
|
+
errorLabel: "yt-dlp",
|
|
771
780
|
});
|
|
772
781
|
const lines = output
|
|
773
|
-
.split(
|
|
782
|
+
.split("\n")
|
|
774
783
|
.map((line) => line.trim())
|
|
775
784
|
.filter(Boolean);
|
|
776
785
|
if (lines.length === 0) {
|
|
777
|
-
throw new Error(
|
|
786
|
+
throw new Error("yt-dlp did not return a stream URL.");
|
|
778
787
|
}
|
|
779
788
|
return lines[0];
|
|
780
789
|
}
|
|
@@ -786,7 +795,7 @@ async function detectSlideTimestamps({ ffmpegPath, ffprobePath, inputPath, scene
|
|
|
786
795
|
inputPath,
|
|
787
796
|
timeoutMs,
|
|
788
797
|
});
|
|
789
|
-
logSlidesTiming?.(
|
|
798
|
+
logSlidesTiming?.("ffprobe video info", probeStartedAt);
|
|
790
799
|
const calibration = await calibrateSceneThreshold({
|
|
791
800
|
ffmpegPath,
|
|
792
801
|
inputPath,
|
|
@@ -839,13 +848,13 @@ async function detectSlideTimestamps({ ffmpegPath, ffprobePath, inputPath, scene
|
|
|
839
848
|
enabled: true,
|
|
840
849
|
chosenThreshold: timestamps.length > 0 ? effectiveThreshold : baseThreshold,
|
|
841
850
|
confidence: calibration.confidence,
|
|
842
|
-
strategy:
|
|
851
|
+
strategy: "hash",
|
|
843
852
|
}
|
|
844
853
|
: {
|
|
845
854
|
enabled: false,
|
|
846
855
|
chosenThreshold: baseThreshold,
|
|
847
856
|
confidence: 0,
|
|
848
|
-
strategy:
|
|
857
|
+
strategy: "none",
|
|
849
858
|
};
|
|
850
859
|
return { timestamps, autoTune, durationSeconds: videoInfo.durationSeconds };
|
|
851
860
|
}
|
|
@@ -856,7 +865,7 @@ async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, tim
|
|
|
856
865
|
const FRAME_MIN_CONTRAST = 0.16;
|
|
857
866
|
const SEEK_PAD_SECONDS = 8;
|
|
858
867
|
const clampTimestamp = (value) => {
|
|
859
|
-
const upper = typeof durationSeconds ===
|
|
868
|
+
const upper = typeof durationSeconds === "number" && Number.isFinite(durationSeconds) && durationSeconds > 0
|
|
860
869
|
? Math.max(0, durationSeconds - 0.1)
|
|
861
870
|
: Number.POSITIVE_INFINITY;
|
|
862
871
|
return clamp(value, 0, upper);
|
|
@@ -865,7 +874,7 @@ async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, tim
|
|
|
865
874
|
if (!segment)
|
|
866
875
|
return null;
|
|
867
876
|
const start = Math.max(0, segment.start);
|
|
868
|
-
const end = typeof segment.end ===
|
|
877
|
+
const end = typeof segment.end === "number" && Number.isFinite(segment.end) ? segment.end : null;
|
|
869
878
|
if (end != null && end <= start)
|
|
870
879
|
return null;
|
|
871
880
|
return { start, end };
|
|
@@ -879,7 +888,7 @@ async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, tim
|
|
|
879
888
|
return Math.min(1.5, Math.max(0.2, duration * 0.08));
|
|
880
889
|
};
|
|
881
890
|
const parseSignalstats = (line, stats) => {
|
|
882
|
-
if (!line.includes(
|
|
891
|
+
if (!line.includes("lavfi.signalstats."))
|
|
883
892
|
return;
|
|
884
893
|
const match = line.match(/lavfi\.signalstats\.(YMIN|YMAX|YAVG)=(\d+(?:\.\d+)?)/);
|
|
885
894
|
if (!match)
|
|
@@ -887,11 +896,11 @@ async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, tim
|
|
|
887
896
|
const value = Number(match[2]);
|
|
888
897
|
if (!Number.isFinite(value))
|
|
889
898
|
return;
|
|
890
|
-
if (match[1] ===
|
|
899
|
+
if (match[1] === "YMIN")
|
|
891
900
|
stats.ymin = value;
|
|
892
|
-
if (match[1] ===
|
|
901
|
+
if (match[1] === "YMAX")
|
|
893
902
|
stats.ymax = value;
|
|
894
|
-
if (match[1] ===
|
|
903
|
+
if (match[1] === "YAVG")
|
|
895
904
|
stats.yavg = value;
|
|
896
905
|
};
|
|
897
906
|
const toQuality = (stats) => {
|
|
@@ -909,34 +918,34 @@ async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, tim
|
|
|
909
918
|
const extractFrame = async (timestamp, outputPath, opts) => {
|
|
910
919
|
const stats = { ymin: null, ymax: null, yavg: null };
|
|
911
920
|
let actualTimestamp = null;
|
|
912
|
-
const effectiveTimeoutMs = typeof opts?.timeoutMs ===
|
|
921
|
+
const effectiveTimeoutMs = typeof opts?.timeoutMs === "number" && Number.isFinite(opts.timeoutMs) && opts.timeoutMs > 0
|
|
913
922
|
? opts.timeoutMs
|
|
914
923
|
: timeoutMs;
|
|
915
924
|
const seekBase = Math.max(0, timestamp - SEEK_PAD_SECONDS);
|
|
916
925
|
const seekOffset = Math.max(0, timestamp - seekBase);
|
|
917
926
|
const args = [
|
|
918
|
-
|
|
919
|
-
...(seekBase > 0 ? [
|
|
920
|
-
|
|
927
|
+
"-hide_banner",
|
|
928
|
+
...(seekBase > 0 ? ["-ss", String(seekBase)] : []),
|
|
929
|
+
"-i",
|
|
921
930
|
inputPath,
|
|
922
|
-
...(seekOffset > 0 ? [
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
931
|
+
...(seekOffset > 0 ? ["-ss", String(seekOffset)] : []),
|
|
932
|
+
"-vf",
|
|
933
|
+
"signalstats,showinfo,metadata=print",
|
|
934
|
+
"-vframes",
|
|
935
|
+
"1",
|
|
936
|
+
"-q:v",
|
|
937
|
+
"2",
|
|
938
|
+
"-an",
|
|
939
|
+
"-sn",
|
|
940
|
+
"-update",
|
|
941
|
+
"1",
|
|
933
942
|
outputPath,
|
|
934
943
|
];
|
|
935
944
|
await runProcess({
|
|
936
945
|
command: ffmpegPath,
|
|
937
946
|
args,
|
|
938
947
|
timeoutMs: effectiveTimeoutMs,
|
|
939
|
-
errorLabel:
|
|
948
|
+
errorLabel: "ffmpeg",
|
|
940
949
|
onStderrLine: (line) => {
|
|
941
950
|
if (actualTimestamp == null) {
|
|
942
951
|
const parsed = parseShowinfoTimestamp(line);
|
|
@@ -972,7 +981,7 @@ async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, tim
|
|
|
972
981
|
: bounds
|
|
973
982
|
? Math.max(bounds.start + padding, clampedTimestamp)
|
|
974
983
|
: clampedTimestamp;
|
|
975
|
-
const outputPath = path.join(outputDir, `slide_${String(index + 1).padStart(4,
|
|
984
|
+
const outputPath = path.join(outputDir, `slide_${String(index + 1).padStart(4, "0")}.png`);
|
|
976
985
|
const extracted = await extractFrame(safeTimestamp, outputPath);
|
|
977
986
|
const resolvedTimestamp = resolveExtractedTimestamp({
|
|
978
987
|
requested: safeTimestamp,
|
|
@@ -983,7 +992,7 @@ async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, tim
|
|
|
983
992
|
if (Math.abs(delta) >= 0.25) {
|
|
984
993
|
const actualLabel = extracted.actualTimestamp != null && Number.isFinite(extracted.actualTimestamp)
|
|
985
994
|
? extracted.actualTimestamp.toFixed(2)
|
|
986
|
-
:
|
|
995
|
+
: "n/a";
|
|
987
996
|
logSlides?.(`frame pts slide=${index + 1} req=${safeTimestamp.toFixed(2)}s actual=${actualLabel}s base=${extracted.seekBase.toFixed(2)}s -> ${resolvedTimestamp.toFixed(2)}s delta=${delta.toFixed(2)}s`);
|
|
988
997
|
}
|
|
989
998
|
const imageVersion = Date.now();
|
|
@@ -1056,7 +1065,7 @@ async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, tim
|
|
|
1056
1065
|
const candidateTimestamp = clamp(baseTimestamp + offsetSeconds, minTs, maxTs);
|
|
1057
1066
|
if (Math.abs(candidateTimestamp - baseTimestamp) < 0.01)
|
|
1058
1067
|
continue;
|
|
1059
|
-
const tempPath = path.join(outputDir, `slide_${String(frame.index).padStart(4,
|
|
1068
|
+
const tempPath = path.join(outputDir, `slide_${String(frame.index).padStart(4, "0")}_alt.png`);
|
|
1060
1069
|
try {
|
|
1061
1070
|
const candidate = await extractFrame(candidateTimestamp, tempPath, {
|
|
1062
1071
|
timeoutMs: Math.min(timeoutMs, 12_000),
|
|
@@ -1080,8 +1089,8 @@ async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, tim
|
|
|
1080
1089
|
await fs.rename(tempPath, frame.imagePath);
|
|
1081
1090
|
}
|
|
1082
1091
|
catch (err) {
|
|
1083
|
-
const code = err && typeof err ===
|
|
1084
|
-
if (code ===
|
|
1092
|
+
const code = err && typeof err === "object" && "code" in err ? String(err.code) : "";
|
|
1093
|
+
if (code === "EEXIST") {
|
|
1085
1094
|
await fs.rm(frame.imagePath, { force: true }).catch(() => null);
|
|
1086
1095
|
await fs.rename(tempPath, frame.imagePath);
|
|
1087
1096
|
}
|
|
@@ -1137,7 +1146,7 @@ async function extractFramesAtTimestamps({ ffmpegPath, inputPath, outputDir, tim
|
|
|
1137
1146
|
onStatus?.(`Slides: improving thumbnails ${percent}%`);
|
|
1138
1147
|
});
|
|
1139
1148
|
onStatus?.(`Slides: improving thumbnails ${THUMB_END}%`);
|
|
1140
|
-
logSlidesTiming?.(
|
|
1149
|
+
logSlidesTiming?.("thumbnail adjust done", fixStartedAt);
|
|
1141
1150
|
}
|
|
1142
1151
|
logSlidesTiming?.(`extract frame loop (count=${timestamps.length}, workers=${workers})`, startedAt);
|
|
1143
1152
|
return slides;
|
|
@@ -1241,28 +1250,28 @@ async function detectSceneTimestamps({ ffmpegPath, inputPath, threshold, timeout
|
|
|
1241
1250
|
const concurrency = workers && workers > 0 ? workers : 1;
|
|
1242
1251
|
const tasks = usedSegments.map((segment) => async () => {
|
|
1243
1252
|
const args = [
|
|
1244
|
-
|
|
1253
|
+
"-hide_banner",
|
|
1245
1254
|
...(segment.duration > 0
|
|
1246
|
-
? [
|
|
1255
|
+
? ["-ss", String(segment.start), "-t", String(segment.duration)]
|
|
1247
1256
|
: []),
|
|
1248
|
-
|
|
1257
|
+
"-i",
|
|
1249
1258
|
inputPath,
|
|
1250
|
-
|
|
1259
|
+
"-vf",
|
|
1251
1260
|
filter,
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1261
|
+
"-fps_mode",
|
|
1262
|
+
"vfr",
|
|
1263
|
+
"-an",
|
|
1264
|
+
"-sn",
|
|
1265
|
+
"-f",
|
|
1266
|
+
"null",
|
|
1267
|
+
"-",
|
|
1259
1268
|
];
|
|
1260
1269
|
const timestamps = [];
|
|
1261
1270
|
await runProcess({
|
|
1262
1271
|
command: ffmpegPath,
|
|
1263
1272
|
args,
|
|
1264
1273
|
timeoutMs: Math.max(timeoutMs, FFMPEG_TIMEOUT_FALLBACK_MS),
|
|
1265
|
-
errorLabel:
|
|
1274
|
+
errorLabel: "ffmpeg",
|
|
1266
1275
|
onStderrLine: (line) => {
|
|
1267
1276
|
const ts = parseShowinfoTimestamp(line);
|
|
1268
1277
|
if (ts != null)
|
|
@@ -1277,29 +1286,29 @@ async function detectSceneTimestamps({ ffmpegPath, inputPath, threshold, timeout
|
|
|
1277
1286
|
return merged;
|
|
1278
1287
|
}
|
|
1279
1288
|
async function hashFrameAtTimestamp({ ffmpegPath, inputPath, timestamp, timeoutMs, }) {
|
|
1280
|
-
const filter =
|
|
1289
|
+
const filter = "scale=32:32,format=gray";
|
|
1281
1290
|
const args = [
|
|
1282
|
-
|
|
1283
|
-
|
|
1291
|
+
"-hide_banner",
|
|
1292
|
+
"-ss",
|
|
1284
1293
|
String(timestamp),
|
|
1285
|
-
|
|
1294
|
+
"-i",
|
|
1286
1295
|
inputPath,
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1296
|
+
"-frames:v",
|
|
1297
|
+
"1",
|
|
1298
|
+
"-vf",
|
|
1290
1299
|
filter,
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1300
|
+
"-f",
|
|
1301
|
+
"rawvideo",
|
|
1302
|
+
"-pix_fmt",
|
|
1303
|
+
"gray",
|
|
1304
|
+
"-",
|
|
1296
1305
|
];
|
|
1297
1306
|
try {
|
|
1298
1307
|
const buffer = await runProcessCaptureBuffer({
|
|
1299
1308
|
command: ffmpegPath,
|
|
1300
1309
|
args,
|
|
1301
1310
|
timeoutMs,
|
|
1302
|
-
errorLabel:
|
|
1311
|
+
errorLabel: "ffmpeg",
|
|
1303
1312
|
});
|
|
1304
1313
|
if (buffer.length < 1024)
|
|
1305
1314
|
return null;
|
|
@@ -1331,26 +1340,26 @@ function computeHashDistanceRatio(a, b) {
|
|
|
1331
1340
|
return len === 0 ? 0 : diff / len;
|
|
1332
1341
|
}
|
|
1333
1342
|
async function probeVideoInfo({ ffprobePath, env, inputPath, timeoutMs, }) {
|
|
1334
|
-
const probeBin = ffprobePath ?? resolveExecutableInPath(
|
|
1343
|
+
const probeBin = ffprobePath ?? resolveExecutableInPath("ffprobe", env);
|
|
1335
1344
|
if (!probeBin)
|
|
1336
1345
|
return { durationSeconds: null, width: null, height: null };
|
|
1337
|
-
const args = [
|
|
1346
|
+
const args = ["-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", inputPath];
|
|
1338
1347
|
try {
|
|
1339
1348
|
const output = await runProcessCapture({
|
|
1340
1349
|
command: probeBin,
|
|
1341
1350
|
args,
|
|
1342
1351
|
timeoutMs: Math.min(timeoutMs, 30_000),
|
|
1343
|
-
errorLabel:
|
|
1352
|
+
errorLabel: "ffprobe",
|
|
1344
1353
|
});
|
|
1345
1354
|
const parsed = JSON.parse(output);
|
|
1346
1355
|
let durationSeconds = null;
|
|
1347
1356
|
let width = null;
|
|
1348
1357
|
let height = null;
|
|
1349
1358
|
for (const stream of parsed.streams ?? []) {
|
|
1350
|
-
if (stream.codec_type ===
|
|
1351
|
-
if (width == null && typeof stream.width ===
|
|
1359
|
+
if (stream.codec_type === "video") {
|
|
1360
|
+
if (width == null && typeof stream.width === "number")
|
|
1352
1361
|
width = stream.width;
|
|
1353
|
-
if (height == null && typeof stream.height ===
|
|
1362
|
+
if (height == null && typeof stream.height === "number")
|
|
1354
1363
|
height = stream.height;
|
|
1355
1364
|
const duration = Number(stream.duration);
|
|
1356
1365
|
if (Number.isFinite(duration) && duration > 0)
|
|
@@ -1371,30 +1380,30 @@ async function probeVideoInfo({ ffprobePath, env, inputPath, timeoutMs, }) {
|
|
|
1371
1380
|
async function runProcess({ command, args, timeoutMs, errorLabel, onStderrLine, onStdoutLine, }) {
|
|
1372
1381
|
await new Promise((resolve, reject) => {
|
|
1373
1382
|
const { proc, handle } = spawnTracked(command, args, {
|
|
1374
|
-
stdio: [
|
|
1383
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
1375
1384
|
label: errorLabel,
|
|
1376
1385
|
kind: errorLabel,
|
|
1377
1386
|
captureOutput: false,
|
|
1378
1387
|
});
|
|
1379
|
-
let stderr =
|
|
1380
|
-
let stderrBuffer =
|
|
1381
|
-
let stdoutBuffer =
|
|
1388
|
+
let stderr = "";
|
|
1389
|
+
let stderrBuffer = "";
|
|
1390
|
+
let stdoutBuffer = "";
|
|
1382
1391
|
const flushLine = (line) => {
|
|
1383
1392
|
if (onStderrLine)
|
|
1384
1393
|
onStderrLine(line, handle);
|
|
1385
|
-
handle?.appendOutput(
|
|
1394
|
+
handle?.appendOutput("stderr", line);
|
|
1386
1395
|
if (stderr.length < 8192) {
|
|
1387
1396
|
stderr += line;
|
|
1388
|
-
if (!line.endsWith(
|
|
1389
|
-
stderr +=
|
|
1397
|
+
if (!line.endsWith("\n"))
|
|
1398
|
+
stderr += "\n";
|
|
1390
1399
|
}
|
|
1391
1400
|
};
|
|
1392
1401
|
if (proc.stderr) {
|
|
1393
|
-
proc.stderr.setEncoding(
|
|
1394
|
-
proc.stderr.on(
|
|
1402
|
+
proc.stderr.setEncoding("utf8");
|
|
1403
|
+
proc.stderr.on("data", (chunk) => {
|
|
1395
1404
|
stderrBuffer += chunk;
|
|
1396
1405
|
const lines = stderrBuffer.split(/\r?\n/);
|
|
1397
|
-
stderrBuffer = lines.pop() ??
|
|
1406
|
+
stderrBuffer = lines.pop() ?? "";
|
|
1398
1407
|
for (const line of lines) {
|
|
1399
1408
|
if (line)
|
|
1400
1409
|
flushLine(line);
|
|
@@ -1404,29 +1413,29 @@ async function runProcess({ command, args, timeoutMs, errorLabel, onStderrLine,
|
|
|
1404
1413
|
if (proc.stdout) {
|
|
1405
1414
|
const handleStdoutLine = onStdoutLine ?? onStderrLine;
|
|
1406
1415
|
if (handleStdoutLine) {
|
|
1407
|
-
proc.stdout.setEncoding(
|
|
1408
|
-
proc.stdout.on(
|
|
1416
|
+
proc.stdout.setEncoding("utf8");
|
|
1417
|
+
proc.stdout.on("data", (chunk) => {
|
|
1409
1418
|
stdoutBuffer += chunk;
|
|
1410
1419
|
const lines = stdoutBuffer.split(/\r?\n/);
|
|
1411
|
-
stdoutBuffer = lines.pop() ??
|
|
1420
|
+
stdoutBuffer = lines.pop() ?? "";
|
|
1412
1421
|
for (const line of lines) {
|
|
1413
1422
|
if (!line)
|
|
1414
1423
|
continue;
|
|
1415
1424
|
handleStdoutLine(line, handle);
|
|
1416
|
-
handle?.appendOutput(
|
|
1425
|
+
handle?.appendOutput("stdout", line);
|
|
1417
1426
|
}
|
|
1418
1427
|
});
|
|
1419
1428
|
}
|
|
1420
1429
|
}
|
|
1421
1430
|
const timeout = setTimeout(() => {
|
|
1422
|
-
proc.kill(
|
|
1431
|
+
proc.kill("SIGKILL");
|
|
1423
1432
|
reject(new Error(`${errorLabel} timed out`));
|
|
1424
1433
|
}, timeoutMs);
|
|
1425
|
-
proc.on(
|
|
1434
|
+
proc.on("error", (error) => {
|
|
1426
1435
|
clearTimeout(timeout);
|
|
1427
1436
|
reject(error);
|
|
1428
1437
|
});
|
|
1429
|
-
proc.on(
|
|
1438
|
+
proc.on("close", (code) => {
|
|
1430
1439
|
clearTimeout(timeout);
|
|
1431
1440
|
if (stderrBuffer.trim().length > 0) {
|
|
1432
1441
|
flushLine(stderrBuffer.trim());
|
|
@@ -1435,13 +1444,13 @@ async function runProcess({ command, args, timeoutMs, errorLabel, onStderrLine,
|
|
|
1435
1444
|
const handleStdoutLine = onStdoutLine ?? onStderrLine;
|
|
1436
1445
|
if (handleStdoutLine)
|
|
1437
1446
|
handleStdoutLine(stdoutBuffer.trim(), handle);
|
|
1438
|
-
handle?.appendOutput(
|
|
1447
|
+
handle?.appendOutput("stdout", stdoutBuffer.trim());
|
|
1439
1448
|
}
|
|
1440
1449
|
if (code === 0) {
|
|
1441
1450
|
resolve();
|
|
1442
1451
|
return;
|
|
1443
1452
|
}
|
|
1444
|
-
const suffix = stderr.trim() ? `: ${stderr.trim()}` :
|
|
1453
|
+
const suffix = stderr.trim() ? `: ${stderr.trim()}` : "";
|
|
1445
1454
|
reject(new Error(`${errorLabel} exited with code ${code}${suffix}`));
|
|
1446
1455
|
});
|
|
1447
1456
|
});
|
|
@@ -1513,7 +1522,7 @@ function buildSceneSegments(sceneTimestamps, durationSeconds) {
|
|
|
1513
1522
|
for (let i = 0; i < starts.length; i += 1) {
|
|
1514
1523
|
const start = starts[i];
|
|
1515
1524
|
const rawEnd = ends[i];
|
|
1516
|
-
const end = typeof rawEnd ===
|
|
1525
|
+
const end = typeof rawEnd === "number" && Number.isFinite(rawEnd) && rawEnd > start ? rawEnd : null;
|
|
1517
1526
|
segments.push({ start, end });
|
|
1518
1527
|
}
|
|
1519
1528
|
return segments;
|
|
@@ -1595,62 +1604,62 @@ function buildIntervalTimestamps({ durationSeconds, minDurationSeconds, maxSlide
|
|
|
1595
1604
|
async function runProcessCapture({ command, args, timeoutMs, errorLabel, }) {
|
|
1596
1605
|
return new Promise((resolve, reject) => {
|
|
1597
1606
|
const { proc, handle } = spawnTracked(command, args, {
|
|
1598
|
-
stdio: [
|
|
1607
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
1599
1608
|
label: errorLabel,
|
|
1600
1609
|
kind: errorLabel,
|
|
1601
1610
|
captureOutput: false,
|
|
1602
1611
|
});
|
|
1603
|
-
let stdout =
|
|
1604
|
-
let stderr =
|
|
1605
|
-
let stdoutBuffer =
|
|
1606
|
-
let stderrBuffer =
|
|
1612
|
+
let stdout = "";
|
|
1613
|
+
let stderr = "";
|
|
1614
|
+
let stdoutBuffer = "";
|
|
1615
|
+
let stderrBuffer = "";
|
|
1607
1616
|
const timeout = setTimeout(() => {
|
|
1608
|
-
proc.kill(
|
|
1617
|
+
proc.kill("SIGKILL");
|
|
1609
1618
|
reject(new Error(`${errorLabel} timed out`));
|
|
1610
1619
|
}, timeoutMs);
|
|
1611
1620
|
if (proc.stdout) {
|
|
1612
|
-
proc.stdout.setEncoding(
|
|
1613
|
-
proc.stdout.on(
|
|
1621
|
+
proc.stdout.setEncoding("utf8");
|
|
1622
|
+
proc.stdout.on("data", (chunk) => {
|
|
1614
1623
|
stdout += chunk;
|
|
1615
1624
|
stdoutBuffer += chunk;
|
|
1616
1625
|
const lines = stdoutBuffer.split(/\r?\n/);
|
|
1617
|
-
stdoutBuffer = lines.pop() ??
|
|
1626
|
+
stdoutBuffer = lines.pop() ?? "";
|
|
1618
1627
|
for (const line of lines) {
|
|
1619
1628
|
if (line)
|
|
1620
|
-
handle?.appendOutput(
|
|
1629
|
+
handle?.appendOutput("stdout", line);
|
|
1621
1630
|
}
|
|
1622
1631
|
});
|
|
1623
1632
|
}
|
|
1624
1633
|
if (proc.stderr) {
|
|
1625
|
-
proc.stderr.setEncoding(
|
|
1626
|
-
proc.stderr.on(
|
|
1634
|
+
proc.stderr.setEncoding("utf8");
|
|
1635
|
+
proc.stderr.on("data", (chunk) => {
|
|
1627
1636
|
if (stderr.length < 8192) {
|
|
1628
1637
|
stderr += chunk;
|
|
1629
1638
|
}
|
|
1630
1639
|
stderrBuffer += chunk;
|
|
1631
1640
|
const lines = stderrBuffer.split(/\r?\n/);
|
|
1632
|
-
stderrBuffer = lines.pop() ??
|
|
1641
|
+
stderrBuffer = lines.pop() ?? "";
|
|
1633
1642
|
for (const line of lines) {
|
|
1634
1643
|
if (line)
|
|
1635
|
-
handle?.appendOutput(
|
|
1644
|
+
handle?.appendOutput("stderr", line);
|
|
1636
1645
|
}
|
|
1637
1646
|
});
|
|
1638
1647
|
}
|
|
1639
|
-
proc.on(
|
|
1648
|
+
proc.on("error", (error) => {
|
|
1640
1649
|
clearTimeout(timeout);
|
|
1641
1650
|
reject(error);
|
|
1642
1651
|
});
|
|
1643
|
-
proc.on(
|
|
1652
|
+
proc.on("close", (code) => {
|
|
1644
1653
|
clearTimeout(timeout);
|
|
1645
1654
|
if (stdoutBuffer.trim())
|
|
1646
|
-
handle?.appendOutput(
|
|
1655
|
+
handle?.appendOutput("stdout", stdoutBuffer.trim());
|
|
1647
1656
|
if (stderrBuffer.trim())
|
|
1648
|
-
handle?.appendOutput(
|
|
1657
|
+
handle?.appendOutput("stderr", stderrBuffer.trim());
|
|
1649
1658
|
if (code === 0) {
|
|
1650
1659
|
resolve(stdout);
|
|
1651
1660
|
return;
|
|
1652
1661
|
}
|
|
1653
|
-
const suffix = stderr.trim() ? `: ${stderr.trim()}` :
|
|
1662
|
+
const suffix = stderr.trim() ? `: ${stderr.trim()}` : "";
|
|
1654
1663
|
reject(new Error(`${errorLabel} exited with code ${code}${suffix}`));
|
|
1655
1664
|
});
|
|
1656
1665
|
});
|
|
@@ -1658,51 +1667,51 @@ async function runProcessCapture({ command, args, timeoutMs, errorLabel, }) {
|
|
|
1658
1667
|
async function runProcessCaptureBuffer({ command, args, timeoutMs, errorLabel, }) {
|
|
1659
1668
|
return new Promise((resolve, reject) => {
|
|
1660
1669
|
const { proc, handle } = spawnTracked(command, args, {
|
|
1661
|
-
stdio: [
|
|
1670
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
1662
1671
|
label: errorLabel,
|
|
1663
1672
|
kind: errorLabel,
|
|
1664
1673
|
captureOutput: false,
|
|
1665
1674
|
});
|
|
1666
1675
|
const chunks = [];
|
|
1667
|
-
let stderr =
|
|
1668
|
-
let stderrBuffer =
|
|
1676
|
+
let stderr = "";
|
|
1677
|
+
let stderrBuffer = "";
|
|
1669
1678
|
const timeout = setTimeout(() => {
|
|
1670
|
-
proc.kill(
|
|
1679
|
+
proc.kill("SIGKILL");
|
|
1671
1680
|
reject(new Error(`${errorLabel} timed out`));
|
|
1672
1681
|
}, timeoutMs);
|
|
1673
1682
|
if (proc.stdout) {
|
|
1674
|
-
proc.stdout.on(
|
|
1683
|
+
proc.stdout.on("data", (chunk) => {
|
|
1675
1684
|
chunks.push(chunk);
|
|
1676
1685
|
});
|
|
1677
1686
|
}
|
|
1678
1687
|
if (proc.stderr) {
|
|
1679
|
-
proc.stderr.setEncoding(
|
|
1680
|
-
proc.stderr.on(
|
|
1688
|
+
proc.stderr.setEncoding("utf8");
|
|
1689
|
+
proc.stderr.on("data", (chunk) => {
|
|
1681
1690
|
if (stderr.length < 8192) {
|
|
1682
1691
|
stderr += chunk;
|
|
1683
1692
|
}
|
|
1684
1693
|
stderrBuffer += chunk;
|
|
1685
1694
|
const lines = stderrBuffer.split(/\r?\n/);
|
|
1686
|
-
stderrBuffer = lines.pop() ??
|
|
1695
|
+
stderrBuffer = lines.pop() ?? "";
|
|
1687
1696
|
for (const line of lines) {
|
|
1688
1697
|
if (line)
|
|
1689
|
-
handle?.appendOutput(
|
|
1698
|
+
handle?.appendOutput("stderr", line);
|
|
1690
1699
|
}
|
|
1691
1700
|
});
|
|
1692
1701
|
}
|
|
1693
|
-
proc.on(
|
|
1702
|
+
proc.on("error", (error) => {
|
|
1694
1703
|
clearTimeout(timeout);
|
|
1695
1704
|
reject(error);
|
|
1696
1705
|
});
|
|
1697
|
-
proc.on(
|
|
1706
|
+
proc.on("close", (code) => {
|
|
1698
1707
|
clearTimeout(timeout);
|
|
1699
1708
|
if (stderrBuffer.trim())
|
|
1700
|
-
handle?.appendOutput(
|
|
1709
|
+
handle?.appendOutput("stderr", stderrBuffer.trim());
|
|
1701
1710
|
if (code === 0) {
|
|
1702
1711
|
resolve(Buffer.concat(chunks));
|
|
1703
1712
|
return;
|
|
1704
1713
|
}
|
|
1705
|
-
const suffix = stderr.trim() ? `: ${stderr.trim()}` :
|
|
1714
|
+
const suffix = stderr.trim() ? `: ${stderr.trim()}` : "";
|
|
1706
1715
|
reject(new Error(`${errorLabel} exited with code ${code}${suffix}`));
|
|
1707
1716
|
});
|
|
1708
1717
|
});
|
|
@@ -1724,7 +1733,7 @@ async function renameSlidesWithTimestamps(slides, slidesDir) {
|
|
|
1724
1733
|
const renamed = [];
|
|
1725
1734
|
for (const slide of slides) {
|
|
1726
1735
|
const timestampLabel = slide.timestamp.toFixed(2);
|
|
1727
|
-
const filename = `slide_${slide.index.toString().padStart(4,
|
|
1736
|
+
const filename = `slide_${slide.index.toString().padStart(4, "0")}_${timestampLabel}s.png`;
|
|
1728
1737
|
const nextPath = path.join(slidesDir, filename);
|
|
1729
1738
|
if (slide.imagePath !== nextPath) {
|
|
1730
1739
|
await fs.rename(slide.imagePath, nextPath).catch(async () => {
|
|
@@ -1795,7 +1804,7 @@ async function runOcrOnSlides(slides, tesseractPath, workers, onProgress) {
|
|
|
1795
1804
|
};
|
|
1796
1805
|
}
|
|
1797
1806
|
catch {
|
|
1798
|
-
return { ...slide, ocrText:
|
|
1807
|
+
return { ...slide, ocrText: "", ocrConfidence: 0 };
|
|
1799
1808
|
}
|
|
1800
1809
|
});
|
|
1801
1810
|
const results = await runWithConcurrency(tasks, workers, onProgress ?? undefined);
|
|
@@ -1803,54 +1812,54 @@ async function runOcrOnSlides(slides, tesseractPath, workers, onProgress) {
|
|
|
1803
1812
|
}
|
|
1804
1813
|
async function runTesseract(tesseractPath, imagePath) {
|
|
1805
1814
|
return new Promise((resolve, reject) => {
|
|
1806
|
-
const args = [imagePath,
|
|
1815
|
+
const args = [imagePath, "stdout", "--oem", "3", "--psm", "6"];
|
|
1807
1816
|
const { proc, handle } = spawnTracked(tesseractPath, args, {
|
|
1808
|
-
stdio: [
|
|
1809
|
-
label:
|
|
1810
|
-
kind:
|
|
1817
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
1818
|
+
label: "tesseract",
|
|
1819
|
+
kind: "tesseract",
|
|
1811
1820
|
captureOutput: false,
|
|
1812
1821
|
});
|
|
1813
|
-
let stdout =
|
|
1814
|
-
let stderr =
|
|
1815
|
-
let stderrBuffer =
|
|
1822
|
+
let stdout = "";
|
|
1823
|
+
let stderr = "";
|
|
1824
|
+
let stderrBuffer = "";
|
|
1816
1825
|
const timeout = setTimeout(() => {
|
|
1817
|
-
proc.kill(
|
|
1818
|
-
reject(new Error(
|
|
1826
|
+
proc.kill("SIGKILL");
|
|
1827
|
+
reject(new Error("tesseract timed out"));
|
|
1819
1828
|
}, TESSERACT_TIMEOUT_MS);
|
|
1820
1829
|
if (proc.stdout) {
|
|
1821
|
-
proc.stdout.setEncoding(
|
|
1822
|
-
proc.stdout.on(
|
|
1830
|
+
proc.stdout.setEncoding("utf8");
|
|
1831
|
+
proc.stdout.on("data", (chunk) => {
|
|
1823
1832
|
stdout += chunk;
|
|
1824
1833
|
});
|
|
1825
1834
|
}
|
|
1826
1835
|
if (proc.stderr) {
|
|
1827
|
-
proc.stderr.setEncoding(
|
|
1828
|
-
proc.stderr.on(
|
|
1836
|
+
proc.stderr.setEncoding("utf8");
|
|
1837
|
+
proc.stderr.on("data", (chunk) => {
|
|
1829
1838
|
if (stderr.length < 8192) {
|
|
1830
1839
|
stderr += chunk;
|
|
1831
1840
|
}
|
|
1832
1841
|
stderrBuffer += chunk;
|
|
1833
1842
|
const lines = stderrBuffer.split(/\r?\n/);
|
|
1834
|
-
stderrBuffer = lines.pop() ??
|
|
1843
|
+
stderrBuffer = lines.pop() ?? "";
|
|
1835
1844
|
for (const line of lines) {
|
|
1836
1845
|
if (line)
|
|
1837
|
-
handle?.appendOutput(
|
|
1846
|
+
handle?.appendOutput("stderr", line);
|
|
1838
1847
|
}
|
|
1839
1848
|
});
|
|
1840
1849
|
}
|
|
1841
|
-
proc.on(
|
|
1850
|
+
proc.on("error", (error) => {
|
|
1842
1851
|
clearTimeout(timeout);
|
|
1843
1852
|
reject(error);
|
|
1844
1853
|
});
|
|
1845
|
-
proc.on(
|
|
1854
|
+
proc.on("close", (code) => {
|
|
1846
1855
|
clearTimeout(timeout);
|
|
1847
1856
|
if (stderrBuffer.trim())
|
|
1848
|
-
handle?.appendOutput(
|
|
1857
|
+
handle?.appendOutput("stderr", stderrBuffer.trim());
|
|
1849
1858
|
if (code === 0) {
|
|
1850
1859
|
resolve(stdout);
|
|
1851
1860
|
return;
|
|
1852
1861
|
}
|
|
1853
|
-
const suffix = stderr.trim() ? `: ${stderr.trim()}` :
|
|
1862
|
+
const suffix = stderr.trim() ? `: ${stderr.trim()}` : "";
|
|
1854
1863
|
reject(new Error(`tesseract exited with code ${code}${suffix}`));
|
|
1855
1864
|
});
|
|
1856
1865
|
});
|
|
@@ -1860,9 +1869,9 @@ function cleanOcrText(text) {
|
|
|
1860
1869
|
.split(/\r?\n/)
|
|
1861
1870
|
.map((line) => line.trim())
|
|
1862
1871
|
.filter((line) => line.length >= 2)
|
|
1863
|
-
.filter((line) => !(line.length > 20 && !line.includes(
|
|
1872
|
+
.filter((line) => !(line.length > 20 && !line.includes(" ")))
|
|
1864
1873
|
.filter((line) => /[a-z0-9]/i.test(line));
|
|
1865
|
-
return lines.join(
|
|
1874
|
+
return lines.join("\n");
|
|
1866
1875
|
}
|
|
1867
1876
|
function estimateOcrConfidence(text) {
|
|
1868
1877
|
if (!text)
|
|
@@ -1895,7 +1904,7 @@ async function writeSlidesJson(result, slidesDir) {
|
|
|
1895
1904
|
imagePath: serializeSlideImagePath(slidesDir, slide.imagePath),
|
|
1896
1905
|
})),
|
|
1897
1906
|
};
|
|
1898
|
-
await fs.writeFile(path.join(slidesDir,
|
|
1907
|
+
await fs.writeFile(path.join(slidesDir, "slides.json"), JSON.stringify(payload, null, 2), "utf8");
|
|
1899
1908
|
}
|
|
1900
1909
|
function buildDirectSourceId(url) {
|
|
1901
1910
|
const parsed = (() => {
|
|
@@ -1907,11 +1916,11 @@ function buildDirectSourceId(url) {
|
|
|
1907
1916
|
}
|
|
1908
1917
|
})();
|
|
1909
1918
|
const hostSlug = resolveHostSlug(parsed);
|
|
1910
|
-
const rawName = parsed ? path.basename(parsed.pathname) :
|
|
1911
|
-
const base = rawName.replace(/\.[a-z0-9]+$/i,
|
|
1919
|
+
const rawName = parsed ? path.basename(parsed.pathname) : "video";
|
|
1920
|
+
const base = rawName.replace(/\.[a-z0-9]+$/i, "").trim() || "video";
|
|
1912
1921
|
const slug = toSlug(base);
|
|
1913
|
-
const combined = [hostSlug, slug].filter(Boolean).join(
|
|
1914
|
-
const hash = createHash(
|
|
1922
|
+
const combined = [hostSlug, slug].filter(Boolean).join("-");
|
|
1923
|
+
const hash = createHash("sha1").update(url).digest("hex").slice(0, 8);
|
|
1915
1924
|
return combined ? `${combined}-${hash}` : `video-${hash}`;
|
|
1916
1925
|
}
|
|
1917
1926
|
function buildYoutubeSourceId(videoId) {
|
|
@@ -1921,8 +1930,8 @@ function resolveHostSlug(parsed) {
|
|
|
1921
1930
|
if (!parsed?.hostname)
|
|
1922
1931
|
return null;
|
|
1923
1932
|
const host = parsed.hostname.toLowerCase();
|
|
1924
|
-
if (host.includes(
|
|
1925
|
-
return
|
|
1933
|
+
if (host.includes("youtube.com") || host === "youtu.be" || host.includes("youtu.be")) {
|
|
1934
|
+
return "youtube";
|
|
1926
1935
|
}
|
|
1927
1936
|
const slug = toSlug(host);
|
|
1928
1937
|
return slug || null;
|
|
@@ -1930,13 +1939,13 @@ function resolveHostSlug(parsed) {
|
|
|
1930
1939
|
function toSlug(value) {
|
|
1931
1940
|
const normalized = value
|
|
1932
1941
|
.toLowerCase()
|
|
1933
|
-
.replace(/[^a-z0-9]+/g,
|
|
1934
|
-
.replace(/^-+|-+$/g,
|
|
1942
|
+
.replace(/[^a-z0-9]+/g, "-")
|
|
1943
|
+
.replace(/^-+|-+$/g, "");
|
|
1935
1944
|
if (!normalized)
|
|
1936
|
-
return
|
|
1945
|
+
return "";
|
|
1937
1946
|
const max = 64;
|
|
1938
1947
|
if (normalized.length <= max)
|
|
1939
1948
|
return normalized;
|
|
1940
|
-
return normalized.slice(0, max).replace(/-+$/g,
|
|
1949
|
+
return normalized.slice(0, max).replace(/-+$/g, "");
|
|
1941
1950
|
}
|
|
1942
1951
|
//# sourceMappingURL=extract.js.map
|