@steipete/summarize-core 0.9.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/dist/esm/content/index.js +5 -5
- package/dist/esm/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/client.js +23 -8
- package/dist/esm/content/link-preview/client.js.map +1 -1
- package/dist/esm/content/link-preview/content/article.js +84 -83
- package/dist/esm/content/link-preview/content/article.js.map +1 -1
- package/dist/esm/content/link-preview/content/cleaner.js +23 -20
- package/dist/esm/content/link-preview/content/cleaner.js.map +1 -1
- package/dist/esm/content/link-preview/content/constants.js.map +1 -1
- package/dist/esm/content/link-preview/content/fetcher.js +46 -40
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
- package/dist/esm/content/link-preview/content/firecrawl.js +18 -17
- package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -1
- package/dist/esm/content/link-preview/content/html.js +31 -28
- package/dist/esm/content/link-preview/content/html.js.map +1 -1
- package/dist/esm/content/link-preview/content/index.js +173 -90
- package/dist/esm/content/link-preview/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/content/jsonld.js +12 -12
- package/dist/esm/content/link-preview/content/jsonld.js.map +1 -1
- package/dist/esm/content/link-preview/content/parsers.js +20 -20
- package/dist/esm/content/link-preview/content/parsers.js.map +1 -1
- package/dist/esm/content/link-preview/content/podcast-utils.js +34 -34
- package/dist/esm/content/link-preview/content/podcast-utils.js.map +1 -1
- package/dist/esm/content/link-preview/content/readability.js +16 -15
- package/dist/esm/content/link-preview/content/readability.js.map +1 -1
- package/dist/esm/content/link-preview/content/twitter-utils.js +24 -11
- package/dist/esm/content/link-preview/content/twitter-utils.js.map +1 -1
- package/dist/esm/content/link-preview/content/types.js +1 -1
- package/dist/esm/content/link-preview/content/types.js.map +1 -1
- package/dist/esm/content/link-preview/content/utils.js +30 -22
- package/dist/esm/content/link-preview/content/utils.js.map +1 -1
- package/dist/esm/content/link-preview/content/video.js +19 -19
- package/dist/esm/content/link-preview/content/video.js.map +1 -1
- package/dist/esm/content/link-preview/content/visibility.js +121 -0
- package/dist/esm/content/link-preview/content/visibility.js.map +1 -0
- package/dist/esm/content/link-preview/content/youtube.js +10 -10
- package/dist/esm/content/link-preview/content/youtube.js.map +1 -1
- package/dist/esm/content/link-preview/deps.js +16 -16
- package/dist/esm/content/link-preview/deps.js.map +1 -1
- package/dist/esm/content/link-preview/fetch-with-timeout.js +4 -4
- package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -1
- package/dist/esm/content/link-preview/types.js +1 -1
- package/dist/esm/content/link-preview/types.js.map +1 -1
- package/dist/esm/content/transcript/cache.js +47 -22
- package/dist/esm/content/transcript/cache.js.map +1 -1
- package/dist/esm/content/transcript/index.js +71 -25
- package/dist/esm/content/transcript/index.js.map +1 -1
- package/dist/esm/content/transcript/normalize.js +10 -10
- package/dist/esm/content/transcript/normalize.js.map +1 -1
- package/dist/esm/content/transcript/parse.js +125 -13
- package/dist/esm/content/transcript/parse.js.map +1 -1
- package/dist/esm/content/transcript/providers/generic.js +112 -81
- package/dist/esm/content/transcript/providers/generic.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js +38 -36
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/apple.js +5 -5
- package/dist/esm/content/transcript/providers/podcast/apple.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/constants.js +2 -2
- package/dist/esm/content/transcript/providers/podcast/constants.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/itunes.js +44 -42
- package/dist/esm/content/transcript/providers/podcast/itunes.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/json.js +4 -4
- package/dist/esm/content/transcript/providers/podcast/json.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/media.js +66 -56
- package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/results.js +2 -2
- package/dist/esm/content/transcript/providers/podcast/results.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/rss.js +42 -28
- package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js +39 -38
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/spotify.js +32 -32
- package/dist/esm/content/transcript/providers/podcast/spotify.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast.js +52 -43
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
- package/dist/esm/content/transcript/providers/transcription-start.js +78 -0
- package/dist/esm/content/transcript/providers/transcription-start.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/api.js +74 -57
- package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/apify.js +7 -7
- package/dist/esm/content/transcript/providers/youtube/apify.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/captions.js +112 -74
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +161 -101
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube.js +97 -80
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
- package/dist/esm/content/transcript/timestamps.js +79 -0
- package/dist/esm/content/transcript/timestamps.js.map +1 -0
- package/dist/esm/content/transcript/transcription-config.js +14 -0
- package/dist/esm/content/transcript/transcription-config.js.map +1 -0
- package/dist/esm/content/transcript/utils.js +35 -35
- package/dist/esm/content/transcript/utils.js.map +1 -1
- package/dist/esm/content/url.js +59 -28
- package/dist/esm/content/url.js.map +1 -1
- package/dist/esm/index.js +4 -3
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/language.js +77 -77
- package/dist/esm/language.js.map +1 -1
- package/dist/esm/openai/base-url.js +35 -0
- package/dist/esm/openai/base-url.js.map +1 -0
- package/dist/esm/processes.js +121 -0
- package/dist/esm/processes.js.map +1 -0
- package/dist/esm/prompts/cli.js +17 -17
- package/dist/esm/prompts/cli.js.map +1 -1
- package/dist/esm/prompts/file.js +56 -40
- package/dist/esm/prompts/file.js.map +1 -1
- package/dist/esm/prompts/format.js +2 -2
- package/dist/esm/prompts/format.js.map +1 -1
- package/dist/esm/prompts/index.js +5 -4
- package/dist/esm/prompts/index.js.map +1 -1
- package/dist/esm/prompts/link-summary.js +96 -46
- package/dist/esm/prompts/link-summary.js.map +1 -1
- package/dist/esm/prompts/summary-lengths.js +10 -10
- package/dist/esm/prompts/summary-lengths.js.map +1 -1
- package/dist/esm/prompts/summary-system.js +13 -0
- package/dist/esm/prompts/summary-system.js.map +1 -0
- package/dist/esm/shared/contracts.js +1 -1
- package/dist/esm/shared/contracts.js.map +1 -1
- package/dist/esm/transcription/onnx-cli.js +319 -0
- package/dist/esm/transcription/onnx-cli.js.map +1 -0
- package/dist/esm/transcription/whisper/constants.js +3 -3
- package/dist/esm/transcription/whisper/constants.js.map +1 -1
- package/dist/esm/transcription/whisper/core.js +209 -52
- package/dist/esm/transcription/whisper/core.js.map +1 -1
- package/dist/esm/transcription/whisper/fal.js +14 -14
- package/dist/esm/transcription/whisper/fal.js.map +1 -1
- package/dist/esm/transcription/whisper/ffmpeg.js +134 -82
- package/dist/esm/transcription/whisper/ffmpeg.js.map +1 -1
- package/dist/esm/transcription/whisper/groq.js +46 -0
- package/dist/esm/transcription/whisper/groq.js.map +1 -0
- package/dist/esm/transcription/whisper/openai.js +19 -13
- package/dist/esm/transcription/whisper/openai.js.map +1 -1
- package/dist/esm/transcription/whisper/utils.js +19 -19
- package/dist/esm/transcription/whisper/utils.js.map +1 -1
- package/dist/esm/transcription/whisper/whisper-cpp.js +68 -58
- package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -1
- package/dist/esm/transcription/whisper.js +4 -4
- package/dist/esm/transcription/whisper.js.map +1 -1
- package/dist/types/content/cache/types.d.ts +25 -1
- package/dist/types/content/index.d.ts +7 -7
- package/dist/types/content/link-preview/client.d.ts +9 -4
- package/dist/types/content/link-preview/content/cleaner.d.ts +1 -0
- package/dist/types/content/link-preview/content/fetcher.d.ts +2 -2
- package/dist/types/content/link-preview/content/firecrawl.d.ts +8 -7
- package/dist/types/content/link-preview/content/html.d.ts +9 -8
- package/dist/types/content/link-preview/content/index.d.ts +3 -3
- package/dist/types/content/link-preview/content/twitter-utils.d.ts +1 -0
- package/dist/types/content/link-preview/content/types.d.ts +13 -8
- package/dist/types/content/link-preview/content/utils.d.ts +3 -3
- package/dist/types/content/link-preview/content/video.d.ts +1 -1
- package/dist/types/content/link-preview/content/visibility.d.ts +1 -0
- package/dist/types/content/link-preview/deps.d.ts +44 -28
- package/dist/types/content/link-preview/types.d.ts +10 -4
- package/dist/types/content/transcript/cache.d.ts +9 -6
- package/dist/types/content/transcript/index.d.ts +9 -6
- package/dist/types/content/transcript/parse.d.ts +7 -0
- package/dist/types/content/transcript/providers/generic.d.ts +1 -1
- package/dist/types/content/transcript/providers/podcast/apple-flow.d.ts +2 -2
- package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +4 -4
- package/dist/types/content/transcript/providers/podcast/media.d.ts +10 -6
- package/dist/types/content/transcript/providers/podcast/results.d.ts +3 -3
- package/dist/types/content/transcript/providers/podcast/rss.d.ts +2 -0
- package/dist/types/content/transcript/providers/podcast/spotify-flow.d.ts +2 -2
- package/dist/types/content/transcript/providers/podcast/spotify.d.ts +2 -2
- package/dist/types/content/transcript/providers/podcast.d.ts +5 -5
- package/dist/types/content/transcript/providers/transcription-start.d.ts +32 -0
- package/dist/types/content/transcript/providers/youtube/api.d.ts +7 -2
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +6 -1
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +13 -6
- package/dist/types/content/transcript/providers/youtube.d.ts +1 -1
- package/dist/types/content/transcript/timestamps.d.ts +5 -0
- package/dist/types/content/transcript/transcription-config.d.ts +15 -0
- package/dist/types/content/transcript/types.d.ts +15 -7
- package/dist/types/content/transcript/utils.d.ts +1 -1
- package/dist/types/content/url.d.ts +5 -3
- package/dist/types/index.d.ts +5 -4
- package/dist/types/language.d.ts +4 -4
- package/dist/types/openai/base-url.d.ts +14 -0
- package/dist/types/processes.d.ts +50 -0
- package/dist/types/prompts/cli.d.ts +3 -3
- package/dist/types/prompts/file.d.ts +2 -2
- package/dist/types/prompts/index.d.ts +6 -5
- package/dist/types/prompts/link-summary.d.ts +9 -4
- package/dist/types/prompts/summary-lengths.d.ts +1 -1
- package/dist/types/prompts/summary-system.d.ts +1 -0
- package/dist/types/transcription/onnx-cli.d.ts +25 -0
- package/dist/types/transcription/whisper/core.d.ts +10 -3
- package/dist/types/transcription/whisper/ffmpeg.d.ts +5 -0
- package/dist/types/transcription/whisper/groq.d.ts +2 -0
- package/dist/types/transcription/whisper/openai.d.ts +6 -1
- package/dist/types/transcription/whisper/types.d.ts +1 -1
- package/dist/types/transcription/whisper/whisper-cpp.d.ts +1 -1
- package/dist/types/transcription/whisper.d.ts +5 -5
- package/package.json +24 -18
- package/LICENSE +0 -21
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/fetcher.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/fetcher.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,eAAe,GAA2B;IAC9C,YAAY,EACV,iHAAiH;IACnH,MAAM,EACJ,kGAAkG;IACpG,iBAAiB,EAAE,gBAAgB;IACnC,eAAe,EAAE,UAAU;IAC3B,MAAM,EAAE,UAAU;CACnB,CAAC;AAEF,MAAM,0BAA0B,GAAG,IAAI,CAAC;AAYxC,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,SAAuB,EACvB,GAAW,EACX,EACE,SAAS,EACT,UAAU,MACiF,EAAE;IAE/F,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,kBAAkB,EAAE,GAAG,EAAE,CAAC,CAAC;IAEhD,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,kBAAkB,GACtB,OAAO,SAAS,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;QACzD,CAAC,CAAC,SAAS;QACX,CAAC,CAAC,0BAA0B,CAAC;IACjC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE;QAC9B,UAAU,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC,EAAE,kBAAkB,CAAC,CAAC;IAEvB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,GAAG,EAAE;YACpC,OAAO,EAAE,eAAe;YACxB,QAAQ,EAAE,QAAQ;YAClB,MAAM,EAAE,UAAU,CAAC,MAAM;SAC1B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,yCAAyC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;QAC/E,CAAC;QAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,GAAG,CAAC;QAE7C,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,WAAW,EAAE,IAAI,IAAI,CAAC;QAChF,IACE,WAAW;YACX,CAAC,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC;YAClC,CAAC,WAAW,CAAC,QAAQ,CAAC,uBAAuB,CAAC;YAC9C,CAAC,WAAW,CAAC,QAAQ,CAAC,iBAAiB,CAAC;YACxC,CAAC,WAAW,CAAC,QAAQ,CAAC,UAAU,CAAC;YACjC,CAAC,WAAW,CAAC,QAAQ,CAAC,qBAAqB,CAAC;YAC5C,CAAC,WAAW,CAAC,QAAQ,CAAC,sBAAsB,CAAC;YAC7C,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAAC,EAChC,CAAC;YACD,MAAM,IAAI,KAAK,CAAC,qDAAqD,WAAW,EAAE,CAAC,CAAC;QACtF,CAAC;QAED,MAAM,UAAU,GAAG,CAAC,GAAG,EAAE;YACvB,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;YACnD,IAAI,CAAC,GAAG;gBAAE,OAAO,IAAI,CAAC;YACtB,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;YAC3B,OAAO,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAC3E,CAAC,CAAC,EAAE,CAAC;QAEL,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QAC3B,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC;YACxD,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,GAAG,EAAE,eAAe,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC;YACnF,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;QAClC,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;QAClC,IAAI,eAAe,GAAG,CAAC,CAAC;QACxB,IAAI,IAAI,GAAG,EAAE,CAAC;QAEd,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,qBAAqB,EAAE,GAAG,EAAE,eAAe,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC;QAEnF,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YAC5C,IAAI,IAAI;gBAAE,MAAM;YAChB,IAAI,CAAC,KAAK;gBAAE,SAAS;YACrB,eAAe,IAAI,KAAK,CAAC,UAAU,CAAC;YACpC,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;YAChD,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,qBAAqB,EAAE,GAAG,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC,CAAC;QAClF,CAAC;QAED,IAAI,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACzB,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,GAAG,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC,CAAC;QAC5E,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAClC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,YAAY,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YACjE,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACtD,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,GAAW,EACX,mBAA+C,EAC/C,UAKI,EAAE;IAEN,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;IACpC,MAAM,SAAS,GAAc,OAAO,CAAC,SAAS,IAAI,SAAS,CAAC;IAC5D,MAAM,UAAU,GAAG,OAAO,OAAO,CAAC,UAAU,KAAK,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC;IACxF,MAAM,MAAM,GAAG,OAAO,OAAO,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IAC1E,MAAM,WAAW,GAAyB;QACxC,SAAS,EAAE,KAAK;QAChB,IAAI,EAAE,KAAK;QACX,SAAS;QACT,WAAW,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;QAC5D,KAAK,EAAE,IAAI;KACZ,CAAC;IAEF,IAAI,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC;QACtB,WAAW,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,mCAAmC,CAAC,CAAC;QACvF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;IACxC,CAAC;IAED,IAAI,CAAC,mBAAmB,EAAE,CAAC;QACzB,WAAW,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,6BAA6B,CAAC,CAAC;QACjF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;IACxC,CAAC;IAED,WAAW,CAAC,SAAS,GAAG,IAAI,CAAC;IAC7B,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,IAAI,WAAW,EAAE,CAAC,CAAC;IAE9E,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,mBAAmB,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;QACzE,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,WAAW,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,uCAAuC,CAAC,CAAC;YAC3F,UAAU,EAAE,CAAC;gBACX,IAAI,EAAE,gBAAgB;gBACtB,GAAG;gBACH,EAAE,EAAE,KAAK;gBACT,aAAa,EAAE,IAAI;gBACnB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YACH,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;QACxC,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;QAClC,MAAM,aAAa,GACjB,OAAO,OAAO,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC;QAC5F,MAAM,SAAS,GACb,OAAO,OAAO,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC;QACpF,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,GAAG,EAAE,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE,SAAS,EAAE,CAAC,CAAC;QAElF,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;IAClC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,oBAAoB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAC/E,CAAC;QACF,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,EAAE,aAAa,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC/F,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;IACxC,CAAC;AACH,CAAC"}
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import { resolveTranscriptForLink } from
|
|
2
|
-
import { extractArticleContent, extractPlainText } from
|
|
3
|
-
import { normalizeForPrompt } from
|
|
4
|
-
import { BLOCKED_HTML_HINT_PATTERN, MIN_HTML_CONTENT_CHARACTERS, MIN_HTML_DOCUMENT_CHARACTERS_FOR_FALLBACK, MIN_METADATA_DESCRIPTION_CHARACTERS, READABILITY_RELATIVE_THRESHOLD, } from
|
|
5
|
-
import { extractJsonLdContent } from
|
|
6
|
-
import { extractMetadataFromFirecrawl, extractMetadataFromHtml } from
|
|
7
|
-
import { isPodcastHost, isPodcastLikeJsonLdType } from
|
|
8
|
-
import { appendNote, ensureTranscriptDiagnostics, finalizeExtractedLinkContent, pickFirstText, safeHostname, selectBaseContent, } from
|
|
9
|
-
import { detectPrimaryVideoFromHtml } from
|
|
1
|
+
import { resolveTranscriptForLink } from "../../transcript/index.js";
|
|
2
|
+
import { extractArticleContent, extractPlainText } from "./article.js";
|
|
3
|
+
import { normalizeForPrompt } from "./cleaner.js";
|
|
4
|
+
import { BLOCKED_HTML_HINT_PATTERN, MIN_HTML_CONTENT_CHARACTERS, MIN_HTML_DOCUMENT_CHARACTERS_FOR_FALLBACK, MIN_METADATA_DESCRIPTION_CHARACTERS, READABILITY_RELATIVE_THRESHOLD, } from "./constants.js";
|
|
5
|
+
import { extractJsonLdContent } from "./jsonld.js";
|
|
6
|
+
import { extractMetadataFromFirecrawl, extractMetadataFromHtml } from "./parsers.js";
|
|
7
|
+
import { isPodcastHost, isPodcastLikeJsonLdType } from "./podcast-utils.js";
|
|
8
|
+
import { appendNote, ensureTranscriptDiagnostics, finalizeExtractedLinkContent, pickFirstText, safeHostname, selectBaseContent, } from "./utils.js";
|
|
9
|
+
import { detectPrimaryVideoFromHtml } from "./video.js";
|
|
10
10
|
export function shouldFallbackToFirecrawl(html) {
|
|
11
11
|
const plainText = normalizeForPrompt(extractPlainText(html));
|
|
12
12
|
if (BLOCKED_HTML_HINT_PATTERN.test(plainText))
|
|
@@ -20,10 +20,10 @@ export function shouldFallbackToFirecrawl(html) {
|
|
|
20
20
|
// the HTML document itself is large (SSR/app-shell pages, blocked pages without a match, etc.).
|
|
21
21
|
return html.length >= MIN_HTML_DOCUMENT_CHARACTERS_FOR_FALLBACK;
|
|
22
22
|
}
|
|
23
|
-
export async function buildResultFromFirecrawl({ url, payload, cacheMode, maxCharacters, youtubeTranscriptMode, mediaTranscriptMode, firecrawlDiagnostics, markdownRequested, deps, }) {
|
|
24
|
-
const normalizedMarkdown = normalizeForPrompt(payload.markdown ??
|
|
23
|
+
export async function buildResultFromFirecrawl({ url, payload, cacheMode, maxCharacters, youtubeTranscriptMode, mediaTranscriptMode, transcriptTimestamps, firecrawlDiagnostics, markdownRequested, deps, }) {
|
|
24
|
+
const normalizedMarkdown = normalizeForPrompt(payload.markdown ?? "");
|
|
25
25
|
if (normalizedMarkdown.length === 0) {
|
|
26
|
-
firecrawlDiagnostics.notes = appendNote(firecrawlDiagnostics.notes,
|
|
26
|
+
firecrawlDiagnostics.notes = appendNote(firecrawlDiagnostics.notes, "Firecrawl markdown normalization yielded empty text");
|
|
27
27
|
return null;
|
|
28
28
|
}
|
|
29
29
|
const jsonLd = payload.html ? extractJsonLdContent(payload.html) : null;
|
|
@@ -31,6 +31,7 @@ export async function buildResultFromFirecrawl({ url, payload, cacheMode, maxCha
|
|
|
31
31
|
const transcriptResolution = await resolveTranscriptForLink(url, payload.html ?? null, deps, {
|
|
32
32
|
youtubeTranscriptMode,
|
|
33
33
|
mediaTranscriptMode,
|
|
34
|
+
transcriptTimestamps,
|
|
34
35
|
cacheMode,
|
|
35
36
|
});
|
|
36
37
|
const htmlMetadata = payload.html
|
|
@@ -44,7 +45,7 @@ export async function buildResultFromFirecrawl({ url, payload, cacheMode, maxCha
|
|
|
44
45
|
htmlMetadata.description,
|
|
45
46
|
]);
|
|
46
47
|
const siteName = pickFirstText([metadata.siteName, htmlMetadata.siteName, safeHostname(url)]);
|
|
47
|
-
const descriptionCandidate = description ? normalizeForPrompt(description) :
|
|
48
|
+
const descriptionCandidate = description ? normalizeForPrompt(description) : "";
|
|
48
49
|
const preferDescription = descriptionCandidate.length >= MIN_METADATA_DESCRIPTION_CHARACTERS &&
|
|
49
50
|
(isPodcastJsonLd ||
|
|
50
51
|
isPodcastHost(url) ||
|
|
@@ -53,11 +54,11 @@ export async function buildResultFromFirecrawl({ url, payload, cacheMode, maxCha
|
|
|
53
54
|
const baseCandidate = preferDescription ? descriptionCandidate : normalizedMarkdown;
|
|
54
55
|
const baseContent = selectBaseContent(baseCandidate, transcriptResolution.text);
|
|
55
56
|
if (baseContent.length === 0) {
|
|
56
|
-
firecrawlDiagnostics.notes = appendNote(firecrawlDiagnostics.notes,
|
|
57
|
+
firecrawlDiagnostics.notes = appendNote(firecrawlDiagnostics.notes, "Firecrawl produced content that normalized to an empty string");
|
|
57
58
|
return null;
|
|
58
59
|
}
|
|
59
60
|
firecrawlDiagnostics.used = true;
|
|
60
|
-
const transcriptDiagnostics = ensureTranscriptDiagnostics(transcriptResolution, cacheMode ??
|
|
61
|
+
const transcriptDiagnostics = ensureTranscriptDiagnostics(transcriptResolution, cacheMode ?? "default");
|
|
61
62
|
const video = payload.html ? detectPrimaryVideoFromHtml(payload.html, url) : null;
|
|
62
63
|
const isVideoOnly = !transcriptResolution.text &&
|
|
63
64
|
normalizedMarkdown.length < MIN_HTML_CONTENT_CHARACTERS &&
|
|
@@ -73,12 +74,12 @@ export async function buildResultFromFirecrawl({ url, payload, cacheMode, maxCha
|
|
|
73
74
|
video,
|
|
74
75
|
isVideoOnly,
|
|
75
76
|
diagnostics: {
|
|
76
|
-
strategy:
|
|
77
|
+
strategy: "firecrawl",
|
|
77
78
|
firecrawl: firecrawlDiagnostics,
|
|
78
79
|
markdown: {
|
|
79
80
|
requested: markdownRequested,
|
|
80
81
|
used: true,
|
|
81
|
-
provider:
|
|
82
|
+
provider: "firecrawl",
|
|
82
83
|
},
|
|
83
84
|
transcript: transcriptDiagnostics,
|
|
84
85
|
},
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"firecrawl.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/firecrawl.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"firecrawl.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/firecrawl.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AACrE,OAAO,EAAE,qBAAqB,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AACvE,OAAO,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAClD,OAAO,EACL,yBAAyB,EACzB,2BAA2B,EAC3B,yCAAyC,EACzC,mCAAmC,EACnC,8BAA8B,GAC/B,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,EAAE,4BAA4B,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AACrF,OAAO,EAAE,aAAa,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAC5E,OAAO,EACL,UAAU,EACV,2BAA2B,EAC3B,4BAA4B,EAC5B,aAAa,EACb,YAAY,EACZ,iBAAiB,GAClB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,0BAA0B,EAAE,MAAM,YAAY,CAAC;AAExD,MAAM,UAAU,yBAAyB,CAAC,IAAY;IACpD,MAAM,SAAS,GAAG,kBAAkB,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7D,IAAI,yBAAyB,CAAC,IAAI,CAAC,SAAS,CAAC;QAAE,OAAO,IAAI,CAAC;IAC3D,MAAM,UAAU,GAAG,kBAAkB,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC;IACnE,IAAI,UAAU,CAAC,MAAM,IAAI,2BAA2B,EAAE,CAAC;QACrD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,qGAAqG;IACrG,mGAAmG;IACnG,gGAAgG;IAChG,OAAO,IAAI,CAAC,MAAM,IAAI,yCAAyC,CAAC;AAClE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,EAC7C,GAAG,EACH,OAAO,EACP,SAAS,EACT,aAAa,EACb,qBAAqB,EACrB,mBAAmB,EACnB,oBAAoB,EACpB,oBAAoB,EACpB,iBAAiB,EACjB,IAAI,GAYL;IACC,MAAM,kBAAkB,GAAG,kBAAkB,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC;IACtE,IAAI,kBAAkB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpC,oBAAoB,CAAC,KAAK,GAAG,UAAU,CACrC,oBAAoB,CAAC,KAAK,EAC1B,qDAAqD,CACtD,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,oBAAoB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACxE,MAAM,eAAe,GAAG,uBAAuB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAE9D,MAAM,oBAAoB,GAAG,MAAM,wBAAwB,CAAC,GAAG,EAAE,OAAO,CAAC,IAAI,IAAI,IAAI,EAAE,IAAI,EAAE;QAC3F,qBAAqB;QACrB,mBAAmB;QACnB,oBAAoB;QACpB,SAAS;KACV,CAAC,CAAC;IACH,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI;QAC/B,CAAC,CAAC,uBAAuB,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;QAC5C,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;IACvD,MAAM,QAAQ,GAAG,4BAA4B,CAAC,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC;IAExE,MAAM,KAAK,GAAG,aAAa,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC;IACjF,MAAM,WAAW,GAAG,aAAa,CAAC;QAChC,MAAM,EAAE,WAAW;QACnB,QAAQ,CAAC,WAAW;QACpB,YAAY,CAAC,WAAW;KACzB,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,aAAa,CAAC,CAAC,QAAQ,CAAC,QAAQ,EAAE,YAAY,CAAC,QAAQ,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAE9F,MAAM,oBAAoB,GAAG,WAAW,CAAC,CAAC,CAAC,kBAAkB,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAChF,MAAM,iBAAiB,GACrB,oBAAoB,CAAC,MAAM,IAAI,mCAAmC;QAClE,CAAC,eAAe;YACd,aAAa,CAAC,GAAG,CAAC;YAClB,kBAAkB,CAAC,MAAM,GAAG,2BAA2B;YACvD,oBAAoB,CAAC,MAAM,IAAI,kBAAkB,CAAC,MAAM,GAAG,8BAA8B,CAAC,CAAC;IAC/F,MAAM,aAAa,GAAG,iBAAiB,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,kBAAkB,CAAC;IACpF,MAAM,WAAW,GAAG,iBAAiB,CAAC,aAAa,EAAE,oBAAoB,CAAC,IAAI,CAAC,CAAC;IAChF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,oBAAoB,CAAC,KAAK,GAAG,UAAU,CACrC,oBAAoB,CAAC,KAAK,EAC1B,+DAA+D,CAChE,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IAED,oBAAoB,CAAC,IAAI,GAAG,IAAI,CAAC;IAEjC,MAAM,qBAAqB,GAAG,2BAA2B,CACvD,oBAAoB,EACpB,SAAS,IAAI,SAAS,CACvB,CAAC;IAEF,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,0BAA0B,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAClF,MAAM,WAAW,GACf,CAAC,oBAAoB,CAAC,IAAI;QAC1B,kBAAkB,CAAC,MAAM,GAAG,2BAA2B;QACvD,KAAK,KAAK,IAAI,CAAC;IAEjB,OAAO,4BAA4B,CAAC;QAClC,GAAG;QACH,WAAW;QACX,aAAa;QACb,KAAK;QACL,WAAW;QACX,QAAQ;QACR,oBAAoB;QACpB,KAAK;QACL,WAAW;QACX,WAAW,EAAE;YACX,QAAQ,EAAE,WAAW;YACrB,SAAS,EAAE,oBAAoB;YAC/B,QAAQ,EAAE;gBACR,SAAS,EAAE,iBAAiB;gBAC5B,IAAI,EAAE,IAAI;gBACV,QAAQ,EAAE,WAAW;aACtB;YACD,UAAU,EAAE,qBAAqB;SAClC;KACF,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
import { resolveTranscriptForLink } from
|
|
2
|
-
import { extractYouTubeVideoId, isYouTubeUrl, isYouTubeVideoUrl } from
|
|
3
|
-
import { extractArticleContent, sanitizeHtmlForMarkdownConversion } from
|
|
4
|
-
import { normalizeForPrompt } from
|
|
5
|
-
import { MIN_HTML_CONTENT_CHARACTERS, MIN_METADATA_DESCRIPTION_CHARACTERS, MIN_READABILITY_CONTENT_CHARACTERS, READABILITY_RELATIVE_THRESHOLD, } from
|
|
6
|
-
import { extractJsonLdContent } from
|
|
7
|
-
import { extractMetadataFromHtml } from
|
|
8
|
-
import { isPodcastHost, isPodcastLikeJsonLdType } from
|
|
9
|
-
import { extractReadabilityFromHtml, toReadabilityHtml } from
|
|
10
|
-
import { ensureTranscriptDiagnostics, finalizeExtractedLinkContent, pickFirstText, selectBaseContent, } from
|
|
11
|
-
import { detectPrimaryVideoFromHtml } from
|
|
12
|
-
import { extractYouTubeShortDescription } from
|
|
1
|
+
import { resolveTranscriptForLink } from "../../transcript/index.js";
|
|
2
|
+
import { extractYouTubeVideoId, isYouTubeUrl, isYouTubeVideoUrl } from "../../url.js";
|
|
3
|
+
import { extractArticleContent, sanitizeHtmlForMarkdownConversion } from "./article.js";
|
|
4
|
+
import { normalizeForPrompt } from "./cleaner.js";
|
|
5
|
+
import { MIN_HTML_CONTENT_CHARACTERS, MIN_METADATA_DESCRIPTION_CHARACTERS, MIN_READABILITY_CONTENT_CHARACTERS, READABILITY_RELATIVE_THRESHOLD, } from "./constants.js";
|
|
6
|
+
import { extractJsonLdContent } from "./jsonld.js";
|
|
7
|
+
import { extractMetadataFromHtml } from "./parsers.js";
|
|
8
|
+
import { isPodcastHost, isPodcastLikeJsonLdType } from "./podcast-utils.js";
|
|
9
|
+
import { extractReadabilityFromHtml, toReadabilityHtml } from "./readability.js";
|
|
10
|
+
import { ensureTranscriptDiagnostics, finalizeExtractedLinkContent, pickFirstText, selectBaseContent, } from "./utils.js";
|
|
11
|
+
import { detectPrimaryVideoFromHtml } from "./video.js";
|
|
12
|
+
import { extractYouTubeShortDescription } from "./youtube.js";
|
|
13
13
|
const LEADING_CONTROL_PATTERN = /^[\s\p{Cc}]+/u;
|
|
14
14
|
function stripLeadingTitle(content, title) {
|
|
15
15
|
if (!(content && title)) {
|
|
@@ -24,12 +24,12 @@ function stripLeadingTitle(content, title) {
|
|
|
24
24
|
return content;
|
|
25
25
|
}
|
|
26
26
|
const remainderOriginal = trimmedContent.slice(normalizedTitle.length);
|
|
27
|
-
const remainder = remainderOriginal.replace(LEADING_CONTROL_PATTERN,
|
|
27
|
+
const remainder = remainderOriginal.replace(LEADING_CONTROL_PATTERN, "");
|
|
28
28
|
return remainder;
|
|
29
29
|
}
|
|
30
|
-
export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCharacters, youtubeTranscriptMode, mediaTranscriptMode, firecrawlDiagnostics, markdownRequested, markdownMode, timeoutMs, deps, readabilityCandidate, }) {
|
|
30
|
+
export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCharacters, youtubeTranscriptMode, mediaTranscriptMode, transcriptTimestamps, firecrawlDiagnostics, markdownRequested, markdownMode, timeoutMs, deps, readabilityCandidate, }) {
|
|
31
31
|
if (isYouTubeVideoUrl(url) && !extractYouTubeVideoId(url)) {
|
|
32
|
-
throw new Error(
|
|
32
|
+
throw new Error("Invalid YouTube video id in URL");
|
|
33
33
|
}
|
|
34
34
|
const { title, description, siteName } = extractMetadataFromHtml(html, url);
|
|
35
35
|
const jsonLd = extractJsonLdContent(html);
|
|
@@ -37,12 +37,12 @@ export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCha
|
|
|
37
37
|
const mergedDescription = pickFirstText([jsonLd?.description, description]);
|
|
38
38
|
const isPodcastJsonLd = isPodcastLikeJsonLdType(jsonLd?.type);
|
|
39
39
|
const readability = readabilityCandidate ?? (await extractReadabilityFromHtml(html, url));
|
|
40
|
-
const readabilityText = readability?.text ? normalizeForPrompt(readability.text) :
|
|
40
|
+
const readabilityText = readability?.text ? normalizeForPrompt(readability.text) : "";
|
|
41
41
|
const readabilityHtml = toReadabilityHtml(readability);
|
|
42
42
|
const normalizedSegmentsFromHtml = normalizeForPrompt(extractArticleContent(html));
|
|
43
43
|
const normalizedSegmentsFromReadabilityHtml = readabilityHtml
|
|
44
44
|
? normalizeForPrompt(extractArticleContent(readabilityHtml))
|
|
45
|
-
:
|
|
45
|
+
: "";
|
|
46
46
|
const preferReadabilityHtml = normalizedSegmentsFromReadabilityHtml.length >= MIN_READABILITY_CONTENT_CHARACTERS &&
|
|
47
47
|
(normalizedSegmentsFromHtml.length < MIN_HTML_CONTENT_CHARACTERS ||
|
|
48
48
|
normalizedSegmentsFromReadabilityHtml.length >=
|
|
@@ -56,7 +56,7 @@ export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCha
|
|
|
56
56
|
readabilityText.length >= normalizedSegmentsFromHtml.length * READABILITY_RELATIVE_THRESHOLD);
|
|
57
57
|
const preferReadability = preferReadabilityHtml || preferReadabilityText;
|
|
58
58
|
const effectiveNormalized = preferReadabilityText ? readabilityText : normalizedSegments;
|
|
59
|
-
const descriptionCandidate = mergedDescription ? normalizeForPrompt(mergedDescription) :
|
|
59
|
+
const descriptionCandidate = mergedDescription ? normalizeForPrompt(mergedDescription) : "";
|
|
60
60
|
const preferDescription = descriptionCandidate.length >= MIN_METADATA_DESCRIPTION_CHARACTERS &&
|
|
61
61
|
(isPodcastJsonLd ||
|
|
62
62
|
isPodcastHost(url) ||
|
|
@@ -70,6 +70,7 @@ export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCha
|
|
|
70
70
|
const transcriptResolution = await resolveTranscriptForLink(url, html, deps, {
|
|
71
71
|
youtubeTranscriptMode,
|
|
72
72
|
mediaTranscriptMode,
|
|
73
|
+
transcriptTimestamps,
|
|
73
74
|
cacheMode,
|
|
74
75
|
});
|
|
75
76
|
const youtubeDescription = transcriptResolution.text === null ? extractYouTubeShortDescription(html) : null;
|
|
@@ -80,7 +81,7 @@ export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCha
|
|
|
80
81
|
if (baseContent === normalizedSegments) {
|
|
81
82
|
baseContent = stripLeadingTitle(baseContent, mergedTitle ?? title);
|
|
82
83
|
}
|
|
83
|
-
const transcriptDiagnostics = ensureTranscriptDiagnostics(transcriptResolution, cacheMode ??
|
|
84
|
+
const transcriptDiagnostics = ensureTranscriptDiagnostics(transcriptResolution, cacheMode ?? "default");
|
|
84
85
|
const markdownDiagnostics = await (async () => {
|
|
85
86
|
if (!markdownRequested) {
|
|
86
87
|
return { requested: false, used: false, provider: null, notes: null };
|
|
@@ -90,7 +91,7 @@ export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCha
|
|
|
90
91
|
requested: true,
|
|
91
92
|
used: false,
|
|
92
93
|
provider: null,
|
|
93
|
-
notes:
|
|
94
|
+
notes: "Skipping Markdown conversion for YouTube URLs",
|
|
94
95
|
};
|
|
95
96
|
}
|
|
96
97
|
if (!deps.convertHtmlToMarkdown) {
|
|
@@ -98,11 +99,11 @@ export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCha
|
|
|
98
99
|
requested: true,
|
|
99
100
|
used: false,
|
|
100
101
|
provider: null,
|
|
101
|
-
notes:
|
|
102
|
+
notes: "No HTML→Markdown converter configured",
|
|
102
103
|
};
|
|
103
104
|
}
|
|
104
105
|
try {
|
|
105
|
-
const htmlForMarkdown = markdownMode ===
|
|
106
|
+
const htmlForMarkdown = markdownMode === "readability" && readabilityHtml ? readabilityHtml : html;
|
|
106
107
|
const sanitizedHtml = sanitizeHtmlForMarkdownConversion(htmlForMarkdown);
|
|
107
108
|
const markdown = await deps.convertHtmlToMarkdown({
|
|
108
109
|
url,
|
|
@@ -117,16 +118,16 @@ export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCha
|
|
|
117
118
|
requested: true,
|
|
118
119
|
used: false,
|
|
119
120
|
provider: null,
|
|
120
|
-
notes:
|
|
121
|
+
notes: "HTML→Markdown conversion returned empty content",
|
|
121
122
|
};
|
|
122
123
|
}
|
|
123
124
|
baseContent = normalizedMarkdown;
|
|
124
125
|
return {
|
|
125
126
|
requested: true,
|
|
126
127
|
used: true,
|
|
127
|
-
provider:
|
|
128
|
-
notes: markdownMode ===
|
|
129
|
-
?
|
|
128
|
+
provider: "llm",
|
|
129
|
+
notes: markdownMode === "readability" && readabilityHtml
|
|
130
|
+
? "Readability HTML used for markdown input"
|
|
130
131
|
: null,
|
|
131
132
|
};
|
|
132
133
|
}
|
|
@@ -141,7 +142,9 @@ export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCha
|
|
|
141
142
|
}
|
|
142
143
|
})();
|
|
143
144
|
const video = detectPrimaryVideoFromHtml(html, url);
|
|
144
|
-
const isVideoOnly = !transcriptResolution.text &&
|
|
145
|
+
const isVideoOnly = !transcriptResolution.text &&
|
|
146
|
+
baseContent.length < MIN_HTML_CONTENT_CHARACTERS &&
|
|
147
|
+
video !== null;
|
|
145
148
|
return finalizeExtractedLinkContent({
|
|
146
149
|
url,
|
|
147
150
|
baseContent,
|
|
@@ -153,7 +156,7 @@ export async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCha
|
|
|
153
156
|
video,
|
|
154
157
|
isVideoOnly,
|
|
155
158
|
diagnostics: {
|
|
156
|
-
strategy:
|
|
159
|
+
strategy: "html",
|
|
157
160
|
firecrawl: firecrawlDiagnostics,
|
|
158
161
|
markdown: markdownDiagnostics,
|
|
159
162
|
transcript: transcriptDiagnostics,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"html.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/html.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"html.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/html.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AACrE,OAAO,EAAE,qBAAqB,EAAE,YAAY,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AACtF,OAAO,EAAE,qBAAqB,EAAE,iCAAiC,EAAE,MAAM,cAAc,CAAC;AACxF,OAAO,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAClD,OAAO,EACL,2BAA2B,EAC3B,mCAAmC,EACnC,kCAAkC,EAClC,8BAA8B,GAC/B,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAC5E,OAAO,EAAE,0BAA0B,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AACjF,OAAO,EACL,2BAA2B,EAC3B,4BAA4B,EAC5B,aAAa,EACb,iBAAiB,GAClB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,0BAA0B,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,EAAE,8BAA8B,EAAE,MAAM,cAAc,CAAC;AAE9D,MAAM,uBAAuB,GAAG,eAAe,CAAC;AAEhD,SAAS,iBAAiB,CAAC,OAAe,EAAE,KAAgC;IAC1E,IAAI,CAAC,CAAC,OAAO,IAAI,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,eAAe,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;IACrC,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,cAAc,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAC3C,IAAI,CAAC,cAAc,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,eAAe,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;QAC5E,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,iBAAiB,GAAG,cAAc,CAAC,KAAK,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC;IACvE,MAAM,SAAS,GAAG,iBAAiB,CAAC,OAAO,CAAC,uBAAuB,EAAE,EAAE,CAAC,CAAC;IACzE,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,EAChD,GAAG,EACH,IAAI,EACJ,SAAS,EACT,aAAa,EACb,qBAAqB,EACrB,mBAAmB,EACnB,oBAAoB,EACpB,oBAAoB,EACpB,iBAAiB,EACjB,YAAY,EACZ,SAAS,EACT,IAAI,EACJ,oBAAoB,GAerB;IACC,IAAI,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,qBAAqB,CAAC,GAAG,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;IACrD,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,uBAAuB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAC5E,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,WAAW,GAAG,aAAa,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC;IAC1D,MAAM,iBAAiB,GAAG,aAAa,CAAC,CAAC,MAAM,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC;IAC5E,MAAM,eAAe,GAAG,uBAAuB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAC9D,MAAM,WAAW,GAAG,oBAAoB,IAAI,CAAC,MAAM,0BAA0B,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC;IAC1F,MAAM,eAAe,GAAG,WAAW,EAAE,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACtF,MAAM,eAAe,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;IAEvD,MAAM,0BAA0B,GAAG,kBAAkB,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC;IACnF,MAAM,qCAAqC,GAAG,eAAe;QAC3D,CAAC,CAAC,kBAAkB,CAAC,qBAAqB,CAAC,eAAe,CAAC,CAAC;QAC5D,CAAC,CAAC,EAAE,CAAC;IACP,MAAM,qBAAqB,GACzB,qCAAqC,CAAC,MAAM,IAAI,kCAAkC;QAClF,CAAC,0BAA0B,CAAC,MAAM,GAAG,2BAA2B;YAC9D,qCAAqC,CAAC,MAAM;gBAC1C,0BAA0B,CAAC,MAAM,GAAG,8BAA8B,CAAC,CAAC;IAC1E,MAAM,kBAAkB,GAAG,qBAAqB;QAC9C,CAAC,CAAC,qCAAqC;QACvC,CAAC,CAAC,0BAA0B,CAAC;IAE/B,MAAM,qBAAqB,GACzB,CAAC,qBAAqB;QACtB,eAAe,CAAC,MAAM,IAAI,kCAAkC;QAC5D,CAAC,0BAA0B,CAAC,MAAM,GAAG,2BAA2B;YAC9D,eAAe,CAAC,MAAM,IAAI,0BAA0B,CAAC,MAAM,GAAG,8BAA8B,CAAC,CAAC;IAClG,MAAM,iBAAiB,GAAG,qBAAqB,IAAI,qBAAqB,CAAC;IACzE,MAAM,mBAAmB,GAAG,qBAAqB,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,kBAAkB,CAAC;IACzF,MAAM,oBAAoB,GAAG,iBAAiB,CAAC,CAAC,CAAC,kBAAkB,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC5F,MAAM,iBAAiB,GACrB,oBAAoB,CAAC,MAAM,IAAI,mCAAmC;QAClE,CAAC,eAAe;YACd,aAAa,CAAC,GAAG,CAAC;YAClB,CAAC,CAAC,iBAAiB;gBACjB,CAAC,mBAAmB,CAAC,MAAM,GAAG,2BAA2B;oBACvD,oBAAoB,CAAC,MAAM;wBACzB,mBAAmB,CAAC,MAAM,GAAG,8BAA8B,CAAC,CAAC,CAAC,CAAC;IACzE,MAAM,kCAAkC,GAAG,iBAAiB;QAC1D,CAAC,CAAC,oBAAoB;QACtB,CAAC,CAAC,mBAAmB,CAAC;IACxB,MAAM,oBAAoB,GAAG,MAAM,wBAAwB,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE;QAC3E,qBAAqB;QACrB,mBAAmB;QACnB,oBAAoB;QACpB,SAAS;KACV,CAAC,CAAC;IAEH,MAAM,kBAAkB,GACtB,oBAAoB,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC,CAAC,8BAA8B,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACnF,MAAM,aAAa,GAAG,kBAAkB;QACtC,CAAC,CAAC,kBAAkB,CAAC,kBAAkB,CAAC;QACxC,CAAC,CAAC,kCAAkC,CAAC;IAEvC,IAAI,WAAW,GAAG,iBAAiB,CAAC,aAAa,EAAE,oBAAoB,CAAC,IAAI,CAAC,CAAC;IAC9E,IAAI,WAAW,KAAK,kBAAkB,EAAE,CAAC;QACvC,WAAW,GAAG,iBAAiB,CAAC,WAAW,EAAE,WAAW,IAAI,KAAK,CAAC,CAAC;IACrE,CAAC;IAED,MAAM,qBAAqB,GAAG,2BAA2B,CACvD,oBAAoB,EACpB,SAAS,IAAI,SAAS,CACvB,CAAC;IAEF,MAAM,mBAAmB,GAAwB,MAAM,CAAC,KAAK,IAAI,EAAE;QACjE,IAAI,CAAC,iBAAiB,EAAE,CAAC;YACvB,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;QACxE,CAAC;QAED,IAAI,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO;gBACL,SAAS,EAAE,IAAI;gBACf,IAAI,EAAE,KAAK;gBACX,QAAQ,EAAE,IAAI;gBACd,KAAK,EAAE,+CAA+C;aACvD,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,qBAAqB,EAAE,CAAC;YAChC,OAAO;gBACL,SAAS,EAAE,IAAI;gBACf,IAAI,EAAE,KAAK;gBACX,QAAQ,EAAE,IAAI;gBACd,KAAK,EAAE,uCAAuC;aAC/C,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,MAAM,eAAe,GACnB,YAAY,KAAK,aAAa,IAAI,eAAe,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC;YAC7E,MAAM,aAAa,GAAG,iCAAiC,CAAC,eAAe,CAAC,CAAC;YACzE,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,qBAAqB,CAAC;gBAChD,GAAG;gBACH,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,IAAI,KAAK;gBAC3B,QAAQ;gBACR,SAAS;aACV,CAAC,CAAC;YACH,MAAM,kBAAkB,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YACxD,IAAI,kBAAkB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACpC,OAAO;oBACL,SAAS,EAAE,IAAI;oBACf,IAAI,EAAE,KAAK;oBACX,QAAQ,EAAE,IAAI;oBACd,KAAK,EAAE,iDAAiD;iBACzD,CAAC;YACJ,CAAC;YAED,WAAW,GAAG,kBAAkB,CAAC;YACjC,OAAO;gBACL,SAAS,EAAE,IAAI;gBACf,IAAI,EAAE,IAAI;gBACV,QAAQ,EAAE,KAAK;gBACf,KAAK,EACH,YAAY,KAAK,aAAa,IAAI,eAAe;oBAC/C,CAAC,CAAC,0CAA0C;oBAC5C,CAAC,CAAC,IAAI;aACX,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACvE,OAAO;gBACL,SAAS,EAAE,IAAI;gBACf,IAAI,EAAE,KAAK;gBACX,QAAQ,EAAE,IAAI;gBACd,KAAK,EAAE,oCAAoC,OAAO,EAAE;aACrD,CAAC;QACJ,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IAEL,MAAM,KAAK,GAAG,0BAA0B,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACpD,MAAM,WAAW,GACf,CAAC,oBAAoB,CAAC,IAAI;QAC1B,WAAW,CAAC,MAAM,GAAG,2BAA2B;QAChD,KAAK,KAAK,IAAI,CAAC;IAEjB,OAAO,4BAA4B,CAAC;QAClC,GAAG;QACH,WAAW;QACX,aAAa;QACb,KAAK,EAAE,WAAW,IAAI,KAAK;QAC3B,WAAW,EAAE,iBAAiB,IAAI,WAAW;QAC7C,QAAQ;QACR,oBAAoB;QACpB,KAAK;QACL,WAAW;QACX,WAAW,EAAE;YACX,QAAQ,EAAE,MAAM;YAChB,SAAS,EAAE,oBAAoB;YAC/B,QAAQ,EAAE,mBAAmB;YAC7B,UAAU,EAAE,qBAAqB;SAClC;KACF,CAAC,CAAC;AACL,CAAC"}
|