@steipete/summarize-core 0.7.1 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/dist/esm/content/cache/types.js +2 -0
  2. package/dist/esm/content/cache/types.js.map +1 -0
  3. package/dist/esm/content/index.js +1 -0
  4. package/dist/esm/content/index.js.map +1 -1
  5. package/dist/esm/content/link-preview/client.js +3 -0
  6. package/dist/esm/content/link-preview/client.js.map +1 -1
  7. package/dist/esm/content/link-preview/content/fetcher.js +1 -1
  8. package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
  9. package/dist/esm/content/link-preview/content/html.js +1 -1
  10. package/dist/esm/content/link-preview/content/html.js.map +1 -1
  11. package/dist/esm/content/link-preview/content/index.js +22 -3
  12. package/dist/esm/content/link-preview/content/index.js.map +1 -1
  13. package/dist/esm/content/link-preview/deps.js.map +1 -1
  14. package/dist/esm/content/transcript/index.js +1 -0
  15. package/dist/esm/content/transcript/index.js.map +1 -1
  16. package/dist/esm/content/transcript/providers/generic.js +84 -4
  17. package/dist/esm/content/transcript/providers/generic.js.map +1 -1
  18. package/dist/esm/content/transcript/providers/podcast.js +1 -0
  19. package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
  20. package/dist/esm/content/transcript/providers/youtube/captions.js +35 -14
  21. package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
  22. package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +84 -12
  23. package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
  24. package/dist/esm/content/transcript/providers/youtube.js +38 -2
  25. package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
  26. package/dist/esm/content/transcript/utils.js +25 -69
  27. package/dist/esm/content/transcript/utils.js.map +1 -1
  28. package/dist/esm/content/url.js +76 -0
  29. package/dist/esm/content/url.js.map +1 -0
  30. package/dist/esm/prompts/cli.js +25 -5
  31. package/dist/esm/prompts/cli.js.map +1 -1
  32. package/dist/esm/prompts/file.js +51 -12
  33. package/dist/esm/prompts/file.js.map +1 -1
  34. package/dist/esm/prompts/format.js +26 -0
  35. package/dist/esm/prompts/format.js.map +1 -0
  36. package/dist/esm/prompts/link-summary.js +51 -22
  37. package/dist/esm/prompts/link-summary.js.map +1 -1
  38. package/dist/types/content/cache/types.d.ts +25 -0
  39. package/dist/types/content/index.d.ts +3 -1
  40. package/dist/types/content/link-preview/client.d.ts +6 -1
  41. package/dist/types/content/link-preview/content/types.d.ts +1 -1
  42. package/dist/types/content/link-preview/deps.d.ts +11 -20
  43. package/dist/types/content/transcript/cache.d.ts +1 -1
  44. package/dist/types/content/transcript/providers/generic.d.ts +1 -1
  45. package/dist/types/content/transcript/providers/youtube/captions.d.ts +3 -1
  46. package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +3 -1
  47. package/dist/types/content/transcript/types.d.ts +2 -1
  48. package/dist/types/content/transcript/utils.d.ts +1 -3
  49. package/dist/types/content/url.d.ts +8 -0
  50. package/dist/types/prompts/cli.d.ts +4 -1
  51. package/dist/types/prompts/file.d.ts +9 -2
  52. package/dist/types/prompts/format.d.ts +14 -0
  53. package/dist/types/prompts/link-summary.d.ts +4 -1
  54. package/package.json +10 -10
  55. package/dist/cjs/content/index.js +0 -14
  56. package/dist/cjs/content/index.js.map +0 -1
  57. package/dist/cjs/content/link-preview/client.js +0 -31
  58. package/dist/cjs/content/link-preview/client.js.map +0 -1
  59. package/dist/cjs/content/link-preview/content/article.js +0 -164
  60. package/dist/cjs/content/link-preview/content/article.js.map +0 -1
  61. package/dist/cjs/content/link-preview/content/cleaner.js +0 -63
  62. package/dist/cjs/content/link-preview/content/cleaner.js.map +0 -1
  63. package/dist/cjs/content/link-preview/content/constants.js +0 -10
  64. package/dist/cjs/content/link-preview/content/constants.js.map +0 -1
  65. package/dist/cjs/content/link-preview/content/fetcher.js +0 -128
  66. package/dist/cjs/content/link-preview/content/fetcher.js.map +0 -1
  67. package/dist/cjs/content/link-preview/content/firecrawl.js +0 -90
  68. package/dist/cjs/content/link-preview/content/firecrawl.js.map +0 -1
  69. package/dist/cjs/content/link-preview/content/html.js +0 -165
  70. package/dist/cjs/content/link-preview/content/html.js.map +0 -1
  71. package/dist/cjs/content/link-preview/content/index.js +0 -348
  72. package/dist/cjs/content/link-preview/content/index.js.map +0 -1
  73. package/dist/cjs/content/link-preview/content/jsonld.js +0 -80
  74. package/dist/cjs/content/link-preview/content/jsonld.js.map +0 -1
  75. package/dist/cjs/content/link-preview/content/parsers.js +0 -81
  76. package/dist/cjs/content/link-preview/content/parsers.js.map +0 -1
  77. package/dist/cjs/content/link-preview/content/podcast-utils.js +0 -85
  78. package/dist/cjs/content/link-preview/content/podcast-utils.js.map +0 -1
  79. package/dist/cjs/content/link-preview/content/readability.js +0 -90
  80. package/dist/cjs/content/link-preview/content/readability.js.map +0 -1
  81. package/dist/cjs/content/link-preview/content/twitter-utils.js +0 -74
  82. package/dist/cjs/content/link-preview/content/twitter-utils.js.map +0 -1
  83. package/dist/cjs/content/link-preview/content/types.js +0 -7
  84. package/dist/cjs/content/link-preview/content/types.js.map +0 -1
  85. package/dist/cjs/content/link-preview/content/utils.js +0 -177
  86. package/dist/cjs/content/link-preview/content/utils.js.map +0 -1
  87. package/dist/cjs/content/link-preview/content/video.js +0 -99
  88. package/dist/cjs/content/link-preview/content/video.js.map +0 -1
  89. package/dist/cjs/content/link-preview/content/youtube.js +0 -85
  90. package/dist/cjs/content/link-preview/content/youtube.js.map +0 -1
  91. package/dist/cjs/content/link-preview/deps.js +0 -23
  92. package/dist/cjs/content/link-preview/deps.js.map +0 -1
  93. package/dist/cjs/content/link-preview/fetch-with-timeout.js +0 -38
  94. package/dist/cjs/content/link-preview/fetch-with-timeout.js.map +0 -1
  95. package/dist/cjs/content/link-preview/types.js +0 -5
  96. package/dist/cjs/content/link-preview/types.js.map +0 -1
  97. package/dist/cjs/content/transcript/cache.js +0 -85
  98. package/dist/cjs/content/transcript/cache.js.map +0 -1
  99. package/dist/cjs/content/transcript/index.js +0 -134
  100. package/dist/cjs/content/transcript/index.js.map +0 -1
  101. package/dist/cjs/content/transcript/normalize.js +0 -49
  102. package/dist/cjs/content/transcript/normalize.js.map +0 -1
  103. package/dist/cjs/content/transcript/providers/generic.js +0 -16
  104. package/dist/cjs/content/transcript/providers/generic.js.map +0 -1
  105. package/dist/cjs/content/transcript/providers/podcast/apple-flow.js +0 -226
  106. package/dist/cjs/content/transcript/providers/podcast/apple-flow.js.map +0 -1
  107. package/dist/cjs/content/transcript/providers/podcast/apple.js +0 -43
  108. package/dist/cjs/content/transcript/providers/podcast/apple.js.map +0 -1
  109. package/dist/cjs/content/transcript/providers/podcast/constants.js +0 -11
  110. package/dist/cjs/content/transcript/providers/podcast/constants.js.map +0 -1
  111. package/dist/cjs/content/transcript/providers/podcast/flow-context.js +0 -3
  112. package/dist/cjs/content/transcript/providers/podcast/flow-context.js.map +0 -1
  113. package/dist/cjs/content/transcript/providers/podcast/itunes.js +0 -139
  114. package/dist/cjs/content/transcript/providers/podcast/itunes.js.map +0 -1
  115. package/dist/cjs/content/transcript/providers/podcast/json.js +0 -43
  116. package/dist/cjs/content/transcript/providers/podcast/json.js.map +0 -1
  117. package/dist/cjs/content/transcript/providers/podcast/media.js +0 -355
  118. package/dist/cjs/content/transcript/providers/podcast/media.js.map +0 -1
  119. package/dist/cjs/content/transcript/providers/podcast/results.js +0 -32
  120. package/dist/cjs/content/transcript/providers/podcast/results.js.map +0 -1
  121. package/dist/cjs/content/transcript/providers/podcast/rss.js +0 -262
  122. package/dist/cjs/content/transcript/providers/podcast/rss.js.map +0 -1
  123. package/dist/cjs/content/transcript/providers/podcast/spotify-flow.js +0 -221
  124. package/dist/cjs/content/transcript/providers/podcast/spotify-flow.js.map +0 -1
  125. package/dist/cjs/content/transcript/providers/podcast/spotify.js +0 -119
  126. package/dist/cjs/content/transcript/providers/podcast/spotify.js.map +0 -1
  127. package/dist/cjs/content/transcript/providers/podcast.js +0 -260
  128. package/dist/cjs/content/transcript/providers/podcast.js.map +0 -1
  129. package/dist/cjs/content/transcript/providers/youtube/api.js +0 -264
  130. package/dist/cjs/content/transcript/providers/youtube/api.js.map +0 -1
  131. package/dist/cjs/content/transcript/providers/youtube/apify.js +0 -59
  132. package/dist/cjs/content/transcript/providers/youtube/apify.js.map +0 -1
  133. package/dist/cjs/content/transcript/providers/youtube/captions.js +0 -413
  134. package/dist/cjs/content/transcript/providers/youtube/captions.js.map +0 -1
  135. package/dist/cjs/content/transcript/providers/youtube/yt-dlp.js +0 -170
  136. package/dist/cjs/content/transcript/providers/youtube/yt-dlp.js.map +0 -1
  137. package/dist/cjs/content/transcript/providers/youtube.js +0 -178
  138. package/dist/cjs/content/transcript/providers/youtube.js.map +0 -1
  139. package/dist/cjs/content/transcript/types.js +0 -3
  140. package/dist/cjs/content/transcript/types.js.map +0 -1
  141. package/dist/cjs/content/transcript/utils.js +0 -303
  142. package/dist/cjs/content/transcript/utils.js.map +0 -1
  143. package/dist/cjs/index.js +0 -22
  144. package/dist/cjs/index.js.map +0 -1
  145. package/dist/cjs/language.js +0 -132
  146. package/dist/cjs/language.js.map +0 -1
  147. package/dist/cjs/package.json +0 -3
  148. package/dist/cjs/prompts/cli.js +0 -23
  149. package/dist/cjs/prompts/cli.js.map +0 -1
  150. package/dist/cjs/prompts/file.js +0 -52
  151. package/dist/cjs/prompts/file.js.map +0 -1
  152. package/dist/cjs/prompts/index.js +0 -14
  153. package/dist/cjs/prompts/index.js.map +0 -1
  154. package/dist/cjs/prompts/link-summary.js +0 -122
  155. package/dist/cjs/prompts/link-summary.js.map +0 -1
  156. package/dist/cjs/shared/contracts.js +0 -5
  157. package/dist/cjs/shared/contracts.js.map +0 -1
  158. package/dist/cjs/transcription/whisper/constants.js +0 -11
  159. package/dist/cjs/transcription/whisper/constants.js.map +0 -1
  160. package/dist/cjs/transcription/whisper/core.js +0 -307
  161. package/dist/cjs/transcription/whisper/core.js.map +0 -1
  162. package/dist/cjs/transcription/whisper/fal.js +0 -44
  163. package/dist/cjs/transcription/whisper/fal.js.map +0 -1
  164. package/dist/cjs/transcription/whisper/ffmpeg.js +0 -187
  165. package/dist/cjs/transcription/whisper/ffmpeg.js.map +0 -1
  166. package/dist/cjs/transcription/whisper/openai.js +0 -51
  167. package/dist/cjs/transcription/whisper/openai.js.map +0 -1
  168. package/dist/cjs/transcription/whisper/types.js +0 -3
  169. package/dist/cjs/transcription/whisper/types.js.map +0 -1
  170. package/dist/cjs/transcription/whisper/utils.js +0 -70
  171. package/dist/cjs/transcription/whisper/utils.js.map +0 -1
  172. package/dist/cjs/transcription/whisper/whisper-cpp.js +0 -232
  173. package/dist/cjs/transcription/whisper/whisper-cpp.js.map +0 -1
  174. package/dist/cjs/transcription/whisper.js +0 -15
  175. package/dist/cjs/transcription/whisper.js.map +0 -1
@@ -1,165 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.buildResultFromHtmlDocument = buildResultFromHtmlDocument;
4
- const index_js_1 = require("../../transcript/index.js");
5
- const utils_js_1 = require("../../transcript/utils.js");
6
- const article_js_1 = require("./article.js");
7
- const cleaner_js_1 = require("./cleaner.js");
8
- const constants_js_1 = require("./constants.js");
9
- const jsonld_js_1 = require("./jsonld.js");
10
- const parsers_js_1 = require("./parsers.js");
11
- const podcast_utils_js_1 = require("./podcast-utils.js");
12
- const readability_js_1 = require("./readability.js");
13
- const utils_js_2 = require("./utils.js");
14
- const video_js_1 = require("./video.js");
15
- const youtube_js_1 = require("./youtube.js");
16
- const LEADING_CONTROL_PATTERN = /^[\s\p{Cc}]+/u;
17
- function stripLeadingTitle(content, title) {
18
- if (!(content && title)) {
19
- return content;
20
- }
21
- const normalizedTitle = title.trim();
22
- if (normalizedTitle.length === 0) {
23
- return content;
24
- }
25
- const trimmedContent = content.trimStart();
26
- if (!trimmedContent.toLowerCase().startsWith(normalizedTitle.toLowerCase())) {
27
- return content;
28
- }
29
- const remainderOriginal = trimmedContent.slice(normalizedTitle.length);
30
- const remainder = remainderOriginal.replace(LEADING_CONTROL_PATTERN, '');
31
- return remainder;
32
- }
33
- async function buildResultFromHtmlDocument({ url, html, cacheMode, maxCharacters, youtubeTranscriptMode, firecrawlDiagnostics, markdownRequested, markdownMode, timeoutMs, deps, readabilityCandidate, }) {
34
- if ((0, utils_js_1.isYouTubeVideoUrl)(url) && !(0, utils_js_1.extractYouTubeVideoId)(url)) {
35
- throw new Error('Invalid YouTube video id in URL');
36
- }
37
- const { title, description, siteName } = (0, parsers_js_1.extractMetadataFromHtml)(html, url);
38
- const jsonLd = (0, jsonld_js_1.extractJsonLdContent)(html);
39
- const mergedTitle = (0, utils_js_2.pickFirstText)([jsonLd?.title, title]);
40
- const mergedDescription = (0, utils_js_2.pickFirstText)([jsonLd?.description, description]);
41
- const isPodcastJsonLd = (0, podcast_utils_js_1.isPodcastLikeJsonLdType)(jsonLd?.type);
42
- const readability = readabilityCandidate ?? (await (0, readability_js_1.extractReadabilityFromHtml)(html, url));
43
- const readabilityText = readability?.text ? (0, cleaner_js_1.normalizeForPrompt)(readability.text) : '';
44
- const readabilityHtml = (0, readability_js_1.toReadabilityHtml)(readability);
45
- const normalizedSegmentsFromHtml = (0, cleaner_js_1.normalizeForPrompt)((0, article_js_1.extractArticleContent)(html));
46
- const normalizedSegmentsFromReadabilityHtml = readabilityHtml
47
- ? (0, cleaner_js_1.normalizeForPrompt)((0, article_js_1.extractArticleContent)(readabilityHtml))
48
- : '';
49
- const preferReadabilityHtml = normalizedSegmentsFromReadabilityHtml.length >= constants_js_1.MIN_READABILITY_CONTENT_CHARACTERS &&
50
- (normalizedSegmentsFromHtml.length < constants_js_1.MIN_HTML_CONTENT_CHARACTERS ||
51
- normalizedSegmentsFromReadabilityHtml.length >=
52
- normalizedSegmentsFromHtml.length * constants_js_1.READABILITY_RELATIVE_THRESHOLD);
53
- const normalizedSegments = preferReadabilityHtml
54
- ? normalizedSegmentsFromReadabilityHtml
55
- : normalizedSegmentsFromHtml;
56
- const preferReadabilityText = !preferReadabilityHtml &&
57
- readabilityText.length >= constants_js_1.MIN_READABILITY_CONTENT_CHARACTERS &&
58
- (normalizedSegmentsFromHtml.length < constants_js_1.MIN_HTML_CONTENT_CHARACTERS ||
59
- readabilityText.length >= normalizedSegmentsFromHtml.length * constants_js_1.READABILITY_RELATIVE_THRESHOLD);
60
- const preferReadability = preferReadabilityHtml || preferReadabilityText;
61
- const effectiveNormalized = preferReadabilityText ? readabilityText : normalizedSegments;
62
- const descriptionCandidate = mergedDescription ? (0, cleaner_js_1.normalizeForPrompt)(mergedDescription) : '';
63
- const preferDescription = descriptionCandidate.length >= constants_js_1.MIN_METADATA_DESCRIPTION_CHARACTERS &&
64
- (isPodcastJsonLd ||
65
- (0, podcast_utils_js_1.isPodcastHost)(url) ||
66
- (!preferReadability &&
67
- (effectiveNormalized.length < constants_js_1.MIN_HTML_CONTENT_CHARACTERS ||
68
- descriptionCandidate.length >=
69
- effectiveNormalized.length * constants_js_1.READABILITY_RELATIVE_THRESHOLD)));
70
- const effectiveNormalizedWithDescription = preferDescription
71
- ? descriptionCandidate
72
- : effectiveNormalized;
73
- const transcriptResolution = await (0, index_js_1.resolveTranscriptForLink)(url, html, deps, {
74
- youtubeTranscriptMode,
75
- cacheMode,
76
- });
77
- const youtubeDescription = transcriptResolution.text === null ? (0, youtube_js_1.extractYouTubeShortDescription)(html) : null;
78
- const baseCandidate = youtubeDescription
79
- ? (0, cleaner_js_1.normalizeForPrompt)(youtubeDescription)
80
- : effectiveNormalizedWithDescription;
81
- let baseContent = (0, utils_js_2.selectBaseContent)(baseCandidate, transcriptResolution.text);
82
- if (baseContent === normalizedSegments) {
83
- baseContent = stripLeadingTitle(baseContent, mergedTitle ?? title);
84
- }
85
- const transcriptDiagnostics = (0, utils_js_2.ensureTranscriptDiagnostics)(transcriptResolution, cacheMode ?? 'default');
86
- const markdownDiagnostics = await (async () => {
87
- if (!markdownRequested) {
88
- return { requested: false, used: false, provider: null, notes: null };
89
- }
90
- if ((0, utils_js_1.isYouTubeUrl)(url)) {
91
- return {
92
- requested: true,
93
- used: false,
94
- provider: null,
95
- notes: 'Skipping Markdown conversion for YouTube URLs',
96
- };
97
- }
98
- if (!deps.convertHtmlToMarkdown) {
99
- return {
100
- requested: true,
101
- used: false,
102
- provider: null,
103
- notes: 'No HTML→Markdown converter configured',
104
- };
105
- }
106
- try {
107
- const htmlForMarkdown = markdownMode === 'readability' && readabilityHtml ? readabilityHtml : html;
108
- const sanitizedHtml = (0, article_js_1.sanitizeHtmlForMarkdownConversion)(htmlForMarkdown);
109
- const markdown = await deps.convertHtmlToMarkdown({
110
- url,
111
- html: sanitizedHtml,
112
- title: mergedTitle ?? title,
113
- siteName,
114
- timeoutMs,
115
- });
116
- const normalizedMarkdown = (0, cleaner_js_1.normalizeForPrompt)(markdown);
117
- if (normalizedMarkdown.length === 0) {
118
- return {
119
- requested: true,
120
- used: false,
121
- provider: null,
122
- notes: 'HTML→Markdown conversion returned empty content',
123
- };
124
- }
125
- baseContent = normalizedMarkdown;
126
- return {
127
- requested: true,
128
- used: true,
129
- provider: 'llm',
130
- notes: markdownMode === 'readability' && readabilityHtml
131
- ? 'Readability HTML used for markdown input'
132
- : null,
133
- };
134
- }
135
- catch (error) {
136
- const message = error instanceof Error ? error.message : String(error);
137
- return {
138
- requested: true,
139
- used: false,
140
- provider: null,
141
- notes: `HTML→Markdown conversion failed: ${message}`,
142
- };
143
- }
144
- })();
145
- const video = (0, video_js_1.detectPrimaryVideoFromHtml)(html, url);
146
- const isVideoOnly = !transcriptResolution.text && baseContent.length < constants_js_1.MIN_HTML_CONTENT_CHARACTERS && video !== null;
147
- return (0, utils_js_2.finalizeExtractedLinkContent)({
148
- url,
149
- baseContent,
150
- maxCharacters,
151
- title: mergedTitle ?? title,
152
- description: mergedDescription ?? description,
153
- siteName,
154
- transcriptResolution,
155
- video,
156
- isVideoOnly,
157
- diagnostics: {
158
- strategy: 'html',
159
- firecrawl: firecrawlDiagnostics,
160
- markdown: markdownDiagnostics,
161
- transcript: transcriptDiagnostics,
162
- },
163
- });
164
- }
165
- //# sourceMappingURL=html.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"html.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/html.ts"],"names":[],"mappings":";;AAgDA,kEAiLC;AAjOD,wDAAoE;AACpE,wDAAkG;AAGlG,6CAAuF;AACvF,6CAAiD;AACjD,iDAKuB;AACvB,2CAAkD;AAClD,6CAAsD;AACtD,yDAA2E;AAC3E,qDAAgF;AAEhF,yCAKmB;AACnB,yCAAuD;AACvD,6CAA6D;AAE7D,MAAM,uBAAuB,GAAG,eAAe,CAAA;AAE/C,SAAS,iBAAiB,CAAC,OAAe,EAAE,KAAgC;IAC1E,IAAI,CAAC,CAAC,OAAO,IAAI,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,OAAO,CAAA;IAChB,CAAC;IAED,MAAM,eAAe,GAAG,KAAK,CAAC,IAAI,EAAE,CAAA;IACpC,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,OAAO,CAAA;IAChB,CAAC;IAED,MAAM,cAAc,GAAG,OAAO,CAAC,SAAS,EAAE,CAAA;IAC1C,IAAI,CAAC,cAAc,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,eAAe,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;QAC5E,OAAO,OAAO,CAAA;IAChB,CAAC;IAED,MAAM,iBAAiB,GAAG,cAAc,CAAC,KAAK,CAAC,eAAe,CAAC,MAAM,CAAC,CAAA;IACtE,MAAM,SAAS,GAAG,iBAAiB,CAAC,OAAO,CAAC,uBAAuB,EAAE,EAAE,CAAC,CAAA;IACxE,OAAO,SAAS,CAAA;AAClB,CAAC;AAEM,KAAK,UAAU,2BAA2B,CAAC,EAChD,GAAG,EACH,IAAI,EACJ,SAAS,EACT,aAAa,EACb,qBAAqB,EACrB,oBAAoB,EACpB,iBAAiB,EACjB,YAAY,EACZ,SAAS,EACT,IAAI,EACJ,oBAAoB,GAarB;IACC,IAAI,IAAA,4BAAiB,EAAC,GAAG,CAAC,IAAI,CAAC,IAAA,gCAAqB,EAAC,GAAG,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAA;IACpD,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,IAAA,oCAAuB,EAAC,IAAI,EAAE,GAAG,CAAC,CAAA;IAC3E,MAAM,MAAM,GAAG,IAAA,gCAAoB,EAAC,IAAI,CAAC,CAAA;IACzC,MAAM,WAAW,GAAG,IAAA,wBAAa,EAAC,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAA;IACzD,MAAM,iBAAiB,GAAG,IAAA,wBAAa,EAAC,CAAC,MAAM,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC,CAAA;IAC3E,MAAM,eAAe,GAAG,IAAA,0CAAuB,EAAC,MAAM,EAAE,IAAI,CAAC,CAAA;IAC7D,MAAM,WAAW,GAAG,oBAAoB,IAAI,CAAC,MAAM,IAAA,2CAA0B,EAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAA;IACzF,MAAM,eAAe,GAAG,WAAW,EAAE,IAAI,CAAC,CAAC,CAAC,IAAA,+BAAkB,EAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IACrF,MAAM,eAAe,GAAG,IAAA,kCAAiB,EAAC,WAAW,CAAC,CAAA;IAEtD,MAAM,0BAA0B,GAAG,IAAA,+BAAkB,EAAC,IAAA,kCAAqB,EAAC,IAAI,CAAC,CAAC,CAAA;IAClF,MAAM,qCAAqC,GAAG,eAAe;QAC3D,CAAC,CAAC,IAAA,+BAAkB,EAAC,IAAA,kCAAqB,EAAC,eAAe,CAAC,CAAC;QAC5D,CAAC,CAAC,EAAE,CAAA;IACN,MAAM,qBAAqB,GACzB,qCAAqC,CAAC,MAAM,IAAI,iDAAkC;QAClF,CAAC,0BAA0B,CAAC,MAAM,GAAG,0CAA2B;YAC9D,qCAAqC,CAAC,MAAM;gBAC1C,0BAA0B,CAAC,MAAM,GAAG,6CAA8B,CAAC,CAAA;IACzE,MAAM,kBAAkB,GAAG,qBAAqB;QAC9C,CAAC,CAAC,qCAAqC;QACvC,CAAC,CAAC,0BAA0B,CAAA;IAE9B,MAAM,qBAAqB,GACzB,CAAC,qBAAqB;QACtB,eAAe,CAAC,MAAM,IAAI,iDAAkC;QAC5D,CAAC,0BAA0B,CAAC,MAAM,GAAG,0CAA2B;YAC9D,eAAe,CAAC,MAAM,IAAI,0BAA0B,CAAC,MAAM,GAAG,6CAA8B,CAAC,CAAA;IACjG,MAAM,iBAAiB,GAAG,qBAAqB,IAAI,qBAAqB,CAAA;IACxE,MAAM,mBAAmB,GAAG,qBAAqB,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,kBAAkB,CAAA;IACxF,MAAM,oBAAoB,GAAG,iBAAiB,CAAC,CAAC,CAAC,IAAA,+BAAkB,EAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC3F,MAAM,iBAAiB,GACrB,oBAAoB,CAAC,MAAM,IAAI,kDAAmC;QAClE,CAAC,eAAe;YACd,IAAA,gCAAa,EAAC,GAAG,CAAC;YAClB,CAAC,CAAC,iBAAiB;gBACjB,CAAC,mBAAmB,CAAC,MAAM,GAAG,0CAA2B;oBACvD,oBAAoB,CAAC,MAAM;wBACzB,mBAAmB,CAAC,MAAM,GAAG,6CAA8B,CAAC,CAAC,CAAC,CAAA;IACxE,MAAM,kCAAkC,GAAG,iBAAiB;QAC1D,CAAC,CAAC,oBAAoB;QACtB,CAAC,CAAC,mBAAmB,CAAA;IACvB,MAAM,oBAAoB,GAAG,MAAM,IAAA,mCAAwB,EAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE;QAC3E,qBAAqB;QACrB,SAAS;KACV,CAAC,CAAA;IAEF,MAAM,kBAAkB,GACtB,oBAAoB,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC,CAAC,IAAA,2CAA8B,EAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IAClF,MAAM,aAAa,GAAG,kBAAkB;QACtC,CAAC,CAAC,IAAA,+BAAkB,EAAC,kBAAkB,CAAC;QACxC,CAAC,CAAC,kCAAkC,CAAA;IAEtC,IAAI,WAAW,GAAG,IAAA,4BAAiB,EAAC,aAAa,EAAE,oBAAoB,CAAC,IAAI,CAAC,CAAA;IAC7E,IAAI,WAAW,KAAK,kBAAkB,EAAE,CAAC;QACvC,WAAW,GAAG,iBAAiB,CAAC,WAAW,EAAE,WAAW,IAAI,KAAK,CAAC,CAAA;IACpE,CAAC;IAED,MAAM,qBAAqB,GAAG,IAAA,sCAA2B,EACvD,oBAAoB,EACpB,SAAS,IAAI,SAAS,CACvB,CAAA;IAED,MAAM,mBAAmB,GAAwB,MAAM,CAAC,KAAK,IAAI,EAAE;QACjE,IAAI,CAAC,iBAAiB,EAAE,CAAC;YACvB,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAA;QACvE,CAAC;QAED,IAAI,IAAA,uBAAY,EAAC,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO;gBACL,SAAS,EAAE,IAAI;gBACf,IAAI,EAAE,KAAK;gBACX,QAAQ,EAAE,IAAI;gBACd,KAAK,EAAE,+CAA+C;aACvD,CAAA;QACH,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,qBAAqB,EAAE,CAAC;YAChC,OAAO;gBACL,SAAS,EAAE,IAAI;gBACf,IAAI,EAAE,KAAK;gBACX,QAAQ,EAAE,IAAI;gBACd,KAAK,EAAE,uCAAuC;aAC/C,CAAA;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,eAAe,GACnB,YAAY,KAAK,aAAa,IAAI,eAAe,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAA;YAC5E,MAAM,aAAa,GAAG,IAAA,8CAAiC,EAAC,eAAe,CAAC,CAAA;YACxE,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,qBAAqB,CAAC;gBAChD,GAAG;gBACH,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,IAAI,KAAK;gBAC3B,QAAQ;gBACR,SAAS;aACV,CAAC,CAAA;YACF,MAAM,kBAAkB,GAAG,IAAA,+BAAkB,EAAC,QAAQ,CAAC,CAAA;YACvD,IAAI,kBAAkB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACpC,OAAO;oBACL,SAAS,EAAE,IAAI;oBACf,IAAI,EAAE,KAAK;oBACX,QAAQ,EAAE,IAAI;oBACd,KAAK,EAAE,iDAAiD;iBACzD,CAAA;YACH,CAAC;YAED,WAAW,GAAG,kBAAkB,CAAA;YAChC,OAAO;gBACL,SAAS,EAAE,IAAI;gBACf,IAAI,EAAE,IAAI;gBACV,QAAQ,EAAE,KAAK;gBACf,KAAK,EACH,YAAY,KAAK,aAAa,IAAI,eAAe;oBAC/C,CAAC,CAAC,0CAA0C;oBAC5C,CAAC,CAAC,IAAI;aACX,CAAA;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;YACtE,OAAO;gBACL,SAAS,EAAE,IAAI;gBACf,IAAI,EAAE,KAAK;gBACX,QAAQ,EAAE,IAAI;gBACd,KAAK,EAAE,oCAAoC,OAAO,EAAE;aACrD,CAAA;QACH,CAAC;IACH,CAAC,CAAC,EAAE,CAAA;IAEJ,MAAM,KAAK,GAAG,IAAA,qCAA0B,EAAC,IAAI,EAAE,GAAG,CAAC,CAAA;IACnD,MAAM,WAAW,GACf,CAAC,oBAAoB,CAAC,IAAI,IAAI,WAAW,CAAC,MAAM,GAAG,0CAA2B,IAAI,KAAK,KAAK,IAAI,CAAA;IAElG,OAAO,IAAA,uCAA4B,EAAC;QAClC,GAAG;QACH,WAAW;QACX,aAAa;QACb,KAAK,EAAE,WAAW,IAAI,KAAK;QAC3B,WAAW,EAAE,iBAAiB,IAAI,WAAW;QAC7C,QAAQ;QACR,oBAAoB;QACpB,KAAK;QACL,WAAW;QACX,WAAW,EAAE;YACX,QAAQ,EAAE,MAAM;YAChB,SAAS,EAAE,oBAAoB;YAC/B,QAAQ,EAAE,mBAAmB;YAC7B,UAAU,EAAE,qBAAqB;SAClC;KACF,CAAC,CAAA;AACJ,CAAC"}
@@ -1,348 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.fetchLinkContent = fetchLinkContent;
4
- const index_js_1 = require("../../transcript/index.js");
5
- const utils_js_1 = require("../../transcript/utils.js");
6
- const cleaner_js_1 = require("./cleaner.js");
7
- const constants_js_1 = require("./constants.js");
8
- const fetcher_js_1 = require("./fetcher.js");
9
- const firecrawl_js_1 = require("./firecrawl.js");
10
- const html_js_1 = require("./html.js");
11
- const podcast_utils_js_1 = require("./podcast-utils.js");
12
- const readability_js_1 = require("./readability.js");
13
- const twitter_utils_js_1 = require("./twitter-utils.js");
14
- const utils_js_2 = require("./utils.js");
15
- async function fetchLinkContent(url, options, deps) {
16
- const timeoutMs = (0, utils_js_2.resolveTimeoutMs)(options);
17
- const cacheMode = (0, utils_js_2.resolveCacheMode)(options);
18
- const maxCharacters = (0, utils_js_2.resolveMaxCharacters)(options);
19
- const youtubeTranscriptMode = options?.youtubeTranscript ?? 'auto';
20
- const firecrawlMode = (0, utils_js_2.resolveFirecrawlMode)(options);
21
- const markdownRequested = (options?.format ?? 'text') === 'markdown';
22
- const markdownMode = options?.markdownMode ?? 'auto';
23
- const canUseFirecrawl = firecrawlMode !== 'off' && deps.scrapeWithFirecrawl !== null && !(0, utils_js_1.isYouTubeUrl)(url);
24
- const spotifyEpisodeId = (0, podcast_utils_js_1.extractSpotifyEpisodeId)(url);
25
- if (spotifyEpisodeId) {
26
- if (!deps.openaiApiKey && !deps.falApiKey) {
27
- throw new Error('Spotify episode transcription requires OPENAI_API_KEY or FAL_KEY (Whisper); otherwise you may only get a captcha/recaptcha HTML page.');
28
- }
29
- const transcriptResolution = await (0, index_js_1.resolveTranscriptForLink)(url, null, deps, {
30
- youtubeTranscriptMode,
31
- cacheMode,
32
- });
33
- if (!transcriptResolution.text) {
34
- const notes = transcriptResolution.diagnostics?.notes;
35
- const suffix = notes ? ` (${notes})` : '';
36
- throw new Error(`Failed to transcribe Spotify episode${suffix}`);
37
- }
38
- const transcriptDiagnostics = (0, utils_js_2.ensureTranscriptDiagnostics)(transcriptResolution, cacheMode ?? 'default');
39
- transcriptDiagnostics.notes = (0, utils_js_2.appendNote)(transcriptDiagnostics.notes, 'Spotify episode: skipped HTML fetch to avoid captcha pages');
40
- return (0, utils_js_2.finalizeExtractedLinkContent)({
41
- url,
42
- baseContent: (0, utils_js_2.selectBaseContent)('', transcriptResolution.text),
43
- maxCharacters,
44
- title: null,
45
- description: null,
46
- siteName: 'Spotify',
47
- transcriptResolution,
48
- video: null,
49
- isVideoOnly: false,
50
- diagnostics: {
51
- strategy: 'html',
52
- firecrawl: {
53
- attempted: false,
54
- used: false,
55
- cacheMode,
56
- cacheStatus: cacheMode === 'bypass' ? 'bypassed' : 'unknown',
57
- notes: 'Spotify short-circuit skipped HTML/Firecrawl',
58
- },
59
- markdown: {
60
- requested: markdownRequested,
61
- used: false,
62
- provider: null,
63
- notes: 'Spotify short-circuit uses transcript content',
64
- },
65
- transcript: transcriptDiagnostics,
66
- },
67
- });
68
- }
69
- const appleIds = (0, podcast_utils_js_1.extractApplePodcastIds)(url);
70
- if (appleIds) {
71
- if (!deps.openaiApiKey && !deps.falApiKey) {
72
- throw new Error('Apple Podcasts transcription requires OPENAI_API_KEY or FAL_KEY (Whisper); otherwise you may only get a slow/blocked HTML page.');
73
- }
74
- const transcriptResolution = await (0, index_js_1.resolveTranscriptForLink)(url, null, deps, {
75
- youtubeTranscriptMode,
76
- cacheMode,
77
- });
78
- if (!transcriptResolution.text) {
79
- const notes = transcriptResolution.diagnostics?.notes;
80
- const suffix = notes ? ` (${notes})` : '';
81
- throw new Error(`Failed to transcribe Apple Podcasts episode${suffix}`);
82
- }
83
- const transcriptDiagnostics = (0, utils_js_2.ensureTranscriptDiagnostics)(transcriptResolution, cacheMode ?? 'default');
84
- transcriptDiagnostics.notes = (0, utils_js_2.appendNote)(transcriptDiagnostics.notes, 'Apple Podcasts: skipped HTML fetch (prefer iTunes lookup / enclosures)');
85
- return (0, utils_js_2.finalizeExtractedLinkContent)({
86
- url,
87
- baseContent: (0, utils_js_2.selectBaseContent)('', transcriptResolution.text),
88
- maxCharacters,
89
- title: null,
90
- description: null,
91
- siteName: 'Apple Podcasts',
92
- transcriptResolution,
93
- video: null,
94
- isVideoOnly: false,
95
- diagnostics: {
96
- strategy: 'html',
97
- firecrawl: {
98
- attempted: false,
99
- used: false,
100
- cacheMode,
101
- cacheStatus: cacheMode === 'bypass' ? 'bypassed' : 'unknown',
102
- notes: 'Apple Podcasts short-circuit skipped HTML/Firecrawl',
103
- },
104
- markdown: {
105
- requested: markdownRequested,
106
- used: false,
107
- provider: null,
108
- notes: 'Apple Podcasts short-circuit uses transcript content',
109
- },
110
- transcript: transcriptDiagnostics,
111
- },
112
- });
113
- }
114
- let firecrawlAttempted = false;
115
- let firecrawlPayload = null;
116
- const firecrawlDiagnostics = {
117
- attempted: false,
118
- used: false,
119
- cacheMode,
120
- cacheStatus: cacheMode === 'bypass' ? 'bypassed' : 'unknown',
121
- notes: null,
122
- };
123
- const twitterStatus = (0, twitter_utils_js_1.isTwitterStatusUrl)(url);
124
- const nitterUrls = twitterStatus ? (0, twitter_utils_js_1.toNitterUrls)(url) : [];
125
- let birdError = null;
126
- let nitterError = null;
127
- const attemptFirecrawl = async (reason) => {
128
- if (!canUseFirecrawl) {
129
- return null;
130
- }
131
- if (!firecrawlAttempted) {
132
- const attempt = await (0, fetcher_js_1.fetchWithFirecrawl)(url, deps.scrapeWithFirecrawl, {
133
- timeoutMs,
134
- cacheMode,
135
- onProgress: deps.onProgress ?? null,
136
- reason,
137
- });
138
- firecrawlAttempted = true;
139
- firecrawlPayload = attempt.payload;
140
- firecrawlDiagnostics.attempted = attempt.diagnostics.attempted;
141
- firecrawlDiagnostics.used = attempt.diagnostics.used;
142
- firecrawlDiagnostics.cacheMode = attempt.diagnostics.cacheMode;
143
- firecrawlDiagnostics.cacheStatus = attempt.diagnostics.cacheStatus;
144
- firecrawlDiagnostics.notes = attempt.diagnostics.notes ?? null;
145
- }
146
- firecrawlDiagnostics.notes = (0, utils_js_2.appendNote)(firecrawlDiagnostics.notes, reason);
147
- if (!firecrawlPayload) {
148
- return null;
149
- }
150
- const firecrawlResult = await (0, firecrawl_js_1.buildResultFromFirecrawl)({
151
- url,
152
- payload: firecrawlPayload,
153
- cacheMode,
154
- maxCharacters,
155
- youtubeTranscriptMode,
156
- firecrawlDiagnostics,
157
- markdownRequested,
158
- deps,
159
- });
160
- if (firecrawlResult) {
161
- return firecrawlResult;
162
- }
163
- firecrawlDiagnostics.notes = (0, utils_js_2.appendNote)(firecrawlDiagnostics.notes, 'Firecrawl returned empty content');
164
- return null;
165
- };
166
- const attemptBird = async () => {
167
- if (!deps.readTweetWithBird || !twitterStatus) {
168
- return null;
169
- }
170
- deps.onProgress?.({ kind: 'bird-start', url });
171
- try {
172
- const tweet = await deps.readTweetWithBird({ url, timeoutMs });
173
- const text = tweet?.text?.trim() ?? '';
174
- if (text.length === 0) {
175
- deps.onProgress?.({ kind: 'bird-done', url, ok: false, textBytes: null });
176
- return null;
177
- }
178
- const title = tweet?.author?.username ? `@${tweet.author.username}` : null;
179
- const description = null;
180
- const siteName = 'X';
181
- const transcriptResolution = { text: null, source: null };
182
- const transcriptDiagnostics = (0, utils_js_2.ensureTranscriptDiagnostics)(transcriptResolution, cacheMode ?? 'default');
183
- const result = (0, utils_js_2.finalizeExtractedLinkContent)({
184
- url,
185
- baseContent: text,
186
- maxCharacters,
187
- title,
188
- description,
189
- siteName,
190
- transcriptResolution,
191
- video: null,
192
- isVideoOnly: false,
193
- diagnostics: {
194
- strategy: 'bird',
195
- firecrawl: firecrawlDiagnostics,
196
- markdown: {
197
- requested: markdownRequested,
198
- used: false,
199
- provider: null,
200
- notes: 'Bird tweet fetch provides plain text',
201
- },
202
- transcript: transcriptDiagnostics,
203
- },
204
- });
205
- deps.onProgress?.({
206
- kind: 'bird-done',
207
- url,
208
- ok: true,
209
- textBytes: Buffer.byteLength(result.content, 'utf8'),
210
- });
211
- return result;
212
- }
213
- catch (error) {
214
- birdError = error;
215
- deps.onProgress?.({ kind: 'bird-done', url, ok: false, textBytes: null });
216
- return null;
217
- }
218
- };
219
- const birdResult = await attemptBird();
220
- if (birdResult) {
221
- return birdResult;
222
- }
223
- const attemptNitter = async () => {
224
- if (nitterUrls.length === 0) {
225
- return null;
226
- }
227
- for (const nitterUrl of nitterUrls) {
228
- deps.onProgress?.({ kind: 'nitter-start', url: nitterUrl });
229
- try {
230
- const nitterHtml = await (0, fetcher_js_1.fetchHtmlDocument)(deps.fetch, nitterUrl, { timeoutMs });
231
- if (!nitterHtml.trim()) {
232
- nitterError = new Error(`Nitter returned empty body from ${new URL(nitterUrl).host}`);
233
- deps.onProgress?.({ kind: 'nitter-done', url: nitterUrl, ok: false, textBytes: null });
234
- continue;
235
- }
236
- if ((0, twitter_utils_js_1.isAnubisHtml)(nitterHtml)) {
237
- nitterError = new Error(`Nitter returned Anubis challenge from ${new URL(nitterUrl).host}`);
238
- deps.onProgress?.({ kind: 'nitter-done', url: nitterUrl, ok: false, textBytes: null });
239
- continue;
240
- }
241
- deps.onProgress?.({
242
- kind: 'nitter-done',
243
- url: nitterUrl,
244
- ok: true,
245
- textBytes: Buffer.byteLength(nitterHtml, 'utf8'),
246
- });
247
- return nitterHtml;
248
- }
249
- catch (error) {
250
- nitterError = error;
251
- deps.onProgress?.({ kind: 'nitter-done', url: nitterUrl, ok: false, textBytes: null });
252
- }
253
- }
254
- return null;
255
- };
256
- const nitterHtml = await attemptNitter();
257
- if (nitterHtml) {
258
- const nitterResult = await (0, html_js_1.buildResultFromHtmlDocument)({
259
- url,
260
- html: nitterHtml,
261
- cacheMode,
262
- maxCharacters,
263
- youtubeTranscriptMode,
264
- firecrawlDiagnostics,
265
- markdownRequested,
266
- markdownMode,
267
- timeoutMs,
268
- deps,
269
- readabilityCandidate: null,
270
- });
271
- if (!(0, twitter_utils_js_1.isBlockedTwitterContent)(nitterResult.content)) {
272
- nitterResult.diagnostics.strategy = 'nitter';
273
- return nitterResult;
274
- }
275
- nitterError = new Error('Nitter returned blocked or empty content');
276
- }
277
- if (firecrawlMode === 'always') {
278
- const firecrawlResult = await attemptFirecrawl('Firecrawl forced via options');
279
- if (firecrawlResult) {
280
- return firecrawlResult;
281
- }
282
- }
283
- let html = null;
284
- let htmlError = null;
285
- try {
286
- html = await (0, fetcher_js_1.fetchHtmlDocument)(deps.fetch, url, {
287
- timeoutMs,
288
- onProgress: deps.onProgress ?? null,
289
- });
290
- }
291
- catch (error) {
292
- htmlError = error;
293
- }
294
- if (!html) {
295
- if (!canUseFirecrawl) {
296
- throw htmlError instanceof Error ? htmlError : new Error('Failed to fetch HTML document');
297
- }
298
- const firecrawlResult = await attemptFirecrawl('HTML fetch failed; falling back to Firecrawl');
299
- if (firecrawlResult) {
300
- return firecrawlResult;
301
- }
302
- const firecrawlError = firecrawlDiagnostics.notes
303
- ? `; Firecrawl notes: ${firecrawlDiagnostics.notes}`
304
- : '';
305
- throw new Error(`Failed to fetch HTML document${firecrawlError}${htmlError instanceof Error ? `; HTML error: ${htmlError.message}` : ''}`);
306
- }
307
- let readabilityCandidate = null;
308
- if (firecrawlMode === 'auto' && (0, firecrawl_js_1.shouldFallbackToFirecrawl)(html)) {
309
- readabilityCandidate = await (0, readability_js_1.extractReadabilityFromHtml)(html, url);
310
- const readabilityText = readabilityCandidate?.text
311
- ? (0, cleaner_js_1.normalizeForPrompt)(readabilityCandidate.text)
312
- : '';
313
- if (readabilityText.length < constants_js_1.MIN_READABILITY_CONTENT_CHARACTERS) {
314
- const firecrawlResult = await attemptFirecrawl('HTML content looked blocked/thin; falling back to Firecrawl');
315
- if (firecrawlResult) {
316
- return firecrawlResult;
317
- }
318
- }
319
- }
320
- const htmlResult = await (0, html_js_1.buildResultFromHtmlDocument)({
321
- url,
322
- html,
323
- cacheMode,
324
- maxCharacters,
325
- youtubeTranscriptMode,
326
- firecrawlDiagnostics,
327
- markdownRequested,
328
- markdownMode,
329
- timeoutMs,
330
- deps,
331
- readabilityCandidate,
332
- });
333
- if (twitterStatus && (0, twitter_utils_js_1.isBlockedTwitterContent)(htmlResult.content)) {
334
- const birdNote = !deps.readTweetWithBird
335
- ? 'Bird not available'
336
- : birdError
337
- ? `Bird failed: ${birdError instanceof Error ? birdError.message : String(birdError)}`
338
- : 'Bird returned no text';
339
- const nitterNote = nitterUrls.length > 0
340
- ? nitterError
341
- ? `Nitter failed: ${nitterError instanceof Error ? nitterError.message : String(nitterError)}`
342
- : 'Nitter returned no text'
343
- : 'Nitter not available';
344
- throw new Error(`Unable to fetch tweet content from X. ${birdNote}. ${nitterNote}.`);
345
- }
346
- return htmlResult;
347
- }
348
- //# sourceMappingURL=index.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/index.ts"],"names":[],"mappings":";;AA6BA,4CA6YC;AA1aD,wDAAoE;AACpE,wDAAwD;AAGxD,6CAAiD;AACjD,iDAAmE;AACnE,6CAAoE;AACpE,iDAAoF;AACpF,uCAAuD;AACvD,yDAAoF;AACpF,qDAA6D;AAC7D,yDAK2B;AAE3B,yCASmB;AAEZ,KAAK,UAAU,gBAAgB,CACpC,GAAW,EACX,OAA4C,EAC5C,IAAqB;IAErB,MAAM,SAAS,GAAG,IAAA,2BAAgB,EAAC,OAAO,CAAC,CAAA;IAC3C,MAAM,SAAS,GAAG,IAAA,2BAAgB,EAAC,OAAO,CAAC,CAAA;IAC3C,MAAM,aAAa,GAAG,IAAA,+BAAoB,EAAC,OAAO,CAAC,CAAA;IACnD,MAAM,qBAAqB,GAAG,OAAO,EAAE,iBAAiB,IAAI,MAAM,CAAA;IAClE,MAAM,aAAa,GAAG,IAAA,+BAAoB,EAAC,OAAO,CAAC,CAAA;IACnD,MAAM,iBAAiB,GAAG,CAAC,OAAO,EAAE,MAAM,IAAI,MAAM,CAAC,KAAK,UAAU,CAAA;IACpE,MAAM,YAAY,GAAiB,OAAO,EAAE,YAAY,IAAI,MAAM,CAAA;IAElE,MAAM,eAAe,GACnB,aAAa,KAAK,KAAK,IAAI,IAAI,CAAC,mBAAmB,KAAK,IAAI,IAAI,CAAC,IAAA,uBAAY,EAAC,GAAG,CAAC,CAAA;IAEpF,MAAM,gBAAgB,GAAG,IAAA,0CAAuB,EAAC,GAAG,CAAC,CAAA;IACrD,IAAI,gBAAgB,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YAC1C,MAAM,IAAI,KAAK,CACb,uIAAuI,CACxI,CAAA;QACH,CAAC;QAED,MAAM,oBAAoB,GAAG,MAAM,IAAA,mCAAwB,EAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE;YAC3E,qBAAqB;YACrB,SAAS;SACV,CAAC,CAAA;QACF,IAAI,CAAC,oBAAoB,CAAC,IAAI,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,oBAAoB,CAAC,WAAW,EAAE,KAAK,CAAA;YACrD,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,KAAK,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAA;YACzC,MAAM,IAAI,KAAK,CAAC,uCAAuC,MAAM,EAAE,CAAC,CAAA;QAClE,CAAC;QAED,MAAM,qBAAqB,GAAG,IAAA,sCAA2B,EACvD,oBAAoB,EACpB,SAAS,IAAI,SAAS,CACvB,CAAA;QACD,qBAAqB,CAAC,KAAK,GAAG,IAAA,qBAAU,EACtC,qBAAqB,CAAC,KAAK,EAC3B,4DAA4D,CAC7D,CAAA;QAED,OAAO,IAAA,uCAA4B,EAAC;YAClC,GAAG;YACH,WAAW,EAAE,IAAA,4BAAiB,EAAC,EAAE,EAAE,oBAAoB,CAAC,IAAI,CAAC;YAC7D,aAAa;YACb,KAAK,EAAE,IAAI;YACX,WAAW,EAAE,IAAI;YACjB,QAAQ,EAAE,SAAS;YACnB,oBAAoB;YACpB,KAAK,EAAE,IAAI;YACX,WAAW,EAAE,KAAK;YAClB,WAAW,EAAE;gBACX,QAAQ,EAAE,MAAM;gBAChB,SAAS,EAAE;oBACT,SAAS,EAAE,KAAK;oBAChB,IAAI,EAAE,KAAK;oBACX,SAAS;oBACT,WAAW,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;oBAC5D,KAAK,EAAE,8CAA8C;iBACtD;gBACD,QAAQ,EAAE;oBACR,SAAS,EAAE,iBAAiB;oBAC5B,IAAI,EAAE,KAAK;oBACX,QAAQ,EAAE,IAAI;oBACd,KAAK,EAAE,+CAA+C;iBACvD;gBACD,UAAU,EAAE,qBAAqB;aAClC;SACF,CAAC,CAAA;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,IAAA,yCAAsB,EAAC,GAAG,CAAC,CAAA;IAC5C,IAAI,QAAQ,EAAE,CAAC;QACb,IAAI,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YAC1C,MAAM,IAAI,KAAK,CACb,iIAAiI,CAClI,CAAA;QACH,CAAC;QAED,MAAM,oBAAoB,GAAG,MAAM,IAAA,mCAAwB,EAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE;YAC3E,qBAAqB;YACrB,SAAS;SACV,CAAC,CAAA;QACF,IAAI,CAAC,oBAAoB,CAAC,IAAI,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,oBAAoB,CAAC,WAAW,EAAE,KAAK,CAAA;YACrD,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,KAAK,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAA;YACzC,MAAM,IAAI,KAAK,CAAC,8CAA8C,MAAM,EAAE,CAAC,CAAA;QACzE,CAAC;QAED,MAAM,qBAAqB,GAAG,IAAA,sCAA2B,EACvD,oBAAoB,EACpB,SAAS,IAAI,SAAS,CACvB,CAAA;QACD,qBAAqB,CAAC,KAAK,GAAG,IAAA,qBAAU,EACtC,qBAAqB,CAAC,KAAK,EAC3B,wEAAwE,CACzE,CAAA;QAED,OAAO,IAAA,uCAA4B,EAAC;YAClC,GAAG;YACH,WAAW,EAAE,IAAA,4BAAiB,EAAC,EAAE,EAAE,oBAAoB,CAAC,IAAI,CAAC;YAC7D,aAAa;YACb,KAAK,EAAE,IAAI;YACX,WAAW,EAAE,IAAI;YACjB,QAAQ,EAAE,gBAAgB;YAC1B,oBAAoB;YACpB,KAAK,EAAE,IAAI;YACX,WAAW,EAAE,KAAK;YAClB,WAAW,EAAE;gBACX,QAAQ,EAAE,MAAM;gBAChB,SAAS,EAAE;oBACT,SAAS,EAAE,KAAK;oBAChB,IAAI,EAAE,KAAK;oBACX,SAAS;oBACT,WAAW,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;oBAC5D,KAAK,EAAE,qDAAqD;iBAC7D;gBACD,QAAQ,EAAE;oBACR,SAAS,EAAE,iBAAiB;oBAC5B,IAAI,EAAE,KAAK;oBACX,QAAQ,EAAE,IAAI;oBACd,KAAK,EAAE,sDAAsD;iBAC9D;gBACD,UAAU,EAAE,qBAAqB;aAClC;SACF,CAAC,CAAA;IACJ,CAAC;IAED,IAAI,kBAAkB,GAAG,KAAK,CAAA;IAC9B,IAAI,gBAAgB,GAAiC,IAAI,CAAA;IACzD,MAAM,oBAAoB,GAAyB;QACjD,SAAS,EAAE,KAAK;QAChB,IAAI,EAAE,KAAK;QACX,SAAS;QACT,WAAW,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;QAC5D,KAAK,EAAE,IAAI;KACZ,CAAA;IAED,MAAM,aAAa,GAAG,IAAA,qCAAkB,EAAC,GAAG,CAAC,CAAA;IAC7C,MAAM,UAAU,GAAG,aAAa,CAAC,CAAC,CAAC,IAAA,+BAAY,EAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IACzD,IAAI,SAAS,GAAY,IAAI,CAAA;IAC7B,IAAI,WAAW,GAAY,IAAI,CAAA;IAE/B,MAAM,gBAAgB,GAAG,KAAK,EAAE,MAAc,EAAwC,EAAE;QACtF,IAAI,CAAC,eAAe,EAAE,CAAC;YACrB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,IAAI,CAAC,kBAAkB,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,MAAM,IAAA,+BAAkB,EAAC,GAAG,EAAE,IAAI,CAAC,mBAAmB,EAAE;gBACtE,SAAS;gBACT,SAAS;gBACT,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;gBACnC,MAAM;aACP,CAAC,CAAA;YACF,kBAAkB,GAAG,IAAI,CAAA;YACzB,gBAAgB,GAAG,OAAO,CAAC,OAAO,CAAA;YAClC,oBAAoB,CAAC,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC,SAAS,CAAA;YAC9D,oBAAoB,CAAC,IAAI,GAAG,OAAO,CAAC,WAAW,CAAC,IAAI,CAAA;YACpD,oBAAoB,CAAC,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC,SAAS,CAAA;YAC9D,oBAAoB,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC,WAAW,CAAA;YAClE,oBAAoB,CAAC,KAAK,GAAG,OAAO,CAAC,WAAW,CAAC,KAAK,IAAI,IAAI,CAAA;QAChE,CAAC;QAED,oBAAoB,CAAC,KAAK,GAAG,IAAA,qBAAU,EAAC,oBAAoB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAA;QAE3E,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,eAAe,GAAG,MAAM,IAAA,uCAAwB,EAAC;YACrD,GAAG;YACH,OAAO,EAAE,gBAAgB;YACzB,SAAS;YACT,aAAa;YACb,qBAAqB;YACrB,oBAAoB;YACpB,iBAAiB;YACjB,IAAI;SACL,CAAC,CAAA;QACF,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO,eAAe,CAAA;QACxB,CAAC;QAED,oBAAoB,CAAC,KAAK,GAAG,IAAA,qBAAU,EACrC,oBAAoB,CAAC,KAAK,EAC1B,kCAAkC,CACnC,CAAA;QACD,OAAO,IAAI,CAAA;IACb,CAAC,CAAA;IAED,MAAM,WAAW,GAAG,KAAK,IAA0C,EAAE;QACnE,IAAI,CAAC,IAAI,CAAC,iBAAiB,IAAI,CAAC,aAAa,EAAE,CAAC;YAC9C,OAAO,IAAI,CAAA;QACb,CAAC;QAED,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC,CAAA;QAC9C,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC,CAAA;YAC9D,MAAM,IAAI,GAAG,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;YACtC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACtB,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;gBACzE,OAAO,IAAI,CAAA;YACb,CAAC;YAED,MAAM,KAAK,GAAG,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;YAC1E,MAAM,WAAW,GAAG,IAAI,CAAA;YACxB,MAAM,QAAQ,GAAG,GAAG,CAAA;YACpB,MAAM,oBAAoB,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;YACzD,MAAM,qBAAqB,GAAG,IAAA,sCAA2B,EACvD,oBAAoB,EACpB,SAAS,IAAI,SAAS,CACvB,CAAA;YACD,MAAM,MAAM,GAAG,IAAA,uCAA4B,EAAC;gBAC1C,GAAG;gBACH,WAAW,EAAE,IAAI;gBACjB,aAAa;gBACb,KAAK;gBACL,WAAW;gBACX,QAAQ;gBACR,oBAAoB;gBACpB,KAAK,EAAE,IAAI;gBACX,WAAW,EAAE,KAAK;gBAClB,WAAW,EAAE;oBACX,QAAQ,EAAE,MAAM;oBAChB,SAAS,EAAE,oBAAoB;oBAC/B,QAAQ,EAAE;wBACR,SAAS,EAAE,iBAAiB;wBAC5B,IAAI,EAAE,KAAK;wBACX,QAAQ,EAAE,IAAI;wBACd,KAAK,EAAE,sCAAsC;qBAC9C;oBACD,UAAU,EAAE,qBAAqB;iBAClC;aACF,CAAC,CAAA;YACF,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,IAAI,EAAE,WAAW;gBACjB,GAAG;gBACH,EAAE,EAAE,IAAI;gBACR,SAAS,EAAE,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC;aACrD,CAAC,CAAA;YACF,OAAO,MAAM,CAAA;QACf,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,SAAS,GAAG,KAAK,CAAA;YACjB,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;YACzE,OAAO,IAAI,CAAA;QACb,CAAC;IACH,CAAC,CAAA;IAED,MAAM,UAAU,GAAG,MAAM,WAAW,EAAE,CAAA;IACtC,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,UAAU,CAAA;IACnB,CAAC;IAED,MAAM,aAAa,GAAG,KAAK,IAA4B,EAAE;QACvD,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAA;QACb,CAAC;QACD,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC,CAAA;YAC3D,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,MAAM,IAAA,8BAAiB,EAAC,IAAI,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,SAAS,EAAE,CAAC,CAAA;gBAChF,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,EAAE,CAAC;oBACvB,WAAW,GAAG,IAAI,KAAK,CAAC,mCAAmC,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;oBACrF,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,SAAS,EAAE,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;oBACtF,SAAQ;gBACV,CAAC;gBACD,IAAI,IAAA,+BAAY,EAAC,UAAU,CAAC,EAAE,CAAC;oBAC7B,WAAW,GAAG,IAAI,KAAK,CACrB,yCAAyC,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CACnE,CAAA;oBACD,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,SAAS,EAAE,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;oBACtF,SAAQ;gBACV,CAAC;gBACD,IAAI,CAAC,UAAU,EAAE,CAAC;oBAChB,IAAI,EAAE,aAAa;oBACnB,GAAG,EAAE,SAAS;oBACd,EAAE,EAAE,IAAI;oBACR,SAAS,EAAE,MAAM,CAAC,UAAU,CAAC,UAAU,EAAE,MAAM,CAAC;iBACjD,CAAC,CAAA;gBACF,OAAO,UAAU,CAAA;YACnB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,WAAW,GAAG,KAAK,CAAA;gBACnB,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,SAAS,EAAE,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;YACxF,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAA;IACb,CAAC,CAAA;IAED,MAAM,UAAU,GAAG,MAAM,aAAa,EAAE,CAAA;IACxC,IAAI,UAAU,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,MAAM,IAAA,qCAA2B,EAAC;YACrD,GAAG;YACH,IAAI,EAAE,UAAU;YAChB,SAAS;YACT,aAAa;YACb,qBAAqB;YACrB,oBAAoB;YACpB,iBAAiB;YACjB,YAAY;YACZ,SAAS;YACT,IAAI;YACJ,oBAAoB,EAAE,IAAI;SAC3B,CAAC,CAAA;QACF,IAAI,CAAC,IAAA,0CAAuB,EAAC,YAAY,CAAC,OAAO,CAAC,EAAE,CAAC;YACnD,YAAY,CAAC,WAAW,CAAC,QAAQ,GAAG,QAAQ,CAAA;YAC5C,OAAO,YAAY,CAAA;QACrB,CAAC;QACD,WAAW,GAAG,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAA;IACrE,CAAC;IAED,IAAI,aAAa,KAAK,QAAQ,EAAE,CAAC;QAC/B,MAAM,eAAe,GAAG,MAAM,gBAAgB,CAAC,8BAA8B,CAAC,CAAA;QAC9E,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO,eAAe,CAAA;QACxB,CAAC;IACH,CAAC;IAED,IAAI,IAAI,GAAkB,IAAI,CAAA;IAC9B,IAAI,SAAS,GAAY,IAAI,CAAA;IAE7B,IAAI,CAAC;QACH,IAAI,GAAG,MAAM,IAAA,8BAAiB,EAAC,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE;YAC9C,SAAS;YACT,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;SACpC,CAAC,CAAA;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,SAAS,GAAG,KAAK,CAAA;IACnB,CAAC;IAED,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,IAAI,CAAC,eAAe,EAAE,CAAC;YACrB,MAAM,SAAS,YAAY,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAA;QAC3F,CAAC;QAED,MAAM,eAAe,GAAG,MAAM,gBAAgB,CAAC,8CAA8C,CAAC,CAAA;QAC9F,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO,eAAe,CAAA;QACxB,CAAC;QAED,MAAM,cAAc,GAAG,oBAAoB,CAAC,KAAK;YAC/C,CAAC,CAAC,sBAAsB,oBAAoB,CAAC,KAAK,EAAE;YACpD,CAAC,CAAC,EAAE,CAAA;QACN,MAAM,IAAI,KAAK,CACb,gCAAgC,cAAc,GAC5C,SAAS,YAAY,KAAK,CAAC,CAAC,CAAC,iBAAiB,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EACtE,EAAE,CACH,CAAA;IACH,CAAC;IAED,IAAI,oBAAoB,GAAkE,IAAI,CAAA;IAE9F,IAAI,aAAa,KAAK,MAAM,IAAI,IAAA,wCAAyB,EAAC,IAAI,CAAC,EAAE,CAAC;QAChE,oBAAoB,GAAG,MAAM,IAAA,2CAA0B,EAAC,IAAI,EAAE,GAAG,CAAC,CAAA;QAClE,MAAM,eAAe,GAAG,oBAAoB,EAAE,IAAI;YAChD,CAAC,CAAC,IAAA,+BAAkB,EAAC,oBAAoB,CAAC,IAAI,CAAC;YAC/C,CAAC,CAAC,EAAE,CAAA;QACN,IAAI,eAAe,CAAC,MAAM,GAAG,iDAAkC,EAAE,CAAC;YAChE,MAAM,eAAe,GAAG,MAAM,gBAAgB,CAC5C,6DAA6D,CAC9D,CAAA;YACD,IAAI,eAAe,EAAE,CAAC;gBACpB,OAAO,eAAe,CAAA;YACxB,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,IAAA,qCAA2B,EAAC;QACnD,GAAG;QACH,IAAI;QACJ,SAAS;QACT,aAAa;QACb,qBAAqB;QACrB,oBAAoB;QACpB,iBAAiB;QACjB,YAAY;QACZ,SAAS;QACT,IAAI;QACJ,oBAAoB;KACrB,CAAC,CAAA;IACF,IAAI,aAAa,IAAI,IAAA,0CAAuB,EAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QACjE,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,iBAAiB;YACtC,CAAC,CAAC,oBAAoB;YACtB,CAAC,CAAC,SAAS;gBACT,CAAC,CAAC,gBAAgB,SAAS,YAAY,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,EAAE;gBACtF,CAAC,CAAC,uBAAuB,CAAA;QAC7B,MAAM,UAAU,GACd,UAAU,CAAC,MAAM,GAAG,CAAC;YACnB,CAAC,CAAC,WAAW;gBACX,CAAC,CAAC,kBAAkB,WAAW,YAAY,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE;gBAC9F,CAAC,CAAC,yBAAyB;YAC7B,CAAC,CAAC,sBAAsB,CAAA;QAC5B,MAAM,IAAI,KAAK,CAAC,yCAAyC,QAAQ,KAAK,UAAU,GAAG,CAAC,CAAA;IACtF,CAAC;IACD,OAAO,UAAU,CAAA;AACnB,CAAC"}
@@ -1,80 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.extractJsonLdContent = extractJsonLdContent;
4
- const cheerio_1 = require("cheerio");
5
- const cleaner_js_1 = require("./cleaner.js");
6
- function extractJsonLdContent(html) {
7
- try {
8
- const $ = (0, cheerio_1.load)(html);
9
- const scripts = $('script[type="application/ld+json"]').toArray();
10
- const candidates = [];
11
- for (const script of scripts) {
12
- const raw = $(script).text();
13
- if (!raw)
14
- continue;
15
- try {
16
- const data = JSON.parse(raw);
17
- collectCandidates(data, candidates);
18
- }
19
- catch {
20
- // ignore malformed jsonld
21
- }
22
- }
23
- if (candidates.length === 0)
24
- return null;
25
- const sorted = candidates
26
- .map((c) => ({
27
- title: c.title ? (0, cleaner_js_1.normalizeCandidate)(c.title) : null,
28
- description: c.description ? (0, cleaner_js_1.normalizeCandidate)(c.description) : null,
29
- type: c.type ? (0, cleaner_js_1.normalizeCandidate)(c.type) : null,
30
- }))
31
- .filter((c) => c.title || c.description)
32
- .sort((a, b) => (b.description?.length ?? 0) - (a.description?.length ?? 0));
33
- return sorted[0] ?? null;
34
- }
35
- catch {
36
- return null;
37
- }
38
- }
39
- function collectCandidates(input, out) {
40
- if (!input)
41
- return;
42
- if (Array.isArray(input)) {
43
- for (const item of input)
44
- collectCandidates(item, out);
45
- return;
46
- }
47
- if (typeof input !== 'object')
48
- return;
49
- const record = input;
50
- if (record['@graph'] && Array.isArray(record['@graph'])) {
51
- collectCandidates(record['@graph'], out);
52
- }
53
- const type = extractType(record);
54
- if (type) {
55
- const title = firstString(record, ['name', 'headline', 'title']);
56
- const description = firstString(record, ['description', 'summary']);
57
- if (title || description) {
58
- out.push({ title: title ?? null, description: description ?? null, type });
59
- }
60
- }
61
- }
62
- function extractType(record) {
63
- const raw = record['@type'];
64
- if (typeof raw === 'string')
65
- return raw.toLowerCase();
66
- if (Array.isArray(raw)) {
67
- const found = raw.find((entry) => typeof entry === 'string');
68
- return typeof found === 'string' ? found.toLowerCase() : null;
69
- }
70
- return null;
71
- }
72
- function firstString(record, keys) {
73
- for (const key of keys) {
74
- const value = record[key];
75
- if (typeof value === 'string' && value.trim())
76
- return value.trim();
77
- }
78
- return null;
79
- }
80
- //# sourceMappingURL=jsonld.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"jsonld.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/jsonld.ts"],"names":[],"mappings":";;AAUA,oDAgCC;AA1CD,qCAA8B;AAE9B,6CAAiD;AAQjD,SAAgB,oBAAoB,CAAC,IAAY;IAC/C,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAA,cAAI,EAAC,IAAI,CAAC,CAAA;QACpB,MAAM,OAAO,GAAG,CAAC,CAAC,oCAAoC,CAAC,CAAC,OAAO,EAAE,CAAA;QACjE,MAAM,UAAU,GAAoB,EAAE,CAAA;QAEtC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAA;YAC5B,IAAI,CAAC,GAAG;gBAAE,SAAQ;YAClB,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;gBAC5B,iBAAiB,CAAC,IAAI,EAAE,UAAU,CAAC,CAAA;YACrC,CAAC;YAAC,MAAM,CAAC;gBACP,0BAA0B;YAC5B,CAAC;QACH,CAAC;QAED,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAA;QAExC,MAAM,MAAM,GAAG,UAAU;aACtB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACX,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAA,+BAAkB,EAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI;YACnD,WAAW,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,IAAA,+BAAkB,EAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI;YACrE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAA,+BAAkB,EAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;SACjD,CAAC,CAAC;aACF,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,WAAW,CAAC;aACvC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,MAAM,IAAI,CAAC,CAAC,CAAC,CAAA;QAE9E,OAAO,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,CAAA;IAC1B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,KAAc,EAAE,GAAoB;IAC7D,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,KAAK,MAAM,IAAI,IAAI,KAAK;YAAE,iBAAiB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAA;QACtD,OAAM;IACR,CAAC;IAED,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAM;IAErC,MAAM,MAAM,GAAG,KAAgC,CAAA;IAC/C,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;QACxD,iBAAiB,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,GAAG,CAAC,CAAA;IAC1C,CAAC;IAED,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,CAAC,CAAA;IAChC,IAAI,IAAI,EAAE,CAAC;QACT,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC,CAAA;QAChE,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC,CAAA;QACnE,IAAI,KAAK,IAAI,WAAW,EAAE,CAAC;YACzB,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,IAAI,IAAI,EAAE,WAAW,EAAE,WAAW,IAAI,IAAI,EAAE,IAAI,EAAE,CAAC,CAAA;QAC5E,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,MAA+B;IAClD,MAAM,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,CAAA;IAC3B,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,GAAG,CAAC,WAAW,EAAE,CAAA;IACrD,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAA;QAC5D,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;IAC/D,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,WAAW,CAAC,MAA+B,EAAE,IAAc;IAClE,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAA;QACzB,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,EAAE;YAAE,OAAO,KAAK,CAAC,IAAI,EAAE,CAAA;IACpE,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC"}