websnap-reader 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/parser.js ADDED
@@ -0,0 +1,340 @@
1
+ "use strict";
2
+ /**
3
+ * parser.ts - Extract clean article content from raw HTML
4
+ *
5
+ * Implements a simplified readability algorithm:
6
+ * 1. Strip non-content elements (nav, ads, scripts, styles, etc.)
7
+ * 2. Identify the main content container
8
+ * 3. Extract metadata (title, author, date)
9
+ * 4. Calculate reading statistics
10
+ */
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.parseContent = parseContent;
13
// Elements whose markup AND children are discarded before content extraction
// (executable, embedded, media and head-only elements).
const REMOVE_TAGS = new Set(
    [
        "script style noscript",
        "iframe object embed applet",
        "link meta",
        "svg canvas video audio source track",
        "map area",
    ].flatMap((group) => group.split(" ")),
);
33
// Structural elements that usually hold site chrome (navigation, mastheads,
// footers, side rails) rather than article content.
const NAV_TAGS = new Set("nav header footer aside menu menuitem".split(" "));
42
// Case-insensitive patterns tested against class/id attribute values; a hit
// marks the element as boilerplate (ads, navigation, social widgets, ...).
// Every pattern is anchored at a leading word boundary; stems that must also
// end on a word boundary carry an explicit trailing `\b`.
const NOISE_PATTERNS = [
    "ad[s]?\\b",
    "banner\\b",
    "breadcrumb",
    "comment",
    "community",
    "cover-wrap",
    "footer",
    "header\\b",
    "legend",
    "menu",
    "modal",
    "nav\\b",
    "navigation",
    "popup",
    "related",
    "remark",
    "search",
    "share",
    "sidebar",
    "social",
    "sponsor",
    "tags?\\b",
    "toolbar",
    "widget",
    "cookie",
    "gdpr",
    "newsletter",
    "subscri",
    "promo",
    "recommend",
    "toast",
    "overlay",
].map((stem) => new RegExp(`\\b${stem}`, "i"));
77
// Case-insensitive patterns tested against class/id attribute values; a hit
// suggests the element wraps the main article body. Order matters: callers
// try the patterns from first to last.
const CONTENT_PATTERNS = [
    "article",
    "body",
    "content",
    "entry",
    "hentry",
    "main",
    "page",
    "post\\b",
    "text",
    "blog",
    "story",
].map((stem) => new RegExp(`\\b${stem}`, "i"));
91
/**
 * Turn a raw HTML document into a structured article record.
 *
 * Metadata (title, author, date, site name, description) is read from the
 * untouched document; the body is then stripped of removable tags and noisy
 * elements before the main content container is selected and measured.
 *
 * @param {string} html - Full HTML source of the page.
 * @param {string} url - Address the page was loaded from (not read by this function).
 * @returns {{title: string, author: (string|null), date: (string|null),
 *   siteName: (string|null), description: (string|null), content: string,
 *   textContent: string, wordCount: number, readingTime: string}}
 */
function parseContent(html, url) {
    // Normalises an optional metadata value: cleaned text, or null when absent.
    const tidy = (value) => (value ? cleanText(value) : null);

    const title = extractTitle(html);
    const rawAuthor = extractMeta(html, [
        'meta[name="author"]',
        'meta[property="article:author"]',
        'meta[name="sailthru.author"]',
        'meta[name="parsely-author"]',
    ]);
    const rawDate = extractMeta(html, [
        'meta[property="article:published_time"]',
        'meta[name="date"]',
        'meta[name="publishdate"]',
        'meta[name="sailthru.date"]',
        'meta[property="og:updated_time"]',
        'meta[name="parsely-pub-date"]',
    ]);
    const rawSiteName = extractMeta(html, [
        'meta[property="og:site_name"]',
        'meta[name="application-name"]',
    ]);
    const rawDescription = extractMeta(html, [
        'meta[property="og:description"]',
        'meta[name="description"]',
        'meta[name="twitter:description"]',
    ]);

    // Strip boilerplate, then pick the container that most likely holds the article.
    const withoutBoilerplate = removeNoiseElements(removeTagsCompletely(html, REMOVE_TAGS));
    const content = extractMainContent(withoutBoilerplate);

    // Reading statistics are derived from the plain-text rendering.
    const textContent = htmlToPlainText(content);
    const wordCount = textContent.split(/\s+/).filter(Boolean).length;

    return {
        title,
        author: tidy(rawAuthor),
        date: rawDate ? formatDate(rawDate) : null,
        siteName: tidy(rawSiteName),
        description: tidy(rawDescription),
        content,
        textContent,
        wordCount,
        readingTime: formatReadingTime(wordCount),
    };
}
141
/**
 * Determine the page title, trying progressively less specific sources:
 * `og:title`, `twitter:title`, the `<title>` element (with a trailing
 * " | Site Name" / " - Site Name" suffix removed), then the first `<h1>`.
 *
 * @param {string} html - Full HTML source of the page.
 * @returns {string} The cleaned title, or "Untitled" when no source yields one.
 */
function extractTitle(html) {
    // Social-card titles are preferred: they rarely carry a site-name suffix.
    const ogTitle = extractMetaContent(html, 'property="og:title"');
    if (ogTitle)
        return cleanText(ogTitle);
    const twitterTitle = extractMetaContent(html, 'name="twitter:title"');
    if (twitterTitle)
        return cleanText(twitterTitle);

    const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
    if (titleMatch) {
        const fullTitle = cleanText(titleMatch[1]);
        // A separator is a pipe (spacing optional) or a dash/en dash/em dash
        // surrounded by whitespace. Requiring the whitespace keeps hyphenated
        // words such as "Self-driving" intact.
        const separators = [...fullTitle.matchAll(/\s*\|\s*|\s+[-\u2013\u2014]\s+/g)];
        const lastSeparator = separators[separators.length - 1];
        // Only the final segment is dropped, and never the whole title.
        const title = lastSeparator && lastSeparator.index > 0
            ? fullTitle.slice(0, lastSeparator.index)
            : fullTitle;
        // An empty <title> falls through to the <h1> lookup below.
        if (title)
            return title;
    }

    const h1Match = html.match(/<h1[^>]*>([\s\S]*?)<\/h1>/i);
    if (h1Match) {
        const heading = cleanText(h1Match[1]);
        if (heading)
            return heading;
    }
    return "Untitled";
}
167
/**
 * Return the content of the first `<meta>` tag matched by a prioritised list
 * of selectors.
 *
 * Each selector must look like `meta[attr="value"]`; entries in any other
 * shape are skipped.
 *
 * @param {string} html - Full HTML source of the page.
 * @param {string[]} selectors - Candidate selectors, highest priority first.
 * @returns {string|null} The first non-empty content value, or null.
 */
function extractMeta(html, selectors) {
    const selectorShape = /meta\[(\w+)="([^"]+)"\]/;
    for (const selector of selectors) {
        const parsed = selectorShape.exec(selector);
        if (parsed === null) {
            continue;
        }
        const found = extractMetaContent(html, `${parsed[1]}="${parsed[2]}"`);
        if (found) {
            return found;
        }
    }
    return null;
}
183
/**
 * Read the `content` attribute of the first `<meta>` tag that carries a given
 * identifying attribute.
 *
 * Tags are scanned in document order and their attributes parsed one by one,
 * so the identifying attribute must match by exact name (`data-name="author"`
 * does not satisfy `name="author"`), may use single or double quotes, and may
 * appear before or after `content`. Names and values compare
 * case-insensitively.
 *
 * @param {string} html - HTML source to search.
 * @param {string} attrString - Identifying attribute written as `name="value"`
 *   (e.g. `property="og:title"`). A string in any other shape is matched as a
 *   case-insensitive literal substring of the tag.
 * @returns {string|null} Raw (not entity-decoded) content value of the first
 *   matching tag with non-empty content, or null.
 */
function extractMetaContent(html, attrString) {
    const wanted = /^\s*([^\s"'=<>\/]+)\s*=\s*(?:"([^"]*)"|'([^']*)')\s*$/.exec(attrString);
    const wantedName = wanted ? wanted[1].toLowerCase() : null;
    const wantedValue = wanted ? (wanted[2] ?? wanted[3]).toLowerCase() : null;
    const literalNeedle = String(attrString).toLowerCase();

    // Quoted attribute values are consumed as units so a ">" inside a value
    // does not end the tag early.
    const metaTag = /<meta(?=[\s\/>])((?:"[^"]*"|'[^']*'|[^'">])*)>/gi;
    const attribute = /([^\s"'=<>\/]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;

    for (const [tag, attrText] of html.matchAll(metaTag)) {
        let identified = wanted ? false : tag.toLowerCase().includes(literalNeedle);
        let content = null;
        for (const [, name, doubleQuoted, singleQuoted, bare] of attrText.matchAll(attribute)) {
            const key = name.toLowerCase();
            const value = doubleQuoted ?? singleQuoted ?? bare;
            if (wanted && key === wantedName && value.toLowerCase() === wantedValue) {
                identified = true;
            }
            else if (key === "content" && content === null) {
                content = value;
            }
        }
        // Tags with empty content are skipped so a later duplicate can win.
        if (identified && content) {
            return content;
        }
    }
    return null;
}
202
/**
 * Delete every occurrence of the given elements from an HTML string.
 *
 * - Void elements (`meta`, `link`, `source`, ...) have no closing tag in
 *   HTML5, so each of their tags is removed on its own, with or without a
 *   trailing slash.
 * - All other elements are removed together with their content when a closing
 *   tag is found, and on their own when written as self-closing (`<svg />`).
 *
 * Tag names must be followed by whitespace, "/" or ">" to match, so removing
 * `map` leaves `<mapper>` untouched.
 *
 * @param {string} html - HTML source to clean.
 * @param {Iterable<string>} tags - Lower-case element names to remove.
 * @returns {string} The HTML with those elements removed.
 */
function removeTagsCompletely(html, tags) {
    // Elements that can never have children (HTML Living Standard "void elements").
    const voidElements = new Set([
        "area", "base", "br", "col", "embed", "hr", "img",
        "input", "link", "meta", "source", "track", "wbr",
    ]);
    let result = html;
    for (const tag of tags) {
        // Escape so an unusual tag name cannot alter the generated pattern.
        const name = String(tag).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        if (voidElements.has(String(tag).toLowerCase())) {
            const anyTag = new RegExp(`<${name}(?=[\\s/>])[^>]*>`, "gi");
            const strayClose = new RegExp(`<\\/${name}\\s*>`, "gi");
            result = result.replace(anyTag, "").replace(strayClose, "");
            continue;
        }
        const withContent = new RegExp(`<${name}[\\s>][\\s\\S]*?<\\/${name}>`, "gi");
        const selfClosing = new RegExp(`<${name}(?=[\\s/>])[^>]*/\\s*>`, "gi");
        // Paired form first: otherwise a self-closing child could split the pair.
        result = result.replace(withContent, "");
        result = result.replace(selfClosing, "");
    }
    return result;
}
216
/**
 * Strip boilerplate from an HTML string in three regex passes:
 * navigation-type elements (NAV_TAGS), container elements whose class/id
 * matches a noise pattern (NOISE_PATTERNS), and elements hidden via inline
 * style or `aria-hidden="true"`.
 *
 * This is a regex heuristic, not a DOM parse: each match ends at the first
 * closing tag it finds, so nested markup may be cut short.
 *
 * @param {string} html - HTML source to clean.
 * @returns {string} The HTML with the matched spans removed.
 */
function removeNoiseElements(html) {
    // Pass 1: <nav>, <header>, <footer>, ... including their content.
    const withoutNav = [...NAV_TAGS].reduce(
        (markup, tag) => markup.replace(new RegExp(`<${tag}[\\s>][\\s\\S]*?<\\/${tag}>`, "gi"), ""),
        html,
    );

    // Pass 2: containers whose class or id value matches a noise pattern.
    const withoutNoise = NOISE_PATTERNS.reduce(
        (markup, { source }) => markup.replace(
            new RegExp(`<(div|section|aside|ul|ol|form|figure)[^>]*(?:class|id)="[^"]*${source}[^"]*"[\\s\\S]*?<\\/\\1>`, "gi"),
            "",
        ),
        withoutNav,
    );

    // Pass 3: anything explicitly hidden from the reader.
    return withoutNoise.replace(
        /<[^>]*(?:display\s*:\s*none|visibility\s*:\s*hidden|aria-hidden="true")[^>]*>[\s\S]*?<\/[^>]+>/gi,
        "",
    );
}
238
/**
 * Pick the HTML fragment most likely to hold the article body.
 *
 * Candidates are tried in order and the first whose inner HTML is longer than
 * 200 characters wins: `<article>`, `<main>`, any element with `role="main"`,
 * then — per CONTENT_PATTERNS entry — the longest `<div>`/`<section>` whose
 * class or id matches. Failing all of those, the `<body>` content is
 * returned, or the input itself when there is no `<body>`.
 *
 * @param {string} html - Pre-cleaned HTML source.
 * @returns {string} Inner HTML of the chosen container.
 */
function extractMainContent(html) {
    const minLength = 200;

    // Semantic containers, most specific first.
    const semanticContainers = [
        /<article[^>]*>([\s\S]*?)<\/article>/i,
        /<main[^>]*>([\s\S]*?)<\/main>/i,
        /<[^>]*role="main"[^>]*>([\s\S]*?)<\/[^>]+>/i,
    ];
    for (const container of semanticContainers) {
        const hit = container.exec(html);
        if (hit !== null && hit[1].length > minLength) {
            return hit[1];
        }
    }

    // Class/id heuristics: within one pattern, keep the longest candidate.
    for (const { source } of CONTENT_PATTERNS) {
        const finder = new RegExp(`<(div|section)[^>]*(?:class|id)="[^"]*${source}[^"]*"[^>]*>([\\s\\S]*?)<\\/\\1>`, "gi");
        let longest = "";
        for (const [, , inner] of html.matchAll(finder)) {
            if (inner.length > longest.length) {
                longest = inner;
            }
        }
        if (longest.length > minLength) {
            return longest;
        }
    }

    // Last resort: the whole body, or the input untouched.
    const body = /<body[^>]*>([\s\S]*?)<\/body>/i.exec(html);
    return body ? body[1] : html;
}
276
/**
 * Render an HTML fragment as plain text.
 *
 * `<br>` and block-level tags (p, div, li, h1-h6, blockquote, tr) become line
 * breaks, all other tags are dropped, common entities are decoded, and
 * whitespace is normalised (runs of spaces/tabs collapse to one space, three
 * or more newlines collapse to a blank line).
 *
 * Entities are decoded in a single pass, so already-escaped text such as
 * `&amp;lt;` yields the literal `&lt;` rather than being decoded twice.
 * Decimal and hexadecimal references are supported for the full Unicode range.
 *
 * @param {string} html - HTML fragment.
 * @returns {string} Trimmed plain text.
 */
function htmlToPlainText(html) {
    const namedEntities = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " " };
    const decodeEntity = (entity, decimal, hex, name) => {
        if (name !== undefined) {
            return namedEntities[name];
        }
        const codePoint = decimal !== undefined
            ? Number.parseInt(decimal, 10)
            : Number.parseInt(hex, 16);
        // Out-of-range references are left as written instead of throwing.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    };
    return html
        // Line breaks for <br> and block-level boundaries.
        .replace(/<br\s*\/?>/gi, "\n")
        .replace(/<\/(p|div|li|h[1-6]|blockquote|tr)>/gi, "\n")
        .replace(/<(p|div|li|h[1-6]|blockquote|tr)[^>]*>/gi, "\n")
        // Drop whatever markup is left.
        .replace(/<[^>]+>/g, "")
        // Decode entities exactly once.
        .replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g, decodeEntity)
        // Normalise whitespace.
        .replace(/[ \t]+/g, " ")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
}
300
/**
 * Normalise a short extracted string (title, author, description, ...):
 * strip tags, decode common HTML entities, collapse all whitespace to single
 * spaces and trim.
 *
 * Entities are decoded in a single pass, so `&amp;amp;` yields the literal
 * `&amp;` instead of being decoded twice. Decimal and hexadecimal character
 * references are decoded as well.
 *
 * @param {string} text - Raw text, possibly containing markup and entities.
 * @returns {string} Cleaned single-line text.
 */
function cleanText(text) {
    const namedEntities = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " " };
    const decodeEntity = (entity, decimal, hex, name) => {
        if (name !== undefined) {
            return namedEntities[name];
        }
        const codePoint = decimal !== undefined
            ? Number.parseInt(decimal, 10)
            : Number.parseInt(hex, 16);
        // Out-of-range references are left as written instead of throwing.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    };
    return text
        .replace(/<[^>]+>/g, "")
        .replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g, decodeEntity)
        .replace(/\s+/g, " ")
        .trim();
}
315
/**
 * Format a date string as a long US-English date, e.g. "January 15, 2024".
 *
 * Date-only ISO strings ("2024-01-15", "2024-01", "2024") are parsed by
 * JavaScript as midnight UTC. They are therefore formatted in UTC as well;
 * formatting them in the local zone would show the previous day on machines
 * west of UTC. Strings that carry a time are formatted in the local zone.
 *
 * @param {string} dateStr - Date as found in page metadata.
 * @returns {string} The formatted date, or `dateStr` unchanged when it cannot
 *   be parsed or formatted.
 */
function formatDate(dateStr) {
    const date = new Date(dateStr);
    if (Number.isNaN(date.getTime())) {
        return dateStr;
    }
    const options = { year: "numeric", month: "long", day: "numeric" };
    if (/^\d{4}(?:-\d{2}(?:-\d{2})?)?$/.test(dateStr)) {
        options.timeZone = "UTC";
    }
    try {
        return date.toLocaleDateString("en-US", options);
    }
    catch {
        // Best effort: hand back the raw value if locale data is unavailable.
        return dateStr;
    }
}
333
/**
 * Estimate reading time at 238 words per minute, rounded up, never reporting
 * less than one minute.
 *
 * @param {number} wordCount - Number of words in the article.
 * @returns {string} Label such as "4 min read".
 */
function formatReadingTime(wordCount) {
    const wordsPerMinute = 238;
    const roundedUp = Math.ceil(wordCount / wordsPerMinute);
    const minutes = Math.max(1, roundedUp);
    return minutes + " min read";
}
340
+ //# sourceMappingURL=parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;AAmGH,oCAiDC;AAtID,iDAAiD;AACjD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC;IAC1B,QAAQ;IACR,OAAO;IACP,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,OAAO;IACP,QAAQ;IACR,MAAM;IACN,MAAM;IACN,KAAK;IACL,QAAQ;IACR,OAAO;IACP,OAAO;IACP,QAAQ;IACR,OAAO;IACP,KAAK;IACL,MAAM;CACP,CAAC,CAAC;AAEH,sDAAsD;AACtD,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC;IACvB,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,OAAO;IACP,MAAM;IACN,UAAU;CACX,CAAC,CAAC;AAEH,8CAA8C;AAC9C,MAAM,cAAc,GAAG;IACrB,aAAa;IACb,aAAa;IACb,eAAe;IACf,YAAY;IACZ,cAAc;IACd,eAAe;IACf,WAAW;IACX,aAAa;IACb,WAAW;IACX,SAAS;IACT,UAAU;IACV,UAAU;IACV,eAAe;IACf,UAAU;IACV,YAAY;IACZ,WAAW;IACX,WAAW;IACX,UAAU;IACV,YAAY;IACZ,WAAW;IACX,YAAY;IACZ,YAAY;IACZ,YAAY;IACZ,WAAW;IACX,WAAW;IACX,SAAS;IACT,eAAe;IACf,YAAY;IACZ,UAAU;IACV,cAAc;IACd,UAAU;IACV,YAAY;CACb,CAAC;AAEF,0CAA0C;AAC1C,MAAM,gBAAgB,GAAG;IACvB,YAAY;IACZ,SAAS;IACT,YAAY;IACZ,UAAU;IACV,WAAW;IACX,SAAS;IACT,SAAS;IACT,WAAW;IACX,SAAS;IACT,SAAS;IACT,UAAU;CACX,CAAC;AAEF;;GAEG;AACH,SAAgB,YAAY,CAAC,IAAY,EAAE,GAAW;IACpD,qCAAqC;IACrC,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IACjC,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,EAAE;QAC/B,qBAAqB;QACrB,iCAAiC;QACjC,8BAA8B;QAC9B,6BAA6B;KAC9B,CAAC,CAAC;IACH,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,EAAE;QAC7B,yCAAyC;QACzC,mBAAmB;QACnB,0BAA0B;QAC1B,4BAA4B;QAC5B,kCAAkC;QAClC,+BAA+B;KAChC,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,EAAE;QACjC,+BAA+B;QAC/B,+BAA+B;KAChC,CAAC,CAAC;IACH,MAAM,WAAW,GAAG,WAAW,CAAC,IAAI,EAAE;QACpC,iCAAiC;QACjC,0BAA0B;QAC1B,kCAAkC;KACnC,CAAC,CAAC;IAEH,4BAA4B;IAC5B,IAAI,WAAW,GAAG,oBAAoB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IAC1D,WAAW,GAAG,mBAAmB,CAAC,WAAW,CAAC,CAAC;IAC/C,MAAM,WAAW,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;IAEpD,iBAAiB;IACjB,MAAM,WAAW,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC;IACjD,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACnE,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC;IAC/B,MAAM,WAAW,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAEjD,OAAO;QACL,KAAK;QAC
L,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI;QACzC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;QACpC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI;QAC/C,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI;QACxD,OAAO,EAAE,WAAW;QACpB,WAAW;QACX,SAAS;QACT,WAAW;KACZ,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,IAAY;IAChC,qBAAqB;IACrB,MAAM,OAAO,GAAG,kBAAkB,CAAC,IAAI,EAAE,qBAAqB,CAAC,CAAC;IAChE,IAAI,OAAO;QAAE,OAAO,SAAS,CAAC,OAAO,CAAC,CAAC;IAEvC,oBAAoB;IACpB,MAAM,YAAY,GAAG,kBAAkB,CAAC,IAAI,EAAE,sBAAsB,CAAC,CAAC;IACtE,IAAI,YAAY;QAAE,OAAO,SAAS,CAAC,YAAY,CAAC,CAAC;IAEjD,kBAAkB;IAClB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;IAClE,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,KAAK,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QACrC,yEAAyE;QACzE,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,8CAA8C,EAAE,EAAE,CAAC,CAAC;QAC1E,OAAO,KAAK,CAAC;IACf,CAAC;IAED,iBAAiB;IACjB,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;IACzD,IAAI,OAAO;QAAE,OAAO,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;IAE1C,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,IAAY,EAAE,SAAmB;IACpD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,qDAAqD;QACrD,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAC9B,yBAAyB,CAC1B,CAAC;QACF,IAAI,CAAC,SAAS;YAAE,SAAS;QAEzB,MAAM,CAAC,EAAE,QAAQ,EAAE,SAAS,CAAC,GAAG,SAAS,CAAC;QAC1C,MAAM,KAAK,GAAG,kBAAkB,CAAC,IAAI,EAAE,GAAG,QAAQ,KAAK,SAAS,GAAG,CAAC,CAAC;QACrE,IAAI,KAAK;YAAE,OAAO,KAAK,CAAC;IAC1B,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,IAAY,EAAE,UAAkB;IAC1D,8DAA8D;IAC9D,MAAM,WAAW,GAAG,UAAU,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;IACtE,MAAM,QAAQ,GAAG;QACf,IAAI,MAAM,CACR,aAAa,WAAW,wBAAwB,EAChD,GAAG,CACJ;QACD,IAAI,MAAM,CACR,mCAAmC,WAAW,EAAE,EAChD,GAAG,CACJ;QACD,IAAI,MAAM,CACR,aAAa,WAAW,wBAAwB,EAChD,GAAG,CACJ;QACD,IAAI,MAAM,CACR,mCAAmC,WAAW,EAAE,EAChD,GAAG,CACJ;KACF,CAAC;IAEF,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAClC,IAAI,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC;
YAAE,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAAY,EAAE,IAAiB;IAC3D,IAAI,MAAM,GAAG,IAAI,CAAC;IAClB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,4CAA4C;QAC5C,MAAM,WAAW,GAAG,IAAI,MAAM,CAAC,IAAI,GAAG,aAAa,EAAE,IAAI,CAAC,CAAC;QAC3D,MAAM,WAAW,GAAG,IAAI,MAAM,CAC5B,IAAI,GAAG,uBAAuB,GAAG,GAAG,EACpC,IAAI,CACL,CAAC;QACF,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QACzC,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAC3C,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,IAAY;IACvC,IAAI,MAAM,GAAG,IAAI,CAAC;IAElB,uBAAuB;IACvB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,IAAI,MAAM,CACtB,IAAI,GAAG,uBAAuB,GAAG,GAAG,EACpC,IAAI,CACL,CAAC;QACF,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACrC,CAAC;IAED,iDAAiD;IACjD,+EAA+E;IAC/E,gCAAgC;IAChC,KAAK,MAAM,OAAO,IAAI,cAAc,EAAE,CAAC;QACrC,oDAAoD;QACpD,MAAM,QAAQ,GAAG,IAAI,MAAM,CACzB,iEAAiE,OAAO,CAAC,MAAM,0BAA0B,EACzG,IAAI,CACL,CAAC;QACF,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACxC,CAAC;IAED,yBAAyB;IACzB,MAAM,GAAG,MAAM,CAAC,OAAO,CACrB,kGAAkG,EAClG,EAAE,CACH,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,IAAY;IACtC,kCAAkC;IAClC,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAC7B,sCAAsC,CACvC,CAAC;IACF,IAAI,YAAY,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACjD,OAAO,YAAY,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,iBAAiB;IACjB,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;IAC/D,IAAI,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QAC3C,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,oBAAoB;IACpB,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAC9B,6CAA6C,CAC9C,CAAC;IACF,IAAI,aAAa,IAAI,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACnD,OAAO,aAAa,CAAC,CAAC,CAAC,CAAC;IAC1B,CAAC;IAED,kCAAkC;IAClC,KAAK,MAAM,OAAO,IAAI,gBAAgB,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,IAAI,MAAM,CACtB,yCAAyC,OAAO,CAAC,MAAM,kCAAkC,EACzF,IAAI,CACL,CAAC;QACF,IAAI,KAAK,CAAC;QACV,IAAI,IAAI,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IA
AI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC3C,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;gBAClC,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,IAAI,CAAC;IACrC,CAAC;IAED,gCAAgC;IAChC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;IAC/D,IAAI,SAAS;QAAE,OAAO,SAAS,CAAC,CAAC,CAAC,CAAC;IAEnC,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,IAAY;IACnC,OAAO,IAAI;QACT,gDAAgD;SAC/C,OAAO,CAAC,cAAc,EAAE,IAAI,CAAC;SAC7B,OAAO,CAAC,uCAAuC,EAAE,IAAI,CAAC;SACtD,OAAO,CAAC,0CAA0C,EAAE,IAAI,CAAC;QAC1D,iCAAiC;SAChC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;QACxB,8BAA8B;SAC7B,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;QACvE,sBAAsB;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1B,IAAI,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,OAAe;IACjC,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,OAAO,CAAC,CAAC;QAC/B,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAAE,OAAO,OAAO,CAAC;QAC1C,OAAO,IAAI,CAAC,kBAAkB,CAAC,OAAO,EAAE;YACtC,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,MAAM;YACb,GAAG,EAAE,SAAS;SACf,CAAC,CAAC;IACL,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,OAAO,CAAC;IACjB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,SAAiB;IAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,CAAC;IACxD,OAAO,GAAG,OAAO,WAAW,CAAC;AAC/B,CAAC"}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * summarizer.ts - AI-powered article summarization
3
+ *
4
+ * Supports multiple backends:
5
+ * 1. OpenAI API (set OPENAI_API_KEY env var)
6
+ * 2. Anthropic API (set ANTHROPIC_API_KEY env var)
7
+ * 3. Local LLM via Ollama (default, no API key needed)
8
+ * 4. Fallback: extractive summary (no AI needed)
9
+ */
10
+ import { ParsedArticle } from "./parser";
11
/**
 * Generate a 3-sentence summary of the article.
 *
 * Per the compiled implementation shipped alongside this declaration, the
 * backends are tried in order — OpenAI (when OPENAI_API_KEY is set),
 * Anthropic (when ANTHROPIC_API_KEY is set), then a local Ollama server —
 * and a backend failure is reported on stderr before the next one is tried.
 * When none succeeds, an extractive summary built from the article text is
 * returned instead.
 *
 * @param article - Parsed article to summarise.
 * @returns A promise resolving to the summary text.
 */
export declare function summarize(article: ParsedArticle): Promise<string>;
15
+ //# sourceMappingURL=summarizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"summarizer.d.ts","sourceRoot":"","sources":["../src/summarizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAYzC;;GAEG;AACH,wBAAsB,SAAS,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAmDvE"}
@@ -0,0 +1,197 @@
1
+ "use strict";
2
+ /**
3
+ * summarizer.ts - AI-powered article summarization
4
+ *
5
+ * Supports multiple backends:
6
+ * 1. OpenAI API (set OPENAI_API_KEY env var)
7
+ * 2. Anthropic API (set ANTHROPIC_API_KEY env var)
8
+ * 3. Local LLM via Ollama (default, no API key needed)
9
+ * 4. Fallback: extractive summary (no AI needed)
10
+ */
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.summarize = summarize;
13
+ const formatter_1 = require("./formatter");
14
/**
 * Produce a 3-sentence summary of an article.
 *
 * Backends are attempted in a fixed order — OpenAI and Anthropic only when
 * their API key is present in the environment, then a local Ollama server.
 * Each failure is reported on stderr and the next backend is tried. When all
 * of them fail, an extractive summary is returned instead.
 *
 * @param {object} article - Parsed article (as produced by the parser module).
 * @returns {Promise<string>} The summary text.
 */
async function summarize(article) {
    const env = process.env;
    const config = {
        openaiApiKey: env.OPENAI_API_KEY,
        openaiModel: env.OPENAI_MODEL || "gpt-4o-mini",
        anthropicApiKey: env.ANTHROPIC_API_KEY,
        anthropicModel: env.ANTHROPIC_MODEL || "claude-sonnet-4-20250514",
        ollamaUrl: env.OLLAMA_URL || "http://127.0.0.1:11434",
        ollamaModel: env.OLLAMA_MODEL || "llama3.2",
    };
    const prompt = (0, formatter_1.formatSummaryPrompt)(article);

    // Priority-ordered backends; hosted ones are skipped without credentials.
    const backends = [
        {
            enabled: Boolean(config.openaiApiKey),
            run: () => summarizeOpenAI(prompt, config),
            failurePrefix: "\x1b[33mOpenAI failed:\x1b[0m ",
        },
        {
            enabled: Boolean(config.anthropicApiKey),
            run: () => summarizeAnthropic(prompt, config),
            failurePrefix: "\x1b[33mAnthropic failed:\x1b[0m ",
        },
        {
            enabled: true,
            run: () => summarizeOllama(prompt, config),
            failurePrefix: "\x1b[33mOllama failed:\x1b[0m ",
        },
    ];
    for (const backend of backends) {
        if (!backend.enabled) {
            continue;
        }
        try {
            return await backend.run();
        }
        catch (err) {
            process.stderr.write(`${backend.failurePrefix}${err.message}\n`);
        }
    }

    // Nothing answered: fall back to picking sentences from the article itself.
    process.stderr.write(`\x1b[33mNo AI backend available. Using extractive summary.\x1b[0m\n`);
    process.stderr.write(`\x1b[90mSet OPENAI_API_KEY, ANTHROPIC_API_KEY, or run Ollama for AI summaries.\x1b[0m\n`);
    return extractiveSummary(article);
}
57
/**
 * Request a summary from the OpenAI Chat Completions API.
 *
 * The request is aborted after 60 seconds so an unresponsive endpoint cannot
 * stall the caller's fallback chain.
 *
 * @param {string} prompt - User prompt containing the article.
 * @param {{openaiApiKey: string, openaiModel: string}} config - Backend settings.
 * @returns {Promise<string>} Trimmed summary text, or "Summary unavailable."
 *   when the response carries no message content.
 * @throws {Error} On a non-2xx response (message includes the status and the
 *   first 200 characters of the body), on network failure, or on timeout.
 */
async function summarizeOpenAI(prompt, config) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 60000);
    try {
        const response = await fetch("https://api.openai.com/v1/chat/completions", {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${config.openaiApiKey}`,
            },
            body: JSON.stringify({
                model: config.openaiModel,
                messages: [
                    {
                        role: "system",
                        content: "You are a concise article summarizer. Always respond with exactly 3 sentences.",
                    },
                    { role: "user", content: prompt },
                ],
                temperature: 0.3,
                max_tokens: 300,
            }),
            signal: controller.signal,
        });
        if (!response.ok) {
            const body = await response.text();
            throw new Error(`OpenAI API ${response.status}: ${body.substring(0, 200)}`);
        }
        const data = await response.json();
        return data.choices?.[0]?.message?.content?.trim() || "Summary unavailable.";
    }
    finally {
        // Always release the timer so it cannot keep the process alive.
        clearTimeout(timer);
    }
}
87
/**
 * Request a summary from the Anthropic Messages API.
 *
 * The request is aborted after 60 seconds so an unresponsive endpoint cannot
 * stall the caller's fallback chain.
 *
 * @param {string} prompt - User prompt containing the article.
 * @param {{anthropicApiKey: string, anthropicModel: string}} config - Backend settings.
 * @returns {Promise<string>} Trimmed summary text, or "Summary unavailable."
 *   when the response carries no text block.
 * @throws {Error} On a non-2xx response (message includes the status and the
 *   first 200 characters of the body), on network failure, or on timeout.
 */
async function summarizeAnthropic(prompt, config) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 60000);
    try {
        const response = await fetch("https://api.anthropic.com/v1/messages", {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "x-api-key": config.anthropicApiKey,
                "anthropic-version": "2023-06-01",
            },
            body: JSON.stringify({
                model: config.anthropicModel,
                max_tokens: 300,
                messages: [{ role: "user", content: prompt }],
                system: "You are a concise article summarizer. Always respond with exactly 3 sentences.",
            }),
            signal: controller.signal,
        });
        if (!response.ok) {
            const body = await response.text();
            throw new Error(`Anthropic API ${response.status}: ${body.substring(0, 200)}`);
        }
        const data = await response.json();
        return data.content?.[0]?.text?.trim() || "Summary unavailable.";
    }
    finally {
        // Always release the timer so it cannot keep the process alive.
        clearTimeout(timer);
    }
}
112
/**
 * Request a summary from a local Ollama server (`/api/generate`).
 *
 * The request is aborted after 60 seconds. Trailing slashes on the configured
 * base URL are ignored, so "http://host:11434/" and "http://host:11434"
 * address the same endpoint.
 *
 * @param {string} prompt - User prompt containing the article.
 * @param {{ollamaUrl: string, ollamaModel: string}} config - Backend settings.
 * @returns {Promise<string>} Trimmed summary text, or "Summary unavailable."
 *   when the response carries no text.
 * @throws {Error} On a non-2xx response (message includes the status and, when
 *   available, the first 200 characters of the body), on network failure, or
 *   on timeout.
 */
async function summarizeOllama(prompt, config) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 60000);
    // Avoid "//api/generate" when the base URL already ends with a slash.
    const baseUrl = String(config.ollamaUrl).replace(/\/+$/, "");
    try {
        const response = await fetch(`${baseUrl}/api/generate`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
                model: config.ollamaModel,
                prompt,
                system: "You are a concise article summarizer. Always respond with exactly 3 sentences.",
                stream: false,
                options: {
                    temperature: 0.3,
                    num_predict: 300,
                },
            }),
            signal: controller.signal,
        });
        if (!response.ok) {
            // The body usually names the cause (e.g. a model that is not pulled).
            const body = await response.text().catch(() => "");
            const detail = body ? `: ${body.substring(0, 200)}` : "";
            throw new Error(`Ollama ${response.status}${detail}`);
        }
        const data = await response.json();
        return data.response?.trim() || "Summary unavailable.";
    }
    finally {
        // Always release the timer so it cannot keep the process alive.
        clearTimeout(timer);
    }
}
144
/**
 * Extractive summary fallback (no AI needed): returns up to three sentences
 * taken verbatim from the article, in their original order.
 *
 * Sentences of 5-50 words are scored by position (the first three get a
 * bonus), overlap with title words longer than three characters, a preference
 * for 10-30 words, and a penalty for many non-prose characters. Words are
 * compared with surrounding punctuation removed, so "computing," in a
 * sentence still matches "Computing" in the title.
 *
 * @param {{textContent: string, title: string, description: (string|null)}} article
 *   Parsed article.
 * @returns {string} The selected sentences joined by spaces; the article
 *   description, or "No summary available.", when no sentence qualifies.
 */
function extractiveSummary(article) {
    // Lower-cases a token and strips everything but letters and digits.
    const normalize = (word) => word.toLowerCase().replace(/[^\p{L}\p{N}]+/gu, "");

    const text = article.textContent ?? "";
    const sentences = text
        .split(/(?<=[.!?])\s+/)
        .map((s) => s.trim())
        .filter((s) => {
            // Very short or very long "sentences" are usually not prose.
            const words = s.split(/\s+/).length;
            return words >= 5 && words <= 50;
        });
    if (sentences.length === 0) {
        return article.description || "No summary available.";
    }
    if (sentences.length <= 3) {
        return sentences.join(" ");
    }

    const titleWords = new Set(
        (article.title ?? "")
            .split(/\s+/)
            .map(normalize)
            .filter((w) => w.length > 3),
    );
    const positionBonus = [5, 3, 2];

    const scored = sentences.map((sentence, index) => {
        // Opening sentences usually carry the gist.
        let score = positionBonus[index] ?? 0;

        // Each occurrence of a title word adds weight.
        const words = sentence.split(/\s+/);
        for (const word of words) {
            if (titleWords.has(normalize(word))) {
                score += 2;
            }
        }

        // Medium-length sentences read best in a summary.
        if (words.length >= 10 && words.length <= 30) {
            score += 1;
        }

        // Many unusual characters suggest code, tables or other non-prose.
        const specialChars = (sentence.match(/[^a-zA-Z0-9\s.,!?'"()-]/g) || []).length;
        if (specialChars > 5) {
            score -= 3;
        }
        return { sentence, score, index };
    });

    // Best three by score (ties keep document order), then back in reading order.
    scored.sort((a, b) => b.score - a.score);
    const top3 = scored.slice(0, 3);
    top3.sort((a, b) => a.index - b.index);
    return top3.map((s) => s.sentence).join(" ");
}
197
+ //# sourceMappingURL=summarizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"summarizer.js","sourceRoot":"","sources":["../src/summarizer.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;AAiBH,8BAmDC;AAjED,2CAAkD;AAWlD;;GAEG;AACI,KAAK,UAAU,SAAS,CAAC,OAAsB;IACpD,MAAM,MAAM,GAAqB;QAC/B,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;QACxC,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,aAAa;QACtD,eAAe,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;QAC9C,cAAc,EAAE,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,0BAA0B;QACzE,SAAS,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,wBAAwB;QAC7D,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,UAAU;KACpD,CAAC;IAEF,MAAM,MAAM,GAAG,IAAA,+BAAmB,EAAC,OAAO,CAAC,CAAC;IAE5C,mBAAmB;IACnB,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;QACxB,IAAI,CAAC;YACH,OAAO,MAAM,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAC/C,CAAC;QAAC,OAAO,GAAQ,EAAE,CAAC;YAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,iCAAiC,GAAG,CAAC,OAAO,IAAI,CACjD,CAAC;QACJ,CAAC;IACH,CAAC;IAED,gBAAgB;IAChB,IAAI,MAAM,CAAC,eAAe,EAAE,CAAC;QAC3B,IAAI,CAAC;YACH,OAAO,MAAM,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAClD,CAAC;QAAC,OAAO,GAAQ,EAAE,CAAC;YAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,oCAAoC,GAAG,CAAC,OAAO,IAAI,CACpD,CAAC;QACJ,CAAC;IACH,CAAC;IAED,qBAAqB;IACrB,IAAI,CAAC;QACH,OAAO,MAAM,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,iCAAiC,GAAG,CAAC,OAAO,IAAI,CACjD,CAAC;IACJ,CAAC;IAED,+BAA+B;IAC/B,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,qEAAqE,CACtE,CAAC;IACF,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,yFAAyF,CAC1F,CAAC;IACF,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;AACpC,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,eAAe,CAC5B,MAAc,EACd,MAAwB;IAExB,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,4CAA4C,EAAE;QACzE,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACP,cAAc,EAAE,kBAAkB;YAClC,aAAa,EAAE,UAAU,MAAM,CAAC,YAAY,EAAE;SAC/C;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;YACnB,KAAK,EAAE,MAAM,CAAC,WAAW;YACzB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,QAAQ;oBACd,OAAO,EACL,gFAAgF;iBACnF;gBACD,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE;aAClC;YACD,WAAW,EAAE,GAAG;YAChB,UAAU,EAAE,GAAG;SAChB,CAAC;KACH,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QA
CnC,MAAM,IAAI,KAAK,CAAC,cAAc,QAAQ,CAAC,MAAM,KAAK,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAwB,CAAC;IAC5D,OAAO,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,sBAAsB,CAAC;AAC/E,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,kBAAkB,CAC/B,MAAc,EACd,MAAwB;IAExB,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,uCAAuC,EAAE;QACpE,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACP,cAAc,EAAE,kBAAkB;YAClC,WAAW,EAAE,MAAM,CAAC,eAAgB;YACpC,mBAAmB,EAAE,YAAY;SAClC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;YACnB,KAAK,EAAE,MAAM,CAAC,cAAc;YAC5B,UAAU,EAAE,GAAG;YACf,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YAC7C,MAAM,EACJ,gFAAgF;SACnF,CAAC;KACH,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CACb,iBAAiB,QAAQ,CAAC,MAAM,KAAK,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAC9D,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAwB,CAAC;IAC5D,OAAO,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,sBAAsB,CAAC;AACnE,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,eAAe,CAC5B,MAAc,EACd,MAAwB;IAExB,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,KAAK,CAAC,CAAC;IAE1D,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,MAAM,CAAC,SAAS,eAAe,EAAE;YAC/D,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK,EAAE,MAAM,CAAC,WAAW;gBACzB,MAAM;gBACN,MAAM,EACJ,gFAAgF;gBAClF,MAAM,EAAE,KAAK;gBACb,OAAO,EAAE;oBACP,WAAW,EAAE,GAAG;oBAChB,WAAW,EAAE,GAAG;iBACjB;aACF,CAAC;YACF,MAAM,EAAE,UAAU,CAAC,MAAM;SAC1B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,UAAU,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC/C,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAwB,CAAC;QAC5D,OAAO,IAAI,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,sBAAsB,CAAC;IACzD,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,iBAAiB,CAAC,OAAsB;IAC
/C,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,CAAC;IAEjC,uBAAuB;IACvB,MAAM,SAAS,GAAG,IAAI;SACnB,KAAK,CAAC,eAAe,CAAC;SACtB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QACZ,+CAA+C;QAC/C,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QACpC,OAAO,KAAK,IAAI,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;IACnC,CAAC,CAAC,CAAC;IAEL,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,OAAO,CAAC,WAAW,IAAI,uBAAuB,CAAC;IACxD,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC1B,OAAO,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,2EAA2E;IAC3E,MAAM,UAAU,GAAG,IAAI,GAAG,CACxB,OAAO,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CACrE,CAAC;IAEF,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,KAAK,EAAE,EAAE;QAC/C,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,wDAAwD;QACxD,IAAI,KAAK,KAAK,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;aACvB,IAAI,KAAK,KAAK,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;aAC5B,IAAI,KAAK,KAAK,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;QAEjC,qBAAqB;QACrB,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAClD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;gBAAE,KAAK,IAAI,CAAC,CAAC;QACpC,CAAC;QAED,kDAAkD;QAClD,IAAI,KAAK,CAAC,MAAM,IAAI,EAAE,IAAI,KAAK,CAAC,MAAM,IAAI,EAAE;YAAE,KAAK,IAAI,CAAC,CAAC;QAEzD,yEAAyE;QACzE,MAAM,YAAY,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,0BAA0B,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;QAC/E,IAAI,YAAY,GAAG,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;QAEjC,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,sDAAsD;IACtD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACzC,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAChC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAEvC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,CAAC"}
package/package.json ADDED
@@ -0,0 +1,44 @@
1
+ {
2
+ "name": "websnap-reader",
3
+ "version": "1.0.0",
4
+ "description": "Turn any URL into clean markdown. A better reader mode for your terminal.",
5
+ "main": "dist/index.js",
6
+ "bin": {
7
+ "websnap": "dist/index.js"
8
+ },
9
+ "scripts": {
10
+ "build": "tsc",
11
+ "dev": "tsc --watch",
12
+ "start": "node dist/index.js",
13
+ "prepublishOnly": "npm run build"
14
+ },
15
+ "keywords": [
16
+ "cli",
17
+ "markdown",
18
+ "reader-mode",
19
+ "web-scraper",
20
+ "chrome-cdp",
21
+ "readability",
22
+ "article-extractor"
23
+ ],
24
+ "author": "Wilson Xu",
25
+ "license": "MIT",
26
+ "publishConfig": {
27
+ "access": "public"
28
+ },
29
+ "dependencies": {
30
+ "commander": "^12.1.0",
31
+ "node-html-markdown": "^1.3.0"
32
+ },
33
+ "devDependencies": {
34
+ "@types/node": "^20.11.0",
35
+ "typescript": "^5.3.3"
36
+ },
37
+ "engines": {
38
+ "node": ">=18.0.0"
39
+ },
40
+ "files": [
41
+ "dist",
42
+ "README.md"
43
+ ]
44
+ }