nodebench-mcp 2.15.0 → 2.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/LICENSE +21 -0
  2. package/NODEBENCH_AGENTS.md +2 -2
  3. package/README.md +514 -82
  4. package/dist/__tests__/analytics.test.d.ts +11 -0
  5. package/dist/__tests__/analytics.test.js +546 -0
  6. package/dist/__tests__/analytics.test.js.map +1 -0
  7. package/dist/__tests__/architectComplex.test.d.ts +1 -0
  8. package/dist/__tests__/architectComplex.test.js +375 -0
  9. package/dist/__tests__/architectComplex.test.js.map +1 -0
  10. package/dist/__tests__/architectSmoke.test.d.ts +1 -0
  11. package/dist/__tests__/architectSmoke.test.js +92 -0
  12. package/dist/__tests__/architectSmoke.test.js.map +1 -0
  13. package/dist/__tests__/dynamicLoading.test.d.ts +1 -0
  14. package/dist/__tests__/dynamicLoading.test.js +278 -0
  15. package/dist/__tests__/dynamicLoading.test.js.map +1 -0
  16. package/dist/__tests__/evalHarness.test.js +7 -2
  17. package/dist/__tests__/evalHarness.test.js.map +1 -1
  18. package/dist/__tests__/gaiaCapabilityEval.test.js +229 -12
  19. package/dist/__tests__/gaiaCapabilityEval.test.js.map +1 -1
  20. package/dist/__tests__/gaiaCapabilityMediaEval.test.js +194 -109
  21. package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +1 -1
  22. package/dist/__tests__/helpers/answerMatch.js +22 -22
  23. package/dist/__tests__/presetRealWorldBench.test.js +11 -2
  24. package/dist/__tests__/presetRealWorldBench.test.js.map +1 -1
  25. package/dist/__tests__/tools.test.js +10 -4
  26. package/dist/__tests__/tools.test.js.map +1 -1
  27. package/dist/__tests__/toolsetGatingEval.test.js +12 -4
  28. package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
  29. package/dist/analytics/index.d.ts +10 -0
  30. package/dist/analytics/index.js +11 -0
  31. package/dist/analytics/index.js.map +1 -0
  32. package/dist/analytics/projectDetector.d.ts +19 -0
  33. package/dist/analytics/projectDetector.js +259 -0
  34. package/dist/analytics/projectDetector.js.map +1 -0
  35. package/dist/analytics/schema.d.ts +57 -0
  36. package/dist/analytics/schema.js +157 -0
  37. package/dist/analytics/schema.js.map +1 -0
  38. package/dist/analytics/smartPreset.d.ts +63 -0
  39. package/dist/analytics/smartPreset.js +300 -0
  40. package/dist/analytics/smartPreset.js.map +1 -0
  41. package/dist/analytics/toolTracker.d.ts +59 -0
  42. package/dist/analytics/toolTracker.js +163 -0
  43. package/dist/analytics/toolTracker.js.map +1 -0
  44. package/dist/analytics/usageStats.d.ts +64 -0
  45. package/dist/analytics/usageStats.js +252 -0
  46. package/dist/analytics/usageStats.js.map +1 -0
  47. package/dist/db.js +359 -321
  48. package/dist/db.js.map +1 -1
  49. package/dist/index.d.ts +2 -1
  50. package/dist/index.js +653 -84
  51. package/dist/index.js.map +1 -1
  52. package/dist/tools/architectTools.d.ts +15 -0
  53. package/dist/tools/architectTools.js +304 -0
  54. package/dist/tools/architectTools.js.map +1 -0
  55. package/dist/tools/critterTools.js +14 -14
  56. package/dist/tools/emailTools.d.ts +15 -0
  57. package/dist/tools/emailTools.js +664 -0
  58. package/dist/tools/emailTools.js.map +1 -0
  59. package/dist/tools/metaTools.js +660 -0
  60. package/dist/tools/metaTools.js.map +1 -1
  61. package/dist/tools/parallelAgentTools.js +176 -176
  62. package/dist/tools/patternTools.js +11 -11
  63. package/dist/tools/progressiveDiscoveryTools.d.ts +5 -1
  64. package/dist/tools/progressiveDiscoveryTools.js +113 -21
  65. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  66. package/dist/tools/researchWritingTools.js +42 -42
  67. package/dist/tools/rssTools.d.ts +8 -0
  68. package/dist/tools/rssTools.js +833 -0
  69. package/dist/tools/rssTools.js.map +1 -0
  70. package/dist/tools/toolRegistry.d.ts +17 -0
  71. package/dist/tools/toolRegistry.js +236 -17
  72. package/dist/tools/toolRegistry.js.map +1 -1
  73. package/dist/tools/voiceBridgeTools.js +498 -498
  74. package/dist/toolsetRegistry.d.ts +10 -0
  75. package/dist/toolsetRegistry.js +84 -0
  76. package/dist/toolsetRegistry.js.map +1 -0
  77. package/package.json +12 -5
@@ -0,0 +1,833 @@
1
+ /**
2
+ * RSS Tools — Subscribe, fetch, and digest RSS/Atom feeds.
3
+ *
4
+ * Zero npm dependencies — uses Node's built-in `fetch` + simple XML parsing.
5
+ * Articles are tracked in SQLite for deduplication across fetches.
6
+ */
7
+ import { getDb } from "../db.js";
8
+ // ── SQLite schema ────────────────────────────────────────────────────────────
9
/**
 * Ensure the SQLite tables backing RSS tracking exist.
 *
 * Creates two tables on first use:
 *  - rss_sources:  registered feed URLs with optional display name/category.
 *  - rss_articles: fetched articles, deduplicated via UNIQUE(source_url, link);
 *    is_new=1 flags articles not yet included in a digest.
 *
 * Safe to call repeatedly (CREATE TABLE IF NOT EXISTS).
 */
function ensureRssTables() {
    getDb().exec(`
    CREATE TABLE IF NOT EXISTS rss_sources (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      url TEXT NOT NULL UNIQUE,
      name TEXT,
      category TEXT,
      created_at TEXT DEFAULT (datetime('now'))
    );
    CREATE TABLE IF NOT EXISTS rss_articles (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      source_url TEXT NOT NULL,
      title TEXT,
      link TEXT NOT NULL,
      published TEXT,
      summary TEXT,
      fetched_at TEXT DEFAULT (datetime('now')),
      is_new INTEGER DEFAULT 1,
      UNIQUE(source_url, link)
    );
    `);
}
32
/**
 * Extract the text content of the first `<tag>…</tag>` pair in `xml`,
 * unwrapping an optional CDATA section.
 *
 * Fix: the opening-tag pattern previously used `<${tag}[^>]*>`, which has no
 * tag-name boundary — searching for "title" would also match `<titles>`.
 * `(?:\s[^>]*)?` still permits attributes but requires whitespace after the
 * tag name, so only the exact element matches.
 *
 * @param {string} xml - XML fragment to search.
 * @param {string} tag - Element name (treated as a regex literal; callers pass fixed names).
 * @returns {string} Trimmed inner text, or "" when the element is absent.
 */
function extractTag(xml, tag) {
    const match = xml.match(new RegExp(`<${tag}(?:\\s[^>]*)?>\\s*(?:<!\\[CDATA\\[)?([\\s\\S]*?)(?:\\]\\]>)?\\s*<\\/${tag}>`, "i"));
    return match ? match[1].trim() : "";
}
37
/**
 * Pull the destination URL out of an Atom entry's <link> elements.
 *
 * Links carrying rel="alternate" are preferred; otherwise the first
 * href-bearing <link> wins. Returns "" when no href is found.
 *
 * @param {string} xml - Atom <entry> fragment.
 * @returns {string} The href value, or "".
 */
function extractAtomLink(xml) {
    const preferred = /<link[^>]*rel="alternate"[^>]*href="([^"]+)"/i.exec(xml);
    if (preferred) {
        return preferred[1];
    }
    const fallback = /<link[^>]*href="([^"]+)"/i.exec(xml);
    return fallback ? fallback[1] : "";
}
46
/**
 * Strip HTML tags and decode common entities for plain-text summaries,
 * collapsing runs of whitespace to single spaces.
 *
 * Fix: `&amp;` must be decoded LAST. Decoding it first turned double-escaped
 * text such as "&amp;lt;" into "&lt;" and then into "<", corrupting content
 * that intentionally displayed a literal entity.
 *
 * @param {string} html - Markup to flatten.
 * @returns {string} Trimmed plain text.
 */
function stripHtml(html) {
    return html
        .replace(/<[^>]+>/g, "")
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'")
        .replace(/&amp;/g, "&") // last, so "&amp;lt;" -> "&lt;" (not "<")
        .replace(/\s+/g, " ")
        .trim();
}
58
/**
 * Fetch and parse an RSS 2.0 or Atom feed into `{ title, items }`.
 *
 * Tries RSS `<item>` elements first, then falls back to Atom `<entry>`
 * elements. Fix: the container regexes previously required a bare `<item>` /
 * `<entry>` and silently dropped every article in feeds whose container tags
 * carry attributes (e.g. RSS 1.0/RDF's `<item rdf:about="…">`); attributes
 * are now tolerated.
 *
 * @param {string} url - Feed URL; fetched with a 15s abort timeout.
 * @returns {Promise<{title: string, items: Array<{title: string, link: string, published: string, summary: string}>}>}
 * @throws {Error} When the HTTP response status is not OK (or on timeout/network failure from fetch).
 */
async function parseFeed(url) {
    const response = await fetch(url, {
        headers: { "User-Agent": "NodeBench-MCP RSS Reader" },
        signal: AbortSignal.timeout(15000),
    });
    if (!response.ok)
        throw new Error(`Feed fetch failed: ${response.status} ${response.statusText}`);
    const xml = await response.text();
    const items = [];
    // Try RSS 2.0 first: <item>...</item> (attributes on <item> allowed).
    const rssItems = xml.match(/<item(?:\s[^>]*)?>([\s\S]*?)<\/item>/gi) || [];
    if (rssItems.length > 0) {
        for (const item of rssItems) {
            items.push({
                title: stripHtml(extractTag(item, "title")),
                link: extractTag(item, "link"),
                published: extractTag(item, "pubDate"),
                summary: stripHtml(extractTag(item, "description")).substring(0, 500),
            });
        }
        return { title: stripHtml(extractTag(xml, "title")), items };
    }
    // Try Atom: <entry>...</entry> (attributes on <entry> allowed).
    const atomEntries = xml.match(/<entry(?:\s[^>]*)?>([\s\S]*?)<\/entry>/gi) || [];
    for (const entry of atomEntries) {
        items.push({
            title: stripHtml(extractTag(entry, "title")),
            link: extractAtomLink(entry) || extractTag(entry, "link"),
            published: extractTag(entry, "published") || extractTag(entry, "updated"),
            summary: stripHtml(extractTag(entry, "summary") || extractTag(entry, "content")).substring(0, 500),
        });
    }
    return { title: stripHtml(extractTag(xml, "title")), items };
}
93
+ // ── Tools ────────────────────────────────────────────────────────────────────
94
+ export const rssTools = [
95
+ {
96
+ name: "add_rss_source",
97
+ description: "Register an RSS or Atom feed URL for monitoring. Stored in SQLite for persistent tracking. Validates the feed on add. Use fetch_rss_feeds to pull articles and build_research_digest to generate a digest of new articles.",
98
+ inputSchema: {
99
+ type: "object",
100
+ properties: {
101
+ url: {
102
+ type: "string",
103
+ description: "RSS or Atom feed URL (e.g., https://arxiv.org/rss/cs.AI)",
104
+ },
105
+ name: {
106
+ type: "string",
107
+ description: 'Human-readable name for the source (e.g., "arXiv CS.AI")',
108
+ },
109
+ category: {
110
+ type: "string",
111
+ description: 'Category tag for grouping in digests (e.g., "ai-research", "security", "frontend")',
112
+ },
113
+ },
114
+ required: ["url"],
115
+ },
116
+ handler: async (args) => {
117
+ ensureRssTables();
118
+ const db = getDb();
119
+ const url = args.url;
120
+ const name = args.name || url;
121
+ const category = args.category || "general";
122
+ try {
123
+ // Validate the feed is fetchable and parseable
124
+ const feed = await parseFeed(url);
125
+ db.prepare("INSERT OR IGNORE INTO rss_sources (url, name, category) VALUES (?, ?, ?)").run(url, name, category);
126
+ return [
127
+ {
128
+ type: "text",
129
+ text: JSON.stringify({
130
+ success: true,
131
+ source: { url, name, category },
132
+ feedTitle: feed.title,
133
+ articleCount: feed.items.length,
134
+ message: `Added RSS source "${name}" — ${feed.items.length} articles available`,
135
+ }),
136
+ },
137
+ ];
138
+ }
139
+ catch (error) {
140
+ return [
141
+ {
142
+ type: "text",
143
+ text: JSON.stringify({
144
+ success: false,
145
+ error: `Failed to add feed: ${error.message}`,
146
+ hint: "Check the URL is a valid RSS/Atom feed. Try opening it in a browser first.",
147
+ }),
148
+ },
149
+ ];
150
+ }
151
+ },
152
+ },
153
+ {
154
+ name: "fetch_rss_feeds",
155
+ description: "Fetch and parse all registered RSS/Atom feeds (or specific URLs). New articles are stored in SQLite with is_new=1 for digest generation. Previously seen articles (same source_url + link) are skipped. Returns per-feed counts and articles.",
156
+ inputSchema: {
157
+ type: "object",
158
+ properties: {
159
+ urls: {
160
+ type: "array",
161
+ items: { type: "string" },
162
+ description: "Specific feed URLs to fetch (optional — defaults to all registered sources from add_rss_source)",
163
+ },
164
+ limit_per_feed: {
165
+ type: "number",
166
+ description: "Maximum articles per feed (default: 20)",
167
+ },
168
+ },
169
+ required: [],
170
+ },
171
+ handler: async (args) => {
172
+ ensureRssTables();
173
+ const db = getDb();
174
+ const limitPerFeed = args.limit_per_feed || 20;
175
+ let urls = args.urls;
176
+ if (!urls || urls.length === 0) {
177
+ const sources = db.prepare("SELECT url FROM rss_sources").all();
178
+ urls = sources.map((s) => s.url);
179
+ }
180
+ if (urls.length === 0) {
181
+ return [
182
+ {
183
+ type: "text",
184
+ text: JSON.stringify({
185
+ error: "No RSS sources registered. Use add_rss_source first.",
186
+ }),
187
+ },
188
+ ];
189
+ }
190
+ const insertStmt = db.prepare("INSERT OR IGNORE INTO rss_articles (source_url, title, link, published, summary) VALUES (?, ?, ?, ?, ?)");
191
+ const results = [];
192
+ for (const url of urls) {
193
+ try {
194
+ const feed = await parseFeed(url);
195
+ const articles = feed.items.slice(0, limitPerFeed);
196
+ let newCount = 0;
197
+ for (const article of articles) {
198
+ if (!article.link)
199
+ continue;
200
+ const result = insertStmt.run(url, article.title, article.link, article.published, article.summary);
201
+ if (result.changes > 0)
202
+ newCount++;
203
+ }
204
+ results.push({
205
+ source: url,
206
+ title: feed.title,
207
+ articlesFound: articles.length,
208
+ newArticles: newCount,
209
+ articles,
210
+ });
211
+ }
212
+ catch (error) {
213
+ results.push({
214
+ source: url,
215
+ title: `(error: ${error.message})`,
216
+ articlesFound: 0,
217
+ newArticles: 0,
218
+ articles: [],
219
+ });
220
+ }
221
+ }
222
+ const totalNew = results.reduce((sum, r) => sum + r.newArticles, 0);
223
+ const totalFound = results.reduce((sum, r) => sum + r.articlesFound, 0);
224
+ return [
225
+ {
226
+ type: "text",
227
+ text: JSON.stringify({
228
+ summary: {
229
+ sourcesChecked: results.length,
230
+ totalArticles: totalFound,
231
+ newArticles: totalNew,
232
+ },
233
+ feeds: results,
234
+ }),
235
+ },
236
+ ];
237
+ },
238
+ },
239
+ {
240
+ name: "build_research_digest",
241
+ description: "Generate a digest of new (unseen) articles from RSS feeds. Compares against previously seen articles via SQLite. Returns only new items grouped by category. After generating, articles are marked as seen so the next digest only shows truly new content. Output formats: markdown (default), json, or html (ready for send_email).",
242
+ inputSchema: {
243
+ type: "object",
244
+ properties: {
245
+ since_hours: {
246
+ type: "number",
247
+ description: "Include articles fetched in the last N hours (default: 24)",
248
+ },
249
+ category: {
250
+ type: "string",
251
+ description: "Filter by source category (optional — omit for all categories)",
252
+ },
253
+ format: {
254
+ type: "string",
255
+ enum: ["markdown", "json", "html"],
256
+ description: "Output format. Use 'html' for send_email with html parameter, 'markdown' for readability, 'json' for programmatic use (default: markdown)",
257
+ },
258
+ },
259
+ required: [],
260
+ },
261
+ handler: async (args) => {
262
+ ensureRssTables();
263
+ const db = getDb();
264
+ const sinceHours = args.since_hours || 24;
265
+ const category = args.category;
266
+ const format = args.format || "markdown";
267
+ // Query new articles
268
+ const params = [sinceHours];
269
+ let query = `
270
+ SELECT a.title, a.link, a.published, a.summary, a.source_url, a.fetched_at,
271
+ COALESCE(s.name, a.source_url) as source_name,
272
+ COALESCE(s.category, 'uncategorized') as category
273
+ FROM rss_articles a
274
+ LEFT JOIN rss_sources s ON a.source_url = s.url
275
+ WHERE a.is_new = 1
276
+ AND a.fetched_at >= datetime('now', '-' || ? || ' hours')
277
+ `;
278
+ if (category) {
279
+ query += " AND s.category = ?";
280
+ params.push(category);
281
+ }
282
+ query += " ORDER BY a.fetched_at DESC";
283
+ const articles = db.prepare(query).all(...params);
284
+ // Mark articles as seen
285
+ const markParams = [sinceHours];
286
+ let markQuery = `
287
+ UPDATE rss_articles SET is_new = 0
288
+ WHERE is_new = 1 AND fetched_at >= datetime('now', '-' || ? || ' hours')
289
+ `;
290
+ if (category) {
291
+ markQuery += ` AND source_url IN (SELECT url FROM rss_sources WHERE category = ?)`;
292
+ markParams.push(category);
293
+ }
294
+ db.prepare(markQuery).run(...markParams);
295
+ if (articles.length === 0) {
296
+ return [
297
+ {
298
+ type: "text",
299
+ text: JSON.stringify({
300
+ message: "No new articles found. Run fetch_rss_feeds first to check for updates.",
301
+ articleCount: 0,
302
+ }),
303
+ },
304
+ ];
305
+ }
306
+ // Group by category
307
+ const byCategory = new Map();
308
+ for (const a of articles) {
309
+ const cat = a.category;
310
+ if (!byCategory.has(cat))
311
+ byCategory.set(cat, []);
312
+ byCategory.get(cat).push(a);
313
+ }
314
+ if (format === "json") {
315
+ return [
316
+ {
317
+ type: "text",
318
+ text: JSON.stringify({
319
+ generatedAt: new Date().toISOString(),
320
+ articleCount: articles.length,
321
+ sinceHours,
322
+ byCategory: Object.fromEntries(byCategory),
323
+ }),
324
+ },
325
+ ];
326
+ }
327
+ if (format === "html") {
328
+ const sections = [...byCategory.entries()]
329
+ .map(([cat, items]) => `
330
+ <h2 style="color:#333;border-bottom:2px solid #007bff;padding-bottom:4px">${cat} (${items.length})</h2>
331
+ ${items
332
+ .map((a) => `
333
+ <div style="margin-bottom:16px;padding:12px;background:#f8f9fa;border-radius:6px">
334
+ <a href="${a.link}" style="font-size:16px;font-weight:600;color:#007bff;text-decoration:none">${a.title}</a>
335
+ <div style="color:#666;font-size:12px;margin-top:4px">${a.source_name} · ${a.published || a.fetched_at}</div>
336
+ ${a.summary ? `<p style="margin-top:8px;color:#444;font-size:14px">${a.summary}</p>` : ""}
337
+ </div>`)
338
+ .join("")}`)
339
+ .join("");
340
+ const html = `
341
+ <div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;max-width:640px;margin:0 auto;padding:20px">
342
+ <h1 style="color:#111;margin-bottom:4px">Research Digest</h1>
343
+ <p style="color:#666;margin-top:0">${articles.length} new articles · ${new Date().toLocaleDateString()}</p>
344
+ ${sections}
345
+ <hr style="margin-top:32px;border:none;border-top:1px solid #ddd">
346
+ <p style="color:#999;font-size:11px">Generated by NodeBench MCP · build_research_digest</p>
347
+ </div>`;
348
+ return [
349
+ {
350
+ type: "text",
351
+ text: JSON.stringify({ html, articleCount: articles.length }),
352
+ },
353
+ ];
354
+ }
355
+ // Markdown format (default)
356
+ const lines = [
357
+ `# Research Digest`,
358
+ ``,
359
+ `**${articles.length} new articles** · ${new Date().toLocaleDateString()}`,
360
+ ``,
361
+ ];
362
+ for (const [cat, catArticles] of byCategory) {
363
+ lines.push(`## ${cat} (${catArticles.length})`);
364
+ lines.push("");
365
+ for (const a of catArticles) {
366
+ lines.push(`### [${a.title}](${a.link})`);
367
+ lines.push(`*${a.source_name}* · ${a.published || a.fetched_at}`);
368
+ if (a.summary)
369
+ lines.push(`> ${a.summary.substring(0, 300)}`);
370
+ lines.push("");
371
+ }
372
+ }
373
+ return [{ type: "text", text: lines.join("\n") }];
374
+ },
375
+ },
376
+ {
377
+ name: "scaffold_research_pipeline",
378
+ description: "Generate a complete, standalone Node.js project for an automated research digest pipeline. Creates: package.json, main script (RSS subscribe → fetch → digest → email), cron setup, .env template, and README with setup instructions. The generated code is self-contained — no dependency on nodebench-mcp at runtime. Use this to help users build their own automated research monitoring.",
379
+ inputSchema: {
380
+ type: "object",
381
+ properties: {
382
+ project_name: {
383
+ type: "string",
384
+ description: 'Project directory name (default: "research-digest")',
385
+ },
386
+ feeds: {
387
+ type: "array",
388
+ items: {
389
+ type: "object",
390
+ properties: {
391
+ url: { type: "string" },
392
+ name: { type: "string" },
393
+ category: { type: "string" },
394
+ },
395
+ required: ["url"],
396
+ },
397
+ description: 'Initial RSS/Atom feed URLs to include (optional — can be added later). Example: [{"url": "https://arxiv.org/rss/cs.AI", "name": "arXiv AI", "category": "ai-research"}]',
398
+ },
399
+ email_to: {
400
+ type: "string",
401
+ description: "Default recipient email for digest delivery (optional — configured in .env)",
402
+ },
403
+ schedule: {
404
+ type: "string",
405
+ enum: ["daily", "twice-daily", "weekly", "manual"],
406
+ description: 'Digest schedule (default: "daily"). Generates appropriate cron expression.',
407
+ },
408
+ output_dir: {
409
+ type: "string",
410
+ description: "Directory to scaffold into (default: current working directory + project_name)",
411
+ },
412
+ },
413
+ required: [],
414
+ },
415
+ handler: async (args) => {
416
+ const projectName = args.project_name || "research-digest";
417
+ const feeds = args.feeds || [];
418
+ const emailTo = args.email_to || "";
419
+ const schedule = args.schedule || "daily";
420
+ const cronExpr = {
421
+ daily: "0 8 * * *",
422
+ "twice-daily": "0 8,18 * * *",
423
+ weekly: "0 8 * * 1",
424
+ manual: "(run manually)",
425
+ };
426
+ const feedsJs = feeds.length > 0
427
+ ? feeds.map((f) => ` { url: "${f.url}", name: "${f.name || f.url}", category: "${f.category || "general"}" },`).join("\n")
428
+ : ` // Add your feeds here:\n // { url: "https://arxiv.org/rss/cs.AI", name: "arXiv AI", category: "ai-research" },\n // { url: "https://hnrss.org/newest?points=100", name: "Hacker News Top", category: "tech" },`;
429
+ // ── Generate the main script ──
430
+ const mainScript = `#!/usr/bin/env node
431
+ /**
432
+ * ${projectName} — Automated Research Digest Pipeline
433
+ *
434
+ * Subscribe to RSS/Atom feeds, fetch new articles, build a digest, and email it.
435
+ * Generated by NodeBench MCP scaffold_research_pipeline.
436
+ *
437
+ * Usage:
438
+ * node digest.mjs # Run once (fetch + digest + email)
439
+ * node digest.mjs --fetch-only # Just fetch, don't email
440
+ * node digest.mjs --list-sources # Show registered feeds
441
+ * node digest.mjs --add-feed <url> # Add a new feed
442
+ *
443
+ * Schedule with cron:
444
+ * crontab -e
445
+ * ${cronExpr[schedule]} cd /path/to/${projectName} && node digest.mjs >> digest.log 2>&1
446
+ */
447
+
448
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
449
+ import * as tls from "node:tls";
450
+
451
+ // ── Config ────────────────────────────────────────────────────────────────────
452
+
453
+ const DATA_DIR = new URL("./data/", import.meta.url).pathname.replace(/^\\//, "");
454
+ if (!existsSync(DATA_DIR)) mkdirSync(DATA_DIR, { recursive: true });
455
+
456
+ const SOURCES_FILE = DATA_DIR + "sources.json";
457
+ const ARTICLES_FILE = DATA_DIR + "articles.json";
458
+
459
+ const EMAIL_USER = process.env.EMAIL_USER || "";
460
+ const EMAIL_PASS = process.env.EMAIL_PASS || "";
461
+ const EMAIL_TO = process.env.DIGEST_TO || "${emailTo}" || EMAIL_USER;
462
+ const SMTP_HOST = process.env.EMAIL_SMTP_HOST || "smtp.gmail.com";
463
+ const SMTP_PORT = parseInt(process.env.EMAIL_SMTP_PORT || "465");
464
+
465
+ // ── Feed storage (JSON file-based, no SQLite needed) ──────────────────────────
466
+
467
+ function loadSources() {
468
+ if (!existsSync(SOURCES_FILE)) return [];
469
+ return JSON.parse(readFileSync(SOURCES_FILE, "utf-8"));
470
+ }
471
+
472
+ function saveSources(sources) {
473
+ writeFileSync(SOURCES_FILE, JSON.stringify(sources, null, 2));
474
+ }
475
+
476
+ function loadArticles() {
477
+ if (!existsSync(ARTICLES_FILE)) return {};
478
+ return JSON.parse(readFileSync(ARTICLES_FILE, "utf-8"));
479
+ }
480
+
481
+ function saveArticles(articles) {
482
+ writeFileSync(ARTICLES_FILE, JSON.stringify(articles, null, 2));
483
+ }
484
+
485
+ // ── RSS/Atom parser ───────────────────────────────────────────────────────────
486
+
487
+ function extractTag(xml, tag) {
488
+ const m = xml.match(new RegExp(\`<\${tag}[^>]*>\\\\s*(?:<!\\\\[CDATA\\\\[)?([\\\\s\\\\S]*?)(?:\\\\]\\\\]>)?\\\\s*</\${tag}>\`, "i"));
489
+ return m ? m[1].trim() : "";
490
+ }
491
+
492
+ function extractAtomLink(xml) {
493
+ const alt = xml.match(/<link[^>]*rel="alternate"[^>]*href="([^"]+)"/i);
494
+ if (alt) return alt[1];
495
+ const any = xml.match(/<link[^>]*href="([^"]+)"/i);
496
+ return any ? any[1] : "";
497
+ }
498
+
499
+ function stripHtml(html) {
500
+ return html.replace(/<[^>]+>/g, "").replace(/&amp;/g, "&").replace(/&lt;/g, "<")
501
+ .replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/&#39;/g, "'")
502
+ .replace(/\\s+/g, " ").trim();
503
+ }
504
+
505
+ async function parseFeed(url) {
506
+ const res = await fetch(url, {
507
+ headers: { "User-Agent": "${projectName} RSS Reader" },
508
+ signal: AbortSignal.timeout(15000),
509
+ });
510
+ if (!res.ok) throw new Error(\`Feed fetch failed: \${res.status}\`);
511
+ const xml = await res.text();
512
+ const items = [];
513
+
514
+ const rssItems = xml.match(/<item>([\\s\\S]*?)<\\/item>/gi) || [];
515
+ if (rssItems.length > 0) {
516
+ for (const item of rssItems) {
517
+ items.push({
518
+ title: stripHtml(extractTag(item, "title")),
519
+ link: extractTag(item, "link"),
520
+ published: extractTag(item, "pubDate"),
521
+ summary: stripHtml(extractTag(item, "description")).substring(0, 500),
522
+ });
523
+ }
524
+ return { title: stripHtml(extractTag(xml, "title")), items };
525
+ }
526
+
527
+ const atomEntries = xml.match(/<entry>([\\s\\S]*?)<\\/entry>/gi) || [];
528
+ for (const entry of atomEntries) {
529
+ items.push({
530
+ title: stripHtml(extractTag(entry, "title")),
531
+ link: extractAtomLink(entry) || extractTag(entry, "link"),
532
+ published: extractTag(entry, "published") || extractTag(entry, "updated"),
533
+ summary: stripHtml(extractTag(entry, "summary") || extractTag(entry, "content")).substring(0, 500),
534
+ });
535
+ }
536
+ return { title: stripHtml(extractTag(xml, "title")), items };
537
+ }
538
+
539
+ // ── SMTP email sender ─────────────────────────────────────────────────────────
540
+
541
+ function readSmtp(socket, timeout = 10000) {
542
+ return new Promise((resolve, reject) => {
543
+ let buf = "";
544
+ const timer = setTimeout(() => { socket.removeAllListeners("data"); reject(new Error("SMTP timeout")); }, timeout);
545
+ const onData = (chunk) => {
546
+ buf += chunk.toString();
547
+ const lines = buf.split("\\r\\n").filter(Boolean);
548
+ const last = lines[lines.length - 1];
549
+ if (last && /^\\d{3} /.test(last)) {
550
+ clearTimeout(timer); socket.removeListener("data", onData);
551
+ const code = parseInt(last.substring(0, 3));
552
+ if (code >= 400) reject(new Error(\`SMTP \${code}: \${buf.trim()}\`));
553
+ else resolve(buf.trim());
554
+ }
555
+ };
556
+ socket.on("data", onData);
557
+ });
558
+ }
559
+
560
+ async function smtpCmd(socket, cmd) { socket.write(cmd + "\\r\\n"); return readSmtp(socket); }
561
+
562
+ async function sendDigestEmail(to, subject, html, plainText) {
563
+ if (!EMAIL_USER || !EMAIL_PASS) {
564
+ console.log(" [skip] EMAIL_USER/EMAIL_PASS not set — digest printed to stdout instead");
565
+ console.log(plainText);
566
+ return;
567
+ }
568
+
569
+ const boundary = "----DigestBoundary" + Date.now();
570
+ const message = [
571
+ \`From: \${EMAIL_USER}\`, \`To: \${to}\`, \`Subject: \${subject}\`,
572
+ \`Date: \${new Date().toUTCString()}\`, "MIME-Version: 1.0",
573
+ \`Content-Type: multipart/alternative; boundary="\${boundary}"\`,
574
+ "", \`--\${boundary}\`, "Content-Type: text/plain; charset=UTF-8", "", plainText,
575
+ \`--\${boundary}\`, "Content-Type: text/html; charset=UTF-8", "", html,
576
+ \`--\${boundary}--\`,
577
+ ].join("\\r\\n");
578
+
579
+ const socket = tls.connect({ host: SMTP_HOST, port: SMTP_PORT, rejectUnauthorized: true });
580
+ await new Promise((resolve, reject) => { socket.once("secureConnect", resolve); socket.once("error", reject); });
581
+ try {
582
+ await readSmtp(socket);
583
+ await smtpCmd(socket, "EHLO digest-pipeline");
584
+ await smtpCmd(socket, "AUTH LOGIN");
585
+ await smtpCmd(socket, Buffer.from(EMAIL_USER).toString("base64"));
586
+ await smtpCmd(socket, Buffer.from(EMAIL_PASS).toString("base64"));
587
+ await smtpCmd(socket, \`MAIL FROM:<\${EMAIL_USER}>\`);
588
+ await smtpCmd(socket, \`RCPT TO:<\${to}>\`);
589
+ await smtpCmd(socket, "DATA");
590
+ socket.write(message.replace(/\\r\\n\\./g, "\\r\\n..") + "\\r\\n.\\r\\n");
591
+ await readSmtp(socket);
592
+ await smtpCmd(socket, "QUIT").catch(() => {});
593
+ console.log(\` [sent] Digest emailed to \${to}\`);
594
+ } finally { socket.destroy(); }
595
+ }
596
+
597
+ // ── Main pipeline ─────────────────────────────────────────────────────────────
598
+
599
+ async function fetchAll() {
600
+ const sources = loadSources();
601
+ if (sources.length === 0) {
602
+ console.log("No sources registered. Use: node digest.mjs --add-feed <url> [name] [category]");
603
+ return [];
604
+ }
605
+
606
+ const seenArticles = loadArticles();
607
+ const newArticles = [];
608
+
609
+ for (const source of sources) {
610
+ try {
611
+ const feed = await parseFeed(source.url);
612
+ let newCount = 0;
613
+ for (const item of feed.items.slice(0, 20)) {
614
+ if (!item.link) continue;
615
+ const key = source.url + "|" + item.link;
616
+ if (seenArticles[key]) continue;
617
+ seenArticles[key] = { fetchedAt: new Date().toISOString(), seen: false };
618
+ newArticles.push({ ...item, sourceName: source.name, category: source.category });
619
+ newCount++;
620
+ }
621
+ console.log(\` [\${source.name}] \${feed.items.length} articles, \${newCount} new\`);
622
+ } catch (e) {
623
+ console.log(\` [\${source.name}] ERROR: \${e.message}\`);
624
+ }
625
+ }
626
+
627
+ saveArticles(seenArticles);
628
+ return newArticles;
629
+ }
630
+
631
+ function buildDigest(articles) {
632
+ if (articles.length === 0) return { html: "", plainText: "", count: 0 };
633
+
634
+ const byCategory = new Map();
635
+ for (const a of articles) {
636
+ const cat = a.category || "general";
637
+ if (!byCategory.has(cat)) byCategory.set(cat, []);
638
+ byCategory.get(cat).push(a);
639
+ }
640
+
641
+ // Plain text
642
+ const lines = [\`Research Digest — \${articles.length} new articles — \${new Date().toLocaleDateString()}\`, ""];
643
+ for (const [cat, items] of byCategory) {
644
+ lines.push(\`## \${cat} (\${items.length})\`, "");
645
+ for (const a of items) {
646
+ lines.push(\`- \${a.title}\`);
647
+ lines.push(\` \${a.sourceName} · \${a.published || "recent"}\`);
648
+ lines.push(\` \${a.link}\`);
649
+ if (a.summary) lines.push(\` > \${a.summary.substring(0, 200)}\`);
650
+ lines.push("");
651
+ }
652
+ }
653
+
654
+ // HTML
655
+ const sections = [...byCategory.entries()].map(([cat, items]) => \`
656
+ <h2 style="color:#333;border-bottom:2px solid #007bff;padding-bottom:4px">\${cat} (\${items.length})</h2>
657
+ \${items.map(a => \`
658
+ <div style="margin-bottom:16px;padding:12px;background:#f8f9fa;border-radius:6px">
659
+ <a href="\${a.link}" style="font-size:16px;font-weight:600;color:#007bff;text-decoration:none">\${a.title}</a>
660
+ <div style="color:#666;font-size:12px;margin-top:4px">\${a.sourceName} · \${a.published || "recent"}</div>
661
+ \${a.summary ? \`<p style="margin-top:8px;color:#444;font-size:14px">\${a.summary.substring(0, 300)}</p>\` : ""}
662
+ </div>\`).join("")}\`).join("");
663
+
664
+ const html = \`<div style="font-family:-apple-system,sans-serif;max-width:640px;margin:0 auto;padding:20px">
665
+ <h1 style="color:#111">Research Digest</h1>
666
+ <p style="color:#666">\${articles.length} new articles · \${new Date().toLocaleDateString()}</p>
667
+ \${sections}
668
+ <hr style="margin-top:32px;border:none;border-top:1px solid #ddd">
669
+ <p style="color:#999;font-size:11px">Generated by ${projectName}</p>
670
+ </div>\`;
671
+
672
+ return { html, plainText: lines.join("\\n"), count: articles.length };
673
+ }
674
+
675
+ // ── CLI ───────────────────────────────────────────────────────────────────────
676
+
677
+ const args = process.argv.slice(2);
678
+
679
+ if (args.includes("--list-sources")) {
680
+ const sources = loadSources();
681
+ if (sources.length === 0) console.log("No sources. Use --add-feed <url> [name] [category]");
682
+ else sources.forEach((s, i) => console.log(\`\${i + 1}. [\${s.category}] \${s.name} — \${s.url}\`));
683
+ } else if (args.includes("--add-feed")) {
684
+ const idx = args.indexOf("--add-feed");
685
+ const url = args[idx + 1];
686
+ const name = args[idx + 2] || url;
687
+ const category = args[idx + 3] || "general";
688
+ if (!url) { console.error("Usage: --add-feed <url> [name] [category]"); process.exit(1); }
689
+ try {
690
+ const feed = await parseFeed(url);
691
+ const sources = loadSources();
692
+ sources.push({ url, name, category });
693
+ saveSources(sources);
694
+ console.log(\`Added: \${name} (\${feed.title}) — \${feed.items.length} articles available\`);
695
+ } catch (e) {
696
+ console.error(\`Failed to validate feed: \${e.message}\`);
697
+ }
698
+ } else {
699
+ console.log(\`\\n${"=".repeat(60)}\`);
700
+ console.log(\` ${projectName} — \${new Date().toISOString()}\`);
701
+ console.log(\`${"=".repeat(60)}\\n\`);
702
+
703
+ console.log("Fetching feeds...");
704
+ const articles = await fetchAll();
705
+
706
+ if (articles.length === 0) {
707
+ console.log("\\nNo new articles found.");
708
+ } else {
709
+ const digest = buildDigest(articles);
710
+ console.log(\`\\nDigest: \${digest.count} new articles\`);
711
+
712
+ if (!args.includes("--fetch-only")) {
713
+ const subject = \`Research Digest — \${digest.count} articles — \${new Date().toLocaleDateString()}\`;
714
+ await sendDigestEmail(EMAIL_TO, subject, digest.html, digest.plainText);
715
+ }
716
+
717
+ // Save digest to file
718
+ const digestFile = DATA_DIR + \`digest-\${new Date().toISOString().slice(0,10)}.md\`;
719
+ writeFileSync(digestFile, digest.plainText);
720
+ console.log(\` [saved] \${digestFile}\`);
721
+ }
722
+ }
723
+ `;
724
+ // ── Generate package.json ──
725
+ const packageJson = JSON.stringify({
726
+ name: projectName,
727
+ version: "1.0.0",
728
+ type: "module",
729
+ description: "Automated research digest pipeline — RSS/Atom → digest → email",
730
+ main: "digest.mjs",
731
+ scripts: {
732
+ start: "node digest.mjs",
733
+ "fetch-only": "node digest.mjs --fetch-only",
734
+ "list-sources": "node digest.mjs --list-sources",
735
+ },
736
+ engines: { node: ">=18.0.0" },
737
+ license: "MIT",
738
+ }, null, 2);
739
+ // ── Generate .env template ──
740
+ const envTemplate = `# Email configuration (required for email delivery)
741
+ # For Gmail: use an App Password (Google Account → Security → App passwords)
742
+ EMAIL_USER=your.email@gmail.com
743
+ EMAIL_PASS=your-16-char-app-password
744
+ DIGEST_TO=${emailTo || "your.email@gmail.com"}
745
+
746
+ # Optional: non-Gmail SMTP
747
+ # EMAIL_SMTP_HOST=smtp.gmail.com
748
+ # EMAIL_SMTP_PORT=465
749
+ `;
750
+ // ── Generate README ──
751
+ const readme = `# ${projectName}
752
+
753
+ Automated research digest pipeline. Subscribes to RSS/Atom feeds, fetches new articles, builds a categorized digest, and emails it to you.
754
+
755
+ ## Quick Start
756
+
757
+ \`\`\`bash
758
+ # 1. Set up email (see .env.example)
759
+ cp .env.example .env
760
+ # Edit .env with your email credentials
761
+
762
+ # 2. Add feeds
763
+ node digest.mjs --add-feed "https://arxiv.org/rss/cs.AI" "arXiv AI" "ai-research"
764
+ node digest.mjs --add-feed "https://hnrss.org/newest?points=100" "HN Top" "tech"
765
+ node digest.mjs --add-feed "https://blog.anthropic.com/rss.xml" "Anthropic" "ai-research"
766
+
767
+ # 3. Run
768
+ node digest.mjs
769
+ \`\`\`
770
+
771
+ ## Schedule (cron)
772
+
773
+ \`\`\`bash
774
+ # Edit crontab
775
+ crontab -e
776
+
777
+ # Add (${schedule}):
778
+ ${cronExpr[schedule]} cd /path/to/${projectName} && node digest.mjs >> digest.log 2>&1
779
+ \`\`\`
780
+
781
+ ## Commands
782
+
783
+ | Command | What it does |
784
+ |---|---|
785
+ | \`node digest.mjs\` | Fetch + digest + email |
786
+ | \`node digest.mjs --fetch-only\` | Fetch only (no email) |
787
+ | \`node digest.mjs --list-sources\` | Show registered feeds |
788
+ | \`node digest.mjs --add-feed <url> [name] [category]\` | Add a new feed |
789
+
790
+ ## How It Works
791
+
792
+ 1. **Fetch**: Pulls latest articles from all registered RSS/Atom feeds
793
+ 2. **Deduplicate**: Skips articles already seen (tracked in \`data/articles.json\`)
794
+ 3. **Digest**: Builds a categorized summary (plain text + HTML)
795
+ 4. **Email**: Sends via SMTP over TLS (Gmail default, configurable)
796
+ 5. **Save**: Archives digest as markdown in \`data/\`
797
+
798
+ No dependencies. Pure Node.js (>= 18). Zero npm packages.
799
+
800
+ Generated by [NodeBench MCP](https://www.npmjs.com/package/nodebench-mcp) \`scaffold_research_pipeline\`
801
+ `;
802
+ return [
803
+ {
804
+ type: "text",
805
+ text: JSON.stringify({
806
+ projectName,
807
+ schedule: `${schedule} (${cronExpr[schedule]})`,
808
+ feedCount: feeds.length,
809
+ files: {
810
+ "digest.mjs": mainScript,
811
+ "package.json": packageJson,
812
+ ".env.example": envTemplate,
813
+ "README.md": readme,
814
+ },
815
+ setupSteps: [
816
+ `1. Create directory: mkdir ${projectName} && cd ${projectName}`,
817
+ "2. Save the files above (digest.mjs, package.json, .env.example, README.md)",
818
+ "3. Copy .env.example to .env and fill in your email credentials",
819
+ `4. Add feeds: node digest.mjs --add-feed "https://arxiv.org/rss/cs.AI" "arXiv AI" "ai-research"`,
820
+ "5. Test: node digest.mjs --fetch-only",
821
+ "6. Run with email: node digest.mjs",
822
+ schedule !== "manual"
823
+ ? `7. Schedule: add to crontab: ${cronExpr[schedule]} cd /path/to/${projectName} && node digest.mjs >> digest.log 2>&1`
824
+ : "7. Run manually whenever you want a digest",
825
+ ],
826
+ tip: "The generated project has ZERO npm dependencies — just Node.js >= 18. Copy the files and run.",
827
+ }),
828
+ },
829
+ ];
830
+ },
831
+ },
832
+ ];
833
+ //# sourceMappingURL=rssTools.js.map