@harbinger-ai/harbinger 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +406 -0
- package/agents/README.md +76 -0
- package/agents/_template/CONFIG.yaml +7 -0
- package/agents/_template/HEARTBEAT.md +59 -0
- package/agents/_template/IDENTITY.md +4 -0
- package/agents/_template/SKILLS.md +1 -0
- package/agents/_template/SOUL.md +25 -0
- package/agents/_template/TOOLS.md +3 -0
- package/agents/binary-reverser/CONFIG.yaml +21 -0
- package/agents/binary-reverser/HEARTBEAT.md +65 -0
- package/agents/binary-reverser/IDENTITY.md +1 -0
- package/agents/binary-reverser/SKILLS.md +1 -0
- package/agents/binary-reverser/SOUL.md +23 -0
- package/agents/binary-reverser/TOOLS.md +99 -0
- package/agents/browser-agent/CONFIG.yaml +20 -0
- package/agents/browser-agent/HEARTBEAT.md +79 -0
- package/agents/browser-agent/IDENTITY.md +5 -0
- package/agents/browser-agent/SKILLS.md +86 -0
- package/agents/browser-agent/SOUL.md +23 -0
- package/agents/browser-agent/TOOLS.md +186 -0
- package/agents/cloud-infiltrator/CONFIG.yaml +22 -0
- package/agents/cloud-infiltrator/HEARTBEAT.md +78 -0
- package/agents/cloud-infiltrator/IDENTITY.md +1 -0
- package/agents/cloud-infiltrator/SKILLS.md +1 -0
- package/agents/cloud-infiltrator/SOUL.md +23 -0
- package/agents/cloud-infiltrator/TOOLS.md +68 -0
- package/agents/coding-assistant/CONFIG.yaml +22 -0
- package/agents/coding-assistant/HEARTBEAT.md +57 -0
- package/agents/coding-assistant/IDENTITY.md +5 -0
- package/agents/coding-assistant/SKILLS.md +69 -0
- package/agents/coding-assistant/SOUL.md +60 -0
- package/agents/coding-assistant/TOOLS.md +168 -0
- package/agents/learning-agent/CONFIG.yaml +21 -0
- package/agents/learning-agent/HEARTBEAT.md +63 -0
- package/agents/learning-agent/IDENTITY.md +5 -0
- package/agents/learning-agent/SKILLS.md +86 -0
- package/agents/learning-agent/SOUL.md +77 -0
- package/agents/learning-agent/TOOLS.md +145 -0
- package/agents/maintainer/CONFIG.yaml +31 -0
- package/agents/maintainer/HEARTBEAT.md +28 -0
- package/agents/maintainer/IDENTITY.md +33 -0
- package/agents/maintainer/SKILLS.md +24 -0
- package/agents/maintainer/SOUL.md +61 -0
- package/agents/maintainer/TOOLS.md +29 -0
- package/agents/maintainer/lib/engine.js +279 -0
- package/agents/maintainer/lib/safe-fixer.js +183 -0
- package/agents/morning-brief/CONFIG.yaml +22 -0
- package/agents/morning-brief/HEARTBEAT.md +60 -0
- package/agents/morning-brief/IDENTITY.md +5 -0
- package/agents/morning-brief/SKILLS.md +56 -0
- package/agents/morning-brief/SOUL.md +64 -0
- package/agents/morning-brief/TOOLS.md +112 -0
- package/agents/osint-detective/CONFIG.yaml +24 -0
- package/agents/osint-detective/HEARTBEAT.md +66 -0
- package/agents/osint-detective/IDENTITY.md +1 -0
- package/agents/osint-detective/SKILLS.md +1 -0
- package/agents/osint-detective/SOUL.md +23 -0
- package/agents/osint-detective/TOOLS.md +81 -0
- package/agents/recon-scout/CONFIG.yaml +22 -0
- package/agents/recon-scout/HEARTBEAT.md +79 -0
- package/agents/recon-scout/IDENTITY.md +1 -0
- package/agents/recon-scout/SKILLS.md +1 -0
- package/agents/recon-scout/SOUL.md +23 -0
- package/agents/recon-scout/TOOLS.md +93 -0
- package/agents/report-writer/CONFIG.yaml +21 -0
- package/agents/report-writer/HEARTBEAT.md +63 -0
- package/agents/report-writer/IDENTITY.md +1 -0
- package/agents/report-writer/SKILLS.md +1 -0
- package/agents/report-writer/SOUL.md +23 -0
- package/agents/report-writer/TOOLS.md +69 -0
- package/agents/shared/README.md +13 -0
- package/agents/web-hacker/CONFIG.yaml +24 -0
- package/agents/web-hacker/HEARTBEAT.md +78 -0
- package/agents/web-hacker/IDENTITY.md +1 -0
- package/agents/web-hacker/SKILLS.md +1 -0
- package/agents/web-hacker/SOUL.md +23 -0
- package/agents/web-hacker/TOOLS.md +86 -0
- package/api/CLAUDE.md +19 -0
- package/api/index.js +274 -0
- package/bin/cli.js +620 -0
- package/bin/local.sh +31 -0
- package/bin/postinstall.js +63 -0
- package/config/index.js +24 -0
- package/config/instrumentation.js +93 -0
- package/drizzle/0000_initial.sql +52 -0
- package/drizzle/0001_bounty_and_registry.sql +82 -0
- package/drizzle/0002_sync_columns.sql +7 -0
- package/drizzle/0003_graceful_bloodscream.sql +86 -0
- package/drizzle/meta/0000_snapshot.json +321 -0
- package/drizzle/meta/0003_snapshot.json +878 -0
- package/drizzle/meta/_journal.json +34 -0
- package/drizzle/relations.ts +3 -0
- package/drizzle/schema.ts +145 -0
- package/lib/actions.js +47 -0
- package/lib/agents.js +166 -0
- package/lib/ai/agent.js +96 -0
- package/lib/ai/autonomous-engine.js +261 -0
- package/lib/ai/index.js +359 -0
- package/lib/ai/model-router.js +254 -0
- package/lib/ai/model.js +73 -0
- package/lib/ai/tools.js +84 -0
- package/lib/auth/actions.js +28 -0
- package/lib/auth/config.js +27 -0
- package/lib/auth/edge-config.js +27 -0
- package/lib/auth/index.js +27 -0
- package/lib/auth/middleware.js +53 -0
- package/lib/bounty/actions.js +119 -0
- package/lib/bounty/findings.js +64 -0
- package/lib/bounty/programs.js +34 -0
- package/lib/bounty/sync-targets.js +267 -0
- package/lib/bounty/targets.js +33 -0
- package/lib/channels/base.js +56 -0
- package/lib/channels/index.js +15 -0
- package/lib/channels/telegram.js +148 -0
- package/lib/chat/actions.js +288 -0
- package/lib/chat/api.js +135 -0
- package/lib/chat/components/app-sidebar.js +237 -0
- package/lib/chat/components/app-sidebar.jsx +289 -0
- package/lib/chat/components/chat-header.js +27 -0
- package/lib/chat/components/chat-header.jsx +37 -0
- package/lib/chat/components/chat-input.js +230 -0
- package/lib/chat/components/chat-input.jsx +228 -0
- package/lib/chat/components/chat-nav-context.js +11 -0
- package/lib/chat/components/chat-nav-context.jsx +11 -0
- package/lib/chat/components/chat-page.js +81 -0
- package/lib/chat/components/chat-page.jsx +100 -0
- package/lib/chat/components/chat.js +150 -0
- package/lib/chat/components/chat.jsx +182 -0
- package/lib/chat/components/chats-page.js +302 -0
- package/lib/chat/components/chats-page.jsx +330 -0
- package/lib/chat/components/crons-page.js +172 -0
- package/lib/chat/components/crons-page.jsx +244 -0
- package/lib/chat/components/enhanced-tool-call.js +103 -0
- package/lib/chat/components/enhanced-tool-call.jsx +139 -0
- package/lib/chat/components/findings-page.js +175 -0
- package/lib/chat/components/findings-page.jsx +214 -0
- package/lib/chat/components/greeting.js +22 -0
- package/lib/chat/components/greeting.jsx +26 -0
- package/lib/chat/components/icons.js +777 -0
- package/lib/chat/components/icons.jsx +741 -0
- package/lib/chat/components/index.js +26 -0
- package/lib/chat/components/mcp-page.js +260 -0
- package/lib/chat/components/mcp-page.jsx +355 -0
- package/lib/chat/components/message.js +289 -0
- package/lib/chat/components/message.jsx +315 -0
- package/lib/chat/components/messages.js +66 -0
- package/lib/chat/components/messages.jsx +77 -0
- package/lib/chat/components/notifications-page.js +56 -0
- package/lib/chat/components/notifications-page.jsx +87 -0
- package/lib/chat/components/page-layout.js +21 -0
- package/lib/chat/components/page-layout.jsx +28 -0
- package/lib/chat/components/registry-page.js +222 -0
- package/lib/chat/components/registry-page.jsx +255 -0
- package/lib/chat/components/settings-layout.js +40 -0
- package/lib/chat/components/settings-layout.jsx +54 -0
- package/lib/chat/components/settings-secrets-page.js +216 -0
- package/lib/chat/components/settings-secrets-page.jsx +264 -0
- package/lib/chat/components/sidebar-history-item.js +132 -0
- package/lib/chat/components/sidebar-history-item.jsx +113 -0
- package/lib/chat/components/sidebar-history.js +115 -0
- package/lib/chat/components/sidebar-history.jsx +157 -0
- package/lib/chat/components/sidebar-user-nav.js +63 -0
- package/lib/chat/components/sidebar-user-nav.jsx +73 -0
- package/lib/chat/components/status-bar.js +39 -0
- package/lib/chat/components/status-bar.jsx +51 -0
- package/lib/chat/components/swarm-page.js +157 -0
- package/lib/chat/components/swarm-page.jsx +210 -0
- package/lib/chat/components/targets-page.js +376 -0
- package/lib/chat/components/targets-page.jsx +389 -0
- package/lib/chat/components/tool-call.js +86 -0
- package/lib/chat/components/tool-call.jsx +104 -0
- package/lib/chat/components/tool-panel.js +107 -0
- package/lib/chat/components/tool-panel.jsx +145 -0
- package/lib/chat/components/triggers-page.js +153 -0
- package/lib/chat/components/triggers-page.jsx +221 -0
- package/lib/chat/components/ui/confirm-dialog.js +53 -0
- package/lib/chat/components/ui/confirm-dialog.jsx +57 -0
- package/lib/chat/components/ui/dropdown-menu.js +98 -0
- package/lib/chat/components/ui/dropdown-menu.jsx +116 -0
- package/lib/chat/components/ui/rename-dialog.js +74 -0
- package/lib/chat/components/ui/rename-dialog.jsx +72 -0
- package/lib/chat/components/ui/scroll-area.js +13 -0
- package/lib/chat/components/ui/scroll-area.jsx +17 -0
- package/lib/chat/components/ui/separator.js +21 -0
- package/lib/chat/components/ui/separator.jsx +18 -0
- package/lib/chat/components/ui/sheet.js +75 -0
- package/lib/chat/components/ui/sheet.jsx +95 -0
- package/lib/chat/components/ui/sidebar.js +227 -0
- package/lib/chat/components/ui/sidebar.jsx +245 -0
- package/lib/chat/components/ui/tooltip.js +56 -0
- package/lib/chat/components/ui/tooltip.jsx +66 -0
- package/lib/chat/components/upgrade-dialog.js +151 -0
- package/lib/chat/components/upgrade-dialog.jsx +170 -0
- package/lib/chat/utils.js +11 -0
- package/lib/cron.js +246 -0
- package/lib/db/api-keys.js +163 -0
- package/lib/db/chats.js +145 -0
- package/lib/db/index.js +52 -0
- package/lib/db/notifications.js +99 -0
- package/lib/db/schema.js +145 -0
- package/lib/db/update-check.js +96 -0
- package/lib/db/users.js +89 -0
- package/lib/mcp/actions.js +104 -0
- package/lib/mcp/client.js +79 -0
- package/lib/mcp/handler.js +57 -0
- package/lib/mcp/server.js +165 -0
- package/lib/paths.js +46 -0
- package/lib/registry/actions.js +164 -0
- package/lib/registry/catalog.js +137 -0
- package/lib/registry/tools.js +71 -0
- package/lib/tools/create-job.js +99 -0
- package/lib/tools/github.js +217 -0
- package/lib/tools/openai.js +35 -0
- package/lib/tools/telegram.js +292 -0
- package/lib/triggers.js +118 -0
- package/lib/utils/render-md.js +102 -0
- package/package.json +103 -0
- package/setup/lib/auth.mjs +81 -0
- package/setup/lib/env.mjs +21 -0
- package/setup/lib/fs-utils.mjs +20 -0
- package/setup/lib/github.mjs +149 -0
- package/setup/lib/prerequisites.mjs +155 -0
- package/setup/lib/prompts.mjs +267 -0
- package/setup/lib/providers.mjs +48 -0
- package/setup/lib/sync.mjs +125 -0
- package/setup/lib/targets.mjs +45 -0
- package/setup/lib/telegram-verify.mjs +63 -0
- package/setup/lib/telegram.mjs +76 -0
- package/setup/setup-telegram.mjs +264 -0
- package/setup/setup.mjs +842 -0
- package/templates/.dockerignore +5 -0
- package/templates/.env.example +63 -0
- package/templates/.github/workflows/auto-merge.yml +117 -0
- package/templates/.github/workflows/build-image.yml +36 -0
- package/templates/.github/workflows/notify-job-failed.yml +64 -0
- package/templates/.github/workflows/notify-pr-complete.yml +119 -0
- package/templates/.github/workflows/rebuild-event-handler.yml +121 -0
- package/templates/.github/workflows/run-job.yml +89 -0
- package/templates/.github/workflows/upgrade-event-handler.yml +62 -0
- package/templates/.gitignore.template +45 -0
- package/templates/.pi/extensions/env-sanitizer/index.ts +48 -0
- package/templates/.pi/extensions/env-sanitizer/package.json +5 -0
- package/templates/CLAUDE.md +29 -0
- package/templates/CLAUDE.md.template +307 -0
- package/templates/app/api/[...thepopebot]/route.js +1 -0
- package/templates/app/api/auth/[...nextauth]/route.js +1 -0
- package/templates/app/chat/[chatId]/page.js +8 -0
- package/templates/app/chats/page.js +7 -0
- package/templates/app/components/ascii-logo.jsx +10 -0
- package/templates/app/components/login-form.jsx +92 -0
- package/templates/app/components/setup-form.jsx +82 -0
- package/templates/app/components/theme-provider.jsx +11 -0
- package/templates/app/components/theme-toggle.jsx +38 -0
- package/templates/app/components/ui/button.jsx +21 -0
- package/templates/app/components/ui/card.jsx +23 -0
- package/templates/app/components/ui/input.jsx +10 -0
- package/templates/app/components/ui/label.jsx +10 -0
- package/templates/app/crons/page.js +5 -0
- package/templates/app/findings/page.js +7 -0
- package/templates/app/globals.css +90 -0
- package/templates/app/layout.js +19 -0
- package/templates/app/login/page.js +15 -0
- package/templates/app/notifications/page.js +7 -0
- package/templates/app/page.js +7 -0
- package/templates/app/settings/crons/page.js +5 -0
- package/templates/app/settings/layout.js +7 -0
- package/templates/app/settings/mcp/page.js +5 -0
- package/templates/app/settings/page.js +5 -0
- package/templates/app/settings/secrets/page.js +5 -0
- package/templates/app/settings/triggers/page.js +5 -0
- package/templates/app/stream/chat/route.js +1 -0
- package/templates/app/swarm/page.js +7 -0
- package/templates/app/targets/page.js +7 -0
- package/templates/app/toolbox/page.js +7 -0
- package/templates/app/triggers/page.js +5 -0
- package/templates/config/AGENT.md +34 -0
- package/templates/config/CRONS.json +56 -0
- package/templates/config/EVENT_HANDLER.md +224 -0
- package/templates/config/HEARTBEAT.md +3 -0
- package/templates/config/JOB_SUMMARY.md +130 -0
- package/templates/config/MCP_SERVERS.json +1 -0
- package/templates/config/SKILL_BUILDING_GUIDE.md +90 -0
- package/templates/config/SOUL.md +17 -0
- package/templates/config/TRIGGERS.json +58 -0
- package/templates/docker/event-handler/Dockerfile +20 -0
- package/templates/docker/event-handler/ecosystem.config.cjs +8 -0
- package/templates/docker/job-claude-code/Dockerfile +34 -0
- package/templates/docker/job-claude-code/entrypoint.sh +139 -0
- package/templates/docker/job-pi-coding-agent/Dockerfile +44 -0
- package/templates/docker/job-pi-coding-agent/entrypoint.sh +163 -0
- package/templates/docker-compose.yml +63 -0
- package/templates/instrumentation.js +6 -0
- package/templates/middleware.js +1 -0
- package/templates/next.config.mjs +3 -0
- package/templates/postcss.config.mjs +5 -0
- package/templates/skills/LICENSE +21 -0
- package/templates/skills/README.md +119 -0
- package/templates/skills/brave-search/SKILL.md +79 -0
- package/templates/skills/brave-search/content.js +86 -0
- package/templates/skills/brave-search/package-lock.json +621 -0
- package/templates/skills/brave-search/package.json +14 -0
- package/templates/skills/brave-search/search.js +199 -0
- package/templates/skills/browser-tools/SKILL.md +196 -0
- package/templates/skills/browser-tools/browser-content.js +103 -0
- package/templates/skills/browser-tools/browser-cookies.js +35 -0
- package/templates/skills/browser-tools/browser-eval.js +53 -0
- package/templates/skills/browser-tools/browser-hn-scraper.js +108 -0
- package/templates/skills/browser-tools/browser-nav.js +44 -0
- package/templates/skills/browser-tools/browser-pick.js +162 -0
- package/templates/skills/browser-tools/browser-screenshot.js +34 -0
- package/templates/skills/browser-tools/browser-start.js +87 -0
- package/templates/skills/browser-tools/package-lock.json +2556 -0
- package/templates/skills/browser-tools/package.json +19 -0
- package/templates/skills/llm-secrets/SKILL.md +34 -0
- package/templates/skills/llm-secrets/llm-secrets.js +33 -0
- package/templates/skills/modify-self/SKILL.md +12 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { Readability } from "@mozilla/readability";
|
|
4
|
+
import { JSDOM } from "jsdom";
|
|
5
|
+
import TurndownService from "turndown";
|
|
6
|
+
import { gfm } from "turndown-plugin-gfm";
|
|
7
|
+
|
|
8
|
+
/**
 * Removes `flag` and the argument following it from `argv` (in place) and
 * returns that following argument.
 *
 * When the flag is absent, or is not followed by a non-empty value, `argv` is
 * left untouched and `undefined` is returned, so a stray flag simply stays
 * part of the free-text query.
 *
 * @param {string[]} argv - Mutable list of remaining command-line arguments.
 * @param {string} flag - Option name to look for, e.g. "--country".
 * @returns {string | undefined} The option value, if one was supplied.
 */
function takeOption(argv, flag) {
  const index = argv.indexOf(flag);
  if (index === -1 || !argv[index + 1]) {
    return undefined;
  }
  const [, value] = argv.splice(index, 2);
  return value;
}

const args = process.argv.slice(2);

// --content is a boolean switch: remember it and drop it from the query words.
const contentIndex = args.indexOf("--content");
const fetchContent = contentIndex !== -1;
if (fetchContent) args.splice(contentIndex, 1);

// Parse result-count option. parseInt returns NaN for non-numeric input, so
// reject anything that is not a positive integer instead of forwarding it.
let numResults = 5;
const rawNumResults = takeOption(args, "-n");
if (rawNumResults !== undefined) {
  numResults = Number.parseInt(rawNumResults, 10);
  if (Number.isNaN(numResults) || numResults < 1) {
    console.error(`Error: -n expects a positive integer, got "${rawNumResults}".`);
    process.exit(1);
  }
}

// Parse country option
let country = "US";
const rawCountry = takeOption(args, "--country");
if (rawCountry !== undefined) {
  country = rawCountry.toUpperCase();
}

// Parse freshness option
let freshness = null;
const rawFreshness = takeOption(args, "--freshness");
if (rawFreshness !== undefined) {
  freshness = rawFreshness;
}

// Everything that is left over forms the search query.
const query = args.join(" ");

if (!query) {
  console.log("Usage: search.js <query> [-n <num>] [--content] [--country <code>] [--freshness <period>]");
  console.log("\nOptions:");
  console.log(" -n <num> Number of results (default: 5, max: 20)");
  console.log(" --content Fetch readable content as markdown");
  console.log(" --country <code> Country code for results (default: US)");
  console.log(" --freshness <period> Filter by time: pd (day), pw (week), pm (month), py (year)");
  console.log("\nEnvironment:");
  console.log(" BRAVE_API_KEY Required. Your Brave Search API key.");
  console.log("\nExamples:");
  console.log(' search.js "javascript async await"');
  console.log(' search.js "rust programming" -n 10');
  console.log(' search.js "climate change" --content');
  console.log(' search.js "news today" --freshness pd');
  process.exit(1);
}

// The API key is read from the environment only; abort early when it is missing.
const apiKey = process.env.BRAVE_API_KEY;
if (!apiKey) {
  console.error("Error: BRAVE_API_KEY environment variable is required.");
  console.error("Get your API key at: https://api-dashboard.search.brave.com/app/keys");
  process.exit(1);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Queries the Brave Search web endpoint and returns a simplified result list.
 *
 * Authenticates with the module-level `apiKey` via the `X-Subscription-Token`
 * header.
 *
 * @param {string} query - Search terms, sent as the `q` parameter.
 * @param {number} numResults - Number of results wanted. Must be at least 1;
 *   the `count` parameter sent to the API is capped at 20.
 * @param {string} country - Value for the `country` parameter.
 * @param {string | null} freshness - Value for the `freshness` parameter;
 *   omitted from the request when falsy.
 * @returns {Promise<Array<{title: string, link: string, snippet: string, age: string}>>}
 *   At most `numResults` entries taken from `data.web.results`; an empty array
 *   when the response contains no web results.
 * @throws {Error} When `numResults` is not a number >= 1 (no request is made),
 *   or when the API answers with a non-OK HTTP status.
 */
async function fetchBraveResults(query, numResults, country, freshness) {
  // Validate up front: a NaN/zero/negative count would otherwise be sent to
  // the API verbatim (e.g. "count=NaN").
  const requested = Math.floor(Number(numResults));
  if (Number.isNaN(requested) || requested < 1) {
    throw new Error(`Invalid number of results: ${numResults}`);
  }

  const params = new URLSearchParams({
    q: query,
    count: Math.min(requested, 20).toString(),
    country,
  });

  if (freshness) {
    params.append("freshness", freshness);
  }

  const url = `https://api.search.brave.com/res/v1/web/search?${params.toString()}`;

  const response = await fetch(url, {
    headers: {
      "Accept": "application/json",
      "Accept-Encoding": "gzip",
      "X-Subscription-Token": apiKey,
    },
  });

  if (!response.ok) {
    // Include the response body: it carries the API's own error description.
    const errorText = await response.text();
    throw new Error(`HTTP ${response.status}: ${response.statusText}\n${errorText}`);
  }

  const data = await response.json();

  // Extract web results, normalising missing fields to empty strings.
  const webResults = data?.web?.results ?? [];
  return webResults.slice(0, requested).map((result) => ({
    title: result.title || "",
    link: result.url || "",
    snippet: result.description || "",
    age: result.age || result.page_age || "",
  }));
}
|
|
109
|
+
|
|
110
|
+
/**
 * Converts an HTML string to Markdown with Turndown (ATX headings, fenced
 * code blocks, GFM plugin) and then tidies the generated text.
 *
 * @param {string} html - HTML markup to convert.
 * @returns {string} Trimmed Markdown.
 */
function htmlToMarkdown(html) {
  const converter = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
  converter.use(gfm);

  // Anchors without any visible text produce no output at all.
  converter.addRule("removeEmptyLinks", {
    filter(node) {
      return node.nodeName === "A" && !node.textContent?.trim();
    },
    replacement() {
      return "";
    },
  });

  // Post-processing passes, applied in this exact order.
  const cleanupPasses = [
    [/\[\\?\[\s*\\?\]\]\([^)]*\)/g, ""], // links whose label is an empty bracket pair
    [/ +/g, " "], // runs of spaces -> single space
    [/\s+,/g, ","], // whitespace before a comma
    [/\s+\./g, "."], // whitespace before a period
    [/\n{3,}/g, "\n\n"], // three or more newlines -> one blank line
  ];

  let markdown = converter.turndown(html);
  for (const [pattern, replacement] of cleanupPasses) {
    markdown = markdown.replace(pattern, replacement);
  }
  return markdown.trim();
}
|
|
126
|
+
|
|
127
|
+
/**
 * Downloads a page and returns its readable content, truncated to 5000
 * characters.
 *
 * Readability output (converted to Markdown) is preferred; if it yields
 * nothing, the text of the page's main container is used instead. This
 * function does not throw: every failure is reported as a parenthesised
 * string.
 *
 * @param {string} url - Page to download.
 * @returns {Promise<string>} Extracted content, or one of "(HTTP <status>)",
 *   "(Could not extract content)", "(Error: <message>)".
 */
async function fetchPageContent(url) {
  const maxChars = 5000;

  try {
    const requestHeaders = {
      "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    };
    const response = await fetch(url, {
      headers: requestHeaders,
      signal: AbortSignal.timeout(10000), // abort the request after 10 s
    });

    if (!response.ok) {
      return `(HTTP ${response.status})`;
    }

    const html = await response.text();

    // First choice: Readability's article extraction.
    const readabilityDom = new JSDOM(html, { url });
    const article = new Readability(readabilityDom.window.document).parse();
    if (article && article.content) {
      return htmlToMarkdown(article.content).substring(0, maxChars);
    }

    // Fallback: try to get main content from a freshly parsed document,
    // after dropping script/style and page-chrome elements.
    const fallbackDocument = new JSDOM(html, { url }).window.document;
    for (const element of fallbackDocument.querySelectorAll("script, style, noscript, nav, header, footer, aside")) {
      element.remove();
    }
    const container =
      fallbackDocument.querySelector("main, article, [role='main'], .content, #content") || fallbackDocument.body;
    const trimmedText = (container?.textContent || "").trim();

    // Very short text is treated as a failed extraction.
    return trimmedText.length > 100 ? trimmedText.substring(0, maxChars) : "(Could not extract content)";
  } catch (e) {
    return `(Error: ${e.message})`;
  }
}
|
|
166
|
+
|
|
167
|
+
// Main: run the search, optionally attach page content, then print every
// result to stdout. Any error is reported on stderr with exit code 1.
try {
  const results = await fetchBraveResults(query, numResults, country, freshness);

  if (results.length === 0) {
    console.error("No results found.");
    process.exit(0);
  }

  if (fetchContent) {
    // The page downloads are independent of each other, so run them
    // concurrently instead of one after another. fetchPageContent reports
    // failures as strings rather than rejecting, so one bad page cannot
    // abort the rest. Result order is preserved by index.
    const contents = await Promise.all(results.map((result) => fetchPageContent(result.link)));
    contents.forEach((content, index) => {
      results[index].content = content;
    });
  }

  results.forEach((r, index) => {
    console.log(`--- Result ${index + 1} ---`);
    console.log(`Title: ${r.title}`);
    console.log(`Link: ${r.link}`);
    if (r.age) {
      console.log(`Age: ${r.age}`);
    }
    console.log(`Snippet: ${r.snippet}`);
    if (r.content) {
      console.log(`Content:\n${r.content}`);
    }
    console.log("");
  });
} catch (e) {
  console.error(`Error: ${e.message}`);
  process.exit(1);
}
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: browser-tools
|
|
3
|
+
description: Interactive browser automation via Chrome DevTools Protocol. Use when you need to interact with web pages, test frontends, or when user interaction with a visible browser is required.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Browser Tools
|
|
7
|
+
|
|
8
|
+
Chrome DevTools Protocol tools for agent-assisted web automation. These tools connect to Chrome running on `:9222` with remote debugging enabled.
|
|
9
|
+
|
|
10
|
+
## Setup
|
|
11
|
+
|
|
12
|
+
Run once before first use:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
cd skills/browser-tools
|
|
16
|
+
npm install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Start Chrome
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
skills/browser-tools/browser-start.js # Fresh profile
|
|
23
|
+
skills/browser-tools/browser-start.js --profile # Copy user's profile (cookies, logins)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Launch Chrome with remote debugging on `:9222`. Use `--profile` to preserve user's authentication state.
|
|
27
|
+
|
|
28
|
+
## Navigate
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
skills/browser-tools/browser-nav.js https://example.com
|
|
32
|
+
skills/browser-tools/browser-nav.js https://example.com --new
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Navigate to URLs. Use `--new` flag to open in a new tab instead of reusing current tab.
|
|
36
|
+
|
|
37
|
+
## Evaluate JavaScript
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
skills/browser-tools/browser-eval.js 'document.title'
|
|
41
|
+
skills/browser-tools/browser-eval.js 'document.querySelectorAll("a").length'
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Execute JavaScript in the active tab. Code runs in async context. Use this to extract data, inspect page state, or perform DOM operations programmatically.
|
|
45
|
+
|
|
46
|
+
## Screenshot
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
skills/browser-tools/browser-screenshot.js
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Capture current viewport and return temporary file path. Use this to visually inspect page state or verify UI changes.
|
|
53
|
+
|
|
54
|
+
## Pick Elements
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
skills/browser-tools/browser-pick.js "Click the submit button"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**IMPORTANT**: Use this tool when the user wants to select specific DOM elements on the page. This launches an interactive picker that lets the user click elements to select them. The user can select multiple elements (Cmd/Ctrl+Click) and press Enter when done. The tool returns CSS selectors for the selected elements.
|
|
61
|
+
|
|
62
|
+
Common use cases:
|
|
63
|
+
- User says "I want to click that button" → Use this tool to let them select it
|
|
64
|
+
- User says "extract data from these items" → Use this tool to let them select the elements
|
|
65
|
+
- When you need specific selectors but the page structure is complex or ambiguous
|
|
66
|
+
|
|
67
|
+
## Cookies
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
skills/browser-tools/browser-cookies.js
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Display all cookies for the current tab including domain, path, httpOnly, and secure flags. Use this to debug authentication issues or inspect session state.
|
|
74
|
+
|
|
75
|
+
## Extract Page Content
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
skills/browser-tools/browser-content.js https://example.com
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Navigate to a URL and extract readable content as markdown. Uses Mozilla Readability for article extraction and Turndown for HTML-to-markdown conversion. Works on pages with JavaScript content (waits for page to load).
|
|
82
|
+
|
|
83
|
+
## When to Use
|
|
84
|
+
|
|
85
|
+
- Testing frontend code in a real browser
|
|
86
|
+
- Interacting with pages that require JavaScript
|
|
87
|
+
- When user needs to visually see or interact with a page
|
|
88
|
+
- Debugging authentication or session issues
|
|
89
|
+
- Scraping dynamic content that requires JS execution
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Efficiency Guide
|
|
94
|
+
|
|
95
|
+
### DOM Inspection Over Screenshots
|
|
96
|
+
|
|
97
|
+
**Don't** take screenshots to see page state. **Do** parse the DOM directly:
|
|
98
|
+
|
|
99
|
+
```javascript
|
|
100
|
+
// Get page structure
|
|
101
|
+
document.body.innerHTML.slice(0, 5000)
|
|
102
|
+
|
|
103
|
+
// Find interactive elements
|
|
104
|
+
Array.from(document.querySelectorAll('button, input, [role="button"]')).map(e => ({
|
|
105
|
+
id: e.id,
|
|
106
|
+
text: e.textContent.trim(),
|
|
107
|
+
class: e.className
|
|
108
|
+
}))
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Complex Scripts in Single Calls
|
|
112
|
+
|
|
113
|
+
Wrap everything in an IIFE to run multi-statement code:
|
|
114
|
+
|
|
115
|
+
```javascript
|
|
116
|
+
(function() {
|
|
117
|
+
// Multiple operations
|
|
118
|
+
const data = document.querySelector('#target').textContent;
|
|
119
|
+
const buttons = document.querySelectorAll('button');
|
|
120
|
+
|
|
121
|
+
// Interactions
|
|
122
|
+
buttons[0].click();
|
|
123
|
+
|
|
124
|
+
// Return results
|
|
125
|
+
return JSON.stringify({ data, buttonCount: buttons.length });
|
|
126
|
+
})()
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Batch Interactions
|
|
130
|
+
|
|
131
|
+
**Don't** make separate calls for each click. **Do** batch them:
|
|
132
|
+
|
|
133
|
+
```javascript
|
|
134
|
+
(function() {
|
|
135
|
+
const actions = ["btn1", "btn2", "btn3"];
|
|
136
|
+
actions.forEach(id => document.getElementById(id).click());
|
|
137
|
+
return "Done";
|
|
138
|
+
})()
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Typing/Input Sequences
|
|
142
|
+
|
|
143
|
+
```javascript
|
|
144
|
+
(function() {
|
|
145
|
+
const text = "HELLO";
|
|
146
|
+
for (const char of text) {
|
|
147
|
+
document.getElementById("key-" + char).click();
|
|
148
|
+
}
|
|
149
|
+
document.getElementById("submit").click();
|
|
150
|
+
return "Submitted: " + text;
|
|
151
|
+
})()
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Reading App/Game State
|
|
155
|
+
|
|
156
|
+
Extract structured state in one call:
|
|
157
|
+
|
|
158
|
+
```javascript
|
|
159
|
+
(function() {
|
|
160
|
+
const state = {
|
|
161
|
+
score: document.querySelector('.score')?.textContent,
|
|
162
|
+
status: document.querySelector('.status')?.className,
|
|
163
|
+
items: Array.from(document.querySelectorAll('.item')).map(el => ({
|
|
164
|
+
text: el.textContent,
|
|
165
|
+
active: el.classList.contains('active')
|
|
166
|
+
}))
|
|
167
|
+
};
|
|
168
|
+
return JSON.stringify(state, null, 2);
|
|
169
|
+
})()
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Waiting for Updates
|
|
173
|
+
|
|
174
|
+
If DOM updates after actions, add a small delay with bash:
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
sleep 0.5 && skills/browser-tools/browser-eval.js '...'
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Investigate Before Interacting
|
|
181
|
+
|
|
182
|
+
Always start by understanding the page structure:
|
|
183
|
+
|
|
184
|
+
```javascript
|
|
185
|
+
(function() {
|
|
186
|
+
return {
|
|
187
|
+
title: document.title,
|
|
188
|
+
forms: document.forms.length,
|
|
189
|
+
buttons: document.querySelectorAll('button').length,
|
|
190
|
+
inputs: document.querySelectorAll('input').length,
|
|
191
|
+
mainContent: document.body.innerHTML.slice(0, 3000)
|
|
192
|
+
};
|
|
193
|
+
})()
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
Then target specific elements based on what you find.
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import puppeteer from "puppeteer-core";
|
|
4
|
+
import { Readability } from "@mozilla/readability";
|
|
5
|
+
import { JSDOM } from "jsdom";
|
|
6
|
+
import TurndownService from "turndown";
|
|
7
|
+
import { gfm } from "turndown-plugin-gfm";
|
|
8
|
+
|
|
9
|
+
// Global timeout - exit if script takes too long.
// The timer is unref'd so it does not by itself keep the process alive.
const TIMEOUT = 30000;
const timeoutId = setTimeout(() => {
  console.error(`✗ Timeout after ${TIMEOUT / 1000}s`);
  process.exit(1);
}, TIMEOUT).unref();

const url = process.argv[2];

if (!url) {
  console.log("Usage: browser-content.js <url>");
  console.log("\nExtracts readable content from a URL as markdown.");
  console.log("\nExamples:");
  console.log(" browser-content.js https://example.com");
  console.log(" browser-content.js https://en.wikipedia.org/wiki/Rust_(programming_language)");
  process.exit(1);
}

// Connect to the browser's remote-debugging endpoint, giving up after 5 s.
// The race timer is cleared once the race settles so it cannot hold the
// event loop open after a successful connection.
let connectTimer;
const b = await Promise.race([
  puppeteer.connect({
    browserURL: "http://localhost:9222",
    defaultViewport: null,
  }),
  new Promise((_, reject) => {
    connectTimer = setTimeout(() => reject(new Error("timeout")), 5000);
  }),
])
  .finally(() => clearTimeout(connectTimer))
  .catch((e) => {
    console.error("✗ Could not connect to browser:", e.message);
    console.error(" Run: browser-start.js");
    process.exit(1);
  });

// Use the last page in the list returned by the browser.
const p = (await b.pages()).at(-1);
if (!p) {
  console.error("✗ No active tab found");
  process.exit(1);
}

// Navigate, but wait at most 10 s. Navigation errors are deliberately
// ignored: extraction below proceeds with whatever the tab has loaded.
let navigationTimer;
await Promise.race([
  p.goto(url, { waitUntil: "networkidle2" }),
  new Promise((resolve) => {
    navigationTimer = setTimeout(resolve, 10000);
  }),
])
  .catch(() => {})
  .finally(() => clearTimeout(navigationTimer));

// Get HTML via CDP (works even with TrustedScriptURL restrictions)
const client = await p.createCDPSession();
const { root } = await client.send("DOM.getDocument", { depth: -1, pierce: true });
const { outerHTML } = await client.send("DOM.getOuterHTML", { nodeId: root.nodeId });
await client.detach();

// Read the tab's URL after navigation and use it as the document URL below.
const finalUrl = p.url();

// Extract with Readability
const doc = new JSDOM(outerHTML, { url: finalUrl });
const reader = new Readability(doc.window.document);
const article = reader.parse();
|
|
62
|
+
|
|
63
|
+
// Convert to markdown
/**
 * Converts an HTML fragment to Markdown (ATX headings, fenced code blocks,
 * GFM plugin enabled) and tidies the result.
 *
 * Post-processing steps, in order:
 *  1. remove links whose text is an empty bracket pair, e.g. `[[ ]](url)`;
 *  2. collapse runs of spaces that follow a non-space character;
 *  3. remove spaces/tabs before `,` and `.` when they follow a non-space character;
 *  4. squeeze three or more consecutive newlines down to one blank line;
 *  5. trim the ends.
 *
 * Steps 2 and 3 stay within a single line and never touch leading whitespace,
 * so code and nested-list indentation survives and a line that begins with
 * `.` or `,` is not glued onto the previous paragraph.
 *
 * @param {string} html - HTML markup to convert.
 * @returns {string} The cleaned-up Markdown text.
 */
function htmlToMarkdown(html) {
  const turndown = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
  turndown.use(gfm);
  // Anchors without any visible text produce no output at all.
  turndown.addRule("removeEmptyLinks", {
    filter: (node) => node.nodeName === "A" && !node.textContent?.trim(),
    replacement: () => "",
  });
  return turndown
    .turndown(html)
    .replace(/\[\\?\[\s*\\?\]\]\([^)]*\)/g, "")
    .replace(/(?<=\S) {2,}/g, " ")
    .replace(/(?<=\S)[ \t]+,/g, ",")
    .replace(/(?<=\S)[ \t]+\./g, ".")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
|
|
80
|
+
|
|
81
|
+
/**
 * Best-effort extraction used when Readability produced no article content:
 * strips non-content elements, picks the first likely main container (or the
 * body), and converts it to markdown when it holds more than 100 characters
 * of trimmed HTML.
 *
 * A fresh JSDOM is built instead of reusing `doc` — presumably because the
 * Readability pass may have altered that document; confirm before changing.
 *
 * @returns {string} Markdown text, or a placeholder when nothing usable was found.
 */
function extractFallbackMarkdown() {
  const { document } = new JSDOM(outerHTML, { url: finalUrl }).window;
  for (const el of document.querySelectorAll("script, style, noscript, nav, header, footer, aside")) {
    el.remove();
  }
  const container = document.querySelector("main, article, [role='main'], .content, #content") || document.body;
  const html = container?.innerHTML || "";
  return html.trim().length > 100 ? htmlToMarkdown(html) : "(Could not extract content)";
}

// Prefer the Readability result; otherwise fall back to the manual extraction.
const content = article?.content ? htmlToMarkdown(article.content) : extractFallbackMarkdown();

// Emit a small header (URL, optional title), a blank line, then the body.
console.log(`URL: ${finalUrl}`);
if (article?.title) {
  console.log(`Title: ${article.title}`);
}
console.log("");
console.log(content);

process.exit(0);
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import puppeteer from "puppeteer-core";
|
|
4
|
+
|
|
5
|
+
// Attach to the browser's remote-debugging endpoint, giving up after 5 seconds.
// The race timer is cleared as soon as the race settles: this script has no
// explicit process.exit on success, so a still-pending timer would keep the
// process alive for the remainder of the 5 seconds after disconnecting.
let connectTimer;
const b = await Promise.race([
  puppeteer.connect({
    browserURL: "http://localhost:9222",
    defaultViewport: null,
  }),
  new Promise((_, reject) => {
    connectTimer = setTimeout(() => reject(new Error("timeout")), 5000);
  }),
])
  .finally(() => clearTimeout(connectTimer))
  .catch((e) => {
    console.error("ā Could not connect to browser:", e.message);
    console.error(" Run: browser-start.js");
    process.exit(1);
  });

// Work on the last page in the list returned by the browser.
const p = (await b.pages()).at(-1);

if (!p) {
  console.error("ā No active tab found");
  process.exit(1);
}

const cookies = await p.cookies();

// Print one block per cookie: "name: value" followed by selected attributes
// and a blank separator line.
for (const cookie of cookies) {
  console.log(`${cookie.name}: ${cookie.value}`);
  console.log(` domain: ${cookie.domain}`);
  console.log(` path: ${cookie.path}`);
  console.log(` httpOnly: ${cookie.httpOnly}`);
  console.log(` secure: ${cookie.secure}`);
  console.log("");
}

await b.disconnect();
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import puppeteer from "puppeteer-core";
|
|
4
|
+
|
|
5
|
+
// All CLI arguments are joined with spaces into one JavaScript expression.
const code = process.argv.slice(2).join(" ");
if (!code) {
  console.log("Usage: browser-eval.js 'code'");
  console.log("\nExamples:");
  console.log(' browser-eval.js "document.title"');
  console.log(' browser-eval.js "document.querySelectorAll(\'a\').length"');
  process.exit(1);
}

// Attach to the browser's remote-debugging endpoint, giving up after 5 seconds.
// The race timer is cleared as soon as the race settles: this script has no
// explicit process.exit on success, so a still-pending timer would keep the
// process alive for the remainder of the 5 seconds after disconnecting.
let connectTimer;
const b = await Promise.race([
  puppeteer.connect({
    browserURL: "http://localhost:9222",
    defaultViewport: null,
  }),
  new Promise((_, reject) => {
    connectTimer = setTimeout(() => reject(new Error("timeout")), 5000);
  }),
])
  .finally(() => clearTimeout(connectTimer))
  .catch((e) => {
    console.error("ā Could not connect to browser:", e.message);
    console.error(" Run: browser-start.js");
    process.exit(1);
  });

// Work on the last page in the list returned by the browser.
const p = (await b.pages()).at(-1);

if (!p) {
  console.error("ā No active tab found");
  process.exit(1);
}

// NOTE(security): the command-line text is compiled and executed inside the
// page. That is this tool's purpose, but it must never be fed untrusted input.
// The code is wrapped as `return (<code>)` in an async function, so it has to
// be a single expression and may use `await`.
const result = await p.evaluate((c) => {
  const AsyncFunction = (async () => {}).constructor;
  return new AsyncFunction(`return (${c})`)();
}, code);

/**
 * Prints one evaluated value: objects (including arrays) as "key: value"
 * lines, anything else (primitives, null, undefined) as-is.
 */
function printValue(value) {
  if (typeof value === "object" && value !== null) {
    for (const [key, entry] of Object.entries(value)) {
      console.log(`${key}: ${entry}`);
    }
  } else {
    console.log(value);
  }
}

if (Array.isArray(result)) {
  // Items are separated by a blank line. Primitive and null items are printed
  // directly instead of being passed to Object.entries, which yields nothing
  // for numbers/booleans and throws for null.
  result.forEach((item, i) => {
    if (i > 0) console.log("");
    printValue(item);
  });
} else {
  printValue(result);
}

await b.disconnect();
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Hacker News Scraper
|
|
5
|
+
*
|
|
6
|
+
* Fetches and parses submissions from Hacker News front page.
|
|
7
|
+
* Usage: node browser-hn-scraper.js [--limit <number>]
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { pathToFileURL } from 'node:url';

import * as cheerio from 'cheerio';
|
|
11
|
+
|
|
12
|
+
/**
 * Resolves a story link against the site root.
 * Hrefs that are missing or already carry a scheme are returned unchanged;
 * relative hrefs (e.g. "item?id=123") are made absolute. If the href cannot be
 * parsed, it is returned as found.
 * @param {string|undefined} href - Raw href attribute of the title link
 * @param {string} baseUrl - Site root without a trailing slash
 * @returns {string|undefined} Absolute URL, or the input when nothing can be resolved
 */
function resolveStoryUrl(href, baseUrl) {
  if (!href || /^[a-z][a-z0-9+.-]*:/i.test(href)) return href;
  try {
    return new URL(href, `${baseUrl}/`).href;
  } catch {
    return href;
  }
}

/**
 * Scrapes Hacker News front page
 *
 * Each returned object has: id, title, url (absolute), points, author, time,
 * comments, and hnUrl (the discussion page for the item). Missing score,
 * author or comment count are reported as 0 / '' / 0 respectively.
 *
 * @param {number} limit - Maximum number of submissions to return (default: 30)
 * @returns {Promise<Array>} Array of submission objects
 * @throws {Error} When the request fails or the response status is not OK
 *   (the error is also logged to stderr before being rethrown)
 */
async function scrapeHackerNews(limit = 30) {
  const baseUrl = 'https://news.ycombinator.com';

  try {
    const response = await fetch(baseUrl);
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const html = await response.text();
    const $ = cheerio.load(html);
    const submissions = [];

    // Each submission has class 'athing'
    $('.athing').each((index, element) => {
      if (submissions.length >= limit) return false; // Stop when limit reached

      const $element = $(element);
      const id = $element.attr('id');

      // Get title and URL from titleline
      const $titleLink = $element.find('.titleline > a').first();
      const title = $titleLink.text().trim();
      // Relative hrefs are resolved so callers always receive a usable URL.
      const url = resolveStoryUrl($titleLink.attr('href'), baseUrl);

      // Get the next row which contains metadata (points, author, comments)
      const $subtext = $element.next().find('.subtext');

      // Get points (first run of digits in the score text, 0 when absent)
      const pointsText = $subtext.find(`#score_${id}`).text();
      const points = Number.parseInt(pointsText.match(/\d+/)?.[0] ?? '0', 10);

      // Get author
      const author = $subtext.find('.hnuser').text().trim();

      // Get time: prefer the title attribute, fall back to the visible text
      const $age = $subtext.find('.age');
      const time = $age.attr('title') || $age.text().trim();

      // Get comments count from the last link, only if it mentions comments
      const commentsText = $subtext.find('a').last().text();
      let commentsCount = 0;
      if (commentsText.includes('comment')) {
        const match = commentsText.match(/(\d+)/);
        commentsCount = match ? Number.parseInt(match[0], 10) : 0;
      }

      submissions.push({
        id,
        title,
        url,
        points,
        author,
        time,
        comments: commentsCount,
        hnUrl: `${baseUrl}/item?id=${id}`,
      });
    });

    return submissions;
  } catch (error) {
    console.error('Error scraping Hacker News:', error.message);
    throw error;
  }
}
|
|
85
|
+
|
|
86
|
+
// CLI interface: runs only when this file is the script passed to node, not
// when it is imported. pathToFileURL produces the same percent-encoded form as
// import.meta.url (and handles Windows paths), which a hand-built
// `file://${path}` string does not.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const args = process.argv.slice(2);
  let limit = 30;

  // Parse --limit argument; a missing value keeps the default.
  const limitIndex = args.indexOf('--limit');
  if (limitIndex !== -1 && args[limitIndex + 1]) {
    limit = Number.parseInt(args[limitIndex + 1], 10);
    // A NaN limit would silently disable the cap inside the scraper, so
    // reject anything that is not a non-negative integer.
    if (Number.isNaN(limit) || limit < 0) {
      console.error(`Invalid --limit value: ${args[limitIndex + 1]}`);
      process.exit(1);
    }
  }

  // JSON goes to stdout; the summary goes to stderr so stdout stays parseable.
  scrapeHackerNews(limit)
    .then((submissions) => {
      console.log(JSON.stringify(submissions, null, 2));
      console.error(`\nā Scraped ${submissions.length} submissions`);
    })
    .catch((error) => {
      console.error('Failed to scrape:', error.message);
      process.exit(1);
    });
}
|
|
107
|
+
|
|
108
|
+
export { scrapeHackerNews };
|