@docyrus/docyrus 0.0.59 → 0.0.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +46 -0
  2. package/agent-loader.js +1 -1
  3. package/agent-loader.js.map +2 -2
  4. package/main.js +321 -25
  5. package/main.js.map +2 -2
  6. package/package.json +1 -1
  7. package/resources/browser-tools/browser-click.js +74 -0
  8. package/resources/browser-tools/browser-client.js +236 -0
  9. package/resources/browser-tools/browser-close.js +19 -0
  10. package/resources/browser-tools/browser-console.js +73 -0
  11. package/resources/browser-tools/browser-content.js +36 -75
  12. package/resources/browser-tools/browser-cookies.js +19 -14
  13. package/resources/browser-tools/browser-daemon.js +452 -0
  14. package/resources/browser-tools/browser-devtools.js +62 -0
  15. package/resources/browser-tools/browser-eval.js +16 -22
  16. package/resources/browser-tools/browser-fill.js +70 -0
  17. package/resources/browser-tools/browser-info.js +13 -0
  18. package/resources/browser-tools/browser-nav.js +21 -22
  19. package/resources/browser-tools/browser-network.js +91 -0
  20. package/resources/browser-tools/browser-run-script.js +12 -30
  21. package/resources/browser-tools/browser-screenshot.js +22 -22
  22. package/resources/browser-tools/browser-select.js +59 -0
  23. package/resources/browser-tools/browser-snapshot.js +100 -0
  24. package/resources/browser-tools/browser-start.js +101 -85
  25. package/resources/browser-tools/browser-tabs.js +38 -0
  26. package/resources/browser-tools/browser-wait.js +50 -0
  27. package/resources/pi-agent/extensions/browser-tools.ts +229 -0
  28. package/resources/pi-agent/skills/docyrus-chrome-devtools-cli/SKILL.md +157 -46
  29. package/server-loader.js +580 -232
  30. package/server-loader.js.map +4 -4
  31. package/resources/browser-tools/browser-connect.js +0 -172
  32. package/resources/browser-tools/browser-pick.js +0 -143
  33. package/resources/pi-agent/extensions/docyrus-web-browser.ts +0 -31
  34. package/resources/pi-agent/shared/docyrusWebBrowserProtocol.ts +0 -169
  35. package/resources/pi-agent/skills/agent-browser/SKILL.md +0 -779
  36. package/resources/pi-agent/skills/agent-browser/references/authentication.md +0 -303
  37. package/resources/pi-agent/skills/agent-browser/references/commands.md +0 -295
  38. package/resources/pi-agent/skills/agent-browser/references/profiling.md +0 -120
  39. package/resources/pi-agent/skills/agent-browser/references/proxy-support.md +0 -194
  40. package/resources/pi-agent/skills/agent-browser/references/session-management.md +0 -193
  41. package/resources/pi-agent/skills/agent-browser/references/snapshot-refs.md +0 -219
  42. package/resources/pi-agent/skills/agent-browser/references/video-recording.md +0 -173
  43. package/resources/pi-agent/skills/agent-browser/templates/authenticated-session.sh +0 -105
  44. package/resources/pi-agent/skills/agent-browser/templates/capture-workflow.sh +0 -69
  45. package/resources/pi-agent/skills/agent-browser/templates/form-automation.sh +0 -62
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@docyrus/docyrus",
3
- "version": "0.0.59",
3
+ "version": "0.0.62",
4
4
  "private": false,
5
5
  "description": "Docyrus API CLI",
6
6
  "main": "./main.js",
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync } from "node:fs";
4
+ import { join } from "node:path";
5
+ import { ensureDaemon, cdp, evaluate, clickAt, getMode, waitForCondition } from "./browser-client.js";
6
+
7
// ── CLI argument parsing ─────────────────────────────────────────────────────
// Accepted forms:
//   browser-click.js @eN            (snapshot ref)
//   browser-click.js <css-selector>
//   browser-click.js <x> <y>        (viewport coordinates)
// plus an optional `--timeout <ms>` controlling how long to wait for the element.
const DEFAULT_TIMEOUT_MS = 5000;

const args = process.argv.slice(2);
const timeoutIdx = args.indexOf("--timeout");
const requestedTimeout = timeoutIdx !== -1 ? Number.parseInt(args[timeoutIdx + 1], 10) : DEFAULT_TIMEOUT_MS;

// A missing or non-numeric value parses to NaN; a NaN deadline makes every wait
// fail instantly with a misleading "timed out", so fall back to the default.
const isValidTimeout = Number.isFinite(requestedTimeout) && requestedTimeout >= 0;
if (!isValidTimeout) {
  console.error(`Invalid --timeout value; using default of ${DEFAULT_TIMEOUT_MS}ms`);
}
const timeout = isValidTimeout ? requestedTimeout : DEFAULT_TIMEOUT_MS;

// Positional arguments are everything that is neither a flag nor the value of --timeout.
const positional = args.filter((a, i) => !a.startsWith("--") && args[i - 1] !== "--timeout");

// Two leading all-digit positionals mean "click at these coordinates".
const isCoordMode = positional.length >= 2 && /^\d+$/.test(positional[0]) && /^\d+$/.test(positional[1]);
const target = isCoordMode ? null : positional[0];

if (!target && !isCoordMode) {
  console.log("Usage: browser-click.js <@ref|selector|x y> [--timeout <ms>]");
  process.exit(1);
}
19
+
20
/**
 * Look up a snapshot reference (e.g. "@e3") in `.docyrus/browser-refs.json`
 * under the current working directory.
 *
 * @param {string} ref - Reference key to look up.
 * @returns {object} The stored entry for `ref` (the caller reads `selector` and `xpath` from it).
 * @throws {Error} "No snapshot refs found…" when the refs file is missing, unreadable,
 *   not valid JSON, or does not contain an object.
 * @throws {Error} `Unknown ref "<ref>"` when the file loads but has no entry for `ref`.
 */
function resolveRef(ref) {
  const noRefsMessage = "No snapshot refs found. Run \"docyrus browser snapshot\" first.";

  // Loading and lookup are separate steps so the two failure modes are told
  // apart by where they occur, not by inspecting the error message text.
  let refs;
  try {
    refs = JSON.parse(readFileSync(join(process.cwd(), ".docyrus", "browser-refs.json"), "utf8"));
  } catch (cause) {
    throw new Error(noRefsMessage, { cause });
  }
  if (refs === null || typeof refs !== "object") {
    throw new Error(noRefsMessage);
  }

  const entry = refs[ref];
  if (!entry) {
    throw new Error(`Unknown ref "${ref}"`);
  }
  return entry;
}
31
+
32
/**
 * Build a page-side expression that resolves an element, scrolls it into the
 * viewport and returns the rounded centre of its bounding box.
 *
 * Mouse events are dispatched in viewport coordinates, so an element outside
 * the viewport must be scrolled into view before it is measured; otherwise the
 * click lands outside the visible page and does nothing.
 *
 * @param {string} lookup - JS expression (evaluated in the page) yielding the element or null.
 * @returns {string} Expression evaluating to `{ x, y }`, or `null` when the element is absent.
 */
function elementCenterExpression(lookup) {
  return `(() => {
    const el = ${lookup};
    if (!el) return null;
    el.scrollIntoView({ block: "center", inline: "center", behavior: "instant" });
    const r = el.getBoundingClientRect();
    return { x: Math.round(r.x + r.width / 2), y: Math.round(r.y + r.height / 2) };
  })()`;
}

/**
 * Wait for the element described by `lookup`, then click its centre.
 *
 * @param {string} lookup - Page-side expression yielding the element or null.
 * @throws {Error} When the element does not appear within `timeout` or cannot be measured.
 */
function clickElement(lookup) {
  waitForCondition(`!!(${lookup})`, timeout);
  const coords = evaluate(elementCenterExpression(lookup));
  if (!coords) { throw new Error(`Element not found: ${target}`); }
  clickAt(coords.x, coords.y);
}

await ensureDaemon();

try {
  let clicked;
  if (isCoordMode) {
    // Raw coordinates: click exactly where the caller asked.
    clickAt(parseInt(positional[0], 10), parseInt(positional[1], 10));
    clicked = `${positional[0]},${positional[1]}`;
  } else if (target.startsWith("@e")) {
    // Snapshot ref: try the recorded CSS selector first, then fall back to the XPath.
    const { selector, xpath } = resolveRef(target);
    const bySelector = `document.querySelector(${JSON.stringify(selector)})`;
    const byXPath = `document.evaluate(${JSON.stringify(xpath)}, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue`;
    clickElement(`(${bySelector} || ${byXPath})`);
    clicked = target;
  } else {
    // Plain CSS selector.
    clickElement(`document.querySelector(${JSON.stringify(target)})`);
    clicked = target;
  }

  // Report the URL after the click so callers can see whether it navigated.
  const url = evaluate("window.location.href");
  console.log(JSON.stringify({ mode: getMode(), clicked, url }));
} catch (e) {
  console.error(`✗ Click failed: ${e.message}`);
  process.exit(1);
}
@@ -0,0 +1,236 @@
1
+ /**
2
+ * Browser client — thin HTTP wrapper for the browser daemon.
3
+ * Replaces browser-connect.js. All operations are raw CDP via daemon HTTP API.
4
+ *
5
+ * Usage:
6
+ * import { cdp, ensureDaemon, drainEvents, shutdown, getMode } from "./browser-client.js";
7
+ *
8
+ * await ensureDaemon();
9
+ * const { root } = cdp("DOM.getDocument", { depth: -1 });
10
+ * const screenshot = cdp("Page.captureScreenshot", { format: "png" });
11
+ */
12
+
13
+ import { readFileSync, existsSync } from "node:fs";
14
+ import { join } from "node:path";
15
+ import { execFileSync, spawn } from "node:child_process";
16
+ import { request } from "node:http";
17
+
18
// Daemon state lives in `.docyrus/` under the directory the CLI was started from.
const DOCYRUS_DIR = join(process.cwd(), ".docyrus");
const DAEMON_INFO_FILE = join(DOCYRUS_DIR, "browser-daemon.json");
const DEFAULT_PORT = 9333;

// Port read from the info file; cached once a valid value has been seen.
let daemonPort = null;

// ── Daemon Lifecycle ────────────────────────────────────────────────────────

/**
 * Resolve the TCP port of the browser daemon.
 *
 * Reads `port` from the daemon info file and caches it. Falls back to
 * DEFAULT_PORT (without caching) when the file is missing, unreadable, not
 * valid JSON, or does not carry a usable port, so a later call can still pick
 * up a port written afterwards.
 *
 * @returns {number} Port to use for daemon HTTP requests.
 */
function getDaemonPort() {
  if (daemonPort) { return daemonPort; }
  try {
    const info = JSON.parse(readFileSync(DAEMON_INFO_FILE, "utf8"));
    const raw = info?.port;
    const port = typeof raw === "number" || typeof raw === "string" ? Number(raw) : NaN;
    // Without this check a file lacking `port` would yield `undefined` and
    // produce requests to "http://127.0.0.1:undefined".
    if (!Number.isInteger(port) || port <= 0 || port > 65535) {
      return DEFAULT_PORT;
    }
    daemonPort = port;
    return daemonPort;
  } catch {
    return DEFAULT_PORT;
  }
}
36
+
37
/**
 * Probe the daemon's `/health` endpoint.
 *
 * @returns {boolean} `true` only when the endpoint answers with `alive === true`;
 *   any request failure counts as "not alive".
 */
function isDaemonAlive() {
  let status;
  try {
    status = httpSync("GET", "/health");
  } catch {
    return false;
  }
  return status?.alive === true;
}
45
+
46
/**
 * Make sure a browser daemon is running, starting one in the background if needed.
 *
 * Resolves as soon as the health probe succeeds. When a daemon has to be
 * started, the probe is retried up to 60 times with a 500 ms pause before each
 * attempt (each probe itself takes additional time).
 *
 * @returns {Promise<void>}
 * @throws {Error} When browser-daemon.js cannot be located, the process cannot
 *   be spawned, or the daemon does not become healthy in time.
 */
export async function ensureDaemon() {
  if (isDaemonAlive()) { return; }

  // Find the daemon script: next to this module first, then the monorepo path.
  const candidates = [
    join(import.meta.dirname || ".", "browser-daemon.js"),
    join(process.cwd(), "apps/api-cli/resources/browser-tools/browser-daemon.js"),
  ];
  const daemonScript = candidates.find((c) => existsSync(c));
  if (!daemonScript) {
    throw new Error("Cannot find browser-daemon.js");
  }

  // Start the daemon detached so it outlives this short-lived CLI process.
  const child = spawn(process.execPath, [daemonScript], {
    detached: true,
    stdio: "ignore",
    cwd: process.cwd(),
    env: { ...process.env },
  });
  // Without a listener, a spawn failure would surface as an uncaught 'error' event.
  let spawnError = null;
  child.once("error", (err) => { spawnError = err; });
  child.unref();

  for (let i = 0; i < 60; i++) {
    await new Promise((r) => setTimeout(r, 500));
    if (spawnError) {
      throw new Error(`Failed to start browser daemon: ${spawnError.message}`, { cause: spawnError });
    }
    // The failed probe above may have cached a port from a stale info file.
    // Drop it so each attempt re-reads the file.
    // NOTE(review): assumes the daemon records its port in the info file — confirm in browser-daemon.js.
    daemonPort = null;
    if (isDaemonAlive()) {
      return;
    }
  }
  throw new Error("Daemon failed to start within 30 seconds");
}
77
+
78
+ // ── HTTP Communication (synchronous) ────────────────────────────────────────
79
+
80
/**
 * Perform a blocking HTTP request against the daemon and return the parsed JSON reply.
 *
 * The request runs in a short-lived child `node -e` process so the caller can
 * stay synchronous. Any method other than "GET" is sent as a JSON POST.
 *
 * @param {string} method - "GET", or anything else for POST.
 * @param {string} path - Request path, e.g. "/" or "/health".
 * @param {object} [body] - JSON-serialisable payload for POST requests.
 * @returns {*} Parsed JSON response body.
 * @throws {Error} When the child process fails, times out (35 s), or the output is not JSON.
 */
function httpSync(method, path, body) {
  const port = getDaemonPort();
  const url = `http://127.0.0.1:${port}${path}`;

  // Embed the URL and body as JSON string literals so no character in them can
  // break out of the generated script.
  const init = method === "GET"
    ? ""
    : `,{method:"POST",headers:{"Content-Type":"application/json"},body:${JSON.stringify(JSON.stringify(body))}}`;
  const script = `fetch(${JSON.stringify(url)}${init}).then(r=>r.json()).then(d=>process.stdout.write(JSON.stringify(d))).catch(e=>{process.stderr.write(e.message);process.exit(1)})`;

  const result = execFileSync(process.execPath, ["-e", script], {
    encoding: "utf8",
    timeout: 35_000,
    // The default limit is 1 MiB, which base64 screenshots and full-page DOM
    // dumps easily exceed.
    maxBuffer: 256 * 1024 * 1024,
    // Capture stderr instead of letting it leak to the parent's stderr on every
    // failed probe; it is still included in the thrown error.
    stdio: ["pipe", "pipe", "pipe"],
  });

  return JSON.parse(result);
}
96
+
97
+ // ── Public API ──────────────────────────────────────────────────────────────
98
+
99
/**
 * Forward one raw CDP command to the daemon and hand back its result.
 *
 * @param {string} method - CDP method name (e.g., "Page.navigate", "Runtime.evaluate").
 * @param {object} params - Parameters for the CDP method.
 * @param {string} [sid] - Session ID to target instead of the daemon's current one.
 * @returns {object} The `result` field of the daemon reply, or `{}` when it is empty.
 * @throws {Error} `CDP <method>: <error>` when the reply carries an `error` field.
 */
export function cdp(method, params = {}, sid) {
  const payload = sid ? { method, params, sessionId: sid } : { method, params };
  const reply = httpSync("POST", "/", payload);
  if (reply.error) {
    throw new Error(`CDP ${method}: ${reply.error}`);
  }
  return reply.result || {};
}
115
+
116
/**
 * Fetch the CDP events the daemon has buffered, via its `drain_events` meta command.
 *
 * @returns {Array} The reply's `events` list, or an empty array when absent.
 */
export function drainEvents() {
  const { events } = httpSync("POST", "/", { meta: "drain_events" });
  return events || [];
}
121
+
122
/**
 * Ask the daemon which session ID it is currently using.
 *
 * @returns {string|undefined} The `sessionId` field of the daemon reply.
 */
export function getSession() {
  const { sessionId } = httpSync("POST", "/", { meta: "session" });
  return sessionId;
}
127
+
128
/**
 * Tell the daemon to switch its active session.
 *
 * @param {string} sid - Session ID to make active.
 */
export function setSession(sid) {
  const request = { meta: "set_session", sessionId: sid };
  httpSync("POST", "/", request);
}
132
+
133
/**
 * Send the daemon its `shutdown` meta command.
 *
 * Best-effort: a failed request (for example, no daemon listening) is ignored.
 */
export function shutdown() {
  const request = { meta: "shutdown" };
  try {
    httpSync("POST", "/", request);
  } catch {
    // Intentionally ignored — there is nothing to shut down or the request failed.
  }
}
137
+
138
/**
 * Query the daemon's `/health` endpoint.
 *
 * @returns {object} Parsed health payload.
 * @throws {Error} When the request fails.
 */
export function health() {
  const status = httpSync("GET", "/health");
  return status;
}
142
+
143
/**
 * Report the daemon's mode as found in its health payload.
 *
 * @returns {string} The payload's `mode`, "local" when the payload has none,
 *   or "unknown" when the health check fails.
 */
export function getMode() {
  try {
    const { mode } = health();
    return mode || "local";
  } catch {
    return "unknown";
  }
}
152
+
153
+ // ── High-Level Helpers (common CDP sequences) ───────────────────────────────
154
+
155
/**
 * Navigate the page to a URL and optionally wait for a load milestone.
 *
 * @param {string} url - Destination URL.
 * @param {string|null|false} [waitUntil="load"] - "domcontentloaded" waits for
 *   `Page.domContentEventFired`; any other truthy value waits for
 *   `Page.loadEventFired`; a falsy value returns right after issuing the command.
 * @returns {object} Result of the `Page.navigate` CDP command.
 *
 * The wait gives up silently after 15 seconds and still returns the result.
 */
export function navigate(url, waitUntil = "load") {
  if (waitUntil) {
    // Discard events buffered before this navigation; otherwise a load event
    // left over from the previous page would end the wait immediately.
    drainEvents();
  }

  const result = cdp("Page.navigate", { url });

  if (waitUntil) {
    const deadline = Date.now() + 15_000;
    const targetEvent = waitUntil === "domcontentloaded" ? "Page.domContentEventFired" : "Page.loadEventFired";
    // Block the thread in-process between polls instead of spawning a child
    // `node` per sleep, which was slow and could throw on its own 1 s timeout.
    const sleeper = new Int32Array(new SharedArrayBuffer(4));
    while (Date.now() < deadline) {
      const evts = drainEvents();
      if (evts.some((e) => e.method === targetEvent)) { break; }
      Atomics.wait(sleeper, 0, 0, 200);
    }
  }
  return result;
}
170
+
171
/**
 * Evaluate a JavaScript expression in the page and return its value.
 *
 * The expression runs through `Runtime.evaluate` with `returnByValue` and
 * `awaitPromise`, so promises are awaited and the result is JSON-serialised.
 *
 * @param {string} expression - JavaScript source to evaluate.
 * @returns {*} The evaluated value (undefined when the reply carries none).
 * @throws {Error} When the evaluation raises an exception in the page.
 */
export function evaluate(expression) {
  const result = cdp("Runtime.evaluate", {
    expression,
    returnByValue: true,
    awaitPromise: true,
  });
  const details = result.exceptionDetails;
  if (details) {
    // `text` is typically just "Uncaught"; the exception's description carries
    // the actual error name and message, so prefer it when present.
    throw new Error(details.exception?.description || details.text || "JS evaluation error");
  }
  return result.result?.value;
}
183
+
184
/**
 * Capture a screenshot via `Page.captureScreenshot`.
 *
 * @param {object} [options] - Extra `Page.captureScreenshot` parameters. The
 *   convenience flag `full` is translated to `captureBeyondViewport`; an explicit
 *   `captureBeyondViewport` or `format` in `options` overrides the defaults.
 * @returns {object} CDP result of the capture command.
 */
export function captureScreenshot(options = {}) {
  // `full` is this helper's own flag, not a CDP parameter, so keep it out of
  // the request payload.
  const { full, ...cdpOptions } = options;
  return cdp("Page.captureScreenshot", {
    format: "png",
    captureBeyondViewport: full || false,
    ...cdpOptions,
  });
}
192
+
193
/**
 * Collect basic facts about the current page by evaluating a single object
 * literal in it: URL, title, viewport size, scroll offsets, document size and
 * ready state.
 *
 * @returns {object} The evaluated object.
 */
export function pageInfo() {
  const probe = `({
  url: window.location.href,
  title: document.title,
  viewportWidth: window.innerWidth,
  viewportHeight: window.innerHeight,
  scrollX: Math.round(window.scrollX),
  scrollY: Math.round(window.scrollY),
  pageWidth: document.documentElement.scrollWidth,
  pageHeight: document.documentElement.scrollHeight,
  readyState: document.readyState,
  })`;
  return evaluate(probe);
}
207
+
208
/**
 * Issue a left-button press followed by a release at the given coordinates
 * using `Input.dispatchMouseEvent`.
 *
 * @param {number} x - X coordinate.
 * @param {number} y - Y coordinate.
 */
export function clickAt(x, y) {
  for (const type of ["mousePressed", "mouseReleased"]) {
    cdp("Input.dispatchMouseEvent", { type, x, y, button: "left", clickCount: 1 });
  }
}
213
+
214
/**
 * Insert text through the `Input.insertText` CDP command.
 *
 * @param {string} text - Text to insert.
 */
export function typeText(text) {
  const params = { text };
  cdp("Input.insertText", params);
}
218
+
219
/**
 * Send a keyDown followed by a keyUp for `key` using `Input.dispatchKeyEvent`.
 *
 * @param {string} key - Key value passed as the event's `key` field.
 */
export function pressKey(key) {
  for (const type of ["keyDown", "keyUp"]) {
    cdp("Input.dispatchKeyEvent", { type, key });
  }
}
224
+
225
/**
 * Poll a page-side JavaScript expression until it yields a truthy value.
 *
 * The expression is always evaluated at least once, then re-evaluated roughly
 * every 250 ms until the time budget is spent. Evaluation errors are treated
 * as "not yet true"; the most recent one is attached as `cause` if the wait
 * finally times out.
 *
 * @param {string} expression - JavaScript source evaluated in the page.
 * @param {number} [timeout=15000] - Time budget in milliseconds. Values that are
 *   not finite non-negative numbers fall back to the default.
 * @returns {*} The first truthy value the expression produced.
 * @throws {Error} `waitForCondition timed out: <expression>` when the budget runs out.
 */
export function waitForCondition(expression, timeout = 15000) {
  // A NaN deadline would make the loop exit without ever checking the condition.
  const budget = Number.isFinite(timeout) && timeout >= 0 ? timeout : 15000;
  const deadline = Date.now() + budget;
  // Block the thread in-process between polls instead of spawning a child
  // `node` per sleep, which was slow and could throw on its own 1 s timeout.
  const sleeper = new Int32Array(new SharedArrayBuffer(4));
  let lastError;

  for (;;) {
    try {
      const val = evaluate(expression);
      if (val) { return val; }
    } catch (err) {
      lastError = err;
    }
    if (Date.now() >= deadline) { break; }
    Atomics.wait(sleeper, 0, 0, 250);
  }

  const message = `waitForCondition timed out: ${expression}`;
  throw lastError ? new Error(message, { cause: lastError }) : new Error(message);
}
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { unlinkSync } from "node:fs";
4
+ import { join } from "node:path";
5
+ import { shutdown, getMode } from "./browser-client.js";
6
+
7
// NOTE(review): `--kill` is parsed but nothing below reads it — confirm whether
// it was meant to change the shutdown behaviour.
const kill = process.argv.includes("--kill");

// getMode() returns "unknown" when the health request fails, which is the only
// visible signal that no daemon is reachable. Both getMode() and shutdown()
// swallow their own errors, so a try/catch around them can never detect that.
const mode = getMode();
const daemonWasRunning = mode !== "unknown";

if (daemonWasRunning) {
  shutdown();
}

// Clear cached remote session (best-effort: the file may not exist).
try { unlinkSync(join(process.cwd(), ".docyrus", "browser-session.json")); } catch {}

if (daemonWasRunning) {
  console.log(JSON.stringify({ mode, closed: true, daemonStopped: true }));
} else {
  console.log(JSON.stringify({ closed: true, note: "Daemon was not running" }));
}
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { ensureDaemon, evaluate, drainEvents, getMode } from "./browser-client.js";
4
+ import { execFileSync } from "node:child_process";
5
+
6
// ── CLI argument parsing ─────────────────────────────────────────────────────
//   --level <name>   only report messages of this level
//   --listen <ms>    collect live console events for this long instead of
//                    reading the in-page capture buffer
const args = process.argv.slice(2);
const levelIdx = args.indexOf("--level");
const listenIdx = args.indexOf("--listen");

// CDP reports console.warn() as type "warning" while the in-page interceptor
// records it as "warn"; normalise so `--level warn` works in both modes.
const normalizeLevel = (level) => (level === "warning" ? "warn" : level);

const filterLevel = levelIdx !== -1 && args[levelIdx + 1] ? normalizeLevel(args[levelIdx + 1]) : null;
const listenMs = listenIdx !== -1 ? Number.parseInt(args[listenIdx + 1], 10) : null;

// A missing or non-numeric duration used to fall through to snapshot mode silently.
if (listenIdx !== -1 && !(listenMs > 0)) {
  console.error("✗ Console capture failed: --listen requires a positive duration in milliseconds");
  process.exit(1);
}

await ensureDaemon();

try {
  if (listenMs) {
    // Listen mode: collect Runtime.consoleAPICalled events from daemon buffer
    const messages = [];
    const deadline = Date.now() + listenMs;
    // In-process sleep between polls; avoids spawning a child `node` per pause.
    const sleeper = new Int32Array(new SharedArrayBuffer(4));

    while (Date.now() < deadline) {
      for (const evt of drainEvents()) {
        if (evt.method === "Runtime.consoleAPICalled") {
          const level = normalizeLevel(evt.params.type || "log");
          if (filterLevel && level !== filterLevel) { continue; }
          const text = (evt.params.args || []).map((a) => a.value ?? a.description ?? "").join(" ");
          messages.push({ level, text, url: evt.params.stackTrace?.callFrames?.[0]?.url || null });
        }
        if (evt.method === "Runtime.exceptionThrown") {
          // Uncaught exceptions are reported as level "error".
          if (!filterLevel || filterLevel === "error") {
            messages.push({ level: "error", text: evt.params.exceptionDetails?.text || "Unknown error", url: null });
          }
        }
      }
      Atomics.wait(sleeper, 0, 0, 200);
    }

    console.log(JSON.stringify({ mode: getMode(), count: messages.length, messages }));
  } else {
    // Snapshot mode: install interceptor (once per page) and read captured logs
    const messages = evaluate(`(() => {
      const filterLevel = ${filterLevel ? JSON.stringify(filterLevel) : "null"};
      if (!window.__docyrus_console_log) {
        window.__docyrus_console_log = [];
        const MAX = 100;
        const orig = {};
        // Must never throw: it runs inside the page's own console calls, and
        // JSON.stringify fails on circular structures and BigInt values.
        const format = (a) => {
          if (typeof a !== "object" || a === null) return String(a);
          try { return JSON.stringify(a); } catch { return String(a); }
        };
        for (const level of ["log", "warn", "error", "info", "debug"]) {
          orig[level] = console[level];
          console[level] = (...args) => {
            window.__docyrus_console_log.push({
              level,
              text: args.map(format).join(" "),
              timestamp: Date.now(),
            });
            if (window.__docyrus_console_log.length > MAX) window.__docyrus_console_log.shift();
            orig[level].apply(console, args);
          };
        }
        window.addEventListener("error", e => {
          window.__docyrus_console_log.push({ level: "error", text: e.message || String(e), timestamp: Date.now() });
        });
      }
      let logs = window.__docyrus_console_log || [];
      if (filterLevel) logs = logs.filter(l => l.level === filterLevel);
      return logs.slice(-50);
    })()`);

    console.log(JSON.stringify({ mode: getMode(), count: messages.length, messages }));
  }
} catch (e) {
  console.error(`✗ Console capture failed: ${e.message}`);
  process.exit(1);
}
@@ -1,99 +1,60 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- import { Readability } from "@mozilla/readability";
4
3
  import { JSDOM } from "jsdom";
4
+ import { Readability } from "@mozilla/readability";
5
5
  import TurndownService from "turndown";
6
6
  import { gfm } from "turndown-plugin-gfm";
7
- import { connectBrowser } from "./browser-connect.js";
7
+ import { ensureDaemon, navigate, cdp, evaluate, getMode } from "./browser-client.js";
8
8
 
9
- // Global timeout - exit if script takes too long
10
9
  const TIMEOUT = 30000;
11
- setTimeout(() => {
12
- console.error("✗ Timeout after 30s");
13
- process.exit(1);
14
- }, TIMEOUT).unref();
10
+ setTimeout(() => { console.error("✗ Timeout after 30s"); process.exit(1); }, TIMEOUT).unref();
15
11
 
16
12
  const url = process.argv[2];
17
-
18
13
  if (!url) {
19
14
  console.log("Usage: browser-content.js <url>");
20
- console.log("\nExtracts readable content from a URL as markdown.");
21
- console.log("\nExamples:");
22
- console.log(" browser-content.js https://example.com");
23
- console.log(" browser-content.js https://en.wikipedia.org/wiki/Rust_(programming_language)");
24
15
  process.exit(1);
25
16
  }
26
17
 
27
- const { browser: b, mode, session } = await connectBrowser();
28
-
29
- const p = (await b.pages()).at(-1);
30
- if (!p) {
31
- console.error("✗ No active tab found");
32
- process.exit(1);
33
- }
18
+ await ensureDaemon();
34
19
 
35
- await Promise.race([
36
- p.goto(url, { waitUntil: "networkidle2" }),
37
- new Promise((r) => setTimeout(r, 10000)),
38
- ]).catch(() => {});
20
+ try {
21
+ navigate(url, "load");
39
22
 
40
- // Get HTML via CDP (works even with TrustedScriptURL restrictions)
41
- const client = await p.createCDPSession();
42
- const { root } = await client.send("DOM.getDocument", { depth: -1, pierce: true });
43
- const { outerHTML } = await client.send("DOM.getOuterHTML", { nodeId: root.nodeId });
44
- await client.detach();
23
+ // Get HTML via CDP DOM (bypasses TrustedScriptURL restrictions)
24
+ const { root } = cdp("DOM.getDocument", { depth: -1, pierce: true });
25
+ const { outerHTML } = cdp("DOM.getOuterHTML", { nodeId: root.nodeId });
45
26
 
46
- const finalUrl = p.url();
27
+ const finalUrl = evaluate("window.location.href");
47
28
 
48
- // Extract with Readability
49
- const doc = new JSDOM(outerHTML, { url: finalUrl });
50
- const reader = new Readability(doc.window.document);
51
- const article = reader.parse();
29
+ // Extract with Readability
30
+ const doc = new JSDOM(outerHTML, { url: finalUrl });
31
+ const reader = new Readability(doc.window.document);
32
+ const article = reader.parse();
52
33
 
53
- // Convert to markdown
54
- function htmlToMarkdown(html) {
55
- const turndown = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
56
- turndown.use(gfm);
57
- turndown.addRule("removeEmptyLinks", {
58
- filter: (node) => node.nodeName === "A" && !node.textContent?.trim(),
59
- replacement: () => "",
60
- });
61
- return turndown
62
- .turndown(html)
63
- .replace(/\[\\?\[\s*\\?\]\]\([^)]*\)/g, "")
64
- .replace(/ +/g, " ")
65
- .replace(/\s+,/g, ",")
66
- .replace(/\s+\./g, ".")
67
- .replace(/\n{3,}/g, "\n\n")
68
- .trim();
69
- }
34
+ function htmlToMarkdown(html) {
35
+ const turndown = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
36
+ turndown.use(gfm);
37
+ turndown.addRule("removeEmptyLinks", {
38
+ filter: (node) => node.nodeName === "A" && !node.textContent?.trim(),
39
+ replacement: () => "",
40
+ });
41
+ return turndown.turndown(html).replace(/\[\\?\[\s*\\?\]\]\([^)]*\)/g, "").replace(/ +/g, " ").replace(/\s+,/g, ",").replace(/\s+\./g, ".").replace(/\n{3,}/g, "\n\n").trim();
42
+ }
70
43
 
71
- let content;
72
- if (article && article.content) {
73
- content = htmlToMarkdown(article.content);
74
- } else {
75
- // Fallback
76
- const fallbackDoc = new JSDOM(outerHTML, { url: finalUrl });
77
- const fallbackBody = fallbackDoc.window.document;
78
- fallbackBody.querySelectorAll("script, style, noscript, nav, header, footer, aside").forEach((el) => el.remove());
79
- const main = fallbackBody.querySelector("main, article, [role='main'], .content, #content") || fallbackBody.body;
80
- const fallbackHtml = main?.innerHTML || "";
81
- if (fallbackHtml.trim().length > 100) {
82
- content = htmlToMarkdown(fallbackHtml);
44
+ let content;
45
+ if (article?.content) {
46
+ content = htmlToMarkdown(article.content);
83
47
  } else {
84
- content = "(Could not extract content)";
48
+ const fallbackDoc = new JSDOM(outerHTML, { url: finalUrl });
49
+ const fallbackBody = fallbackDoc.window.document;
50
+ fallbackBody.querySelectorAll("script, style, noscript, nav, header, footer, aside").forEach((el) => el.remove());
51
+ const main = fallbackBody.querySelector("main, article, [role='main'], .content, #content") || fallbackBody.body;
52
+ const fallbackHtml = main?.innerHTML || "";
53
+ content = fallbackHtml.trim().length > 100 ? htmlToMarkdown(fallbackHtml) : "(Could not extract content)";
85
54
  }
86
- }
87
55
 
88
- // Content command outputs markdown directly (not wrapped in JSON) since
89
- // the extracted text is the primary payload and is consumed as plain text.
90
- console.log(`URL: ${finalUrl}`);
91
- if (article?.title) {console.log(`Title: ${article.title}`);}
92
- if (mode === "remote" && session?.devtoolsFrontendUrl) {
93
- console.log(`DevTools: ${session.devtoolsFrontendUrl}`);
56
+ console.log(JSON.stringify({ mode: getMode(), url: finalUrl, title: article?.title || null, content }));
57
+ } catch (e) {
58
+ console.error(`✗ Content extraction failed: ${e.message}`);
59
+ process.exit(1);
94
60
  }
95
- console.log(`Mode: ${mode}`);
96
- console.log("");
97
- console.log(content);
98
-
99
- process.exit(0);
@@ -1,22 +1,27 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- import { connectBrowser } from "./browser-connect.js";
3
+ import { ensureDaemon, cdp, getMode } from "./browser-client.js";
4
4
 
5
- const { browser: b, mode, session } = await connectBrowser();
5
+ const args = process.argv.slice(2);
6
+ const nameIdx = args.indexOf("--name");
7
+ const domainIdx = args.indexOf("--domain");
8
+ const filterName = nameIdx !== -1 ? args[nameIdx + 1] : null;
9
+ const filterDomain = domainIdx !== -1 ? args[domainIdx + 1] : null;
6
10
 
7
- const p = (await b.pages()).at(-1);
11
+ await ensureDaemon();
8
12
 
9
- if (!p) {
10
- console.error("✗ No active tab found");
11
- process.exit(1);
12
- }
13
+ try {
14
+ let { cookies } = cdp("Network.getCookies");
13
15
 
14
- const cookies = await p.cookies();
16
+ if (filterName) {
17
+ cookies = cookies.filter((c) => c.name === filterName);
18
+ }
19
+ if (filterDomain) {
20
+ cookies = cookies.filter((c) => c.domain.includes(filterDomain));
21
+ }
15
22
 
16
- const result = { mode, cookies };
17
- if (session?.devtoolsFrontendUrl) {
18
- result.devtoolsFrontendUrl = session.devtoolsFrontendUrl;
23
+ console.log(JSON.stringify({ mode: getMode(), cookies }));
24
+ } catch (e) {
25
+ console.error(`✗ Cookies failed: ${e.message}`);
26
+ process.exit(1);
19
27
  }
20
- console.log(JSON.stringify(result));
21
-
22
- await b.disconnect();