@apmantza/greedysearch-pi 1.7.0 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,237 +1,237 @@
1
- // extractors/common.mjs — shared utilities for CDP-based extractors
2
- // Extracts common patterns: cdp wrapper, tab management, clipboard interception, source parsing
3
-
4
- import { spawn } from "node:child_process";
5
- import { dirname, join } from "node:path";
6
- import { fileURLToPath } from "node:url";
7
-
8
- const __dir = dirname(fileURLToPath(import.meta.url));
9
- const CDP = join(__dir, "..", "cdp.mjs");
10
-
11
- // ============================================================================
12
- // CDP wrapper
13
- // ============================================================================
14
-
15
- /**
16
- * Execute a CDP command through the cdp.mjs CLI
17
- * @param {string[]} args - Command arguments
18
- * @param {number} [timeoutMs=30000] - Timeout in milliseconds
19
- * @returns {Promise<string>} Command output
20
- */
21
- export function cdp(args, timeoutMs = 30000) {
22
- return new Promise((resolve, reject) => {
23
- const proc = spawn("node", [CDP, ...args], {
24
- stdio: ["ignore", "pipe", "pipe"],
25
- });
26
- let out = "";
27
- let err = "";
28
- proc.stdout.on("data", (d) => (out += d));
29
- proc.stderr.on("data", (d) => (err += d));
30
- const timer = setTimeout(() => {
31
- proc.kill();
32
- reject(new Error(`cdp timeout: ${args[0]}`));
33
- }, timeoutMs);
34
- proc.on("close", (code) => {
35
- clearTimeout(timer);
36
- if (code !== 0) reject(new Error(err.trim() || `cdp exit ${code}`));
37
- else resolve(out.trim());
38
- });
39
- });
40
- }
41
-
42
- // ============================================================================
43
- // Tab management
44
- // ============================================================================
45
-
46
- /**
47
- * Get an existing tab by prefix or open a new one
48
- * @param {string|null} tabPrefix - Existing tab prefix, or null to create new
49
- * @returns {Promise<string>} Tab identifier
50
- */
51
- export async function getOrOpenTab(tabPrefix) {
52
- if (tabPrefix) return tabPrefix;
53
- // Always open a fresh tab to avoid SPA navigation issues
54
- const list = await cdp(["list"]);
55
- const anchor = list.split("\n")[0]?.slice(0, 8);
56
- if (!anchor)
57
- throw new Error(
58
- "No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?",
59
- );
60
- const raw = await cdp([
61
- "evalraw",
62
- anchor,
63
- "Target.createTarget",
64
- '{"url":"about:blank"}',
65
- ]);
66
- const { targetId } = JSON.parse(raw);
67
- await cdp(["list"]); // refresh cache
68
- return targetId.slice(0, 8);
69
- }
70
-
71
- // ============================================================================
72
- // Clipboard interception (for extractors that use copy-to-clipboard)
73
- // ============================================================================
74
-
75
- /**
76
- * Inject clipboard interceptor to capture text when copy buttons are clicked.
77
- * Each engine uses a unique global variable to avoid conflicts.
78
- * @param {string} tab - Tab identifier
79
- * @param {string} globalVar - Global variable name (e.g., '__pplxClipboard', '__geminiClipboard')
80
- */
81
- export async function injectClipboardInterceptor(tab, globalVar) {
82
- const code = `
83
- window.${globalVar} = null;
84
- const _origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
85
- navigator.clipboard.writeText = function(text) {
86
- window.${globalVar} = text;
87
- return _origWriteText(text);
88
- };
89
- const _origWrite = navigator.clipboard.write.bind(navigator.clipboard);
90
- navigator.clipboard.write = async function(items) {
91
- try {
92
- for (const item of items) {
93
- if (item.types && item.types.includes('text/plain')) {
94
- const blob = await item.getType('text/plain');
95
- window.${globalVar} = await blob.text();
96
- break;
97
- }
98
- }
99
- } catch(e) {}
100
- return _origWrite(items);
101
- };
102
- `;
103
- await cdp(["eval", tab, code]);
104
- }
105
-
106
- // ============================================================================
107
- // Source extraction from markdown
108
- // ============================================================================
109
-
110
- /**
111
- * Parse Markdown links from text to extract sources
112
- * @param {string} text - Text containing Markdown links like [title](url)
113
- * @returns {Array<{title: string, url: string}>} Extracted sources
114
- */
115
- export function parseSourcesFromMarkdown(text) {
116
- return Array.from(text.matchAll(/\[([^\]]+)\]\((https?:\/\/[^\s)]+)\)/g))
117
- .map((m) => ({ title: m[1], url: m[2] }))
118
- .filter((v, i, arr) => arr.findIndex((x) => x.url === v.url) === i)
119
- .slice(0, 10);
120
- }
121
-
122
- // ============================================================================
123
- // Stream completion detection
124
- // ============================================================================
125
-
126
- /**
127
- * Wait for generation/streaming to complete by monitoring text length stability
128
- * @param {string} tab - Tab identifier
129
- * @param {object} options - Options
130
- * @param {number} [options.timeout=30000] - Maximum wait time in ms
131
- * @param {number} [options.interval=600] - Polling interval in ms
132
- * @param {number} [options.stableRounds=3] - Required stable rounds to consider complete
133
- * @param {string} [options.selector='document.body'] - Element to monitor (default: body)
134
- * @returns {Promise<number>} Final text length
135
- */
136
- export async function waitForStreamComplete(tab, options = {}) {
137
- const {
138
- timeout = 30000,
139
- interval = 600,
140
- stableRounds = 3,
141
- selector = "document.body",
142
- } = options;
143
-
144
- const deadline = Date.now() + timeout;
145
- let lastLen = -1;
146
- let stableCount = 0;
147
-
148
- while (Date.now() < deadline) {
149
- await new Promise((r) => setTimeout(r, interval));
150
- const lenStr = await cdp([
151
- "eval",
152
- tab,
153
- `${selector}?.innerText?.length ?? 0`,
154
- ]).catch(() => "0");
155
- const currentLen = parseInt(lenStr, 10) || 0;
156
-
157
- if (currentLen > 0) {
158
- if (currentLen === lastLen) {
159
- stableCount++;
160
- if (stableCount >= stableRounds) return currentLen;
161
- } else {
162
- lastLen = currentLen;
163
- stableCount = 0;
164
- }
165
- }
166
- }
167
-
168
- throw new Error(`Generation did not stabilise within ${timeout}ms`);
169
- }
170
-
171
- // ============================================================================
172
- // CLI argument parsing
173
- // ============================================================================
174
-
175
- /**
176
- * Parse standard extractor CLI arguments
177
- * @param {string[]} args - process.argv.slice(2)
178
- * @returns {{query: string, tabPrefix: string|null, short: boolean}}
179
- */
180
- export function parseArgs(args) {
181
- const short = args.includes("--short");
182
- const rest = args.filter((a) => a !== "--short");
183
- const tabFlagIdx = rest.indexOf("--tab");
184
- const tabPrefix = tabFlagIdx !== -1 ? rest[tabFlagIdx + 1] : null;
185
- const query =
186
- tabFlagIdx !== -1
187
- ? rest
188
- .filter((_, i) => i !== tabFlagIdx && i !== tabFlagIdx + 1)
189
- .join(" ")
190
- : rest.join(" ");
191
- return { query, tabPrefix, short };
192
- }
193
-
194
- /**
195
- * Validate that a query was provided, show usage and exit if not
196
- * @param {string[]} args - process.argv.slice(2)
197
- * @param {string} usage - Usage string for error message
198
- */
199
- export function validateQuery(args, usage) {
200
- if (!args.length || args[0] === "--help") {
201
- process.stderr.write(usage);
202
- process.exit(1);
203
- }
204
- }
205
-
206
- // ============================================================================
207
- // Output formatting
208
- // ============================================================================
209
-
210
- /**
211
- * Truncate answer if short mode is enabled
212
- * @param {string} answer - Full answer text
213
- * @param {boolean} short - Whether to truncate
214
- * @param {number} [maxLen=300] - Maximum length in short mode
215
- * @returns {string} Formatted answer
216
- */
217
- export function formatAnswer(answer, short, maxLen = 300) {
218
- if (!short || answer.length <= maxLen) return answer;
219
- return `${answer.slice(0, maxLen).replace(/\s+\S*$/, "")}…`;
220
- }
221
-
222
- /**
223
- * Output JSON result to stdout
224
- * @param {object} data - Data to output
225
- */
226
- export function outputJson(data) {
227
- process.stdout.write(`${JSON.stringify(data, null, 2)}\n`);
228
- }
229
-
230
- /**
231
- * Handle and output error, then exit
232
- * @param {Error} error - Error to handle
233
- */
234
- export function handleError(error) {
235
- process.stderr.write(`Error: ${error.message}\n`);
236
- process.exit(1);
237
- }
1
+ // extractors/common.mjs — shared utilities for CDP-based extractors
2
+ // Extracts common patterns: cdp wrapper, tab management, clipboard interception, source parsing
3
+
4
+ import { spawn } from "node:child_process";
5
+ import { dirname, join } from "node:path";
6
+ import { fileURLToPath } from "node:url";
7
+
8
+ const __dir = dirname(fileURLToPath(import.meta.url));
9
+ const CDP = join(__dir, "..", "bin", "cdp.mjs");
10
+
11
+ // ============================================================================
12
+ // CDP wrapper
13
+ // ============================================================================
14
+
15
+ /**
16
+ * Execute a CDP command through the cdp.mjs CLI
17
+ * @param {string[]} args - Command arguments
18
+ * @param {number} [timeoutMs=30000] - Timeout in milliseconds
19
+ * @returns {Promise<string>} Command output
20
+ */
21
+ export function cdp(args, timeoutMs = 30000) {
22
+ return new Promise((resolve, reject) => {
23
+ const proc = spawn("node", [CDP, ...args], {
24
+ stdio: ["ignore", "pipe", "pipe"],
25
+ });
26
+ let out = "";
27
+ let err = "";
28
+ proc.stdout.on("data", (d) => (out += d));
29
+ proc.stderr.on("data", (d) => (err += d));
30
+ const timer = setTimeout(() => {
31
+ proc.kill();
32
+ reject(new Error(`cdp timeout: ${args[0]}`));
33
+ }, timeoutMs);
34
+ proc.on("close", (code) => {
35
+ clearTimeout(timer);
36
+ if (code !== 0) reject(new Error(err.trim() || `cdp exit ${code}`));
37
+ else resolve(out.trim());
38
+ });
39
+ });
40
+ }
41
+
42
+ // ============================================================================
43
+ // Tab management
44
+ // ============================================================================
45
+
46
+ /**
47
+ * Get an existing tab by prefix or open a new one
48
+ * @param {string|null} tabPrefix - Existing tab prefix, or null to create new
49
+ * @returns {Promise<string>} Tab identifier
50
+ */
51
+ export async function getOrOpenTab(tabPrefix) {
52
+ if (tabPrefix) return tabPrefix;
53
+ // Always open a fresh tab to avoid SPA navigation issues
54
+ const list = await cdp(["list"]);
55
+ const anchor = list.split("\n")[0]?.slice(0, 8);
56
+ if (!anchor)
57
+ throw new Error(
58
+ "No Chrome tabs found. Is Chrome running with --remote-debugging-port=9222?",
59
+ );
60
+ const raw = await cdp([
61
+ "evalraw",
62
+ anchor,
63
+ "Target.createTarget",
64
+ '{"url":"about:blank"}',
65
+ ]);
66
+ const { targetId } = JSON.parse(raw);
67
+ await cdp(["list"]); // refresh cache
68
+ return targetId.slice(0, 8);
69
+ }
70
+
71
+ // ============================================================================
72
+ // Clipboard interception (for extractors that use copy-to-clipboard)
73
+ // ============================================================================
74
+
75
+ /**
76
+ * Inject clipboard interceptor to capture text when copy buttons are clicked.
77
+ * Each engine uses a unique global variable to avoid conflicts.
78
+ * @param {string} tab - Tab identifier
79
+ * @param {string} globalVar - Global variable name (e.g., '__pplxClipboard', '__geminiClipboard')
80
+ */
81
+ export async function injectClipboardInterceptor(tab, globalVar) {
82
+ const code = `
83
+ window.${globalVar} = null;
84
+ const _origWriteText = navigator.clipboard.writeText.bind(navigator.clipboard);
85
+ navigator.clipboard.writeText = function(text) {
86
+ window.${globalVar} = text;
87
+ return _origWriteText(text);
88
+ };
89
+ const _origWrite = navigator.clipboard.write.bind(navigator.clipboard);
90
+ navigator.clipboard.write = async function(items) {
91
+ try {
92
+ for (const item of items) {
93
+ if (item.types && item.types.includes('text/plain')) {
94
+ const blob = await item.getType('text/plain');
95
+ window.${globalVar} = await blob.text();
96
+ break;
97
+ }
98
+ }
99
+ } catch(e) {}
100
+ return _origWrite(items);
101
+ };
102
+ `;
103
+ await cdp(["eval", tab, code]);
104
+ }
105
+
106
+ // ============================================================================
107
+ // Source extraction from markdown
108
+ // ============================================================================
109
+
110
+ /**
111
+ * Parse Markdown links from text to extract sources
112
+ * @param {string} text - Text containing Markdown links like [title](url)
113
+ * @returns {Array<{title: string, url: string}>} Extracted sources
114
+ */
115
+ export function parseSourcesFromMarkdown(text) {
116
+ return Array.from(text.matchAll(/\[([^\]]+)\]\((https?:\/\/[^\s)]+)\)/g))
117
+ .map((m) => ({ title: m[1], url: m[2] }))
118
+ .filter((v, i, arr) => arr.findIndex((x) => x.url === v.url) === i)
119
+ .slice(0, 10);
120
+ }
121
+
122
+ // ============================================================================
123
+ // Stream completion detection
124
+ // ============================================================================
125
+
126
+ /**
127
+ * Wait for generation/streaming to complete by monitoring text length stability
128
+ * @param {string} tab - Tab identifier
129
+ * @param {object} options - Options
130
+ * @param {number} [options.timeout=30000] - Maximum wait time in ms
131
+ * @param {number} [options.interval=600] - Polling interval in ms
132
+ * @param {number} [options.stableRounds=3] - Required stable rounds to consider complete
133
+ * @param {string} [options.selector='document.body'] - Element to monitor (default: body)
134
+ * @returns {Promise<number>} Final text length
135
+ */
136
+ export async function waitForStreamComplete(tab, options = {}) {
137
+ const {
138
+ timeout = 30000,
139
+ interval = 600,
140
+ stableRounds = 3,
141
+ selector = "document.body",
142
+ } = options;
143
+
144
+ const deadline = Date.now() + timeout;
145
+ let lastLen = -1;
146
+ let stableCount = 0;
147
+
148
+ while (Date.now() < deadline) {
149
+ await new Promise((r) => setTimeout(r, interval));
150
+ const lenStr = await cdp([
151
+ "eval",
152
+ tab,
153
+ `${selector}?.innerText?.length ?? 0`,
154
+ ]).catch(() => "0");
155
+ const currentLen = parseInt(lenStr, 10) || 0;
156
+
157
+ if (currentLen > 0) {
158
+ if (currentLen === lastLen) {
159
+ stableCount++;
160
+ if (stableCount >= stableRounds) return currentLen;
161
+ } else {
162
+ lastLen = currentLen;
163
+ stableCount = 0;
164
+ }
165
+ }
166
+ }
167
+
168
+ throw new Error(`Generation did not stabilise within ${timeout}ms`);
169
+ }
170
+
171
+ // ============================================================================
172
+ // CLI argument parsing
173
+ // ============================================================================
174
+
175
+ /**
176
+ * Parse standard extractor CLI arguments
177
+ * @param {string[]} args - process.argv.slice(2)
178
+ * @returns {{query: string, tabPrefix: string|null, short: boolean}}
179
+ */
180
+ export function parseArgs(args) {
181
+ const short = args.includes("--short");
182
+ const rest = args.filter((a) => a !== "--short");
183
+ const tabFlagIdx = rest.indexOf("--tab");
184
+ const tabPrefix = tabFlagIdx !== -1 ? rest[tabFlagIdx + 1] : null;
185
+ const query =
186
+ tabFlagIdx !== -1
187
+ ? rest
188
+ .filter((_, i) => i !== tabFlagIdx && i !== tabFlagIdx + 1)
189
+ .join(" ")
190
+ : rest.join(" ");
191
+ return { query, tabPrefix, short };
192
+ }
193
+
194
+ /**
195
+ * Validate that a query was provided, show usage and exit if not
196
+ * @param {string[]} args - process.argv.slice(2)
197
+ * @param {string} usage - Usage string for error message
198
+ */
199
+ export function validateQuery(args, usage) {
200
+ if (!args.length || args[0] === "--help") {
201
+ process.stderr.write(usage);
202
+ process.exit(1);
203
+ }
204
+ }
205
+
206
+ // ============================================================================
207
+ // Output formatting
208
+ // ============================================================================
209
+
210
+ /**
211
+ * Truncate answer if short mode is enabled
212
+ * @param {string} answer - Full answer text
213
+ * @param {boolean} short - Whether to truncate
214
+ * @param {number} [maxLen=300] - Maximum length in short mode
215
+ * @returns {string} Formatted answer
216
+ */
217
+ export function formatAnswer(answer, short, maxLen = 300) {
218
+ if (!short || answer.length <= maxLen) return answer;
219
+ return `${answer.slice(0, maxLen).replace(/\s+\S*$/, "")}…`;
220
+ }
221
+
222
+ /**
223
+ * Output JSON result to stdout
224
+ * @param {object} data - Data to output
225
+ */
226
+ export function outputJson(data) {
227
+ process.stdout.write(`${JSON.stringify(data, null, 2)}\n`);
228
+ }
229
+
230
+ /**
231
+ * Handle and output error, then exit
232
+ * @param {Error} error - Error to handle
233
+ */
234
+ export function handleError(error) {
235
+ process.stderr.write(`Error: ${error.message}\n`);
236
+ process.exit(1);
237
+ }