@apmantza/greedysearch-pi 1.9.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,436 @@
1
+ #!/usr/bin/env node
2
+
3
+ // extractors/chatgpt.mjs
4
+ // Navigate chatgpt.com, submit query, wait for answer, extract answer + sources.
5
+ //
6
+ // Usage:
7
+ // node extractors/chatgpt.mjs "<query>" [--tab <prefix>]
8
+ //
9
+ // Output (stdout): JSON { answer, sources, query, url }
10
+ // Errors go to stderr only — stdout is always clean JSON for piping.
11
+
12
+ import {
13
+ buildEnvelope,
14
+ cdp,
15
+ cdpWithInput,
16
+ formatAnswer,
17
+ getOrOpenTab,
18
+ handleError,
19
+ injectClipboardInterceptor,
20
+ jitter,
21
+ logStage,
22
+ outputJson,
23
+ parseArgs,
24
+ parseSourcesFromMarkdown,
25
+ parseSourcesFromMarkdownRefStyle,
26
+ prepareArgs,
27
+ validateQuery,
28
+ waitForSelector,
29
+ waitForStreamComplete,
30
+ } from "./common.mjs";
31
+ import { dismissConsent, handleVerification } from "./consent.mjs";
32
+
33
// Name of the `window` property that is handed to injectClipboardInterceptor
// and read back after the copy button has been clicked.
const GLOBAL_VAR = `__chatgptClipboard`;

// The ProseMirror editor element used as the prompt input.
const PROSE_SELECTOR = `div.ProseMirror`;

// The composer's send button.
const SEND_SELECTOR = `button[data-testid="send-button"]`;

// The per-turn "copy" action button looked up inside an assistant message.
const COPY_SELECTOR = `button[data-testid="copy-turn-action-button"]`;
37
+
38
+ // ============================================================================
39
+ // ChatGPT-specific helpers
40
+ // ============================================================================
41
+
42
/**
 * Put the query into ChatGPT's composer and press the send button.
 *
 * @param {string} tab - Tab identifier passed through to the cdp helpers.
 * @param {string} query - Text to type into the editor.
 * @returns {Promise<void>}
 * @throws {Error} When the send button cannot be found on the page.
 */
async function typeAndSubmit(tab, query) {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Give the ProseMirror editor focus before any text is sent.
  await cdp(["click", tab, PROSE_SELECTOR]);
  await pause(jitter(200));

  // The text travels over stdin (CDP Input.insertText) so that long
  // synthesis prompts do not hit Windows command-line length limits.
  await cdpWithInput(["type", tab, "--stdin"], query);
  await pause(jitter(300));

  // Press send from inside the page; the script reports whether the
  // button existed.
  const clickSendScript = `
    (() => {
      const btn = document.querySelector('${SEND_SELECTOR}');
      if (!btn) return 'no-send';
      btn.click();
      return 'ok';
    })()
  `;
  const outcome = await cdp(["eval", tab, clickSendScript]);
  if (outcome === "no-send") {
    throw new Error("ChatGPT send button not found");
  }
  await pause(jitter(300));
}
66
+
67
/**
 * In-page expression (source text, evaluated in the browser) that resolves
 * to the assistant message element for the current turn, or null.
 *
 * How it picks the element:
 *   1. Collect every `[data-message-author-role]` element in document order.
 *   2. Locate the LAST element whose role is `user`; if there is none the
 *      expression yields null.
 *   3. Among the `assistant` elements after that point, return the one with
 *      the longest non-empty `innerText` (null when all are empty).
 *
 * Requiring a preceding user message is what keeps chatgpt.com's static,
 * pre-rendered homepage greeting card (an assistant-role element marked
 * `data-turn-start-message="true"`) from being mistaken for a reply, so a
 * short real answer such as "Hello! 👋" is not confused with that
 * placeholder.
 */
const CHATGPT_RESPONSE_SELECTOR = String.raw`(() => {
  const all = document.querySelectorAll('[data-message-author-role]');
  let lastUserIdx = -1;
  for (let i = 0; i < all.length; i++) {
    if (all[i].getAttribute('data-message-author-role') === 'user') lastUserIdx = i;
  }
  if (lastUserIdx < 0) return null;
  let bestEl = null;
  let bestLen = 0;
  for (let i = lastUserIdx + 1; i < all.length; i++) {
    if (all[i].getAttribute('data-message-author-role') === 'assistant') {
      const len = (all[i].innerText || '').length;
      if (len > bestLen) { bestLen = len; bestEl = all[i]; }
    }
  }
  return bestEl;
})()`;
92
+
93
/**
 * Block until ChatGPT's reply has stopped streaming.
 *
 * This is a thin wrapper over the shared waitForStreamComplete helper from
 * common.mjs. It supplies CHATGPT_RESPONSE_SELECTOR so the static homepage
 * greeting card is skipped, and uses minLength: 1 so that any non-empty
 * reply counts as "started" — very short answers such as "Hello! 👋" used
 * to exhaust the whole wait budget under the previous 50-character
 * threshold.
 *
 * @param {string} tab - Tab identifier passed through to the helper.
 * @param {number} [timeoutMs=20000] - Value forwarded as the `timeout` option.
 * @returns {Promise<*>} Whatever waitForStreamComplete resolves to; main()
 *   stores it as the assistant text length.
 */
async function waitForResponse(tab, timeoutMs = 20000) {
  const pollOptions = {
    timeout: timeoutMs,
    interval: 600,
    stableRounds: 3,
    minLength: 1,
    selector: CHATGPT_RESPONSE_SELECTOR,
  };
  return waitForStreamComplete(tab, pollOptions);
}
109
+
110
/**
 * Node-side fallback for ChatGPT stream completion. Used when the in-browser
 * poll times out (typically because Chrome throttles background tabs when
 * several extractors run in parallel in `all` mode). Each round issues a
 * short, independent eval of the greeting-card-skipping selector, so the
 * WebSocket is free between polls.
 *
 * The response is considered complete once the same non-zero text length
 * has been observed on three consecutive successful polls after it first
 * appeared.
 *
 * A poll that rejects (CDP error or per-call timeout) carries no
 * information about the page and is skipped: it neither resets the
 * stability counter nor overwrites the last observed length. Previously a
 * single transient failure was read as "length 0", which discarded progress
 * and could make the function report 0 at the deadline even though text had
 * been seen.
 *
 * @param {string} tab - Tab identifier passed through to cdp.
 * @param {number} [maxMs=15000] - Overall polling budget in milliseconds.
 * @param {number} [intervalMs=1200] - Pause between polls in milliseconds.
 * @returns {Promise<number>} The stable text length, or the last length
 *   observed when the budget runs out (0 if nothing was ever seen).
 */
async function pollForResponseNodeSide(tab, maxMs = 15000, intervalMs = 1200) {
  const REQUIRED_STABLE_ROUNDS = 3;
  const lengthExpr = `${CHATGPT_RESPONSE_SELECTOR}?.innerText?.length ?? 0`;
  const deadline = Date.now() + maxMs;
  let lastLen = 0;
  let stableRounds = 0;

  while (Date.now() < deadline) {
    let len = null;
    try {
      const result = await cdp(["eval", tab, lengthExpr], 4000);
      len = Number.parseInt(result, 10) || 0;
    } catch {
      // Best-effort poll: a failed eval tells us nothing about the page,
      // so keep the current state and simply try again next round.
    }

    if (len !== null) {
      if (len >= 1 && len === lastLen) {
        stableRounds += 1;
        if (stableRounds >= REQUIRED_STABLE_ROUNDS) return len;
      } else {
        lastLen = len;
        stableRounds = 0;
      }
    }

    // Never sleep past the deadline; the loop would exit right after anyway.
    const remaining = deadline - Date.now();
    if (remaining <= 0) break;
    await new Promise((r) => setTimeout(r, Math.min(intervalMs, remaining)));
  }
  return lastLen;
}
138
+
139
/**
 * DOM-based answer extraction, used when the clipboard path yields nothing.
 *
 * Evaluates a script in the page that finds the last assistant message
 * following the last user message and serialises its text plus up to ten
 * de-duplicated links as JSON.
 *
 * @param {string} tab - Tab identifier passed through to cdp.
 * @returns {Promise<{answer: string, sources: Array<{title: string, url: string}>, skipped?: string}>}
 *   `skipped` is "no-user-message" or "no-assistant-response" when the page
 *   script found nothing to extract, and "parse-error" when the eval result
 *   was not valid JSON.
 */
async function extractAnswerFromDom(tab) {
  const pageScript = String.raw`
    (() => {
      // Find the assistant message that comes AFTER the last user message,
      // not the absolute last assistant element. The chatgpt.com homepage
      // has a static pre-rendered greeting card that renders as a
      // [data-message-author-role="assistant"] element with
      // data-turn-start-message="true" — it must be skipped or the
      // static "Hello! How can I help you today?" placeholder gets
      // returned as the answer to a query the assistant never answered.
      const all = Array.from(document.querySelectorAll('[data-message-author-role]'));
      let lastUserIdx = -1;
      for (let i = 0; i < all.length; i++) {
        if (all[i].getAttribute('data-message-author-role') === 'user') {
          lastUserIdx = i;
        }
      }
      if (lastUserIdx < 0) {
        // No user message at all — page is still on the homepage.
        return JSON.stringify({
          answer: '',
          sources: [],
          skipped: 'no-user-message',
        });
      }
      let assistant = null;
      for (let i = lastUserIdx + 1; i < all.length; i++) {
        if (all[i].getAttribute('data-message-author-role') === 'assistant') {
          assistant = all[i];
        }
      }
      if (!assistant) {
        return JSON.stringify({
          answer: '',
          sources: [],
          skipped: 'no-assistant-response',
        });
      }
      const answer = (assistant.innerText || assistant.textContent || '').trim();
      const seen = new Set();
      const sources = [];
      for (const link of assistant.querySelectorAll('a[href]')) {
        const url = link.href;
        if (!url || seen.has(url)) continue;
        seen.add(url);
        const title = (link.innerText || link.textContent || '').replace(/\s+/g, ' ').trim();
        sources.push({ title, url });
        if (sources.length >= 10) break;
      }
      return JSON.stringify({ answer, sources });
    })()
  `;

  const payload = await cdp(["eval", tab, pageScript]);

  // The page script always returns a JSON string; anything else coming back
  // from the eval is reported as a parse error rather than thrown.
  let parsed;
  try {
    parsed = JSON.parse(payload);
  } catch {
    parsed = { answer: "", sources: [], skipped: "parse-error" };
  }
  return parsed;
}
201
+
202
/**
 * Build the in-page script that clicks the copy button belonging to the
 * assistant message after the last user message.
 *
 * Picking the absolute last copy button on the page is wrong: while the
 * assistant response is still empty it has no copy button of its own, so
 * the last button is the USER message's, and the user's query would be
 * copied and returned as the "answer". When the assistant message has no
 * copy button yet, the script deliberately clicks nothing.
 *
 * The script evaluates to 'clicked', 'no-user' or 'no-assistant-copy'.
 * It is built lazily so COPY_SELECTOR is read at call time.
 */
function buildAssistantCopyClickScript() {
  return `(() => {
    const all = document.querySelectorAll('[data-message-author-role]');
    let lastUserIdx = -1;
    for (let i = 0; i < all.length; i++) {
      if (all[i].getAttribute('data-message-author-role') === 'user') lastUserIdx = i;
    }
    if (lastUserIdx < 0) return 'no-user';
    let assistantCopy = null;
    for (let i = lastUserIdx + 1; i < all.length; i++) {
      if (all[i].getAttribute('data-message-author-role') === 'assistant') {
        const btn = all[i].querySelector('${COPY_SELECTOR}');
        if (btn) assistantCopy = btn;
      }
    }
    if (assistantCopy) { assistantCopy.click(); return 'clicked'; }
    return 'no-assistant-copy';
  })()`;
}

/**
 * Click the assistant copy button, wait `settleMs` for the clipboard
 * interceptor to record the text, then read the intercepted value.
 * Resolves to '' when nothing was captured.
 */
async function copyAssistantAnswer(tab, settleMs) {
  await cdp(["eval", tab, buildAssistantCopyClickScript()]);
  await new Promise((r) => setTimeout(r, settleMs));
  return cdp(["eval", tab, `window.${GLOBAL_VAR} || ''`]);
}

/**
 * Extract the assistant's answer and its sources.
 *
 * Strategy: click the assistant's copy button and read the intercepted
 * clipboard text; retry once with a longer settle time (the button may not
 * have rendered yet); finally fall back to reading the DOM.
 *
 * When every strategy comes back empty this returns an empty answer
 * together with the DOM fallback's `skipped` reason instead of throwing a
 * generic error, so the caller's `!answer` guard can report why nothing was
 * extracted ("no-user-message", "no-assistant-response", ...).
 *
 * @param {string} tab - Tab identifier passed through to cdp.
 * @param {object} env - Diagnostics object; `clipboardEmpty` and (when the
 *   DOM fallback runs) `fallbackUsed` are written to it.
 * @returns {Promise<{answer: string, sources: Array<{title: string, url: string}>, skipped?: (string|null)}>}
 *   `skipped` is only present when `answer` is empty.
 */
async function extractAnswer(tab, env) {
  let answer = await copyAssistantAnswer(tab, 600);
  env.clipboardEmpty = !answer;

  // Retry once if the clipboard is empty — the assistant message may have
  // finished streaming and the copy button may have rendered in the meantime.
  if (!answer) {
    console.error("[chatgpt] Clipboard empty, retrying in 2s...");
    answer = await copyAssistantAnswer(tab, 2000);
    env.clipboardEmpty = !answer;
  }

  // An empty clipboard routes to the DOM fallback, which targets the
  // assistant message after the last user message and returns its text.
  let domFallback = null;
  if (!answer) {
    domFallback = await extractAnswerFromDom(tab);
    answer = domFallback?.answer ?? "";
    env.fallbackUsed = answer ? "dom" : null;
  }

  if (!answer) {
    // Hand the skip reason to the caller rather than masking it.
    return { answer: "", sources: [], skipped: domFallback?.skipped ?? null };
  }

  // Merge sources from the DOM links and from inline/reference-style
  // markdown links in the answer text; the first entry per URL wins, so
  // DOM sources take precedence, then reference-style, then inline.
  const sourceMap = new Map();
  const candidates = [
    ...(domFallback?.sources || []),
    ...parseSourcesFromMarkdownRefStyle(answer),
    ...parseSourcesFromMarkdown(answer),
  ];
  for (const s of candidates) {
    if (s?.url && !sourceMap.has(s.url)) sourceMap.set(s.url, s);
  }
  const sources = Array.from(sourceMap.values()).slice(0, 10);

  return { answer: answer.trim(), sources };
}
298
+
299
+ // ============================================================================
300
+ // Main
301
+ // ============================================================================
302
+
303
const USAGE = 'Usage: node extractors/chatgpt.mjs "<query>" [--tab <prefix>]\n';

/**
 * CLI entry point: open (or reuse) a chatgpt.com tab, submit the query,
 * wait for the reply to finish streaming, then print a JSON result.
 *
 * Stages are recorded through logStage in this order: nav (only when the
 * tab is not already on chatgpt.com), consent, verification, input-wait,
 * clipboard-inject, type-and-submit, stream-wait (optionally
 * stream-poll-fallback), extract, done.
 *
 * Any failure inside the staged section is logged to stderr and handed to
 * handleError together with the diagnostics envelope.
 */
async function main() {
  const args = await prepareArgs(process.argv.slice(2));
  validateQuery(args, USAGE);

  const { query, tabPrefix, short } = parseArgs(args);
  const startTime = Date.now();

  let mode = "headless";
  if (process.env.GREEDY_SEARCH_VISIBLE === "1") {
    mode = "visible";
  }

  // Diagnostics collected along the way and shipped in the envelope.
  const env = {
    engine: "chatgpt",
    mode,
    clipboardEmpty: null,
    fallbackUsed: null,
    blockedBy: null,
    verificationResult: null,
    inputReady: null,
  };

  const stage = (name) => logStage(env, name, startTime);
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  try {
    if (!tabPrefix) await cdp(["list"]);
    const tab = await getOrOpenTab(tabPrefix);

    // Reads the tab's current URL, substituting `fallback` if the eval fails.
    const readLocation = (fallback) =>
      cdp(["eval", tab, "document.location.href"]).catch(() => fallback);

    const currentUrl = await readLocation("");
    let onChatGPT;
    try {
      onChatGPT = new URL(currentUrl).hostname.toLowerCase() === "chatgpt.com";
    } catch {
      // An empty or unparsable URL counts as "not on ChatGPT".
      onChatGPT = false;
    }

    if (!onChatGPT) {
      stage("nav");
      await cdp(["nav", tab, "https://chatgpt.com"], 20000);
      await pause(600);
    }
    stage("consent");
    await dismissConsent(tab, cdp);
    stage("verification");
    await handleVerification(tab, cdp, 10000);

    stage("input-wait");
    env.inputReady = await waitForSelector(tab, PROSE_SELECTOR, 8000, 400);
    if (!env.inputReady) {
      // Tell a login wall apart from any other unexpected page state by
      // looking for sign-in wording in the page text.
      const bodyText = await cdp([
        "eval",
        tab,
        `document.body?.innerText || ''`,
      ]).catch(() => "");
      const loginWall =
        /sign in|log in|sign up|\u03a3\u03cd\u03bd\u03b4\u03b5\u03c3\u03b7|login/i;
      const message = loginWall.test(bodyText)
        ? "ChatGPT requires sign-in — please sign in in the visible browser window"
        : "ChatGPT input not found — page may be blocked or in unexpected state";
      throw new Error(message);
    }

    stage("clipboard-inject");
    await injectClipboardInterceptor(tab, GLOBAL_VAR);
    stage("type-and-submit");
    await typeAndSubmit(tab, query);

    stage("stream-wait");
    // First try the in-browser poll (a single Runtime.evaluate call). If it
    // throws — e.g. the response is still streaming past 20s because of tab
    // throttling in `all` mode — switch to node-side polls that release the
    // WebSocket between calls. Together they stay well within the engine's
    // 80s outer budget.
    let asstLen;
    try {
      asstLen = await waitForResponse(tab, 20000);
    } catch {
      stage("stream-poll-fallback");
      asstLen = await pollForResponseNodeSide(tab, 15000);
    }
    env.assistantTextLen = asstLen;
    if (asstLen < 1) {
      console.error(
        "[chatgpt] Warning: assistant response may not have completed",
      );
    }

    stage("extract");
    const { answer, sources, skipped } = await extractAnswer(tab, env);
    if (!answer) {
      // Surface a clear error instead of letting the caller consume the
      // static homepage greeting card ("Hello! How can I help you today?")
      // as if it were a real answer.
      env.blockedBy = "no-response";
      env.skipped = skipped || null;
      let reason = "ChatGPT returned no answer — assistant never responded";
      if (skipped === "no-user-message") {
        reason = "ChatGPT still on homepage — query was not submitted";
      } else if (skipped === "no-assistant-response") {
        reason = "ChatGPT did not return an assistant response after submit";
      }
      throw new Error(reason);
    }
    stage("done");

    const finalUrl = await readLocation("https://chatgpt.com");
    env.durationMs = Date.now() - startTime;
    outputJson({
      query,
      url: finalUrl,
      answer: formatAnswer(answer, short),
      sources,
      _envelope: buildEnvelope(env),
    });
  } catch (e) {
    env.durationMs = Date.now() - startTime;
    console.error(
      `[chatgpt] error during stage '${env.lastStage || "unknown"}': ${e.message}`,
    );
    handleError(e, buildEnvelope(env));
  }
}

main();