@apmantza/greedysearch-pi 1.1.5 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extractors/bing-copilot.mjs +12 -9
- package/extractors/gemini.mjs +14 -10
- package/extractors/google-ai.mjs +9 -5
- package/extractors/perplexity.mjs +11 -8
- package/extractors/selectors.mjs +52 -0
- package/package.json +2 -2
package/extractors/bing-copilot.mjs
CHANGED
|
@@ -14,6 +14,7 @@ import { tmpdir } from 'os';
|
|
|
14
14
|
import { join, dirname } from 'path';
|
|
15
15
|
import { fileURLToPath } from 'url';
|
|
16
16
|
import { dismissConsent, handleVerification } from './consent.mjs';
|
|
17
|
+
import { SELECTORS } from './selectors.mjs';
|
|
17
18
|
|
|
18
19
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
19
20
|
const CDP = join(__dir, '..', 'cdp.mjs');
|
|
@@ -22,6 +23,8 @@ const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
|
|
|
22
23
|
const COPY_POLL_INTERVAL = 700;
|
|
23
24
|
const COPY_TIMEOUT = 60000;
|
|
24
25
|
|
|
26
|
+
const S = SELECTORS.bing;
|
|
27
|
+
|
|
25
28
|
// ---------------------------------------------------------------------------
|
|
26
29
|
|
|
27
30
|
function cdp(args, timeoutMs = 30000) {
|
|
@@ -84,7 +87,7 @@ async function waitForCopyButton(tab) {
|
|
|
84
87
|
while (Date.now() < deadline) {
|
|
85
88
|
await new Promise(r => setTimeout(r, COPY_POLL_INTERVAL));
|
|
86
89
|
const found = await cdp(['eval', tab,
|
|
87
|
-
`!!document.querySelector('
|
|
90
|
+
`!!document.querySelector('${S.copyButton}')`
|
|
88
91
|
]).catch(() => 'false');
|
|
89
92
|
if (found === 'true') return;
|
|
90
93
|
}
|
|
@@ -92,7 +95,7 @@ async function waitForCopyButton(tab) {
|
|
|
92
95
|
}
|
|
93
96
|
|
|
94
97
|
async function extractAnswer(tab) {
|
|
95
|
-
await cdp(['eval', tab, `document.querySelector('
|
|
98
|
+
await cdp(['eval', tab, `document.querySelector('${S.copyButton}')?.click()`]);
|
|
96
99
|
await new Promise(r => setTimeout(r, 400));
|
|
97
100
|
|
|
98
101
|
const answer = await cdp(['eval', tab, `window.__bingClipboard || ''`]);
|
|
@@ -100,9 +103,9 @@ async function extractAnswer(tab) {
|
|
|
100
103
|
|
|
101
104
|
const raw = await cdp(['eval', tab, `
|
|
102
105
|
(function() {
|
|
103
|
-
var sources = Array.from(document.querySelectorAll('
|
|
106
|
+
var sources = Array.from(document.querySelectorAll('${S.sourceLink}'))
|
|
104
107
|
.map(a => ({ url: a.href, title: a.innerText?.trim().split('\\n')[0] || a.title || '' }))
|
|
105
|
-
.filter(s => s.url && !s.url.includes('
|
|
108
|
+
.filter(s => s.url && !s.url.includes('${S.sourceExclude}'))
|
|
106
109
|
.filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
|
|
107
110
|
.slice(0, 10);
|
|
108
111
|
return JSON.stringify(sources);
|
|
@@ -158,30 +161,30 @@ async function main() {
|
|
|
158
161
|
}
|
|
159
162
|
}
|
|
160
163
|
|
|
161
|
-
// Wait for React app to mount
|
|
164
|
+
// Wait for React app to mount input (up to 15s, longer after verification)
|
|
162
165
|
const inputDeadline = Date.now() + 15000;
|
|
163
166
|
while (Date.now() < inputDeadline) {
|
|
164
|
-
const found = await cdp(['eval', tab, `!!document.querySelector('
|
|
167
|
+
const found = await cdp(['eval', tab, `!!document.querySelector('${S.input}')`]).catch(() => 'false');
|
|
165
168
|
if (found === 'true') break;
|
|
166
169
|
await new Promise(r => setTimeout(r, 500));
|
|
167
170
|
}
|
|
168
171
|
await new Promise(r => setTimeout(r, 300));
|
|
169
172
|
|
|
170
173
|
// Verify input is actually there before proceeding
|
|
171
|
-
const inputReady = await cdp(['eval', tab, `!!document.querySelector('
|
|
174
|
+
const inputReady = await cdp(['eval', tab, `!!document.querySelector('${S.input}')`]).catch(() => 'false');
|
|
172
175
|
if (inputReady !== 'true') {
|
|
173
176
|
throw new Error('Copilot input not found — verification may have failed or page is in unexpected state');
|
|
174
177
|
}
|
|
175
178
|
|
|
176
179
|
await injectClipboardInterceptor(tab);
|
|
177
|
-
await cdp(['click', tab,
|
|
180
|
+
await cdp(['click', tab, S.input]);
|
|
178
181
|
await new Promise(r => setTimeout(r, 400));
|
|
179
182
|
await cdp(['type', tab, query]);
|
|
180
183
|
await new Promise(r => setTimeout(r, 400));
|
|
181
184
|
|
|
182
185
|
// Submit with Enter (most reliable across locales and Chrome instances)
|
|
183
186
|
await cdp(['eval', tab,
|
|
184
|
-
`document.querySelector('
|
|
187
|
+
`document.querySelector('${S.input}')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`
|
|
185
188
|
]);
|
|
186
189
|
|
|
187
190
|
await waitForCopyButton(tab);
|
package/extractors/gemini.mjs
CHANGED
|
@@ -14,13 +14,16 @@ import { tmpdir } from 'os';
|
|
|
14
14
|
import { join, dirname } from 'path';
|
|
15
15
|
import { fileURLToPath } from 'url';
|
|
16
16
|
import { dismissConsent, handleVerification } from './consent.mjs';
|
|
17
|
+
import { SELECTORS } from './selectors.mjs';
|
|
17
18
|
|
|
18
19
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
19
20
|
const CDP = join(__dir, '..', 'cdp.mjs');
|
|
20
21
|
const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
|
|
21
22
|
|
|
22
23
|
const COPY_POLL_INTERVAL = 600;
|
|
23
|
-
const COPY_TIMEOUT = 120000;
|
|
24
|
+
const COPY_TIMEOUT = 120000;
|
|
25
|
+
|
|
26
|
+
const S = SELECTORS.gemini;
|
|
24
27
|
|
|
25
28
|
// ---------------------------------------------------------------------------
|
|
26
29
|
|
|
@@ -53,7 +56,7 @@ async function getOrOpenTab(tabPrefix) {
|
|
|
53
56
|
async function typeIntoGemini(tab, text) {
|
|
54
57
|
await cdp(['eval', tab, `
|
|
55
58
|
(function(t) {
|
|
56
|
-
var el = document.querySelector('
|
|
59
|
+
var el = document.querySelector('${S.input}');
|
|
57
60
|
if (!el) return false;
|
|
58
61
|
el.focus();
|
|
59
62
|
document.execCommand('insertText', false, t);
|
|
@@ -93,7 +96,7 @@ async function waitForCopyButton(tab) {
|
|
|
93
96
|
while (Date.now() < deadline) {
|
|
94
97
|
await new Promise(r => setTimeout(r, COPY_POLL_INTERVAL));
|
|
95
98
|
const found = await cdp(['eval', tab,
|
|
96
|
-
`!!document.querySelector('
|
|
99
|
+
`!!document.querySelector('${S.copyButton}')`
|
|
97
100
|
]).catch(() => 'false');
|
|
98
101
|
if (found === 'true') return;
|
|
99
102
|
}
|
|
@@ -102,16 +105,17 @@ async function waitForCopyButton(tab) {
|
|
|
102
105
|
|
|
103
106
|
async function extractAnswer(tab) {
|
|
104
107
|
// Click copy button → our interceptor captures the text.
|
|
105
|
-
await cdp(['eval', tab, `document.querySelector('
|
|
108
|
+
await cdp(['eval', tab, `document.querySelector('${S.copyButton}')?.click()`]);
|
|
106
109
|
await new Promise(r => setTimeout(r, 400));
|
|
107
110
|
|
|
108
111
|
const answer = await cdp(['eval', tab, `window.__geminiClipboard || ''`]);
|
|
109
112
|
if (!answer) throw new Error('Clipboard interceptor returned empty text');
|
|
110
113
|
|
|
111
114
|
// Click "Sources" button to open the sidebar with proper source cards
|
|
115
|
+
const sourceExcludeFilter = S.sourcesExclude.map(e => `!s.url.includes('${e}')`).join(' && ');
|
|
112
116
|
await cdp(['eval', tab, `
|
|
113
117
|
(function() {
|
|
114
|
-
var btn = document.querySelector('
|
|
118
|
+
var btn = document.querySelector('${S.sourcesSidebarButton}');
|
|
115
119
|
if (!btn) btn = Array.from(document.querySelectorAll('button')).find(b => b.innerText?.trim() === 'Sources');
|
|
116
120
|
if (btn) { btn.click(); return 'clicked'; }
|
|
117
121
|
return 'not-found';
|
|
@@ -131,17 +135,17 @@ async function extractAnswer(tab) {
|
|
|
131
135
|
var container = sourceHeading.closest('.container') || sourceHeading.parentElement;
|
|
132
136
|
var links = Array.from(container.querySelectorAll('a[href^="http"]'))
|
|
133
137
|
.map(a => ({ url: a.href.split('#')[0], title: a.innerText?.trim().split('\\n')[0] || '' }))
|
|
134
|
-
.filter(s => s.url &&
|
|
138
|
+
.filter(s => s.url && ${sourceExcludeFilter})
|
|
135
139
|
.filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
|
|
136
140
|
.slice(0, 8);
|
|
137
141
|
return JSON.stringify(links);
|
|
138
142
|
}
|
|
139
143
|
// Fallback: inline source cards with aria-labels
|
|
140
|
-
var cards = Array.from(document.querySelectorAll('
|
|
144
|
+
var cards = Array.from(document.querySelectorAll('${S.citationButtonPattern}'));
|
|
141
145
|
if (cards.length) {
|
|
142
146
|
return JSON.stringify(cards.map(b => {
|
|
143
147
|
var label = b.getAttribute('aria-label') || '';
|
|
144
|
-
var name = label.match(
|
|
148
|
+
var name = label.match(${S.citationNameRegex})?.[1] || label;
|
|
145
149
|
return { url: '', title: name };
|
|
146
150
|
}));
|
|
147
151
|
}
|
|
@@ -188,7 +192,7 @@ async function main() {
|
|
|
188
192
|
// Wait for input to be ready
|
|
189
193
|
const deadline = Date.now() + 10000;
|
|
190
194
|
while (Date.now() < deadline) {
|
|
191
|
-
const ready = await cdp(['eval', tab, `!!document.querySelector('
|
|
195
|
+
const ready = await cdp(['eval', tab, `!!document.querySelector('${S.input}')`]).catch(() => 'false');
|
|
192
196
|
if (ready === 'true') break;
|
|
193
197
|
await new Promise(r => setTimeout(r, 400));
|
|
194
198
|
}
|
|
@@ -198,7 +202,7 @@ async function main() {
|
|
|
198
202
|
await typeIntoGemini(tab, query);
|
|
199
203
|
await new Promise(r => setTimeout(r, 400));
|
|
200
204
|
|
|
201
|
-
await cdp(['eval', tab, `document.querySelector('
|
|
205
|
+
await cdp(['eval', tab, `document.querySelector('${S.sendButton}')?.click()`]);
|
|
202
206
|
|
|
203
207
|
await waitForCopyButton(tab);
|
|
204
208
|
|
package/extractors/google-ai.mjs
CHANGED
|
@@ -14,6 +14,7 @@ import { tmpdir } from 'os';
|
|
|
14
14
|
import { join, dirname } from 'path';
|
|
15
15
|
import { fileURLToPath } from 'url';
|
|
16
16
|
import { dismissConsent, handleVerification } from './consent.mjs';
|
|
17
|
+
import { SELECTORS } from './selectors.mjs';
|
|
17
18
|
|
|
18
19
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
19
20
|
const CDP = join(__dir, '..', 'cdp.mjs');
|
|
@@ -24,6 +25,8 @@ const STREAM_STABLE_ROUNDS = 3;
|
|
|
24
25
|
const STREAM_TIMEOUT = 45000;
|
|
25
26
|
const MIN_ANSWER_LENGTH = 50;
|
|
26
27
|
|
|
28
|
+
const S = SELECTORS.google;
|
|
29
|
+
|
|
27
30
|
// ---------------------------------------------------------------------------
|
|
28
31
|
|
|
29
32
|
function cdp(args, timeoutMs = 30000) {
|
|
@@ -66,7 +69,7 @@ async function waitForStreamComplete(tab) {
|
|
|
66
69
|
await new Promise(r => setTimeout(r, STREAM_POLL_INTERVAL));
|
|
67
70
|
|
|
68
71
|
const lenStr = await cdp(['eval', tab,
|
|
69
|
-
`(document.querySelector('.
|
|
72
|
+
`(document.querySelector('${S.answerContainer}')?.innerText?.length || 0) + ''`
|
|
70
73
|
]).catch(() => '0');
|
|
71
74
|
|
|
72
75
|
const len = parseInt(lenStr) || 0;
|
|
@@ -85,14 +88,15 @@ async function waitForStreamComplete(tab) {
|
|
|
85
88
|
}
|
|
86
89
|
|
|
87
90
|
async function extractAnswer(tab) {
|
|
91
|
+
const excludeFilter = S.sourceExclude.map(e => `!a.href.includes('${e}')`).join(' && ');
|
|
88
92
|
const raw = await cdp(['eval', tab, `
|
|
89
93
|
(function() {
|
|
90
|
-
var el = document.querySelector('.
|
|
94
|
+
var el = document.querySelector('${S.answerContainer}');
|
|
91
95
|
if (!el) return JSON.stringify({ answer: '', sources: [] });
|
|
92
96
|
var answer = el.innerText.trim();
|
|
93
|
-
var sources = Array.from(document.querySelectorAll('
|
|
94
|
-
.filter(a =>
|
|
95
|
-
.map(a => ({ url: a.href.split('#')[0], title: (a.closest('
|
|
97
|
+
var sources = Array.from(document.querySelectorAll('${S.sourceLink}'))
|
|
98
|
+
.filter(a => ${excludeFilter})
|
|
99
|
+
.map(a => ({ url: a.href.split('#')[0], title: (a.closest('${S.sourceHeadingParent}')?.querySelector('h3, [role=heading]')?.innerText || a.innerText?.trim().split('\\n')[0] || '').slice(0, 100) }))
|
|
96
100
|
.filter(s => s.url && s.url.length > 10)
|
|
97
101
|
.filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
|
|
98
102
|
.slice(0, 10);
|
|
package/extractors/perplexity.mjs
CHANGED
|
@@ -14,6 +14,7 @@ import { tmpdir } from 'os';
|
|
|
14
14
|
import { join, dirname } from 'path';
|
|
15
15
|
import { fileURLToPath } from 'url';
|
|
16
16
|
import { dismissConsent } from './consent.mjs';
|
|
17
|
+
import { SELECTORS } from './selectors.mjs';
|
|
17
18
|
|
|
18
19
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
19
20
|
const CDP = join(__dir, '..', 'cdp.mjs');
|
|
@@ -22,6 +23,8 @@ const PAGES_CACHE = `${tmpdir().replace(/\\/g, '/')}/cdp-pages.json`;
|
|
|
22
23
|
const COPY_POLL_INTERVAL = 600;
|
|
23
24
|
const COPY_TIMEOUT = 30000;
|
|
24
25
|
|
|
26
|
+
const S = SELECTORS.perplexity;
|
|
27
|
+
|
|
25
28
|
// ---------------------------------------------------------------------------
|
|
26
29
|
|
|
27
30
|
function cdp(args, timeoutMs = 30000) {
|
|
@@ -87,7 +90,7 @@ async function waitForCopyButton(tab) {
|
|
|
87
90
|
while (Date.now() < deadline) {
|
|
88
91
|
await new Promise(r => setTimeout(r, COPY_POLL_INTERVAL));
|
|
89
92
|
const found = await cdp(['eval', tab,
|
|
90
|
-
`!!document.querySelector('
|
|
93
|
+
`!!document.querySelector('${S.copyButton}')`
|
|
91
94
|
]).catch(() => 'false');
|
|
92
95
|
if (found === 'true') return;
|
|
93
96
|
}
|
|
@@ -95,7 +98,7 @@ async function waitForCopyButton(tab) {
|
|
|
95
98
|
}
|
|
96
99
|
|
|
97
100
|
async function extractAnswer(tab) {
|
|
98
|
-
await cdp(['eval', tab, `document.querySelector('
|
|
101
|
+
await cdp(['eval', tab, `document.querySelector('${S.copyButton}')?.click()`]);
|
|
99
102
|
await new Promise(r => setTimeout(r, 400));
|
|
100
103
|
|
|
101
104
|
const answer = await cdp(['eval', tab, `window.__pplxClipboard || ''`]);
|
|
@@ -103,8 +106,8 @@ async function extractAnswer(tab) {
|
|
|
103
106
|
|
|
104
107
|
const raw = await cdp(['eval', tab, `
|
|
105
108
|
(function() {
|
|
106
|
-
var sources = Array.from(document.querySelectorAll('
|
|
107
|
-
.map(el => ({ url: el.getAttribute('data-pplx-citation-url'), title: el.querySelector('
|
|
109
|
+
var sources = Array.from(document.querySelectorAll('${S.sourceItem}'))
|
|
110
|
+
.map(el => ({ url: el.getAttribute('data-pplx-citation-url'), title: el.querySelector('${S.sourceLink}')?.innerText?.trim() || '' }))
|
|
108
111
|
.filter(s => s.url)
|
|
109
112
|
.filter((v, i, arr) => arr.findIndex(x => x.url === v.url) === i)
|
|
110
113
|
.slice(0, 10);
|
|
@@ -144,23 +147,23 @@ async function main() {
|
|
|
144
147
|
await cdp(['nav', tab, 'https://www.perplexity.ai/'], 35000);
|
|
145
148
|
await dismissConsent(tab, cdp);
|
|
146
149
|
|
|
147
|
-
// Wait for React app to mount
|
|
150
|
+
// Wait for React app to mount input (up to 8s)
|
|
148
151
|
const deadline = Date.now() + 8000;
|
|
149
152
|
while (Date.now() < deadline) {
|
|
150
|
-
const found = await cdp(['eval', tab, `!!document.querySelector('
|
|
153
|
+
const found = await cdp(['eval', tab, `!!document.querySelector('${S.input}')`]).catch(() => 'false');
|
|
151
154
|
if (found === 'true') break;
|
|
152
155
|
await new Promise(r => setTimeout(r, 400));
|
|
153
156
|
}
|
|
154
157
|
await new Promise(r => setTimeout(r, 300));
|
|
155
158
|
|
|
156
159
|
await injectClipboardInterceptor(tab);
|
|
157
|
-
await cdp(['click', tab,
|
|
160
|
+
await cdp(['click', tab, S.input]);
|
|
158
161
|
await new Promise(r => setTimeout(r, 400));
|
|
159
162
|
await cdp(['type', tab, query]);
|
|
160
163
|
await new Promise(r => setTimeout(r, 400));
|
|
161
164
|
// Submit with Enter (most reliable across Chrome instances)
|
|
162
165
|
await cdp(['eval', tab,
|
|
163
|
-
`document.querySelector('
|
|
166
|
+
`document.querySelector('${S.input}')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`
|
|
164
167
|
]);
|
|
165
168
|
|
|
166
169
|
await waitForCopyButton(tab);
|
|
package/extractors/selectors.mjs
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
// extractors/selectors.mjs
|
|
2
|
+
// Centralized CSS selectors for all engines.
|
|
3
|
+
// Update selectors here when a site changes its UI.
|
|
4
|
+
|
|
5
|
+
export const SELECTORS = {
|
|
6
|
+
// ──────────────────────────────────────────────
|
|
7
|
+
// Perplexity (perplexity.ai)
|
|
8
|
+
// ──────────────────────────────────────────────
|
|
9
|
+
perplexity: {
|
|
10
|
+
input: '#ask-input',
|
|
11
|
+
copyButton: 'button[aria-label="Copy"]',
|
|
12
|
+
sourceItem: '[data-pplx-citation-url]',
|
|
13
|
+
sourceLink: 'a',
|
|
14
|
+
consent: '#onetrust-accept-btn-handler',
|
|
15
|
+
},
|
|
16
|
+
|
|
17
|
+
// ──────────────────────────────────────────────
|
|
18
|
+
// Bing Copilot (copilot.microsoft.com)
|
|
19
|
+
// ──────────────────────────────────────────────
|
|
20
|
+
bing: {
|
|
21
|
+
input: '#userInput',
|
|
22
|
+
copyButton: 'button[data-testid="copy-ai-message-button"]',
|
|
23
|
+
sourceLink: 'a[href^="http"][target="_blank"]',
|
|
24
|
+
sourceExclude: 'copilot.microsoft.com',
|
|
25
|
+
consent: '#onetrust-accept-btn-handler',
|
|
26
|
+
},
|
|
27
|
+
|
|
28
|
+
// ──────────────────────────────────────────────
|
|
29
|
+
// Google AI Mode (google.com/search?udm=50)
|
|
30
|
+
// ──────────────────────────────────────────────
|
|
31
|
+
google: {
|
|
32
|
+
answerContainer: '.pWvJNd',
|
|
33
|
+
sourceLink: 'a[href^="http"]',
|
|
34
|
+
sourceExclude: ['google.', 'gstatic', 'googleapis'],
|
|
35
|
+
sourceHeadingParent: '[data-snhf]',
|
|
36
|
+
consent: '#L2AGLb, button[jsname="b3VHJd"], .tHlp8d',
|
|
37
|
+
},
|
|
38
|
+
|
|
39
|
+
// ──────────────────────────────────────────────
|
|
40
|
+
// Gemini (gemini.google.com/app)
|
|
41
|
+
// ──────────────────────────────────────────────
|
|
42
|
+
gemini: {
|
|
43
|
+
input: 'rich-textarea .ql-editor',
|
|
44
|
+
copyButton: 'button[aria-label="Copy"]',
|
|
45
|
+
sendButton: 'button[aria-label*="Send"]',
|
|
46
|
+
sourcesSidebarButton: 'button.legacy-sources-sidebar-button',
|
|
47
|
+
sourcesExclude: ['gemini.google', 'gstatic', 'google.com/search'],
|
|
48
|
+
citationButtonPattern: 'button[aria-label*="citation from"]',
|
|
49
|
+
// For parsing citation aria-labels: "View source details for citation from {name}. Opens side panel."
|
|
50
|
+
citationNameRegex: /from\s+(.+?)\.\s/,
|
|
51
|
+
},
|
|
52
|
+
};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@apmantza/greedysearch-pi",
|
|
3
|
-
"version": "1.1.5",
|
|
4
|
-
"description": "Pi extension:
|
|
3
|
+
"version": "1.1.7",
|
|
4
|
+
"description": "Pi extension: browser-automation tool that searches Perplexity, Bing Copilot, and Google AI in parallel, extracts answers and sources via CDP, with optional Gemini synthesis — grounded AI answers from real browser interactions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"pi-package"
|