@apmantza/greedysearch-pi 1.7.2 → 1.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +110 -94
- package/README.md +73 -73
- package/bin/coding-task.mjs +5 -27
- package/bin/search.mjs +159 -185
- package/extractors/bing-copilot.mjs +8 -20
- package/extractors/common.mjs +44 -2
- package/extractors/gemini.mjs +20 -35
- package/extractors/google-ai.mjs +11 -43
- package/index.ts +18 -18
- package/package.json +46 -46
package/bin/search.mjs
CHANGED
|
@@ -32,19 +32,19 @@ import http from "node:http";
|
|
|
32
32
|
import { tmpdir } from "node:os";
|
|
33
33
|
import { dirname, join } from "node:path";
|
|
34
34
|
import { fileURLToPath } from "node:url";
|
|
35
|
-
import { fetchSourceHttp, shouldUseBrowser } from "../src/fetcher.mjs";
|
|
36
|
-
import { fetchGitHubContent, parseGitHubUrl } from "../src/github.mjs";
|
|
37
|
-
import { trimContentHeadTail } from "../src/utils/content.mjs";
|
|
35
|
+
import { fetchSourceHttp, shouldUseBrowser } from "../src/fetcher.mjs";
|
|
36
|
+
import { fetchGitHubContent, parseGitHubUrl } from "../src/github.mjs";
|
|
37
|
+
import { trimContentHeadTail } from "../src/utils/content.mjs";
|
|
38
38
|
|
|
39
39
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
40
40
|
const CDP = join(__dir, "cdp.mjs");
|
|
41
41
|
const PAGES_CACHE = `${tmpdir().replace(/\\/g, "/")}/cdp-pages.json`;
|
|
42
42
|
|
|
43
|
-
const GREEDY_PORT = 9222;
|
|
44
|
-
const SOURCE_FETCH_CONCURRENCY = Math.max(
|
|
45
|
-
1,
|
|
46
|
-
parseInt(process.env.GREEDY_FETCH_CONCURRENCY || "2", 10) || 2,
|
|
47
|
-
);
|
|
43
|
+
const GREEDY_PORT = 9222;
|
|
44
|
+
const SOURCE_FETCH_CONCURRENCY = Math.max(
|
|
45
|
+
1,
|
|
46
|
+
parseInt(process.env.GREEDY_FETCH_CONCURRENCY || "2", 10) || 2,
|
|
47
|
+
);
|
|
48
48
|
|
|
49
49
|
const ENGINES = {
|
|
50
50
|
perplexity: "perplexity.mjs",
|
|
@@ -752,27 +752,6 @@ async function getAnyTab() {
|
|
|
752
752
|
return first.slice(0, 8);
|
|
753
753
|
}
|
|
754
754
|
|
|
755
|
-
async function _getOrReuseBlankTab() {
|
|
756
|
-
// Reuse an existing about:blank tab rather than always creating a new one
|
|
757
|
-
const listOut = await cdp(["list"]);
|
|
758
|
-
const lines = listOut.split("\n").filter(Boolean);
|
|
759
|
-
for (const line of lines) {
|
|
760
|
-
if (line.includes("about:blank")) {
|
|
761
|
-
return line.slice(0, 8); // prefix of the blank tab's targetId
|
|
762
|
-
}
|
|
763
|
-
}
|
|
764
|
-
// No blank tab — open a new one
|
|
765
|
-
const anchor = await getAnyTab();
|
|
766
|
-
const raw = await cdp([
|
|
767
|
-
"evalraw",
|
|
768
|
-
anchor,
|
|
769
|
-
"Target.createTarget",
|
|
770
|
-
'{"url":"about:blank"}',
|
|
771
|
-
]);
|
|
772
|
-
const { targetId } = JSON.parse(raw);
|
|
773
|
-
return targetId;
|
|
774
|
-
}
|
|
775
|
-
|
|
776
755
|
async function openNewTab() {
|
|
777
756
|
const anchor = await getAnyTab();
|
|
778
757
|
const raw = await cdp([
|
|
@@ -785,11 +764,6 @@ async function openNewTab() {
|
|
|
785
764
|
return targetId;
|
|
786
765
|
}
|
|
787
766
|
|
|
788
|
-
async function _getOrOpenEngineTab(engine) {
|
|
789
|
-
await cdp(["list"]);
|
|
790
|
-
return getFullTabFromCache(engine) || openNewTab();
|
|
791
|
-
}
|
|
792
|
-
|
|
793
767
|
async function activateTab(targetId) {
|
|
794
768
|
try {
|
|
795
769
|
const anchor = await getAnyTab();
|
|
@@ -844,13 +818,13 @@ function runExtractor(
|
|
|
844
818
|
...(tabPrefix ? ["--tab", tabPrefix] : []),
|
|
845
819
|
...(short ? ["--short"] : []),
|
|
846
820
|
];
|
|
847
|
-
return new Promise((resolve, reject) => {
|
|
848
|
-
const proc = spawn(
|
|
849
|
-
"node",
|
|
850
|
-
[join(__dir, "..", "extractors", script), query, ...extraArgs],
|
|
851
|
-
{
|
|
852
|
-
stdio: ["ignore", "pipe", "pipe"],
|
|
853
|
-
env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
|
|
821
|
+
return new Promise((resolve, reject) => {
|
|
822
|
+
const proc = spawn(
|
|
823
|
+
"node",
|
|
824
|
+
[join(__dir, "..", "extractors", script), query, ...extraArgs],
|
|
825
|
+
{
|
|
826
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
827
|
+
env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
|
|
854
828
|
},
|
|
855
829
|
);
|
|
856
830
|
let out = "";
|
|
@@ -1032,62 +1006,62 @@ async function fetchSourceContentBrowser(url, maxChars = 8000) {
|
|
|
1032
1006
|
}
|
|
1033
1007
|
}
|
|
1034
1008
|
|
|
1035
|
-
async function fetchMultipleSources(
|
|
1036
|
-
sources,
|
|
1037
|
-
maxSources = 5,
|
|
1038
|
-
maxChars = 8000,
|
|
1039
|
-
concurrency = SOURCE_FETCH_CONCURRENCY,
|
|
1040
|
-
) {
|
|
1041
|
-
const toFetch = sources.slice(0, maxSources);
|
|
1042
|
-
if (toFetch.length === 0) return [];
|
|
1043
|
-
|
|
1044
|
-
const workerCount = Math.min(
|
|
1045
|
-
toFetch.length,
|
|
1046
|
-
Math.max(1, parseInt(String(concurrency), 10) || SOURCE_FETCH_CONCURRENCY),
|
|
1047
|
-
);
|
|
1048
|
-
|
|
1049
|
-
process.stderr.write(
|
|
1050
|
-
`[greedysearch] Fetching content from ${toFetch.length} sources via HTTP (concurrency ${workerCount})...\n`,
|
|
1051
|
-
);
|
|
1052
|
-
|
|
1053
|
-
const fetched = new Array(toFetch.length);
|
|
1054
|
-
let nextIndex = 0;
|
|
1055
|
-
let completed = 0;
|
|
1056
|
-
|
|
1057
|
-
async function worker() {
|
|
1058
|
-
while (true) {
|
|
1059
|
-
const index = nextIndex++;
|
|
1060
|
-
if (index >= toFetch.length) return;
|
|
1061
|
-
|
|
1062
|
-
const s = toFetch[index];
|
|
1063
|
-
const url = s.canonicalUrl || s.url;
|
|
1064
|
-
process.stderr.write(
|
|
1065
|
-
`[greedysearch] [${index + 1}/${toFetch.length}] Fetching: ${url.slice(0, 60)}...\n`,
|
|
1066
|
-
);
|
|
1067
|
-
|
|
1068
|
-
const result = await fetchSourceContent(url, maxChars);
|
|
1069
|
-
fetched[index] = {
|
|
1070
|
-
id: s.id,
|
|
1071
|
-
...result,
|
|
1072
|
-
};
|
|
1073
|
-
|
|
1074
|
-
if (result.content && result.content.length > 100) {
|
|
1075
|
-
process.stderr.write(
|
|
1076
|
-
`[greedysearch] ✓ ${result.source}: ${result.content.length} chars\n`,
|
|
1077
|
-
);
|
|
1078
|
-
} else if (result.error) {
|
|
1079
|
-
process.stderr.write(`[greedysearch] ✗ ${result.error.slice(0, 80)}\n`);
|
|
1080
|
-
}
|
|
1081
|
-
|
|
1082
|
-
completed += 1;
|
|
1083
|
-
process.stderr.write(`PROGRESS:fetch:${completed}/${toFetch.length}\n`);
|
|
1084
|
-
}
|
|
1085
|
-
}
|
|
1086
|
-
|
|
1087
|
-
await Promise.all(Array.from({ length: workerCount }, () => worker()));
|
|
1088
|
-
|
|
1089
|
-
// Log summary
|
|
1090
|
-
const successful = fetched.filter((f) => f.content && f.content.length > 100);
|
|
1009
|
+
async function fetchMultipleSources(
|
|
1010
|
+
sources,
|
|
1011
|
+
maxSources = 5,
|
|
1012
|
+
maxChars = 8000,
|
|
1013
|
+
concurrency = SOURCE_FETCH_CONCURRENCY,
|
|
1014
|
+
) {
|
|
1015
|
+
const toFetch = sources.slice(0, maxSources);
|
|
1016
|
+
if (toFetch.length === 0) return [];
|
|
1017
|
+
|
|
1018
|
+
const workerCount = Math.min(
|
|
1019
|
+
toFetch.length,
|
|
1020
|
+
Math.max(1, parseInt(String(concurrency), 10) || SOURCE_FETCH_CONCURRENCY),
|
|
1021
|
+
);
|
|
1022
|
+
|
|
1023
|
+
process.stderr.write(
|
|
1024
|
+
`[greedysearch] Fetching content from ${toFetch.length} sources via HTTP (concurrency ${workerCount})...\n`,
|
|
1025
|
+
);
|
|
1026
|
+
|
|
1027
|
+
const fetched = new Array(toFetch.length);
|
|
1028
|
+
let nextIndex = 0;
|
|
1029
|
+
let completed = 0;
|
|
1030
|
+
|
|
1031
|
+
async function worker() {
|
|
1032
|
+
while (true) {
|
|
1033
|
+
const index = nextIndex++;
|
|
1034
|
+
if (index >= toFetch.length) return;
|
|
1035
|
+
|
|
1036
|
+
const s = toFetch[index];
|
|
1037
|
+
const url = s.canonicalUrl || s.url;
|
|
1038
|
+
process.stderr.write(
|
|
1039
|
+
`[greedysearch] [${index + 1}/${toFetch.length}] Fetching: ${url.slice(0, 60)}...\n`,
|
|
1040
|
+
);
|
|
1041
|
+
|
|
1042
|
+
const result = await fetchSourceContent(url, maxChars);
|
|
1043
|
+
fetched[index] = {
|
|
1044
|
+
id: s.id,
|
|
1045
|
+
...result,
|
|
1046
|
+
};
|
|
1047
|
+
|
|
1048
|
+
if (result.content && result.content.length > 100) {
|
|
1049
|
+
process.stderr.write(
|
|
1050
|
+
`[greedysearch] ✓ ${result.source}: ${result.content.length} chars\n`,
|
|
1051
|
+
);
|
|
1052
|
+
} else if (result.error) {
|
|
1053
|
+
process.stderr.write(`[greedysearch] ✗ ${result.error.slice(0, 80)}\n`);
|
|
1054
|
+
}
|
|
1055
|
+
|
|
1056
|
+
completed += 1;
|
|
1057
|
+
process.stderr.write(`PROGRESS:fetch:${completed}/${toFetch.length}\n`);
|
|
1058
|
+
}
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
await Promise.all(Array.from({ length: workerCount }, () => worker()));
|
|
1062
|
+
|
|
1063
|
+
// Log summary
|
|
1064
|
+
const successful = fetched.filter((f) => f.content && f.content.length > 100);
|
|
1091
1065
|
const httpCount = fetched.filter((f) => f.source === "http").length;
|
|
1092
1066
|
const browserCount = fetched.filter((f) => f.source === "browser").length;
|
|
1093
1067
|
|
|
@@ -1119,14 +1093,14 @@ async function synthesizeWithGemini(
|
|
|
1119
1093
|
: buildSourceRegistry(results);
|
|
1120
1094
|
const prompt = buildSynthesisPrompt(query, results, sources, { grounded });
|
|
1121
1095
|
|
|
1122
|
-
return new Promise((resolve, reject) => {
|
|
1123
|
-
const extraArgs = tabPrefix ? ["--tab", String(tabPrefix)] : [];
|
|
1124
|
-
const proc = spawn(
|
|
1125
|
-
"node",
|
|
1126
|
-
[join(__dir, "..", "extractors", "gemini.mjs"), prompt, ...extraArgs],
|
|
1127
|
-
{
|
|
1128
|
-
stdio: ["ignore", "pipe", "pipe"],
|
|
1129
|
-
env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
|
|
1096
|
+
return new Promise((resolve, reject) => {
|
|
1097
|
+
const extraArgs = tabPrefix ? ["--tab", String(tabPrefix)] : [];
|
|
1098
|
+
const proc = spawn(
|
|
1099
|
+
"node",
|
|
1100
|
+
[join(__dir, "..", "extractors", "gemini.mjs"), prompt, ...extraArgs],
|
|
1101
|
+
{
|
|
1102
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
1103
|
+
env: { ...process.env, CDP_PROFILE_DIR: GREEDY_PROFILE_DIR },
|
|
1130
1104
|
},
|
|
1131
1105
|
);
|
|
1132
1106
|
let out = "";
|
|
@@ -1166,11 +1140,11 @@ function slugify(query) {
|
|
|
1166
1140
|
.slice(0, 60);
|
|
1167
1141
|
}
|
|
1168
1142
|
|
|
1169
|
-
function resultsDir() {
|
|
1170
|
-
const dir = join(__dir, "..", "results");
|
|
1171
|
-
mkdirSync(dir, { recursive: true });
|
|
1172
|
-
return dir;
|
|
1173
|
-
}
|
|
1143
|
+
/**
 * Return the `results` directory located one level above this script's own
 * directory, creating it first when it does not exist yet.
 *
 * @returns {string} Path of the results directory.
 */
function resultsDir() {
  const target = join(__dir, "..", "results");
  // `recursive: true` keeps this call from failing when the directory is already there.
  mkdirSync(target, { recursive: true });
  return target;
}
|
|
1174
1148
|
|
|
1175
1149
|
function writeOutput(
|
|
1176
1150
|
data,
|
|
@@ -1235,57 +1209,57 @@ function probeGreedyChrome(timeoutMs = 3000) {
|
|
|
1235
1209
|
// Write (or refresh) the DevToolsActivePort file for the GreedySearch Chrome so
|
|
1236
1210
|
// cdp.mjs always connects to the right port rather than the user's main Chrome.
|
|
1237
1211
|
// Uses atomic write (write to temp + rename) to prevent corruption from parallel processes.
|
|
1238
|
-
async function refreshPortFile() {
|
|
1239
|
-
const LOCK_FILE = `${ACTIVE_PORT_FILE}.lock`;
|
|
1240
|
-
const TEMP_FILE = `${ACTIVE_PORT_FILE}.tmp`;
|
|
1241
|
-
const LOCK_STALE_MS = 5000;
|
|
1242
|
-
const LOCK_WAIT_MS = 1000;
|
|
1243
|
-
|
|
1244
|
-
// File-based lock with exclusive create + stale lock recovery
|
|
1245
|
-
const lockAcquired = await new Promise((resolve) => {
|
|
1246
|
-
const start = Date.now();
|
|
1247
|
-
const tryLock = () => {
|
|
1248
|
-
try {
|
|
1249
|
-
const payload = JSON.stringify({ pid: process.pid, ts: Date.now() });
|
|
1250
|
-
writeFileSync(LOCK_FILE, payload, { encoding: "utf8", flag: "wx" });
|
|
1251
|
-
resolve(true);
|
|
1252
|
-
} catch (e) {
|
|
1253
|
-
if (e?.code !== "EEXIST") {
|
|
1254
|
-
if (Date.now() - start < LOCK_WAIT_MS) {
|
|
1255
|
-
setTimeout(tryLock, 50);
|
|
1256
|
-
} else {
|
|
1257
|
-
resolve(false);
|
|
1258
|
-
}
|
|
1259
|
-
return;
|
|
1260
|
-
}
|
|
1261
|
-
|
|
1262
|
-
try {
|
|
1263
|
-
const lockRaw = readFileSync(LOCK_FILE, "utf8").trim();
|
|
1264
|
-
const parsed = lockRaw.startsWith("{")
|
|
1265
|
-
? JSON.parse(lockRaw)
|
|
1266
|
-
: { ts: Number(lockRaw) };
|
|
1267
|
-
const lockTime = Number(parsed?.ts) || 0;
|
|
1268
|
-
|
|
1269
|
-
if (lockTime > 0 && Date.now() - lockTime > LOCK_STALE_MS) {
|
|
1270
|
-
try {
|
|
1271
|
-
unlinkSync(LOCK_FILE);
|
|
1272
|
-
} catch {}
|
|
1273
|
-
}
|
|
1274
|
-
|
|
1275
|
-
if (Date.now() - start < LOCK_WAIT_MS) {
|
|
1276
|
-
setTimeout(tryLock, 50);
|
|
1277
|
-
} else {
|
|
1278
|
-
resolve(false);
|
|
1279
|
-
}
|
|
1280
|
-
} catch {
|
|
1281
|
-
if (Date.now() - start < LOCK_WAIT_MS) {
|
|
1282
|
-
setTimeout(tryLock, 50);
|
|
1283
|
-
} else {
|
|
1284
|
-
resolve(false);
|
|
1285
|
-
}
|
|
1286
|
-
}
|
|
1287
|
-
}
|
|
1288
|
-
};
|
|
1212
|
+
async function refreshPortFile() {
|
|
1213
|
+
const LOCK_FILE = `${ACTIVE_PORT_FILE}.lock`;
|
|
1214
|
+
const TEMP_FILE = `${ACTIVE_PORT_FILE}.tmp`;
|
|
1215
|
+
const LOCK_STALE_MS = 5000;
|
|
1216
|
+
const LOCK_WAIT_MS = 1000;
|
|
1217
|
+
|
|
1218
|
+
// File-based lock with exclusive create + stale lock recovery
|
|
1219
|
+
const lockAcquired = await new Promise((resolve) => {
|
|
1220
|
+
const start = Date.now();
|
|
1221
|
+
const tryLock = () => {
|
|
1222
|
+
try {
|
|
1223
|
+
const payload = JSON.stringify({ pid: process.pid, ts: Date.now() });
|
|
1224
|
+
writeFileSync(LOCK_FILE, payload, { encoding: "utf8", flag: "wx" });
|
|
1225
|
+
resolve(true);
|
|
1226
|
+
} catch (e) {
|
|
1227
|
+
if (e?.code !== "EEXIST") {
|
|
1228
|
+
if (Date.now() - start < LOCK_WAIT_MS) {
|
|
1229
|
+
setTimeout(tryLock, 50);
|
|
1230
|
+
} else {
|
|
1231
|
+
resolve(false);
|
|
1232
|
+
}
|
|
1233
|
+
return;
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
try {
|
|
1237
|
+
const lockRaw = readFileSync(LOCK_FILE, "utf8").trim();
|
|
1238
|
+
const parsed = lockRaw.startsWith("{")
|
|
1239
|
+
? JSON.parse(lockRaw)
|
|
1240
|
+
: { ts: Number(lockRaw) };
|
|
1241
|
+
const lockTime = Number(parsed?.ts) || 0;
|
|
1242
|
+
|
|
1243
|
+
if (lockTime > 0 && Date.now() - lockTime > LOCK_STALE_MS) {
|
|
1244
|
+
try {
|
|
1245
|
+
unlinkSync(LOCK_FILE);
|
|
1246
|
+
} catch {}
|
|
1247
|
+
}
|
|
1248
|
+
|
|
1249
|
+
if (Date.now() - start < LOCK_WAIT_MS) {
|
|
1250
|
+
setTimeout(tryLock, 50);
|
|
1251
|
+
} else {
|
|
1252
|
+
resolve(false);
|
|
1253
|
+
}
|
|
1254
|
+
} catch {
|
|
1255
|
+
if (Date.now() - start < LOCK_WAIT_MS) {
|
|
1256
|
+
setTimeout(tryLock, 50);
|
|
1257
|
+
} else {
|
|
1258
|
+
resolve(false);
|
|
1259
|
+
}
|
|
1260
|
+
}
|
|
1261
|
+
}
|
|
1262
|
+
};
|
|
1289
1263
|
tryLock();
|
|
1290
1264
|
});
|
|
1291
1265
|
|
|
@@ -1428,18 +1402,18 @@ async function main() {
|
|
|
1428
1402
|
// PARALLEL-SAFE: Always create fresh tabs for each engine to avoid race conditions
|
|
1429
1403
|
// when multiple "all" searches run concurrently. Previously, reusing cached tabs
|
|
1430
1404
|
// caused ERR_ABORTED and Uncaught errors as multiple processes fought over the same tab.
|
|
1431
|
-
const engineTabs = [];
|
|
1432
|
-
for (let i = 0; i < ALL_ENGINES.length; i++) {
|
|
1433
|
-
if (i > 0) await new Promise((r) => setTimeout(r, 300)); // small delay between tab opens
|
|
1434
|
-
const tab = await openNewTab();
|
|
1435
|
-
engineTabs.push(tab);
|
|
1436
|
-
}
|
|
1405
|
+
const engineTabs = [];
|
|
1406
|
+
for (let i = 0; i < ALL_ENGINES.length; i++) {
|
|
1407
|
+
if (i > 0) await new Promise((r) => setTimeout(r, 300)); // small delay between tab opens
|
|
1408
|
+
const tab = await openNewTab();
|
|
1409
|
+
engineTabs.push(tab);
|
|
1410
|
+
}
|
|
1437
1411
|
|
|
1438
1412
|
// All tabs assigned — run extractors in parallel
|
|
1439
1413
|
try {
|
|
1440
1414
|
const results = await Promise.allSettled(
|
|
1441
|
-
ALL_ENGINES.map((e, i) =>
|
|
1442
|
-
runExtractor(ENGINES[e], query, engineTabs[i], short)
|
|
1415
|
+
ALL_ENGINES.map((e, i) =>
|
|
1416
|
+
runExtractor(ENGINES[e], query, engineTabs[i], short)
|
|
1443
1417
|
.then((r) => {
|
|
1444
1418
|
process.stderr.write(`PROGRESS:${e}:done\n`);
|
|
1445
1419
|
return { engine: e, ...r };
|
|
@@ -1461,7 +1435,7 @@ async function main() {
|
|
|
1461
1435
|
}
|
|
1462
1436
|
}
|
|
1463
1437
|
|
|
1464
|
-
await closeTabs(engineTabs);
|
|
1438
|
+
await closeTabs(engineTabs);
|
|
1465
1439
|
|
|
1466
1440
|
// Build a canonical source registry across all engines
|
|
1467
1441
|
out._sources = buildSourceRegistry(out, query);
|
|
@@ -1486,18 +1460,18 @@ async function main() {
|
|
|
1486
1460
|
process.stderr.write(
|
|
1487
1461
|
"[greedysearch] Synthesizing results with Gemini...\n",
|
|
1488
1462
|
);
|
|
1489
|
-
try {
|
|
1490
|
-
const geminiTab = await
|
|
1491
|
-
await activateTab(geminiTab);
|
|
1492
|
-
const synthesis = await synthesizeWithGemini(query, out, {
|
|
1493
|
-
grounded: depth === "deep",
|
|
1494
|
-
tabPrefix: geminiTab,
|
|
1495
|
-
});
|
|
1496
|
-
await activateTab(geminiTab);
|
|
1497
|
-
out._synthesis = {
|
|
1498
|
-
...synthesis,
|
|
1499
|
-
synthesized: true,
|
|
1500
|
-
};
|
|
1463
|
+
try {
|
|
1464
|
+
const geminiTab = await openNewTab();
|
|
1465
|
+
await activateTab(geminiTab);
|
|
1466
|
+
const synthesis = await synthesizeWithGemini(query, out, {
|
|
1467
|
+
grounded: depth === "deep",
|
|
1468
|
+
tabPrefix: geminiTab,
|
|
1469
|
+
});
|
|
1470
|
+
await activateTab(geminiTab);
|
|
1471
|
+
out._synthesis = {
|
|
1472
|
+
...synthesis,
|
|
1473
|
+
synthesized: true,
|
|
1474
|
+
};
|
|
1501
1475
|
process.stderr.write("PROGRESS:synthesis:done\n");
|
|
1502
1476
|
} catch (e) {
|
|
1503
1477
|
process.stderr.write(
|
|
@@ -1523,9 +1497,9 @@ async function main() {
|
|
|
1523
1497
|
});
|
|
1524
1498
|
return;
|
|
1525
1499
|
} finally {
|
|
1526
|
-
await closeTabs(engineTabs);
|
|
1527
|
-
}
|
|
1528
|
-
}
|
|
1500
|
+
await closeTabs(engineTabs);
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1529
1503
|
|
|
1530
1504
|
const script = ENGINES[engine];
|
|
1531
1505
|
if (!script) {
|
|
@@ -18,7 +18,9 @@ import {
|
|
|
18
18
|
outputJson,
|
|
19
19
|
parseArgs,
|
|
20
20
|
parseSourcesFromMarkdown,
|
|
21
|
+
TIMING,
|
|
21
22
|
validateQuery,
|
|
23
|
+
waitForCopyButton,
|
|
22
24
|
} from "./common.mjs";
|
|
23
25
|
import { dismissConsent, handleVerification } from "./consent.mjs";
|
|
24
26
|
import { SELECTORS } from "./selectors.mjs";
|
|
@@ -30,20 +32,6 @@ const GLOBAL_VAR = "__bingClipboard";
|
|
|
30
32
|
// Bing Copilot-specific helpers
|
|
31
33
|
// ============================================================================
|
|
32
34
|
|
|
33
|
-
async function waitForCopyButton(tab, timeout = 60000) {
|
|
34
|
-
const deadline = Date.now() + timeout;
|
|
35
|
-
while (Date.now() < deadline) {
|
|
36
|
-
await new Promise((r) => setTimeout(r, 700));
|
|
37
|
-
const found = await cdp([
|
|
38
|
-
"eval",
|
|
39
|
-
tab,
|
|
40
|
-
`!!document.querySelector('${S.copyButton}')`,
|
|
41
|
-
]).catch(() => "false");
|
|
42
|
-
if (found === "true") return;
|
|
43
|
-
}
|
|
44
|
-
throw new Error(`Copilot copy button did not appear within ${timeout}ms`);
|
|
45
|
-
}
|
|
46
|
-
|
|
47
35
|
async function extractAnswer(tab) {
|
|
48
36
|
await cdp([
|
|
49
37
|
"eval",
|
|
@@ -78,7 +66,7 @@ async function main() {
|
|
|
78
66
|
|
|
79
67
|
// Navigate to Copilot homepage and use the chat input
|
|
80
68
|
await cdp(["nav", tab, "https://copilot.microsoft.com/"], 35000);
|
|
81
|
-
await new Promise((r) => setTimeout(r,
|
|
69
|
+
await new Promise((r) => setTimeout(r, TIMING.postNavSlow));
|
|
82
70
|
await dismissConsent(tab, cdp);
|
|
83
71
|
|
|
84
72
|
// Handle verification challenges (Cloudflare Turnstile, Microsoft auth, etc.)
|
|
@@ -91,7 +79,7 @@ async function main() {
|
|
|
91
79
|
|
|
92
80
|
// After verification, page may have redirected or reloaded — wait for it to settle
|
|
93
81
|
if (verifyResult === "clicked") {
|
|
94
|
-
await new Promise((r) => setTimeout(r,
|
|
82
|
+
await new Promise((r) => setTimeout(r, TIMING.afterVerify));
|
|
95
83
|
|
|
96
84
|
// Re-navigate if we got redirected
|
|
97
85
|
const currentUrl = await cdp([
|
|
@@ -101,7 +89,7 @@ async function main() {
|
|
|
101
89
|
]).catch(() => "");
|
|
102
90
|
if (!currentUrl.includes("copilot.microsoft.com")) {
|
|
103
91
|
await cdp(["nav", tab, "https://copilot.microsoft.com/"], 35000);
|
|
104
|
-
await new Promise((r) => setTimeout(r,
|
|
92
|
+
await new Promise((r) => setTimeout(r, TIMING.postNavSlow));
|
|
105
93
|
await dismissConsent(tab, cdp);
|
|
106
94
|
}
|
|
107
95
|
}
|
|
@@ -133,9 +121,9 @@ async function main() {
|
|
|
133
121
|
|
|
134
122
|
await injectClipboardInterceptor(tab, GLOBAL_VAR);
|
|
135
123
|
await cdp(["click", tab, S.input]);
|
|
136
|
-
await new Promise((r) => setTimeout(r,
|
|
124
|
+
await new Promise((r) => setTimeout(r, TIMING.postClick));
|
|
137
125
|
await cdp(["type", tab, query]);
|
|
138
|
-
await new Promise((r) => setTimeout(r,
|
|
126
|
+
await new Promise((r) => setTimeout(r, TIMING.postType));
|
|
139
127
|
|
|
140
128
|
// Submit with Enter (most reliable across locales and Chrome instances)
|
|
141
129
|
await cdp([
|
|
@@ -144,7 +132,7 @@ async function main() {
|
|
|
144
132
|
`document.querySelector('${S.input}')?.dispatchEvent(new KeyboardEvent('keydown',{key:'Enter',bubbles:true,keyCode:13})), 'ok'`,
|
|
145
133
|
]);
|
|
146
134
|
|
|
147
|
-
await waitForCopyButton(tab);
|
|
135
|
+
await waitForCopyButton(tab, S.copyButton, { timeout: 60000 });
|
|
148
136
|
|
|
149
137
|
const { answer, sources } = await extractAnswer(tab);
|
|
150
138
|
if (!answer)
|
package/extractors/common.mjs
CHANGED
|
@@ -6,7 +6,7 @@ import { dirname, join } from "node:path";
|
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
7
|
|
|
8
8
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
|
9
|
-
const CDP = join(__dir, "..", "bin", "cdp.mjs");
|
|
9
|
+
const CDP = join(__dir, "..", "bin", "cdp.mjs");
|
|
10
10
|
|
|
11
11
|
// ============================================================================
|
|
12
12
|
// CDP wrapper
|
|
@@ -119,6 +119,46 @@ export function parseSourcesFromMarkdown(text) {
|
|
|
119
119
|
.slice(0, 10);
|
|
120
120
|
}
|
|
121
121
|
|
|
122
|
+
// ============================================================================
|
|
123
|
+
// Timing constants
|
|
124
|
+
// ============================================================================
|
|
125
|
+
|
|
126
|
+
/**
 * Shared settle and polling delays, in milliseconds (the values are handed to
 * `setTimeout` by the extractors).
 *
 * The object is frozen: it is imported by several modules, and a write from
 * one of them would otherwise silently retune the delays for all the others.
 */
export const TIMING = Object.freeze({
  postNav: 1500, // settle after navigation
  postNavSlow: 2000, // settle after slower navigations (Bing, Gemini)
  postClick: 400, // settle after a UI click
  postType: 400, // settle after typing
  inputPoll: 400, // polling interval when waiting for input to appear
  copyPoll: 600, // polling interval when waiting for copy button
  afterVerify: 3000, // settle after a verification challenge completes
});
|
|
135
|
+
|
|
136
|
+
// ============================================================================
|
|
137
|
+
// Copy button polling
|
|
138
|
+
// ============================================================================
|
|
139
|
+
|
|
140
|
+
/**
 * Wait for a copy button to appear in the DOM.
 *
 * Sleeps `TIMING.copyPoll` ms, runs the optional `onPoll` hook, then asks the
 * tab (through `cdp`) whether `document.querySelector(selector)` matches.
 * A failed evaluation counts as "not found yet" and polling simply continues
 * until the deadline.
 *
 * @param {string} tab - Tab identifier
 * @param {string} selector - CSS selector for the copy button
 * @param {object} [options]
 * @param {number} [options.timeout=60000] - Max wait in ms
 * @param {Function} [options.onPoll] - Optional callback run on each poll tick
 *   (e.g. scroll). Receives the 1-based tick number; may be sync or async, and
 *   any error it raises is ignored.
 * @returns {Promise<void>}
 * @throws {Error} When the selector has not matched by the time `timeout` elapses.
 */
export async function waitForCopyButton(tab, selector, options = {}) {
  const { timeout = 60000, onPoll } = options;
  const deadline = Date.now() + timeout;
  // JSON.stringify emits a valid JS string literal whatever quotes or
  // backslashes the selector contains; hand-wrapping it in '...' breaks on
  // selectors such as [aria-label='Copy'].
  const probe = `!!document.querySelector(${JSON.stringify(selector)})`;
  let tick = 0;
  while (Date.now() < deadline) {
    await new Promise((r) => setTimeout(r, TIMING.copyPoll));
    if (onPoll) {
      try {
        // `await` accepts plain values too, so a synchronous hook is fine.
        await onPoll(++tick);
      } catch {
        // The hook is best-effort: a failed scroll must not abort the wait.
      }
    }
    const found = await cdp(["eval", tab, probe]).catch(() => "false");
    if (found === "true") return;
  }
  throw new Error(`Copy button ('${selector}') did not appear within ${timeout}ms`);
}
|
|
161
|
+
|
|
122
162
|
// ============================================================================
|
|
123
163
|
// Stream completion detection
|
|
124
164
|
// ============================================================================
|
|
@@ -139,6 +179,7 @@ export async function waitForStreamComplete(tab, options = {}) {
|
|
|
139
179
|
interval = 600,
|
|
140
180
|
stableRounds = 3,
|
|
141
181
|
selector = "document.body",
|
|
182
|
+
minLength = 0,
|
|
142
183
|
} = options;
|
|
143
184
|
|
|
144
185
|
const deadline = Date.now() + timeout;
|
|
@@ -154,7 +195,7 @@ export async function waitForStreamComplete(tab, options = {}) {
|
|
|
154
195
|
]).catch(() => "0");
|
|
155
196
|
const currentLen = parseInt(lenStr, 10) || 0;
|
|
156
197
|
|
|
157
|
-
if (currentLen
|
|
198
|
+
if (currentLen >= minLength) {
|
|
158
199
|
if (currentLen === lastLen) {
|
|
159
200
|
stableCount++;
|
|
160
201
|
if (stableCount >= stableRounds) return currentLen;
|
|
@@ -165,6 +206,7 @@ export async function waitForStreamComplete(tab, options = {}) {
|
|
|
165
206
|
}
|
|
166
207
|
}
|
|
167
208
|
|
|
209
|
+
if (lastLen >= minLength) return lastLen;
|
|
168
210
|
throw new Error(`Generation did not stabilise within ${timeout}ms`);
|
|
169
211
|
}
|
|
170
212
|
|
package/extractors/gemini.mjs
CHANGED
|
@@ -18,7 +18,9 @@ import {
|
|
|
18
18
|
outputJson,
|
|
19
19
|
parseArgs,
|
|
20
20
|
parseSourcesFromMarkdown,
|
|
21
|
+
TIMING,
|
|
21
22
|
validateQuery,
|
|
23
|
+
waitForCopyButton,
|
|
22
24
|
} from "./common.mjs";
|
|
23
25
|
import { dismissConsent, handleVerification } from "./consent.mjs";
|
|
24
26
|
import { SELECTORS } from "./selectors.mjs";
|
|
@@ -46,36 +48,15 @@ async function typeIntoGemini(tab, text) {
|
|
|
46
48
|
]);
|
|
47
49
|
}
|
|
48
50
|
|
|
49
|
-
async function
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
"eval",
|
|
59
|
-
tab,
|
|
60
|
-
`
|
|
61
|
-
(function() {
|
|
62
|
-
const chat = document.querySelector('chat-window, [role="main"], main') || document.body;
|
|
63
|
-
const scrollHeight = chat.scrollHeight || document.body.scrollHeight || 0;
|
|
64
|
-
// Scroll to bottom to ensure all content is loaded
|
|
65
|
-
chat.scrollTo ? chat.scrollTo({ top: scrollHeight, behavior: 'smooth' }) : window.scrollTo(0, scrollHeight);
|
|
66
|
-
})()
|
|
67
|
-
`,
|
|
68
|
-
]).catch(() => null);
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
const found = await cdp([
|
|
72
|
-
"eval",
|
|
73
|
-
tab,
|
|
74
|
-
`!!document.querySelector('${S.copyButton}')`,
|
|
75
|
-
]).catch(() => "false");
|
|
76
|
-
if (found === "true") return;
|
|
77
|
-
}
|
|
78
|
-
throw new Error(`Gemini copy button did not appear within ${timeout}ms`);
|
|
51
|
+
/**
 * Scroll the chat area of the given tab to its bottom.
 *
 * The snippet is evaluated inside the page via `cdp`: it picks the first
 * element matching `chat-window, [role="main"], main` (falling back to
 * `document.body`) and scrolls it to its own `scrollHeight`; when that element
 * has no `scrollTo`, the window is scrolled to the body height instead.
 *
 * Errors from `cdp` are not caught here and propagate to the caller.
 *
 * @param {string} tab - Tab identifier passed through to `cdp`
 * @returns {Promise<void>}
 */
async function scrollToBottom(tab) {
  const scrollScript = `(function() {
const chat = document.querySelector('chat-window, [role="main"], main') || document.body;
chat.scrollTo ? chat.scrollTo({ top: chat.scrollHeight, behavior: 'smooth' }) : window.scrollTo(0, document.body.scrollHeight);
})()`;
  await cdp(["eval", tab, scrollScript]);
}
|
|
80
61
|
|
|
81
62
|
async function extractAnswer(tab) {
|
|
@@ -111,7 +92,7 @@ async function main() {
|
|
|
111
92
|
|
|
112
93
|
// Each search = fresh conversation
|
|
113
94
|
await cdp(["nav", tab, "https://gemini.google.com/app"], 35000);
|
|
114
|
-
await new Promise((r) => setTimeout(r,
|
|
95
|
+
await new Promise((r) => setTimeout(r, TIMING.postNavSlow));
|
|
115
96
|
await dismissConsent(tab, cdp);
|
|
116
97
|
await handleVerification(tab, cdp, 60000);
|
|
117
98
|
|
|
@@ -124,13 +105,13 @@ async function main() {
|
|
|
124
105
|
`!!document.querySelector('${S.input}')`,
|
|
125
106
|
]).catch(() => "false");
|
|
126
107
|
if (ready === "true") break;
|
|
127
|
-
await new Promise((r) => setTimeout(r,
|
|
108
|
+
await new Promise((r) => setTimeout(r, TIMING.inputPoll));
|
|
128
109
|
}
|
|
129
|
-
await new Promise((r) => setTimeout(r,
|
|
110
|
+
await new Promise((r) => setTimeout(r, TIMING.postClick));
|
|
130
111
|
|
|
131
112
|
await injectClipboardInterceptor(tab, GLOBAL_VAR);
|
|
132
113
|
await typeIntoGemini(tab, query);
|
|
133
|
-
await new Promise((r) => setTimeout(r,
|
|
114
|
+
await new Promise((r) => setTimeout(r, TIMING.postType));
|
|
134
115
|
|
|
135
116
|
await cdp([
|
|
136
117
|
"eval",
|
|
@@ -138,7 +119,11 @@ async function main() {
|
|
|
138
119
|
`document.querySelector('${S.sendButton}')?.click()`,
|
|
139
120
|
]);
|
|
140
121
|
|
|
141
|
-
|
|
122
|
+
// Scroll to bottom every ~6s while waiting to trigger lazy-loaded content
|
|
123
|
+
await waitForCopyButton(tab, S.copyButton, {
|
|
124
|
+
timeout: 120000,
|
|
125
|
+
onPoll: (tick) => tick % 10 === 0 ? scrollToBottom(tab) : Promise.resolve(),
|
|
126
|
+
});
|
|
142
127
|
|
|
143
128
|
const { answer, sources } = await extractAnswer(tab);
|
|
144
129
|
if (!answer) throw new Error("No answer captured from Gemini clipboard");
|