@apmantza/greedysearch-pi 1.9.2 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +132 -2
- package/README.md +82 -47
- package/bin/cdp.mjs +1153 -1108
- package/bin/launch.mjs +9 -0
- package/bin/search.mjs +318 -81
- package/extractors/bing-copilot.mjs +48 -18
- package/extractors/chatgpt.mjs +553 -0
- package/extractors/common.mjs +213 -22
- package/extractors/consensus.mjs +655 -0
- package/extractors/consent.mjs +182 -18
- package/extractors/gemini.mjs +350 -217
- package/extractors/google-ai.mjs +129 -128
- package/extractors/logically.mjs +629 -0
- package/extractors/perplexity.mjs +547 -217
- package/extractors/selectors.mjs +3 -2
- package/extractors/semantic-scholar.mjs +219 -0
- package/package.json +8 -4
- package/skills/greedy-search/skill.md +20 -12
- package/src/fetcher.mjs +23 -1
- package/src/formatters/results.ts +185 -128
- package/src/search/browser-lifecycle.mjs +27 -5
- package/src/search/challenge-detect.mjs +205 -0
- package/src/search/chrome.mjs +653 -590
- package/src/search/constants.mjs +155 -39
- package/src/search/engines.mjs +114 -76
- package/src/search/fetch-source.mjs +566 -451
- package/src/search/pdf.mjs +68 -0
- package/src/search/progress.mjs +145 -0
- package/src/search/recovery.mjs +73 -45
- package/src/search/research.mjs +1419 -62
- package/src/search/scale-aware.mjs +93 -0
- package/src/search/simple-research.mjs +520 -0
- package/src/search/sources.mjs +52 -22
- package/src/search/synthesis-runner.mjs +105 -26
- package/src/search/synthesis.mjs +286 -246
- package/src/tools/greedy-search-handler.ts +129 -59
- package/src/tools/shared.ts +312 -186
- package/src/types.ts +110 -104
- package/test.mjs +537 -18
package/test.mjs
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
// node test.mjs flags # flag/option tests only
|
|
10
10
|
// node test.mjs edge # edge case tests only
|
|
11
11
|
// node test.mjs unit # fast unit tests only (no Chrome needed)
|
|
12
|
+
// node test.mjs synth # synthesis config smoke (gemini + chatgpt)
|
|
12
13
|
|
|
13
14
|
import { spawn } from "node:child_process";
|
|
14
15
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
@@ -93,11 +94,16 @@ function checkJson(file, checkFn) {
|
|
|
93
94
|
// Unit Tests (no Chrome required)
|
|
94
95
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
95
96
|
|
|
96
|
-
if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
97
|
+
if (["", "all", "unit", "quick", "smoke", "synth"].includes(mode)) {
|
|
97
98
|
section("🧪 Unit Tests");
|
|
98
99
|
|
|
99
100
|
subsection("stripQuotes — param double-escaping workaround (issue #2)");
|
|
100
|
-
|
|
101
|
+
// Inlined from src/tools/shared.ts — importing the .ts file from
|
|
102
|
+
// test.mjs works at the project root (Node strips types) but fails
|
|
103
|
+
// when test.mjs runs from the installed tarball in node_modules
|
|
104
|
+
// (ERR_UNSUPPORTED_NODE_MODULES_TYPE_STRIPPING). Keep in sync with
|
|
105
|
+
// src/tools/shared.ts.
|
|
106
|
+
// Stringifies the value (null/undefined become "") and removes at most one
// double quote from the start and one from the end.
const stripQuotes = (val) => {
  const text = String(val ?? "");
  return text.replace(/^"|"$/g, "");
};
|
|
101
107
|
|
|
102
108
|
const stripCases = [
|
|
103
109
|
// [input, expected, label]
|
|
@@ -166,24 +172,24 @@ if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
|
166
172
|
["VERIFICATION REQUIRED", true, 'legacy pattern: "VERIFICATION REQUIRED"'],
|
|
167
173
|
["verification failed", true, 'extended: "verification" in sentence'],
|
|
168
174
|
[
|
|
169
|
-
"
|
|
175
|
+
"Cloudflare Turnstile challenge detected in closed shadow DOM",
|
|
170
176
|
true,
|
|
171
|
-
"new:
|
|
177
|
+
"new: CF closed-shadow-dom block triggers visible retry",
|
|
172
178
|
],
|
|
173
179
|
[
|
|
174
|
-
"
|
|
180
|
+
"Copilot verification required — please solve it manually in the browser window",
|
|
175
181
|
true,
|
|
176
|
-
"new:
|
|
182
|
+
"new: per-engine 'verification required' triggers visible retry",
|
|
177
183
|
],
|
|
178
184
|
[
|
|
179
|
-
"
|
|
185
|
+
"Network timeout after 30000ms",
|
|
180
186
|
true,
|
|
181
|
-
"new:
|
|
187
|
+
"new: timeout triggers visible retry",
|
|
182
188
|
],
|
|
183
189
|
[
|
|
184
|
-
"
|
|
190
|
+
"Perplexity input not found — page may be blocked or in unexpected state",
|
|
185
191
|
true,
|
|
186
|
-
"new:
|
|
192
|
+
"new: 'input not found' triggers visible retry",
|
|
187
193
|
],
|
|
188
194
|
["", false, "empty string"],
|
|
189
195
|
];
|
|
@@ -217,7 +223,7 @@ if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
|
217
223
|
}
|
|
218
224
|
|
|
219
225
|
const retryEngines = findHeadlessBlockedEngines({
|
|
220
|
-
perplexity: { error: "
|
|
226
|
+
perplexity: { error: "Perplexity input not found — page may be blocked or in unexpected state" },
|
|
221
227
|
bing: { error: "Copilot verification required" },
|
|
222
228
|
google: { error: "Google verification required" },
|
|
223
229
|
});
|
|
@@ -232,11 +238,16 @@ if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
|
232
238
|
const pplxTestCases = [
|
|
233
239
|
["ask-input selector not found", true, 'legacy: "ask-input"'],
|
|
234
240
|
[
|
|
235
|
-
"
|
|
241
|
+
"Perplexity input not found — page may be blocked or in unexpected state",
|
|
236
242
|
true,
|
|
237
|
-
"new:
|
|
243
|
+
"new: 'input not found' triggers for perplexity",
|
|
238
244
|
],
|
|
239
245
|
["Perplexity timeout", true, "timeout triggers visible retry"],
|
|
246
|
+
[
|
|
247
|
+
"Clipboard interceptor returned empty text",
|
|
248
|
+
false,
|
|
249
|
+
"new: 'clipboard' substring no longer triggers (was too broad — fired on routine DOM-fallback failures)",
|
|
250
|
+
],
|
|
240
251
|
];
|
|
241
252
|
for (const [error, expected, label] of pplxTestCases) {
|
|
242
253
|
const matched = isHeadlessBlockedError(error);
|
|
@@ -245,18 +256,110 @@ if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
|
245
256
|
failMsg(`pplxPattern: ${label} — expected ${expected}, got ${matched}`);
|
|
246
257
|
}
|
|
247
258
|
|
|
248
|
-
subsection("
|
|
249
|
-
const { isChromeHeadless
|
|
250
|
-
"./src/search/chrome.mjs"
|
|
259
|
+
subsection("Chrome lifecycle — visible/headless mode detection");
|
|
260
|
+
const { detectHeadlessFromChromeCommandLine, isChromeHeadless } =
|
|
261
|
+
await import("./src/search/chrome.mjs");
|
|
262
|
+
const { commandLineMatchesGreedyChrome } = await import(
|
|
263
|
+
"./src/search/browser-lifecycle.mjs"
|
|
251
264
|
);
|
|
252
|
-
|
|
253
|
-
|
|
265
|
+
|
|
266
|
+
const visibleCmd =
|
|
267
|
+
'"C:/Program Files/Google/Chrome/Application/chrome.exe" --remote-debugging-port=9222 --user-data-dir=C:\\Users\\me\\AppData\\Local\\Temp\\greedysearch-chrome-profile about:blank';
|
|
268
|
+
const headlessCmd = `${visibleCmd} --headless=new`;
|
|
269
|
+
const rendererCmd = `${visibleCmd} --type=renderer`;
|
|
270
|
+
|
|
271
|
+
if (detectHeadlessFromChromeCommandLine(visibleCmd) === false) {
|
|
272
|
+
passMsg("chrome mode: live visible command line overrides stale marker");
|
|
273
|
+
} else {
|
|
274
|
+
failMsg("chrome mode: visible command line should detect non-headless");
|
|
275
|
+
}
|
|
276
|
+
if (detectHeadlessFromChromeCommandLine(headlessCmd) === true) {
|
|
277
|
+
passMsg("chrome mode: live headless command line detected");
|
|
278
|
+
} else {
|
|
279
|
+
failMsg("chrome mode: headless command line should detect headless");
|
|
280
|
+
}
|
|
281
|
+
if (detectHeadlessFromChromeCommandLine(rendererCmd) === null) {
|
|
282
|
+
passMsg("chrome mode: ignores child renderer processes");
|
|
283
|
+
} else {
|
|
284
|
+
failMsg("chrome mode: renderer command line should be ignored");
|
|
285
|
+
}
|
|
286
|
+
if (
|
|
287
|
+
commandLineMatchesGreedyChrome(
|
|
288
|
+
visibleCmd,
|
|
289
|
+
"C:/Users/me/AppData/Local/Temp/greedysearch-chrome-profile",
|
|
290
|
+
)
|
|
291
|
+
) {
|
|
292
|
+
passMsg(
|
|
293
|
+
"stale cleanup: Windows backslash profile path verifies as GreedySearch Chrome",
|
|
294
|
+
);
|
|
295
|
+
} else {
|
|
296
|
+
failMsg(
|
|
297
|
+
"stale cleanup: should accept equivalent slash/backslash profile paths",
|
|
298
|
+
);
|
|
299
|
+
}
|
|
300
|
+
if (
|
|
301
|
+
!commandLineMatchesGreedyChrome(
|
|
302
|
+
rendererCmd,
|
|
303
|
+
"C:/Users/me/AppData/Local/Temp/greedysearch-chrome-profile",
|
|
304
|
+
)
|
|
305
|
+
) {
|
|
306
|
+
passMsg("stale cleanup: renderer child is not treated as browser process");
|
|
307
|
+
} else {
|
|
308
|
+
failMsg(
|
|
309
|
+
"stale cleanup: renderer child should not verify as browser process",
|
|
310
|
+
);
|
|
311
|
+
}
|
|
312
|
+
if (typeof isChromeHeadless === "function")
|
|
313
|
+
passMsg("isChromeHeadless: function exists");
|
|
254
314
|
else failMsg("isChromeHeadless: not a function");
|
|
255
315
|
|
|
316
|
+
subsection("Synthesis routing — configurable synthesizer helpers");
|
|
317
|
+
const { normalizeSynthesizer, getSynthesisStartUrl } = await import(
|
|
318
|
+
"./src/search/synthesis-runner.mjs"
|
|
319
|
+
);
|
|
320
|
+
if (normalizeSynthesizer("gem") === "gemini")
|
|
321
|
+
passMsg("synthesizer: gem alias normalizes to gemini");
|
|
322
|
+
else failMsg("synthesizer: gem alias should normalize to gemini");
|
|
323
|
+
if (normalizeSynthesizer("gpt") === "chatgpt")
|
|
324
|
+
passMsg("synthesizer: gpt alias normalizes to chatgpt");
|
|
325
|
+
else failMsg("synthesizer: gpt alias should normalize to chatgpt");
|
|
326
|
+
if (getSynthesisStartUrl("chatgpt") === "https://chatgpt.com/")
|
|
327
|
+
passMsg("synthesizer: chatgpt start URL");
|
|
328
|
+
else failMsg("synthesizer: unexpected chatgpt start URL");
|
|
329
|
+
|
|
256
330
|
subsection("Research mode option/query normalization");
|
|
257
331
|
const { clampResearchOptions, normalizeResearchQueries } = await import(
|
|
258
332
|
"./src/search/research.mjs"
|
|
259
333
|
);
|
|
334
|
+
const { ALL_ENGINES, DEFAULT_SYNTHESIZER, ENGINES, RESEARCH_ENGINES } =
|
|
335
|
+
await import("./src/search/constants.mjs");
|
|
336
|
+
if (RESEARCH_ENGINES.join(",") === ALL_ENGINES.join(",")) {
|
|
337
|
+
passMsg("research config: reuses normal all-engine fan-out");
|
|
338
|
+
} else {
|
|
339
|
+
failMsg(
|
|
340
|
+
`research config: expected ${ALL_ENGINES.join(",")}, got ${RESEARCH_ENGINES.join(",")}`,
|
|
341
|
+
);
|
|
342
|
+
}
|
|
343
|
+
if (DEFAULT_SYNTHESIZER === "gemini") {
|
|
344
|
+
passMsg("research config: default synthesizer is gemini");
|
|
345
|
+
} else {
|
|
346
|
+
failMsg(
|
|
347
|
+
`research config: expected gemini default, got ${DEFAULT_SYNTHESIZER}`,
|
|
348
|
+
);
|
|
349
|
+
}
|
|
350
|
+
if (!ENGINES.consensus && !ENGINES.cns) {
|
|
351
|
+
passMsg("research config: consensus is not a registered engine");
|
|
352
|
+
} else {
|
|
353
|
+
failMsg("research config: consensus should not be registered");
|
|
354
|
+
}
|
|
355
|
+
if (
|
|
356
|
+
ENGINES["semantic-scholar"] &&
|
|
357
|
+
ENGINES.s2 === ENGINES["semantic-scholar"]
|
|
358
|
+
) {
|
|
359
|
+
passMsg("research config: semantic-scholar is registered with s2 alias");
|
|
360
|
+
} else {
|
|
361
|
+
failMsg("research config: semantic-scholar registration missing");
|
|
362
|
+
}
|
|
260
363
|
const clamped = clampResearchOptions({
|
|
261
364
|
breadth: 99,
|
|
262
365
|
iterations: 0,
|
|
@@ -367,6 +470,66 @@ if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
|
367
470
|
);
|
|
368
471
|
}
|
|
369
472
|
|
|
473
|
+
const academicRanked = buildSourceRegistry(
|
|
474
|
+
{
|
|
475
|
+
"semantic-scholar": {
|
|
476
|
+
sources: [
|
|
477
|
+
{
|
|
478
|
+
title:
|
|
479
|
+
"Chain of Thought Prompting Elicits Reasoning in Large Language Models",
|
|
480
|
+
url: "https://arxiv.org/pdf/2201.11903.pdf",
|
|
481
|
+
},
|
|
482
|
+
],
|
|
483
|
+
},
|
|
484
|
+
},
|
|
485
|
+
"large language models",
|
|
486
|
+
);
|
|
487
|
+
if (
|
|
488
|
+
academicRanked[0]?.engines.includes("semantic-scholar") &&
|
|
489
|
+
academicRanked[0]?.sourceType === "academic"
|
|
490
|
+
) {
|
|
491
|
+
passMsg("source ranking: semantic-scholar sources are indexed as academic");
|
|
492
|
+
} else {
|
|
493
|
+
failMsg(
|
|
494
|
+
`source ranking: unexpected academic source ${JSON.stringify(academicRanked[0])}`,
|
|
495
|
+
);
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
// Social hard guardrail: a single-engine x.com citation must never be
|
|
499
|
+
// S1. Composite score is high (Google rank #1, x.com matched the
|
|
500
|
+
// "x" letter in "context"), so the smartScore −20 penalty alone
|
|
501
|
+
// isn't enough — the post-sort demotion is what keeps socials out
|
|
502
|
+
// of the top 12.
|
|
503
|
+
const socialGuardrail = buildSourceRegistry(
|
|
504
|
+
{
|
|
505
|
+
google: {
|
|
506
|
+
sources: [
|
|
507
|
+
{
|
|
508
|
+
title: "Redis on X",
|
|
509
|
+
url: "https://x.com/Redisinc/status/123",
|
|
510
|
+
},
|
|
511
|
+
{
|
|
512
|
+
title: "Self-Route paper",
|
|
513
|
+
url: "https://arxiv.org/abs/2407.16833",
|
|
514
|
+
},
|
|
515
|
+
],
|
|
516
|
+
},
|
|
517
|
+
},
|
|
518
|
+
"retrieval augmented generation vs long context LLMs for factual accuracy and hallucination reduction",
|
|
519
|
+
);
|
|
520
|
+
if (
|
|
521
|
+
socialGuardrail[0]?.sourceType !== "social" &&
|
|
522
|
+
socialGuardrail[0]?.domain === "arxiv.org"
|
|
523
|
+
) {
|
|
524
|
+
passMsg(
|
|
525
|
+
"source ranking: social sources are demoted below academic even with a higher composite score",
|
|
526
|
+
);
|
|
527
|
+
} else {
|
|
528
|
+
failMsg(
|
|
529
|
+
`source ranking: S1 should be arxiv, got ${socialGuardrail[0]?.domain} (${socialGuardrail[0]?.sourceType})`,
|
|
530
|
+
);
|
|
531
|
+
}
|
|
532
|
+
|
|
370
533
|
// ─── Phase 2: Quality Evaluator + Novelty Gate ────────────────────────
|
|
371
534
|
|
|
372
535
|
subsection("Novelty Gate — Jaccard similarity");
|
|
@@ -496,6 +659,139 @@ if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
|
496
659
|
if (gapTargets) passMsg("fallback queries: targets identified gaps");
|
|
497
660
|
else failMsg("fallback queries: gaps not targeted");
|
|
498
661
|
|
|
662
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
663
|
+
// Synthesis routing — config-driven live smoke
|
|
664
|
+
//
|
|
665
|
+
// Verifies the `synthesizer` field in ~/.pi/greedyconfig is honored by
|
|
666
|
+
// `engine: "all" --synthesize`. Runs both the default (gemini) and an
|
|
667
|
+
// override (chatgpt). Backs up the user's config and restores it afterwards.
|
|
668
|
+
//
|
|
669
|
+
// Mode gating: only runs in "", "all", or "synth". Skipped in unit/quick/
|
|
670
|
+
// smoke because it requires Chrome + network and takes several minutes.
|
|
671
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
672
|
+
if (["", "all", "synth"].includes(mode)) {
|
|
673
|
+
subsection(
|
|
674
|
+
"Synthesis routing — config-driven live smoke (gemini + chatgpt)",
|
|
675
|
+
);
|
|
676
|
+
const { existsSync, copyFileSync, writeFileSync, unlinkSync } =
|
|
677
|
+
await import("node:fs");
|
|
678
|
+
const { homedir } = await import("node:os");
|
|
679
|
+
const { join } = await import("node:path");
|
|
680
|
+
const cfgDir = join(homedir(), ".pi");
|
|
681
|
+
const cfgFile = join(cfgDir, "greedyconfig");
|
|
682
|
+
const backup = join(cfgDir, "greedyconfig.test-backup");
|
|
683
|
+
const hadOriginal = existsSync(cfgFile);
|
|
684
|
+
if (hadOriginal) copyFileSync(cfgFile, backup);
|
|
685
|
+
|
|
686
|
+
const meaningfulQuery = "Who is Apostolos Mantzaris?";
|
|
687
|
+
const engines = ["perplexity", "google", "chatgpt", "gemini"];
|
|
688
|
+
const results = {};
|
|
689
|
+
|
|
690
|
+
/**
 * Runs one `all --synthesize` search through bin/search.mjs with the given
 * synthesizer written to the greedyconfig file, then summarizes the JSON the
 * search printed on stdout.
 *
 * The search is launched from a generated helper script (written to
 * `resultsDir`) that pipes the query on stdin and dumps `{ code, out, err }`
 * to `synth_<synthesizer>.json` when the child closes.
 *
 * @param {string} synthesizer - Value stored in the config's `synthesizer`
 *   field; also used to name the helper script and the result file.
 * @returns {Promise<object>} On success: `synthesized`, `synthesizedBy`,
 *   `engines`, `chatgptAnswer`, `chatgptError`, `chatgptStage`,
 *   `chatgptStages` and `answerPreview`. When the child wrote no result file
 *   or its stdout is not JSON: `{ synthesized: false, synthesizedBy: null,
 *   parseError, rawOut }`.
 */
const runSynth = async (synthesizer) => {
  mkdirSync(cfgDir, { recursive: true });
  writeFileSync(
    cfgFile,
    `${JSON.stringify({ engines, synthesizer }, null, 2)}\n`,
    "utf8",
  );

  const outFile = join(resultsDir, `synth_${synthesizer}.json`);
  // Remove any result left by an earlier run so a child that crashes or times
  // out before writing cannot be mistaken for a fresh success.
  if (existsSync(outFile)) unlinkSync(outFile);

  // Paths and the query are embedded with JSON.stringify only: it already
  // yields a valid JS string literal (backslashes and quotes escaped once).
  // Doubling backslashes by hand on top of it would escape them twice.
  const script = `
import { spawn } from 'node:child_process';
import { writeFileSync } from 'node:fs';
const proc = spawn(process.execPath, [
  ${JSON.stringify(join(__dir, "bin", "search.mjs"))},
  'all', '--inline', '--stdin', '--headless', '--synthesize'
], { stdio: ['pipe', 'pipe', 'pipe'] });
let out = '', err = '';
proc.stdout.on('data', d => out += d);
proc.stderr.on('data', d => err += d);
proc.stdin.end(${JSON.stringify(meaningfulQuery)});
proc.on('close', code => {
  writeFileSync(${JSON.stringify(outFile)}, JSON.stringify({
    code, out, err,
  }, null, 2));
});
`;
  const tmp = join(resultsDir, `_synth_${synthesizer}.mjs`);
  writeFileSync(tmp, script, "utf8");
  // NOTE(review): 240 is presumably runNode's timeout in seconds — confirm.
  await runNode([tmp], 240);

  if (!existsSync(outFile)) {
    return {
      synthesized: false,
      synthesizedBy: null,
      parseError: "child produced no result file (crashed or timed out)",
      rawOut: "",
    };
  }

  const data = JSON.parse(readFileSync(outFile, "utf8"));
  let parsed = null;
  try {
    parsed = JSON.parse(data.out);
  } catch (e) {
    return {
      synthesized: false,
      synthesizedBy: null,
      parseError: e.message,
      rawOut: String(data.out ?? "").slice(0, 200),
    };
  }
  return {
    synthesized: parsed._synthesis?.synthesized === true,
    synthesizedBy: parsed._synthesis?.synthesizedBy || null,
    // Keys starting with "_" are metadata, not per-engine results.
    engines: Object.keys(parsed).filter((k) => !k.startsWith("_")),
    chatgptAnswer: parsed.chatgpt?.answer || null,
    chatgptError: parsed.chatgpt?.error || null,
    chatgptStage: parsed.chatgpt?._envelope?.lastStage || null,
    chatgptStages: parsed.chatgpt?._envelope?.stages || null,
    answerPreview: String(parsed._synthesis?.answer || "").slice(0, 120),
  };
};
|
|
741
|
+
|
|
742
|
+
try {
|
|
743
|
+
results.gemini = await runSynth("gemini");
|
|
744
|
+
if (
|
|
745
|
+
results.gemini.synthesized &&
|
|
746
|
+
results.gemini.synthesizedBy === "gemini"
|
|
747
|
+
) {
|
|
748
|
+
passMsg("synth=gemini: synthesizedBy === gemini");
|
|
749
|
+
} else {
|
|
750
|
+
failMsg(
|
|
751
|
+
`synth=gemini: expected synthesizedBy=gemini, got ${JSON.stringify(results.gemini)}`,
|
|
752
|
+
);
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
results.chatgpt = await runSynth("chatgpt");
|
|
756
|
+
if (
|
|
757
|
+
results.chatgpt.synthesized &&
|
|
758
|
+
results.chatgpt.synthesizedBy === "chatgpt"
|
|
759
|
+
) {
|
|
760
|
+
passMsg("synth=chatgpt: synthesizedBy === chatgpt");
|
|
761
|
+
} else {
|
|
762
|
+
failMsg(
|
|
763
|
+
`synth=chatgpt: expected synthesizedBy=chatgpt, got ${JSON.stringify(results.chatgpt)}`,
|
|
764
|
+
);
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
// Also assert chatgpt-search succeeded under parallel load — a
|
|
768
|
+
// regression of the throttling fix or the engine budget would
|
|
769
|
+
// re-introduce the "cdp timeout: eval" failure at stream-wait.
|
|
770
|
+
// We require an actual answer (not just a synthesis routing
|
|
771
|
+
// marker) so the test catches the underlying engine problem.
|
|
772
|
+
if (results.gemini.chatgptAnswer) {
|
|
773
|
+
passMsg(
|
|
774
|
+
"chatgpt-search: produced an answer (parallel contention not blocking)",
|
|
775
|
+
);
|
|
776
|
+
} else {
|
|
777
|
+
failMsg(
|
|
778
|
+
`chatgpt-search: no answer — error=${JSON.stringify(results.gemini.chatgptError)} lastStage=${results.gemini.chatgptStage}`,
|
|
779
|
+
);
|
|
780
|
+
}
|
|
781
|
+
} finally {
|
|
782
|
+
if (hadOriginal) {
|
|
783
|
+
copyFileSync(backup, cfgFile);
|
|
784
|
+
try {
|
|
785
|
+
unlinkSync(backup);
|
|
786
|
+
} catch {}
|
|
787
|
+
} else {
|
|
788
|
+
try {
|
|
789
|
+
unlinkSync(cfgFile);
|
|
790
|
+
} catch {}
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
|
|
499
795
|
// ─── Phase 3: Action Planner ──────────────────────────────────────────
|
|
500
796
|
|
|
501
797
|
subsection("Action Planner — validation & parsing");
|
|
@@ -689,6 +985,229 @@ END_JSON`,
|
|
|
689
985
|
} else {
|
|
690
986
|
failMsg("citation audit: S2 should be flagged as unfetched");
|
|
691
987
|
}
|
|
988
|
+
|
|
989
|
+
subsection("Citation URL Reachability — checkCitationUrls");
|
|
990
|
+
const { checkCitationUrls, runCitationUrlCheck } = await import(
|
|
991
|
+
"./src/search/research.mjs"
|
|
992
|
+
);
|
|
993
|
+
|
|
994
|
+
// Empty sources → ok
|
|
995
|
+
const emptyResult = await checkCitationUrls([]);
|
|
996
|
+
if (emptyResult.ok && emptyResult.reachable.length === 0) {
|
|
997
|
+
passMsg("checkCitationUrls: empty sources returns ok");
|
|
998
|
+
} else {
|
|
999
|
+
failMsg(
|
|
1000
|
+
`checkCitationUrls: empty sources unexpected: ${JSON.stringify(emptyResult)}`,
|
|
1001
|
+
);
|
|
1002
|
+
}
|
|
1003
|
+
|
|
1004
|
+
// Non-HTTP URLs are skipped
|
|
1005
|
+
const nonHttpResult = await checkCitationUrls([
|
|
1006
|
+
{ id: "S1", url: "ftp://example.com/file" },
|
|
1007
|
+
{ id: "S2", url: "not-a-url" },
|
|
1008
|
+
]);
|
|
1009
|
+
if (
|
|
1010
|
+
nonHttpResult.ok &&
|
|
1011
|
+
nonHttpResult.skipped.length === 2 &&
|
|
1012
|
+
nonHttpResult.reachable.length === 0
|
|
1013
|
+
) {
|
|
1014
|
+
passMsg("checkCitationUrls: non-HTTP URLs are skipped");
|
|
1015
|
+
} else {
|
|
1016
|
+
failMsg(
|
|
1017
|
+
`checkCitationUrls: non-HTTP unexpected: ${JSON.stringify(nonHttpResult)}`,
|
|
1018
|
+
);
|
|
1019
|
+
}
|
|
1020
|
+
|
|
1021
|
+
// Concurrency guard: concurrency=0 should not infinite loop
|
|
1022
|
+
// Skip in CI — makes a real HEAD request to example.com which may be
|
|
1023
|
+
// blocked in sandboxed CI environments
|
|
1024
|
+
if (!process.env.CI) {
|
|
1025
|
+
const concurrencyResult = await checkCitationUrls(
|
|
1026
|
+
[{ id: "S1", url: "https://example.com" }],
|
|
1027
|
+
{ concurrency: 0, timeoutMs: 2000 },
|
|
1028
|
+
);
|
|
1029
|
+
if (concurrencyResult.ok || concurrencyResult.dead.length > 0) {
|
|
1030
|
+
passMsg("checkCitationUrls: concurrency=0 does not infinite loop");
|
|
1031
|
+
} else {
|
|
1032
|
+
failMsg(
|
|
1033
|
+
`checkCitationUrls: concurrency=0 unexpected: ${JSON.stringify(concurrencyResult)}`,
|
|
1034
|
+
);
|
|
1035
|
+
}
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
// runCitationUrlCheck returns null on error (non-throwing); with empty sources it should still return an ok result
|
|
1039
|
+
const runResult = await runCitationUrlCheck([]);
|
|
1040
|
+
if (runResult && runResult.ok) {
|
|
1041
|
+
passMsg("runCitationUrlCheck: empty sources returns ok");
|
|
1042
|
+
} else {
|
|
1043
|
+
failMsg(
|
|
1044
|
+
`runCitationUrlCheck: empty sources unexpected: ${JSON.stringify(runResult)}`,
|
|
1045
|
+
);
|
|
1046
|
+
}
|
|
1047
|
+
|
|
1048
|
+
subsection("Provenance Sidecar — writeProvenanceSidecar");
|
|
1049
|
+
const { writeProvenanceSidecar } = await import("./src/search/research.mjs");
|
|
1050
|
+
const { existsSync, rmSync } = await import("node:fs");
|
|
1051
|
+
const { tmpdir } = await import("node:os");
|
|
1052
|
+
|
|
1053
|
+
const testProvenanceDir = join(
|
|
1054
|
+
tmpdir(),
|
|
1055
|
+
`greedysearch-test-provenance-${Date.now()}`,
|
|
1056
|
+
);
|
|
1057
|
+
mkdirSync(testProvenanceDir, { recursive: true });
|
|
1058
|
+
|
|
1059
|
+
try {
|
|
1060
|
+
writeProvenanceSidecar(testProvenanceDir, {
|
|
1061
|
+
query: "test query",
|
|
1062
|
+
rounds: [{ round: 1, actions: [], learnings: [], gaps: [] }],
|
|
1063
|
+
sources: [{ id: "S1", title: "Test Source" }],
|
|
1064
|
+
fetchedSources: [{ id: "S1", contentChars: 500 }],
|
|
1065
|
+
citationAudit: { ok: true, cited: ["S1"], missing: [], unfetched: [] },
|
|
1066
|
+
citationUrls: { reachable: [], dead: [], skipped: [], ok: true },
|
|
1067
|
+
floor: { floorMet: true, checks: { citationsPresent: true } },
|
|
1068
|
+
manifest: {
|
|
1069
|
+
startedAt: "2026-01-01",
|
|
1070
|
+
finishedAt: "2026-01-01",
|
|
1071
|
+
durationMs: 1000,
|
|
1072
|
+
},
|
|
1073
|
+
});
|
|
1074
|
+
|
|
1075
|
+
const provenancePath = join(testProvenanceDir, "provenance.md");
|
|
1076
|
+
if (existsSync(provenancePath)) {
|
|
1077
|
+
const content = readFileSync(provenancePath, "utf8");
|
|
1078
|
+
if (content.includes("test query") && content.includes("S1")) {
|
|
1079
|
+
passMsg(
|
|
1080
|
+
"writeProvenanceSidecar: writes provenance.md with query and sources",
|
|
1081
|
+
);
|
|
1082
|
+
} else {
|
|
1083
|
+
failMsg(
|
|
1084
|
+
"writeProvenanceSidecar: provenance.md missing expected content",
|
|
1085
|
+
);
|
|
1086
|
+
}
|
|
1087
|
+
} else {
|
|
1088
|
+
failMsg("writeProvenanceSidecar: provenance.md not created");
|
|
1089
|
+
}
|
|
1090
|
+
} catch (e) {
|
|
1091
|
+
failMsg(`writeProvenanceSidecar: threw error: ${e.message}`);
|
|
1092
|
+
} finally {
|
|
1093
|
+
try {
|
|
1094
|
+
rmSync(testProvenanceDir, { recursive: true, force: true });
|
|
1095
|
+
} catch {}
|
|
1096
|
+
}
|
|
1097
|
+
|
|
1098
|
+
subsection("Research Floor and Question Ledger");
|
|
1099
|
+
const { computeResearchFloor, createQuestionLedger, updateQuestionLedger } =
|
|
1100
|
+
await import("./src/search/research.mjs");
|
|
1101
|
+
const floorOk = computeResearchFloor({
|
|
1102
|
+
sources: [
|
|
1103
|
+
{ id: "S1", sourceType: "official-docs" },
|
|
1104
|
+
{ id: "S2", sourceType: "community" },
|
|
1105
|
+
],
|
|
1106
|
+
fetchedSources: [
|
|
1107
|
+
{ id: "S1", contentChars: 500 },
|
|
1108
|
+
{ id: "S2", contentChars: 500 },
|
|
1109
|
+
{ id: "S3", contentChars: 500 },
|
|
1110
|
+
],
|
|
1111
|
+
synthesis: {
|
|
1112
|
+
claims: [{ claim: "React has docs", sourceIds: ["S1"] }],
|
|
1113
|
+
},
|
|
1114
|
+
citationAudit: { ok: true, cited: ["S1"], unfetched: [] },
|
|
1115
|
+
rounds: [{ round: 1 }],
|
|
1116
|
+
qualityScore: 8.2,
|
|
1117
|
+
maxSources: 3,
|
|
1118
|
+
});
|
|
1119
|
+
if (floorOk.floorMet)
|
|
1120
|
+
passMsg("research floor: passes with evidence and citations");
|
|
1121
|
+
else failMsg(`research floor: expected pass, got ${JSON.stringify(floorOk)}`);
|
|
1122
|
+
|
|
1123
|
+
const floorMissingCitation = computeResearchFloor({
|
|
1124
|
+
sources: [{ id: "S1", sourceType: "official-docs" }],
|
|
1125
|
+
fetchedSources: [{ id: "S1", contentChars: 500 }],
|
|
1126
|
+
synthesis: { claims: [] },
|
|
1127
|
+
citationAudit: { ok: true, cited: [], unfetched: [] },
|
|
1128
|
+
rounds: [{ round: 1 }],
|
|
1129
|
+
qualityScore: 9,
|
|
1130
|
+
maxSources: 1,
|
|
1131
|
+
});
|
|
1132
|
+
if (
|
|
1133
|
+
!floorMissingCitation.floorMet &&
|
|
1134
|
+
!floorMissingCitation.checks.citationsPresent
|
|
1135
|
+
) {
|
|
1136
|
+
passMsg("research floor: rejects missing citations");
|
|
1137
|
+
} else {
|
|
1138
|
+
failMsg("research floor: missing citations should fail");
|
|
1139
|
+
}
|
|
1140
|
+
|
|
1141
|
+
const ledger = createQuestionLedger("What is React 19?");
|
|
1142
|
+
updateQuestionLedger(ledger, {
|
|
1143
|
+
roundNumber: 1,
|
|
1144
|
+
actions: [
|
|
1145
|
+
{
|
|
1146
|
+
type: "search",
|
|
1147
|
+
query: "React 19 actions",
|
|
1148
|
+
researchGoal: "Find React 19 feature list",
|
|
1149
|
+
},
|
|
1150
|
+
],
|
|
1151
|
+
learningPayload: {
|
|
1152
|
+
answeredQuestions: [
|
|
1153
|
+
{ id: "Q1", evidence: "React 19 is documented", sourceIds: ["S1"] },
|
|
1154
|
+
],
|
|
1155
|
+
newQuestions: ["Which React 19 features are stable?"],
|
|
1156
|
+
},
|
|
1157
|
+
});
|
|
1158
|
+
const closedQ1 = ledger.find((q) => q.id === "Q1")?.status === "closed";
|
|
1159
|
+
const addedOpen = ledger.some(
|
|
1160
|
+
(q) => q.question.includes("stable") && q.status === "open",
|
|
1161
|
+
);
|
|
1162
|
+
if (closedQ1 && addedOpen) {
|
|
1163
|
+
passMsg("question ledger: closes answered questions and adds follow-ups");
|
|
1164
|
+
} else {
|
|
1165
|
+
failMsg(`question ledger: unexpected ${JSON.stringify(ledger)}`);
|
|
1166
|
+
}
|
|
1167
|
+
|
|
1168
|
+
subsection("Structured JSON parser");
|
|
1169
|
+
const { parseStructuredJson } = await import("./src/search/synthesis.mjs");
|
|
1170
|
+
const parsedLooseJson = parseStructuredJson(`BEGIN_JSON
|
|
1171
|
+
{"answer":"line one
|
|
1172
|
+
line two","claims":[{"claim":"x"}]}
|
|
1173
|
+
END_JSON
|
|
1174
|
+
trailing note`);
|
|
1175
|
+
if (parsedLooseJson?.answer?.includes("line two")) {
|
|
1176
|
+
passMsg("structured JSON: repairs raw newlines inside strings");
|
|
1177
|
+
} else {
|
|
1178
|
+
failMsg(
|
|
1179
|
+
`structured JSON: failed to repair ${JSON.stringify(parsedLooseJson)}`,
|
|
1180
|
+
);
|
|
1181
|
+
}
|
|
1182
|
+
|
|
1183
|
+
subsection("Progress tracker — bar rendering and ETA");
|
|
1184
|
+
const { createProgressTracker } = await import("./src/search/progress.mjs");
|
|
1185
|
+
const silentTracker = createProgressTracker({
|
|
1186
|
+
totalActions: 4,
|
|
1187
|
+
silent: true,
|
|
1188
|
+
});
|
|
1189
|
+
silentTracker.startAction("search", "test");
|
|
1190
|
+
silentTracker.endAction();
|
|
1191
|
+
silentTracker.startAction("fetch", "https://example.com");
|
|
1192
|
+
silentTracker.endAction();
|
|
1193
|
+
if (silentTracker.getElapsedMs() >= 0) {
|
|
1194
|
+
passMsg("progress: tracker records action timing");
|
|
1195
|
+
} else {
|
|
1196
|
+
failMsg("progress: tracker elapsed time invalid");
|
|
1197
|
+
}
|
|
1198
|
+
// Test bar formatting indirectly via duration
|
|
1199
|
+
const tracker2 = createProgressTracker({
|
|
1200
|
+
totalActions: 2,
|
|
1201
|
+
totalRounds: 1,
|
|
1202
|
+
silent: true,
|
|
1203
|
+
});
|
|
1204
|
+
tracker2.startAction("search", "q1");
|
|
1205
|
+
tracker2.endAction();
|
|
1206
|
+
if (tracker2.getElapsedMs() >= 0) {
|
|
1207
|
+
passMsg("progress: round tracking works");
|
|
1208
|
+
} else {
|
|
1209
|
+
failMsg("progress: round tracking broken");
|
|
1210
|
+
}
|
|
692
1211
|
}
|
|
693
1212
|
|
|
694
1213
|
// ─────────────────────────────────────────────────────────────────────────────
|