@apmantza/greedysearch-pi 1.9.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +80 -1
- package/README.md +82 -47
- package/bin/cdp.mjs +1153 -1108
- package/bin/launch.mjs +9 -0
- package/bin/search.mjs +197 -68
- package/extractors/bing-copilot.mjs +42 -4
- package/extractors/chatgpt.mjs +436 -0
- package/extractors/common.mjs +155 -21
- package/extractors/consensus.mjs +655 -0
- package/extractors/gemini.mjs +335 -217
- package/extractors/logically.mjs +567 -0
- package/extractors/selectors.mjs +3 -2
- package/extractors/semantic-scholar.mjs +219 -0
- package/package.json +7 -3
- package/skills/greedy-search/skill.md +9 -3
- package/src/fetcher.mjs +8 -1
- package/src/formatters/results.ts +163 -128
- package/src/search/browser-lifecycle.mjs +27 -5
- package/src/search/chrome.mjs +653 -590
- package/src/search/constants.mjs +150 -39
- package/src/search/engines.mjs +114 -76
- package/src/search/fetch-source.mjs +566 -451
- package/src/search/pdf.mjs +68 -0
- package/src/search/recovery.mjs +51 -45
- package/src/search/research.mjs +1059 -61
- package/src/search/sources.mjs +52 -22
- package/src/search/synthesis-runner.mjs +105 -26
- package/src/search/synthesis.mjs +286 -246
- package/src/tools/greedy-search-handler.ts +124 -52
- package/src/tools/shared.ts +187 -186
- package/src/types.ts +110 -104
- package/test.mjs +377 -6
package/src/types.ts
CHANGED
|
@@ -1,104 +1,110 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* TypeScript interfaces for GreedySearch data structures
|
|
3
|
-
*
|
|
4
|
-
* These types document the shape of data flowing between modules.
|
|
5
|
-
* They can be imported by TypeScript files (index.ts, tool handlers, formatters)
|
|
6
|
-
* and used for type safety without runtime overhead.
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
// ============================================================================
|
|
10
|
-
// Search Result Types
|
|
11
|
-
// ============================================================================
|
|
12
|
-
|
|
13
|
-
/** A single source extracted from search results */
|
|
14
|
-
export interface Source {
|
|
15
|
-
url: string;
|
|
16
|
-
title: string;
|
|
17
|
-
type?:
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
//
|
|
57
|
-
//
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
/**
|
|
78
|
-
export interface
|
|
79
|
-
content: Array<{ type: "text"; text: string }>;
|
|
80
|
-
details:
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
/**
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
1
|
+
/**
|
|
2
|
+
* TypeScript interfaces for GreedySearch data structures
|
|
3
|
+
*
|
|
4
|
+
* These types document the shape of data flowing between modules.
|
|
5
|
+
* They can be imported by TypeScript files (index.ts, tool handlers, formatters)
|
|
6
|
+
* and used for type safety without runtime overhead.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// ============================================================================
|
|
10
|
+
// Search Result Types
|
|
11
|
+
// ============================================================================
|
|
12
|
+
|
|
13
|
+
/** A single source extracted from search results */
|
|
14
|
+
export interface Source {
|
|
15
|
+
url: string;
|
|
16
|
+
title: string;
|
|
17
|
+
type?:
|
|
18
|
+
| "official-docs"
|
|
19
|
+
| "maintainer-blog"
|
|
20
|
+
| "repo"
|
|
21
|
+
| "academic"
|
|
22
|
+
| "community"
|
|
23
|
+
| "website";
|
|
24
|
+
domain?: string;
|
|
25
|
+
snippet?: string;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/** Result from a single search engine */
|
|
29
|
+
export interface SearchResult {
|
|
30
|
+
engine: string;
|
|
31
|
+
answer: string;
|
|
32
|
+
sources: Source[];
|
|
33
|
+
url?: string;
|
|
34
|
+
query?: string;
|
|
35
|
+
error?: string;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/** Synthesis result combining multiple engine results */
|
|
39
|
+
export interface SynthesisResult {
|
|
40
|
+
answer: string;
|
|
41
|
+
agreementLevel?: "consensus" | "majority" | "mixed" | "conflicting";
|
|
42
|
+
claims?: Claim[];
|
|
43
|
+
sourceIds?: string[];
|
|
44
|
+
confidence?: ConfidenceMetrics;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/** A single claim within a synthesis */
|
|
48
|
+
export interface Claim {
|
|
49
|
+
text: string;
|
|
50
|
+
sourceIds: string[];
|
|
51
|
+
confidence?: "high" | "medium" | "low";
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/** Confidence metrics for a synthesis */
|
|
55
|
+
export interface ConfidenceMetrics {
|
|
56
|
+
overall: number; // 0-1
|
|
57
|
+
consensus: number; // fraction of engines agreeing
|
|
58
|
+
sourceCount: number;
|
|
59
|
+
engineCount: number;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// ============================================================================
|
|
63
|
+
// Source Registry Types
|
|
64
|
+
// ============================================================================
|
|
65
|
+
|
|
66
|
+
/** A classified source in the registry */
|
|
67
|
+
export interface ClassifiedSource extends Source {
|
|
68
|
+
engineOrigin: string[];
|
|
69
|
+
isOfficial: boolean;
|
|
70
|
+
consensus: number; // fraction of engines citing this source
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// ============================================================================
|
|
74
|
+
// Tool Result Types
|
|
75
|
+
// ============================================================================
|
|
76
|
+
|
|
77
|
+
/** Progress update sent via onUpdate during long-running searches */
|
|
78
|
+
export interface ProgressUpdate {
|
|
79
|
+
content: Array<{ type: "text"; text: string }>;
|
|
80
|
+
details: { _progress: true };
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/** Pi tool result format */
|
|
84
|
+
export interface ToolResult {
|
|
85
|
+
content: Array<{ type: "text"; text: string }>;
|
|
86
|
+
details: Record<string, unknown>;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
// ============================================================================
|
|
90
|
+
// Engine Configuration Types
|
|
91
|
+
// ============================================================================
|
|
92
|
+
|
|
93
|
+
/** Engine definition for the ENGINES map */
|
|
94
|
+
export interface EngineConfig {
|
|
95
|
+
/** Extractor script filename (e.g. "perplexity.mjs") */
|
|
96
|
+
script: string;
|
|
97
|
+
/** Human-readable label for progress messages */
|
|
98
|
+
label: string;
|
|
99
|
+
/** Domain pattern for source matching */
|
|
100
|
+
domain: string;
|
|
101
|
+
/** URL pattern for the engine */
|
|
102
|
+
url: string;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// ============================================================================
|
|
106
|
+
// Constants
|
|
107
|
+
// ============================================================================
|
|
108
|
+
|
|
109
|
+
// Runtime defaults are in src/search/defaults.mjs (since .ts files can't be
|
|
110
|
+
// imported directly by Node.js). Import DEFAULTS from there for runtime values.
|
package/test.mjs
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
// node test.mjs flags # flag/option tests only
|
|
10
10
|
// node test.mjs edge # edge case tests only
|
|
11
11
|
// node test.mjs unit # fast unit tests only (no Chrome needed)
|
|
12
|
+
// node test.mjs synth # synthesis config smoke (gemini + chatgpt)
|
|
12
13
|
|
|
13
14
|
import { spawn } from "node:child_process";
|
|
14
15
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
@@ -93,7 +94,7 @@ function checkJson(file, checkFn) {
|
|
|
93
94
|
// Unit Tests (no Chrome required)
|
|
94
95
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
95
96
|
|
|
96
|
-
if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
97
|
+
if (["", "all", "unit", "quick", "smoke", "synth"].includes(mode)) {
|
|
97
98
|
section("🧪 Unit Tests");
|
|
98
99
|
|
|
99
100
|
subsection("stripQuotes — param double-escaping workaround (issue #2)");
|
|
@@ -245,18 +246,110 @@ if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
|
245
246
|
failMsg(`pplxPattern: ${label} — expected ${expected}, got ${matched}`);
|
|
246
247
|
}
|
|
247
248
|
|
|
248
|
-
subsection("
|
|
249
|
-
const { isChromeHeadless
|
|
250
|
-
"./src/search/chrome.mjs"
|
|
249
|
+
subsection("Chrome lifecycle — visible/headless mode detection");
|
|
250
|
+
const { detectHeadlessFromChromeCommandLine, isChromeHeadless } =
|
|
251
|
+
await import("./src/search/chrome.mjs");
|
|
252
|
+
const { commandLineMatchesGreedyChrome } = await import(
|
|
253
|
+
"./src/search/browser-lifecycle.mjs"
|
|
251
254
|
);
|
|
252
|
-
|
|
253
|
-
|
|
255
|
+
|
|
256
|
+
const visibleCmd =
|
|
257
|
+
'"C:/Program Files/Google/Chrome/Application/chrome.exe" --remote-debugging-port=9222 --user-data-dir=C:\\Users\\me\\AppData\\Local\\Temp\\greedysearch-chrome-profile about:blank';
|
|
258
|
+
const headlessCmd = `${visibleCmd} --headless=new`;
|
|
259
|
+
const rendererCmd = `${visibleCmd} --type=renderer`;
|
|
260
|
+
|
|
261
|
+
if (detectHeadlessFromChromeCommandLine(visibleCmd) === false) {
|
|
262
|
+
passMsg("chrome mode: live visible command line overrides stale marker");
|
|
263
|
+
} else {
|
|
264
|
+
failMsg("chrome mode: visible command line should detect non-headless");
|
|
265
|
+
}
|
|
266
|
+
if (detectHeadlessFromChromeCommandLine(headlessCmd) === true) {
|
|
267
|
+
passMsg("chrome mode: live headless command line detected");
|
|
268
|
+
} else {
|
|
269
|
+
failMsg("chrome mode: headless command line should detect headless");
|
|
270
|
+
}
|
|
271
|
+
if (detectHeadlessFromChromeCommandLine(rendererCmd) === null) {
|
|
272
|
+
passMsg("chrome mode: ignores child renderer processes");
|
|
273
|
+
} else {
|
|
274
|
+
failMsg("chrome mode: renderer command line should be ignored");
|
|
275
|
+
}
|
|
276
|
+
if (
|
|
277
|
+
commandLineMatchesGreedyChrome(
|
|
278
|
+
visibleCmd,
|
|
279
|
+
"C:/Users/me/AppData/Local/Temp/greedysearch-chrome-profile",
|
|
280
|
+
)
|
|
281
|
+
) {
|
|
282
|
+
passMsg(
|
|
283
|
+
"stale cleanup: Windows backslash profile path verifies as GreedySearch Chrome",
|
|
284
|
+
);
|
|
285
|
+
} else {
|
|
286
|
+
failMsg(
|
|
287
|
+
"stale cleanup: should accept equivalent slash/backslash profile paths",
|
|
288
|
+
);
|
|
289
|
+
}
|
|
290
|
+
if (
|
|
291
|
+
!commandLineMatchesGreedyChrome(
|
|
292
|
+
rendererCmd,
|
|
293
|
+
"C:/Users/me/AppData/Local/Temp/greedysearch-chrome-profile",
|
|
294
|
+
)
|
|
295
|
+
) {
|
|
296
|
+
passMsg("stale cleanup: renderer child is not treated as browser process");
|
|
297
|
+
} else {
|
|
298
|
+
failMsg(
|
|
299
|
+
"stale cleanup: renderer child should not verify as browser process",
|
|
300
|
+
);
|
|
301
|
+
}
|
|
302
|
+
if (typeof isChromeHeadless === "function")
|
|
303
|
+
passMsg("isChromeHeadless: function exists");
|
|
254
304
|
else failMsg("isChromeHeadless: not a function");
|
|
255
305
|
|
|
306
|
+
subsection("Synthesis routing — configurable synthesizer helpers");
|
|
307
|
+
const { normalizeSynthesizer, getSynthesisStartUrl } = await import(
|
|
308
|
+
"./src/search/synthesis-runner.mjs"
|
|
309
|
+
);
|
|
310
|
+
if (normalizeSynthesizer("gem") === "gemini")
|
|
311
|
+
passMsg("synthesizer: gem alias normalizes to gemini");
|
|
312
|
+
else failMsg("synthesizer: gem alias should normalize to gemini");
|
|
313
|
+
if (normalizeSynthesizer("gpt") === "chatgpt")
|
|
314
|
+
passMsg("synthesizer: gpt alias normalizes to chatgpt");
|
|
315
|
+
else failMsg("synthesizer: gpt alias should normalize to chatgpt");
|
|
316
|
+
if (getSynthesisStartUrl("chatgpt") === "https://chatgpt.com/")
|
|
317
|
+
passMsg("synthesizer: chatgpt start URL");
|
|
318
|
+
else failMsg("synthesizer: unexpected chatgpt start URL");
|
|
319
|
+
|
|
256
320
|
subsection("Research mode option/query normalization");
|
|
257
321
|
const { clampResearchOptions, normalizeResearchQueries } = await import(
|
|
258
322
|
"./src/search/research.mjs"
|
|
259
323
|
);
|
|
324
|
+
const { ALL_ENGINES, DEFAULT_SYNTHESIZER, ENGINES, RESEARCH_ENGINES } =
|
|
325
|
+
await import("./src/search/constants.mjs");
|
|
326
|
+
if (RESEARCH_ENGINES.join(",") === ALL_ENGINES.join(",")) {
|
|
327
|
+
passMsg("research config: reuses normal all-engine fan-out");
|
|
328
|
+
} else {
|
|
329
|
+
failMsg(
|
|
330
|
+
`research config: expected ${ALL_ENGINES.join(",")}, got ${RESEARCH_ENGINES.join(",")}`,
|
|
331
|
+
);
|
|
332
|
+
}
|
|
333
|
+
if (DEFAULT_SYNTHESIZER === "gemini") {
|
|
334
|
+
passMsg("research config: default synthesizer is gemini");
|
|
335
|
+
} else {
|
|
336
|
+
failMsg(
|
|
337
|
+
`research config: expected gemini default, got ${DEFAULT_SYNTHESIZER}`,
|
|
338
|
+
);
|
|
339
|
+
}
|
|
340
|
+
if (!ENGINES.consensus && !ENGINES.cns) {
|
|
341
|
+
passMsg("research config: consensus is not a registered engine");
|
|
342
|
+
} else {
|
|
343
|
+
failMsg("research config: consensus should not be registered");
|
|
344
|
+
}
|
|
345
|
+
if (
|
|
346
|
+
ENGINES["semantic-scholar"] &&
|
|
347
|
+
ENGINES.s2 === ENGINES["semantic-scholar"]
|
|
348
|
+
) {
|
|
349
|
+
passMsg("research config: semantic-scholar is registered with s2 alias");
|
|
350
|
+
} else {
|
|
351
|
+
failMsg("research config: semantic-scholar registration missing");
|
|
352
|
+
}
|
|
260
353
|
const clamped = clampResearchOptions({
|
|
261
354
|
breadth: 99,
|
|
262
355
|
iterations: 0,
|
|
@@ -367,6 +460,66 @@ if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
|
367
460
|
);
|
|
368
461
|
}
|
|
369
462
|
|
|
463
|
+
const academicRanked = buildSourceRegistry(
|
|
464
|
+
{
|
|
465
|
+
"semantic-scholar": {
|
|
466
|
+
sources: [
|
|
467
|
+
{
|
|
468
|
+
title:
|
|
469
|
+
"Chain of Thought Prompting Elicits Reasoning in Large Language Models",
|
|
470
|
+
url: "https://arxiv.org/pdf/2201.11903.pdf",
|
|
471
|
+
},
|
|
472
|
+
],
|
|
473
|
+
},
|
|
474
|
+
},
|
|
475
|
+
"large language models",
|
|
476
|
+
);
|
|
477
|
+
if (
|
|
478
|
+
academicRanked[0]?.engines.includes("semantic-scholar") &&
|
|
479
|
+
academicRanked[0]?.sourceType === "academic"
|
|
480
|
+
) {
|
|
481
|
+
passMsg("source ranking: semantic-scholar sources are indexed as academic");
|
|
482
|
+
} else {
|
|
483
|
+
failMsg(
|
|
484
|
+
`source ranking: unexpected academic source ${JSON.stringify(academicRanked[0])}`,
|
|
485
|
+
);
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
// Social hard guardrail: a single-engine x.com citation must never be
|
|
489
|
+
// S1. Composite score is high (Google rank #1, x.com matched the
|
|
490
|
+
// "x" letter in "context"), so the smartScore −20 penalty alone
|
|
491
|
+
// isn't enough — the post-sort demotion is what keeps socials out
|
|
492
|
+
// of the top 12.
|
|
493
|
+
const socialGuardrail = buildSourceRegistry(
|
|
494
|
+
{
|
|
495
|
+
google: {
|
|
496
|
+
sources: [
|
|
497
|
+
{
|
|
498
|
+
title: "Redis on X",
|
|
499
|
+
url: "https://x.com/Redisinc/status/123",
|
|
500
|
+
},
|
|
501
|
+
{
|
|
502
|
+
title: "Self-Route paper",
|
|
503
|
+
url: "https://arxiv.org/abs/2407.16833",
|
|
504
|
+
},
|
|
505
|
+
],
|
|
506
|
+
},
|
|
507
|
+
},
|
|
508
|
+
"retrieval augmented generation vs long context LLMs for factual accuracy and hallucination reduction",
|
|
509
|
+
);
|
|
510
|
+
if (
|
|
511
|
+
socialGuardrail[0]?.sourceType !== "social" &&
|
|
512
|
+
socialGuardrail[0]?.domain === "arxiv.org"
|
|
513
|
+
) {
|
|
514
|
+
passMsg(
|
|
515
|
+
"source ranking: social sources are demoted below academic even with a higher composite score",
|
|
516
|
+
);
|
|
517
|
+
} else {
|
|
518
|
+
failMsg(
|
|
519
|
+
`source ranking: S1 should be arxiv, got ${socialGuardrail[0]?.domain} (${socialGuardrail[0]?.sourceType})`,
|
|
520
|
+
);
|
|
521
|
+
}
|
|
522
|
+
|
|
370
523
|
// ─── Phase 2: Quality Evaluator + Novelty Gate ────────────────────────
|
|
371
524
|
|
|
372
525
|
subsection("Novelty Gate — Jaccard similarity");
|
|
@@ -496,6 +649,139 @@ if (["", "all", "unit", "quick", "smoke"].includes(mode)) {
|
|
|
496
649
|
if (gapTargets) passMsg("fallback queries: targets identified gaps");
|
|
497
650
|
else failMsg("fallback queries: gaps not targeted");
|
|
498
651
|
|
|
652
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
653
|
+
// Synthesis routing — config-driven live smoke
|
|
654
|
+
//
|
|
655
|
+
// Verifies the `synthesizer` field in ~/.pi/greedyconfig is honored by
|
|
656
|
+
// `engine: "all" --synthesize`. Runs both the default (gemini) and an
|
|
657
|
+
// override (chatgpt). Backs up the user's config and restores it after.
|
|
658
|
+
//
|
|
659
|
+
// Mode gating: only runs in "", "all", or "synth". Skipped in unit/quick/
|
|
660
|
+
// smoke because it requires Chrome + network and takes several minutes.
|
|
661
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
662
|
+
if (["", "all", "synth"].includes(mode)) {
|
|
663
|
+
subsection(
|
|
664
|
+
"Synthesis routing — config-driven live smoke (gemini + chatgpt)",
|
|
665
|
+
);
|
|
666
|
+
const { existsSync, copyFileSync, writeFileSync, unlinkSync } =
|
|
667
|
+
await import("node:fs");
|
|
668
|
+
const { homedir } = await import("node:os");
|
|
669
|
+
const { join } = await import("node:path");
|
|
670
|
+
const cfgDir = join(homedir(), ".pi");
|
|
671
|
+
const cfgFile = join(cfgDir, "greedyconfig");
|
|
672
|
+
const backup = join(cfgDir, "greedyconfig.test-backup");
|
|
673
|
+
const hadOriginal = existsSync(cfgFile);
|
|
674
|
+
if (hadOriginal) copyFileSync(cfgFile, backup);
|
|
675
|
+
|
|
676
|
+
const meaningfulQuery = "Who is Apostolos Mantzaris?";
|
|
677
|
+
const engines = ["perplexity", "google", "chatgpt", "gemini"];
|
|
678
|
+
const results = {};
|
|
679
|
+
|
|
680
|
+
const runSynth = async (synthesizer) => {
|
|
681
|
+
mkdirSync(cfgDir, { recursive: true });
|
|
682
|
+
writeFileSync(
|
|
683
|
+
cfgFile,
|
|
684
|
+
JSON.stringify({ engines, synthesizer }, null, 2) + "\n",
|
|
685
|
+
"utf8",
|
|
686
|
+
);
|
|
687
|
+
const outFile = join(resultsDir, `synth_${synthesizer}.json`);
|
|
688
|
+
const script = `
|
|
689
|
+
import { spawn } from 'node:child_process';
|
|
690
|
+
import { writeFileSync } from 'node:fs';
|
|
691
|
+
const proc = spawn(process.execPath, [
|
|
692
|
+
'${join(__dir, "bin", "search.mjs").replace(/\\/g, "\\\\")}',
|
|
693
|
+
'all', '--inline', '--stdin', '--headless', '--synthesize'
|
|
694
|
+
], { stdio: ['pipe', 'pipe', 'pipe'] });
|
|
695
|
+
let out = '', err = '';
|
|
696
|
+
proc.stdout.on('data', d => out += d);
|
|
697
|
+
proc.stderr.on('data', d => err += d);
|
|
698
|
+
proc.stdin.end(${JSON.stringify(meaningfulQuery)});
|
|
699
|
+
proc.on('close', code => {
|
|
700
|
+
writeFileSync(${JSON.stringify(outFile.replace(/\\/g, "\\\\"))}, JSON.stringify({
|
|
701
|
+
code, out, err,
|
|
702
|
+
}, null, 2));
|
|
703
|
+
});
|
|
704
|
+
`;
|
|
705
|
+
const tmp = join(resultsDir, `_synth_${synthesizer}.mjs`);
|
|
706
|
+
writeFileSync(tmp, script, "utf8");
|
|
707
|
+
await runNode([tmp], 240);
|
|
708
|
+
const data = JSON.parse(readFileSync(outFile, "utf8"));
|
|
709
|
+
let parsed = null;
|
|
710
|
+
try {
|
|
711
|
+
parsed = JSON.parse(data.out);
|
|
712
|
+
} catch (e) {
|
|
713
|
+
return {
|
|
714
|
+
synthesized: false,
|
|
715
|
+
synthesizedBy: null,
|
|
716
|
+
parseError: e.message,
|
|
717
|
+
rawOut: data.out.slice(0, 200),
|
|
718
|
+
};
|
|
719
|
+
}
|
|
720
|
+
return {
|
|
721
|
+
synthesized: parsed._synthesis?.synthesized === true,
|
|
722
|
+
synthesizedBy: parsed._synthesis?.synthesizedBy || null,
|
|
723
|
+
engines: Object.keys(parsed).filter((k) => !k.startsWith("_")),
|
|
724
|
+
chatgptAnswer: parsed.chatgpt?.answer || null,
|
|
725
|
+
chatgptError: parsed.chatgpt?.error || null,
|
|
726
|
+
chatgptStage: parsed.chatgpt?._envelope?.lastStage || null,
|
|
727
|
+
chatgptStages: parsed.chatgpt?._envelope?.stages || null,
|
|
728
|
+
answerPreview: String(parsed._synthesis?.answer || "").slice(0, 120),
|
|
729
|
+
};
|
|
730
|
+
};
|
|
731
|
+
|
|
732
|
+
try {
|
|
733
|
+
results.gemini = await runSynth("gemini");
|
|
734
|
+
if (
|
|
735
|
+
results.gemini.synthesized &&
|
|
736
|
+
results.gemini.synthesizedBy === "gemini"
|
|
737
|
+
) {
|
|
738
|
+
passMsg("synth=gemini: synthesizedBy === gemini");
|
|
739
|
+
} else {
|
|
740
|
+
failMsg(
|
|
741
|
+
`synth=gemini: expected synthesizedBy=gemini, got ${JSON.stringify(results.gemini)}`,
|
|
742
|
+
);
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
results.chatgpt = await runSynth("chatgpt");
|
|
746
|
+
if (
|
|
747
|
+
results.chatgpt.synthesized &&
|
|
748
|
+
results.chatgpt.synthesizedBy === "chatgpt"
|
|
749
|
+
) {
|
|
750
|
+
passMsg("synth=chatgpt: synthesizedBy === chatgpt");
|
|
751
|
+
} else {
|
|
752
|
+
failMsg(
|
|
753
|
+
`synth=chatgpt: expected synthesizedBy=chatgpt, got ${JSON.stringify(results.chatgpt)}`,
|
|
754
|
+
);
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
// Also assert chatgpt-search succeeded under parallel load — a
|
|
758
|
+
// regression of the throttling fix or the engine budget would
|
|
759
|
+
// re-introduce the "cdp timeout: eval" failure at stream-wait.
|
|
760
|
+
// We require an actual answer (not just a synthesis routing
|
|
761
|
+
// marker) so the test catches the underlying engine problem.
|
|
762
|
+
if (results.gemini.chatgptAnswer) {
|
|
763
|
+
passMsg(
|
|
764
|
+
"chatgpt-search: produced an answer (parallel contention not blocking)",
|
|
765
|
+
);
|
|
766
|
+
} else {
|
|
767
|
+
failMsg(
|
|
768
|
+
`chatgpt-search: no answer — error=${JSON.stringify(results.gemini.chatgptError)} lastStage=${results.gemini.chatgptStage}`,
|
|
769
|
+
);
|
|
770
|
+
}
|
|
771
|
+
} finally {
|
|
772
|
+
if (hadOriginal) {
|
|
773
|
+
copyFileSync(backup, cfgFile);
|
|
774
|
+
try {
|
|
775
|
+
unlinkSync(backup);
|
|
776
|
+
} catch {}
|
|
777
|
+
} else {
|
|
778
|
+
try {
|
|
779
|
+
unlinkSync(cfgFile);
|
|
780
|
+
} catch {}
|
|
781
|
+
}
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
|
|
499
785
|
// ─── Phase 3: Action Planner ──────────────────────────────────────────
|
|
500
786
|
|
|
501
787
|
subsection("Action Planner — validation & parsing");
|
|
@@ -689,6 +975,91 @@ END_JSON`,
|
|
|
689
975
|
} else {
|
|
690
976
|
failMsg("citation audit: S2 should be flagged as unfetched");
|
|
691
977
|
}
|
|
978
|
+
|
|
979
|
+
subsection("Research Floor and Question Ledger");
|
|
980
|
+
const { computeResearchFloor, createQuestionLedger, updateQuestionLedger } =
|
|
981
|
+
await import("./src/search/research.mjs");
|
|
982
|
+
const floorOk = computeResearchFloor({
|
|
983
|
+
sources: [
|
|
984
|
+
{ id: "S1", sourceType: "official-docs" },
|
|
985
|
+
{ id: "S2", sourceType: "community" },
|
|
986
|
+
],
|
|
987
|
+
fetchedSources: [
|
|
988
|
+
{ id: "S1", contentChars: 500 },
|
|
989
|
+
{ id: "S2", contentChars: 500 },
|
|
990
|
+
{ id: "S3", contentChars: 500 },
|
|
991
|
+
],
|
|
992
|
+
synthesis: {
|
|
993
|
+
claims: [{ claim: "React has docs", sourceIds: ["S1"] }],
|
|
994
|
+
},
|
|
995
|
+
citationAudit: { ok: true, cited: ["S1"], unfetched: [] },
|
|
996
|
+
rounds: [{ round: 1 }],
|
|
997
|
+
qualityScore: 8.2,
|
|
998
|
+
maxSources: 3,
|
|
999
|
+
});
|
|
1000
|
+
if (floorOk.floorMet)
|
|
1001
|
+
passMsg("research floor: passes with evidence and citations");
|
|
1002
|
+
else failMsg(`research floor: expected pass, got ${JSON.stringify(floorOk)}`);
|
|
1003
|
+
|
|
1004
|
+
const floorMissingCitation = computeResearchFloor({
|
|
1005
|
+
sources: [{ id: "S1", sourceType: "official-docs" }],
|
|
1006
|
+
fetchedSources: [{ id: "S1", contentChars: 500 }],
|
|
1007
|
+
synthesis: { claims: [] },
|
|
1008
|
+
citationAudit: { ok: true, cited: [], unfetched: [] },
|
|
1009
|
+
rounds: [{ round: 1 }],
|
|
1010
|
+
qualityScore: 9,
|
|
1011
|
+
maxSources: 1,
|
|
1012
|
+
});
|
|
1013
|
+
if (
|
|
1014
|
+
!floorMissingCitation.floorMet &&
|
|
1015
|
+
!floorMissingCitation.checks.citationsPresent
|
|
1016
|
+
) {
|
|
1017
|
+
passMsg("research floor: rejects missing citations");
|
|
1018
|
+
} else {
|
|
1019
|
+
failMsg("research floor: missing citations should fail");
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
const ledger = createQuestionLedger("What is React 19?");
|
|
1023
|
+
updateQuestionLedger(ledger, {
|
|
1024
|
+
roundNumber: 1,
|
|
1025
|
+
actions: [
|
|
1026
|
+
{
|
|
1027
|
+
type: "search",
|
|
1028
|
+
query: "React 19 actions",
|
|
1029
|
+
researchGoal: "Find React 19 feature list",
|
|
1030
|
+
},
|
|
1031
|
+
],
|
|
1032
|
+
learningPayload: {
|
|
1033
|
+
answeredQuestions: [
|
|
1034
|
+
{ id: "Q1", evidence: "React 19 is documented", sourceIds: ["S1"] },
|
|
1035
|
+
],
|
|
1036
|
+
newQuestions: ["Which React 19 features are stable?"],
|
|
1037
|
+
},
|
|
1038
|
+
});
|
|
1039
|
+
const closedQ1 = ledger.find((q) => q.id === "Q1")?.status === "closed";
|
|
1040
|
+
const addedOpen = ledger.some(
|
|
1041
|
+
(q) => q.question.includes("stable") && q.status === "open",
|
|
1042
|
+
);
|
|
1043
|
+
if (closedQ1 && addedOpen) {
|
|
1044
|
+
passMsg("question ledger: closes answered questions and adds follow-ups");
|
|
1045
|
+
} else {
|
|
1046
|
+
failMsg(`question ledger: unexpected ${JSON.stringify(ledger)}`);
|
|
1047
|
+
}
|
|
1048
|
+
|
|
1049
|
+
subsection("Structured JSON parser");
|
|
1050
|
+
const { parseStructuredJson } = await import("./src/search/synthesis.mjs");
|
|
1051
|
+
const parsedLooseJson = parseStructuredJson(`BEGIN_JSON
|
|
1052
|
+
{"answer":"line one
|
|
1053
|
+
line two","claims":[{"claim":"x"}]}
|
|
1054
|
+
END_JSON
|
|
1055
|
+
trailing note`);
|
|
1056
|
+
if (parsedLooseJson?.answer?.includes("line two")) {
|
|
1057
|
+
passMsg("structured JSON: repairs raw newlines inside strings");
|
|
1058
|
+
} else {
|
|
1059
|
+
failMsg(
|
|
1060
|
+
`structured JSON: failed to repair ${JSON.stringify(parsedLooseJson)}`,
|
|
1061
|
+
);
|
|
1062
|
+
}
|
|
692
1063
|
}
|
|
693
1064
|
|
|
694
1065
|
// ─────────────────────────────────────────────────────────────────────────────
|