@apmantza/greedysearch-pi 2.0.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,520 @@
1
+ // src/search/simple-research.mjs — Fast-path research for simple queries
2
+ //
3
+ // Runs one all-engine search per query angle, fetches top sources, and produces a cited
4
+ // synthesis in one pass. Returns the same shape as runResearchMode() for
5
+ // compatibility with the rest of the pipeline.
6
+
7
+ import { ALL_ENGINES, RESEARCH_ENGINES } from "./constants.mjs";
8
+ import {
9
+ buildSourceRegistry,
10
+ mergeFetchDataIntoSources,
11
+ trimText,
12
+ } from "./sources.mjs";
13
+ import {
14
+ auditCitations,
15
+ buildFinalReportPrompt,
16
+ buildSynthesisFromEvidencePrompt,
17
+ computeResearchFloor,
18
+ createQuestionLedger,
19
+ extractEvidenceFromSources,
20
+ reconcileQuestionsFromSynthesis,
21
+ runCitationUrlCheck,
22
+ writeResearchBundle,
23
+ } from "./research.mjs";
24
+ import { parseStructuredJson } from "./synthesis.mjs";
25
+ import { writeSourcesToFiles } from "./file-sources.mjs";
26
+ import { fetchMultipleSources } from "./fetch-source.mjs";
27
+ import { runGeminiPrompt } from "./synthesis-runner.mjs";
28
+ import { createProgressTracker } from "./progress.mjs";
29
+ import { spawn } from "node:child_process";
30
+ import { join } from "node:path";
31
+ import { fileURLToPath } from "node:url";
32
+
33
// Directory that contains this module, as a filesystem path. If the converted
// path still starts with "/X:" (slash before an upper-case drive letter), the
// leading slash is removed.
const __dir = fileURLToPath(new URL(".", import.meta.url)).replace(
  /^\/([A-Z]:)/,
  "$1",
);

// Path of the search CLI script (two directories up, then bin/search.mjs);
// it is the script run by the child process spawned for each search.
const SEARCH_BIN = join(__dir, "..", "..", "bin", "search.mjs");
38
+
39
/**
 * Normalise a list of values to distinct, non-empty strings.
 *
 * Each item is stringified (falsy items become ""), passed through
 * trimText with a 1000 limit, and kept only the first time it is seen.
 * Collection stops as soon as `limit` entries have been gathered.
 *
 * @param items  Iterable of values; null/undefined is treated as empty.
 * @param limit  Maximum number of entries to return (default: no cap).
 * @returns Array of unique strings in first-seen order.
 */
function uniqueStrings(items, limit = Infinity) {
  // A Set iterates in insertion order, so it doubles as the output list.
  const unique = new Set();
  for (const raw of items || []) {
    const text = trimText(String(raw || ""), 1000);
    if (text && !unique.has(text)) {
      unique.add(text);
      if (unique.size >= limit) break;
    }
  }
  return [...unique];
}
51
+
52
// Expand a query into three search angles for direct-mode research:
// definition, mechanism, and current usage. A blank (or missing) query
// yields no angles at all.
export function buildSearchAngles(query) {
  const topic = String(query || "").trim();
  if (topic.length === 0) return [];

  const suffixes = [
    "definition and overview",
    "how it works, mechanism, or key details",
    "current usage, comparison, or best practices",
  ];
  return suffixes.map((suffix) => `${topic} — ${suffix}`);
}
63
+
64
/**
 * Merge sources from multiple search angles, deduplicating by URL.
 *
 * A source's identity is its first non-empty value among `canonicalUrl`,
 * `finalUrl` and `url`; sources with none of these are dropped. When an
 * incoming source matches one already collected, the collected entry is kept
 * and gains an `angles` array listing every distinct, non-empty query that
 * surfaced it (seeded from the entry's own `query` when it has no `angles`).
 *
 * Neither input array nor any source object is mutated; merged entries are
 * shallow copies.
 *
 * @param existing  Sources gathered so far (null/undefined treated as empty).
 * @param incoming  Newly found sources (null/undefined treated as empty).
 * @returns Array of sources in first-seen order.
 */
export function mergeSourcesByUrl(existing, incoming) {
  const keyOf = (source) =>
    source?.canonicalUrl || source?.finalUrl || source?.url;

  const urlMap = new Map();
  for (const source of existing || []) {
    const key = keyOf(source);
    if (key) urlMap.set(key, source);
  }

  for (const source of incoming || []) {
    const key = keyOf(source);
    if (!key) continue;

    const prior = urlMap.get(key);
    if (!prior) {
      urlMap.set(key, source);
      continue;
    }

    // Keep the existing entry but record the additional angle. Empty queries
    // and repeats are filtered out so `angles` stays a clean list of the
    // distinct queries that led to this URL.
    const priorAngles = Array.isArray(prior.angles)
      ? prior.angles
      : [prior.query];
    const angles = [...new Set([...priorAngles, source.query].filter(Boolean))];
    urlMap.set(key, { ...prior, angles });
  }

  return Array.from(urlMap.values());
}
90
+
91
// Build the engine-matching regex from ALL_ENGINES so new engines are
// auto-forwarded. Names are regex-escaped before being joined so an engine id
// containing a metacharacter (".", "+", "(" ...) is matched literally rather
// than being interpreted as pattern syntax.
const _enginePattern = ALL_ENGINES.map((name) =>
  String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"),
).join("|");
const _engineRegex = new RegExp(`^\\[(${_enginePattern})\\]`);

// Prefixes (or whole-line forms) of child stderr lines worth showing to the
// user: progress markers, tagged log lines, and browser lifecycle notices.
const _forwardedStderrPatterns = [
  /^PROGRESS:/,
  /^\[greedysearch\]/,
  _engineRegex,
  /^GreedySearch Chrome/,
  /^Launching GreedySearch Chrome/,
  /^Headless mode/,
  /^Ready\.?$/,
];

/**
 * Decide whether a single stderr line from the search child process should be
 * echoed to this process's stderr.
 *
 * @param line  One line of child stderr, without its trailing newline.
 * @returns true when the line matches any of the forwarded patterns.
 */
function shouldForwardChildStderr(line) {
  return _forwardedStderrPatterns.some((pattern) => pattern.test(line));
}
106
+
107
/**
 * Run one "all engines" search in a child Node process and return its parsed
 * JSON output.
 *
 * The child is started with the current Node executable on SEARCH_BIN with
 * `all --inline --stdin --fast` (plus `--full` when `short` is false and
 * `--locale <locale>` when given); the query is delivered on stdin. Selected
 * stderr lines (see shouldForwardChildStderr) are echoed to this process's
 * stderr as they arrive.
 *
 * @param query           Search text written to the child's stdin.
 * @param options.locale  Optional locale passed through as `--locale`.
 * @param options.short   When false, adds `--full` (default true).
 * @returns Promise resolving to the JSON value printed on the child's stdout.
 *          Rejects with an Error when the child cannot be started, exits
 *          non-zero, prints invalid JSON, or runs longer than 140 seconds.
 */
async function runFastAllSearch(query, { locale = null, short = true } = {}) {
  const args = [SEARCH_BIN, "all", "--inline", "--stdin", "--fast"];
  if (!short) args.push("--full");
  if (locale) args.push("--locale", locale);

  return new Promise((resolve, reject) => {
    const proc = spawn(process.execPath, args, {
      stdio: ["pipe", "pipe", "pipe"],
      env: { ...process.env, GREEDY_SEARCH_RESEARCH_CHILD: "1" },
    });

    // The promise can be finished from several events (timeout, error,
    // close); make sure only the first one wins and the timer is released.
    let settled = false;
    let timer = null;
    const settle = (finish, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      finish(value);
    };

    // Without an 'error' listener a failed spawn would surface as an
    // uncaught exception instead of a rejected promise.
    proc.on("error", (error) => {
      settle(
        reject,
        new Error(`research child search failed to start: ${error.message}`),
      );
    });
    // A child that exits before reading stdin raises EPIPE on the pipe; the
    // exit itself is reported through the 'close' handler below.
    proc.stdin?.on("error", () => {});

    let out = "";
    let err = "";
    let stderrBuffer = "";
    const forwardLine = (line) => {
      if (shouldForwardChildStderr(line)) {
        process.stderr.write(`${line}\n`);
      }
    };

    // Decode as UTF-8 with a streaming decoder so multi-byte characters that
    // straddle chunk boundaries are not corrupted.
    proc.stdout?.setEncoding("utf8");
    proc.stderr?.setEncoding("utf8");
    proc.stdout?.on("data", (chunk) => {
      out += chunk;
    });
    proc.stderr?.on("data", (chunk) => {
      err += chunk;
      stderrBuffer += chunk;
      // Accept both LF and CRLF so end-anchored filters still match on
      // platforms that emit "\r\n".
      const lines = stderrBuffer.split(/\r?\n/);
      stderrBuffer = lines.pop() || "";
      for (const line of lines) forwardLine(line);
    });

    proc.stdin?.write(query);
    proc.stdin?.end();

    timer = setTimeout(() => {
      proc.kill();
      settle(
        reject,
        new Error(`research child search timed out for: ${query}`),
      );
    }, 140000);

    proc.on("close", (code) => {
      // Flush a final stderr line that was not newline-terminated.
      if (stderrBuffer) {
        forwardLine(stderrBuffer.replace(/\r$/, ""));
        stderrBuffer = "";
      }
      if (code !== 0) {
        settle(
          reject,
          new Error(err.trim() || `search child exited with code ${code}`),
        );
        return;
      }
      let parsed;
      try {
        parsed = JSON.parse(out.trim());
      } catch {
        settle(
          reject,
          new Error(`Invalid JSON from research child: ${out.slice(0, 200)}`),
        );
        return;
      }
      settle(resolve, parsed);
    });
  });
}
157
+
158
/**
 * Give every fetched source an `id`, reusing the id of the matching entry in
 * the source registry when there is one.
 *
 * A fetched source keeps its own `id` if it has one. Otherwise it is matched
 * against the registry by URL — any of `finalUrl`, `canonicalUrl` or `url` on
 * either side may match — and, failing that, receives a positional fallback
 * id `F<n>` (1-based index in `fetchedSources`).
 *
 * @param fetchedSources  Fetch results (null/undefined treated as empty).
 * @param sources         Source registry entries carrying `id` and URLs.
 * @returns New array of shallow copies of the fetched sources, each with `id`.
 */
function annotateFetchedSourcesWithIds(fetchedSources, sources) {
  const byUrl = new Map();

  // Primary index: the registry's preferred key for each source.
  for (const source of sources || []) {
    const key = source?.canonicalUrl || source?.finalUrl || source?.url || "";
    if (key && source?.id) byUrl.set(key, source.id);
  }
  // Alias index: the registry and the fetch results prefer different URL
  // fields, so also register the remaining URL variants. Aliases never
  // override a primary key.
  for (const source of sources || []) {
    if (!source?.id) continue;
    for (const alias of [source.canonicalUrl, source.finalUrl, source.url]) {
      if (alias && !byUrl.has(alias)) byUrl.set(alias, source.id);
    }
  }

  return (fetchedSources || []).map((source, index) => {
    const candidates = [
      source?.finalUrl,
      source?.canonicalUrl,
      source?.url,
    ].filter(Boolean);
    const matchedId = candidates
      .map((candidate) => byUrl.get(candidate))
      .find(Boolean);
    return {
      ...source,
      id: source?.id || matchedId || `F${index + 1}`,
    };
  });
}
172
+
173
/**
 * Summarise a question ledger as counts.
 *
 * @param questions  Array of ledger entries; an entry counts as closed when
 *                   its `status` is exactly "closed".
 * @returns `{ total, closed, open }`, where open is never negative.
 */
function questionProgress(questions) {
  const closed = questions.reduce(
    (count, entry) => (entry.status === "closed" ? count + 1 : count),
    0,
  );
  const total = questions.length;
  const open = Math.max(0, total - closed);
  return { total, closed, open };
}
178
+
179
/**
 * Fast-path research for simple queries.
 *
 * Pipeline (each stage is best-effort; a failure is logged to stderr and the
 * run continues with whatever was gathered so far):
 *   1. one all-engine search per angle from buildSearchAngles(), with the
 *      resulting sources merged by URL;
 *   2. fetch of the top `maxSources` sources;
 *   3. evidence extraction, which closes answered questions in the ledger and
 *      appends newly discovered ones;
 *   4. synthesis from evidence, falling back to the final-report prompt when
 *      the first pass produced no claims;
 *   5. citation audit, citation URL check and research-floor computation;
 *   6. optional bundle write (otherwise fetched sources are written to files).
 *
 * Returns the same shape as runResearchMode() for compatibility.
 *
 * @param options.query             Research question.
 * @param options.locale            Locale forwarded to child searches.
 * @param options.maxSources        Cap on sources fetched (default 5).
 * @param options.qualityThreshold  Passed to the floor computation and echoed
 *                                  in the result (default 8.5).
 * @param options.writeBundle       Write a research bundle; defaults to true
 *                                  unless GREEDY_RESEARCH_BUNDLE is "0".
 * @param options.researchOutDir    Output directory handed to the bundle
 *                                  writer.
 * @returns Result object with `query`, `_research`, `_citationAudit`,
 *          `_citationUrls`, `_sources`, `_fetchedSources`, `_synthesis` and
 *          `_confidence`.
 */
export async function runSimpleResearchMode({
  query,
  locale = null,
  maxSources = 5,
  qualityThreshold = 8.5,
  writeBundle = process.env.GREEDY_RESEARCH_BUNDLE !== "0",
  researchOutDir = null,
} = {}) {
  const startedAt = new Date().toISOString();
  const startMs = Date.now();
  const questions = createQuestionLedger(query);
  const extractedSourceKeys = new Set();

  process.stderr.write(
    `[greedysearch] Simple research mode: single-pass for "${trimText(query, 80)}"\n`,
  );

  // Progress bar with ETA — simple path does 3 search angles + 1 fetch
  // batch + 1-2 synthesis calls. Use a conservative total so the ETA
  // doesn't start at zero.
  const searchAnglesCount = 3;
  const totalSteps = searchAnglesCount + 1 + 2; // searches + fetch + 2 synth calls
  const progressTracker = createProgressTracker({
    totalActions: totalSteps,
    totalRounds: 1,
    totalFetches: 1,
    silent: process.env.GREEDY_RESEARCH_QUIET === "1",
  });
  progressTracker.startRound(1);

  // Step 1: Multi-angle search. Feynman's deepresearch pattern: for
  // direct-mode research, run a minimum of 3 distinct search angles
  // (definition, mechanism, current usage/comparison) to get broader
  // source coverage than a single query.
  let combinedSources = [];
  let fetchedSources = [];
  const searchAngles = buildSearchAngles(query);
  const searchResults = [];
  for (const angle of searchAngles) {
    try {
      progressTracker.startAction("search", angle.slice(0, 50));
      const result = await runFastAllSearch(angle, { locale, short: true });
      progressTracker.endAction();
      searchResults.push({ angle, result });
      const sources = buildSourceRegistry(result, angle);
      combinedSources = mergeSourcesByUrl(combinedSources, sources);
    } catch (error) {
      progressTracker.endAction();
      process.stderr.write(
        `[greedysearch] Simple search angle "${angle}" failed: ${error.message}\n`,
      );
    }
  }

  // Step 2: Fetch top sources
  process.stderr.write("PROGRESS:research:simple:fetching\n");
  if (combinedSources.length > 0) {
    const fetchCount = Math.min(maxSources, combinedSources.length);
    try {
      progressTracker.startFetch(`top ${fetchCount} sources`);
      fetchedSources = await fetchMultipleSources(
        combinedSources,
        fetchCount,
        8000,
        Math.min(3, maxSources),
      );
      progressTracker.endFetch(true);
      combinedSources = mergeFetchDataIntoSources(
        combinedSources,
        fetchedSources,
      );
    } catch (error) {
      progressTracker.endFetch(false);
      process.stderr.write(
        `[greedysearch] Source fetching failed: ${error.message}\n`,
      );
    }
  }
  fetchedSources = annotateFetchedSourcesWithIds(
    fetchedSources,
    combinedSources,
  );

  // Step 3: Goal-based evidence extraction (single pass)
  process.stderr.write("PROGRESS:research:simple:evidence\n");
  let evidenceItems = [];
  try {
    const evidenceRun = await extractEvidenceFromSources({
      query,
      questions,
      fetchedSources,
      extractedSourceKeys,
    });
    // Iterate the guarded list, not evidenceRun.evidence directly, so a run
    // that returns no evidence array is treated as "nothing extracted"
    // instead of throwing.
    evidenceItems = Array.isArray(evidenceRun?.evidence)
      ? evidenceRun.evidence
      : [];
    for (const evidence of evidenceItems) {
      // Close every ledger question this evidence item answers.
      const answered = Array.isArray(evidence?.answers) ? evidence.answers : [];
      for (const ans of answered) {
        const id = ans?.id || ans?.question;
        if (!id) continue;
        const target = questions.find((q) => q.id === id);
        if (!target) continue;
        target.status = "closed";
        target.closedRound = 1;
        if (ans.evidence) {
          target.evidence = uniqueStrings(
            [...(target.evidence || []), ans.evidence],
            4,
          );
        }
      }
      // Record follow-up questions that are not already in the ledger.
      const newQs = Array.isArray(evidence?.newQuestions)
        ? evidence.newQuestions
        : [];
      for (const q of newQs) {
        const clean = trimText(String(q), 320);
        if (clean && !questions.some((x) => x.question === clean)) {
          questions.push({
            id: `Q${questions.length + 1}`,
            question: clean,
            status: "open",
            reason: "Discovered gap/follow-up",
            createdRound: 1,
            evidence: [],
            sourceIds: [],
          });
        }
      }
    }
  } catch (error) {
    process.stderr.write(
      `[greedysearch] Evidence extraction failed: ${error.message}\n`,
    );
  }

  // Step 4: Single-pass synthesis
  process.stderr.write("PROGRESS:research:simple:synthesizing\n");
  let synthesis = {
    answer: "",
    agreement: { level: "mixed", summary: "Single-pass synthesis." },
    differences: [],
    caveats: [],
    claims: [],
    recommendedSources: combinedSources.slice(0, 4).map((s) => s.id),
    synthesized: false,
  };

  // Runs one synthesis prompt and folds the parsed result into `synthesis`.
  // The progress action is closed in `finally` so a failed or timed-out
  // prompt does not leave an action open on the tracker.
  const runSynthesisPass = async (action, detail, buildPrompt, failureLabel) => {
    try {
      progressTracker.startAction(action, detail);
      const rawReport = await runGeminiPrompt(buildPrompt(), {
        timeoutMs: 120_000,
      });
      synthesis = {
        ...synthesis,
        ...(parseStructuredJson(rawReport?.answer || "") || {}),
      };
      synthesis.synthesized =
        Array.isArray(synthesis.claims) && synthesis.claims.length > 0;
    } catch (error) {
      process.stderr.write(
        `[greedysearch] ${failureLabel} failed: ${error.message}\n`,
      );
    } finally {
      progressTracker.endAction();
    }
  };

  if (evidenceItems.length > 0) {
    await runSynthesisPass(
      "synth-evidence",
      "from evidence",
      () =>
        buildSynthesisFromEvidencePrompt(
          query,
          combinedSources,
          questions,
          evidenceItems,
        ),
      "Evidence synthesis",
    );
  }

  // Fallback: no claims yet (no evidence, or the first pass failed).
  if (!synthesis.synthesized && combinedSources.length > 0) {
    await runSynthesisPass(
      "synth-final",
      "fallback report",
      () =>
        buildFinalReportPrompt(
          query,
          [{ round: 1, learnings: [], gaps: [], actions: [] }],
          combinedSources,
          questions,
          evidenceItems,
        ),
      "Final synthesis",
    );
  }

  // Step 5: Citation audit + floor check
  process.stderr.write("PROGRESS:research:simple:audit\n");
  const citationAudit = auditCitations(synthesis.answer || "", combinedSources);

  // Citation URL reachability check
  const citationUrls = await runCitationUrlCheck(combinedSources);

  reconcileQuestionsFromSynthesis(questions, synthesis, citationAudit);
  const allGaps = uniqueStrings(synthesis.caveats || []);
  // Fixed scores for the single-pass path: 8 when claims were produced, else 5.
  const qualityScore = synthesis.synthesized ? 8 : 5;
  const floor = computeResearchFloor({
    sources: combinedSources,
    fetchedSources,
    synthesis,
    citationAudit,
    gaps: allGaps,
    questions,
    rounds: [{ round: 1, actions: [], learnings: [], gaps: allGaps }],
    qualityScore,
    qualityThreshold,
    maxSources,
  });

  const finishedAt = new Date().toISOString();
  const durationMs = Date.now() - startMs;

  // Shared manifest fields
  const baseManifest = {
    startedAt,
    finishedAt,
    durationMs,
    rounds: 1,
    terminationReason: "simple_single_pass",
  };

  // The single round record is reported in both the bundle and the result;
  // a fresh object is built for each consumer so neither can affect the other.
  const buildRoundRecord = () => ({
    round: 1,
    actions: [],
    learnings: [],
    gaps: allGaps,
    evidence: evidenceItems,
  });
  // Number of fetched sources that carry more than 100 content characters.
  const fetchedWithContent = fetchedSources.filter(
    (source) => source?.contentChars > 100,
  ).length;

  // Step 6: Write bundle (lightweight)
  let bundle = null;
  let fetchedFiles;
  if (writeBundle) {
    process.stderr.write("PROGRESS:research:simple:bundle\n");
    try {
      bundle = await writeResearchBundle({
        query,
        rounds: [buildRoundRecord()],
        sources: combinedSources,
        fetchedSources,
        evidenceItems,
        synthesis,
        citationAudit,
        citationUrls,
        floor,
        manifest: {
          ...baseManifest,
          engines: RESEARCH_ENGINES,
          synthesizer: "gemini",
          actionsRun: 1,
          // One child search per angle is run above; report how many of them
          // actually returned results rather than a hard-coded 1.
          searches: searchResults.length,
          fetches: fetchedSources.length,
          sourcesFetched: fetchedWithContent,
          engineFailures: [],
          floorMet: floor.floorMet,
        },
        allGaps,
        questions,
        outDir: researchOutDir,
      });
      fetchedFiles = bundle.sourceFiles;
      delete bundle.sourceFiles;
    } catch (error) {
      process.stderr.write(
        `[greedysearch] Research bundle write failed: ${error.message}\n`,
      );
      bundle = { error: error.message || String(error) };
      fetchedFiles = await writeSourcesToFiles(fetchedSources);
    }
  } else {
    fetchedFiles = await writeSourcesToFiles(fetchedSources);
  }

  process.stderr.write("PROGRESS:research:done\n");
  progressTracker.endRound();
  progressTracker.finish();

  return {
    query,
    _research: {
      mode: "simple",
      breadth: 1,
      iterations: 1,
      maxSources,
      rounds: [buildRoundRecord()],
      learnings: [],
      gaps: allGaps,
      evidence: evidenceItems,
      questions,
      questionProgress: questionProgress(questions),
      qualityHistory: [qualityScore],
      terminationReason: "simple_single_pass",
      qualityThreshold,
      floor,
      bundle,
      manifest: baseManifest,
    },
    _citationAudit: citationAudit,
    _citationUrls: citationUrls,
    _sources: combinedSources,
    _fetchedSources: fetchedFiles,
    _synthesis: synthesis,
    _confidence: {
      sourcesCount: combinedSources.length,
      fetchedSourceSuccessRate:
        fetchedSources.length > 0
          ? fetchedWithContent / fetchedSources.length
          : 0,
      agreementLevel: synthesis.agreement?.level || "mixed",
      floorMet: floor.floorMet,
    },
  };
}
@@ -84,7 +84,7 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
84
84
  "WEB/RESEARCH SEARCH ONLY — searches live web via Perplexity, Google AI, ChatGPT, and Gemini, plus opt-in research through Semantic Scholar and Logically. " +
85
85
  "Research mode reuses the configured ~/.pi/greedyconfig engines for child searches and Gemini for planning/final synthesis. " +
86
86
  "Research mode is the centerpiece: it plans follow-up actions, fetches sources, audits citations, " +
87
- "and writes a structured research bundle on disk. " +
87
+ "and writes a structured research bundle on disk. Scale-aware: simple queries auto-classify and use a fast single-pass path. " +
88
88
  "Use for: library docs, recent framework changes, error messages, best practices, current events. " +
89
89
  "Reports streaming progress as each engine completes.",
90
90
  promptSnippet: "Multi-engine AI web search with streaming progress",
@@ -231,15 +231,13 @@ export function registerGreedySearchTool(pi: ExtensionAPI, baseDir: string) {
231
231
  flags.push("--synthesizer", params.synthesizer);
232
232
  }
233
233
 
234
- const onProgress =
235
- effectiveEngine === "all"
236
- ? makeProgressTracker(
237
- ALL_ENGINES,
238
- onUpdate,
239
- researchMode ? "Researching" : "Searching",
240
- synthesize,
241
- )
242
- : undefined;
234
+ const onProgress = makeProgressTracker(
235
+ effectiveEngine === "all" ? ALL_ENGINES : [effectiveEngine],
236
+ onUpdate,
237
+ researchMode ? "Researching" : "Searching",
238
+ synthesize && effectiveEngine === "all",
239
+ query,
240
+ );
243
241
 
244
242
  try {
245
243
  const data = await runSearch(