@apmantza/greedysearch-pi 1.9.2 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/CHANGELOG.md +132 -2
  2. package/README.md +82 -47
  3. package/bin/cdp.mjs +1153 -1108
  4. package/bin/launch.mjs +9 -0
  5. package/bin/search.mjs +318 -81
  6. package/extractors/bing-copilot.mjs +48 -18
  7. package/extractors/chatgpt.mjs +553 -0
  8. package/extractors/common.mjs +213 -22
  9. package/extractors/consensus.mjs +655 -0
  10. package/extractors/consent.mjs +182 -18
  11. package/extractors/gemini.mjs +350 -217
  12. package/extractors/google-ai.mjs +129 -128
  13. package/extractors/logically.mjs +629 -0
  14. package/extractors/perplexity.mjs +547 -217
  15. package/extractors/selectors.mjs +3 -2
  16. package/extractors/semantic-scholar.mjs +219 -0
  17. package/package.json +8 -4
  18. package/skills/greedy-search/skill.md +20 -12
  19. package/src/fetcher.mjs +23 -1
  20. package/src/formatters/results.ts +185 -128
  21. package/src/search/browser-lifecycle.mjs +27 -5
  22. package/src/search/challenge-detect.mjs +205 -0
  23. package/src/search/chrome.mjs +653 -590
  24. package/src/search/constants.mjs +155 -39
  25. package/src/search/engines.mjs +114 -76
  26. package/src/search/fetch-source.mjs +566 -451
  27. package/src/search/pdf.mjs +68 -0
  28. package/src/search/progress.mjs +145 -0
  29. package/src/search/recovery.mjs +73 -45
  30. package/src/search/research.mjs +1419 -62
  31. package/src/search/scale-aware.mjs +93 -0
  32. package/src/search/simple-research.mjs +520 -0
  33. package/src/search/sources.mjs +52 -22
  34. package/src/search/synthesis-runner.mjs +105 -26
  35. package/src/search/synthesis.mjs +286 -246
  36. package/src/tools/greedy-search-handler.ts +129 -59
  37. package/src/tools/shared.ts +312 -186
  38. package/src/types.ts +110 -104
  39. package/test.mjs +537 -18
@@ -1,186 +1,312 @@
1
- /**
2
- * Shared types, utilities, and runSearch for Pi tool handlers
3
- */
4
-
5
- import { spawn } from "node:child_process";
6
- import { existsSync } from "node:fs";
7
- import { join } from "node:path";
8
- import type { ProgressUpdate, ToolResult } from "../types.js";
9
-
10
- export type { ProgressUpdate, ToolResult } from "../types.js";
11
-
12
- // Canonical source is src/search/constants.mjs keep in sync
13
- const ALL_ENGINES = ["perplexity", "bing", "google"] as const;
14
-
15
- export { ALL_ENGINES };
16
-
17
- /** Strip surrounding double-quotes that some framework versions inject into string params */
18
- export function stripQuotes(val: string): string {
19
- return val.replace(/^"|"$/g, "");
20
- }
21
-
22
- /**
23
- * Check if the CDP module is available in the package directory
24
- */
25
- export function cdpAvailable(baseDir: string): boolean {
26
- return existsSync(join(baseDir, "bin", "cdp.mjs"));
27
- }
28
-
29
- /**
30
- * Create a "cdp missing" error result
31
- */
32
- export function cdpMissingResult(): ToolResult {
33
- return {
34
- content: [
35
- {
36
- type: "text",
37
- text: "cdp.mjs missing — try reinstalling: pi install git:github.com/apmantza/GreedySearch-pi",
38
- },
39
- ],
40
- details: {} as Record<string, unknown>,
41
- };
42
- }
43
-
44
- /**
45
- * Create an error result with a message
46
- */
47
- export function errorResult(prefix: string, e: unknown): ToolResult {
48
- const msg = e instanceof Error ? e.message : String(e);
49
- return {
50
- content: [{ type: "text", text: `${prefix}: ${msg}` }],
51
- details: {} as Record<string, unknown>,
52
- };
53
- }
54
-
55
- /**
56
- * Spawn search.mjs and collect JSON results, with progress streaming via stderr.
57
- * Shared by GreedySearch tool handlers.
58
- */
59
- export function runSearch(
60
- engine: string,
61
- query: string,
62
- flags: string[],
63
- searchBin: string,
64
- signal?: AbortSignal,
65
- onProgress?: (
66
- engine: string,
67
- status: "done" | "error" | "needs-human",
68
- ) => void,
69
- headless?: boolean, // defaults to true (headless is the default)
70
- ): Promise<Record<string, unknown>> {
71
- return new Promise((resolve, reject) => {
72
- const allFlags = [...flags];
73
- // Headless is default — only skip if explicitly false or GREEDY_SEARCH_VISIBLE=1
74
- if (headless !== false && process.env.GREEDY_SEARCH_VISIBLE !== "1")
75
- allFlags.push("--headless");
76
- if (headless === false) allFlags.push("--always-visible");
77
- // Propagate visibility preference via env (--headless flag is informational;
78
- // the actual headless control in search.mjs / launch.mjs reads the env var).
79
- const procEnv = { ...process.env };
80
- if (headless === false) {
81
- procEnv.GREEDY_SEARCH_VISIBLE = "1";
82
- procEnv.GREEDY_SEARCH_ALWAYS_VISIBLE = "1";
83
- }
84
- const proc = spawn(
85
- process.execPath,
86
- [searchBin, engine, "--inline", "--stdin", ...allFlags],
87
- { stdio: ["pipe", "pipe", "pipe"], env: procEnv },
88
- );
89
- // Pipe query via stdin to avoid leaking it in process table command-line
90
- proc.stdin.write(query);
91
- proc.stdin.end();
92
- let out = "";
93
- let err = "";
94
-
95
- const onAbort = () => {
96
- proc.kill("SIGTERM");
97
- reject(new Error("Aborted"));
98
- };
99
- signal?.addEventListener("abort", onAbort, { once: true });
100
-
101
- proc.stderr.on("data", (d: Buffer) => {
102
- err += d;
103
- for (const line of d.toString().split("\n")) {
104
- // Engine progress: perplexity/bing/google
105
- const engineMatch = line.match(
106
- /^PROGRESS:(perplexity|bing|google):(done|error|needs-human)$/,
107
- );
108
- if (engineMatch && onProgress) {
109
- onProgress(
110
- engineMatch[1],
111
- engineMatch[2] as "done" | "error" | "needs-human",
112
- );
113
- }
114
- // Synthesis progress: skipped (manual verification) or done/error
115
- const synthMatch = line.match(
116
- /^PROGRESS:synthesis:(done|error|skipped)$/,
117
- );
118
- if (synthMatch && onProgress) {
119
- onProgress(
120
- "synthesis",
121
- synthMatch[1] as "done" | "error" | "needs-human",
122
- );
123
- }
124
- }
125
- });
126
-
127
- proc.stdout.on("data", (d: Buffer) => (out += d));
128
- proc.on("close", (code: number) => {
129
- signal?.removeEventListener("abort", onAbort);
130
- if (code !== 0) {
131
- reject(new Error(err.trim() || `search.mjs exited with code ${code}`));
132
- } else {
133
- try {
134
- resolve(JSON.parse(out.trim()));
135
- } catch {
136
- reject(
137
- new Error(`Invalid JSON from search.mjs: ${out.slice(0, 200)}`),
138
- );
139
- }
140
- }
141
- });
142
- });
143
- }
144
-
145
- /**
146
- * Build a progress callback that tracks completed engines.
147
- * Returns an onProgress function suitable for runSearch.
148
- */
149
- export function makeProgressTracker(
150
- engines: readonly string[],
151
- onUpdate: ((update: ProgressUpdate) => void) | undefined,
152
- suffix: "Searching" | "Researching",
153
- depth: string,
154
- ) {
155
- const completed = new Map<string, "done" | "error" | "needs-human">();
156
-
157
- return (eng: string, status: "done" | "error" | "needs-human") => {
158
- completed.set(eng, status);
159
- const parts: string[] = [];
160
- for (const e of engines) {
161
- const s = completed.get(e);
162
- if (s === "done") parts.push(`✅ ${e} done`);
163
- else if (s === "error") parts.push(`❌ ${e} failed`);
164
- else if (s === "needs-human")
165
- parts.push(`🔓 ${e} needs manual verification`);
166
- else parts.push(`⏳ ${e}`);
167
- }
168
- // Synthesis status: when all engines complete in non-fast mode,
169
- // show synthesis progress. Handle "skipped" status (emitted when
170
- // manual verification is needed and synthesis is bypassed).
171
- if (depth !== "fast" && completed.size >= 3) {
172
- const synStatus = completed.get("synthesis");
173
- if (synStatus === "done") parts.push("✅ synthesized");
174
- else if (synStatus === "error") parts.push("❌ synthesis failed");
175
- else if (synStatus === "needs-human") parts.push("⏭️ synthesis skipped");
176
- else parts.push("🔄 synthesizing");
177
- }
178
-
179
- onUpdate?.({
180
- content: [
181
- { type: "text", text: `**${suffix}...** ${parts.join(" · ")}` },
182
- ],
183
- details: { _progress: true },
184
- } satisfies ProgressUpdate);
185
- };
186
- }
1
+ /**
2
+ * Shared types, utilities, and runSearch for Pi tool handlers
3
+ */
4
+
5
+ import { spawn } from "node:child_process";
6
+ import { existsSync } from "node:fs";
7
+ import { join } from "node:path";
8
+ import type { ProgressUpdate, ToolResult } from "../types.js";
9
+
10
+ export type { ProgressUpdate, ToolResult } from "../types.js";
11
+
12
+ // Import and re-export ALL_ENGINES from constants.mjs so it's always in sync.
13
+ // constants.mjs reads ~/.pi/greedyconfig for user overrides.
14
+ import { ALL_ENGINES } from "../search/constants.mjs";
15
+ export { ALL_ENGINES };
16
+
/**
 * Strip one pair of surrounding double-quotes that some framework versions
 * inject into string params.
 *
 * Quotes are removed only when the value both starts AND ends with `"`.
 * A lone leading or trailing quote is left alone, so a query such as
 * `"exact phrase" site:example.com` keeps its (balanced) phrase quotes
 * instead of losing just the opening one.
 *
 * @param val Raw parameter value.
 * @returns `val` without its wrapping quote pair, or `val` unchanged.
 */
export function stripQuotes(val: string): string {
  // length >= 2 so a single `"` is not treated as both opener and closer.
  const isWrapped =
    val.length >= 2 && val.startsWith('"') && val.endsWith('"');
  return isWrapped ? val.slice(1, -1) : val;
}
21
+
/**
 * Report whether the CDP helper script exists under the package directory.
 *
 * @param baseDir Package root that is expected to contain `bin/cdp.mjs`.
 * @returns `true` when `<baseDir>/bin/cdp.mjs` exists on disk.
 */
export function cdpAvailable(baseDir: string): boolean {
  const cdpScript = join(baseDir, "bin", "cdp.mjs");
  return existsSync(cdpScript);
}
28
+
/**
 * Build the tool result shown when `cdp.mjs` cannot be found.
 *
 * @returns A result with a single text item carrying the reinstall hint and
 *   an empty `details` record.
 */
export function cdpMissingResult(): ToolResult {
  const text =
    "cdp.mjs missing — try reinstalling: pi install git:github.com/apmantza/GreedySearch-pi";
  const details: Record<string, unknown> = {};
  return { content: [{ type: "text", text }], details };
}
43
+
/**
 * Turn an arbitrary thrown value into readable text.
 *
 * - `Error`: its message, or its name when the message is empty.
 * - string: returned as-is.
 * - other objects: their JSON form, so a thrown `{ code: 42 }` does not
 *   collapse into "[object Object]".
 * - anything else: `String(value)`.
 */
function describeThrown(e: unknown): string {
  if (e instanceof Error) return e.message || e.name;
  if (typeof e === "string") return e;
  if (typeof e === "object" && e !== null) {
    try {
      const json = JSON.stringify(e);
      if (json !== undefined && json !== "{}") return json;
    } catch {
      // Circular structure or throwing toJSON — fall through to String().
    }
  }
  try {
    return String(e);
  } catch {
    // e.g. an object without a usable toString/valueOf.
    return "unknown error";
  }
}

/**
 * Create an error result with a message.
 *
 * @param prefix Context shown before the error text (e.g. "Search failed").
 * @param e The caught value; may be anything, not only an `Error`.
 * @returns A result whose single text item reads `"<prefix>: <description>"`
 *   with an empty `details` record.
 */
export function errorResult(prefix: string, e: unknown): ToolResult {
  const details: Record<string, unknown> = {};
  return {
    content: [{ type: "text", text: `${prefix}: ${describeThrown(e)}` }],
    details,
  };
}
54
+
/** Terminal state reported for an engine (or for the synthesis step). */
type EngineStatus = "done" | "error" | "needs-human";

/** Progress update for a single engine finishing/failing */
type EngineProgress = {
  type: "engine";
  engine: string;
  status: EngineStatus;
};

/** Free-form progress text (e.g. research bar + ETA) */
type TextProgress = {
  type: "text";
  text: string;
};

// Engine names (and aliases) recognised in stderr progress output. Built once
// at module load instead of per stderr chunk.
const PROGRESS_ENGINE_NAMES =
  "perplexity|google|chatgpt|bing|gemini|semantic-scholar|semanticscholar|s2|logically";
const ENGINE_PROGRESS_RE = new RegExp(
  `^PROGRESS:(${PROGRESS_ENGINE_NAMES}):(done|error|needs-human)$`,
);
const SYNTHESIS_PROGRESS_RE = /^PROGRESS:synthesis:(done|error|skipped)$/;
const RESEARCH_PROGRESS_RE = /^PROGRESS:research:(.+)$/;
const BAR_PROGRESS_RE = /^\[greedysearch\] (\[.+?\] .+)$/;
const STAGE_PROGRESS_RE = new RegExp(
  `^\\[(${PROGRESS_ENGINE_NAMES})\\] stage: (.+) \\(\\+\\d+ms\\)$`,
);

// search.mjs reports a bypassed synthesis as "skipped"; the progress tracker
// renders that case from the "needs-human" status, so translate it here
// rather than casting "skipped" into a union that does not contain it.
const SYNTHESIS_STATUS: Record<string, EngineStatus> = {
  done: "done",
  error: "error",
  skipped: "needs-human",
};

/**
 * Interpret one complete stderr line from search.mjs.
 *
 * @returns The progress update the line encodes, or `undefined` when the
 *   line is ordinary log output.
 */
function parseProgressLine(
  line: string,
): EngineProgress | TextProgress | undefined {
  const engineMatch = ENGINE_PROGRESS_RE.exec(line);
  if (engineMatch) {
    return {
      type: "engine",
      engine: engineMatch[1],
      // The regex only admits the three EngineStatus literals.
      status: engineMatch[2] as EngineStatus,
    };
  }
  const synthMatch = SYNTHESIS_PROGRESS_RE.exec(line);
  if (synthMatch) {
    return {
      type: "engine",
      engine: "synthesis",
      status: SYNTHESIS_STATUS[synthMatch[1]] ?? "error",
    };
  }
  // Research progress markers (planning/fetching/synthesizing)
  const researchMatch = RESEARCH_PROGRESS_RE.exec(line);
  if (researchMatch) return { type: "text", text: researchMatch[1] };
  // Progress bar + ETA lines: "[greedysearch] [███░░] ..."
  const barMatch = BAR_PROGRESS_RE.exec(line);
  if (barMatch) return { type: "text", text: barMatch[1] };
  // Single-engine stage lines: "[perplexity] stage: nav (+563ms)"
  const stageMatch = STAGE_PROGRESS_RE.exec(line);
  if (stageMatch) {
    return { type: "text", text: `${stageMatch[1]}: ${stageMatch[2]}` };
  }
  return undefined;
}

/**
 * Reassemble complete lines from arbitrarily split stream chunks.
 *
 * Pipe chunks do not respect line boundaries, so a marker such as
 * `PROGRESS:bing:done` can arrive in two pieces; the unfinished tail is held
 * back until its newline shows up (or until `flush()` at end of stream).
 */
function createLineBuffer(onLine: (line: string) => void) {
  let pending = "";
  const emit = (line: string) => onLine(line.replace(/\r$/, ""));
  return {
    push(chunk: string): void {
      pending += chunk;
      const lines = pending.split("\n");
      pending = lines.pop() ?? "";
      for (const line of lines) emit(line);
    },
    flush(): void {
      if (pending === "") return;
      const tail = pending;
      pending = "";
      emit(tail);
    },
  };
}

/** True for any non-null object (including arrays). */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/**
 * Spawn search.mjs and collect JSON results, with progress streaming via stderr.
 * Shared by GreedySearch tool handlers.
 *
 * @param engine Engine name passed to search.mjs, or "all".
 * @param query Search query; sent over stdin so it never appears in the
 *   process table command line.
 * @param flags Extra CLI flags forwarded to search.mjs.
 * @param searchBin Path to search.mjs.
 * @param signal Optional abort signal; aborting sends SIGTERM to the child
 *   and rejects with "Aborted". An already-aborted signal rejects without
 *   spawning.
 * @param onProgress Receives engine/text progress parsed from stderr, plus a
 *   final "done" for single-engine runs once valid JSON has been received.
 * @param options `headless` defaults to true; `false` adds
 *   `--always-visible` and sets the GREEDY_SEARCH_VISIBLE /
 *   GREEDY_SEARCH_ALWAYS_VISIBLE env vars for the child.
 * @returns The JSON object printed by search.mjs on stdout.
 * @throws (as rejection) when the child cannot be started, exits non-zero,
 *   is aborted, or prints something that is not a JSON object.
 */
export function runSearch(
  engine: string,
  query: string,
  flags: string[],
  searchBin: string,
  signal?: AbortSignal,
  onProgress?: (update: EngineProgress | TextProgress) => void,
  options: { headless?: boolean } = {},
): Promise<Record<string, unknown>> {
  return new Promise((resolve, reject) => {
    // An "abort" listener never fires for a signal that is already aborted.
    if (signal?.aborted) {
      reject(new Error("Aborted"));
      return;
    }

    const { headless = true } = options;
    const allFlags = [...flags];
    // Headless is default — only skip if explicitly false or GREEDY_SEARCH_VISIBLE=1
    if (headless !== false && process.env.GREEDY_SEARCH_VISIBLE !== "1")
      allFlags.push("--headless");
    if (headless === false) allFlags.push("--always-visible");
    // Propagate visibility preference via env (--headless flag is informational;
    // the actual headless control in search.mjs / launch.mjs reads the env var).
    const procEnv = { ...process.env };
    if (headless === false) {
      procEnv.GREEDY_SEARCH_VISIBLE = "1";
      procEnv.GREEDY_SEARCH_ALWAYS_VISIBLE = "1";
    }

    const proc = spawn(
      process.execPath,
      [searchBin, engine, "--inline", "--stdin", ...allFlags],
      { stdio: ["pipe", "pipe", "pipe"], env: procEnv },
    );

    let out = "";
    let err = "";

    const onAbort = () => {
      proc.kill("SIGTERM");
      reject(new Error("Aborted"));
    };
    signal?.addEventListener("abort", onAbort, { once: true });

    // Without an "error" listener a failed spawn is thrown as an unhandled
    // event and takes the host process down.
    proc.on("error", (cause: Error) => {
      signal?.removeEventListener("abort", onAbort);
      reject(new Error(`Failed to run search.mjs: ${cause.message}`));
    });
    // The child may exit before reading stdin (EPIPE); the close handler
    // below reports the real failure, so the write error itself is ignored.
    proc.stdin.on("error", () => {});
    // Pipe query via stdin to avoid leaking it in process table command-line
    proc.stdin.end(query);

    // Decode as UTF-8 streams so a multi-byte character split across two
    // chunks is not corrupted.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");

    const stderrLines = createLineBuffer((line) => {
      if (!onProgress) return;
      const update = parseProgressLine(line);
      if (update) onProgress(update);
    });

    proc.stderr.on("data", (chunk: string) => {
      err += chunk;
      stderrLines.push(chunk);
    });
    proc.stdout.on("data", (chunk: string) => {
      out += chunk;
    });

    proc.on("close", (code: number | null) => {
      signal?.removeEventListener("abort", onAbort);
      stderrLines.flush();
      if (code !== 0) {
        reject(new Error(err.trim() || `search.mjs exited with code ${code}`));
        return;
      }
      let parsed: unknown;
      try {
        parsed = JSON.parse(out.trim());
      } catch {
        parsed = undefined;
      }
      if (!isJsonObject(parsed)) {
        reject(new Error(`Invalid JSON from search.mjs: ${out.slice(0, 200)}`));
        return;
      }
      // For single-engine calls, signal completion so the progress tracker
      // can mark the engine as done — only once the output is known good.
      if (onProgress && engine !== "all") {
        onProgress({ type: "engine", engine, status: "done" });
      }
      resolve(parsed);
    });
  });
}
193
+
/**
 * Render a Unicode progress bar.
 * Example: [████████████░░░░] for 75%
 *
 * The completed fraction is clamped to 0..1, so a negative, oversized or
 * non-numeric `done` yields an empty/full bar instead of throwing from
 * `String.prototype.repeat` or producing a bar with no cells.
 *
 * @param done Number of finished units.
 * @param total Number of units overall; a non-positive total renders "".
 * @param width Number of cells between the brackets (default 16).
 * @returns The bracketed bar, or "" when there is nothing to measure against.
 */
function renderBar(done: number, total: number, width = 16): string {
  const cells = Math.floor(width);
  if (!(total > 0) || !(cells > 0)) return "";
  const rawRatio = done / total;
  // NaN (e.g. NaN done, or Infinity/Infinity) counts as "nothing done yet".
  const ratio = Number.isNaN(rawRatio) ? 0 : Math.min(1, Math.max(0, rawRatio));
  const filled = Math.round(ratio * cells);
  return `[${"█".repeat(filled)}${"░".repeat(cells - filled)}]`;
}
209
+
/**
 * Format milliseconds as a short human duration.
 * e.g. "—" / "45s" / "1m 30s"
 *
 * Anything under one second — and any value that is not a finite number —
 * is shown as "—" rather than as "0s" or "NaNm NaNs".
 *
 * @param ms Duration in milliseconds.
 * @returns "—", "<n>s" below one minute, otherwise "<m>m <s>s".
 */
function fmtDuration(ms: number): string {
  if (!Number.isFinite(ms) || ms < 1000) return "—";
  const totalSeconds = Math.round(ms / 1000);
  if (totalSeconds < 60) return `${totalSeconds}s`;
  const minutes = Math.floor(totalSeconds / 60);
  return `${minutes}m ${totalSeconds % 60}s`;
}
220
+
/**
 * Build a progress callback that tracks completed engines.
 * Returns an onProgress function suitable for runSearch.
 *
 * Each call re-renders a small multi-line status block through `onUpdate`:
 *   1. header: `**<suffix>...** <query>`
 *   2. a bar + ETA line — either the most recent text update that starts
 *      with "[" (the research-path bar, which takes priority) or, for
 *      multi-engine runs, a bar computed from the fraction of listed engines
 *      that have reported
 *   3. one status entry per engine, plus the synthesis state once every
 *      listed engine has reported (only when `showSynthesis` is set)
 *
 * Text updates that do not start with "[" are not displayed; they only
 * trigger a re-render, which refreshes the ETA.
 *
 * @param engines Engines expected to report, in display order.
 * @param onUpdate Sink for rendered progress; nothing is emitted when omitted.
 * @param suffix Verb shown in the header.
 * @param showSynthesis Whether to append the synthesis state.
 * @param query Optional query echoed in the header.
 */
export function makeProgressTracker(
  engines: readonly string[],
  onUpdate: ((update: ProgressUpdate) => void) | undefined,
  suffix: "Searching" | "Researching",
  showSynthesis: boolean,
  query?: string,
) {
  // Per the note that accompanied the original status-line truncation: the
  // TUI cannot wrap a single rendered line and fails when one is wider than
  // the terminal, and emoji occupy two columns. 90 UTF-16 units leaves
  // head-room; the cap is applied to every line because the header carries
  // an arbitrary-length query and the research bar arrives from outside.
  const MAX_LINE_UNITS = 90;

  const startedAt = Date.now();
  // Values are kept as plain strings because a raw "skipped" synthesis
  // status may arrive at runtime even though it is not in the declared union.
  const completed = new Map<string, string>();
  let latestBarText = "";

  /** Cut `line` to the cap without splitting a surrogate pair (emoji). */
  function fitLine(line: string): string {
    if (line.length <= MAX_LINE_UNITS) return line;
    let kept = "";
    // for..of walks code points, so an emoji is kept or dropped whole.
    for (const ch of line) {
      if (kept.length + ch.length > MAX_LINE_UNITS - 2) break;
      kept += ch;
    }
    return `${kept}…`;
  }

  function render(): void {
    const lines: string[] = [];
    // Collapse whitespace so a multi-line query stays on the header line.
    const shownQuery = (query || "").replace(/\s+/g, " ");
    lines.push(fitLine(`**${suffix}...** ${shownQuery}`.trim()));

    // Count only the engines we were asked to track: `completed` also holds
    // the "synthesis" pseudo-engine (and any unlisted name), which must not
    // inflate the bar ("4/3 engines") or open the synthesis gate early.
    const done = engines.filter((e) => completed.has(e)).length;

    // Multi-engine bar + ETA (unless research supplies its own bar)
    if (engines.length > 1 && done > 0 && !latestBarText) {
      const elapsed = Date.now() - startedAt;
      const frac = Math.min(1, done / engines.length);
      // Linear extrapolation: total ≈ elapsed / frac.
      const etaMs = frac > 0.01 ? Math.round(elapsed / frac - elapsed) : null;
      const bar = renderBar(done, engines.length);
      const eta = etaMs != null && etaMs > 0 ? fmtDuration(etaMs) : "—";
      lines.push(`${bar} ${done}/${engines.length} engines (ETA ${eta})`);
    }

    // Research mode bar from createProgressTracker has priority
    if (latestBarText) lines.push(fitLine(latestBarText));

    const parts: string[] = [];
    for (const e of engines) {
      const s = completed.get(e);
      if (s === "done") parts.push(`✅ ${e} done`);
      else if (s === "error") parts.push(`❌ ${e} failed`);
      else if (s === "needs-human")
        parts.push(`🔓 ${e} needs manual verification`);
      else parts.push(`⏳ ${e}`);
    }
    if (showSynthesis && done >= engines.length) {
      const synStatus = completed.get("synthesis");
      if (synStatus === "done") parts.push("✅ synthesized");
      else if (synStatus === "error") parts.push("❌ synthesis failed");
      else if (synStatus === "needs-human" || synStatus === "skipped")
        parts.push("⏭️ synthesis skipped");
      else parts.push("🔄 synthesizing");
    }
    if (parts.length > 0) lines.push(fitLine(parts.join(" · ")));

    onUpdate?.({
      content: [{ type: "text", text: lines.join("\n") }],
      details: { _progress: true },
    } satisfies ProgressUpdate);
  }

  return (update: EngineProgress | TextProgress): void => {
    if (update.type === "text") {
      // Only bar-shaped text ("[....] ...") replaces the bar line.
      if (update.text.startsWith("[")) {
        latestBarText = update.text;
      }
      render();
      return;
    }

    const { engine, status } = update;
    completed.set(engine, status);
    render();
  };
}