pi-research 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
- import { spawn } from "node:child_process";
1
+ import { spawn as nodeSpawn, spawnSync } from "node:child_process";
2
+ import { existsSync } from "node:fs";
2
3
  import { fileURLToPath } from "node:url";
3
4
  import path from "node:path";
4
5
 
@@ -28,6 +29,13 @@ const DYNAMIC_PATTERNS = [
28
29
  /id=["']root["']/i,
29
30
  ];
30
31
 
32
/** Delay in milliseconds passed to the idle-stop timer once no jobs are pending. */
const DAEMON_IDLE_TIMEOUT_MS = 3000;

// Factory used to launch the daemon process; swappable through setScraplingSpawnForTests.
let spawnProcess = nodeSpawn;

// State object of the daemon currently tracked by this module, or null when there is none.
let daemonState = null;

// Counter incremented for every job to build its `job-N` id.
let daemonSequence = 0;

// Set once the process "exit" hook has been registered so it is only installed once.
let exitHookInstalled = false;

// Cached result of the runtime probe; stays null until the first probe has run.
let runtimeStatus = null;
38
+
31
39
  function stripHtml(value) {
32
40
  return String(value || "")
33
41
  .replace(/<script[\s\S]*?<\/script>/gi, " ")
@@ -71,35 +79,62 @@ export function chooseScraplingMode(input) {
71
79
  return assessPageAttempt(input).mode;
72
80
  }
73
81
 
74
- function pythonScript() {
82
+ function pythonDaemonScript() {
75
83
  return String.raw`
76
84
  import asyncio
85
+ import atexit
77
86
  import json
78
87
  import os
79
88
  import sys
80
89
 
81
90
  root = sys.argv[1]
82
- mode = sys.argv[2]
83
- url = sys.argv[3]
84
- payload = json.loads(sys.argv[4])
85
-
86
91
  sys.path.insert(0, root)
87
92
 
88
- async def main():
89
- from scrapling.fetchers import AsyncFetcher, DynamicFetcher, StealthyFetcher
93
+ from scrapling.fetchers import AsyncFetcher, AsyncDynamicSession, AsyncStealthySession, ProxyRotator
90
94
 
91
- timeout = payload.get("timeout")
92
- kwargs = {}
93
- if timeout:
94
- kwargs["timeout"] = timeout
95
+ sessions = {}
95
96
 
96
- if mode == "async":
97
- response = await AsyncFetcher.get(url, **kwargs)
98
- elif mode == "dynamic":
99
- response = DynamicFetcher.fetch(url, **kwargs)
100
- else:
101
- response = StealthyFetcher.fetch(url, **kwargs)
97
+ def session_key(mode, proxy_rotation):
98
+ if not proxy_rotation:
99
+ return mode
100
+ return f"{mode}:{json.dumps(proxy_rotation, sort_keys=True)}"
102
101
 
102
+ async def build_session(mode, payload):
103
+ proxy_rotation = payload.get("proxyRotation") or []
104
+ key = session_key(mode, proxy_rotation)
105
+ session = sessions.get(key)
106
+ if session is not None:
107
+ return session
108
+
109
+ kwargs = {
110
+ "headless": True,
111
+ "disable_resources": True,
112
+ "network_idle": True,
113
+ "timeout": payload.get("timeout") or 30000,
114
+ }
115
+ if proxy_rotation:
116
+ kwargs["proxy_rotator"] = ProxyRotator(proxy_rotation)
117
+
118
+ session = AsyncDynamicSession(**kwargs) if mode == "dynamic" else AsyncStealthySession(**kwargs)
119
+ await session.start()
120
+ sessions[key] = session
121
+ return session
122
+
123
+ async def cleanup():
124
+ for session in sessions.values():
125
+ try:
126
+ await session.close()
127
+ except Exception:
128
+ pass
129
+ sessions.clear()
130
+
131
+ def cleanup_sync():
132
+ asyncio.get_event_loop().run_until_complete(cleanup())
133
+
134
+ atexit.register(cleanup_sync)
135
+
136
+
137
+ def normalize_response(response, fallback_url):
103
138
  headers = {}
104
139
  raw_headers = getattr(response, "headers", None)
105
140
  if hasattr(raw_headers, "items"):
@@ -120,77 +155,289 @@ async def main():
120
155
  elif not isinstance(body, str):
121
156
  body = str(body or "")
122
157
 
123
- out = {
158
+ return {
124
159
  "ok": True,
125
- "url": getattr(response, "url", url),
160
+ "url": getattr(response, "url", fallback_url),
126
161
  "status": getattr(response, "status", 200),
127
162
  "contentType": headers.get("content-type", ""),
128
163
  "body": body,
129
164
  "headers": headers,
130
165
  }
131
- print(json.dumps(out))
132
166
 
133
- try:
134
- asyncio.run(main())
135
- except Exception as exc:
136
- print(json.dumps({"ok": False, "error": str(exc), "type": exc.__class__.__name__}))
137
- sys.exit(1)
167
+
168
+ async def handle_job(job):
169
+ mode = job.get("mode")
170
+ url = job.get("url")
171
+ payload = job.get("payload") or {}
172
+ timeout = payload.get("timeout") or 30000
173
+ proxy = payload.get("proxy")
174
+
175
+ kwargs = {"timeout": timeout}
176
+ if proxy:
177
+ kwargs["proxy"] = proxy
178
+
179
+ if mode == "async":
180
+ response = await AsyncFetcher.get(url, **kwargs)
181
+ else:
182
+ session = await build_session(mode, payload)
183
+ response = await session.fetch(url, **kwargs)
184
+
185
+ out = normalize_response(response, url)
186
+ out["id"] = job.get("id")
187
+ return out
188
+
189
+
190
+ async def main():
191
+ print(json.dumps({"type": "ready"}), flush=True)
192
+
193
+ for raw_line in sys.stdin:
194
+ line = raw_line.strip()
195
+ if not line:
196
+ continue
197
+ try:
198
+ job = json.loads(line)
199
+ except Exception as exc:
200
+ print(json.dumps({"type": "error", "ok": False, "error": str(exc)}), flush=True)
201
+ continue
202
+
203
+ if job.get("type") == "shutdown":
204
+ break
205
+
206
+ try:
207
+ out = await handle_job(job)
208
+ except Exception as exc:
209
+ out = {"id": job.get("id"), "ok": False, "error": str(exc), "type": exc.__class__.__name__}
210
+ print(json.dumps(out), flush=True)
211
+
212
+ await cleanup()
213
+
214
+
215
+ asyncio.run(main())
138
216
  `;
139
217
  }
140
218
 
219
/**
 * Consume a chunk of daemon stdout and dispatch every complete JSON line in it.
 *
 * The chunk is appended to `state.stdoutBuffer`; each newline-terminated line is
 * removed from the buffer and parsed, while a trailing partial line stays
 * buffered until a later chunk completes it.
 *
 * - A `{"type": "ready"}` message marks the state ready and calls `state.resolveReady`.
 * - Any other message is matched by `id` against `state.pending`; the entry is
 *   removed, its `cleanup` hook (if any) runs, and it is resolved with the parsed
 *   message when `ok` is truthy or with `null` otherwise.
 *
 * @param {object} state - Daemon state holding `stdoutBuffer`, `pending`, `ready` and `resolveReady`.
 * @param {*} chunk - Data received from the daemon's stdout stream.
 * @returns {void}
 */
function handleDaemonStdout(state, chunk) {
  state.stdoutBuffer += String(chunk || "");

  let newlineIndex = state.stdoutBuffer.indexOf("\n");
  while (newlineIndex !== -1) {
    const line = state.stdoutBuffer.slice(0, newlineIndex).trim();
    state.stdoutBuffer = state.stdoutBuffer.slice(newlineIndex + 1);
    newlineIndex = state.stdoutBuffer.indexOf("\n");
    if (!line) continue;

    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Not JSON (for example stray library output on stdout): skip the line.
      continue;
    }

    // `null`, numbers and strings are valid JSON but are not protocol messages.
    // Without this guard a bare `null` line would throw on `parsed.type`
    // inside the stream's "data" handler.
    if (parsed === null || typeof parsed !== "object") continue;

    if (parsed.type === "ready") {
      state.ready = true;
      state.resolveReady?.(state);
      continue;
    }

    const pending = state.pending.get(parsed.id);
    if (!pending) continue;
    state.pending.delete(parsed.id);
    pending.cleanup?.();
    pending.resolve(parsed.ok ? parsed : null);
    scheduleDaemonIdleStop(state);
  }
}
248
+
249
/**
 * Settle every job still waiting on the given daemon state with `null`.
 *
 * Cancels the state's idle timer when one is set, runs each pending entry's
 * optional `cleanup` hook before resolving it, and empties the pending map.
 *
 * @param {object} state - Daemon state holding `idleTimer` and the `pending` map.
 * @returns {void}
 */
function failDaemonState(state) {
  if (state.idleTimer) {
    clearTimeout(state.idleTimer);
  }
  state.pending.forEach((entry) => {
    entry.cleanup?.();
    entry.resolve(null);
  });
  state.pending.clear();
}
257
+
258
/**
 * (Re)arm the idle timer for a daemon state.
 *
 * Any previously armed timer is cleared first. Nothing is scheduled while jobs
 * are pending. When the timer fires, the daemon is stopped only if this state
 * is still the tracked one and no job has been registered in the meantime.
 *
 * @param {object} state - Daemon state holding `idleTimer` and the `pending` map.
 * @returns {void}
 */
function scheduleDaemonIdleStop(state) {
  if (state.idleTimer) clearTimeout(state.idleTimer);
  if (state.pending.size > 0) return;

  const stopIfStillIdle = () => {
    const isTracked = daemonState === state;
    if (isTracked && state.pending.size === 0) void stopScraplingDaemon();
  };

  const timer = setTimeout(stopIfStillIdle, DAEMON_IDLE_TIMEOUT_MS);
  state.idleTimer = timer;
  // unref (when available) keeps the pending timer from holding the process open.
  timer.unref?.();
}
266
+
267
/**
 * Pick the Python interpreter used for the scrapling runtime.
 *
 * Resolution order:
 * 1. the `PYTHON` environment variable, when set;
 * 2. the interpreter of a `.venv-scrapling` virtual environment in the current
 *    working directory, probing both the POSIX layout (`bin/python`) and the
 *    Windows layout (`Scripts/python.exe`), platform-native layout first;
 * 3. the bare command `python3`.
 *
 * @returns {string} Path or command name of the interpreter.
 */
function resolvePythonExecutable() {
  if (process.env.PYTHON) return process.env.PYTHON;

  const venvRoot = path.join(process.cwd(), ".venv-scrapling");
  const posixPython = path.join(venvRoot, "bin", "python");
  // Virtual environments created on Windows place the interpreter under Scripts\.
  const windowsPython = path.join(venvRoot, "Scripts", "python.exe");
  const candidates = process.platform === "win32"
    ? [windowsPython, posixPython]
    : [posixPython, windowsPython];

  return candidates.find((candidate) => existsSync(candidate)) ?? "python3";
}
272
+
273
/**
 * Build the environment for Python child processes.
 *
 * Copies the current process environment and sets `PYTHONPATH` to
 * `SCRAPLING_ROOT` followed by any existing `PYTHONPATH`, joined with the
 * platform path delimiter.
 *
 * @returns {object} Environment variables for the child process.
 */
function daemonEnv() {
  const searchPath = [SCRAPLING_ROOT, process.env.PYTHONPATH]
    .filter(Boolean)
    .join(path.delimiter);

  return { ...process.env, PYTHONPATH: searchPath };
}
279
+
280
/**
 * Probe, once, whether the Python runtime can import the scrapling stack.
 *
 * Runs the resolved interpreter synchronously (15 s timeout) with a one-liner
 * that imports `lxml`, `patchright`, `playwright` and `scrapling`. The outcome
 * is cached in `runtimeStatus`, so later calls return the same object without
 * spawning again.
 *
 * On failure the error text is taken from, in order: the probe's stderr, its
 * stdout, the spawn error reported by `spawnSync` (for example a missing
 * interpreter or a timeout), and finally a generic message with the exit status.
 *
 * @returns {{ok: true, python: string} | {ok: false, python: string, error: string}}
 */
function validateScraplingRuntime() {
  if (runtimeStatus) return runtimeStatus;

  const python = resolvePythonExecutable();
  const probe = spawnSync(python, ["-c", "import sys; sys.path.insert(0, sys.argv[1]); import lxml, patchright, playwright, scrapling; print('OK')", SCRAPLING_ROOT], {
    env: daemonEnv(),
    encoding: "utf8",
    timeout: 15000,
  });

  if (probe.status === 0) {
    runtimeStatus = { ok: true, python };
    return runtimeStatus;
  }

  // When the process could not be spawned at all, stdout/stderr are empty and
  // the only diagnostic is probe.error; surface it instead of "status unknown".
  const detail = probe.stderr
    || probe.stdout
    || probe.error?.message
    || `scrapling runtime check failed with status ${probe.status ?? "unknown"}`;

  runtimeStatus = { ok: false, python, error: detail.trim() };
  return runtimeStatus;
}
298
+
299
/**
 * Public accessor for the scrapling runtime probe result.
 *
 * @returns {object} The status object produced by `validateScraplingRuntime`.
 */
export function getScraplingRuntimeStatus() {
  const status = validateScraplingRuntime();
  return status;
}
302
+
303
/**
 * Register, at most once, a process "exit" listener that sends SIGKILL to the
 * child of whichever daemon state is tracked when the process exits.
 *
 * @returns {void}
 */
function ensureExitHook() {
  if (!exitHookInstalled) {
    exitHookInstalled = true;
    const killDaemonOnExit = () => {
      daemonState?.child?.kill?.("SIGKILL");
    };
    process.once("exit", killDaemonOnExit);
  }
}
310
+
311
/**
 * Return the state of a ready scrapling daemon, starting one when needed.
 *
 * If a daemon is already ready its state is returned; if one is starting, the
 * same readiness promise is shared. Otherwise the runtime is validated, the
 * Python daemon script is spawned with piped stdio, and a promise is returned
 * that resolves with the state once the daemon announces readiness on stdout.
 *
 * The child's "error" and "close" events reject the readiness promise when the
 * daemon was not ready yet, settle all pending jobs with `null`, and untrack
 * the state. An "error" on the child's stdin pipe is handled the same way and
 * also kills the child, because a daemon that cannot receive jobs is unusable.
 *
 * @returns {Promise<object>} The daemon state object.
 * @throws {Error} When the runtime check fails, or the daemon exits or errors before becoming ready.
 */
async function ensureScraplingDaemon() {
  if (daemonState?.ready) return daemonState;
  if (daemonState?.readyPromise) return daemonState.readyPromise;

  ensureExitHook();
  const runtime = validateScraplingRuntime();
  if (!runtime.ok) throw new Error(runtime.error || "scrapling runtime unavailable");
  const child = spawnProcess(runtime.python, ["-c", pythonDaemonScript(), SCRAPLING_ROOT], {
    env: daemonEnv(),
    stdio: ["pipe", "pipe", "pipe"],
  });

  const state = {
    child,
    pending: new Map(),
    stdoutBuffer: "",
    stderrBuffer: "",
    ready: false,
    readyPromise: null,
    resolveReady: null,
    rejectReady: null,
    idleTimer: null,
  };

  state.readyPromise = new Promise((resolve, reject) => {
    state.resolveReady = resolve;
    state.rejectReady = reject;
  });

  // Settle everything waiting on this daemon and stop tracking it.
  const abandon = () => {
    failDaemonState(state);
    if (daemonState === state) daemonState = null;
  };

  child.stdout.on("data", (chunk) => handleDaemonStdout(state, chunk));
  child.stderr.on("data", (chunk) => {
    state.stderrBuffer += String(chunk || "");
    // Keep only the most recent 20,000 characters of stderr.
    if (state.stderrBuffer.length > 20_000) state.stderrBuffer = state.stderrBuffer.slice(-20_000);
  });
  // Writing to the stdin of a dead child reports EPIPE asynchronously as an
  // "error" event on the stream; without a listener Node treats it as an
  // uncaught exception. Optional chaining tolerates minimal test doubles.
  child.stdin?.on?.("error", (error) => {
    if (!state.ready) state.rejectReady?.(error);
    abandon();
    try {
      child.kill?.("SIGKILL");
    } catch {
      // The child is already gone; nothing left to do.
    }
  });
  child.on("error", (error) => {
    if (!state.ready) state.rejectReady?.(error);
    abandon();
  });
  child.on("close", (code) => {
    if (!state.ready) state.rejectReady?.(new Error(`scrapling daemon exited before ready (${code ?? "unknown"})`));
    abandon();
  });

  daemonState = state;
  return state.readyPromise;
}
359
+
360
/**
 * Build the per-job payload sent to the daemon.
 *
 * - `timeout`: for mode "stealthy", `config.stealthTimeoutMs`, falling back to
 *   `config.pageTimeoutMs`, then 30000; for every other mode,
 *   `config.pageTimeoutMs`, then 30000.
 * - `proxy`: `config.proxy`, or `null` when falsy.
 * - `proxyRotation`: `config.proxyRotation` when it is a non-empty array, else `null`.
 *
 * @param {string} mode - Fetch mode of the job.
 * @param {object} [config] - Fetch configuration.
 * @returns {{timeout: number, proxy: *, proxyRotation: Array|null}}
 */
function requestPayload(mode, config = {}) {
  const pageTimeout = config.pageTimeoutMs || 30_000;
  const timeout = mode === "stealthy"
    ? (config.stealthTimeoutMs || pageTimeout)
    : pageTimeout;

  const rotation = config.proxyRotation;
  const hasRotation = Array.isArray(rotation) && rotation.length;

  return {
    timeout,
    proxy: config.proxy || null,
    proxyRotation: hasRotation ? rotation : null,
  };
}
369
+
141
370
  export async function fetchWithScrapling(url, mode, signal, config = {}) {
142
371
  if (!mode) return null;
372
+ let state;
373
+ try {
374
+ state = await ensureScraplingDaemon();
375
+ } catch {
376
+ return null;
377
+ }
378
+ const id = `job-${++daemonSequence}`;
379
+ const payload = requestPayload(mode, config);
143
380
 
144
381
  return await new Promise((resolve) => {
145
- const child = spawn(process.env.PYTHON || "python3", ["-c", pythonScript(), SCRAPLING_ROOT, mode, url, JSON.stringify({ timeout: config.pageTimeoutMs || 30000 })], {
146
- env: {
147
- ...process.env,
148
- PYTHONPATH: [SCRAPLING_ROOT, process.env.PYTHONPATH].filter(Boolean).join(path.delimiter),
149
- },
150
- stdio: ["ignore", "pipe", "pipe"],
151
- });
152
-
153
- let stdout = "";
154
- let stderr = "";
155
- child.stdout.on("data", (chunk) => {
156
- stdout += chunk;
157
- });
158
- child.stderr.on("data", (chunk) => {
159
- stderr += chunk;
160
- });
161
-
382
+ let settled = false;
162
383
  const finish = (value) => {
163
- if (!signal) return resolve(value);
164
- if (signal.aborted) return resolve(null);
165
- return resolve(value);
384
+ if (settled) return;
385
+ settled = true;
386
+ resolve(signal?.aborted ? null : value);
387
+ };
388
+
389
+ const cleanup = () => {
390
+ if (signal && abort) signal.removeEventListener("abort", abort);
391
+ };
392
+
393
+ const abort = () => {
394
+ state.pending.delete(id);
395
+ cleanup();
396
+ scheduleDaemonIdleStop(state);
397
+ finish(null);
166
398
  };
167
399
 
168
- child.on("error", () => finish(null));
169
- child.on("close", (code) => {
170
- if (code !== 0) return finish(null);
171
- try {
172
- const parsed = JSON.parse(stdout.trim() || "{}");
173
- if (!parsed.ok) return finish(null);
174
- return finish(parsed);
175
- } catch {
176
- if (stderr) return finish(null);
177
- return finish(null);
178
- }
179
- });
400
+ if (state.idleTimer) clearTimeout(state.idleTimer);
401
+ state.pending.set(id, { resolve: finish, cleanup });
180
402
 
181
403
  if (signal) {
182
- const abort = () => {
183
- child.kill("SIGKILL");
184
- finish(null);
185
- };
186
- if (signal.aborted) abort();
187
- else signal.addEventListener("abort", abort, { once: true });
404
+ if (signal.aborted) return abort();
405
+ signal.addEventListener("abort", abort, { once: true });
406
+ }
407
+
408
+ try {
409
+ state.child.stdin.write(`${JSON.stringify({ id, url, mode, payload })}\n`);
410
+ } catch {
411
+ state.pending.delete(id);
412
+ cleanup();
413
+ finish(null);
188
414
  }
189
415
  });
190
416
  }
191
417
 
418
/**
 * Stop the tracked daemon, if any.
 *
 * Untracks the current state, settles its pending jobs with `null` via
 * `failDaemonState`, and sends SIGKILL to the child. Errors thrown by `kill`
 * are ignored.
 *
 * @returns {Promise<void>}
 */
export async function stopScraplingDaemon() {
  const state = daemonState;
  if (!state) return;

  daemonState = null;
  failDaemonState(state);

  try {
    state.child.kill("SIGKILL");
  } catch {
    // ignore
  }
}
429
+
430
/**
 * Test hook: replace the factory used to spawn the daemon process.
 *
 * @param {Function} [factory] - Replacement spawn function; a falsy value restores the default `nodeSpawn`.
 * @returns {void}
 */
export function setScraplingSpawnForTests(factory) {
  spawnProcess = factory ? factory : nodeSpawn;
}
433
+
434
/**
 * Test hook: overwrite the cached runtime probe result.
 *
 * @param {object|null} status - Value stored as the cached runtime status.
 * @returns {void}
 */
export function setScraplingRuntimeStatusForTests(status) {
  const nextStatus = status;
  runtimeStatus = nextStatus;
}
437
+
192
438
  export const pageFetchAdapter = {
193
439
  assessPageAttempt,
194
440
  chooseScraplingMode,
195
441
  fetchWithScrapling,
442
+ stopScraplingDaemon,
196
443
  };
@@ -35,7 +35,7 @@ export const DOMAIN_AUTHORITY_RULES = {
35
35
  },
36
36
  };
37
37
 
38
- const PLACEHOLDER_PATTERNS = [
38
+ export const PLACEHOLDER_PATTERNS = [
39
39
  /cloudflare/i,
40
40
  /access denied/i,
41
41
  /temporarily unavailable/i,
@@ -119,6 +119,20 @@ export function pageQualitySignals({ title = "", text = "", status = 200, conten
119
119
  };
120
120
  }
121
121
 
122
/**
 * Decide whether a fetched page carries usable content.
 *
 * Uses `page.quality` when present; otherwise computes quality signals from
 * the page's title, text, URL, fetch status (default 200) and content type
 * (default "text/html"), together with `config.query`.
 *
 * @param {object} page - Page record; must have non-empty `text` to be usable.
 * @param {object} [config] - Optional `query` and `minPageText` overrides.
 * @returns {boolean} `true` when the page is not blocked, not a placeholder,
 *   not weak, and its plain length reaches the minimum
 *   (`config.minPageText`, defaulting to `WEAK_PAGE_POLICY.weakTextLimit`).
 */
export function isUsableContent(page, config = {}) {
  if (!page?.text) return false;

  let quality = page.quality;
  if (!quality) {
    quality = pageQualitySignals({
      title: page.title,
      text: page.text,
      url: page.url,
      query: config.query || "",
      status: page.fetchStatus ?? 200,
      contentType: page.contentType || "text/html",
    });
  }

  const threshold = config.minPageText ?? WEAK_PAGE_POLICY.weakTextLimit;
  if (quality.blocked || quality.placeholder || quality.weak) return false;
  return quality.plainLength >= threshold;
}
135
+
122
136
  export function sourceAuthorityProfile({ url = "", title = "", text = "", query = "", domain = "" } = {}) {
123
137
  const hostname = normalizeHostname(url);
124
138
  const resolvedDomain = resolvePolicyDomain(query, domain);
@@ -147,42 +161,49 @@ export function sourceAuthorityProfile({ url = "", title = "", text = "", query
147
161
  return { sourceType: null, authoritative: false, domainBoost: 0, reasons: [] };
148
162
  }
149
163
 
150
- export function buildAuthorityFollowUpQueries(query = "", explicitDomain = "") {
164
/**
 * Build a search-operator suffix that excludes already-seen sites.
 *
 * Each URL is reduced to a hostname with `normalizeHostname`; empty results
 * are dropped and duplicates removed, keeping first-seen order.
 *
 * @param {string[]|null} [seenUrls] - URLs already retrieved; `null` and
 *   `undefined` are both treated as an empty list.
 * @returns {string} A string such as `" -site:a.com -site:b.org"` (with a
 *   leading space), or `""` when there is nothing to exclude.
 */
function followUpSiteExclusions(seenUrls = []) {
  // The parameter default only covers `undefined`; callers forwarding
  // `options.seenUrls` may pass an explicit `null`.
  const urls = seenUrls ?? [];
  const sites = [...new Set(urls.map((url) => normalizeHostname(url)).filter(Boolean))];
  return sites.length ? ` ${sites.map((site) => `-site:${site}`).join(" ")}` : "";
}
168
+
169
/**
 * Build two follow-up search queries aimed at authoritative sources.
 *
 * The policy domain is resolved from the query and the explicit domain; each
 * query is the base query, a domain-specific keyword suffix, and the
 * site-exclusion suffix derived from `options.seenUrls`.
 *
 * @param {string} [query] - Original research query.
 * @param {string} [explicitDomain] - Explicit policy domain, if any.
 * @param {object} [options] - Optional `seenUrls` list of already-retrieved URLs.
 * @returns {string[]} Two follow-up queries.
 */
export function buildAuthorityFollowUpQueries(query = "", explicitDomain = "", options = {}) {
  const domain = resolvePolicyDomain(query, explicitDomain);
  const base = baseQuery(query);
  const exclusions = followUpSiteExclusions(options.seenUrls);

  // NOTE(review): the second "github" query combines `site:github.com` with
  // the exclusions; if github.com is among the seen sites the operators
  // would contradict each other — confirm with callers.
  const suffixesByDomain = new Map([
    ["security", ["cve advisory vendor", "nvd cisa mitre"]],
    ["vendor-status", ["status page incident", "official outage status"]],
    ["package-registry", ["npm pypi crates readme", "official package docs"]],
    ["github", ["github readme releases", "site:github.com readme docs"]],
    ["papers", ["arxiv doi publisher", "semanticscholar arxiv doi"]],
  ]);
  const suffixes = suffixesByDomain.get(domain) ?? ["official docs", "documentation reference"];

  return suffixes.map((suffix) => `${base} ${suffix}${exclusions}`);
}
189
 
170
- export function buildConflictFollowUpQueries(query = "", explicitDomain = "") {
190
/**
 * Build two follow-up search queries aimed at resolving conflicting sources.
 *
 * The policy domain is resolved from the query and the explicit domain; each
 * query is the base query, a domain-specific keyword suffix, and the
 * site-exclusion suffix derived from `options.seenUrls`.
 *
 * @param {string} [query] - Original research query.
 * @param {string} [explicitDomain] - Explicit policy domain, if any.
 * @param {object} [options] - Optional `seenUrls` list of already-retrieved URLs.
 * @returns {string[]} Two follow-up queries.
 */
export function buildConflictFollowUpQueries(query = "", explicitDomain = "", options = {}) {
  const domain = resolvePolicyDomain(query, explicitDomain);
  const base = baseQuery(query);
  const exclusions = followUpSiteExclusions(options.seenUrls);

  const suffixesByDomain = new Map([
    ["security", ["vendor advisory official", "cve mitigation official"]],
    ["vendor-status", ["incident status official", "status page postmortem"]],
    ["package-registry", ["release notes changelog", "maintainer docs"]],
    ["github", ["github releases readme", "canonical repo docs"]],
    ["papers", ["arxiv doi compare", "publisher abstract official"]],
  ]);
  const suffixes = suffixesByDomain.get(domain)
    ?? ["official docs support status", "official comparison reference"];

  return suffixes.map((suffix) => `${base} ${suffix}${exclusions}`);
}
@@ -6,6 +6,7 @@
6
6
  "resultsPerQuery": 4,
7
7
  "maxPages": 3,
8
8
  "pageTimeoutMs": 6000,
9
+ "stealthTimeoutMs": 30000,
9
10
  "pageTextLimit": 4000,
10
11
  "minPageText": 300,
11
12
  "useJinaFallback": true,
@@ -23,6 +24,7 @@
23
24
  "resultsPerQuery": 5,
24
25
  "maxPages": 8,
25
26
  "pageTimeoutMs": 10000,
27
+ "stealthTimeoutMs": 40000,
26
28
  "pageTextLimit": 8000,
27
29
  "minPageText": 300,
28
30
  "useJinaFallback": true,
@@ -40,6 +42,7 @@
40
42
  "resultsPerQuery": 5,
41
43
  "maxPages": 6,
42
44
  "pageTimeoutMs": 10000,
45
+ "stealthTimeoutMs": 40000,
43
46
  "pageTextLimit": 8000,
44
47
  "minPageText": 300,
45
48
  "useJinaFallback": true,
@@ -57,6 +60,7 @@
57
60
  "resultsPerQuery": 5,
58
61
  "maxPages": 6,
59
62
  "pageTimeoutMs": 10000,
63
+ "stealthTimeoutMs": 40000,
60
64
  "pageTextLimit": 8000,
61
65
  "minPageText": 300,
62
66
  "useJinaFallback": true,
package/lib/research.js CHANGED
@@ -459,7 +459,7 @@ export function detectConflictSignals(pages) {
459
459
  return { detected: false, reason: null, conflictSummary: "", conflictingSourcePairs: [] };
460
460
  }
461
461
 
462
- export function detectResearchGaps(query, pages) {
462
+ export function detectResearchGaps(query, pages, options = {}) {
463
463
  const hasAuthoritativeSource = pages.some((page) => {
464
464
  const scored = scoreSourceEntry(page, query || "");
465
465
  return Boolean(page.authoritative || scored.authoritative);
@@ -468,7 +468,7 @@ export function detectResearchGaps(query, pages) {
468
468
  return {
469
469
  detected: true,
470
470
  reason: "Retrieved pages lack an authoritative docs or README source.",
471
- followupQuery: buildAuthorityFollowUpQueries(query)[0] || `${queryBase(query)} official docs`,
471
+ followupQuery: buildAuthorityFollowUpQueries(query, "", options)[0] || `${queryBase(query)} official docs`,
472
472
  missingAspects: ["authoritative sources"],
473
473
  };
474
474
  }
@@ -476,12 +476,21 @@ export function detectResearchGaps(query, pages) {
476
476
  return { detected: false, reason: null, followupQuery: null, missingAspects: [] };
477
477
  }
478
478
 
479
- export function buildFollowUpQuery(query, pages) {
479
/**
 * Choose a single follow-up query for the retrieved pages.
 *
 * Priority: a conflict-resolution query when conflict signals are detected,
 * then the follow-up suggested by research-gap detection, and otherwise an
 * authority query built from the base query plus "clarification".
 *
 * @param {string} query - Original research query.
 * @param {Array} pages - Pages retrieved so far.
 * @param {object} [options] - Options forwarded to the query builders and gap detection.
 * @returns {string} The follow-up query.
 */
export function buildFollowUpQuery(query, pages, options = {}) {
  if (detectConflictSignals(pages).detected) {
    const [conflictQuery] = buildConflictFollowUpQueries(query, "", options);
    return conflictQuery || `${queryBase(query)} official docs support status`;
  }

  const gaps = detectResearchGaps(query, pages, options);
  if (gaps.detected) {
    return gaps.followupQuery;
  }

  const clarified = `${queryBase(query)} clarification`;
  const [authorityQuery] = buildAuthorityFollowUpQueries(clarified, "", options);
  return authorityQuery || `${queryBase(query)} clarification official docs`;
}
486
+
487
/**
 * Build a follow-up query for an explicitly requested action.
 *
 * Recognised actions: "need_conflict_resolution", "need_authority",
 * "need_recency", "need_version_context" and "need_primary_source". Any other
 * action falls back to an authority query built from the base query plus
 * "clarification".
 *
 * @param {string} query - Original research query.
 * @param {string} action - Requested follow-up action.
 * @param {object} [options] - Options forwarded to the query builders.
 * @returns {string} The follow-up query.
 */
export function buildActionBasedFollowUpQuery(query, action, options = {}) {
  switch (action) {
    case "need_conflict_resolution": {
      const [conflictQuery] = buildConflictFollowUpQueries(query, "", options);
      return conflictQuery || `${queryBase(query)} official docs support status`;
    }
    case "need_authority": {
      const [authorityQuery] = buildAuthorityFollowUpQueries(query, "", options);
      return authorityQuery || `${queryBase(query)} official docs`;
    }
    case "need_recency":
      return `${queryBase(query)} latest`;
    case "need_version_context":
      return `${queryBase(query)} version diff`;
    case "need_primary_source":
      return `${queryBase(query)} source announcement`;
    default: {
      const clarified = `${queryBase(query)} clarification`;
      const [fallbackQuery] = buildAuthorityFollowUpQueries(clarified, "", options);
      return fallbackQuery || `${queryBase(query)} clarification official docs`;
    }
  }
}
486
495
 
487
496
  function queryTermsForFactCheck(text) {