@ahkohd/yagami 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/.beads/.beads-credential-key +1 -0
  2. package/.beads/README.md +81 -0
  3. package/.beads/config.yaml +54 -0
  4. package/.beads/hooks/post-checkout +24 -0
  5. package/.beads/hooks/post-merge +24 -0
  6. package/.beads/hooks/pre-commit +24 -0
  7. package/.beads/hooks/pre-push +24 -0
  8. package/.beads/hooks/prepare-commit-msg +24 -0
  9. package/.beads/metadata.json +7 -0
  10. package/.github/workflows/ci.yml +43 -0
  11. package/.github/workflows/release.yml +115 -0
  12. package/AGENTS.md +150 -0
  13. package/README.md +210 -0
  14. package/biome.json +36 -0
  15. package/config/mcporter.json +8 -0
  16. package/dist/cli/theme.js +202 -0
  17. package/dist/cli/theme.js.map +1 -0
  18. package/dist/cli.js +1883 -0
  19. package/dist/cli.js.map +1 -0
  20. package/dist/config.js +223 -0
  21. package/dist/config.js.map +1 -0
  22. package/dist/daemon.js +745 -0
  23. package/dist/daemon.js.map +1 -0
  24. package/dist/engine/constants.js +131 -0
  25. package/dist/engine/constants.js.map +1 -0
  26. package/dist/engine/deep-research.js +167 -0
  27. package/dist/engine/deep-research.js.map +1 -0
  28. package/dist/engine/defuddle-utils.js +57 -0
  29. package/dist/engine/defuddle-utils.js.map +1 -0
  30. package/dist/engine/github-fetch.js +232 -0
  31. package/dist/engine/github-fetch.js.map +1 -0
  32. package/dist/engine/helpers.js +372 -0
  33. package/dist/engine/helpers.js.map +1 -0
  34. package/dist/engine/limiter.js +75 -0
  35. package/dist/engine/limiter.js.map +1 -0
  36. package/dist/engine/policy.js +313 -0
  37. package/dist/engine/policy.js.map +1 -0
  38. package/dist/engine/runtime-utils.js +65 -0
  39. package/dist/engine/runtime-utils.js.map +1 -0
  40. package/dist/engine/search-discovery.js +275 -0
  41. package/dist/engine/search-discovery.js.map +1 -0
  42. package/dist/engine/url-utils.js +72 -0
  43. package/dist/engine/url-utils.js.map +1 -0
  44. package/dist/engine.js +2030 -0
  45. package/dist/engine.js.map +1 -0
  46. package/dist/mcp.js +282 -0
  47. package/dist/mcp.js.map +1 -0
  48. package/dist/types/cli.js +2 -0
  49. package/dist/types/cli.js.map +1 -0
  50. package/dist/types/config.js +2 -0
  51. package/dist/types/config.js.map +1 -0
  52. package/dist/types/daemon.js +2 -0
  53. package/dist/types/daemon.js.map +1 -0
  54. package/dist/types/engine.js +2 -0
  55. package/dist/types/engine.js.map +1 -0
  56. package/package.json +66 -0
  57. package/packages/pi-yagami-search/README.md +39 -0
  58. package/packages/pi-yagami-search/extensions/yagami-search.ts +273 -0
  59. package/packages/pi-yagami-search/package.json +41 -0
  60. package/src/cli/theme.ts +260 -0
  61. package/src/cli.ts +2226 -0
  62. package/src/config.ts +250 -0
  63. package/src/daemon.ts +990 -0
  64. package/src/engine/constants.ts +147 -0
  65. package/src/engine/deep-research.ts +207 -0
  66. package/src/engine/defuddle-utils.ts +75 -0
  67. package/src/engine/github-fetch.ts +265 -0
  68. package/src/engine/helpers.ts +394 -0
  69. package/src/engine/limiter.ts +97 -0
  70. package/src/engine/policy.ts +392 -0
  71. package/src/engine/runtime-utils.ts +79 -0
  72. package/src/engine/search-discovery.ts +351 -0
  73. package/src/engine/url-utils.ts +86 -0
  74. package/src/engine.ts +2516 -0
  75. package/src/mcp.ts +337 -0
  76. package/src/shims-cli.d.ts +3 -0
  77. package/src/types/cli.ts +7 -0
  78. package/src/types/config.ts +53 -0
  79. package/src/types/daemon.ts +22 -0
  80. package/src/types/engine.ts +194 -0
  81. package/tsconfig.json +18 -0
package/dist/engine.js ADDED
@@ -0,0 +1,2030 @@
1
+ // TypeScript-native engine implementation.
2
+ import { randomUUID } from "node:crypto";
3
+ import { Agent } from "@mariozechner/pi-agent-core";
4
+ import { Type } from "@mariozechner/pi-ai";
5
+ import { lightpanda } from "@lightpanda/browser";
6
+ import { chromium } from "playwright-core";
7
+ import { buildDeepCustomInstruction, buildDeepFollowUpPrompts, composeDeepResearchReport, createDeepResearchTask, evictOldDeepResearchTasks, getDeepEffortProfile, resolveDeepEffort, } from "./engine/deep-research.js";
8
+ import { ConcurrencyLimiter } from "./engine/limiter.js";
9
+ import { withSuppressedDefuddleWarnings } from "./engine/defuddle-utils.js";
10
+ import { tryFetchGitHubRepoContent } from "./engine/github-fetch.js";
11
+ import { buildContext, clampInteger, countWords, domainMatches, extractAssistantText, extractCitationUrls, extractTextContent, extractTopTerms, getHostname, isChallengeLikeContent, isDiscoveryDomain, isHostAllowed, isValidPublicHostname, normalizeDomainFilter, normalizeEnum, parseIsoDate, parseStringList, stripHtml, toBool, truncateText, } from "./engine/helpers.js";
12
+ import { buildSystemPrompt, deriveResearchPlan, normalizeResearchPolicy } from "./engine/policy.js";
13
+ import { discoverSearchResults, parseDuckDuckGoResults, resolveSearchEngineTemplate, } from "./engine/search-discovery.js";
14
+ import { anthropicModelsUrl, delay, fetchJson, joinUrl, normalizeLlmApi, normalizeThinkingLevel, resolveRuntimeApiKey, } from "./engine/runtime-utils.js";
15
+ import { normalizeUniqueUrls, normalizeUrl, normalizeUrlForDedupe } from "./engine/url-utils.js";
16
/**
 * Collect the ordered, de-duplicated list of citation URLs for a result.
 *
 * Candidates are taken first from the URLs that `extractCitationUrls` finds in
 * the answer text, then from the `url` of every non-error tool profile.
 * A candidate is kept only if it normalizes successfully, has a hostname that
 * passes `isValidPublicHostname`, is not a discovery domain, and has not been
 * seen before (compared by its `normalizeUrlForDedupe` key).
 *
 * @param {string} answer - Answer text to scan for cited URLs.
 * @param {Iterable<object>} toolProfiles - Tool call records (`isError`, `url`).
 * @returns {string[]} Normalized citation URLs in first-seen order.
 */
function collectResultCitations(answer, toolProfiles) {
    const rawUrls = [...extractCitationUrls(answer)];
    for (const profile of toolProfiles) {
        if (profile.isError) {
            continue;
        }
        const toolUrl = String(profile.url || "").trim();
        if (toolUrl) {
            rawUrls.push(toolUrl);
        }
    }

    const seenKeys = new Set();
    const citations = [];
    for (const rawUrl of rawUrls) {
        let normalized;
        let dedupeKey;
        try {
            normalized = normalizeUrl(rawUrl);
            dedupeKey = normalizeUrlForDedupe(normalized);
        }
        catch {
            // Unparseable candidates are silently dropped.
            continue;
        }
        const hostname = getHostname(normalized);
        const isCitable = Boolean(hostname) && isValidPublicHostname(hostname) && !isDiscoveryDomain(hostname);
        if (!isCitable || seenKeys.has(dedupeKey)) {
            continue;
        }
        seenKeys.add(dedupeKey);
        citations.push(normalized);
    }
    return citations;
}
54
/**
 * Turn the recorded tool calls into a ranked list of findings.
 *
 * Successful `present` calls that carry non-empty content are preferred. Only
 * when none of them yields a finding does the function fall back to successful
 * `browse` calls, which produce findings with empty content. URLs are
 * de-duplicated by their `normalizeUrlForDedupe` key, and calls whose URL
 * cannot be normalized are skipped.
 *
 * @param {Iterable<object>} toolsUsed - Tool call records (`toolName`, `isError`, `details`).
 * @returns {object[]} Findings with a 1-based `rank` in discovery order.
 */
function collectResultFindings(toolsUsed) {
    const findings = [];
    const seenKeys = new Set();

    // Normalizes a URL and derives its dedupe key; null when either step throws.
    const resolveUrl = (rawUrl) => {
        try {
            const normalizedUrl = normalizeUrl(rawUrl);
            return { normalizedUrl, dedupeKey: normalizeUrlForDedupe(normalizedUrl) };
        }
        catch {
            return null;
        }
    };

    for (const call of toolsUsed) {
        if (String(call.toolName || "") !== "present" || call.isError) {
            continue;
        }
        const details = call.details || {};
        const rawUrl = String(details.url || details.finalUrl || "").trim();
        if (!rawUrl) {
            continue;
        }
        const resolved = resolveUrl(rawUrl);
        if (!resolved || seenKeys.has(resolved.dedupeKey)) {
            continue;
        }
        const content = String(details.content || "").trim();
        const wordCount = Number(details.wordCount || 0) || 0;
        if (!content) {
            continue;
        }
        seenKeys.add(resolved.dedupeKey);
        findings.push({
            sourceType: "present",
            rank: findings.length + 1,
            url: resolved.normalizedUrl,
            title: String(details.title || "").trim(),
            author: String(details.author || "").trim(),
            published: String(details.published || "").trim(),
            wordCount: wordCount || undefined,
            documentId: String(details.documentId || "").trim(),
            content,
            truncated: Boolean(details.truncated),
        });
    }
    if (findings.length > 0) {
        return findings;
    }

    // Fallback: nothing was presented, so list the pages that were at least browsed.
    for (const call of toolsUsed) {
        if (String(call.toolName || "") !== "browse" || call.isError) {
            continue;
        }
        const details = call.details || {};
        const rawUrl = String(details.finalUrl || details.url || "").trim();
        if (!rawUrl) {
            continue;
        }
        const resolved = resolveUrl(rawUrl);
        if (!resolved || seenKeys.has(resolved.dedupeKey)) {
            continue;
        }
        seenKeys.add(resolved.dedupeKey);
        findings.push({
            sourceType: "browse",
            rank: findings.length + 1,
            url: resolved.normalizedUrl,
            title: String(details.title || "").trim(),
            status: Number(details.status || 0) || undefined,
            documentId: String(details.documentId || "").trim(),
            content: "",
        });
    }
    return findings;
}
131
/**
 * Render findings as a Markdown document, one section per finding.
 *
 * Each section lists title, author, published date and URL, followed by the
 * finding's content (triple backticks are broken up with a zero-width space)
 * and a `---` separator. When there are no findings, a short explanatory line
 * is emitted instead, followed by up to 12 failed tool calls (first line of
 * each error message only).
 *
 * @param {Iterable<object>} findings - Findings as produced by the collector.
 * @param {object[]} toolsUsed - All recorded tool calls.
 * @returns {string} The trimmed Markdown text.
 */
function buildCollatedAnswer(findings, toolsUsed) {
    const clean = (value) => String(value || "").trim();
    const output = [];

    for (const entry of findings) {
        const url = clean(entry.url);
        const title = clean(entry.title) || "Untitled";
        const author = clean(entry.author) || "Unknown";
        const publishedRaw = clean(entry.published);
        const hasPublished = Boolean(publishedRaw) && publishedRaw.toLowerCase() !== "unknown";
        const body = clean(entry.content);
        output.push(
            `**Title:** ${title} `,
            `**Author:** ${author} `,
            `**Published Date:** ${hasPublished ? publishedRaw : "Unknown"} `,
            `**URL:** ${url}`,
            "",
        );
        if (body) {
            output.push(body.replace(/```/g, "``\u200b`"));
        }
        output.push("", "---", "");
    }

    const failures = toolsUsed
        .filter((call) => Boolean(call.isError))
        .slice(0, 12)
        .map((call) => {
            const details = call.details || {};
            const firstErrorLine = String(call.errorMessage || "").trim().split("\n")[0];
            return {
                toolName: String(call.toolName || "tool"),
                url: String(details.url || details.finalUrl || "").trim(),
                error: firstErrorLine || "tool call failed",
            };
        });

    if (findings.length === 0) {
        const nothingFailed = toolsUsed.length > 0 && failures.length === 0;
        output.push(
            nothingFailed
                ? "No sources were selected from gathered pages."
                : "No sources were successfully extracted.",
            "",
        );
        if (failures.length > 0) {
            output.push("Errors:");
            for (const { toolName, url, error } of failures) {
                const urlSuffix = url ? ` ${url}` : "";
                output.push(`- [${toolName}]${urlSuffix} — ${error}`);
            }
            output.push("");
        }
    }
    return output.join("\n").trim();
}
185
/**
 * Report whether a collate answer explicitly says "NONE".
 *
 * True when the trimmed answer is exactly `NONE` (case-insensitive), or when
 * the text between the first pair of `SOURCES` markers trims to `NONE`.
 *
 * @param {*} answer - Raw answer; falsy values are treated as empty.
 * @returns {boolean}
 */
function isExplicitNoneCollateAnswer(answer) {
    const isNone = (text) => /^NONE$/i.test(text);
    const trimmed = String(answer || "").trim();
    if (trimmed === "") {
        return false;
    }
    if (isNone(trimmed)) {
        return true;
    }
    const block = /SOURCES([\s\S]*?)SOURCES/i.exec(trimmed);
    return block !== null && isNone(String(block[1] || "").trim());
}
196
/**
 * Throw a plain `Error` when the given signal reports `aborted`.
 *
 * @param {{aborted?: boolean}|null|undefined} signal - Signal to inspect; may be absent.
 * @param {string} [message="operation aborted"] - Message for the thrown error.
 * @throws {Error} When `signal.aborted` is truthy.
 */
function throwIfAborted(signal, message = "operation aborted") {
    const isAborted = Boolean(signal?.aborted);
    if (!isAborted) {
        return;
    }
    throw new Error(message);
}
201
/**
 * Duck-type check for an AbortSignal-like object.
 *
 * @param {*} value - Any value.
 * @returns {*} `value` itself when it is an object with a boolean `aborted`
 *   and function-valued `addEventListener` / `removeEventListener`;
 *   otherwise `undefined`.
 */
function asAbortSignal(value) {
    if (!value || typeof value !== "object") {
        return undefined;
    }
    const looksLikeSignal = typeof value.aborted === "boolean" &&
        ["addEventListener", "removeEventListener"].every((method) => typeof value[method] === "function");
    return looksLikeSignal ? value : undefined;
}
212
/**
 * Coerce a value to a finite number that is not below zero.
 *
 * @param {*} value - Value passed through `Number()`.
 * @returns {number} The coerced number, or 0 when it is NaN, infinite or negative.
 */
function toFiniteNonNegativeNumber(value) {
    const parsed = Number(value);
    return Number.isFinite(parsed) && !(parsed < 0) ? parsed : 0;
}
218
+ export class YagamiEngine {
219
+ config;
220
+ logger;
221
+ operationLimiter;
222
+ browseLimiter;
223
+ model;
224
+ initPromise;
225
+ browser;
226
+ lightpandaManaged;
227
+ lightpandaProcess;
228
+ documents;
229
+ urlCache;
230
+ presentCache;
231
+ deepResearchTasks;
232
+ metrics;
233
+ constructor(config, logger = console) {
234
+ this.config = config;
235
+ this.logger = logger;
236
+ this.browser = null;
237
+ this.model = null;
238
+ this.lightpandaProcess = null;
239
+ this.lightpandaManaged = false;
240
+ this.documents = new Map();
241
+ this.urlCache = new Map();
242
+ this.presentCache = new Map();
243
+ this.deepResearchTasks = new Map();
244
+ this.operationLimiter = new ConcurrencyLimiter(Math.max(1, Number(this.config.operationConcurrency || 4)));
245
+ this.browseLimiter = new ConcurrencyLimiter(Math.max(1, Number(this.config.browseConcurrency || 8)));
246
+ this.initPromise = null;
247
+ this.metrics = {
248
+ queries: 0,
249
+ activeQueries: 0,
250
+ cacheHits: 0,
251
+ cacheMisses: 0,
252
+ startedAt: Date.now(),
253
+ tokenInput: 0,
254
+ tokenOutput: 0,
255
+ tokenCacheRead: 0,
256
+ tokenCacheWrite: 0,
257
+ tokenTotal: 0,
258
+ costInput: 0,
259
+ costOutput: 0,
260
+ costCacheRead: 0,
261
+ costCacheWrite: 0,
262
+ costTotal: 0,
263
+ };
264
+ }
265
+ log(message) {
266
+ this.logger.log(`[yagami] ${message}`);
267
+ }
268
+ normalizeResearchPolicy(rawPolicy = {}) {
269
+ return normalizeResearchPolicy(rawPolicy);
270
+ }
271
+ deriveResearchPlan(query, options = {}) {
272
+ return deriveResearchPlan(query, this.config, options);
273
+ }
274
+ buildSystemPrompt(plan) {
275
+ const resolvedSearch = resolveSearchEngineTemplate(this.config.searchEngine, this.config.searchEngineUrlTemplate);
276
+ const template = resolvedSearch.template.includes("{query}")
277
+ ? resolvedSearch.template.replace(/\{query\}/g, "<url-encoded query>")
278
+ : resolvedSearch.template.includes("%s")
279
+ ? resolvedSearch.template.replace(/%s/g, "<url-encoded query>")
280
+ : `${resolvedSearch.template}<url-encoded query>`;
281
+ return buildSystemPrompt(plan, { engine: resolvedSearch.engine, template });
282
+ }
283
+ async init() {
284
+ if (this.model)
285
+ return;
286
+ if (this.initPromise) {
287
+ await this.initPromise;
288
+ return;
289
+ }
290
+ this.initPromise = (async () => {
291
+ const api = normalizeLlmApi(this.config.llmApi);
292
+ const provider = api === "anthropic-messages" ? "anthropic" : "openai";
293
+ const baseUrl = this.config.llmBaseUrl;
294
+ const configuredModelId = this.config.llmModel;
295
+ const modelId = configuredModelId || (await this.detectModelId(api, baseUrl));
296
+ if (api === "anthropic-messages") {
297
+ this.model = {
298
+ id: modelId,
299
+ name: `Anthropic ${modelId}`,
300
+ api: "anthropic-messages",
301
+ provider: "anthropic",
302
+ baseUrl,
303
+ reasoning: false,
304
+ input: ["text"],
305
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
306
+ contextWindow: 262144,
307
+ maxTokens: 8192,
308
+ };
309
+ }
310
+ else {
311
+ this.model = {
312
+ id: modelId,
313
+ name: `Local ${modelId}`,
314
+ api: "openai-completions",
315
+ provider,
316
+ baseUrl,
317
+ reasoning: false,
318
+ input: ["text"],
319
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
320
+ contextWindow: 262144,
321
+ maxTokens: 8192,
322
+ compat: {
323
+ supportsDeveloperRole: false,
324
+ supportsReasoningEffort: false,
325
+ },
326
+ };
327
+ }
328
+ this.log(`using model ${this.model.id} (${this.model.api}) via ${this.model.baseUrl}`);
329
+ })();
330
+ try {
331
+ await this.initPromise;
332
+ }
333
+ finally {
334
+ this.initPromise = null;
335
+ }
336
+ }
337
+ async detectModelId(api, baseUrl) {
338
+ if (api === "anthropic-messages") {
339
+ const data = await fetchJson(anthropicModelsUrl(baseUrl), {
340
+ method: "GET",
341
+ headers: {
342
+ "x-api-key": resolveRuntimeApiKey(api, this.config.llmApiKey),
343
+ "anthropic-version": "2023-06-01",
344
+ },
345
+ });
346
+ const modelId = (data?.data?.[0]?.id || "").trim();
347
+ if (!modelId) {
348
+ throw new Error(`Could not detect model from ${anthropicModelsUrl(baseUrl)}. Set YAGAMI_LLM_MODEL explicitly.`);
349
+ }
350
+ return modelId;
351
+ }
352
+ const data = await fetchJson(joinUrl(baseUrl, "models"), {
353
+ method: "GET",
354
+ headers: {
355
+ authorization: `Bearer ${resolveRuntimeApiKey(api, this.config.llmApiKey)}`,
356
+ },
357
+ });
358
+ const modelId = (data?.data?.[0]?.id || "").trim();
359
+ if (!modelId) {
360
+ throw new Error(`Could not detect model from ${joinUrl(baseUrl, "models")}. Set YAGAMI_LLM_MODEL explicitly.`);
361
+ }
362
+ return modelId;
363
+ }
364
+ async connectBrowser() {
365
+ this.log(`connecting to Lightpanda CDP at ${this.config.lightpandaCdpUrl}`);
366
+ const browser = await chromium.connectOverCDP(this.config.lightpandaCdpUrl);
367
+ browser.on("disconnected", () => {
368
+ this.log("CDP connection disconnected");
369
+ this.browser = null;
370
+ });
371
+ return browser;
372
+ }
373
+ async startManagedLightpanda() {
374
+ if (this.lightpandaProcess)
375
+ return;
376
+ this.log(`starting managed Lightpanda on ${this.config.lightpandaHost}:${this.config.lightpandaPort}`);
377
+ const processHandle = (await lightpanda.serve({
378
+ host: this.config.lightpandaHost,
379
+ port: this.config.lightpandaPort,
380
+ }));
381
+ this.lightpandaProcess = processHandle;
382
+ this.lightpandaManaged = true;
383
+ processHandle.on("exit", (code, signal) => {
384
+ this.log(`managed Lightpanda exited (code=${String(code ?? "null")}, signal=${String(signal ?? "null")})`);
385
+ this.lightpandaProcess = null;
386
+ this.lightpandaManaged = false;
387
+ });
388
+ processHandle.on("error", (error) => {
389
+ const message = error?.message || String(error);
390
+ this.log(`managed Lightpanda error: ${message}`);
391
+ });
392
+ }
393
+ async ensureBrowser() {
394
+ if (this.browser)
395
+ return this.browser;
396
+ try {
397
+ this.browser = await this.connectBrowser();
398
+ return this.browser;
399
+ }
400
+ catch (initialError) {
401
+ if (!this.config.lightpandaAutoStart) {
402
+ throw initialError;
403
+ }
404
+ if (!this.lightpandaProcess) {
405
+ await this.startManagedLightpanda();
406
+ }
407
+ const retries = 12;
408
+ for (let i = 0; i < retries; i += 1) {
409
+ try {
410
+ this.browser = await this.connectBrowser();
411
+ return this.browser;
412
+ }
413
+ catch {
414
+ await delay(250);
415
+ }
416
+ }
417
+ throw initialError;
418
+ }
419
+ }
420
+ evictOldDocuments() {
421
+ while (this.documents.size > this.config.maxDocuments) {
422
+ const oldestKey = this.documents.keys().next().value;
423
+ if (!oldestKey)
424
+ break;
425
+ this.documents.delete(oldestKey);
426
+ this.presentCache.delete(oldestKey);
427
+ }
428
+ }
429
+ getCachedByUrl(url) {
430
+ const entry = this.urlCache.get(url);
431
+ if (!entry)
432
+ return null;
433
+ if (entry.expiresAt < Date.now()) {
434
+ this.urlCache.delete(url);
435
+ return null;
436
+ }
437
+ const doc = this.documents.get(entry.documentId);
438
+ if (!doc) {
439
+ this.urlCache.delete(url);
440
+ return null;
441
+ }
442
+ return doc;
443
+ }
444
+ formatBrowseResult(doc, fromCache) {
445
+ return {
446
+ documentId: doc.id,
447
+ url: doc.url,
448
+ finalUrl: doc.finalUrl,
449
+ status: doc.status,
450
+ title: doc.title || "",
451
+ fetchedAt: new Date(Number(doc.fetchedAt || Date.now())).toISOString(),
452
+ bytes: Buffer.byteLength(String(doc.html || ""), "utf8"),
453
+ fromCache,
454
+ };
455
+ }
456
+ isRecoverableBrowseError(error) {
457
+ const message = (error instanceof Error ? error.message : String(error || "")).toLowerCase();
458
+ if (!message)
459
+ return false;
460
+ const recoverablePatterns = [
461
+ "target page, context or browser has been closed",
462
+ "target closed",
463
+ "browser has been closed",
464
+ "connection closed",
465
+ "has been disconnected",
466
+ "disconnected",
467
+ "socket hang up",
468
+ "econnreset",
469
+ "websocket is not open",
470
+ "connect econnrefused",
471
+ "failed to connect",
472
+ "protocol error",
473
+ ];
474
+ return recoverablePatterns.some((pattern) => message.includes(pattern));
475
+ }
476
+ isTimeoutBrowseError(error) {
477
+ const message = (error instanceof Error ? error.message : String(error || "")).toLowerCase();
478
+ return message.includes("timeout");
479
+ }
480
+ extractTitleFromHtml(html) {
481
+ const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
482
+ if (!titleMatch?.[1])
483
+ return "";
484
+ return titleMatch[1].replace(/\s+/g, " ").trim();
485
+ }
486
+ extractTextFromHtmlFallback(html) {
487
+ const withoutScripts = String(html || "")
488
+ .replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, " ")
489
+ .replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, " ");
490
+ return stripHtml(withoutScripts);
491
+ }
492
+ getHttpEscalationReason(result) {
493
+ const html = String(result.html || "").trim();
494
+ const title = String(result.title || "").trim();
495
+ if (result.status >= 400) {
496
+ return `http status ${result.status}`;
497
+ }
498
+ if (!html) {
499
+ return "empty html";
500
+ }
501
+ if (isChallengeLikeContent(title, html)) {
502
+ return "challenge/interstitial content";
503
+ }
504
+ if (html.length < 800) {
505
+ return "html too short";
506
+ }
507
+ return null;
508
+ }
509
+ async browseViaHttpFallback(url, abortSignal) {
510
+ throwIfAborted(abortSignal, "browse aborted");
511
+ const controller = new AbortController();
512
+ const timeoutId = setTimeout(() => controller.abort(), this.config.browseLinkTimeoutMs);
513
+ const onAbort = () => {
514
+ controller.abort();
515
+ };
516
+ if (abortSignal)
517
+ abortSignal.addEventListener("abort", onAbort, { once: true });
518
+ try {
519
+ const response = await fetch(url, {
520
+ redirect: "follow",
521
+ signal: controller.signal,
522
+ headers: {
523
+ "user-agent": "Yagami/0.1 (+https://github.com/yagami)",
524
+ accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
525
+ },
526
+ });
527
+ throwIfAborted(abortSignal, "browse aborted");
528
+ const htmlRaw = await response.text();
529
+ const html = truncateText(htmlRaw, this.config.maxHtmlChars);
530
+ return {
531
+ status: response.status,
532
+ finalUrl: response.url || url,
533
+ html,
534
+ title: this.extractTitleFromHtml(html),
535
+ };
536
+ }
537
+ finally {
538
+ clearTimeout(timeoutId);
539
+ if (abortSignal)
540
+ abortSignal.removeEventListener("abort", onAbort);
541
+ }
542
+ }
543
+ async resetBrowserForRetry() {
544
+ const currentBrowser = this.browser;
545
+ this.browser = null;
546
+ if (currentBrowser && typeof currentBrowser.close === "function") {
547
+ try {
548
+ await currentBrowser.close();
549
+ }
550
+ catch {
551
+ // Ignore close errors while recovering from a failed browse attempt.
552
+ }
553
+ }
554
+ await delay(150);
555
+ }
556
/**
 * Fetch a URL and store it as a document, returning a summary plus timing data.
 *
 * The whole operation is handed to `this.enqueueBrowse` (defined outside this
 * view; presumably the browse concurrency limiter, to be confirmed).
 *
 * Order of strategies:
 *   1. URL cache (skipped when `options.bypassCache` is truthy).
 *   2. Plain HTTP fetch; used directly when getHttpEscalationReason() is null.
 *   3. Browser navigation, at most 2 attempts; a second attempt happens only
 *      for errors that isRecoverableBrowseError() accepts.
 *   4. If the browser attempts all fail but an HTTP result exists, that result
 *      is stored anyway (strategy "http-after-browser-failure").
 *
 * @param rawUrl - URL to fetch; normalized with normalizeUrl().
 * @param options - `abortSignal` (used only if it passes asAbortSignal) and `bypassCache`.
 * @returns formatBrowseResult() output extended with a `timing` object.
 * @throws Error "browse aborted" when the signal fires; otherwise the last
 *   browser error, the HTTP-first error, or a generic "browse failed" error.
 */
556
+ async browse(rawUrl, options = {}) {
557
+ const abortSignal = asAbortSignal(options.abortSignal);
558
+ return await this.enqueueBrowse(async () => {
559
+ throwIfAborted(abortSignal, "browse aborted");
560
+ const startedAt = Date.now();
561
+ const url = normalizeUrl(rawUrl);
562
+ const bypassCache = toBool(options.bypassCache, false);
563
+ if (!bypassCache) {
564
+ const cached = this.getCachedByUrl(url);
565
+ if (cached) {
566
+ this.metrics.cacheHits += 1;
567
+ return {
568
+ ...this.formatBrowseResult(cached, true),
569
+ timing: {
570
+ cache: "hit",
571
+ totalMs: Date.now() - startedAt,
572
+ },
573
+ };
574
+ }
575
+ }
576
+ this.metrics.cacheMisses += 1;
// Stores a fetched page under a fresh UUID, records it in the URL cache
// (unless the cache is bypassed) and then evicts the oldest documents.
577
+ const persistDocument = (source) => {
578
+ const doc = {
579
+ id: randomUUID(),
580
+ url,
581
+ finalUrl: source.finalUrl,
582
+ status: source.status,
583
+ title: source.title,
584
+ html: source.html,
585
+ fetchedAt: Date.now(),
586
+ };
587
+ this.documents.set(doc.id, doc);
588
+ if (!bypassCache) {
589
+ this.urlCache.set(url, {
590
+ documentId: doc.id,
591
+ expiresAt: Date.now() + this.config.cacheTtlMs,
592
+ });
593
+ }
594
+ this.evictOldDocuments();
595
+ return doc;
596
+ };
// HTTP-first: httpAttempt stays null only when this fetch itself throws.
597
+ let httpAttempt = null;
598
+ let httpFirstError = null;
599
+ try {
600
+ const httpStart = Date.now();
601
+ const httpResult = await this.browseViaHttpFallback(url, abortSignal);
602
+ const httpDurationMs = Date.now() - httpStart;
603
+ const escalationReason = this.getHttpEscalationReason(httpResult);
604
+ httpAttempt = {
605
+ ...httpResult,
606
+ durationMs: httpDurationMs,
607
+ escalationReason,
608
+ };
609
+ if (!escalationReason) {
610
+ const doc = persistDocument(httpResult);
611
+ return {
612
+ ...this.formatBrowseResult(doc, false),
613
+ timing: {
614
+ cache: "miss",
615
+ totalMs: Date.now() - startedAt,
616
+ strategy: "http",
617
+ httpMs: httpDurationMs,
618
+ },
619
+ };
620
+ }
621
+ this.log(`http-first browse escalation for ${url}: ${escalationReason}`);
622
+ }
623
+ catch (error) {
624
+ httpFirstError = error;
625
+ const message = error instanceof Error ? error.message : String(error);
626
+ this.log(`http-first browse failed for ${url}: ${message}`);
627
+ }
// Browser escalation: each attempt opens its own context and page, which are
// closed again in the finally block of that attempt.
628
+ const maxAttempts = 2;
629
+ let lastError = null;
630
+ for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
631
+ let ensureBrowserMs = 0;
632
+ let contextMs = 0;
633
+ let newPageMs = 0;
634
+ let context = null;
635
+ let page = null;
636
+ let abortCleanup = null;
637
+ try {
638
+ throwIfAborted(abortSignal, "browse aborted");
639
+ const ensureBrowserStart = Date.now();
640
+ const browser = await this.ensureBrowser();
641
+ ensureBrowserMs = Date.now() - ensureBrowserStart;
642
+ const contextStart = Date.now();
643
+ context = await browser.newContext();
644
+ contextMs = Date.now() - contextStart;
645
+ const pageStart = Date.now();
646
+ page = await context.newPage();
647
+ newPageMs = Date.now() - pageStart;
648
+ if (abortSignal) {
// Closing the page and context on abort makes a pending navigation reject.
649
+ const onAbort = () => {
650
+ void page?.close().catch(() => { });
651
+ void context?.close().catch(() => { });
652
+ };
653
+ abortSignal.addEventListener("abort", onAbort, { once: true });
654
+ abortCleanup = () => abortSignal.removeEventListener("abort", onAbort);
655
+ }
656
+ throwIfAborted(abortSignal, "browse aborted");
657
+ const gotoStart = Date.now();
658
+ const response = await page.goto(url, {
659
+ waitUntil: "domcontentloaded",
660
+ timeout: this.config.browseLinkTimeoutMs,
661
+ });
662
+ const gotoMs = Date.now() - gotoStart;
663
+ throwIfAborted(abortSignal, "browse aborted");
664
+ const contentStart = Date.now();
665
+ const htmlRaw = await page.content();
666
+ const html = truncateText(htmlRaw, this.config.maxHtmlChars);
667
+ const contentMs = Date.now() - contentStart;
668
+ const titleStart = Date.now();
669
+ const title = await page.title();
670
+ const titleMs = Date.now() - titleStart;
671
+ throwIfAborted(abortSignal, "browse aborted");
672
+ const doc = persistDocument({
673
+ finalUrl: page.url(),
674
+ status: response?.status() ?? 0,
675
+ title,
676
+ html,
677
+ });
678
+ return {
679
+ ...this.formatBrowseResult(doc, false),
680
+ timing: {
681
+ cache: "miss",
682
+ totalMs: Date.now() - startedAt,
683
+ strategy: "browser",
684
+ ensureBrowserMs,
685
+ contextMs,
686
+ newPageMs,
687
+ gotoMs,
688
+ contentMs,
689
+ titleMs,
690
+ httpFirstMs: httpAttempt?.durationMs,
691
+ httpEscalationReason: httpAttempt?.escalationReason || undefined,
692
+ httpFirstError: httpFirstError instanceof Error
693
+ ? httpFirstError.message
694
+ : httpFirstError
695
+ ? String(httpFirstError)
696
+ : undefined,
697
+ },
698
+ };
699
+ }
700
+ catch (error) {
701
+ lastError = error;
702
+ if (abortSignal?.aborted) {
703
+ break;
704
+ }
// A browser timeout triggers one more HTTP fetch, but only if no HTTP result
// has been recorded yet (that is, the HTTP-first fetch threw).
705
+ if (this.isTimeoutBrowseError(error) && !httpAttempt) {
706
+ try {
707
+ const fallbackStart = Date.now();
708
+ const fallback = await this.browseViaHttpFallback(url, abortSignal);
709
+ const fallbackMs = Date.now() - fallbackStart;
710
+ const escalationReason = this.getHttpEscalationReason(fallback);
711
+ httpAttempt = {
712
+ ...fallback,
713
+ durationMs: fallbackMs,
714
+ escalationReason,
715
+ };
716
+ if (!escalationReason) {
717
+ const doc = persistDocument(fallback);
718
+ return {
719
+ ...this.formatBrowseResult(doc, false),
720
+ timing: {
721
+ cache: "miss",
722
+ totalMs: Date.now() - startedAt,
723
+ strategy: "http-timeout-fallback",
724
+ fallbackMs,
725
+ },
726
+ };
727
+ }
728
+ this.log(`http fallback escalation for ${url}: ${escalationReason}`);
729
+ }
730
+ catch (fallbackError) {
731
+ const fallbackMessage = fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
732
+ this.log(`http fallback failed for ${url}: ${fallbackMessage}`);
733
+ lastError = fallbackError;
734
+ }
735
+ }
// The retry decision uses the original browser error, not a fallback error.
736
+ const shouldRetry = attempt < maxAttempts && this.isRecoverableBrowseError(error);
737
+ if (!shouldRetry) {
738
+ break;
739
+ }
740
+ const message = error instanceof Error ? error.message : String(error);
741
+ this.log(`browse attempt failed for ${url}; retrying with fresh browser: ${message}`);
742
+ await this.resetBrowserForRetry();
743
+ }
744
+ finally {
745
+ if (abortCleanup)
746
+ abortCleanup();
747
+ if (page)
748
+ await page.close().catch(() => { });
749
+ if (context)
750
+ await context.close().catch(() => { });
751
+ }
752
+ }
753
+ throwIfAborted(abortSignal, "browse aborted");
// Last resort: keep the HTTP result even though it was flagged for escalation.
754
+ if (httpAttempt) {
755
+ const doc = persistDocument(httpAttempt);
756
+ return {
757
+ ...this.formatBrowseResult(doc, false),
758
+ timing: {
759
+ cache: "miss",
760
+ totalMs: Date.now() - startedAt,
761
+ strategy: "http-after-browser-failure",
762
+ httpMs: httpAttempt.durationMs,
763
+ httpEscalationReason: httpAttempt.escalationReason || undefined,
764
+ browserError: lastError instanceof Error ? lastError.message : lastError ? String(lastError) : undefined,
765
+ },
766
+ };
767
+ }
768
+ if (lastError instanceof Error)
769
+ throw lastError;
770
+ if (httpFirstError instanceof Error)
771
+ throw httpFirstError;
772
+ throw new Error(`browse failed for ${url}`);
773
+ }, abortSignal);
774
+ }
775
+ async present(documentId, maxChars = this.config.maxMarkdownChars) {
776
+ const startedAt = Date.now();
777
+ const doc = this.documents.get(documentId);
778
+ if (!doc) {
779
+ throw new Error(`Unknown documentId: ${documentId}. Call browse(url) first.`);
780
+ }
781
+ const cached = this.presentCache.get(documentId);
782
+ if (cached && cached.maxChars === maxChars) {
783
+ return {
784
+ ...cached.payload,
785
+ timing: {
786
+ cache: "hit",
787
+ totalMs: Date.now() - startedAt,
788
+ },
789
+ };
790
+ }
791
+ const importStart = Date.now();
792
+ const [defuddleModule, linkedomModule] = await Promise.all([import("defuddle/node"), import("linkedom")]);
793
+ const importMs = Date.now() - importStart;
794
+ const Defuddle = defuddleModule.Defuddle;
795
+ const parseHTML = linkedomModule.parseHTML;
796
+ const parseStart = Date.now();
797
+ const parsed = parseHTML(String(doc.html || ""));
798
+ const document = parsed.document;
799
+ const windowLike = document.defaultView ||
800
+ document;
801
+ if (typeof windowLike.getComputedStyle !== "function") {
802
+ windowLike.getComputedStyle = () => new Proxy({}, { get: () => "" });
803
+ }
804
+ const parseMs = Date.now() - parseStart;
805
+ const extractStart = Date.now();
806
+ let extracted;
807
+ let extractor = "defuddle";
808
+ let extractorError = null;
809
+ try {
810
+ const extractedResult = await withSuppressedDefuddleWarnings(async () => await Defuddle(document, String(doc.finalUrl || doc.url || ""), { markdown: true }));
811
+ extracted = extractedResult.value;
812
+ }
813
+ catch (error) {
814
+ extractor = "fallback-strip-html";
815
+ extractorError = error instanceof Error ? error.message : String(error);
816
+ this.log(`present defuddle fallback for ${String(doc.finalUrl || doc.url || "")}: ${extractorError}`);
817
+ const fallbackText = this.extractTextFromHtmlFallback(String(doc.html || ""));
818
+ extracted = {
819
+ title: String(doc.title || this.extractTitleFromHtml(String(doc.html || ""))),
820
+ author: "Unknown",
821
+ published: "Unknown",
822
+ wordCount: countWords(fallbackText),
823
+ content: fallbackText,
824
+ };
825
+ }
826
+ const extractMs = Date.now() - extractStart;
827
+ const formatStart = Date.now();
828
+ const rawContent = String(extracted.content || "");
829
+ const content = truncateText(rawContent, maxChars);
830
+ const payload = {
831
+ documentId,
832
+ url: String(doc.finalUrl || doc.url || ""),
833
+ title: String(extracted.title || doc.title || ""),
834
+ author: String(extracted.author || "Unknown"),
835
+ published: String(extracted.published || "Unknown"),
836
+ wordCount: Number(extracted.wordCount || countWords(content)),
837
+ content,
838
+ truncated: rawContent.length > content.length,
839
+ extractor,
840
+ extractorError: extractorError || undefined,
841
+ };
842
+ const formatMs = Date.now() - formatStart;
843
+ this.presentCache.set(documentId, { maxChars, payload });
844
+ return {
845
+ ...payload,
846
+ timing: {
847
+ cache: "miss",
848
+ totalMs: Date.now() - startedAt,
849
+ importMs,
850
+ parseMs,
851
+ extractMs,
852
+ formatMs,
853
+ },
854
+ };
855
+ }
856
+ async parseDuckDuckGoResults(html, options = {}) {
857
+ return await parseDuckDuckGoResults(html, options);
858
+ }
859
+ async discoverSearchResults(query, options = {}) {
860
+ return await discoverSearchResults(query, {
861
+ ...options,
862
+ searchEngine: options.searchEngine ?? this.config.searchEngine,
863
+ searchEngineUrlTemplate: options.searchEngineUrlTemplate ?? this.config.searchEngineUrlTemplate,
864
+ });
865
+ }
866
+ async tryFetchGitHubRepoContent(requestedUrl, maxCharacters) {
867
+ return await tryFetchGitHubRepoContent(requestedUrl, maxCharacters, {
868
+ log: (message) => this.log(message),
869
+ });
870
+ }
871
+ async fetchContent(url, options = {}) {
872
+ const startedAt = Date.now();
873
+ const maxCharacters = clampInteger(options.maxCharacters, 3000, {
874
+ min: 500,
875
+ max: 200000,
876
+ });
877
+ const noCache = toBool(options.noCache, false);
878
+ const requestedUrl = normalizeUrl(url);
879
+ const githubResult = await this.tryFetchGitHubRepoContent(requestedUrl, maxCharacters);
880
+ if (githubResult) {
881
+ return githubResult;
882
+ }
883
+ const browseResult = await this.browse(requestedUrl, { bypassCache: noCache });
884
+ const documentId = String(browseResult.documentId || "");
885
+ if (!documentId) {
886
+ throw new Error("browse() returned no documentId");
887
+ }
888
+ const presentResult = await this.present(documentId, maxCharacters);
889
+ const browseTiming = browseResult.timing || {};
890
+ const presentTiming = presentResult.timing || {};
891
+ const browseCache = String(browseTiming.cache || (browseResult.fromCache ? "hit" : "miss"));
892
+ const presentCache = String(presentTiming.cache || "miss");
893
+ return {
894
+ url: String(presentResult.url || ""),
895
+ requestedUrl,
896
+ title: String(presentResult.title || ""),
897
+ author: String(presentResult.author || "Unknown"),
898
+ published: String(presentResult.published || "Unknown"),
899
+ wordCount: Number(presentResult.wordCount || 0),
900
+ content: String(presentResult.content || ""),
901
+ truncated: Boolean(presentResult.truncated),
902
+ documentId,
903
+ status: Number(browseResult.status || 0),
904
+ cache: {
905
+ browse: browseCache,
906
+ present: presentCache,
907
+ },
908
+ timing: {
909
+ totalMs: Date.now() - startedAt,
910
+ browseMs: browseTiming.totalMs || null,
911
+ presentMs: presentTiming.totalMs || null,
912
+ },
913
+ };
914
+ }
915
+ async webSearch(query, options = {}) {
916
+ const startedAt = Date.now();
917
+ const normalizedQuery = String(query || "").trim();
918
+ if (!normalizedQuery)
919
+ throw new Error("query is required");
920
+ const type = normalizeEnum(options.type, ["auto", "fast"], "auto");
921
+ const livecrawl = normalizeEnum(options.livecrawl, ["fallback", "preferred"], "fallback");
922
+ const numResults = clampInteger(options.numResults, 8, { min: 1, max: 20 });
923
+ const contextMaxCharacters = clampInteger(options.contextMaxCharacters, 10000, {
924
+ min: 1000,
925
+ max: 200000,
926
+ });
927
+ const textMaxCharacters = clampInteger(options.textMaxCharacters, Math.max(1500, Math.floor(contextMaxCharacters / Math.max(1, numResults))), { min: 500, max: 50000 });
928
+ const discovery = await this.discoverSearchResults(normalizedQuery, {
929
+ ...options,
930
+ numResults: Math.max(numResults * 3, numResults + 8),
931
+ type,
932
+ livecrawl,
933
+ });
934
+ const discovered = Array.isArray(discovery.results) ? discovery.results : [];
935
+ const selected = discovered.slice(0, numResults);
936
+ const results = [];
937
+ for (const result of selected) {
938
+ try {
939
+ const content = await this.fetchContent(String(result.url || ""), {
940
+ maxCharacters: textMaxCharacters,
941
+ noCache: toBool(options.noCache, false),
942
+ });
943
+ const contentRecord = content;
944
+ results.push({
945
+ rank: result.rank,
946
+ url: contentRecord.url,
947
+ title: contentRecord.title || result.title,
948
+ snippet: result.snippet,
949
+ author: contentRecord.author,
950
+ published: contentRecord.published,
951
+ content: contentRecord.content,
952
+ wordCount: contentRecord.wordCount,
953
+ status: contentRecord.status,
954
+ cache: contentRecord.cache,
955
+ });
956
+ }
957
+ catch (error) {
958
+ results.push({
959
+ rank: result.rank,
960
+ url: result.url,
961
+ title: result.title,
962
+ snippet: result.snippet,
963
+ error: error instanceof Error ? error.message : String(error),
964
+ });
965
+ }
966
+ }
967
+ const context = buildContext(results, contextMaxCharacters);
968
+ return {
969
+ query: normalizedQuery,
970
+ type,
971
+ livecrawl,
972
+ context,
973
+ results,
974
+ searchUrl: discovery.searchUrl,
975
+ discoveredResults: discovered.length,
976
+ durationMs: Date.now() - startedAt,
977
+ };
978
+ }
979
+ async webSearchAdvanced(options = {}) {
980
+ const query = String(options.query || "").trim();
981
+ if (!query)
982
+ throw new Error("query is required");
983
+ const type = normalizeEnum(options.type, ["auto", "fast", "neural"], "auto");
984
+ const livecrawl = normalizeEnum(options.livecrawl, ["never", "fallback", "always", "preferred"], "fallback");
985
+ const numResults = clampInteger(options.numResults, 10, { min: 1, max: 20 });
986
+ const textMaxCharacters = clampInteger(options.textMaxCharacters, 3500, {
987
+ min: 500,
988
+ max: 50000,
989
+ });
990
+ const contextMaxCharacters = clampInteger(options.contextMaxCharacters, 14000, {
991
+ min: 1000,
992
+ max: 200000,
993
+ });
994
+ const base = (await this.webSearch(query, {
995
+ ...options,
996
+ numResults,
997
+ type: type === "neural" ? "auto" : type,
998
+ livecrawl: livecrawl === "always" ? "preferred" : livecrawl,
999
+ textMaxCharacters,
1000
+ contextMaxCharacters,
1001
+ }));
1002
+ const startDate = parseIsoDate(options.startPublishedDate);
1003
+ const endDate = parseIsoDate(options.endPublishedDate);
1004
+ const baseResults = Array.isArray(base.results) ? base.results : [];
1005
+ let filteredResults = baseResults;
1006
+ if (startDate || endDate) {
1007
+ filteredResults = filteredResults.filter((result) => {
1008
+ const publishedDate = parseIsoDate(result.published);
1009
+ if (!publishedDate)
1010
+ return true;
1011
+ if (startDate && publishedDate < startDate)
1012
+ return false;
1013
+ if (endDate && publishedDate > endDate)
1014
+ return false;
1015
+ return true;
1016
+ });
1017
+ }
1018
+ filteredResults = filteredResults.slice(0, numResults).map((result, index) => ({
1019
+ ...result,
1020
+ rank: index + 1,
1021
+ }));
1022
+ return {
1023
+ query,
1024
+ type,
1025
+ livecrawl,
1026
+ category: options.category || null,
1027
+ context: buildContext(filteredResults, contextMaxCharacters),
1028
+ results: filteredResults,
1029
+ searchUrl: base.searchUrl,
1030
+ discoveredResults: base.discoveredResults,
1031
+ filtersApplied: {
1032
+ includeDomains: parseStringList(options.includeDomains)
1033
+ .map((value) => normalizeDomainFilter(value))
1034
+ .filter(Boolean),
1035
+ excludeDomains: parseStringList(options.excludeDomains)
1036
+ .map((value) => normalizeDomainFilter(value))
1037
+ .filter(Boolean),
1038
+ includeText: parseStringList(options.includeText),
1039
+ excludeText: parseStringList(options.excludeText),
1040
+ startPublishedDate: options.startPublishedDate || null,
1041
+ endPublishedDate: options.endPublishedDate || null,
1042
+ },
1043
+ durationMs: base.durationMs,
1044
+ };
1045
+ }
1046
+ async getCodeContext(query, options = {}) {
1047
+ const normalizedQuery = String(query || "").trim();
1048
+ if (!normalizedQuery)
1049
+ throw new Error("query is required");
1050
+ const tokensNum = clampInteger(options.tokensNum, 5000, { min: 1000, max: 50000 });
1051
+ const contextMaxCharacters = clampInteger(options.contextMaxCharacters, tokensNum * 4, {
1052
+ min: 4000,
1053
+ max: 200000,
1054
+ });
1055
+ const includeDomains = Array.from(new Set([
1056
+ ...parseStringList(options.includeDomains),
1057
+ "github.com",
1058
+ "stackoverflow.com",
1059
+ "developer.mozilla.org",
1060
+ "docs.python.org",
1061
+ "npmjs.com",
1062
+ ]));
1063
+ let search = await this.webSearchAdvanced({
1064
+ query: normalizedQuery,
1065
+ numResults: clampInteger(options.numResults, 8, { min: 1, max: 12 }),
1066
+ type: normalizeEnum(options.type, ["auto", "fast", "neural"], "fast"),
1067
+ includeDomains,
1068
+ contextMaxCharacters,
1069
+ textMaxCharacters: clampInteger(options.textMaxCharacters, 5000, { min: 1000, max: 50000 }),
1070
+ livecrawl: normalizeEnum(options.livecrawl, ["never", "fallback", "always", "preferred"], "fallback"),
1071
+ });
1072
+ if (!Array.isArray(search.results) || search.results.length === 0) {
1073
+ search = (await this.webSearch(`${normalizedQuery} github stackoverflow documentation`, {
1074
+ numResults: clampInteger(options.numResults, 6, { min: 1, max: 12 }),
1075
+ type: "fast",
1076
+ livecrawl: normalizeEnum(options.livecrawl, ["fallback", "preferred"], "fallback"),
1077
+ contextMaxCharacters,
1078
+ textMaxCharacters: clampInteger(options.textMaxCharacters, 5000, { min: 1000, max: 50000 }),
1079
+ }));
1080
+ }
1081
+ return {
1082
+ query: normalizedQuery,
1083
+ tokensNum,
1084
+ response: search.context,
1085
+ results: search.results,
1086
+ durationMs: search.durationMs,
1087
+ };
1088
+ }
1089
+ async companyResearch(companyName, options = {}) {
1090
+ const normalizedCompany = String(companyName || "").trim();
1091
+ if (!normalizedCompany)
1092
+ throw new Error("companyName is required");
1093
+ const numResults = clampInteger(options.numResults, 3, { min: 1, max: 10 });
1094
+ const contextMaxCharacters = clampInteger(options.contextMaxCharacters, 14000, {
1095
+ min: 4000,
1096
+ max: 200000,
1097
+ });
1098
+ let search = await this.webSearchAdvanced({
1099
+ query: `${normalizedCompany} company`,
1100
+ category: "company",
1101
+ numResults,
1102
+ type: normalizeEnum(options.type, ["auto", "fast", "neural"], "auto"),
1103
+ contextMaxCharacters,
1104
+ textMaxCharacters: clampInteger(options.textMaxCharacters, 7000, {
1105
+ min: 1000,
1106
+ max: 50000,
1107
+ }),
1108
+ livecrawl: normalizeEnum(options.livecrawl, ["never", "fallback", "always", "preferred"], "fallback"),
1109
+ includeDomains: options.includeDomains,
1110
+ excludeDomains: options.excludeDomains,
1111
+ includeText: options.includeText,
1112
+ excludeText: options.excludeText,
1113
+ });
1114
+ if (!Array.isArray(search.results) || search.results.length === 0) {
1115
+ search = (await this.webSearch(`${normalizedCompany} official website products services latest news`, {
1116
+ numResults,
1117
+ type: "auto",
1118
+ livecrawl: "fallback",
1119
+ contextMaxCharacters,
1120
+ textMaxCharacters: clampInteger(options.textMaxCharacters, 7000, {
1121
+ min: 1000,
1122
+ max: 50000,
1123
+ }),
1124
+ }));
1125
+ }
1126
+ return {
1127
+ companyName: normalizedCompany,
1128
+ query: search.query,
1129
+ context: search.context,
1130
+ results: search.results,
1131
+ durationMs: search.durationMs,
1132
+ };
1133
+ }
1134
+ async findSimilar(url, options = {}) {
1135
+ const startedAt = Date.now();
1136
+ const normalizedUrl = normalizeUrl(url);
1137
+ const numResults = clampInteger(options.numResults, 5, { min: 1, max: 12 });
1138
+ const seedHost = getHostname(normalizedUrl);
1139
+ let seedContent = null;
1140
+ try {
1141
+ seedContent = (await this.fetchContent(normalizedUrl, {
1142
+ maxCharacters: clampInteger(options.seedMaxCharacters, 1200, { min: 500, max: 6000 }),
1143
+ noCache: toBool(options.noCache, false),
1144
+ }));
1145
+ }
1146
+ catch {
1147
+ // best effort only
1148
+ }
1149
+ const seedTitle = String(seedContent?.title || seedHost || normalizedUrl);
1150
+ const keywordTerms = extractTopTerms(seedTitle, 4);
1151
+ const query = ["alternatives to", seedTitle, ...keywordTerms].join(" ").trim();
1152
+ const extraExcludeDomains = parseStringList(options.excludeDomains)
1153
+ .map((value) => normalizeDomainFilter(value))
1154
+ .filter(Boolean);
1155
+ const discovery = await this.discoverSearchResults(query, {
1156
+ numResults: Math.max(numResults * 2, numResults + 4),
1157
+ type: normalizeEnum(options.type, ["auto", "fast", "neural"], "auto"),
1158
+ includeDomains: options.includeDomains,
1159
+ excludeDomains: [seedHost, ...extraExcludeDomains],
1160
+ includeText: options.includeText,
1161
+ excludeText: options.excludeText,
1162
+ category: options.category,
1163
+ });
1164
+ const discoveryResults = Array.isArray(discovery.results) ? discovery.results : [];
1165
+ const similarResults = discoveryResults
1166
+ .filter((result) => !domainMatches(getHostname(result.url), [seedHost]))
1167
+ .slice(0, numResults)
1168
+ .map((result, index) => ({
1169
+ rank: index + 1,
1170
+ url: result.url,
1171
+ title: result.title,
1172
+ snippet: result.snippet,
1173
+ domain: result.domain,
1174
+ }));
1175
+ const context = truncateText(similarResults
1176
+ .map((result) => [
1177
+ `TITLE: ${result.title || result.url}`,
1178
+ `URL: ${result.url}`,
1179
+ result.snippet ? `SNIPPET: ${result.snippet}` : "",
1180
+ ]
1181
+ .filter(Boolean)
1182
+ .join("\n"))
1183
+ .join("\n\n---\n\n"), clampInteger(options.contextMaxCharacters, 12000, {
1184
+ min: 2000,
1185
+ max: 200000,
1186
+ }));
1187
+ return {
1188
+ url: normalizedUrl,
1189
+ query,
1190
+ results: similarResults,
1191
+ context,
1192
+ durationMs: Date.now() - startedAt,
1193
+ };
1194
+ }
1195
+ pruneExpiredUrlCache() {
1196
+ const now = Date.now();
1197
+ for (const [url, entry] of this.urlCache.entries()) {
1198
+ if (entry.expiresAt <= now || !this.documents.has(entry.documentId)) {
1199
+ this.urlCache.delete(url);
1200
+ }
1201
+ }
1202
+ }
1203
+ getCacheEntries(limit = 20) {
1204
+ this.pruneExpiredUrlCache();
1205
+ const now = Date.now();
1206
+ const normalizedLimit = Math.max(1, Math.floor(limit || 20));
1207
+ const entries = [];
1208
+ for (const [url, entry] of this.urlCache.entries()) {
1209
+ const doc = this.documents.get(entry.documentId);
1210
+ if (!doc)
1211
+ continue;
1212
+ const fetchedAt = toFiniteNonNegativeNumber(doc.fetchedAt || now);
1213
+ const expiresAt = toFiniteNonNegativeNumber(entry.expiresAt || now);
1214
+ entries.push({
1215
+ url,
1216
+ finalUrl: String(doc.finalUrl || doc.url || url),
1217
+ title: String(doc.title || "").trim(),
1218
+ documentId: entry.documentId,
1219
+ fetchedAt: new Date(fetchedAt || now).toISOString(),
1220
+ expiresAt: new Date(expiresAt || now).toISOString(),
1221
+ ageMs: Math.max(0, now - (fetchedAt || now)),
1222
+ ttlMs: Math.max(0, expiresAt - now),
1223
+ bytes: Buffer.byteLength(String(doc.html || ""), "utf8"),
1224
+ });
1225
+ }
1226
+ entries.sort((left, right) => Number(left.ttlMs || 0) - Number(right.ttlMs || 0));
1227
+ return entries.slice(0, normalizedLimit);
1228
+ }
1229
+ getTokenUsageSummary() {
1230
+ const queries = Math.max(1, this.metrics.queries);
1231
+ return {
1232
+ input: this.metrics.tokenInput,
1233
+ output: this.metrics.tokenOutput,
1234
+ cacheRead: this.metrics.tokenCacheRead,
1235
+ cacheWrite: this.metrics.tokenCacheWrite,
1236
+ total: this.metrics.tokenTotal,
1237
+ avgPerQuery: Math.round(this.metrics.tokenTotal / queries),
1238
+ cost: {
1239
+ input: this.metrics.costInput,
1240
+ output: this.metrics.costOutput,
1241
+ cacheRead: this.metrics.costCacheRead,
1242
+ cacheWrite: this.metrics.costCacheWrite,
1243
+ total: this.metrics.costTotal,
1244
+ },
1245
+ };
1246
+ }
1247
+ getHealth(options = {}) {
1248
+ this.pruneExpiredUrlCache();
1249
+ const totalCacheLookups = this.metrics.cacheHits + this.metrics.cacheMisses;
1250
+ const cacheHitRate = totalCacheLookups > 0 ? this.metrics.cacheHits / totalCacheLookups : 0;
1251
+ const health = {
1252
+ model: this.model?.id || "uninitialized",
1253
+ llmApi: this.config.llmApi,
1254
+ llmBaseUrl: this.config.llmBaseUrl,
1255
+ searchEngine: this.config.searchEngine,
1256
+ searchEngineUrlTemplate: this.config.searchEngineUrlTemplate,
1257
+ cdpUrl: this.config.lightpandaCdpUrl,
1258
+ lightpandaAutoStart: this.config.lightpandaAutoStart,
1259
+ lightpandaManaged: this.lightpandaManaged,
1260
+ lightpandaManagedPid: this.lightpandaProcess?.pid || null,
1261
+ documentsCached: this.documents.size,
1262
+ urlCacheEntries: this.urlCache.size,
1263
+ deepResearchTasks: this.deepResearchTasks.size,
1264
+ researchMaxPages: this.config.researchMaxPages,
1265
+ researchMaxHops: this.config.researchMaxHops,
1266
+ researchSameDomainOnly: this.config.researchSameDomainOnly,
1267
+ browseLinkTimeoutMs: this.config.browseLinkTimeoutMs,
1268
+ queryTimeoutMs: this.config.queryTimeoutMs,
1269
+ cacheTtlMs: this.config.cacheTtlMs,
1270
+ maxHtmlChars: this.config.maxHtmlChars,
1271
+ maxMarkdownChars: this.config.maxMarkdownChars,
1272
+ operationConcurrency: this.config.operationConcurrency,
1273
+ browseConcurrency: this.config.browseConcurrency,
1274
+ operationSlotsActive: this.operationLimiter.active,
1275
+ operationSlotsPending: this.operationLimiter.pending,
1276
+ browseSlotsActive: this.browseLimiter.active,
1277
+ browseSlotsPending: this.browseLimiter.pending,
1278
+ toolExecutionMode: this.config.toolExecutionMode,
1279
+ queries: this.metrics.queries,
1280
+ activeQueries: this.metrics.activeQueries,
1281
+ cacheHits: this.metrics.cacheHits,
1282
+ cacheMisses: this.metrics.cacheMisses,
1283
+ cacheHitRate,
1284
+ tokens: this.getTokenUsageSummary(),
1285
+ uptimeSec: Math.floor((Date.now() - this.metrics.startedAt) / 1000),
1286
+ };
1287
+ if (options.includeCacheEntries) {
1288
+ health.cacheEntries = this.getCacheEntries(options.cacheEntriesLimit);
1289
+ }
1290
+ return health;
1291
+ }
1292
+ createTools(plan) {
1293
+ const uniqueBrowsedUrls = new Set();
1294
+ const successfulBrowseByUrl = new Map();
1295
+ const failedUrls = new Set();
1296
+ const hostFailures = new Map();
1297
+ const browseTool = {
1298
+ name: "browse",
1299
+ label: "Browse",
1300
+ description: "Open a URL in Lightpanda and cache raw HTML internally. Returns a documentId. Pass that documentId to present(). This query has a fixed maximum page budget.",
1301
+ parameters: Type.Object({
1302
+ url: Type.String({ description: "Full URL to browse" }),
1303
+ }),
1304
+ execute: async (_toolCallId, params, signal) => {
1305
+ const toolParams = params;
1306
+ const normalizedUrl = normalizeUrl(toolParams.url);
1307
+ const hostname = getHostname(normalizedUrl);
1308
+ if (plan.sameDomainOnly && !isHostAllowed(hostname, plan.seedHosts)) {
1309
+ throw new Error(`Blocked by same-domain policy. Allowed domains: ${Array.from(plan.seedHosts).join(", ")}`);
1310
+ }
1311
+ if (plan.policy?.includeDomains?.length &&
1312
+ !domainMatches(hostname, plan.policy.includeDomains) &&
1313
+ !isDiscoveryDomain(hostname)) {
1314
+ throw new Error(`Blocked by includeDomains policy. Allowed domains: ${plan.policy.includeDomains.join(", ")}`);
1315
+ }
1316
+ if (plan.policy?.excludeDomains?.length && domainMatches(hostname, plan.policy.excludeDomains)) {
1317
+ throw new Error(`Blocked by excludeDomains policy. Excluded domains: ${plan.policy.excludeDomains.join(", ")}`);
1318
+ }
1319
+ const existing = successfulBrowseByUrl.get(normalizedUrl);
1320
+ if (existing) {
1321
+ const text = [
1322
+ `DOCUMENT_ID: ${String(existing.documentId || "")}`,
1323
+ `URL: ${String(existing.url || normalizedUrl)}`,
1324
+ `FINAL_URL: ${String(existing.finalUrl || existing.url || normalizedUrl)}`,
1325
+ `STATUS: ${String(existing.status || "")}`,
1326
+ `TITLE: ${String(existing.title || "")}`,
1327
+ `BYTES: ${String(existing.bytes || "")}`,
1328
+ "FROM_CACHE: true",
1329
+ "REUSED_IN_QUERY: true",
1330
+ `PAGES_USED: ${uniqueBrowsedUrls.size}/${plan.maxPages}`,
1331
+ "NOTE: URL already browsed in this query. Reusing existing documentId.",
1332
+ ].join("\n");
1333
+ return {
1334
+ content: [{ type: "text", text }],
1335
+ details: {
1336
+ ...existing,
1337
+ fromCache: true,
1338
+ reusedInQuery: true,
1339
+ pagesUsed: uniqueBrowsedUrls.size,
1340
+ maxPages: plan.maxPages,
1341
+ },
1342
+ };
1343
+ }
1344
+ if (failedUrls.has(normalizedUrl)) {
1345
+ throw new Error(`URL previously failed in this query and is skipped: ${normalizedUrl}`);
1346
+ }
1347
+ const hostFailureCount = hostFailures.get(hostname) || 0;
1348
+ if (hostFailureCount >= 2) {
1349
+ throw new Error(`Host failure circuit open for ${hostname} (failures=${hostFailureCount}). Skipping for this query.`);
1350
+ }
1351
+ const isNewUrl = !uniqueBrowsedUrls.has(normalizedUrl);
1352
+ if (isNewUrl && uniqueBrowsedUrls.size >= plan.maxPages) {
1353
+ throw new Error(`Browse page budget exhausted (${plan.maxPages} pages).`);
1354
+ }
1355
+ let result;
1356
+ try {
1357
+ result = await this.browse(normalizedUrl, { abortSignal: signal });
1358
+ }
1359
+ catch (error) {
1360
+ failedUrls.add(normalizedUrl);
1361
+ hostFailures.set(hostname, hostFailureCount + 1);
1362
+ throw error;
1363
+ }
1364
+ uniqueBrowsedUrls.add(normalizedUrl);
1365
+ successfulBrowseByUrl.set(normalizedUrl, result);
1366
+ failedUrls.delete(normalizedUrl);
1367
+ hostFailures.set(hostname, 0);
1368
+ const text = [
1369
+ `DOCUMENT_ID: ${result.documentId}`,
1370
+ `URL: ${result.url}`,
1371
+ `FINAL_URL: ${result.finalUrl}`,
1372
+ `STATUS: ${result.status}`,
1373
+ `TITLE: ${String(result.title || "")}`,
1374
+ `BYTES: ${result.bytes}`,
1375
+ `FROM_CACHE: ${result.fromCache}`,
1376
+ `PAGES_USED: ${uniqueBrowsedUrls.size}/${plan.maxPages}`,
1377
+ ].join("\n");
1378
+ return {
1379
+ content: [{ type: "text", text }],
1380
+ details: {
1381
+ ...result,
1382
+ pagesUsed: uniqueBrowsedUrls.size,
1383
+ maxPages: plan.maxPages,
1384
+ },
1385
+ };
1386
+ },
1387
+ };
1388
+ const presentTool = {
1389
+ name: "present",
1390
+ label: "Present",
1391
+ description: "Extract clean markdown from a browsed document. Input is a documentId returned by browse().",
1392
+ parameters: Type.Object({
1393
+ documentId: Type.String({ description: "Document id returned from browse()" }),
1394
+ maxChars: Type.Optional(Type.Integer({
1395
+ description: "Maximum markdown characters to return",
1396
+ minimum: 1000,
1397
+ maximum: 200000,
1398
+ })),
1399
+ }),
1400
+ execute: async (_toolCallId, params, signal) => {
1401
+ throwIfAborted(signal, "present aborted");
1402
+ const toolParams = params;
1403
+ const requestedMaxChars = toolParams.maxChars === undefined
1404
+ ? undefined
1405
+ : clampInteger(toolParams.maxChars, this.config.maxMarkdownChars, {
1406
+ min: 1000,
1407
+ max: 200000,
1408
+ });
1409
+ const effectiveMaxChars = Math.max(requestedMaxChars ?? this.config.maxMarkdownChars, this.config.maxMarkdownChars);
1410
+ const result = await this.present(String(toolParams.documentId || ""), effectiveMaxChars);
1411
+ const policy = plan.policy || {};
1412
+ const haystack = `${result.title}\n${result.content}`.toLowerCase();
1413
+ if (isChallengeLikeContent(result.title, result.content)) {
1414
+ const modeLabel = policy.mode ? `${policy.mode} mode` : "current mode";
1415
+ throw new Error(`Filtered challenge/interstitial page in ${modeLabel}.`);
1416
+ }
1417
+ if (policy.includeText?.length) {
1418
+ const missing = policy.includeText.filter((term) => !haystack.includes(term));
1419
+ if (missing.length > 0) {
1420
+ throw new Error(`Filtered by includeText policy. Missing terms: ${missing.join(", ")}`);
1421
+ }
1422
+ }
1423
+ if (policy.excludeText?.length) {
1424
+ const matched = policy.excludeText.filter((term) => haystack.includes(term));
1425
+ if (matched.length > 0) {
1426
+ throw new Error(`Filtered by excludeText policy. Matched terms: ${matched.join(", ")}`);
1427
+ }
1428
+ }
1429
+ const publishedDate = parseIsoDate(result.published);
1430
+ if (policy.startDate && publishedDate && publishedDate < policy.startDate) {
1431
+ throw new Error(`Filtered by startPublishedDate policy: ${policy.startPublishedDate}`);
1432
+ }
1433
+ if (policy.endDate && publishedDate && publishedDate > policy.endDate) {
1434
+ throw new Error(`Filtered by endPublishedDate policy: ${policy.endPublishedDate}`);
1435
+ }
1436
+ const text = [
1437
+ `TITLE: ${result.title}`,
1438
+ `URL: ${result.url}`,
1439
+ `AUTHOR: ${result.author}`,
1440
+ `PUBLISHED: ${result.published}`,
1441
+ `WORD_COUNT: ${result.wordCount}`,
1442
+ "",
1443
+ "CONTENT_MARKDOWN:",
1444
+ String(result.content || ""),
1445
+ ].join("\n");
1446
+ return {
1447
+ content: [{ type: "text", text }],
1448
+ details: result,
1449
+ };
1450
+ },
1451
+ };
1452
+ return [browseTool, presentTool];
1453
+ }
1454
+ async enqueueOperation(operation, signal) {
1455
+ const release = await this.operationLimiter.acquire(signal, "operation aborted before execution");
1456
+ try {
1457
+ throwIfAborted(signal, "operation aborted before execution");
1458
+ return await operation();
1459
+ }
1460
+ finally {
1461
+ release();
1462
+ }
1463
+ }
1464
+ async enqueueBrowse(operation, signal) {
1465
+ const release = await this.browseLimiter.acquire(signal, "browse aborted before execution");
1466
+ try {
1467
+ throwIfAborted(signal, "browse aborted before execution");
1468
+ return await operation();
1469
+ }
1470
+ finally {
1471
+ release();
1472
+ }
1473
+ }
1474
+ async enqueueQuery(query, options = {}) {
1475
+ const abortSignal = asAbortSignal(options.abortSignal);
1476
+ return await this.enqueueOperation(() => this.runQuery(query, options), abortSignal);
1477
+ }
1478
+ async runQuery(query, options = {}) {
1479
+ const externalAbortSignal = asAbortSignal(options.abortSignal);
1480
+ throwIfAborted(externalAbortSignal, "request aborted by client");
1481
+ await this.init();
1482
+ if (!this.model) {
1483
+ throw new Error("model failed to initialize");
1484
+ }
1485
+ this.metrics.queries += 1;
1486
+ this.metrics.activeQueries += 1;
1487
+ const startedAt = Date.now();
1488
+ try {
1489
+ const toolsUsed = [];
1490
+ const toolCallStarts = new Map();
1491
+ const toolProfiles = [];
1492
+ const turns = [];
1493
+ let currentTurnStart = null;
1494
+ let turnCounter = 0;
1495
+ const assistantMessages = [];
1496
+ let currentAssistantStart = null;
1497
+ let assistantCounter = 0;
1498
+ let firstAssistantTokenMs = null;
1499
+ const onProgress = typeof options.onProgress === "function"
1500
+ ? options.onProgress
1501
+ : null;
1502
+ const emitProgress = (payload) => {
1503
+ if (!onProgress)
1504
+ return;
1505
+ try {
1506
+ onProgress({ ...payload, timestamp: Date.now() });
1507
+ }
1508
+ catch {
1509
+ // ignore progress sink errors
1510
+ }
1511
+ };
1512
+ const researchPlan = this.deriveResearchPlan(query, options);
1513
+ const queryTimeoutMs = clampInteger(options.queryTimeoutMs, this.config.queryTimeoutMs, {
1514
+ min: 1000,
1515
+ max: 30 * 60 * 1000,
1516
+ });
1517
+ const thinkingLevel = normalizeThinkingLevel(options.thinkingLevel, "off");
1518
+ const isDeep = researchPlan.policy.mode === "deep";
1519
+ emitProgress({
1520
+ type: "query_start",
1521
+ query,
1522
+ researchPlan: {
1523
+ maxPages: researchPlan.maxPages,
1524
+ maxHops: researchPlan.maxHops,
1525
+ sameDomainOnly: researchPlan.sameDomainOnly,
1526
+ seedUrls: researchPlan.seedUrls,
1527
+ policy: researchPlan.policy,
1528
+ },
1529
+ queryTimeoutMs,
1530
+ thinkingLevel,
1531
+ });
1532
+ const systemPrompt = this.buildSystemPrompt(researchPlan);
1533
+ const agent = new Agent({
1534
+ initialState: {
1535
+ systemPrompt,
1536
+ model: this.model,
1537
+ thinkingLevel,
1538
+ tools: this.createTools(researchPlan),
1539
+ messages: [],
1540
+ },
1541
+ getApiKey: () => resolveRuntimeApiKey(normalizeLlmApi(this.config.llmApi), this.config.llmApiKey),
1542
+ toolExecution: this.config.toolExecutionMode,
1543
+ });
1544
+ const unsubscribe = agent.subscribe((event) => {
1545
+ if (event.type === "turn_start") {
1546
+ turnCounter += 1;
1547
+ currentTurnStart = Date.now();
1548
+ emitProgress({ type: "turn_start", turn: turnCounter });
1549
+ }
1550
+ if (event.type === "turn_end") {
1551
+ const endedAt = Date.now();
1552
+ const durationMs = currentTurnStart ? endedAt - currentTurnStart : null;
1553
+ const toolResultsCount = Array.isArray(event.toolResults) ? event.toolResults.length : 0;
1554
+ turns.push({
1555
+ turn: turnCounter,
1556
+ durationMs,
1557
+ toolResults: toolResultsCount,
1558
+ });
1559
+ emitProgress({
1560
+ type: "turn_end",
1561
+ turn: turnCounter,
1562
+ durationMs,
1563
+ toolResults: toolResultsCount,
1564
+ });
1565
+ currentTurnStart = null;
1566
+ }
1567
+ if (event.type === "message_start" && event.message?.role === "assistant") {
1568
+ assistantCounter += 1;
1569
+ currentAssistantStart = Date.now();
1570
+ }
1571
+ if (event.type === "message_update" &&
1572
+ event.message?.role === "assistant" &&
1573
+ event.assistantMessageEvent?.type === "text_delta") {
1574
+ if (firstAssistantTokenMs === null) {
1575
+ firstAssistantTokenMs = Date.now() - startedAt;
1576
+ emitProgress({ type: "first_token", latencyMs: firstAssistantTokenMs });
1577
+ }
1578
+ const delta = String(event.assistantMessageEvent.delta || "");
1579
+ if (delta) {
1580
+ emitProgress({
1581
+ type: "assistant_delta",
1582
+ turn: turnCounter,
1583
+ message: assistantCounter,
1584
+ delta,
1585
+ });
1586
+ }
1587
+ }
1588
+ if (event.type === "message_end" && event.message?.role === "assistant") {
1589
+ const endedAt = Date.now();
1590
+ const durationMs = currentAssistantStart ? endedAt - currentAssistantStart : null;
1591
+ assistantMessages.push({
1592
+ message: assistantCounter,
1593
+ durationMs,
1594
+ });
1595
+ currentAssistantStart = null;
1596
+ }
1597
+ if (event.type === "tool_execution_start") {
1598
+ toolCallStarts.set(event.toolCallId, {
1599
+ startedAt: Date.now(),
1600
+ args: event.args,
1601
+ toolName: event.toolName,
1602
+ });
1603
+ emitProgress({
1604
+ type: "tool_start",
1605
+ toolCallId: event.toolCallId,
1606
+ toolName: event.toolName,
1607
+ args: event.args,
1608
+ });
1609
+ }
1610
+ if (event.type === "tool_execution_end") {
1611
+ const start = toolCallStarts.get(event.toolCallId);
1612
+ const durationMs = start ? Date.now() - start.startedAt : null;
1613
+ const details = (event.result?.details ?? null);
1614
+ const errorMessage = event.isError ? extractTextContent(event.result?.content) : "";
1615
+ toolCallStarts.delete(event.toolCallId);
1616
+ toolsUsed.push({
1617
+ toolName: event.toolName,
1618
+ isError: event.isError,
1619
+ durationMs,
1620
+ details,
1621
+ errorMessage,
1622
+ });
1623
+ const cacheFlag = details?.fromCache;
1624
+ const timingCache = details?.timing?.cache;
1625
+ const profileEntry = {
1626
+ toolCallId: event.toolCallId,
1627
+ toolName: event.toolName,
1628
+ isError: event.isError,
1629
+ durationMs,
1630
+ url: details?.url || details?.finalUrl || start?.args?.url || null,
1631
+ documentId: details?.documentId || start?.args?.documentId || null,
1632
+ title: details?.title || null,
1633
+ cache: cacheFlag === true ? "hit" : cacheFlag === false ? "miss" : timingCache || "unknown",
1634
+ pagesUsed: typeof details?.pagesUsed === "number" ? details.pagesUsed : undefined,
1635
+ maxPages: typeof details?.maxPages === "number" ? details.maxPages : undefined,
1636
+ timing: details?.timing || null,
1637
+ errorMessage,
1638
+ };
1639
+ toolProfiles.push(profileEntry);
1640
+ emitProgress({
1641
+ type: "tool_end",
1642
+ ...profileEntry,
1643
+ });
1644
+ }
1645
+ });
1646
+ const rawFollowUpPrompts = options.followUpPrompts;
1647
+ const followUpPrompts = Array.isArray(rawFollowUpPrompts)
1648
+ ? rawFollowUpPrompts.map((value) => String(value || "").trim()).filter(Boolean)
1649
+ : typeof rawFollowUpPrompts === "string"
1650
+ ? [rawFollowUpPrompts.trim()].filter(Boolean)
1651
+ : [];
1652
+ let abortedByTimeout = false;
1653
+ let abortedByClient = false;
1654
+ const onClientAbort = () => {
1655
+ abortedByClient = true;
1656
+ agent.abort();
1657
+ };
1658
+ const awaitPromptWithAbort = async (promptPromise) => {
1659
+ if (!externalAbortSignal) {
1660
+ await promptPromise;
1661
+ return;
1662
+ }
1663
+ throwIfAborted(externalAbortSignal, "request aborted by client");
1664
+ let onAbort = null;
1665
+ const abortPromise = new Promise((_resolve, reject) => {
1666
+ onAbort = () => reject(new Error("request aborted by client"));
1667
+ externalAbortSignal.addEventListener("abort", onAbort, { once: true });
1668
+ });
1669
+ // If abort wins the race, promptPromise may reject later. Keep it handled.
1670
+ void promptPromise.catch(() => { });
1671
+ try {
1672
+ await Promise.race([promptPromise, abortPromise]);
1673
+ }
1674
+ finally {
1675
+ if (onAbort) {
1676
+ externalAbortSignal.removeEventListener("abort", onAbort);
1677
+ }
1678
+ }
1679
+ };
1680
+ if (externalAbortSignal) {
1681
+ externalAbortSignal.addEventListener("abort", onClientAbort, { once: true });
1682
+ }
1683
+ const timeout = setTimeout(() => {
1684
+ abortedByTimeout = true;
1685
+ agent.abort();
1686
+ }, queryTimeoutMs);
1687
+ try {
1688
+ throwIfAborted(externalAbortSignal, "request aborted by client");
1689
+ await awaitPromptWithAbort(agent.prompt(query));
1690
+ for (const followUpPrompt of followUpPrompts) {
1691
+ throwIfAborted(externalAbortSignal, "request aborted by client");
1692
+ await awaitPromptWithAbort(agent.prompt(followUpPrompt));
1693
+ }
1694
+ }
1695
+ finally {
1696
+ clearTimeout(timeout);
1697
+ if (externalAbortSignal) {
1698
+ externalAbortSignal.removeEventListener("abort", onClientAbort);
1699
+ }
1700
+ unsubscribe();
1701
+ }
1702
+ const assistantMessage = [...agent.state.messages]
1703
+ .reverse()
1704
+ .find((message) => message?.role === "assistant");
1705
+ if (!assistantMessage) {
1706
+ if (abortedByClient || externalAbortSignal?.aborted) {
1707
+ throw new Error("request aborted by client");
1708
+ }
1709
+ if (abortedByTimeout) {
1710
+ throw new Error(`query timed out after ${queryTimeoutMs}ms`);
1711
+ }
1712
+ throw new Error("model request failed: no assistant response returned");
1713
+ }
1714
+ const usageRecord = (assistantMessage.usage || {}) ?? {};
1715
+ const usageCostRecord = (usageRecord.cost || {}) ?? {};
1716
+ const usageSummary = {
1717
+ input: toFiniteNonNegativeNumber(usageRecord.input),
1718
+ output: toFiniteNonNegativeNumber(usageRecord.output),
1719
+ cacheRead: toFiniteNonNegativeNumber(usageRecord.cacheRead),
1720
+ cacheWrite: toFiniteNonNegativeNumber(usageRecord.cacheWrite),
1721
+ total: toFiniteNonNegativeNumber(usageRecord.totalTokens),
1722
+ cost: {
1723
+ input: toFiniteNonNegativeNumber(usageCostRecord.input),
1724
+ output: toFiniteNonNegativeNumber(usageCostRecord.output),
1725
+ cacheRead: toFiniteNonNegativeNumber(usageCostRecord.cacheRead),
1726
+ cacheWrite: toFiniteNonNegativeNumber(usageCostRecord.cacheWrite),
1727
+ total: toFiniteNonNegativeNumber(usageCostRecord.total),
1728
+ },
1729
+ };
1730
+ this.metrics.tokenInput += usageSummary.input;
1731
+ this.metrics.tokenOutput += usageSummary.output;
1732
+ this.metrics.tokenCacheRead += usageSummary.cacheRead;
1733
+ this.metrics.tokenCacheWrite += usageSummary.cacheWrite;
1734
+ this.metrics.tokenTotal += usageSummary.total;
1735
+ this.metrics.costInput += usageSummary.cost.input;
1736
+ this.metrics.costOutput += usageSummary.cost.output;
1737
+ this.metrics.costCacheRead += usageSummary.cost.cacheRead;
1738
+ this.metrics.costCacheWrite += usageSummary.cost.cacheWrite;
1739
+ this.metrics.costTotal += usageSummary.cost.total;
1740
+ const stopReason = String(assistantMessage.stopReason || "")
1741
+ .trim()
1742
+ .toLowerCase();
1743
+ const modelError = String(assistantMessage.errorMessage || "").trim();
1744
+ if (stopReason === "error" || stopReason === "aborted") {
1745
+ if (abortedByClient || externalAbortSignal?.aborted) {
1746
+ throw new Error("request aborted by client");
1747
+ }
1748
+ if (abortedByTimeout) {
1749
+ throw new Error(`query timed out after ${queryTimeoutMs}ms`);
1750
+ }
1751
+ throw new Error(modelError ? `model request failed: ${modelError}` : "model request failed");
1752
+ }
1753
+ const synthesizedAnswer = extractAssistantText(assistantMessage);
1754
+ if (!synthesizedAnswer.trim() && toolsUsed.length === 0) {
1755
+ const reason = modelError || stopReason || "empty assistant response and no tool activity";
1756
+ throw new Error(`model request failed: ${reason}`);
1757
+ }
1758
+ const findings = collectResultFindings(toolsUsed);
1759
+ if (!isDeep && toolsUsed.length === 0) {
1760
+ const assistantSnippet = synthesizedAnswer.replace(/\s+/g, " ").trim().slice(0, 180);
1761
+ const reason = modelError || assistantSnippet || stopReason || "assistant produced no tool calls";
1762
+ throw new Error(`model request failed: no tool calls executed (${reason}). Ensure model endpoint is reachable and supports tool calling.`);
1763
+ }
1764
+ let answer;
1765
+ let citations;
1766
+ let findingsForResult;
1767
+ if (isDeep) {
1768
+ answer = synthesizedAnswer;
1769
+ citations = collectResultCitations(answer, toolProfiles);
1770
+ findingsForResult = findings;
1771
+ }
1772
+ else {
1773
+ // Search mode: model selects URLs, code fills content from findings
1774
+ const explicitNone = isExplicitNoneCollateAnswer(synthesizedAnswer);
1775
+ const modelUrls = explicitNone ? [] : extractCitationUrls(synthesizedAnswer);
1776
+ let effectiveFindings;
1777
+ if (explicitNone) {
1778
+ effectiveFindings = [];
1779
+ }
1780
+ else if (modelUrls.length > 0) {
1781
+ const selectedKeys = new Set();
1782
+ for (const url of modelUrls) {
1783
+ try {
1784
+ selectedKeys.add(normalizeUrlForDedupe(normalizeUrl(url)));
1785
+ }
1786
+ catch {
1787
+ // ignore malformed URL in model output
1788
+ }
1789
+ }
1790
+ effectiveFindings = findings
1791
+ .filter((finding) => {
1792
+ try {
1793
+ const key = normalizeUrlForDedupe(normalizeUrl(String(finding.url || "")));
1794
+ return selectedKeys.has(key);
1795
+ }
1796
+ catch {
1797
+ return false;
1798
+ }
1799
+ })
1800
+ .slice(0, 6);
1801
+ }
1802
+ else {
1803
+ // Fallback: model output had no URLs, use all findings
1804
+ effectiveFindings = findings.slice(0, 6);
1805
+ }
1806
+ if (!explicitNone && effectiveFindings.length === 0 && findings.length > 0) {
1807
+ // Model returned URLs we could not normalize/map; avoid empty output when evidence exists.
1808
+ effectiveFindings = findings.slice(0, 6);
1809
+ }
1810
+ answer = explicitNone ? "" : buildCollatedAnswer(effectiveFindings, toolsUsed);
1811
+ citations = normalizeUniqueUrls(effectiveFindings.map((finding) => String(finding.url || "")).filter(Boolean));
1812
+ findingsForResult = effectiveFindings;
1813
+ }
1814
+ const durationMs = Date.now() - startedAt;
1815
+ const byTool = {};
1816
+ for (const tool of toolProfiles) {
1817
+ const key = String(tool.toolName || "unknown");
1818
+ if (!byTool[key]) {
1819
+ byTool[key] = {
1820
+ count: 0,
1821
+ errors: 0,
1822
+ durationSumMs: 0,
1823
+ durationAvgMs: 0,
1824
+ cacheHits: 0,
1825
+ cacheMisses: 0,
1826
+ };
1827
+ }
1828
+ byTool[key].count += 1;
1829
+ if (tool.isError)
1830
+ byTool[key].errors += 1;
1831
+ if (typeof tool.durationMs === "number") {
1832
+ byTool[key].durationSumMs += tool.durationMs;
1833
+ }
1834
+ if (tool.cache === "hit")
1835
+ byTool[key].cacheHits += 1;
1836
+ if (tool.cache === "miss")
1837
+ byTool[key].cacheMisses += 1;
1838
+ }
1839
+ for (const key of Object.keys(byTool)) {
1840
+ byTool[key].durationAvgMs =
1841
+ byTool[key].count > 0 ? Number((byTool[key].durationSumMs / byTool[key].count).toFixed(2)) : 0;
1842
+ }
1843
+ const assistantDurationSumMs = assistantMessages.reduce((sum, msg) => sum + (typeof msg.durationMs === "number" ? msg.durationMs : 0), 0);
1844
+ const toolDurationSumMs = toolProfiles.reduce((sum, tool) => sum + (typeof tool.durationMs === "number" ? tool.durationMs : 0), 0);
1845
+ const profile = {
1846
+ totalMs: durationMs,
1847
+ firstAssistantTokenMs,
1848
+ turns,
1849
+ assistantMessages,
1850
+ tools: toolProfiles,
1851
+ summary: {
1852
+ turnCount: turns.length,
1853
+ assistantMessageCount: assistantMessages.length,
1854
+ toolCallCount: toolProfiles.length,
1855
+ toolErrorCount: toolProfiles.filter((tool) => tool.isError).length,
1856
+ followUpPromptCount: followUpPrompts.length,
1857
+ queryTimeoutMs,
1858
+ thinkingLevel,
1859
+ assistantDurationSumMs,
1860
+ toolDurationSumMs,
1861
+ note: "toolDurationSumMs may exceed totalMs because tool calls can run in parallel.",
1862
+ },
1863
+ byTool,
1864
+ };
1865
+ const result = {
1866
+ query,
1867
+ answer,
1868
+ findings: findingsForResult,
1869
+ citations,
1870
+ toolsUsed,
1871
+ usage: usageSummary,
1872
+ profile,
1873
+ researchPlan: {
1874
+ maxPages: researchPlan.maxPages,
1875
+ maxHops: researchPlan.maxHops,
1876
+ sameDomainOnly: researchPlan.sameDomainOnly,
1877
+ seedUrls: researchPlan.seedUrls,
1878
+ policy: researchPlan.policy,
1879
+ },
1880
+ durationMs,
1881
+ model: this.model?.id,
1882
+ createdAt: new Date().toISOString(),
1883
+ };
1884
+ emitProgress({
1885
+ type: "query_end",
1886
+ durationMs,
1887
+ citationsCount: citations.length,
1888
+ toolCalls: toolProfiles.length,
1889
+ toolErrors: profile.summary.toolErrorCount,
1890
+ });
1891
+ return result;
1892
+ }
1893
+ finally {
1894
+ this.metrics.activeQueries = Math.max(0, this.metrics.activeQueries - 1);
1895
+ }
1896
+ }
1897
+ async close() {
1898
+ if (this.browser) {
1899
+ await this.browser.close().catch(() => { });
1900
+ this.browser = null;
1901
+ }
1902
+ if (this.lightpandaProcess && this.config.lightpandaAutoStop) {
1903
+ this.log("stopping managed Lightpanda process");
1904
+ await new Promise((resolve) => {
1905
+ const processHandle = this.lightpandaProcess;
1906
+ if (!processHandle)
1907
+ return resolve();
1908
+ const done = () => resolve();
1909
+ processHandle.once("exit", done);
1910
+ try {
1911
+ processHandle.kill("SIGTERM");
1912
+ }
1913
+ catch {
1914
+ processHandle.removeListener("exit", done);
1915
+ resolve();
1916
+ }
1917
+ setTimeout(() => {
1918
+ if (this.lightpandaProcess) {
1919
+ try {
1920
+ processHandle.kill("SIGKILL");
1921
+ }
1922
+ catch {
1923
+ // ignore
1924
+ }
1925
+ }
1926
+ }, 800);
1927
+ });
1928
+ this.lightpandaProcess = null;
1929
+ this.lightpandaManaged = false;
1930
+ }
1931
+ }
1932
+ evictOldDeepResearchTasks(maxTasks = 100) {
1933
+ evictOldDeepResearchTasks(this.deepResearchTasks, maxTasks);
1934
+ }
1935
+ composeDeepResearchReport(instructions, effort, searchResult) {
1936
+ return composeDeepResearchReport(instructions, resolveDeepEffort(effort), searchResult);
1937
+ }
1938
+ async deepResearchStart(instructions, options = {}) {
1939
+ const normalizedInstructions = String(instructions ?? "").trim();
1940
+ if (!normalizedInstructions)
1941
+ throw new Error("instructions is required");
1942
+ const effort = resolveDeepEffort(options.effort);
1943
+ const task = createDeepResearchTask(normalizedInstructions, effort);
1944
+ this.deepResearchTasks.set(task.researchId, task);
1945
+ this.evictOldDeepResearchTasks();
1946
+ void (async () => {
1947
+ const startedAt = Date.now();
1948
+ task.status = "running";
1949
+ task.startedAt = new Date().toISOString();
1950
+ try {
1951
+ const profile = getDeepEffortProfile(effort);
1952
+ const customInstruction = buildDeepCustomInstruction(effort, profile);
1953
+ const followUpPrompts = buildDeepFollowUpPrompts(effort, profile);
1954
+ const queryResult = await this.enqueueQuery(normalizedInstructions, {
1955
+ researchPolicy: {
1956
+ mode: "deep",
1957
+ type: "auto",
1958
+ livecrawl: "fallback",
1959
+ numResults: profile.numResults,
1960
+ maxHops: profile.maxHops,
1961
+ customInstruction,
1962
+ },
1963
+ thinkingLevel: profile.thinkingLevel,
1964
+ queryTimeoutMs: profile.queryTimeoutMs,
1965
+ followUpPrompts,
1966
+ });
1967
+ task.status = "completed";
1968
+ task.completedAt = new Date().toISOString();
1969
+ task.durationMs = Date.now() - startedAt;
1970
+ task.report = String(queryResult.answer || "No report generated");
1971
+ task.citations = Array.isArray(queryResult.citations)
1972
+ ? queryResult.citations.map((url) => String(url || "")).filter(Boolean)
1973
+ : [];
1974
+ }
1975
+ catch (error) {
1976
+ task.status = "failed";
1977
+ task.completedAt = new Date().toISOString();
1978
+ task.durationMs = Date.now() - startedAt;
1979
+ task.error = error instanceof Error ? error.message : String(error);
1980
+ }
1981
+ })();
1982
+ return {
1983
+ success: true,
1984
+ researchId: task.researchId,
1985
+ effort,
1986
+ status: "pending",
1987
+ message: `Research started. Call yagami deep check ${task.researchId}`,
1988
+ };
1989
+ }
1990
+ async deepResearchCheck(researchId) {
1991
+ const id = String(researchId ?? "").trim();
1992
+ if (!id)
1993
+ throw new Error("researchId is required");
1994
+ const task = this.deepResearchTasks.get(id);
1995
+ if (!task) {
1996
+ throw new Error(`Unknown researchId: ${id}`);
1997
+ }
1998
+ if (task.status === "completed") {
1999
+ return {
2000
+ success: true,
2001
+ status: "completed",
2002
+ report: task.report || "No report generated",
2003
+ citations: task.citations || [],
2004
+ costDollars: task.costDollars || 0,
2005
+ durationMs: task.durationMs || undefined,
2006
+ effort: task.effort,
2007
+ };
2008
+ }
2009
+ if (task.status === "running" || task.status === "pending") {
2010
+ return {
2011
+ status: task.status,
2012
+ message: "Research in progress. Call yagami deep check again with the same researchId.",
2013
+ effort: task.effort,
2014
+ };
2015
+ }
2016
+ return {
2017
+ success: false,
2018
+ status: "failed",
2019
+ error: task.error || "Research failed",
2020
+ effort: task.effort,
2021
+ };
2022
+ }
2023
+ }
2024
+ // TS-native engine module re-exports.
2025
+ export { sanitizeUrlCandidate, normalizeUrl, normalizeUniqueUrls } from "./engine/url-utils.js";
2026
+ export { clampInteger, normalizeEnum, normalizeCountryCode, getCompanyCountryProfile, toArray, parseStringList, parseUrlList, toBool, normalizeWhitespace, decodeHtmlEntities, stripHtml, normalizeDomainFilter, domainMatches, isDiscoveryDomain, isValidPublicHostname, parseIsoDate, isChallengeLikeContent, extractTopTerms, unwrapDuckDuckGoHref, isTrackingOrAdUrl, categoryProfile, truncateText, countWords, extractAssistantText, extractTextContent, buildContext, getHostname, isHostAllowed, extractSeedUrls, extractCitationUrls, normalizePotentialUrls, } from "./engine/helpers.js";
2027
+ export { normalizeResearchPolicy, deriveResearchPlan, buildSystemPrompt } from "./engine/policy.js";
2028
+ export { resolveDeepEffort, getDeepEffortProfile, buildDeepCustomInstruction, buildDeepFollowUpPrompts, createDeepResearchTask, evictOldDeepResearchTasks, composeDeepResearchReport, extractDeepResearchCitations, } from "./engine/deep-research.js";
2029
+ export { URL_REGEX, DEEP_EFFORT_LEVELS, CODE_PREFERRED_DOMAINS, COMPANY_PREFERRED_DOMAINS, COMPANY_COUNTRY_ALIASES, COMPANY_COUNTRY_PROFILES, } from "./engine/constants.js";
2030
+ //# sourceMappingURL=engine.js.map