unbrowse 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2251,7 +2251,7 @@ function shouldRetryEphemeralProfileError(error) {
2251
2251
  }
2252
2252
  function shouldRestartKuriForError(error) {
2253
2253
  const message = error instanceof Error ? error.message : String(error ?? "");
2254
- return /CDP command failed|target closed|session closed|No target with given id/i.test(message);
2254
+ return /CDP command failed|target closed|session closed|No target with given id|No tabs available and failed to create one/i.test(message);
2255
2255
  }
2256
2256
  function extractRouteHint(url) {
2257
2257
  try {
@@ -11204,11 +11204,26 @@ function shouldFallbackToBrowserReplay(data, endpoint, intent, contextUrl) {
11204
11204
  return false;
11205
11205
  if (endpoint.dom_extraction && typeof data === "string" && isHtml(data))
11206
11206
  return false;
11207
- if (typeof data === "string")
11207
+ if (typeof data === "string") {
11208
+ if (isHtml(data) && looksLikeSearchAuthOrHomepageBounceHtml(data))
11209
+ return false;
11208
11210
  return isHtml(data) || isSpaShell(data);
11211
+ }
11209
11212
  const assessment = assessIntentResult(data, intent);
11210
11213
  return assessment.verdict === "fail";
11211
11214
  }
11215
/**
 * Heuristically detect HTML that is a login / "about" / homepage bounce
 * rather than the page that was requested.
 *
 * The check runs on the lowercased document (with the trimmed <title> text
 * prepended) and on the lowercased final URL. It returns true when either:
 *  - a LawNet-specific marker is present (about/what-is text, the
 *    about-lawnet path in the HTML, or a final URL on the LawNet home path), or
 *  - a generic auth phrase (login, log in, sign in, forgot password) occurs
 *    together with a search marker (search, legal research, lawnet).
 *
 * @param {string} html - Response body; anything `isHtml` rejects yields false.
 * @param {string} [finalUrl] - URL the response was finally served from.
 * @returns {boolean} True when the document looks like a bounce page.
 */
function looksLikeSearchAuthOrHomepageBounceHtml(html, finalUrl) {
  if (!isHtml(html))
    return false;
  const lower = html.toLowerCase();
  const titleMatch = lower.match(/<title[^>]*>([^<]+)</);
  const title = titleMatch?.[1]?.trim() ?? "";
  const final = finalUrl?.toLowerCase() ?? "";
  const combined = `${title} ${lower}`;
  // "forgot password" is deliberately NOT a LawNet marker on its own: nearly
  // every site with a login form contains it, so a bare match would flag
  // unrelated pages. It still counts through the generic group below, which
  // additionally requires a search marker (and "lawnet" is one of them).
  const hasLawnetBounceMarkers =
    /about lawnet legal research/.test(combined) ||
    /what is lawnet/.test(combined) ||
    /lawnet legal research, a service of/.test(combined) ||
    /\/lawnet\/web\/lawnet\/about-lawnet\b/.test(combined) ||
    /\/lawnet\/web\/lawnet\/home\b/.test(final);
  const hasGenericAuthMarkers =
    /\b(login|log in|sign in|forgot password)\b/.test(combined) &&
    /\b(search|legal research|lawnet)\b/.test(combined);
  return hasLawnetBounceMarkers || hasGenericAuthMarkers;
}
11212
11227
  function buildSampleRequestFromUrl(url) {
11213
11228
  try {
11214
11229
  return Object.fromEntries(sanitizeNavigationQueryParams(new URL(url)).searchParams.entries());
@@ -12340,8 +12355,10 @@ async function tryHttpFetch(url, authHeaders, cookies) {
12340
12355
  }
12341
12356
  async function executeDomExtractionEndpoint(endpoint, url, intent, authHeaders, cookies) {
12342
12357
  const extractionIntent = deriveDomExecutionIntent(endpoint, intent);
12358
+ const isCapturedPageArtifact = /captured page artifact/i.test(endpoint.description ?? "");
12343
12359
  const ssrResult = await tryHttpFetch(url, authHeaders, cookies);
12344
12360
  if (ssrResult) {
12361
+ const looksLikeBounce = looksLikeSearchAuthOrHomepageBounceHtml(ssrResult.html, ssrResult.final_url);
12345
12362
  const ssrExtracted = extractFromDOMWithHint(ssrResult.html, extractionIntent, endpoint.dom_extraction);
12346
12363
  if (ssrExtracted.data) {
12347
12364
  const ssrQuality = validateExtractionQuality(ssrExtracted.data, ssrExtracted.confidence, extractionIntent);
@@ -12373,6 +12390,41 @@ async function executeDomExtractionEndpoint(endpoint, url, intent, authHeaders,
12373
12390
  };
12374
12391
  }
12375
12392
  }
12393
+ if (isCapturedPageArtifact) {
12394
+ return {
12395
+ data: {
12396
+ error: "low_quality_dom_extraction",
12397
+ message: `Structured DOM extraction was rejected: ${looksLikeBounce ? "search_auth_or_homepage_bounce" : "captured_page_artifact_miss"}`
12398
+ },
12399
+ status: 422,
12400
+ trace_id: nanoid5(),
12401
+ network_events: [toTraceNetworkEvent({
12402
+ url: ssrResult.final_url,
12403
+ method: "GET",
12404
+ requestHeaders: authHeaders,
12405
+ responseStatus: 200,
12406
+ responseHeaders: { "content-type": "text/html" },
12407
+ responseBody: ssrResult.html
12408
+ })]
12409
+ };
12410
+ }
12411
+ } else if (isCapturedPageArtifact && looksLikeBounce) {
12412
+ return {
12413
+ data: {
12414
+ error: "low_quality_dom_extraction",
12415
+ message: "Structured DOM extraction was rejected: search_auth_or_homepage_bounce"
12416
+ },
12417
+ status: 422,
12418
+ trace_id: nanoid5(),
12419
+ network_events: [toTraceNetworkEvent({
12420
+ url: ssrResult.final_url,
12421
+ method: "GET",
12422
+ requestHeaders: authHeaders,
12423
+ responseStatus: 200,
12424
+ responseHeaders: { "content-type": "text/html" },
12425
+ responseBody: ssrResult.html
12426
+ })]
12427
+ };
12376
12428
  }
12377
12429
  console.log(`[ssr-fast] miss, falling back to browser`);
12378
12430
  } else {
@@ -14125,6 +14177,24 @@ function promoteResultSnapshot(cacheKey, skill, endpointId, result, trace, respo
14125
14177
  expires: Date.now() + ROUTE_CACHE_TTL
14126
14178
  });
14127
14179
  }
14180
/**
 * Remove cached resolve state for the given keys.
 *
 * Each distinct truthy cache key is deleted from `routeResultCache`,
 * `capturedDomainCache` and `skillRouteCache`; each distinct truthy domain key
 * is deleted from `domainSkillCache`. `persistRouteCache()` /
 * `persistDomainCache()` are called once each, and only when the matching
 * delete actually removed an entry.
 *
 * @param {Array<string|null|undefined>|null|undefined} cacheKeys - Scoped
 *   cache keys to drop; falsy entries and duplicates are ignored.
 * @param {Array<string|null|undefined>|null} [domainKeys=[]] - Domain cache
 *   keys to drop; falsy entries and duplicates are ignored.
 * @returns {void}
 */
function invalidateResolveCacheEntries(cacheKeys, domainKeys = []) {
  // A default parameter only covers `undefined`; tolerate explicit null too.
  const uniqueCacheKeys = new Set((cacheKeys ?? []).filter(Boolean));
  const uniqueDomainKeys = new Set((domainKeys ?? []).filter(Boolean));

  let routeCacheDirty = false;
  for (const cacheKey of uniqueCacheKeys) {
    routeResultCache.delete(cacheKey);
    capturedDomainCache.delete(cacheKey);
    if (skillRouteCache.delete(cacheKey))
      routeCacheDirty = true;
  }

  let domainCacheDirty = false;
  for (const domainKey of uniqueDomainKeys) {
    if (domainSkillCache.delete(domainKey))
      domainCacheDirty = true;
  }

  // Persist only the caches that actually lost an entry.
  if (routeCacheDirty)
    persistRouteCache();
  if (domainCacheDirty)
    persistDomainCache();
}
14128
14198
  async function getSkillWithTimeout(skillId, scope, timeoutMs = MARKETPLACE_GET_SKILL_TIMEOUT_MS) {
14129
14199
  return Promise.race([
14130
14200
  getSkill2(skillId, scope),
@@ -14176,6 +14246,10 @@ function isCachedSkillRelevantForIntent(skill, intent, contextUrl) {
14176
14246
  const resolvedSkill = withContextReplayEndpoint(skill, intent, contextUrl);
14177
14247
  const ranked = rankEndpoints(resolvedSkill.endpoints, intent, resolvedSkill.domain, contextUrl);
14178
14248
  const top = ranked[0];
14249
+ const isSearchIntent = /\b(search|find|lookup|browse|discover)\b/i.test(intent);
14250
+ if (top && isSearchIntent && contextUrl && /captured page artifact/i.test(top.endpoint.description ?? "") && top.endpoint.response_schema?.type !== "array" && top.endpoint.url_template === contextUrl && !skillHasBetterStructuredSearchEndpoint(resolvedSkill, top.endpoint.endpoint_id, intent, contextUrl)) {
14251
+ return false;
14252
+ }
14179
14253
  if (top && isEducationCatalogIntent(intent) && isRootContextUrl(contextUrl) && /captured page artifact/i.test(top.endpoint.description ?? "") && top.endpoint.url_template === contextUrl) {
14180
14254
  return false;
14181
14255
  }
@@ -14324,6 +14398,9 @@ async function withDomainCaptureLock(domain, fn) {
14324
14398
  captureDomainLocks.delete(domain);
14325
14399
  }
14326
14400
  }
14401
/**
 * Report whether both conditions hold: auto-execution failed for every
 * attempt and a context URL is available.
 *
 * @param {boolean} autoexecFailedAll - Flag reported by the auto-exec step.
 * @param {string} [contextUrl] - Context URL of the request, if any.
 * @returns {boolean} Always a strict boolean, even when `autoexecFailedAll`
 *   is undefined/null at runtime.
 */
function shouldFallbackToLiveCaptureAfterAutoexecFailure(autoexecFailedAll, contextUrl) {
  // Coerce both operands so a falsy non-boolean first operand is never leaked.
  return Boolean(autoexecFailedAll) && Boolean(contextUrl);
}
14327
14404
  function computeCompositeScore(embeddingScore, skill) {
14328
14405
  const reliabilities = skill.endpoints.map((e) => e.reliability_score);
14329
14406
  const avgReliability = reliabilities.length > 0 ? reliabilities.reduce((a, b) => a + b, 0) / reliabilities.length : 0.5;
@@ -14904,13 +14981,20 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
14904
14981
  const autoResult = await tryAutoExecute(skill, source);
14905
14982
  if (autoResult) {
14906
14983
  promoteLearnedSkill(clientScope, cacheKey, skill, autoResult.trace.endpoint_id ?? "", context?.url);
14907
- return autoResult;
14984
+ return { orchestratorResult: autoResult, autoexecFailedAll: false };
14908
14985
  }
14986
+ return {
14987
+ orchestratorResult: buildDeferral(skill, source, extraFields),
14988
+ autoexecFailedAll: true
14989
+ };
14909
14990
  } catch (err) {
14910
14991
  console.warn(`[auto-exec] failed, falling back to deferral: ${err.message}`);
14911
14992
  }
14912
14993
  }
14913
- return buildDeferral(skill, source, extraFields);
14994
+ return {
14995
+ orchestratorResult: buildDeferral(skill, source, extraFields),
14996
+ autoexecFailedAll: false
14997
+ };
14914
14998
  }
14915
14999
  function buildDeferral(skill, source, extraFields) {
14916
15000
  const resolvedSkill = withContextReplayEndpoint(skill, intent, context?.url);
@@ -15238,6 +15322,7 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15238
15322
  return null;
15239
15323
  }
15240
15324
  const requestedDomain = context?.domain ?? (context?.url ? new URL(context.url).hostname : null);
15325
+ const requestedDomainCacheKey = getDomainReuseKey(context?.url ?? requestedDomain);
15241
15326
  const resolveCacheKey = buildResolveCacheKey(requestedDomain, intent, context?.url);
15242
15327
  const cacheKey = scopedCacheKey(clientScope, resolveCacheKey);
15243
15328
  if (!forceCapture && !agentChoseEndpoint) {
@@ -15246,10 +15331,15 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15246
15331
  if (cachedResult.expires <= Date.now() || !isAcceptableIntentResult(cachedResult.result, intent) || !isCachedSkillRelevantForIntent(cachedResult.skill, intent, context?.url)) {
15247
15332
  routeResultCache.delete(cacheKey);
15248
15333
  } else {
15249
- timing.cache_hit = true;
15250
15334
  const deferred2 = await buildDeferralWithAutoExec(cachedResult.skill, "marketplace");
15251
- deferred2.timing.cache_hit = true;
15252
- return deferred2;
15335
+ if (shouldFallbackToLiveCaptureAfterAutoexecFailure(deferred2.autoexecFailedAll, context?.url)) {
15336
+ console.log("[route-result-cache] stale cached skill; retrying via live capture");
15337
+ invalidateResolveCacheEntries([cacheKey], requestedDomainCacheKey ? [requestedDomainCacheKey] : []);
15338
+ } else {
15339
+ timing.cache_hit = true;
15340
+ deferred2.orchestratorResult.timing.cache_hit = true;
15341
+ return deferred2.orchestratorResult;
15342
+ }
15253
15343
  }
15254
15344
  }
15255
15345
  }
@@ -15282,10 +15372,15 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15282
15372
  if (bestCached.scopedKey !== cacheKey) {
15283
15373
  promoteLearnedSkill(clientScope, resolveCacheKey, bestCached.skill, bestCached.entry.endpointId, context?.url);
15284
15374
  }
15285
- timing.cache_hit = true;
15286
15375
  const deferred2 = await buildDeferralWithAutoExec(bestCached.skill, "marketplace");
15287
- deferred2.timing.cache_hit = true;
15288
- return deferred2;
15376
+ if (shouldFallbackToLiveCaptureAfterAutoexecFailure(deferred2.autoexecFailedAll, context?.url)) {
15377
+ console.log("[route-cache] stale cached skill; retrying via live capture");
15378
+ invalidateResolveCacheEntries([cacheKey, bestCached.scopedKey], requestedDomainCacheKey ? [requestedDomainCacheKey] : []);
15379
+ } else {
15380
+ timing.cache_hit = true;
15381
+ deferred2.orchestratorResult.timing.cache_hit = true;
15382
+ return deferred2.orchestratorResult;
15383
+ }
15289
15384
  }
15290
15385
  }
15291
15386
  if (!forceCapture && !agentChoseEndpoint && requestedDomain) {
@@ -15294,11 +15389,16 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15294
15389
  if (domainCached && Date.now() - domainCached.ts < 7 * 24 * 60 * 60000) {
15295
15390
  const skill = readSkillSnapshot(domainCached.localSkillPath) ?? await getSkill2(domainCached.skillId, clientScope);
15296
15391
  if (skill && isCachedSkillRelevantForIntent(skill, intent, context?.url)) {
15297
- timing.cache_hit = true;
15298
15392
  console.log(`[domain-cache] hit for ${domainKey} → skill ${skill.skill_id.slice(0, 15)}`);
15299
15393
  const result2 = await buildDeferralWithAutoExec(skill, "marketplace");
15300
- result2.timing.cache_hit = true;
15301
- return result2;
15394
+ if (shouldFallbackToLiveCaptureAfterAutoexecFailure(result2.autoexecFailedAll, context?.url)) {
15395
+ console.log(`[domain-cache] stale skill for ${domainKey}; retrying via live capture`);
15396
+ invalidateResolveCacheEntries([cacheKey], [domainKey]);
15397
+ } else {
15398
+ timing.cache_hit = true;
15399
+ result2.orchestratorResult.timing.cache_hit = true;
15400
+ return result2.orchestratorResult;
15401
+ }
15302
15402
  } else if (skill) {
15303
15403
  const ranked = rankEndpoints(skill.endpoints, intent, skill.domain, context?.url);
15304
15404
  const top = ranked[0];
@@ -15459,7 +15559,11 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15459
15559
  if (best.endpointId) {
15460
15560
  console.log(`[search] endpoint-level hit hint: ${best.endpointId} score=${best.candidate.score.toFixed(3)}`);
15461
15561
  }
15462
- return await buildDeferralWithAutoExec(best.skill, "marketplace");
15562
+ const deferred2 = await buildDeferralWithAutoExec(best.skill, "marketplace");
15563
+ if (!shouldFallbackToLiveCaptureAfterAutoexecFailure(deferred2.autoexecFailedAll, context?.url)) {
15564
+ return deferred2.orchestratorResult;
15565
+ }
15566
+ console.log("[marketplace] stale top skill; retrying via live capture");
15463
15567
  }
15464
15568
  }
15465
15569
  }
@@ -15472,8 +15576,6 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15472
15576
  if (!isCachedSkillRelevantForIntent(domainHit.skill, intent, context?.url)) {
15473
15577
  capturedDomainCache.delete(cacheKey);
15474
15578
  } else {
15475
- timing.cache_hit = true;
15476
- let staleCachedEndpoint = false;
15477
15579
  if (agentChoseEndpoint) {
15478
15580
  const execOut = await executeSkill(domainHit.skill, { ...params, endpoint_id: params.endpoint_id ?? domainHit.endpointId }, projection, { ...options, intent, contextUrl: context?.url });
15479
15581
  if (execOut.trace.success && isAcceptableIntentResult(execOut.result, intent)) {
@@ -15488,11 +15590,17 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15488
15590
  extraction_hints: execOut.extraction_hints
15489
15591
  };
15490
15592
  }
15491
- staleCachedEndpoint = true;
15593
+ invalidateResolveCacheEntries([cacheKey], requestedDomainCacheKey ? [requestedDomainCacheKey] : []);
15492
15594
  }
15493
15595
  const deferred2 = await buildDeferralWithAutoExec(domainHit.skill, "marketplace");
15494
- deferred2.timing.cache_hit = true;
15495
- return deferred2;
15596
+ if (shouldFallbackToLiveCaptureAfterAutoexecFailure(deferred2.autoexecFailedAll, context?.url)) {
15597
+ console.log("[captured-domain-cache] stale skill; retrying via live capture");
15598
+ invalidateResolveCacheEntries([cacheKey], requestedDomainCacheKey ? [requestedDomainCacheKey] : []);
15599
+ } else {
15600
+ timing.cache_hit = true;
15601
+ deferred2.orchestratorResult.timing.cache_hit = true;
15602
+ return deferred2.orchestratorResult;
15603
+ }
15496
15604
  }
15497
15605
  }
15498
15606
  const bypassLiveCaptureQueue = shouldBypassLiveCaptureQueue(context?.url);
@@ -15522,9 +15630,9 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15522
15630
  auth_recommended: true,
15523
15631
  auth_hint: captureResult2.auth_hint
15524
15632
  } : undefined);
15525
- queuePassivePublishIfExecuted(learned_skill, deferred2, parityBaseline2);
15526
- deferred2.timing.cache_hit = true;
15527
- return deferred2;
15633
+ queuePassivePublishIfExecuted(learned_skill, deferred2.orchestratorResult, parityBaseline2);
15634
+ deferred2.orchestratorResult.timing.cache_hit = true;
15635
+ return deferred2.orchestratorResult;
15528
15636
  }
15529
15637
  return {
15530
15638
  result,
@@ -15725,8 +15833,8 @@ async function resolveAndExecute(intent, params = {}, context, projection, optio
15725
15833
  auth_recommended: true,
15726
15834
  auth_hint: captureResult.auth_hint
15727
15835
  } : undefined);
15728
- queuePassivePublishIfExecuted(learned_skill, deferred, parityBaseline);
15729
- return deferred;
15836
+ queuePassivePublishIfExecuted(learned_skill, deferred.orchestratorResult, parityBaseline);
15837
+ return deferred.orchestratorResult;
15730
15838
  }
15731
15839
  async function getOrCreateBrowserCaptureSkill() {
15732
15840
  const existing = await getSkill2(BROWSER_CAPTURE_SKILL_ID);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "2.1.1",
3
+ "version": "2.1.2",
4
4
  "description": "Reverse-engineer any website into reusable API skills. npm CLI + local engine.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -315,9 +315,9 @@ function shouldRetryEphemeralProfileError(error: unknown): boolean {
315
315
  return /persistentcontext|target page, context or browser has been closed|browser has been closed|page has been closed/i.test(message);
316
316
  }
317
317
 
318
/**
 * Classify a thrown value by its message text: returns true when the message
 * contains one of the known CDP / target / session / tab failure phrases
 * (case-insensitive), which this function's name marks as restart-worthy.
 *
 * @param error Thrown value: an Error, a string, an error-like object exposing
 *   a string `message`, or null/undefined.
 * @returns True when the message matches one of the phrases.
 */
export function shouldRestartKuriForError(error: unknown): boolean {
  let message: string;
  if (error instanceof Error) {
    message = error.message;
  } else if (
    typeof error === "object" &&
    error !== null &&
    typeof (error as { message?: unknown }).message === "string"
  ) {
    // Plain-object throwables would otherwise stringify to "[object Object]"
    // and could never match any phrase.
    message = (error as { message: string }).message;
  } else {
    // null/undefined become "", everything else is stringified.
    message = String(error ?? "");
  }
  return /CDP command failed|target closed|session closed|No target with given id|No tabs available and failed to create one/i.test(message);
}
322
322
 
323
323
  /**
@@ -746,7 +746,7 @@ export function buildStructuredReplayHeaders(
746
746
  return headers;
747
747
  }
748
748
 
749
- function shouldFallbackToBrowserReplay(
749
+ export function shouldFallbackToBrowserReplay(
750
750
  data: unknown,
751
751
  endpoint: EndpointDescriptor,
752
752
  intent?: string,
@@ -755,11 +755,37 @@ function shouldFallbackToBrowserReplay(
755
755
  const replayUrl = resolveExecutionUrlTemplate(endpoint, contextUrl);
756
756
  if (!isDocumentLikeUrl(replayUrl)) return false;
757
757
  if (endpoint.dom_extraction && typeof data === "string" && isHtml(data)) return false;
758
- if (typeof data === "string") return isHtml(data) || isSpaShell(data);
758
+ if (typeof data === "string") {
759
+ if (isHtml(data) && looksLikeSearchAuthOrHomepageBounceHtml(data)) return false;
760
+ return isHtml(data) || isSpaShell(data);
761
+ }
759
762
  const assessment = assessIntentResult(data, intent);
760
763
  return assessment.verdict === "fail";
761
764
  }
762
765
 
766
/**
 * Heuristically detect HTML that is a login / "about" / homepage bounce
 * rather than the page that was requested.
 *
 * The check runs on the lowercased document (with the trimmed <title> text
 * prepended) and on the lowercased final URL. It returns true when either:
 *  - a LawNet-specific marker is present (about/what-is text, the
 *    about-lawnet path in the HTML, or a final URL on the LawNet home path), or
 *  - a generic auth phrase (login, log in, sign in, forgot password) occurs
 *    together with a search marker (search, legal research, lawnet).
 *
 * @param html Response body; anything `isHtml` rejects yields false.
 * @param finalUrl URL the response was finally served from, if known.
 * @returns True when the document looks like a bounce page.
 */
export function looksLikeSearchAuthOrHomepageBounceHtml(
  html: string,
  finalUrl?: string,
): boolean {
  if (!isHtml(html)) return false;
  const lower = html.toLowerCase();
  const titleMatch = lower.match(/<title[^>]*>([^<]+)</);
  const title = titleMatch?.[1]?.trim() ?? "";
  const final = finalUrl?.toLowerCase() ?? "";
  const combined = `${title} ${lower}`;
  // "forgot password" is deliberately NOT a LawNet marker on its own: nearly
  // every site with a login form contains it, so a bare match would flag
  // unrelated pages. It still counts through the generic group below, which
  // additionally requires a search marker (and "lawnet" is one of them).
  const hasLawnetBounceMarkers =
    /about lawnet legal research/.test(combined) ||
    /what is lawnet/.test(combined) ||
    /lawnet legal research, a service of/.test(combined) ||
    /\/lawnet\/web\/lawnet\/about-lawnet\b/.test(combined) ||
    /\/lawnet\/web\/lawnet\/home\b/.test(final);
  const hasGenericAuthMarkers =
    /\b(login|log in|sign in|forgot password)\b/.test(combined) &&
    /\b(search|legal research|lawnet)\b/.test(combined);
  return hasLawnetBounceMarkers || hasGenericAuthMarkers;
}
788
+
763
789
  function buildSampleRequestFromUrl(url: string): Record<string, unknown> {
764
790
  try {
765
791
  return Object.fromEntries(sanitizeNavigationQueryParams(new URL(url)).searchParams.entries());
@@ -2127,10 +2153,12 @@ async function executeDomExtractionEndpoint(
2127
2153
  cookies: Array<{ name: string; value: string; domain: string }>,
2128
2154
  ): Promise<{ data: unknown; status: number; trace_id: string; network_events?: TraceNetworkEvent[] }> {
2129
2155
  const extractionIntent = deriveDomExecutionIntent(endpoint, intent);
2156
+ const isCapturedPageArtifact = /captured page artifact/i.test(endpoint.description ?? "");
2130
2157
 
2131
2158
  // SSR fast-path: try plain HTTP fetch before browser
2132
2159
  const ssrResult = await tryHttpFetch(url, authHeaders, cookies);
2133
2160
  if (ssrResult) {
2161
+ const looksLikeBounce = looksLikeSearchAuthOrHomepageBounceHtml(ssrResult.html, ssrResult.final_url);
2134
2162
  const ssrExtracted = extractFromDOMWithHint(ssrResult.html, extractionIntent, endpoint.dom_extraction);
2135
2163
  if (ssrExtracted.data) {
2136
2164
  const ssrQuality = validateExtractionQuality(ssrExtracted.data, ssrExtracted.confidence, extractionIntent);
@@ -2162,6 +2190,41 @@ async function executeDomExtractionEndpoint(
2162
2190
  };
2163
2191
  }
2164
2192
  }
2193
+ if (isCapturedPageArtifact) {
2194
+ return {
2195
+ data: {
2196
+ error: "low_quality_dom_extraction",
2197
+ message: `Structured DOM extraction was rejected: ${looksLikeBounce ? "search_auth_or_homepage_bounce" : "captured_page_artifact_miss"}`,
2198
+ },
2199
+ status: 422,
2200
+ trace_id: nanoid(),
2201
+ network_events: [toTraceNetworkEvent({
2202
+ url: ssrResult.final_url,
2203
+ method: "GET",
2204
+ requestHeaders: authHeaders,
2205
+ responseStatus: 200,
2206
+ responseHeaders: { "content-type": "text/html" },
2207
+ responseBody: ssrResult.html,
2208
+ })],
2209
+ };
2210
+ }
2211
+ } else if (isCapturedPageArtifact && looksLikeBounce) {
2212
+ return {
2213
+ data: {
2214
+ error: "low_quality_dom_extraction",
2215
+ message: "Structured DOM extraction was rejected: search_auth_or_homepage_bounce",
2216
+ },
2217
+ status: 422,
2218
+ trace_id: nanoid(),
2219
+ network_events: [toTraceNetworkEvent({
2220
+ url: ssrResult.final_url,
2221
+ method: "GET",
2222
+ requestHeaders: authHeaders,
2223
+ responseStatus: 200,
2224
+ responseHeaders: { "content-type": "text/html" },
2225
+ responseBody: ssrResult.html,
2226
+ })],
2227
+ };
2165
2228
  }
2166
2229
  console.log(`[ssr-fast] miss, falling back to browser`);
2167
2230
  } else {
@@ -329,6 +329,21 @@ function promoteResultSnapshot(
329
329
  });
330
330
  }
331
331
 
332
/**
 * Remove cached resolve state for the given keys.
 *
 * Each distinct truthy cache key is deleted from `routeResultCache`,
 * `capturedDomainCache` and `skillRouteCache`; each distinct truthy domain key
 * is deleted from `domainSkillCache`. `persistRouteCache()` /
 * `persistDomainCache()` are called once each, and only when the matching
 * delete actually removed an entry.
 *
 * @param cacheKeys Scoped cache keys to drop; falsy entries and duplicates are
 *   ignored, and a nullish list at runtime is treated as empty.
 * @param domainKeys Domain cache keys to drop; same tolerance as `cacheKeys`.
 */
function invalidateResolveCacheEntries(cacheKeys: string[], domainKeys: string[] = []): void {
  // A default parameter only covers `undefined`; tolerate explicit null from
  // untyped callers too.
  const uniqueCacheKeys = new Set((cacheKeys ?? []).filter(Boolean));
  const uniqueDomainKeys = new Set((domainKeys ?? []).filter(Boolean));

  let routeCacheDirty = false;
  for (const cacheKey of uniqueCacheKeys) {
    routeResultCache.delete(cacheKey);
    capturedDomainCache.delete(cacheKey);
    if (skillRouteCache.delete(cacheKey)) routeCacheDirty = true;
  }

  let domainCacheDirty = false;
  for (const domainKey of uniqueDomainKeys) {
    if (domainSkillCache.delete(domainKey)) domainCacheDirty = true;
  }

  // Persist only the caches that actually lost an entry.
  if (routeCacheDirty) persistRouteCache();
  if (domainCacheDirty) persistDomainCache();
}
346
+
332
347
  async function getSkillWithTimeout(
333
348
  skillId: string,
334
349
  scope: string,
@@ -408,6 +423,23 @@ export function isCachedSkillRelevantForIntent(
408
423
  contextUrl,
409
424
  );
410
425
  const top = ranked[0];
426
+ const isSearchIntent = /\b(search|find|lookup|browse|discover)\b/i.test(intent);
427
+ if (
428
+ top &&
429
+ isSearchIntent &&
430
+ contextUrl &&
431
+ /captured page artifact/i.test(top.endpoint.description ?? "") &&
432
+ top.endpoint.response_schema?.type !== "array" &&
433
+ top.endpoint.url_template === contextUrl &&
434
+ !skillHasBetterStructuredSearchEndpoint(
435
+ resolvedSkill,
436
+ top.endpoint.endpoint_id,
437
+ intent,
438
+ contextUrl,
439
+ )
440
+ ) {
441
+ return false;
442
+ }
411
443
  if (
412
444
  top &&
413
445
  isEducationCatalogIntent(intent) &&
@@ -626,6 +658,18 @@ export interface OrchestratorResult {
626
658
  extraction_hints?: import("../transform/schema-hints.js").ExtractionHint;
627
659
  }
628
660
 
661
/** Result pair carrying an orchestrator result plus an auto-exec failure flag. */
type AutoExecDecision = {
  /** The orchestrator result to hand back to the caller. */
  orchestratorResult: OrchestratorResult;
  /** True when auto-execution was attempted and produced no result. */
  autoexecFailedAll: boolean;
};

/**
 * Report whether both conditions hold: auto-execution failed for every
 * attempt and a context URL is available.
 *
 * @param autoexecFailedAll Flag reported by the auto-exec step.
 * @param contextUrl Context URL of the request, if any.
 * @returns Always a strict boolean, even if an untyped caller passes a
 *   nullish `autoexecFailedAll` at runtime.
 */
export function shouldFallbackToLiveCaptureAfterAutoexecFailure(
  autoexecFailedAll: boolean,
  contextUrl?: string,
): boolean {
  // Coerce both operands so a falsy non-boolean first operand is never leaked.
  return Boolean(autoexecFailedAll) && Boolean(contextUrl);
}
672
+
629
673
  function computeCompositeScore(embeddingScore: number, skill: SkillManifest): number {
630
674
  // Average reliability across endpoints
631
675
  const reliabilities = skill.endpoints.map((e) => e.reliability_score);
@@ -1391,7 +1435,7 @@ export async function resolveAndExecute(
1391
1435
  skill: SkillManifest,
1392
1436
  source: "marketplace" | "live-capture",
1393
1437
  extraFields?: Record<string, unknown>,
1394
- ): Promise<OrchestratorResult> {
1438
+ ): Promise<AutoExecDecision> {
1395
1439
  // Only attempt auto-exec if we have an intent to infer params from
1396
1440
  if (intent && intent.trim().length > 0) {
1397
1441
  try {
@@ -1399,13 +1443,20 @@ export async function resolveAndExecute(
1399
1443
  if (autoResult) {
1400
1444
  // Promote to marketplace cache so subsequent requests skip live-capture
1401
1445
  promoteLearnedSkill(clientScope, cacheKey, skill, autoResult.trace.endpoint_id ?? "", context?.url);
1402
- return autoResult;
1446
+ return { orchestratorResult: autoResult, autoexecFailedAll: false };
1403
1447
  }
1448
+ return {
1449
+ orchestratorResult: buildDeferral(skill, source, extraFields),
1450
+ autoexecFailedAll: true,
1451
+ };
1404
1452
  } catch (err) {
1405
1453
  console.warn(`[auto-exec] failed, falling back to deferral: ${(err as Error).message}`);
1406
1454
  }
1407
1455
  }
1408
- return buildDeferral(skill, source, extraFields);
1456
+ return {
1457
+ orchestratorResult: buildDeferral(skill, source, extraFields),
1458
+ autoexecFailedAll: false,
1459
+ };
1409
1460
  }
1410
1461
 
1411
1462
  /** Build a deferral response — returns the skill + ranked endpoints for the agent to choose. */
@@ -1882,6 +1933,7 @@ export async function resolveAndExecute(
1882
1933
  }
1883
1934
 
1884
1935
  const requestedDomain = context?.domain ?? (context?.url ? new URL(context.url).hostname : null);
1936
+ const requestedDomainCacheKey = getDomainReuseKey(context?.url ?? requestedDomain);
1885
1937
  const resolveCacheKey = buildResolveCacheKey(requestedDomain, intent, context?.url);
1886
1938
  const cacheKey = scopedCacheKey(clientScope, resolveCacheKey);
1887
1939
 
@@ -1895,10 +1947,15 @@ export async function resolveAndExecute(
1895
1947
  ) {
1896
1948
  routeResultCache.delete(cacheKey);
1897
1949
  } else {
1898
- timing.cache_hit = true;
1899
1950
  const deferred = await buildDeferralWithAutoExec(cachedResult.skill, "marketplace");
1900
- deferred.timing.cache_hit = true;
1901
- return deferred;
1951
+ if (shouldFallbackToLiveCaptureAfterAutoexecFailure(deferred.autoexecFailedAll, context?.url)) {
1952
+ console.log("[route-result-cache] stale cached skill; retrying via live capture");
1953
+ invalidateResolveCacheEntries([cacheKey], requestedDomainCacheKey ? [requestedDomainCacheKey] : []);
1954
+ } else {
1955
+ timing.cache_hit = true;
1956
+ deferred.orchestratorResult.timing.cache_hit = true;
1957
+ return deferred.orchestratorResult;
1958
+ }
1902
1959
  }
1903
1960
  }
1904
1961
  }
@@ -1940,10 +1997,18 @@ export async function resolveAndExecute(
1940
1997
  context?.url,
1941
1998
  );
1942
1999
  }
1943
- timing.cache_hit = true;
1944
2000
  const deferred = await buildDeferralWithAutoExec(bestCached.skill, "marketplace");
1945
- deferred.timing.cache_hit = true;
1946
- return deferred;
2001
+ if (shouldFallbackToLiveCaptureAfterAutoexecFailure(deferred.autoexecFailedAll, context?.url)) {
2002
+ console.log("[route-cache] stale cached skill; retrying via live capture");
2003
+ invalidateResolveCacheEntries(
2004
+ [cacheKey, bestCached.scopedKey],
2005
+ requestedDomainCacheKey ? [requestedDomainCacheKey] : [],
2006
+ );
2007
+ } else {
2008
+ timing.cache_hit = true;
2009
+ deferred.orchestratorResult.timing.cache_hit = true;
2010
+ return deferred.orchestratorResult;
2011
+ }
1947
2012
  }
1948
2013
  }
1949
2014
 
@@ -1954,11 +2019,16 @@ export async function resolveAndExecute(
1954
2019
  if (domainCached && Date.now() - domainCached.ts < 7 * 24 * 60 * 60_000) {
1955
2020
  const skill = readSkillSnapshot(domainCached.localSkillPath) ?? await getSkill(domainCached.skillId, clientScope);
1956
2021
  if (skill && isCachedSkillRelevantForIntent(skill, intent, context?.url)) {
1957
- timing.cache_hit = true;
1958
2022
  console.log(`[domain-cache] hit for ${domainKey} → skill ${skill.skill_id.slice(0, 15)}`);
1959
2023
  const result = await buildDeferralWithAutoExec(skill, "marketplace");
1960
- result.timing.cache_hit = true;
1961
- return result;
2024
+ if (shouldFallbackToLiveCaptureAfterAutoexecFailure(result.autoexecFailedAll, context?.url)) {
2025
+ console.log(`[domain-cache] stale skill for ${domainKey}; retrying via live capture`);
2026
+ invalidateResolveCacheEntries([cacheKey], [domainKey]);
2027
+ } else {
2028
+ timing.cache_hit = true;
2029
+ result.orchestratorResult.timing.cache_hit = true;
2030
+ return result.orchestratorResult;
2031
+ }
1962
2032
  } else if (skill) {
1963
2033
  const ranked = rankEndpoints(skill.endpoints, intent, skill.domain, context?.url);
1964
2034
  const top = ranked[0];
@@ -2202,7 +2272,11 @@ export async function resolveAndExecute(
2202
2272
  `[search] endpoint-level hit hint: ${best.endpointId} score=${best.candidate.score.toFixed(3)}`,
2203
2273
  );
2204
2274
  }
2205
- return await buildDeferralWithAutoExec(best.skill, "marketplace");
2275
+ const deferred = await buildDeferralWithAutoExec(best.skill, "marketplace");
2276
+ if (!shouldFallbackToLiveCaptureAfterAutoexecFailure(deferred.autoexecFailedAll, context?.url)) {
2277
+ return deferred.orchestratorResult;
2278
+ }
2279
+ console.log("[marketplace] stale top skill; retrying via live capture");
2206
2280
  }
2207
2281
  }
2208
2282
  } // end !forceCapture
@@ -2222,8 +2296,6 @@ export async function resolveAndExecute(
2222
2296
  if (!isCachedSkillRelevantForIntent(domainHit.skill, intent, context?.url)) {
2223
2297
  capturedDomainCache.delete(cacheKey);
2224
2298
  } else {
2225
- timing.cache_hit = true;
2226
- let staleCachedEndpoint = false;
2227
2299
  if (agentChoseEndpoint) {
2228
2300
  const execOut = await executeSkill(
2229
2301
  domainHit.skill,
@@ -2254,14 +2326,20 @@ export async function resolveAndExecute(
2254
2326
  execOut.trace,
2255
2327
  ),
2256
2328
  response_schema: execOut.response_schema,
2257
- extraction_hints: execOut.extraction_hints,
2258
- };
2329
+ extraction_hints: execOut.extraction_hints,
2330
+ };
2259
2331
  }
2260
- staleCachedEndpoint = true;
2332
+ invalidateResolveCacheEntries([cacheKey], requestedDomainCacheKey ? [requestedDomainCacheKey] : []);
2261
2333
  }
2262
2334
  const deferred = await buildDeferralWithAutoExec(domainHit.skill, "marketplace");
2263
- deferred.timing.cache_hit = true;
2264
- return deferred;
2335
+ if (shouldFallbackToLiveCaptureAfterAutoexecFailure(deferred.autoexecFailedAll, context?.url)) {
2336
+ console.log("[captured-domain-cache] stale skill; retrying via live capture");
2337
+ invalidateResolveCacheEntries([cacheKey], requestedDomainCacheKey ? [requestedDomainCacheKey] : []);
2338
+ } else {
2339
+ timing.cache_hit = true;
2340
+ deferred.orchestratorResult.timing.cache_hit = true;
2341
+ return deferred.orchestratorResult;
2342
+ }
2265
2343
  }
2266
2344
  }
2267
2345
 
@@ -2299,13 +2377,13 @@ export async function resolveAndExecute(
2299
2377
  authRecommended
2300
2378
  ? {
2301
2379
  auth_recommended: true,
2302
- auth_hint: captureResult!.auth_hint,
2303
- }
2380
+ auth_hint: captureResult!.auth_hint,
2381
+ }
2304
2382
  : undefined,
2305
2383
  );
2306
- queuePassivePublishIfExecuted(learned_skill, deferred, parityBaseline);
2307
- deferred.timing.cache_hit = true;
2308
- return deferred;
2384
+ queuePassivePublishIfExecuted(learned_skill, deferred.orchestratorResult, parityBaseline);
2385
+ deferred.orchestratorResult.timing.cache_hit = true;
2386
+ return deferred.orchestratorResult;
2309
2387
  }
2310
2388
  return {
2311
2389
  result,
@@ -2596,8 +2674,8 @@ export async function resolveAndExecute(
2596
2674
  }
2597
2675
  : undefined,
2598
2676
  );
2599
- queuePassivePublishIfExecuted(learned_skill, deferred, parityBaseline);
2600
- return deferred;
2677
+ queuePassivePublishIfExecuted(learned_skill, deferred.orchestratorResult, parityBaseline);
2678
+ return deferred.orchestratorResult;
2601
2679
  }
2602
2680
 
2603
2681
  async function getOrCreateBrowserCaptureSkill(): Promise<SkillManifest> {