unbrowse 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/dist/cli.js +455 -96
  2. package/dist/index.js +2 -6
  3. package/dist/mcp.js +695 -46
  4. package/dist/server.js +25811 -0
  5. package/package.json +1 -2
  6. package/vendor/kuri/darwin-arm64/kuri +0 -0
  7. package/vendor/kuri/darwin-x64/kuri +0 -0
  8. package/vendor/kuri/linux-arm64/kuri +0 -0
  9. package/vendor/kuri/linux-x64/kuri +0 -0
  10. package/vendor/kuri/manifest.json +7 -10
  11. package/runtime-src/agent-outcome.ts +0 -166
  12. package/runtime-src/analytics-session.ts +0 -55
  13. package/runtime-src/api/browse-index.ts +0 -317
  14. package/runtime-src/api/browse-session.ts +0 -572
  15. package/runtime-src/api/browse-submit-prereqs.ts +0 -48
  16. package/runtime-src/api/browse-submit.ts +0 -1184
  17. package/runtime-src/api/routes.ts +0 -1823
  18. package/runtime-src/auth/browser-cookies.ts +0 -423
  19. package/runtime-src/auth/index.ts +0 -535
  20. package/runtime-src/auth/runtime.ts +0 -116
  21. package/runtime-src/browser/index.ts +0 -659
  22. package/runtime-src/browser/types.ts +0 -41
  23. package/runtime-src/build-info.generated.ts +0 -6
  24. package/runtime-src/capture/index.ts +0 -1794
  25. package/runtime-src/capture/prefetch.ts +0 -95
  26. package/runtime-src/capture/rsc.ts +0 -45
  27. package/runtime-src/cli/shortcuts.ts +0 -273
  28. package/runtime-src/cli.ts +0 -1572
  29. package/runtime-src/client/graph-client.ts +0 -100
  30. package/runtime-src/client/index.ts +0 -1425
  31. package/runtime-src/debug-trace.ts +0 -18
  32. package/runtime-src/domain.ts +0 -38
  33. package/runtime-src/execution/index.ts +0 -3397
  34. package/runtime-src/execution/retry.ts +0 -46
  35. package/runtime-src/execution/robots.ts +0 -167
  36. package/runtime-src/execution/search-forms.ts +0 -188
  37. package/runtime-src/extraction/index.ts +0 -1507
  38. package/runtime-src/foundry/publish-bundle.ts +0 -392
  39. package/runtime-src/graph/agent-augment.ts +0 -315
  40. package/runtime-src/graph/index.ts +0 -1524
  41. package/runtime-src/graph/local-fixtures.ts +0 -393
  42. package/runtime-src/graph/local-harness.ts +0 -646
  43. package/runtime-src/graph/planner.ts +0 -411
  44. package/runtime-src/graph/session.ts +0 -294
  45. package/runtime-src/graph/trace-store.ts +0 -136
  46. package/runtime-src/index.ts +0 -24
  47. package/runtime-src/indexer/index.ts +0 -465
  48. package/runtime-src/intent-match.ts +0 -1515
  49. package/runtime-src/kuri/client.ts +0 -1839
  50. package/runtime-src/logger.ts +0 -30
  51. package/runtime-src/marketplace/index.ts +0 -103
  52. package/runtime-src/mcp.ts +0 -1747
  53. package/runtime-src/orchestrator/browser-agent.ts +0 -374
  54. package/runtime-src/orchestrator/dag-advisor.ts +0 -59
  55. package/runtime-src/orchestrator/dag-feedback.ts +0 -257
  56. package/runtime-src/orchestrator/first-pass-action.ts +0 -403
  57. package/runtime-src/orchestrator/index.ts +0 -4480
  58. package/runtime-src/orchestrator/passive-publish.ts +0 -187
  59. package/runtime-src/orchestrator/timing-economics.ts +0 -80
  60. package/runtime-src/payments/cascade.ts +0 -137
  61. package/runtime-src/payments/index.ts +0 -270
  62. package/runtime-src/payments/lobster-pay.ts +0 -182
  63. package/runtime-src/payments/wallet.ts +0 -98
  64. package/runtime-src/publish/review-context.ts +0 -93
  65. package/runtime-src/publish/sanitize.ts +0 -197
  66. package/runtime-src/publish/schema-review.ts +0 -192
  67. package/runtime-src/publish-admission.ts +0 -388
  68. package/runtime-src/ratelimit/index.ts +0 -23
  69. package/runtime-src/reverse-engineer/bundle-scanner.ts +0 -127
  70. package/runtime-src/reverse-engineer/description-prompt.ts +0 -213
  71. package/runtime-src/reverse-engineer/index.ts +0 -1551
  72. package/runtime-src/router.ts +0 -17
  73. package/runtime-src/routing-telemetry.ts +0 -395
  74. package/runtime-src/runtime/browser-access.ts +0 -11
  75. package/runtime-src/runtime/browser-auth.ts +0 -12
  76. package/runtime-src/runtime/browser-host.ts +0 -48
  77. package/runtime-src/runtime/lifecycle.ts +0 -17
  78. package/runtime-src/runtime/local-server.ts +0 -311
  79. package/runtime-src/runtime/paths.ts +0 -99
  80. package/runtime-src/runtime/setup.ts +0 -251
  81. package/runtime-src/runtime/supervisor.ts +0 -69
  82. package/runtime-src/runtime/update-hints.ts +0 -351
  83. package/runtime-src/server.ts +0 -100
  84. package/runtime-src/session-logs.ts +0 -142
  85. package/runtime-src/settings.ts +0 -221
  86. package/runtime-src/single-binary.ts +0 -143
  87. package/runtime-src/site-policy.ts +0 -54
  88. package/runtime-src/stale-cleanup-runner.ts +0 -144
  89. package/runtime-src/stale-cleanup.ts +0 -133
  90. package/runtime-src/telemetry-attribution.ts +0 -120
  91. package/runtime-src/telemetry.ts +0 -253
  92. package/runtime-src/template-params.ts +0 -141
  93. package/runtime-src/transform/drift.ts +0 -60
  94. package/runtime-src/transform/index.ts +0 -277
  95. package/runtime-src/types/index.ts +0 -1
  96. package/runtime-src/types/skill.ts +0 -912
  97. package/runtime-src/vault/index.ts +0 -196
  98. package/runtime-src/verification/auth-gate.ts +0 -8
  99. package/runtime-src/verification/candidates.ts +0 -27
  100. package/runtime-src/verification/index.ts +0 -120
  101. package/runtime-src/verification/matrix.ts +0 -30
  102. package/runtime-src/version.ts +0 -148
  103. package/runtime-src/workflow/artifact.ts +0 -161
  104. package/runtime-src/workflow/compile.ts +0 -808
  105. package/runtime-src/workflow/publish.ts +0 -225
  106. package/runtime-src/workflow/runtime.ts +0 -213
  107. package/vendor/kuri/win-x64/kuri.exe +0 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unbrowse",
3
- "version": "3.1.0",
3
+ "version": "3.2.0",
4
4
  "description": "Reverse-engineer any website into reusable API skills. Zero-dep single binary with embedded browser engine.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -9,7 +9,6 @@
9
9
  "files": [
10
10
  "bin",
11
11
  "dist",
12
- "runtime-src",
13
12
  "vendor/kuri",
14
13
  "scripts/release-assets.mjs",
15
14
  "scripts/postinstall.mjs",
Binary file
Binary file
Binary file
Binary file
@@ -1,28 +1,25 @@
1
1
  {
2
2
  "repo_url": "https://github.com/justrach/kuri.git",
3
3
  "branch": "adding-extensions",
4
- "source_sha": "08eecbe3740f046a46f656eed7ebfc66c1bad9bb",
5
- "built_at": "2026-04-05T06:43:57.212Z",
4
+ "source_sha": "eadfaa5f921f7152e1762aed5ed64b3a4fbefbf3",
5
+ "built_at": "2026-04-06T05:01:20.543Z",
6
6
  "binaries": {
7
7
  "darwin-arm64": {
8
8
  "zig_target": "aarch64-macos",
9
- "sha256": "1553633e722d18059dedffa8a52d55ed6c052e4961fd2753ee0b62be60b241bf"
9
+ "sha256": "1796501e393403016723c6b69266b834e2db04ba2559f51c84c957bd85c3927b",
10
+ "source": "prebuilt"
10
11
  },
11
12
  "darwin-x64": {
12
13
  "zig_target": "x86_64-macos",
13
- "sha256": "b5eb07e631c6ddad64019c8d0c86c32cb76a74ff0791ac5611a3aa3550767ec8"
14
+ "sha256": "f9adbebad3b17c10fc359b8125a33eda6890ec728cb2b6c625b36b895ef7c97f"
14
15
  },
15
16
  "linux-arm64": {
16
17
  "zig_target": "aarch64-linux",
17
- "sha256": "ea88a26f7b335d5842b0c1d83bfa4066bed0a119284560f6bd3833f1d240cce2"
18
+ "sha256": "30d1da652d589e5dffa4520615f958db3acf063bd831da9662c97afd50969699"
18
19
  },
19
20
  "linux-x64": {
20
21
  "zig_target": "x86_64-linux",
21
- "sha256": "175a7c59e458e952a26974f0fb5c2ce374e56f2c4c352903b481b5aa5a16978f"
22
- },
23
- "win-x64": {
24
- "zig_target": "x86_64-windows",
25
- "sha256": "176291ad9827a183ba7322ddb56cc1fa5edc7c214a264ecdf8a1d5d18366d686"
22
+ "sha256": "90a8d60715a5c1723b7dae98d90a565b92a781b16ab8721fd546a26f9d86f39f"
26
23
  }
27
24
  }
28
25
  }
@@ -1,166 +0,0 @@
1
- import type { OrchestrationTiming, SkillManifest } from "./types/index.js";
2
-
3
- export interface AgentImpact {
4
- source: string;
5
- cache_hit: boolean;
6
- browser_avoided: boolean;
7
- baseline_total_ms?: number;
8
- actual_total_ms?: number;
9
- time_saved_ms?: number;
10
- time_saved_pct: number;
11
- tokens_saved: number;
12
- tokens_saved_pct: number;
13
- baseline_cost_uc?: number;
14
- actual_cost_uc?: number;
15
- cost_saved_uc?: number;
16
- }
17
-
18
- export interface AgentNextAction {
19
- endpoint_id: string;
20
- operation_id: string;
21
- title: string;
22
- why: string;
23
- command: string;
24
- }
25
-
26
- const BROWSER_SOURCES = new Set(["live-capture", "first-pass", "browser-action"]);
27
-
28
- function edgePriority(kind: string): number {
29
- switch (kind) {
30
- case "parent_child":
31
- return 4;
32
- case "pagination":
33
- return 3;
34
- case "dependency":
35
- return 2;
36
- case "hint":
37
- return 1;
38
- case "auth":
39
- return 0;
40
- default:
41
- return -1;
42
- }
43
- }
44
-
45
- function nextActionWhy(kind: string, bindingKey: string, title: string): string {
46
- switch (kind) {
47
- case "parent_child":
48
- return `Likely next detail step after this result. Exposes ${title}.`;
49
- case "pagination":
50
- return `Likely next page or continuation step. Carries ${bindingKey || "cursor"} forward.`;
51
- case "dependency":
52
- return `Unlocks the next dependent call using ${bindingKey || "known bindings"}.`;
53
- case "auth":
54
- return "Useful once authentication is in place.";
55
- case "hint":
56
- return "Common follow-up action from the current result.";
57
- default:
58
- return "Likely follow-up action.";
59
- }
60
- }
61
-
62
- function operationTitle(operation: NonNullable<SkillManifest["operation_graph"]>["operations"][number]): string {
63
- const semantic = [operation.action_kind, operation.resource_kind]
64
- .filter(Boolean)
65
- .join(" ")
66
- .replace(/_/g, " ")
67
- .trim();
68
- return operation.description_out || semantic || operation.endpoint_id;
69
- }
70
-
71
- export function buildAgentImpact(
72
- timing?: Partial<OrchestrationTiming> | null,
73
- ): AgentImpact | undefined {
74
- if (!timing?.source) return undefined;
75
- return {
76
- source: timing.source,
77
- cache_hit: timing.cache_hit === true,
78
- browser_avoided: !BROWSER_SOURCES.has(timing.source),
79
- baseline_total_ms: timing.baseline_total_ms,
80
- actual_total_ms: timing.actual_total_ms,
81
- time_saved_ms: timing.time_saved_ms,
82
- time_saved_pct: timing.time_saved_pct ?? 0,
83
- tokens_saved: timing.tokens_saved ?? 0,
84
- tokens_saved_pct: timing.tokens_saved_pct ?? 0,
85
- baseline_cost_uc: timing.baseline_cost_uc,
86
- actual_cost_uc: timing.actual_cost_uc,
87
- cost_saved_uc: timing.cost_saved_uc,
88
- };
89
- }
90
-
91
- export function buildNextActions(
92
- skill: SkillManifest | undefined,
93
- endpointId: string | undefined,
94
- maxActions = 3,
95
- ): AgentNextAction[] {
96
- if (!skill?.operation_graph || !endpointId) return [];
97
- const graph = skill.operation_graph;
98
- const current = graph.operations.find((operation) => operation.endpoint_id === endpointId);
99
- if (!current) return [];
100
-
101
- const byOperationId = new Map(graph.operations.map((operation) => [operation.operation_id, operation]));
102
- const scored = new Map<string, {
103
- operation_id: string;
104
- endpoint_id: string;
105
- title: string;
106
- why: string;
107
- score: number;
108
- }>();
109
-
110
- for (const edge of graph.edges) {
111
- if (edge.from_operation_id !== current.operation_id) continue;
112
- const target = byOperationId.get(edge.to_operation_id);
113
- if (!target) continue;
114
-
115
- const candidate = {
116
- operation_id: target.operation_id,
117
- endpoint_id: target.endpoint_id,
118
- title: operationTitle(target),
119
- why: nextActionWhy(edge.kind, edge.binding_key, operationTitle(target)),
120
- score: (edgePriority(edge.kind) * 10) + Math.round(edge.confidence * 10),
121
- };
122
- const existing = scored.get(target.operation_id);
123
- if (!existing || candidate.score > existing.score) {
124
- scored.set(target.operation_id, candidate);
125
- }
126
- }
127
-
128
- return [...scored.values()]
129
- .sort((a, b) => b.score - a.score || a.title.localeCompare(b.title))
130
- .slice(0, maxActions)
131
- .map((candidate) => ({
132
- endpoint_id: candidate.endpoint_id,
133
- operation_id: candidate.operation_id,
134
- title: candidate.title,
135
- why: candidate.why,
136
- command: `unbrowse execute --skill ${skill.skill_id} --endpoint ${candidate.endpoint_id}`,
137
- }));
138
- }
139
-
140
- export function attachAgentOutcomeHints<T extends Record<string, unknown>>(
141
- payload: T,
142
- opts?: {
143
- skill?: SkillManifest;
144
- endpointId?: string;
145
- timing?: Partial<OrchestrationTiming> | null;
146
- },
147
- ): T & {
148
- impact?: AgentImpact;
149
- next_actions?: AgentNextAction[];
150
- } {
151
- const target = payload as Record<string, unknown>;
152
- const impact = buildAgentImpact(opts?.timing);
153
- if (impact) {
154
- target.impact = impact;
155
- }
156
-
157
- const nextActions = buildNextActions(opts?.skill, opts?.endpointId);
158
- if (nextActions.length > 0) {
159
- target.next_actions = nextActions;
160
- }
161
-
162
- return target as T & {
163
- impact?: AgentImpact;
164
- next_actions?: AgentNextAction[];
165
- };
166
- }
@@ -1,55 +0,0 @@
1
- import type { ExecutionTrace, OrchestrationTiming } from "./types/index.js";
2
-
3
- export interface AnalyticsSessionPayload {
4
- session_id: string;
5
- started_at: string;
6
- completed_at?: string;
7
- trace_version?: string;
8
- api_calls: number;
9
- discovery_queries: number;
10
- cached_skill_calls: number;
11
- fresh_index_calls: number;
12
- browser_mode: "default" | "replaced" | "manual" | "unknown";
13
- success?: boolean;
14
- source?: string;
15
- time_saved_ms?: number;
16
- time_saved_pct?: number;
17
- tokens_saved?: number;
18
- tokens_saved_pct?: number;
19
- cost_saved_uc?: number;
20
- }
21
-
22
- export function buildAnalyticsSessionPayload(
23
- sessionId: string,
24
- startedAt: string,
25
- source: OrchestrationTiming["source"] | "first-pass",
26
- trace: Pick<ExecutionTrace, "completed_at" | "trace_version" | "success" | "tokens_saved" | "tokens_saved_pct" | "api_call_count"> & {
27
- network_events?: unknown[];
28
- },
29
- timing?: Pick<OrchestrationTiming, "time_saved_ms" | "time_saved_pct" | "cost_saved_uc">,
30
- ): AnalyticsSessionPayload {
31
- const cacheLike = source === "marketplace" || source === "route-cache";
32
- const browserMode = source === "live-capture" || source === "browser-action"
33
- ? "default"
34
- : source === "first-pass"
35
- ? "default"
36
- : "replaced";
37
- return {
38
- session_id: sessionId,
39
- started_at: startedAt,
40
- completed_at: trace.completed_at,
41
- trace_version: trace.trace_version,
42
- api_calls: trace.api_call_count ?? Math.max(1, trace.network_events?.length ?? 0),
43
- discovery_queries: cacheLike ? 1 : 0,
44
- cached_skill_calls: cacheLike ? 1 : 0,
45
- fresh_index_calls: source === "live-capture" || source === "first-pass" || source === "browser-action" ? 1 : 0,
46
- browser_mode: browserMode,
47
- success: trace.success ?? true,
48
- source,
49
- time_saved_ms: timing?.time_saved_ms,
50
- time_saved_pct: timing?.time_saved_pct,
51
- tokens_saved: trace.tokens_saved,
52
- tokens_saved_pct: trace.tokens_saved_pct,
53
- cost_saved_uc: timing?.cost_saved_uc,
54
- };
55
- }
@@ -1,317 +0,0 @@
1
- import { nanoid } from "nanoid";
2
- import { readFileSync } from "node:fs";
3
- import { extractEndpoints } from "../reverse-engineer/index.js";
4
- import { buildSkillOperationGraph, inferEndpointSemantic } from "../graph/index.js";
5
- import { validateExtractionQuality } from "../execution/index.js";
6
- import { assessIntentResult } from "../intent-match.js";
7
- import type { KuriHarEntry } from "../kuri/client.js";
8
- import type { EndpointDescriptor, SkillManifest } from "../types/index.js";
9
- import type { RawRequest } from "../capture/index.js";
10
- import { cachePublishedSkill, findExistingSkillForDomain } from "../client/index.js";
11
- import { mergeEndpoints } from "../marketplace/index.js";
12
- import { upsertDagEdgesFromOperationGraph } from "../orchestrator/dag-feedback.js";
13
- import {
14
- buildResolveCacheKey,
15
- domainSkillCache,
16
- generateLocalDescription,
17
- getDomainReuseKey,
18
- invalidateRouteCacheForDomain,
19
- persistDomainCache,
20
- scopedCacheKey,
21
- snapshotPathForCacheKey,
22
- writeSkillSnapshot,
23
- } from "../orchestrator/index.js";
24
-
25
- function normalizeBrowseUrl(url: string, baseUrl?: string): string {
26
- if (!url) return url;
27
- try {
28
- return new URL(url).toString();
29
- } catch {
30
- if (!baseUrl) return url;
31
- try {
32
- return new URL(url, baseUrl).toString();
33
- } catch {
34
- return url;
35
- }
36
- }
37
- }
38
-
39
- export function harEntriesToRawRequests(entries: KuriHarEntry[], baseUrl?: string): RawRequest[] {
40
- return entries
41
- .filter((entry) => entry.request && entry.response)
42
- .map((entry) => ({
43
- url: normalizeBrowseUrl(entry.request.url, baseUrl),
44
- method: entry.request.method,
45
- request_headers: Object.fromEntries((entry.request.headers ?? []).map((header) => [header.name.toLowerCase(), header.value])),
46
- request_body: entry.request.postData?.text,
47
- response_status: entry.response.status,
48
- response_headers: Object.fromEntries((entry.response.headers ?? []).map((header) => [header.name.toLowerCase(), header.value])),
49
- response_body: entry.response.content?.text,
50
- timestamp: entry.startedDateTime ?? new Date().toISOString(),
51
- }));
52
- }
53
-
54
- export function buildBrowseRequestKey(request: RawRequest): string {
55
- return [
56
- request.method,
57
- request.url,
58
- typeof request.request_body === "string" ? request.request_body : JSON.stringify(request.request_body ?? null),
59
- ].join(":");
60
- }
61
-
62
- export function mergeBrowseRequests(intercepted: RawRequest[], harEntries: KuriHarEntry[], baseUrl?: string): RawRequest[] {
63
- const normalizedIntercepted = intercepted.map((request) => ({
64
- ...request,
65
- url: normalizeBrowseUrl(request.url, baseUrl),
66
- }));
67
- const harRequests = harEntriesToRawRequests(harEntries, baseUrl);
68
- const seen = new Set<string>();
69
- const allRequests: RawRequest[] = [];
70
-
71
- for (const request of normalizedIntercepted) {
72
- const key = buildBrowseRequestKey(request);
73
- if (!seen.has(key)) {
74
- seen.add(key);
75
- allRequests.push(request);
76
- }
77
- }
78
-
79
- for (const request of harRequests) {
80
- const key = buildBrowseRequestKey(request);
81
- if (!seen.has(key)) {
82
- seen.add(key);
83
- allRequests.push(request);
84
- }
85
- }
86
-
87
- return allRequests;
88
- }
89
-
90
- export interface BrowseIndexResult {
91
- domain: string;
92
- indexed: boolean;
93
- mode: "http" | "dom" | "none";
94
- skill: SkillManifest | null;
95
- }
96
-
97
- export function shouldIndexDomBrowseFallback(params: {
98
- requestCount: number;
99
- intent: string;
100
- extractedData: unknown;
101
- extractedConfidence: number;
102
- hasStructuredForm: boolean;
103
- }): {
104
- allow: boolean;
105
- reason?: string;
106
- intentLooksSearch: boolean;
107
- } {
108
- const { requestCount, intent, extractedData, extractedConfidence, hasStructuredForm } = params;
109
- const intentLooksSearch = /\b(search|find|lookup|filter)\b/i.test(intent);
110
-
111
- if (!extractedData) {
112
- if (hasStructuredForm && requestCount > 0 && intentLooksSearch) {
113
- return { allow: true, intentLooksSearch };
114
- }
115
- return {
116
- allow: false,
117
- reason: hasStructuredForm ? "form_only_without_network_evidence" : "no_dom_data",
118
- intentLooksSearch,
119
- };
120
- }
121
-
122
- const quality = validateExtractionQuality(extractedData, extractedConfidence, intent);
123
- if (!quality.valid) {
124
- if (hasStructuredForm && requestCount > 0 && intentLooksSearch) {
125
- return { allow: true, intentLooksSearch };
126
- }
127
- return {
128
- allow: false,
129
- reason: quality.quality_note ?? "low_quality_dom_extraction",
130
- intentLooksSearch,
131
- };
132
- }
133
-
134
- const semanticAssessment = assessIntentResult(extractedData, intent);
135
- if (semanticAssessment.verdict === "fail") {
136
- if (hasStructuredForm && requestCount > 0 && intentLooksSearch) {
137
- return { allow: true, intentLooksSearch };
138
- }
139
- return {
140
- allow: false,
141
- reason: semanticAssessment.reason ?? "dom_extraction_did_not_match_intent",
142
- intentLooksSearch,
143
- };
144
- }
145
-
146
- return { allow: true, intentLooksSearch };
147
- }
148
-
149
- export async function cacheBrowseRequests(params: {
150
- sessionUrl: string;
151
- sessionDomain: string;
152
- requests: RawRequest[];
153
- getPageHtml?: () => Promise<string>;
154
- intent?: string;
155
- }): Promise<BrowseIndexResult> {
156
- const { sessionUrl, sessionDomain, requests, getPageHtml } = params;
157
- let domain: string;
158
- try { domain = new URL(sessionUrl).hostname; } catch { domain = sessionDomain; }
159
- const intent = params.intent ?? `browse ${domain}`;
160
-
161
- const rawEndpoints = extractEndpoints(requests, undefined, { pageUrl: sessionUrl, finalUrl: sessionUrl });
162
- if (rawEndpoints.length > 0) {
163
- const existingSkill = findExistingSkillForDomain(domain);
164
- let allExisting = existingSkill?.endpoints ?? [];
165
-
166
- const domainKey = getDomainReuseKey(sessionUrl ?? domain);
167
- if (domainKey) {
168
- const cached = domainSkillCache.get(domainKey);
169
- if (cached?.localSkillPath) {
170
- try {
171
- const snapshot = JSON.parse(readFileSync(cached.localSkillPath, "utf-8"));
172
- if (snapshot?.endpoints?.length > 0) {
173
- allExisting = mergeEndpoints(allExisting, snapshot.endpoints);
174
- }
175
- } catch {
176
- // ignore stale snapshot
177
- }
178
- }
179
- }
180
-
181
- const mergedEndpoints = allExisting.length > 0 ? mergeEndpoints(allExisting, rawEndpoints) : rawEndpoints;
182
- if (!existingSkill || mergedEndpoints.length >= existingSkill.endpoints.length) {
183
- for (const endpoint of mergedEndpoints) {
184
- if (!endpoint.description) endpoint.description = generateLocalDescription(endpoint);
185
- }
186
-
187
- const quickSkill: SkillManifest = {
188
- skill_id: existingSkill?.skill_id ?? nanoid(),
189
- version: "1.0.0",
190
- schema_version: "1",
191
- lifecycle: "active",
192
- execution_type: "http",
193
- created_at: existingSkill?.created_at ?? new Date().toISOString(),
194
- updated_at: new Date().toISOString(),
195
- name: domain,
196
- intent_signature: intent,
197
- domain,
198
- description: `API skill for ${domain}`,
199
- owner_type: "agent",
200
- endpoints: mergedEndpoints,
201
- operation_graph: buildSkillOperationGraph(mergedEndpoints),
202
- intents: Array.from(new Set([...(existingSkill?.intents ?? []), intent])),
203
- };
204
-
205
- const cacheKey = buildResolveCacheKey(domain, intent, sessionUrl);
206
- const scopedKey = scopedCacheKey("global", cacheKey);
207
- writeSkillSnapshot(scopedKey, quickSkill);
208
- if (domainKey) {
209
- domainSkillCache.set(domainKey, {
210
- skillId: quickSkill.skill_id,
211
- localSkillPath: snapshotPathForCacheKey(scopedKey),
212
- ts: Date.now(),
213
- });
214
- persistDomainCache();
215
- }
216
- try { cachePublishedSkill(quickSkill); } catch {}
217
- upsertDagEdgesFromOperationGraph(quickSkill);
218
- invalidateRouteCacheForDomain(domain);
219
- return { domain, indexed: true, mode: "http", skill: quickSkill };
220
- }
221
-
222
- return { domain, indexed: false, mode: "http", skill: existingSkill ?? null };
223
- }
224
-
225
- if (!getPageHtml) return { domain, indexed: false, mode: "none", skill: null };
226
-
227
- try {
228
- const html = await getPageHtml();
229
- if (!html || !html.startsWith("<")) return { domain, indexed: false, mode: "none", skill: null };
230
-
231
- const { extractFromDOM } = await import("../extraction/index.js");
232
- const { detectSearchForms, isStructuredSearchForm } = await import("../execution/search-forms.js");
233
- const { inferSchema } = await import("../transform/index.js");
234
- const { templatizeQueryParams } = await import("../execution/index.js");
235
-
236
- const extracted = extractFromDOM(html, intent);
237
- const searchForms = detectSearchForms(html);
238
- const validForm = searchForms.find((form: { form_selector: string; fields: unknown[] }) => isStructuredSearchForm(form));
239
- const domDecision = shouldIndexDomBrowseFallback({
240
- requestCount: requests.length,
241
- intent,
242
- extractedData: extracted.data,
243
- extractedConfidence: extracted.confidence,
244
- hasStructuredForm: !!validForm,
245
- });
246
-
247
- if (!domDecision.allow || !extracted.data) return { domain, indexed: false, mode: "none", skill: null };
248
-
249
- const urlTemplate = templatizeQueryParams(sessionUrl);
250
- const endpoint: EndpointDescriptor = {
251
- endpoint_id: nanoid(),
252
- method: "GET",
253
- url_template: urlTemplate,
254
- idempotency: "safe",
255
- verification_status: "verified",
256
- reliability_score: extracted.confidence ?? 0.7,
257
- description: validForm && domDecision.intentLooksSearch ? `Search form for ${domain}` : `Page content from ${domain}`,
258
- response_schema: inferSchema([extracted.data]),
259
- dom_extraction: {
260
- extraction_method: extracted.extraction_method ?? "repeated-elements",
261
- confidence: extracted.confidence ?? 0.7,
262
- ...(extracted.selector ? { selector: extracted.selector } : {}),
263
- },
264
- trigger_url: sessionUrl,
265
- ...(validForm && domDecision.intentLooksSearch ? { search_form: validForm } : {}),
266
- };
267
-
268
- endpoint.semantic = inferEndpointSemantic(endpoint, {
269
- sampleResponse: extracted.data,
270
- observedAt: new Date().toISOString(),
271
- sampleRequestUrl: sessionUrl,
272
- });
273
-
274
- const existing = findExistingSkillForDomain(domain);
275
- const allEndpoints = existing ? mergeEndpoints(existing.endpoints, [endpoint]) : [endpoint];
276
- for (const candidate of allEndpoints) {
277
- if (!candidate.description) candidate.description = generateLocalDescription(candidate);
278
- }
279
-
280
- const skill: SkillManifest = {
281
- skill_id: existing?.skill_id ?? nanoid(),
282
- version: "1.0.0",
283
- schema_version: "1",
284
- lifecycle: "active",
285
- execution_type: "http",
286
- created_at: existing?.created_at ?? new Date().toISOString(),
287
- updated_at: new Date().toISOString(),
288
- name: domain,
289
- intent_signature: intent,
290
- domain,
291
- description: `DOM skill for ${domain}`,
292
- owner_type: "agent",
293
- endpoints: allEndpoints,
294
- operation_graph: buildSkillOperationGraph(allEndpoints),
295
- intents: [...new Set([...(existing?.intents ?? []), intent])],
296
- };
297
-
298
- const cacheKey = buildResolveCacheKey(domain, intent, sessionUrl);
299
- const scopedKey = scopedCacheKey("global", cacheKey);
300
- writeSkillSnapshot(scopedKey, skill);
301
- const domainReuseKey = getDomainReuseKey(sessionUrl ?? domain);
302
- if (domainReuseKey) {
303
- domainSkillCache.set(domainReuseKey, {
304
- skillId: skill.skill_id,
305
- localSkillPath: snapshotPathForCacheKey(scopedKey),
306
- ts: Date.now(),
307
- });
308
- persistDomainCache();
309
- }
310
- try { cachePublishedSkill(skill); } catch {}
311
- upsertDagEdgesFromOperationGraph(skill);
312
- invalidateRouteCacheForDomain(domain);
313
- return { domain, indexed: true, mode: "dom", skill };
314
- } catch {
315
- return { domain, indexed: false, mode: "none", skill: null };
316
- }
317
- }