unbrowse 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +455 -96
- package/dist/index.js +2 -6
- package/dist/mcp.js +695 -46
- package/dist/server.js +25811 -0
- package/package.json +1 -2
- package/vendor/kuri/darwin-arm64/kuri +0 -0
- package/vendor/kuri/darwin-x64/kuri +0 -0
- package/vendor/kuri/linux-arm64/kuri +0 -0
- package/vendor/kuri/linux-x64/kuri +0 -0
- package/vendor/kuri/manifest.json +7 -10
- package/runtime-src/agent-outcome.ts +0 -166
- package/runtime-src/analytics-session.ts +0 -55
- package/runtime-src/api/browse-index.ts +0 -317
- package/runtime-src/api/browse-session.ts +0 -572
- package/runtime-src/api/browse-submit-prereqs.ts +0 -48
- package/runtime-src/api/browse-submit.ts +0 -1184
- package/runtime-src/api/routes.ts +0 -1823
- package/runtime-src/auth/browser-cookies.ts +0 -423
- package/runtime-src/auth/index.ts +0 -535
- package/runtime-src/auth/runtime.ts +0 -116
- package/runtime-src/browser/index.ts +0 -659
- package/runtime-src/browser/types.ts +0 -41
- package/runtime-src/build-info.generated.ts +0 -6
- package/runtime-src/capture/index.ts +0 -1794
- package/runtime-src/capture/prefetch.ts +0 -95
- package/runtime-src/capture/rsc.ts +0 -45
- package/runtime-src/cli/shortcuts.ts +0 -273
- package/runtime-src/cli.ts +0 -1572
- package/runtime-src/client/graph-client.ts +0 -100
- package/runtime-src/client/index.ts +0 -1425
- package/runtime-src/debug-trace.ts +0 -18
- package/runtime-src/domain.ts +0 -38
- package/runtime-src/execution/index.ts +0 -3397
- package/runtime-src/execution/retry.ts +0 -46
- package/runtime-src/execution/robots.ts +0 -167
- package/runtime-src/execution/search-forms.ts +0 -188
- package/runtime-src/extraction/index.ts +0 -1507
- package/runtime-src/foundry/publish-bundle.ts +0 -392
- package/runtime-src/graph/agent-augment.ts +0 -315
- package/runtime-src/graph/index.ts +0 -1524
- package/runtime-src/graph/local-fixtures.ts +0 -393
- package/runtime-src/graph/local-harness.ts +0 -646
- package/runtime-src/graph/planner.ts +0 -411
- package/runtime-src/graph/session.ts +0 -294
- package/runtime-src/graph/trace-store.ts +0 -136
- package/runtime-src/index.ts +0 -24
- package/runtime-src/indexer/index.ts +0 -465
- package/runtime-src/intent-match.ts +0 -1515
- package/runtime-src/kuri/client.ts +0 -1839
- package/runtime-src/logger.ts +0 -30
- package/runtime-src/marketplace/index.ts +0 -103
- package/runtime-src/mcp.ts +0 -1747
- package/runtime-src/orchestrator/browser-agent.ts +0 -374
- package/runtime-src/orchestrator/dag-advisor.ts +0 -59
- package/runtime-src/orchestrator/dag-feedback.ts +0 -257
- package/runtime-src/orchestrator/first-pass-action.ts +0 -403
- package/runtime-src/orchestrator/index.ts +0 -4480
- package/runtime-src/orchestrator/passive-publish.ts +0 -187
- package/runtime-src/orchestrator/timing-economics.ts +0 -80
- package/runtime-src/payments/cascade.ts +0 -137
- package/runtime-src/payments/index.ts +0 -270
- package/runtime-src/payments/lobster-pay.ts +0 -182
- package/runtime-src/payments/wallet.ts +0 -98
- package/runtime-src/publish/review-context.ts +0 -93
- package/runtime-src/publish/sanitize.ts +0 -197
- package/runtime-src/publish/schema-review.ts +0 -192
- package/runtime-src/publish-admission.ts +0 -388
- package/runtime-src/ratelimit/index.ts +0 -23
- package/runtime-src/reverse-engineer/bundle-scanner.ts +0 -127
- package/runtime-src/reverse-engineer/description-prompt.ts +0 -213
- package/runtime-src/reverse-engineer/index.ts +0 -1551
- package/runtime-src/router.ts +0 -17
- package/runtime-src/routing-telemetry.ts +0 -395
- package/runtime-src/runtime/browser-access.ts +0 -11
- package/runtime-src/runtime/browser-auth.ts +0 -12
- package/runtime-src/runtime/browser-host.ts +0 -48
- package/runtime-src/runtime/lifecycle.ts +0 -17
- package/runtime-src/runtime/local-server.ts +0 -311
- package/runtime-src/runtime/paths.ts +0 -99
- package/runtime-src/runtime/setup.ts +0 -251
- package/runtime-src/runtime/supervisor.ts +0 -69
- package/runtime-src/runtime/update-hints.ts +0 -351
- package/runtime-src/server.ts +0 -100
- package/runtime-src/session-logs.ts +0 -142
- package/runtime-src/settings.ts +0 -221
- package/runtime-src/single-binary.ts +0 -143
- package/runtime-src/site-policy.ts +0 -54
- package/runtime-src/stale-cleanup-runner.ts +0 -144
- package/runtime-src/stale-cleanup.ts +0 -133
- package/runtime-src/telemetry-attribution.ts +0 -120
- package/runtime-src/telemetry.ts +0 -253
- package/runtime-src/template-params.ts +0 -141
- package/runtime-src/transform/drift.ts +0 -60
- package/runtime-src/transform/index.ts +0 -277
- package/runtime-src/types/index.ts +0 -1
- package/runtime-src/types/skill.ts +0 -912
- package/runtime-src/vault/index.ts +0 -196
- package/runtime-src/verification/auth-gate.ts +0 -8
- package/runtime-src/verification/candidates.ts +0 -27
- package/runtime-src/verification/index.ts +0 -120
- package/runtime-src/verification/matrix.ts +0 -30
- package/runtime-src/version.ts +0 -148
- package/runtime-src/workflow/artifact.ts +0 -161
- package/runtime-src/workflow/compile.ts +0 -808
- package/runtime-src/workflow/publish.ts +0 -225
- package/runtime-src/workflow/runtime.ts +0 -213
- package/vendor/kuri/win-x64/kuri.exe +0 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "unbrowse",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.2.0",
|
|
4
4
|
"description": "Reverse-engineer any website into reusable API skills. Zero-dep single binary with embedded browser engine.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -9,7 +9,6 @@
|
|
|
9
9
|
"files": [
|
|
10
10
|
"bin",
|
|
11
11
|
"dist",
|
|
12
|
-
"runtime-src",
|
|
13
12
|
"vendor/kuri",
|
|
14
13
|
"scripts/release-assets.mjs",
|
|
15
14
|
"scripts/postinstall.mjs",
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,28 +1,25 @@
|
|
|
1
1
|
{
|
|
2
2
|
"repo_url": "https://github.com/justrach/kuri.git",
|
|
3
3
|
"branch": "adding-extensions",
|
|
4
|
-
"source_sha": "
|
|
5
|
-
"built_at": "2026-04-
|
|
4
|
+
"source_sha": "eadfaa5f921f7152e1762aed5ed64b3a4fbefbf3",
|
|
5
|
+
"built_at": "2026-04-06T05:01:20.543Z",
|
|
6
6
|
"binaries": {
|
|
7
7
|
"darwin-arm64": {
|
|
8
8
|
"zig_target": "aarch64-macos",
|
|
9
|
-
"sha256": "
|
|
9
|
+
"sha256": "1796501e393403016723c6b69266b834e2db04ba2559f51c84c957bd85c3927b",
|
|
10
|
+
"source": "prebuilt"
|
|
10
11
|
},
|
|
11
12
|
"darwin-x64": {
|
|
12
13
|
"zig_target": "x86_64-macos",
|
|
13
|
-
"sha256": "
|
|
14
|
+
"sha256": "f9adbebad3b17c10fc359b8125a33eda6890ec728cb2b6c625b36b895ef7c97f"
|
|
14
15
|
},
|
|
15
16
|
"linux-arm64": {
|
|
16
17
|
"zig_target": "aarch64-linux",
|
|
17
|
-
"sha256": "
|
|
18
|
+
"sha256": "30d1da652d589e5dffa4520615f958db3acf063bd831da9662c97afd50969699"
|
|
18
19
|
},
|
|
19
20
|
"linux-x64": {
|
|
20
21
|
"zig_target": "x86_64-linux",
|
|
21
|
-
"sha256": "
|
|
22
|
-
},
|
|
23
|
-
"win-x64": {
|
|
24
|
-
"zig_target": "x86_64-windows",
|
|
25
|
-
"sha256": "176291ad9827a183ba7322ddb56cc1fa5edc7c214a264ecdf8a1d5d18366d686"
|
|
22
|
+
"sha256": "90a8d60715a5c1723b7dae98d90a565b92a781b16ab8721fd546a26f9d86f39f"
|
|
26
23
|
}
|
|
27
24
|
}
|
|
28
25
|
}
|
|
@@ -1,166 +0,0 @@
|
|
|
1
|
-
import type { OrchestrationTiming, SkillManifest } from "./types/index.js";
|
|
2
|
-
|
|
3
|
-
/**
 * Savings summary attached to agent-facing payloads.
 *
 * Produced by `buildAgentImpact` from a (partial) orchestration timing record.
 */
export interface AgentImpact {
  // --- provenance -------------------------------------------------------
  /** Copied verbatim from `timing.source`. */
  source: string;
  /** `true` only when the timing record has `cache_hit === true`. */
  cache_hit: boolean;
  /** `false` when `source` is one of the browser-backed sources, `true` otherwise. */
  browser_avoided: boolean;

  // --- time (field names suggest milliseconds — confirm against OrchestrationTiming) ---
  baseline_total_ms?: number;
  actual_total_ms?: number;
  time_saved_ms?: number;
  /** Defaults to 0 when the timing record does not carry a value. */
  time_saved_pct: number;

  // --- tokens -----------------------------------------------------------
  /** Defaults to 0 when the timing record does not carry a value. */
  tokens_saved: number;
  /** Defaults to 0 when the timing record does not carry a value. */
  tokens_saved_pct: number;

  // --- cost (unit of the `_uc` suffix is not visible here — TODO confirm) ---
  baseline_cost_uc?: number;
  actual_cost_uc?: number;
  cost_saved_uc?: number;
}
|
|
17
|
-
|
|
18
|
-
/**
 * A suggested follow-up operation, as emitted by `buildNextActions`.
 */
export interface AgentNextAction {
  /** Endpoint id of the suggested target operation. */
  endpoint_id: string;
  /** Operation id of the suggested target operation. */
  operation_id: string;
  /** Human-readable label for the target operation. */
  title: string;
  /** Short rationale derived from the kind of graph edge leading to the target. */
  why: string;
  /** Ready-to-run CLI invocation (`unbrowse execute --skill … --endpoint …`). */
  command: string;
}
|
|
25
|
-
|
|
26
|
-
/** Timing sources for which `buildAgentImpact` reports `browser_avoided: false`. */
const BROWSER_SOURCES = new Set<string>([
  "live-capture",
  "first-pass",
  "browser-action",
]);
|
|
27
|
-
|
|
28
|
-
/** Rank per known edge kind; higher ranks are preferred when scoring next actions. */
const EDGE_KIND_PRIORITY: ReadonlyMap<string, number> = new Map([
  ["parent_child", 4],
  ["pagination", 3],
  ["dependency", 2],
  ["hint", 1],
  ["auth", 0],
]);

/**
 * Returns the ranking weight for an operation-graph edge kind.
 *
 * @param kind - Edge kind string taken from the operation graph.
 * @returns 4..0 for the known kinds, or -1 for anything unrecognised.
 */
function edgePriority(kind: string): number {
  // A Map (not a plain object) so that keys like "constructor" still miss.
  return EDGE_KIND_PRIORITY.get(kind) ?? -1;
}
|
|
44
|
-
|
|
45
|
-
/**
 * Builds the one-line rationale shown next to a suggested follow-up action.
 *
 * @param kind - Edge kind connecting the current operation to the target.
 * @param bindingKey - Binding carried along the edge; an empty value falls back to a generic phrase.
 * @param title - Display title of the target operation (only used for `parent_child`).
 * @returns A fixed English sentence for the given edge kind.
 */
function nextActionWhy(kind: string, bindingKey: string, title: string): string {
  if (kind === "parent_child") {
    return `Likely next detail step after this result. Exposes ${title}.`;
  }
  if (kind === "pagination") {
    return `Likely next page or continuation step. Carries ${bindingKey || "cursor"} forward.`;
  }
  if (kind === "dependency") {
    return `Unlocks the next dependent call using ${bindingKey || "known bindings"}.`;
  }
  if (kind === "auth") {
    return "Useful once authentication is in place.";
  }
  if (kind === "hint") {
    return "Common follow-up action from the current result.";
  }
  return "Likely follow-up action.";
}
|
|
61
|
-
|
|
62
|
-
/**
 * Picks a display title for an operation-graph node.
 *
 * Preference order: the operation's `description_out`, then a phrase built from
 * `action_kind` + `resource_kind` (underscores turned into spaces), then the
 * raw `endpoint_id`.
 */
function operationTitle(operation: NonNullable<SkillManifest["operation_graph"]>["operations"][number]): string {
  if (operation.description_out) {
    return operation.description_out;
  }

  const words = [operation.action_kind, operation.resource_kind].filter((part) => Boolean(part));
  const semantic = words.join(" ").split("_").join(" ").trim();

  return semantic || operation.endpoint_id;
}
|
|
70
|
-
|
|
71
|
-
/**
 * Converts an orchestration timing record into the agent-facing impact summary.
 *
 * @param timing - Possibly partial timing record; may be `null`/`undefined`.
 * @returns `undefined` when there is no timing or it has no `source`; otherwise
 *   an `AgentImpact` whose percentage/token counters default to 0.
 */
export function buildAgentImpact(
  timing?: Partial<OrchestrationTiming> | null,
): AgentImpact | undefined {
  const source = timing?.source;
  if (!timing || !source) {
    return undefined;
  }

  // A source counts as "browser avoided" unless it is one of the browser-backed sources.
  const usedBrowser = BROWSER_SOURCES.has(source);

  return {
    source,
    cache_hit: timing.cache_hit === true,
    browser_avoided: !usedBrowser,
    baseline_total_ms: timing.baseline_total_ms,
    actual_total_ms: timing.actual_total_ms,
    time_saved_ms: timing.time_saved_ms,
    time_saved_pct: timing.time_saved_pct ?? 0,
    tokens_saved: timing.tokens_saved ?? 0,
    tokens_saved_pct: timing.tokens_saved_pct ?? 0,
    baseline_cost_uc: timing.baseline_cost_uc,
    actual_cost_uc: timing.actual_cost_uc,
    cost_saved_uc: timing.cost_saved_uc,
  };
}
|
|
90
|
-
|
|
91
|
-
/**
 * Suggests follow-up operations reachable from the endpoint that was just executed.
 *
 * Walks the outgoing edges of the current operation in the skill's operation
 * graph, keeps the best-scoring edge per target operation, and returns the top
 * `maxActions` targets ordered by score (ties broken by title).
 *
 * @param skill - Skill whose `operation_graph` is consulted; `undefined` yields `[]`.
 * @param endpointId - Endpoint id of the operation that just ran; `undefined` yields `[]`.
 * @param maxActions - Upper bound on returned suggestions. Zero, negative or NaN yields `[]`.
 * @returns Suggested actions, each with a ready-to-run `unbrowse execute` command.
 */
export function buildNextActions(
  skill: SkillManifest | undefined,
  endpointId: string | undefined,
  maxActions = 3,
): AgentNextAction[] {
  const graph = skill?.operation_graph;
  if (!skill || !graph || !endpointId) return [];

  // Guard explicitly: a negative bound passed straight to slice(0, n) would
  // drop items from the END of the list instead of returning nothing.
  if (!(maxActions > 0)) return [];

  const current = graph.operations.find((operation) => operation.endpoint_id === endpointId);
  if (!current) return [];

  const byOperationId = new Map(graph.operations.map((operation) => [operation.operation_id, operation]));
  const scored = new Map<string, {
    operation_id: string;
    endpoint_id: string;
    title: string;
    why: string;
    score: number;
  }>();

  for (const edge of graph.edges) {
    if (edge.from_operation_id !== current.operation_id) continue;
    const target = byOperationId.get(edge.to_operation_id);
    if (!target) continue;

    // Compute the title once; it feeds both the label and the rationale.
    const title = operationTitle(target);

    // NOTE(review): assumes confidence is normally a number in [0, 1]. A missing
    // or non-numeric value would make the score NaN and the sort comparator
    // inconsistent, so treat it as 0.
    const rawConfidence = Number(edge.confidence);
    const confidence = Number.isNaN(rawConfidence) ? 0 : rawConfidence;

    const candidate = {
      operation_id: target.operation_id,
      endpoint_id: target.endpoint_id,
      title,
      why: nextActionWhy(edge.kind, edge.binding_key, title),
      // Edge kind dominates (x10); confidence only orders edges of similar kind.
      score: (edgePriority(edge.kind) * 10) + Math.round(confidence * 10),
    };

    // Several edges may point at the same target: keep the strongest one.
    const existing = scored.get(target.operation_id);
    if (!existing || candidate.score > existing.score) {
      scored.set(target.operation_id, candidate);
    }
  }

  return [...scored.values()]
    .sort((a, b) => b.score - a.score || a.title.localeCompare(b.title))
    .slice(0, maxActions)
    .map((candidate) => ({
      endpoint_id: candidate.endpoint_id,
      operation_id: candidate.operation_id,
      title: candidate.title,
      why: candidate.why,
      command: `unbrowse execute --skill ${skill.skill_id} --endpoint ${candidate.endpoint_id}`,
    }));
}
|
|
139
|
-
|
|
140
|
-
/**
 * Decorates a response payload with optional `impact` and `next_actions` hints.
 *
 * The payload object is mutated in place and the same reference is returned.
 * `impact` is only set when a timing record with a source is supplied;
 * `next_actions` is only set when at least one follow-up was found.
 *
 * @param payload - Object to decorate.
 * @param opts - Optional skill / endpoint / timing context used to derive the hints.
 * @returns The same `payload` object, typed with the optional hint fields.
 */
export function attachAgentOutcomeHints<T extends Record<string, unknown>>(
  payload: T,
  opts?: {
    skill?: SkillManifest;
    endpointId?: string;
    timing?: Partial<OrchestrationTiming> | null;
  },
): T & {
  impact?: AgentImpact;
  next_actions?: AgentNextAction[];
} {
  // Widened view of the very same object so the extra keys can be written.
  const sink: Record<string, unknown> = payload;

  const impact = buildAgentImpact(opts?.timing);
  if (impact !== undefined) {
    sink.impact = impact;
  }

  const followUps = buildNextActions(opts?.skill, opts?.endpointId);
  if (followUps.length !== 0) {
    sink.next_actions = followUps;
  }

  return sink as T & {
    impact?: AgentImpact;
    next_actions?: AgentNextAction[];
  };
}
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
import type { ExecutionTrace, OrchestrationTiming } from "./types/index.js";
|
|
2
|
-
|
|
3
|
-
/**
 * Per-session analytics record, as assembled by `buildAnalyticsSessionPayload`.
 */
export interface AnalyticsSessionPayload {
  // --- identity / lifecycle --------------------------------------------
  session_id: string;
  started_at: string;
  completed_at?: string;
  trace_version?: string;

  // --- counters ---------------------------------------------------------
  /** Trace's `api_call_count`, else the number of network events (at least 1). */
  api_calls: number;
  /** 1 for cache-like sources (`marketplace`, `route-cache`), otherwise 0. */
  discovery_queries: number;
  /** 1 for cache-like sources (`marketplace`, `route-cache`), otherwise 0. */
  cached_skill_calls: number;
  /** 1 for `live-capture`, `first-pass` or `browser-action`, otherwise 0. */
  fresh_index_calls: number;

  // --- outcome ----------------------------------------------------------
  /** The builder in this file only ever emits "default" or "replaced". */
  browser_mode: "default" | "replaced" | "manual" | "unknown";
  success?: boolean;
  source?: string;

  // --- savings ----------------------------------------------------------
  time_saved_ms?: number;
  time_saved_pct?: number;
  tokens_saved?: number;
  tokens_saved_pct?: number;
  cost_saved_uc?: number;
}
|
|
21
|
-
|
|
22
|
-
/**
 * Builds the analytics record for one completed session.
 *
 * @param sessionId - Identifier reported as `session_id`.
 * @param startedAt - Timestamp string reported as `started_at`.
 * @param source - Where the result came from; drives the counters and `browser_mode`.
 * @param trace - Execution trace fields (plus optional raw network events used
 *   as a fallback for the API call count).
 * @param timing - Optional time/cost savings to copy through.
 * @returns The assembled payload; `success` defaults to `true` when the trace has none.
 */
export function buildAnalyticsSessionPayload(
  sessionId: string,
  startedAt: string,
  source: OrchestrationTiming["source"] | "first-pass",
  trace: Pick<ExecutionTrace, "completed_at" | "trace_version" | "success" | "tokens_saved" | "tokens_saved_pct" | "api_call_count"> & {
    network_events?: unknown[];
  },
  timing?: Pick<OrchestrationTiming, "time_saved_ms" | "time_saved_pct" | "cost_saved_uc">,
): AnalyticsSessionPayload {
  // Sources served from a cache count as one discovery query and one cached call.
  const servedFromCache = source === "marketplace" || source === "route-cache";
  // Sources that went through the browser count as a fresh index and keep browser_mode "default".
  const wentThroughBrowser =
    source === "live-capture" || source === "first-pass" || source === "browser-action";

  const browserMode: AnalyticsSessionPayload["browser_mode"] = wentThroughBrowser ? "default" : "replaced";
  const cacheCount = servedFromCache ? 1 : 0;

  // Prefer the trace's own count; otherwise fall back to observed events, never below 1.
  const observedEvents = trace.network_events?.length ?? 0;
  const apiCalls = trace.api_call_count ?? Math.max(1, observedEvents);

  return {
    session_id: sessionId,
    started_at: startedAt,
    completed_at: trace.completed_at,
    trace_version: trace.trace_version,
    api_calls: apiCalls,
    discovery_queries: cacheCount,
    cached_skill_calls: cacheCount,
    fresh_index_calls: wentThroughBrowser ? 1 : 0,
    browser_mode: browserMode,
    success: trace.success ?? true,
    source,
    time_saved_ms: timing?.time_saved_ms,
    time_saved_pct: timing?.time_saved_pct,
    tokens_saved: trace.tokens_saved,
    tokens_saved_pct: trace.tokens_saved_pct,
    cost_saved_uc: timing?.cost_saved_uc,
  };
}
|
|
@@ -1,317 +0,0 @@
|
|
|
1
|
-
import { nanoid } from "nanoid";
|
|
2
|
-
import { readFileSync } from "node:fs";
|
|
3
|
-
import { extractEndpoints } from "../reverse-engineer/index.js";
|
|
4
|
-
import { buildSkillOperationGraph, inferEndpointSemantic } from "../graph/index.js";
|
|
5
|
-
import { validateExtractionQuality } from "../execution/index.js";
|
|
6
|
-
import { assessIntentResult } from "../intent-match.js";
|
|
7
|
-
import type { KuriHarEntry } from "../kuri/client.js";
|
|
8
|
-
import type { EndpointDescriptor, SkillManifest } from "../types/index.js";
|
|
9
|
-
import type { RawRequest } from "../capture/index.js";
|
|
10
|
-
import { cachePublishedSkill, findExistingSkillForDomain } from "../client/index.js";
|
|
11
|
-
import { mergeEndpoints } from "../marketplace/index.js";
|
|
12
|
-
import { upsertDagEdgesFromOperationGraph } from "../orchestrator/dag-feedback.js";
|
|
13
|
-
import {
|
|
14
|
-
buildResolveCacheKey,
|
|
15
|
-
domainSkillCache,
|
|
16
|
-
generateLocalDescription,
|
|
17
|
-
getDomainReuseKey,
|
|
18
|
-
invalidateRouteCacheForDomain,
|
|
19
|
-
persistDomainCache,
|
|
20
|
-
scopedCacheKey,
|
|
21
|
-
snapshotPathForCacheKey,
|
|
22
|
-
writeSkillSnapshot,
|
|
23
|
-
} from "../orchestrator/index.js";
|
|
24
|
-
|
|
25
|
-
/**
 * Normalises a captured URL to its canonical absolute form when possible.
 *
 * Tries to parse `url` on its own first; if that fails and `baseUrl` is given,
 * resolves `url` against it. When nothing parses, the input is returned as-is.
 *
 * @param url - Absolute or relative URL; an empty string is returned unchanged.
 * @param baseUrl - Optional base used to resolve relative URLs.
 * @returns The serialised URL, or the original `url` if it cannot be parsed.
 */
function normalizeBrowseUrl(url: string, baseUrl?: string): string {
  if (!url) return url;

  // `undefined` as base means "parse as absolute"; the real base is only a fallback.
  const bases: Array<string | undefined> = baseUrl ? [undefined, baseUrl] : [undefined];
  for (const base of bases) {
    try {
      return new URL(url, base).toString();
    } catch {
      // Not parseable with this base — try the next one.
    }
  }
  return url;
}
|
|
38
|
-
|
|
39
|
-
/**
 * Converts HAR entries into the `RawRequest` shape.
 *
 * Entries missing either a request or a response are skipped. Header names are
 * lower-cased; when a header name repeats, the last value wins.
 *
 * @param entries - HAR entries to convert.
 * @param baseUrl - Optional base used to resolve relative request URLs.
 * @returns One `RawRequest` per complete HAR entry, in input order.
 */
export function harEntriesToRawRequests(entries: KuriHarEntry[], baseUrl?: string): RawRequest[] {
  // Turns a HAR name/value list into a plain object keyed by lower-cased name.
  const toLowerCaseMap = <V>(
    headers: ReadonlyArray<{ name: string; value: V }> | null | undefined,
  ): Record<string, V> =>
    Object.fromEntries(
      (headers ?? []).map((header): [string, V] => [header.name.toLowerCase(), header.value]),
    );

  const rawRequests: RawRequest[] = [];
  for (const entry of entries) {
    if (!(entry.request && entry.response)) continue;
    const { request, response } = entry;

    rawRequests.push({
      url: normalizeBrowseUrl(request.url, baseUrl),
      method: request.method,
      request_headers: toLowerCaseMap(request.headers),
      request_body: request.postData?.text,
      response_status: response.status,
      response_headers: toLowerCaseMap(response.headers),
      response_body: response.content?.text,
      // Fall back to "now" when the HAR entry carries no start time.
      timestamp: entry.startedDateTime ?? new Date().toISOString(),
    });
  }
  return rawRequests;
}
|
|
53
|
-
|
|
54
|
-
/**
 * Builds the de-duplication key for a captured request: `method:url:body`.
 *
 * String bodies are used verbatim; anything else is JSON-serialised, with a
 * missing body serialised as `null`.
 *
 * NOTE(review): because URLs and bodies may themselves contain `:`, distinct
 * requests could in principle map to the same key. The format is kept as-is
 * since the function is exported.
 */
export function buildBrowseRequestKey(request: RawRequest): string {
  const { method, url, request_body: body } = request;
  const bodyPart = typeof body === "string" ? body : JSON.stringify(body ?? null);
  return [method, url, bodyPart].join(":");
}
|
|
61
|
-
|
|
62
|
-
/**
 * Merges intercepted requests with requests recovered from HAR entries.
 *
 * Intercepted requests come first and win on conflict; a request is dropped
 * when an earlier one produced the same `buildBrowseRequestKey`.
 *
 * @param intercepted - Requests captured by interception; URLs are normalised on a copy.
 * @param harEntries - HAR entries to convert and append.
 * @param baseUrl - Optional base used to resolve relative URLs.
 * @returns De-duplicated requests, intercepted ones first.
 */
export function mergeBrowseRequests(intercepted: RawRequest[], harEntries: KuriHarEntry[], baseUrl?: string): RawRequest[] {
  const fromInterception = intercepted.map((request) => ({
    ...request,
    url: normalizeBrowseUrl(request.url, baseUrl),
  }));
  const fromHar = harEntriesToRawRequests(harEntries, baseUrl);

  const seenKeys = new Set<string>();
  const merged: RawRequest[] = [];
  for (const request of [...fromInterception, ...fromHar]) {
    const key = buildBrowseRequestKey(request);
    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    merged.push(request);
  }
  return merged;
}
|
|
89
|
-
|
|
90
|
-
/**
 * Outcome of `cacheBrowseRequests`.
 */
export interface BrowseIndexResult {
  /** Hostname of the session URL, or the supplied session domain when the URL does not parse. */
  domain: string;
  /** `true` when a skill was written to the local caches during this call. */
  indexed: boolean;
  /**
   * How the result was obtained: "http" when endpoints were extracted from the
   * captured requests, "dom" when the DOM fallback produced a skill, "none" otherwise.
   */
  mode: "http" | "dom" | "none";
  /** The indexed skill, a pre-existing skill for the domain, or `null`. */
  skill: SkillManifest | null;
}
|
|
96
|
-
|
|
97
|
-
/**
 * Decides whether a DOM extraction is good enough to index as a fallback skill.
 *
 * The extraction must exist, pass `validateExtractionQuality`, and not be
 * judged "fail" by `assessIntentResult`. Any of those failures is overridden
 * when the page has a structured form, at least one request was captured, and
 * the intent reads like a search.
 *
 * @returns `allow` plus, when rejected, the `reason`; `intentLooksSearch` is always reported.
 */
export function shouldIndexDomBrowseFallback(params: {
  requestCount: number;
  intent: string;
  extractedData: unknown;
  extractedConfidence: number;
  hasStructuredForm: boolean;
}): {
  allow: boolean;
  reason?: string;
  intentLooksSearch: boolean;
} {
  const { requestCount, intent, extractedData, extractedConfidence, hasStructuredForm } = params;
  const intentLooksSearch = /\b(search|find|lookup|filter)\b/i.test(intent);

  // A search form backed by real traffic rescues an otherwise rejected extraction.
  const searchFormOverride = hasStructuredForm && requestCount > 0 && intentLooksSearch;

  const accept = () => ({ allow: true, intentLooksSearch });
  const rejectUnlessOverridden = (reason: string) =>
    searchFormOverride ? accept() : { allow: false, reason, intentLooksSearch };

  if (!extractedData) {
    return rejectUnlessOverridden(
      hasStructuredForm ? "form_only_without_network_evidence" : "no_dom_data",
    );
  }

  const quality = validateExtractionQuality(extractedData, extractedConfidence, intent);
  if (!quality.valid) {
    return rejectUnlessOverridden(quality.quality_note ?? "low_quality_dom_extraction");
  }

  const semanticAssessment = assessIntentResult(extractedData, intent);
  if (semanticAssessment.verdict === "fail") {
    return rejectUnlessOverridden(semanticAssessment.reason ?? "dom_extraction_did_not_match_intent");
  }

  return accept();
}
|
|
148
|
-
|
|
149
|
-
/** Assembles the skill manifest written by `cacheBrowseRequests` for either indexing mode. */
function buildBrowseSkillManifest(args: {
  existing: Pick<SkillManifest, "skill_id" | "created_at" | "intents"> | null | undefined;
  domain: string;
  intent: string;
  description: string;
  endpoints: SkillManifest["endpoints"];
}): SkillManifest {
  const { existing, domain, intent, description, endpoints } = args;
  return {
    // Reuse identity and creation time of a skill already known for the domain.
    skill_id: existing?.skill_id ?? nanoid(),
    version: "1.0.0",
    schema_version: "1",
    lifecycle: "active",
    execution_type: "http",
    created_at: existing?.created_at ?? new Date().toISOString(),
    updated_at: new Date().toISOString(),
    name: domain,
    intent_signature: intent,
    domain,
    description,
    owner_type: "agent",
    endpoints,
    operation_graph: buildSkillOperationGraph(endpoints),
    intents: [...new Set([...(existing?.intents ?? []), intent])],
  };
}

/** Writes a freshly built skill to the snapshot, domain, published-skill, DAG and route caches. */
function persistBrowseSkill(
  skill: SkillManifest,
  ctx: {
    domain: string;
    intent: string;
    sessionUrl: string;
    domainKey: ReturnType<typeof getDomainReuseKey>;
  },
): void {
  const cacheKey = buildResolveCacheKey(ctx.domain, ctx.intent, ctx.sessionUrl);
  const scopedKey = scopedCacheKey("global", cacheKey);
  writeSkillSnapshot(scopedKey, skill);

  if (ctx.domainKey) {
    domainSkillCache.set(ctx.domainKey, {
      skillId: skill.skill_id,
      localSkillPath: snapshotPathForCacheKey(scopedKey),
      ts: Date.now(),
    });
    persistDomainCache();
  }

  try {
    cachePublishedSkill(skill);
  } catch {
    // Best-effort: a failure here must not undo the snapshot written above.
  }
  upsertDagEdgesFromOperationGraph(skill);
  invalidateRouteCacheForDomain(ctx.domain);
}

/**
 * Indexes what was observed during a browse session into a locally cached skill.
 *
 * Strategy:
 * 1. If API endpoints can be extracted from the captured requests, merge them
 *    with endpoints already known for the domain and cache the result ("http").
 *    If merging would yield fewer endpoints than the existing skill has, nothing
 *    is written and the existing skill is returned.
 * 2. Otherwise, if `getPageHtml` is provided, try to extract structured data
 *    from the page HTML and cache a single DOM-backed endpoint ("dom").
 * 3. Otherwise, or on any error in step 2, report `mode: "none"`.
 *
 * @param params.sessionUrl - URL of the browsed page; its hostname becomes the domain.
 * @param params.sessionDomain - Fallback domain when `sessionUrl` does not parse.
 * @param params.requests - Captured requests to mine for endpoints.
 * @param params.getPageHtml - Optional lazy accessor for the page HTML (DOM fallback).
 * @param params.intent - Intent to record; defaults to `browse <domain>`.
 * @returns Domain, whether something was indexed, the mode used and the resulting skill.
 */
export async function cacheBrowseRequests(params: {
  sessionUrl: string;
  sessionDomain: string;
  requests: RawRequest[];
  getPageHtml?: () => Promise<string>;
  intent?: string;
}): Promise<BrowseIndexResult> {
  const { sessionUrl, sessionDomain, requests, getPageHtml } = params;
  let domain: string;
  try { domain = new URL(sessionUrl).hostname; } catch { domain = sessionDomain; }
  const intent = params.intent ?? `browse ${domain}`;

  const nothingIndexed = (): BrowseIndexResult => ({ domain, indexed: false, mode: "none", skill: null });

  // ---- 1. HTTP path: endpoints recovered from captured traffic ----------
  const rawEndpoints = extractEndpoints(requests, undefined, { pageUrl: sessionUrl, finalUrl: sessionUrl });
  if (rawEndpoints.length > 0) {
    const existingSkill = findExistingSkillForDomain(domain);
    let allExisting = existingSkill?.endpoints ?? [];

    // NOTE(review): `??` only falls back for null/undefined; an empty sessionUrl is passed through.
    const domainKey = getDomainReuseKey(sessionUrl ?? domain);
    if (domainKey) {
      const cached = domainSkillCache.get(domainKey);
      if (cached?.localSkillPath) {
        try {
          const snapshot = JSON.parse(readFileSync(cached.localSkillPath, "utf-8"));
          if (snapshot?.endpoints?.length > 0) {
            allExisting = mergeEndpoints(allExisting, snapshot.endpoints);
          }
        } catch {
          // ignore stale snapshot
        }
      }
    }

    const mergedEndpoints = allExisting.length > 0 ? mergeEndpoints(allExisting, rawEndpoints) : rawEndpoints;

    // Never replace an existing skill with one that has fewer endpoints.
    if (existingSkill && mergedEndpoints.length < existingSkill.endpoints.length) {
      return { domain, indexed: false, mode: "http", skill: existingSkill ?? null };
    }

    for (const endpoint of mergedEndpoints) {
      if (!endpoint.description) endpoint.description = generateLocalDescription(endpoint);
    }

    const quickSkill = buildBrowseSkillManifest({
      existing: existingSkill,
      domain,
      intent,
      description: `API skill for ${domain}`,
      endpoints: mergedEndpoints,
    });
    persistBrowseSkill(quickSkill, { domain, intent, sessionUrl, domainKey });
    return { domain, indexed: true, mode: "http", skill: quickSkill };
  }

  // ---- 2. DOM fallback ----------------------------------------------------
  if (!getPageHtml) return nothingIndexed();

  try {
    const html = await getPageHtml();
    // Tolerate leading whitespace / BOM before the first tag; anything that
    // still does not start with "<" is not treated as HTML.
    if (!html || !html.trimStart().startsWith("<")) return nothingIndexed();

    const { extractFromDOM } = await import("../extraction/index.js");
    const { detectSearchForms, isStructuredSearchForm } = await import("../execution/search-forms.js");
    const { inferSchema } = await import("../transform/index.js");
    const { templatizeQueryParams } = await import("../execution/index.js");

    const extracted = extractFromDOM(html, intent);
    const searchForms = detectSearchForms(html);
    const validForm = searchForms.find((form: { form_selector: string; fields: unknown[] }) => isStructuredSearchForm(form));
    const domDecision = shouldIndexDomBrowseFallback({
      requestCount: requests.length,
      intent,
      extractedData: extracted.data,
      extractedConfidence: extracted.confidence,
      hasStructuredForm: !!validForm,
    });

    if (!domDecision.allow || !extracted.data) return nothingIndexed();

    // The form is only advertised when the intent actually looks like a search.
    const exposeSearchForm = Boolean(validForm) && domDecision.intentLooksSearch;

    const endpoint: EndpointDescriptor = {
      endpoint_id: nanoid(),
      method: "GET",
      url_template: templatizeQueryParams(sessionUrl),
      idempotency: "safe",
      verification_status: "verified",
      reliability_score: extracted.confidence ?? 0.7,
      description: exposeSearchForm ? `Search form for ${domain}` : `Page content from ${domain}`,
      response_schema: inferSchema([extracted.data]),
      dom_extraction: {
        extraction_method: extracted.extraction_method ?? "repeated-elements",
        confidence: extracted.confidence ?? 0.7,
        ...(extracted.selector ? { selector: extracted.selector } : {}),
      },
      trigger_url: sessionUrl,
      ...(exposeSearchForm ? { search_form: validForm } : {}),
    };

    endpoint.semantic = inferEndpointSemantic(endpoint, {
      sampleResponse: extracted.data,
      observedAt: new Date().toISOString(),
      sampleRequestUrl: sessionUrl,
    });

    const existing = findExistingSkillForDomain(domain);
    const allEndpoints = existing ? mergeEndpoints(existing.endpoints, [endpoint]) : [endpoint];
    for (const candidate of allEndpoints) {
      if (!candidate.description) candidate.description = generateLocalDescription(candidate);
    }

    const skill = buildBrowseSkillManifest({
      existing,
      domain,
      intent,
      description: `DOM skill for ${domain}`,
      endpoints: allEndpoints,
    });
    persistBrowseSkill(skill, {
      domain,
      intent,
      sessionUrl,
      domainKey: getDomainReuseKey(sessionUrl ?? domain),
    });
    return { domain, indexed: true, mode: "dom", skill };
  } catch {
    // DOM indexing is opportunistic: any failure is reported as "nothing indexed".
    return nothingIndexed();
  }
}
|