@vellumai/assistant 0.4.53 → 0.4.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bun.lock +62 -349
- package/docs/architecture/integrations.md +1 -1
- package/docs/architecture/keychain-broker.md +94 -29
- package/docs/architecture/security.md +2 -2
- package/knip.json +7 -29
- package/package.json +2 -9
- package/src/__tests__/agent-loop.test.ts +1 -1
- package/src/__tests__/app-git-history.test.ts +0 -2
- package/src/__tests__/app-git-service.test.ts +1 -6
- package/src/__tests__/approval-cascade.test.ts +0 -1
- package/src/__tests__/avatar-e2e.test.ts +0 -1
- package/src/__tests__/browser-fill-credential.test.ts +1 -6
- package/src/__tests__/call-domain.test.ts +0 -1
- package/src/__tests__/call-routes-http.test.ts +0 -1
- package/src/__tests__/channel-guardian.test.ts +4 -4
- package/src/__tests__/channel-readiness-routes.test.ts +0 -1
- package/src/__tests__/channel-readiness-service.test.ts +0 -1
- package/src/__tests__/checker.test.ts +13 -11
- package/src/__tests__/claude-code-skill-regression.test.ts +0 -1
- package/src/__tests__/claude-code-tool-profiles.test.ts +1 -2
- package/src/__tests__/config-loader-backfill.test.ts +0 -3
- package/src/__tests__/config-schema.test.ts +3 -9
- package/src/__tests__/config-watcher.test.ts +11 -3
- package/src/__tests__/credential-broker-browser-fill.test.ts +27 -24
- package/src/__tests__/credential-broker-server-use.test.ts +60 -24
- package/src/__tests__/credential-security-e2e.test.ts +1 -6
- package/src/__tests__/credential-security-invariants.test.ts +13 -8
- package/src/__tests__/credential-vault-unit.test.ts +28 -12
- package/src/__tests__/credential-vault.test.ts +40 -28
- package/src/__tests__/credentials-cli.test.ts +1 -21
- package/src/__tests__/email-invite-adapter.test.ts +0 -1
- package/src/__tests__/fixtures/credential-security-fixtures.ts +3 -3
- package/src/__tests__/fixtures/media-reuse-fixtures.ts +3 -79
- package/src/__tests__/gateway-only-enforcement.test.ts +1 -21
- package/src/__tests__/guardian-action-conversation-turn.test.ts +8 -8
- package/src/__tests__/guardian-action-late-reply.test.ts +13 -14
- package/src/__tests__/guardian-action-store.test.ts +0 -57
- package/src/__tests__/guardian-outbound-http.test.ts +1 -1
- package/src/__tests__/guardian-verification-voice-binding.test.ts +1 -3
- package/src/__tests__/hooks-blocking.test.ts +1 -1
- package/src/__tests__/hooks-config.test.ts +5 -29
- package/src/__tests__/hooks-discovery.test.ts +1 -1
- package/src/__tests__/hooks-integration.test.ts +1 -1
- package/src/__tests__/hooks-manager.test.ts +1 -1
- package/src/__tests__/hooks-runner.test.ts +1 -23
- package/src/__tests__/hooks-settings.test.ts +1 -1
- package/src/__tests__/hooks-templates.test.ts +1 -1
- package/src/__tests__/integration-status.test.ts +0 -1
- package/src/__tests__/invite-routes-http.test.ts +0 -3
- package/src/__tests__/list-messages-attachments.test.ts +4 -4
- package/src/__tests__/llm-usage-store.test.ts +50 -0
- package/src/__tests__/managed-proxy-context.test.ts +41 -41
- package/src/__tests__/media-generate-image.test.ts +2 -2
- package/src/__tests__/media-reuse-story.e2e.test.ts +1 -6
- package/src/__tests__/memory-regressions.experimental.test.ts +4 -4
- package/src/__tests__/memory-regressions.test.ts +27 -27
- package/src/__tests__/memory-retrieval.benchmark.test.ts +1 -1
- package/src/__tests__/memory-upsert-concurrency.test.ts +4 -4
- package/src/__tests__/notification-decision-fallback.test.ts +1 -1
- package/src/__tests__/oauth-cli.test.ts +1 -4
- package/src/__tests__/oauth-store.test.ts +1 -3
- package/src/__tests__/openai-provider.test.ts +7 -7
- package/src/__tests__/platform.test.ts +14 -4
- package/src/__tests__/pricing.test.ts +0 -223
- package/src/__tests__/provider-commit-message-generator.test.ts +1 -4
- package/src/__tests__/provider-fail-open-selection.test.ts +58 -54
- package/src/__tests__/provider-managed-proxy-integration.test.ts +63 -63
- package/src/__tests__/provider-registry-ollama.test.ts +3 -3
- package/src/__tests__/public-ingress-urls.test.ts +1 -1
- package/src/__tests__/registry.test.ts +3 -103
- package/src/__tests__/script-proxy-injection-runtime.test.ts +2 -7
- package/src/__tests__/secret-onetime-send.test.ts +1 -6
- package/src/__tests__/secret-routes-managed-proxy.test.ts +6 -13
- package/src/__tests__/secure-keys.test.ts +241 -229
- package/src/__tests__/session-abort-tool-results.test.ts +0 -1
- package/src/__tests__/session-confirmation-signals.test.ts +0 -1
- package/src/__tests__/session-messaging-secret-redirect.test.ts +1 -7
- package/src/__tests__/session-pre-run-repair.test.ts +0 -1
- package/src/__tests__/session-provider-retry-repair.test.ts +0 -1
- package/src/__tests__/session-queue.test.ts +2 -4
- package/src/__tests__/session-slash-known.test.ts +0 -1
- package/src/__tests__/session-slash-queue.test.ts +0 -1
- package/src/__tests__/session-slash-unknown.test.ts +0 -1
- package/src/__tests__/session-workspace-injection.test.ts +0 -1
- package/src/__tests__/session-workspace-tool-tracking.test.ts +0 -1
- package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
- package/src/__tests__/slack-channel-config.test.ts +1 -7
- package/src/__tests__/swarm-recursion.test.ts +0 -1
- package/src/__tests__/swarm-session-integration.test.ts +0 -1
- package/src/__tests__/swarm-tool.test.ts +0 -1
- package/src/__tests__/task-compiler.test.ts +1 -1
- package/src/__tests__/test-support/browser-skill-harness.ts +0 -18
- package/src/__tests__/test-support/computer-use-skill-harness.ts +0 -23
- package/src/__tests__/tool-executor.test.ts +1 -1
- package/src/__tests__/trust-store.test.ts +3 -82
- package/src/__tests__/twilio-config.test.ts +0 -1
- package/src/__tests__/twilio-provider.test.ts +0 -5
- package/src/__tests__/twilio-routes.test.ts +0 -1
- package/src/__tests__/usage-cache-backfill-migration.test.ts +10 -10
- package/src/calls/guardian-question-copy.ts +1 -1
- package/src/cli/commands/bash.ts +3 -0
- package/src/cli/commands/doctor.ts +10 -34
- package/src/cli/commands/memory.ts +3 -5
- package/src/cli/commands/sessions.ts +1 -1
- package/src/cli/commands/usage.ts +359 -0
- package/src/cli/http-client.ts +22 -12
- package/src/cli/program.ts +2 -0
- package/src/cli/reference.ts +1 -0
- package/src/cli.ts +251 -181
- package/src/config/assistant-feature-flags.ts +0 -7
- package/src/config/bundled-skills/chatgpt-import/tools/chatgpt-import.ts +1 -1
- package/src/config/bundled-skills/claude-code/SKILL.md +1 -1
- package/src/config/bundled-skills/claude-code/TOOLS.json +1 -1
- package/src/config/bundled-skills/gmail/SKILL.md +0 -1
- package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -2
- package/src/config/bundled-skills/media-processing/services/reduce.ts +1 -1
- package/src/config/bundled-skills/messaging/SKILL.md +0 -1
- package/src/config/bundled-skills/sequences/SKILL.md +0 -1
- package/src/config/env.ts +13 -0
- package/src/config/feature-flag-registry.json +9 -41
- package/src/config/schemas/security.ts +1 -2
- package/src/config/skills.ts +1 -1
- package/src/contacts/contact-store.ts +0 -50
- package/src/daemon/approved-devices-store.ts +0 -44
- package/src/daemon/classifier.ts +1 -1
- package/src/daemon/config-watcher.ts +14 -8
- package/src/daemon/handlers/config-model.ts +1 -1
- package/src/daemon/handlers/sessions.ts +4 -116
- package/src/daemon/handlers/skills.ts +1 -1
- package/src/daemon/lifecycle.ts +13 -15
- package/src/daemon/providers-setup.ts +1 -1
- package/src/daemon/server.ts +20 -3
- package/src/daemon/session-slash.ts +2 -2
- package/src/daemon/shutdown-handlers.ts +15 -0
- package/src/daemon/watch-handler.ts +2 -2
- package/src/email/guardrails.ts +1 -1
- package/src/email/service.ts +0 -5
- package/src/hooks/templates.ts +1 -1
- package/src/media/app-icon-generator.ts +2 -2
- package/src/media/avatar-router.ts +2 -2
- package/src/media/gemini-image-service.ts +5 -5
- package/src/memory/admin.ts +2 -2
- package/src/memory/app-git-service.ts +0 -7
- package/src/memory/conversation-crud.ts +1 -1
- package/src/memory/conversation-title-service.ts +2 -2
- package/src/memory/embedding-backend.ts +30 -26
- package/src/memory/external-conversation-store.ts +0 -30
- package/src/memory/guardian-action-store.ts +0 -31
- package/src/memory/guardian-approvals.ts +1 -56
- package/src/memory/indexer.ts +4 -3
- package/src/memory/items-extractor.ts +1 -1
- package/src/memory/job-handlers/backfill.ts +5 -2
- package/src/memory/job-handlers/index-maintenance.ts +2 -2
- package/src/memory/job-handlers/media-processing.ts +2 -2
- package/src/memory/job-handlers/summarization.ts +1 -1
- package/src/memory/job-utils.ts +1 -2
- package/src/memory/jobs-worker.ts +2 -2
- package/src/memory/llm-usage-store.ts +57 -11
- package/src/memory/media-store.ts +4 -535
- package/src/memory/migrations/032-guardian-delivery-conversation-index.ts +2 -2
- package/src/memory/migrations/110-channel-guardian.ts +0 -1
- package/src/memory/published-pages-store.ts +0 -83
- package/src/memory/qdrant-circuit-breaker.ts +0 -8
- package/src/memory/retriever.ts +1 -1
- package/src/memory/schema/calls.ts +0 -67
- package/src/memory/search/semantic.ts +1 -8
- package/src/memory/shared-app-links-store.ts +0 -15
- package/src/messaging/registry.ts +0 -5
- package/src/messaging/style-analyzer.ts +1 -1
- package/src/notifications/copy-composer.ts +5 -13
- package/src/notifications/decision-engine.ts +2 -2
- package/src/notifications/deliveries-store.ts +0 -39
- package/src/notifications/guardian-question-mode.ts +6 -10
- package/src/notifications/preference-extractor.ts +1 -1
- package/src/oauth/byo-connection.test.ts +29 -20
- package/src/oauth/provider-behaviors.ts +1 -1
- package/src/permissions/checker.ts +1 -1
- package/src/permissions/shell-identity.ts +0 -5
- package/src/permissions/trust-store.ts +0 -37
- package/src/prompts/system-prompt.ts +4 -4
- package/src/prompts/templates/SOUL.md +1 -1
- package/src/providers/managed-proxy/constants.ts +8 -10
- package/src/providers/managed-proxy/context.ts +14 -9
- package/src/providers/provider-send-message.ts +4 -52
- package/src/providers/registry.ts +16 -50
- package/src/runtime/actor-token-store.ts +0 -23
- package/src/runtime/auth/__tests__/guard-tests.test.ts +64 -0
- package/src/runtime/http-router.ts +5 -1
- package/src/runtime/http-server.ts +101 -4
- package/src/runtime/invite-instruction-generator.ts +25 -51
- package/src/runtime/invite-service.ts +0 -20
- package/src/runtime/routes/attachment-routes.ts +1 -1
- package/src/runtime/routes/brain-graph-routes.ts +1 -1
- package/src/runtime/routes/call-routes.ts +1 -1
- package/src/runtime/routes/conversation-routes.ts +32 -11
- package/src/runtime/routes/debug-routes.ts +1 -1
- package/src/runtime/routes/diagnostics-routes.ts +2 -2
- package/src/runtime/routes/documents-routes.ts +3 -3
- package/src/runtime/routes/global-search-routes.ts +1 -1
- package/src/runtime/routes/guardian-bootstrap-routes.ts +0 -20
- package/src/runtime/routes/guardian-refresh-routes.ts +0 -20
- package/src/runtime/routes/secret-routes.ts +4 -4
- package/src/runtime/routes/session-management-routes.ts +27 -0
- package/src/runtime/routes/trust-rules-routes.ts +1 -1
- package/src/security/credential-backend.ts +148 -0
- package/src/security/oauth2.ts +1 -1
- package/src/security/secret-allowlist.ts +1 -1
- package/src/security/secure-keys.ts +98 -160
- package/src/security/token-manager.ts +0 -7
- package/src/sequence/guardrails.ts +0 -4
- package/src/sequence/store.ts +1 -20
- package/src/sequence/types.ts +1 -36
- package/src/signals/bash.ts +33 -0
- package/src/signals/cancel.ts +69 -0
- package/src/signals/conversation-undo.ts +127 -0
- package/src/signals/trust-rule.ts +174 -0
- package/src/skills/clawhub.ts +5 -5
- package/src/skills/managed-store.ts +4 -4
- package/src/subagent/manager.ts +8 -1
- package/src/telemetry/usage-telemetry-reporter.test.ts +366 -0
- package/src/telemetry/usage-telemetry-reporter.ts +181 -0
- package/src/tools/claude-code/claude-code.ts +2 -2
- package/src/tools/credentials/vault.ts +8 -4
- package/src/tools/memory/handlers.test.ts +24 -26
- package/src/tools/memory/handlers.ts +1 -13
- package/src/tools/registry.ts +5 -100
- package/src/tools/terminal/parser.ts +34 -4
- package/src/tools/tool-manifest.ts +0 -10
- package/src/usage/actors.ts +0 -12
- package/src/util/canonicalize-identity.ts +0 -9
- package/src/util/errors.ts +0 -3
- package/src/util/platform.ts +24 -7
- package/src/util/pricing.ts +0 -38
- package/src/watcher/constants.ts +0 -7
- package/src/watcher/providers/linear.ts +1 -1
- package/src/work-items/work-item-store.ts +4 -4
- package/src/workspace/commit-message-provider.ts +1 -1
- package/src/workspace/git-service.ts +44 -1
- package/src/workspace/provider-commit-message-generator.ts +1 -1
- package/src/__tests__/fixtures/proxy-fixtures.ts +0 -147
- package/src/browser-extension-relay/client.ts +0 -155
- package/src/contacts/index.ts +0 -18
- package/src/daemon/tls-certs.ts +0 -270
- package/src/errors.ts +0 -41
- package/src/events/index.ts +0 -18
- package/src/followups/index.ts +0 -10
- package/src/playbooks/index.ts +0 -10
- package/src/runtime/auth/index.ts +0 -44
- package/src/tasks/candidate-store.ts +0 -95
- package/src/tools/browser/api-map.ts +0 -313
- package/src/tools/browser/auto-navigate.ts +0 -469
- package/src/tools/browser/headless-browser.ts +0 -590
- package/src/tools/browser/recording-store.ts +0 -75
- package/src/tools/computer-use/registry.ts +0 -21
- package/src/tools/tasks/index.ts +0 -27
|
@@ -1,469 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* CDP-based auto-navigation for any domain.
|
|
3
|
-
*
|
|
4
|
-
* Drives Chrome through a domain's pages by discovering internal links,
|
|
5
|
-
* so the NetworkRecorder captures the API surface without manual browsing.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import { getLogger } from "../../util/logger.js";
|
|
9
|
-
|
|
10
|
-
const log = getLogger("auto-navigate");
|
|
11
|
-
|
|
12
|
-
/** Base URL of Chrome's DevTools HTTP endpoint, used when the caller supplies none. */
const DEFAULT_CDP_BASE = "http://localhost:9222";

/** Upper bound on the number of distinct pages visited in one run. */
const MAX_PAGES = 10;

/** Milliseconds to wait after a navigation before touching the page. */
const PAGE_WAIT_MS = 2500;

/** Milliseconds to wait after a page has been scrolled. */
const SCROLL_WAIT_MS = 1000;
|
|
16
|
-
|
|
17
|
-
/**
 * Minimal CDP client — connects to one page tab.
 *
 * Each request is a JSON message tagged with an incrementing `id`; the
 * response carrying the same `id` settles the promise returned by `send`.
 * Incoming messages without an `id` are ignored.
 */
class MiniCDP {
  private ws: WebSocket | null = null;
  private nextId = 1;
  /** Pending requests keyed by request id. */
  private callbacks = new Map<
    number,
    { resolve: (v: unknown) => void; reject: (e: Error) => void }
  >();

  /**
   * Open a WebSocket to the given debugger URL.
   *
   * @param wsUrl WebSocket URL of the tab to attach to.
   * @returns A promise that resolves once the socket is open and rejects if
   *   the socket errors or closes before it opened.
   */
  async connect(wsUrl: string): Promise<void> {
    return new Promise((resolve, reject) => {
      const ws = new WebSocket(wsUrl);
      ws.onopen = () => {
        this.ws = ws;
        resolve();
      };
      ws.onerror = (e) => {
        // Interpolating the event itself yields "[object Event]"; use its
        // message when the runtime provides one, otherwise describe the event.
        const message: unknown = Reflect.get(e, "message");
        const detail =
          typeof message === "string" && message.length > 0
            ? message
            : `WebSocket ${e.type} event (${wsUrl})`;
        reject(new Error(`CDP error: ${detail}`));
      };
      ws.onclose = () => {
        this.ws = null;
        for (const [, cb] of this.callbacks) {
          cb.reject(new Error("WebSocket closed"));
        }
        this.callbacks.clear();
        // Settles connect() when the socket closes before ever opening;
        // a no-op once the promise has already resolved or rejected.
        reject(new Error("WebSocket closed"));
      };
      ws.onmessage = (event) => {
        let parsed: unknown;
        try {
          parsed = JSON.parse(String(event.data));
        } catch {
          // A frame that is not JSON cannot be matched to a request; drop it
          // instead of throwing out of the event handler.
          return;
        }
        if (typeof parsed !== "object" || parsed === null) return;
        const msg = parsed as {
          id?: number | null;
          result?: unknown;
          error?: { message?: string };
        };
        if (msg.id == null) return;
        const cb = this.callbacks.get(msg.id);
        if (!cb) return;
        this.callbacks.delete(msg.id);
        if (msg.error) {
          cb.reject(new Error(msg.error.message));
        } else {
          cb.resolve(msg.result);
        }
      };
    });
  }

  /**
   * Send one CDP command and wait for its response.
   *
   * @param method CDP method name, e.g. `"Page.navigate"`.
   * @param params Optional parameters object for the method.
   * @returns The `result` field of the matching response.
   * @throws Error("Not connected") when no socket is open; rejects with the
   *   response's error message, or "WebSocket closed" if the socket closes
   *   while the request is pending.
   */
  async send(
    method: string,
    params?: Record<string, unknown>,
  ): Promise<unknown> {
    const ws = this.ws;
    if (!ws) throw new Error("Not connected");
    const id = this.nextId++;
    return new Promise((resolve, reject) => {
      this.callbacks.set(id, { resolve, reject });
      try {
        ws.send(JSON.stringify({ id, method, params }));
      } catch (err) {
        // The request never left, so no response can arrive for this id.
        this.callbacks.delete(id);
        reject(err instanceof Error ? err : new Error(String(err)));
      }
    });
  }

  /** Close the socket if one is open; pending requests are rejected by the close handler. */
  close() {
    this.ws?.close();
  }
}
|
|
74
|
-
|
|
75
|
-
/** Progress event passed to `AutoNavOptions.onProgress` during a run. */
export interface AutoNavProgress {
  /**
   * Phase being reported: `"visiting"` before a page is opened,
   * `"discovered"` after links were collected from the root page,
   * `"done"` when the run has finished.
   */
  type: "visiting" | "discovered" | "done";
  /** URL about to be visited (set on `"visiting"` events). */
  url?: string;
  /** 1-based position of the page being visited (set on `"visiting"` events). */
  pageNumber?: number;
  /** Number of links known at the time of the event. */
  totalDiscovered?: number;
  /** Number of pages actually visited (set on the `"done"` event). */
  visitedCount?: number;
}

/** Optional settings accepted by `autoNavigate`. */
export interface AutoNavOptions {
  /** Cooperative cancellation flag; the run stops once `aborted` is true. */
  abortSignal?: { aborted: boolean };
  /** Callback invoked with each progress event. */
  onProgress?: (p: AutoNavProgress) => void;
  /** Base URL of the Chrome DevTools HTTP endpoint to use instead of the default. */
  cdpBaseUrl?: string;
}
|
|
88
|
-
|
|
89
|
-
/**
 * Navigate Chrome through a domain's pages to trigger API calls.
 * Discovers internal links from the DOM and visits at most `MAX_PAGES`
 * unique pages; the domain root counts as the first one.
 *
 * Failing to reach Chrome, to find a tab, to connect, or to load the domain
 * root is logged and yields an empty list instead of an exception. Failures
 * on individual pages are logged and skipped.
 *
 * @param domain The domain to crawl (e.g. "example.com").
 * @param options Optional configuration for abort, progress, and CDP base URL.
 * @returns List of visited page URLs.
 */
export async function autoNavigate(
  domain: string,
  options?: AutoNavOptions,
): Promise<string[]> {
  const {
    abortSignal,
    onProgress,
    cdpBaseUrl = DEFAULT_CDP_BASE,
  } = options ?? {};

  // Pick the tab to drive: one already on the domain (or a subdomain) if
  // there is one, otherwise the first page tab.
  let wsUrl: string | null = null;
  try {
    const res = await fetch(`${cdpBaseUrl}/json/list`);
    if (!res.ok) {
      log.warn("CDP not available for auto-navigation");
      return [];
    }
    const targets = (await res.json()) as Array<{
      type: string;
      url: string;
      webSocketDebuggerUrl: string;
    }>;
    const domainTab = targets.find((t) => {
      if (t.type !== "page") return false;
      try {
        const hostname = new URL(t.url).hostname;
        return hostname === domain || hostname.endsWith("." + domain);
      } catch {
        return false;
      }
    });
    wsUrl =
      domainTab?.webSocketDebuggerUrl ??
      targets.find((t) => t.type === "page")?.webSocketDebuggerUrl ??
      null;
  } catch (err) {
    log.warn({ err }, "Failed to discover Chrome tabs");
    return [];
  }

  if (!wsUrl) {
    log.warn("No Chrome tab found for auto-navigation");
    return [];
  }

  const cdp = new MiniCDP();
  try {
    await cdp.connect(wsUrl);
  } catch (err) {
    log.warn({ err }, "Failed to connect CDP for auto-navigation");
    return [];
  }

  const rootUrl = `https://${domain}/`;
  const visited = new Set<string>();
  const visitedUrls: string[] = [];
  let totalDiscovered = 0;

  // Everything that uses the socket lives inside this try so the connection
  // is closed on every exit path, including an exception thrown by a
  // progress callback or by link ranking.
  try {
    await cdp.send("Page.bringToFront").catch(() => {});
    await cdp.send("Page.enable").catch(() => {});

    // Navigate to the domain root first
    try {
      onProgress?.({ type: "visiting", url: rootUrl, pageNumber: 1 });
      await cdp.send("Page.navigate", { url: rootUrl });
      await sleep(PAGE_WAIT_MS);
      visited.add("/");
      visitedUrls.push(rootUrl);
      log.info({ url: rootUrl }, "Visited root page");
    } catch (err) {
      log.warn({ err }, "Failed to navigate to domain root");
      return [];
    }

    if (abortSignal?.aborted) {
      return visitedUrls;
    }

    // Scroll the root page to trigger lazy content
    await scrollPage(cdp);
    await sleep(SCROLL_WAIT_MS);

    // Discover internal links from the root page and put the most promising first
    const discoveredLinks = rankLinks(await discoverInternalLinks(cdp, domain));
    // Keys already queued, so newly found links are de-duplicated in O(1)
    const knownKeys = new Set(discoveredLinks.map((l) => l.key));
    onProgress?.({
      type: "discovered",
      totalDiscovered: discoveredLinks.length,
    });
    log.info(
      { count: discoveredLinks.length },
      "Discovered internal links from root",
    );

    // Visit discovered pages; links appended during the loop are visited too
    for (const link of discoveredLinks) {
      if (abortSignal?.aborted) break;
      if (visited.size >= MAX_PAGES) break;
      if (visited.has(link.key)) continue;

      const url = link.url;
      onProgress?.({
        type: "visiting",
        url,
        pageNumber: visited.size + 1,
        totalDiscovered: discoveredLinks.length,
      });
      log.info({ url }, "Auto-navigate visiting page");

      try {
        await cdp.send("Page.navigate", { url });
        await sleep(PAGE_WAIT_MS);
        visited.add(link.key);
        visitedUrls.push(url);

        // Scroll to trigger lazy-loaded content
        await scrollPage(cdp);
        await sleep(SCROLL_WAIT_MS);

        // Click tabs/buttons within the page (NOT nav links — those navigate away)
        await clickPageTabs(cdp);
        await sleep(800);

        // Discover more links from this page
        const newLinks = await discoverInternalLinks(cdp, domain);
        for (const nl of newLinks) {
          if (!visited.has(nl.key) && !knownKeys.has(nl.key)) {
            knownKeys.add(nl.key);
            discoveredLinks.push(nl);
          }
        }

        log.info({ url }, "Auto-navigate page completed");
      } catch (err) {
        log.warn({ err, url }, "Auto-navigate page failed");
      }
    }

    totalDiscovered = discoveredLinks.length;
  } finally {
    cdp.close();
  }

  onProgress?.({
    type: "done",
    visitedCount: visitedUrls.length,
    totalDiscovered,
  });
  log.info(
    { visited: visitedUrls.length, total: totalDiscovered + 1 },
    "Auto-navigation finished",
  );
  return visitedUrls;
}
|
|
247
|
-
|
|
248
|
-
/** One same-site link found on a page. */
interface DiscoveredLink {
  /** Full URL to navigate to (preserves subdomain). */
  url: string;
  /** Deduplication key: origin + pathname. */
  key: string;
  /** Path depth (number of segments). */
  depth: number;
}

/** Paths that are typically navigation chrome, not content pages. */
const SKIP_PATHS = [
  "/home",
  "/login",
  "/signup",
  "/register",
  "/sign-up",
  "/sign-in",
  "/help",
  "/support",
  "/contact",
  "/about",
  "/terms",
  "/privacy",
  "/careers",
  "/press",
  "/blog",
  "/faq",
  "/sitemap",
];

/** Path patterns that indicate high-value purchase/content flows. */
const HIGH_VALUE_PATTERNS = [
  /\/orders/i,
  /\/cart/i,
  /\/checkout/i,
  /\/account/i,
  /\/settings/i,
  /\/store\//i,
  /\/restaurant\//i,
  /\/menu/i,
  /\/payment/i,
  /\/profile/i,
  /\/history/i,
  /\/favorites/i,
  /\/saved/i,
  /\/search/i,
  /\/category/i,
  /\/collection/i,
];

/** A whole path segment that is an 8-4-4-4-12 hexadecimal UUID. */
const UUID_SEGMENT_PATTERN =
  /\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(?=\/|$)/gi;

/**
 * Filter, de-duplicate and order links for visiting.
 *
 * 1. Drops links whose path is exactly one of `SKIP_PATHS` (with or without
 *    a trailing slash, case-insensitively).
 * 2. Keeps only the first link per "host + path pattern", where identifier
 *    segments (UUIDs, digits, long hex strings) are collapsed to `{id}` —
 *    so `/store/123` and `/store/456` count as one page, while different
 *    subdomains stay distinct.
 * 3. Orders high-value paths first, then deeper paths first (depth capped
 *    at 4); ties keep their discovery order.
 *
 * @param links Links in discovery order; the array is not modified.
 * @returns A new array containing the surviving links in visiting order.
 * @throws TypeError if a link's `url` is not a valid absolute URL.
 */
function rankLinks(links: DiscoveredLink[]): DiscoveredLink[] {
  const byPattern = new Map<string, DiscoveredLink>();
  for (const link of links) {
    const parsed = new URL(link.url);
    const lowerPath = parsed.pathname.toLowerCase();
    if (
      SKIP_PATHS.some((skip) => lowerPath === skip || lowerPath === skip + "/")
    ) {
      continue;
    }

    // UUIDs must be collapsed before the digit/hex rules: those only match a
    // prefix of a UUID segment and would leave a distinct pattern per UUID.
    const pathPattern = parsed.pathname
      .replace(UUID_SEGMENT_PATTERN, "/{id}")
      .replace(/\/\d+/g, "/{id}")
      .replace(/\/[a-f0-9]{8,}/gi, "/{id}");
    const pattern = parsed.hostname + pathPattern;
    if (!byPattern.has(pattern)) {
      byPattern.set(pattern, link);
    }
  }

  // Compute each link's sort keys once instead of on every comparison.
  const scored = [...byPattern.values()].map((link) => {
    const lowerPath = new URL(link.url).pathname.toLowerCase();
    return {
      link,
      highValue: HIGH_VALUE_PATTERNS.some((p) => p.test(lowerPath)) ? 1 : 0,
      cappedDepth: Math.min(link.depth, 4),
    };
  });
  scored.sort((a, b) => {
    if (a.highValue !== b.highValue) return b.highValue - a.highValue;
    return b.cappedDepth - a.cappedDepth;
  });
  return scored.map((entry) => entry.link);
}
|
|
333
|
-
|
|
334
|
-
/**
 * Extract internal links from the current page DOM, preserving subdomains.
 *
 * Evaluates a script in the page that walks every `a[href]`, keeps links
 * whose hostname is `domain` or one of its subdomains, drops the root path
 * and common static-asset extensions, and de-duplicates by origin + pathname
 * (query string and fragment are discarded).
 *
 * @param cdp Connected CDP client for the tab to inspect.
 * @param domain Domain the links must belong to (e.g. "example.com").
 * @returns The links found, or an empty array if the evaluation fails.
 */
async function discoverInternalLinks(
  cdp: MiniCDP,
  domain: string,
): Promise<DiscoveredLink[]> {
  try {
    const result = (await cdp.send("Runtime.evaluate", {
      expression: `
        (function() {
          const domain = ${JSON.stringify(domain)};
          const seen = new Set();
          const links = [];
          for (const a of document.querySelectorAll('a[href]')) {
            const href = a.getAttribute('href');
            if (!href) continue;
            try {
              // Resolve against the document base URL so relative hrefs
              // ("details", "../x") keep the current path; resolving against
              // the bare origin would point them at the site root instead.
              const url = new URL(href, document.baseURI);
              if (url.hostname !== domain && !url.hostname.endsWith('.' + domain)) continue;
              const path = url.pathname;
              // Skip anchors, query-only links, file downloads, and trivial paths
              if (path === '/' || path === '') continue;
              if (path.match(/\\.(png|jpg|jpeg|gif|svg|css|js|woff|pdf|zip)$/i)) continue;
              const key = url.origin + url.pathname;
              if (!seen.has(key)) {
                seen.add(key);
                links.push({
                  url: url.origin + url.pathname,
                  key,
                  depth: path.split('/').filter(Boolean).length,
                });
              }
            } catch { /* skip malformed URLs */ }
          }
          return links;
        })()
      `,
      awaitPromise: false,
      returnByValue: true,
    })) as { result?: { value?: DiscoveredLink[] } };
    return result?.result?.value ?? [];
  } catch {
    // Best effort: a page that cannot be inspected contributes no links.
    return [];
  }
}
|
|
378
|
-
|
|
379
|
-
/**
 * Scroll the page to trigger lazy-loaded content.
 *
 * Scrolls down by 600 pixels three times, pausing 500 ms after each step so
 * several lazy-load thresholds get a chance to fire. Errors from an
 * individual scroll command are ignored.
 *
 * @param cdp Connected CDP client for the tab to scroll.
 */
async function scrollPage(cdp: MiniCDP): Promise<void> {
  let stepsLeft = 3;
  while (stepsLeft > 0) {
    stepsLeft -= 1;
    try {
      await cdp.send("Runtime.evaluate", {
        expression: "window.scrollBy(0, 600)",
        awaitPromise: false,
      });
    } catch {
      // A failed scroll is not fatal; carry on with the next step.
    }
    await sleep(500);
  }
}
|
|
392
|
-
|
|
393
|
-
/**
 * Click tabs, buttons, and flow-relevant elements within the current page.
 * Avoids clicking navigation links (which would navigate away).
 *
 * For each selector the first matching element is clicked, followed by a
 * 600 ms pause; afterwards purchase-flow buttons are clicked by label.
 *
 * @param cdp Connected CDP client for the tab to interact with.
 */
async function clickPageTabs(cdp: MiniCDP): Promise<void> {
  const inPageSelectors = [
    '[role="tab"]:not(:first-child)',
    '[role="tablist"] button:not(:first-child)',
    "button[data-tab]",
    '[data-testid*="tab"]',
    'button[aria-expanded="false"]',
  ];
  for (const css of inPageSelectors) {
    await clickInPage(cdp, css);
    await sleep(600);
  }

  // Also try clicking purchase-flow buttons to trigger API calls
  // (Add to Cart, etc. — these fire API requests even if we don't complete the flow)
  const purchaseLabels = ["Add to Cart", "Add to Order", "Add Item"];
  for (const label of purchaseLabels) {
    await clickByText(cdp, label);
  }
}
|
|
417
|
-
|
|
418
|
-
/**
 * Click a button by its visible text content.
 *
 * Clicks the first `button` or `[role="button"]` element whose trimmed,
 * lower-cased text contains the lower-cased `text`.
 *
 * @param cdp Connected CDP client for the tab to interact with.
 * @param text Label to look for (matched case-insensitively, as a substring).
 * @returns `true` if an element was clicked; `false` if none matched or the
 *   evaluation failed.
 */
async function clickByText(cdp: MiniCDP, text: string): Promise<boolean> {
  try {
    // JSON-encode the label so it is embedded as a safe string literal.
    const needle = JSON.stringify(text.toLowerCase());
    const expression = `
      (function() {
        const buttons = document.querySelectorAll('button, [role="button"]');
        for (const btn of buttons) {
          if (btn.textContent && btn.textContent.trim().toLowerCase().includes(${needle})) {
            btn.scrollIntoView({ block: 'center' });
            btn.click();
            return true;
          }
        }
        return false;
      })()
    `;
    const reply = (await cdp.send("Runtime.evaluate", {
      expression,
      awaitPromise: false,
      returnByValue: true,
    })) as { result?: { value?: boolean } };
    return reply?.result?.value === true;
  } catch {
    return false;
  }
}
|
|
445
|
-
|
|
446
|
-
/**
 * Click the first element in the page that matches a CSS selector.
 *
 * @param cdp Connected CDP client for the tab to interact with.
 * @param selector CSS selector passed to `document.querySelector`.
 * @returns `true` if an element was found and clicked; `false` if nothing
 *   matched or the evaluation failed.
 */
async function clickInPage(cdp: MiniCDP, selector: string): Promise<boolean> {
  try {
    // JSON-encode the selector so it is embedded as a safe string literal.
    const quotedSelector = JSON.stringify(selector);
    const expression = `
      (function() {
        const el = document.querySelector(${quotedSelector});
        if (!el) return false;
        el.scrollIntoView({ block: 'center' });
        el.click();
        return true;
      })()
    `;
    const reply = (await cdp.send("Runtime.evaluate", {
      expression,
      awaitPromise: false,
      returnByValue: true,
    })) as { result?: { value?: boolean } };
    return reply?.result?.value === true;
  } catch {
    return false;
  }
}
|
|
466
|
-
|
|
467
|
-
/**
 * Pause for a given time.
 *
 * @param ms Delay in milliseconds, passed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|