browser-flow-tracker 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,111 @@
1
+ // Generates a first-draft Markdown flow document from a normalized flow.
2
+ // Claude/Cursor can read the JSON for detail; this is the human-readable draft.
3
+
4
/**
 * Render a request/response body as a short string for the Markdown draft.
 *
 * @param {*} body - Parsed JSON value or raw body; null, undefined and '' yield ''.
 * @param {boolean} isJson - When truthy, `body` is pretty-printed with JSON.stringify.
 * @param {number} [max=600] - Number of characters kept before the text is cut.
 * @returns {string} The text, with a truncation marker appended when it was cut.
 */
function shortBody(body, isJson, max = 600) {
  const isEmpty = body == null || body === '';
  if (isEmpty) {
    return '';
  }
  const text = isJson ? JSON.stringify(body, null, 2) : String(body);
  return text.length > max ? `${text.slice(0, max)}\n… [truncated]` : text;
}
10
+
11
/**
 * Make a value safe to use as a diagram label: every character outside
 * word characters and `. / : -` becomes `_`, and the result is capped at
 * 40 characters.
 *
 * @param {*} s - Value to sanitize (coerced with String()).
 * @returns {string} The sanitized, length-limited label.
 */
function safeLabel(s) {
  const sanitized = String(s).replace(/[^\w./:-]/g, '_');
  return sanitized.slice(0, 40);
}
14
+
15
/** Escape a value so it cannot end a Markdown table cell early. */
function escapeTableCell(value) {
  return String(value ?? '').replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');
}

/**
 * Append a headed, fenced code block to `lines`. The fence is always longer
 * than the longest backtick run inside `text`, so body content that itself
 * contains ``` cannot close the block early.
 */
function pushFencedBlock(lines, heading, text, isJson) {
  const runs = text.match(/`+/g) ?? [];
  const longestRun = runs.reduce((max, run) => Math.max(max, run.length), 0);
  const fence = '`'.repeat(Math.max(3, longestRun + 1));
  // Only label the block as JSON when the body really was parsed as JSON.
  lines.push('', heading, `${fence}${isJson ? 'json' : 'text'}`, text, fence);
}

/**
 * Generate a first-draft Markdown document describing a recorded flow.
 *
 * Only entries whose category is `api`, `websocket` or `sse` are rendered.
 *
 * @param {object} session - Normalized session; reads `flow`, `stats`,
 *   `target`, `startedAt` and `stoppedAt`. Missing `flow`/`stats` are treated
 *   as empty.
 * @param {object} [options]
 * @param {string} [options.title] - Overrides the generated document title.
 * @returns {string} The Markdown text, newline-joined.
 */
export function toMarkdown(session, options = {}) {
  const { flow = [], stats = {} } = session;
  const apiFlow = flow.filter((e) => e.category === 'api' || e.category === 'websocket' || e.category === 'sse');
  const title = options.title || `API Flow — ${session.target?.title || session.target?.url || 'recording'}`;
  const lines = [];

  lines.push(`# ${title}`);
  lines.push('');
  lines.push(`> Recorded ${session.startedAt || ''} → ${session.stoppedAt || ''}`);
  if (session.target?.url) lines.push(`> Page: ${session.target.url}`);
  lines.push('');

  // Overview
  lines.push('## Overview');
  lines.push('');
  lines.push(`- **API calls captured:** ${apiFlow.length}`);
  lines.push(`- **Total requests seen:** ${stats.totalCaptured ?? 0} (${stats.droppedCount ?? 0} filtered as noise)`);
  // Busiest hosts first.
  const hosts = Object.entries(stats.byHost ?? {}).sort((a, b) => b[1] - a[1]);
  if (hosts.length) {
    lines.push(`- **Hosts:** ${hosts.map(([h, n]) => `\`${h}\` (${n})`).join(', ')}`);
  }
  const statuses = Object.entries(stats.byStatus ?? {}).sort((a, b) => Number(a[0]) - Number(b[0]));
  if (statuses.length) {
    lines.push(`- **Status codes:** ${statuses.map(([s, n]) => `${s}×${n}`).join(', ')}`);
  }
  lines.push('');

  // Sequence diagram
  lines.push('## Sequence diagram');
  lines.push('');
  lines.push('```mermaid');
  lines.push('sequenceDiagram');
  lines.push(' participant B as Browser');
  // One participant per distinct host, numbered in order of first appearance.
  const participants = new Map();
  for (const e of apiFlow) {
    if (!participants.has(e.host)) participants.set(e.host, `S${participants.size}`);
  }
  for (const [host, id] of participants) lines.push(` participant ${id} as ${host || 'unknown'}`);
  for (const e of apiFlow) {
    const id = participants.get(e.host);
    const label = `${e.method} ${safeLabel(e.path)}`;
    lines.push(` B->>${id}: ${label}`);
    // Parenthesized so the trim applies to both branches (no trailing space
    // after FAILED when there is no error text).
    const st = (e.failed ? `FAILED ${e.errorText || ''}` : `${e.status || '?'} ${e.statusText || ''}`).trim();
    lines.push(` ${id}-->>B: ${st}`);
  }
  lines.push('```');
  lines.push('');

  // Ordered endpoint table
  lines.push('## Call sequence');
  lines.push('');
  lines.push('| # | Method | Endpoint | Status | Time |');
  lines.push('|---|--------|----------|--------|------|');
  apiFlow.forEach((e, i) => {
    const status = e.failed ? `✗ ${e.errorText || 'failed'}` : e.status || '';
    const time = e.durationMs != null ? `${e.durationMs}ms` : '';
    const endpoint = escapeTableCell(`${e.host}${e.path}`);
    lines.push(`| ${i + 1} | ${escapeTableCell(e.method)} | \`${endpoint}\` | ${escapeTableCell(status)} | ${time} |`);
  });
  lines.push('');

  // Detailed breakdown
  lines.push('## Call details');
  lines.push('');
  apiFlow.forEach((e, i) => {
    lines.push(`### ${i + 1}. ${e.method} ${e.path}`);
    lines.push('');
    lines.push(`- **URL:** \`${e.url}\``);
    // Same fallback as the table so a missing errorText never prints "undefined".
    const status = e.failed
      ? `✗ ${e.errorText || 'failed'}`
      : `${e.status || ''} ${e.statusText || ''}`.trim();
    lines.push(`- **Status:** ${status}`);
    if (e.durationMs != null) lines.push(`- **Duration:** ${e.durationMs}ms`);
    if (e.initiator) lines.push(`- **Initiated by:** ${e.initiator}`);
    if (e.query && Object.keys(e.query).length) {
      lines.push(`- **Query params:** \`${JSON.stringify(e.query)}\``);
    }
    const reqStr = shortBody(e.requestBody, e.requestBodyIsJson);
    if (reqStr) pushFencedBlock(lines, '**Request body:**', reqStr, e.requestBodyIsJson);
    const resStr = shortBody(e.responseBody, e.responseBodyIsJson);
    if (resStr) pushFencedBlock(lines, '**Response body:**', resStr, e.responseBodyIsJson);
    lines.push('');
  });

  lines.push('---');
  lines.push('_Draft generated by browser-flow-tracker. Refine with Claude/Cursor using the accompanying `.flow.json`._');
  lines.push('');
  return lines.join('\n');
}
package/src/filter.js ADDED
@@ -0,0 +1,89 @@
1
+ // Classifies captured requests so we can keep the meaningful API traffic and
2
+ // drop the noise (static assets, fonts, analytics/telemetry beacons).
3
+
4
// File extensions of static assets; anchored so the extension must be followed
// by the end of the path or the start of a query string.
const STATIC_EXT = /\.(css|scss|less|js|mjs|cjs|map|png|jpe?g|gif|svg|webp|avif|ico|bmp|woff2?|ttf|otf|eot|mp4|webm|mp3|wav|ogg|pdf)(\?|$)/i;

// Resource types (from CDP Network.ResourceType) that are almost never "the API".
const NOISE_TYPES = new Set([
  'Image',
  'Font',
  'Stylesheet',
  'Media',
  'Manifest',
  'TextTrack',
  'CSPViolationReport',
  'Ping',
  'Prefetch',
]);

// Third-party analytics / telemetry / ad hosts, matched as substrings. An entry
// may carry a path (e.g. 'facebook.com/tr'): the part before the first '/' is
// compared against the hostname and the whole entry against the full URL.
const ANALYTICS_HOSTS = [
  'google-analytics.com',
  'googletagmanager.com',
  'analytics.google.com',
  'doubleclick.net',
  'googlesyndication.com',
  'googleadservices.com',
  'facebook.com/tr',
  'connect.facebook.net',
  'segment.io',
  'segment.com',
  'mixpanel.com',
  'amplitude.com',
  'hotjar.com',
  'fullstory.com',
  'sentry.io',
  'bugsnag.com',
  'datadoghq.com',
  'newrelic.com',
  'nr-data.net',
  'intercom.io',
  'clarity.ms',
  'optimizely.com',
  'launchdarkly.com',
  'cloudflareinsights.com',
  'doubleverify.com',
  'branch.io',
  'appsflyer.com',
  'posthog.com',
  'i.posthog.com',
  'heapanalytics.com',
  'mouseflow.com',
  'quantserve.com',
  'scorecardresearch.com',
  'snowplowanalytics.com',
  'stats.g.doubleclick.net',
  'analytics.tiktok.com',
  'bat.bing.com',
];

// Analytics/telemetry that hits a FIRST-PARTY host (so host matching misses it):
// e.g. Google Analytics' /g/collect, Cloudflare RUM's /cdn-cgi/rum. Matched on path.
const ANALYTICS_PATH_RE = /\/cdn-cgi\/(rum|beacon)|\/g\/collect|\/j\/collect|\/mp\/collect|\/gtag\/|\/gtm\.js|\/piwik\.php|\/matomo\.php|\/b\/ss\//i;
29
+
30
/**
 * Detect Google Analytics / Measurement Protocol requests by their `tid`
 * query parameter, which starts with one of G-, UA-, GT-, AW- or DC-.
 *
 * @param {?{get: function(string): (string|null)}} query - Object exposing a
 *   `get(name)` lookup (such as URLSearchParams); may be null.
 * @returns {boolean} True when a string `tid` with a recognized prefix is present.
 */
function looksLikeGoogleAnalytics(query) {
  const trackingId = query?.get?.('tid');
  if (typeof trackingId !== 'string') {
    return false;
  }
  return /^(G-|UA-|GT-|AW-|DC-)/.test(trackingId);
}
35
+
36
/**
 * Decide whether a captured request is "API-relevant".
 *
 * Checks run in a fixed order: unparseable URL, inline data:/blob: resource,
 * analytics host, analytics path, Google Analytics `tid` beacon, noisy
 * resource type, static-asset extension (unless XHR/Fetch), then the kept
 * resource types, and finally an API-looking path for anything else.
 *
 * @param {{url?: string, resourceType?: string}} req - Captured request record.
 * @returns {{keep: boolean, reason: string, category: string}} The verdict.
 */
export function classify(req) {
  const type = req.resourceType || '';
  const url = req.url || '';

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Not parseable as an absolute URL — always noise for our purposes.
    return { keep: false, reason: 'non-http scheme', category: 'other' };
  }
  const host = parsed.hostname;
  const path = parsed.pathname + parsed.search;
  const query = parsed.searchParams;
  const isXhrOrFetch = type === 'XHR' || type === 'Fetch';

  if (url.startsWith('data:') || url.startsWith('blob:')) {
    return { keep: false, reason: 'inline resource', category: 'other' };
  }

  // The host part of the entry must appear in the hostname and the full entry
  // (which may include a path) must appear somewhere in the URL.
  const hitsAnalyticsHost = ANALYTICS_HOSTS.some((h) => host.includes(h.split('/')[0]) && url.includes(h));
  if (hitsAnalyticsHost) {
    return { keep: false, reason: 'analytics/telemetry host', category: 'analytics' };
  }
  if (ANALYTICS_PATH_RE.test(path)) {
    return { keep: false, reason: 'analytics/telemetry endpoint', category: 'analytics' };
  }
  if (looksLikeGoogleAnalytics(query)) {
    return { keep: false, reason: 'google analytics beacon', category: 'analytics' };
  }
  if (NOISE_TYPES.has(type)) {
    return { keep: false, reason: `resource type ${type}`, category: 'asset' };
  }
  if (!isXhrOrFetch && STATIC_EXT.test(path)) {
    return { keep: false, reason: 'static asset extension', category: 'asset' };
  }

  // Keep XHR/Fetch (the classic API calls), WebSockets, EventSource, and the
  // top-level document navigation (useful context for where a flow starts).
  switch (type) {
    case 'XHR':
    case 'Fetch':
      return { keep: true, reason: 'xhr/fetch', category: 'api' };
    case 'WebSocket':
      return { keep: true, reason: 'websocket', category: 'websocket' };
    case 'EventSource':
      return { keep: true, reason: 'server-sent events', category: 'sse' };
    case 'Document':
      return { keep: true, reason: 'navigation', category: 'document' };
    default:
      break;
  }

  // GraphQL / JSON endpoints sometimes come through as "Other".
  const looksLikeApiPath = /graphql|\/api\/|\/v\d+\//i.test(path);
  return looksLikeApiPath
    ? { keep: true, reason: 'looks like an api path', category: 'api' }
    : { keep: false, reason: `type ${type || 'unknown'}`, category: 'other' };
}
@@ -0,0 +1,109 @@
1
+ // Turns raw CDP records into a clean, ordered, API-focused flow.
2
+
3
+ import { classify } from './filter.js';
4
+
5
// Header names (lower-case) whose values are credentials or session secrets.
const SENSITIVE_HEADERS = new Set([
  'authorization',
  'cookie',
  'set-cookie',
  'x-api-key',
  'x-auth-token',
  'proxy-authorization',
  'x-csrf-token',
]);
9
+
10
/**
 * Copy a header map, optionally masking sensitive values.
 *
 * @param {?Object<string, *>} headers - Header name → value map; falsy yields {}.
 * @param {boolean} redact - When truthy, values of headers listed in
 *   SENSITIVE_HEADERS (compared case-insensitively) become '[redacted]'.
 * @returns {Object<string, *>} A new object; the input is not modified.
 */
function redactHeaders(headers, redact) {
  const copy = {};
  if (!headers) {
    return copy;
  }
  Object.entries(headers).forEach(([name, value]) => {
    const hide = redact && SENSITIVE_HEADERS.has(name.toLowerCase());
    copy[name] = hide ? '[redacted]' : value;
  });
  return copy;
}
18
+
19
/**
 * Parse a string as JSON when it looks like an object or array.
 *
 * @param {*} body - Candidate body; anything that is not a string is rejected.
 * @returns {*} The parsed value, or undefined when `body` is not a string,
 *   does not start (after trimming) with `{` or `[`, or fails to parse.
 */
function tryParseJson(body) {
  if (typeof body !== 'string') {
    return undefined;
  }
  const text = body.trim();
  // An empty string has no first character, so it is rejected here as well.
  const opener = text[0];
  if (opener !== '{' && opener !== '[') {
    return undefined;
  }
  try {
    return JSON.parse(text);
  } catch {
    return undefined;
  }
}
25
+
26
/**
 * Break a URL into host, path and query object.
 *
 * @param {string} url - URL to split.
 * @returns {{host: string, path: string, query: (Object<string, string>|undefined)}}
 *   `query` is undefined when the URL has no query string. When `url` cannot
 *   be parsed, `host` is '' and `path` is the input unchanged.
 */
function splitUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return { host: '', path: url, query: undefined };
  }
  const query = parsed.search ? Object.fromEntries(parsed.searchParams) : undefined;
  return { host: parsed.hostname, path: parsed.pathname, query };
}
34
+
35
/**
 * Turn raw recorder records into an ordered flow plus drop list and stats.
 *
 * Records without a `url` are skipped entirely (they still count towards
 * `stats.totalCaptured`). Host and status tallies only include records the
 * classifier marked as kept, even when `includeNoise` adds others to the flow.
 *
 * @param {Array<object>} records - Raw records from the recorder.
 * @param {object} [opts]
 * @param {boolean} [opts.includeNoise=false] - Also put filtered records into
 *   `flow`, tagged with `filteredReason`, instead of into `dropped`.
 * @param {boolean} [opts.redact=true] - Mask sensitive header values.
 * @returns {{flow: Array<object>, dropped: Array<object>, stats: object}}
 */
export function normalize(records, { includeNoise = false, redact = true } = {}) {
  const flow = [];
  const dropped = [];
  const byHost = {};
  const byStatus = {};

  for (const rec of records) {
    if (!rec.url) continue;

    const verdict = classify(rec);
    const { host, path, query } = splitUrl(rec.url);
    const hasDuration = rec.durationMs != null;

    const entry = {
      index: rec.index,
      category: verdict.category,
      method: rec.method || 'GET',
      url: rec.url,
      host,
      path,
      query,
      resourceType: rec.resourceType,
      status: rec.status,
      statusText: rec.statusText,
      durationMs: hasDuration ? Math.round(rec.durationMs) : undefined,
      sizeBytes: rec.encodedDataLength,
      fromCache: rec.fromCache || undefined,
      failed: rec.failed || undefined,
      errorText: rec.errorText,
      initiator: rec.initiator?.type,
      redirects: rec.redirects,
      requestHeaders: redactHeaders(rec.requestHeaders, redact),
      responseHeaders: redactHeaders(rec.responseHeaders, redact),
    };

    // Request body: prefer the parsed JSON value when parsing succeeded.
    const parsedRequest = tryParseJson(rec.requestBody);
    const requestIsJson = parsedRequest !== undefined;
    entry.requestBody = requestIsJson ? parsedRequest : rec.requestBody;
    entry.requestBodyIsJson = requestIsJson;

    // Response body: base64-flagged payloads are replaced by a placeholder.
    if (rec.responseBodyBase64) {
      entry.responseBody = '[binary]';
    } else {
      const parsedResponse = tryParseJson(rec.responseBody);
      const responseIsJson = parsedResponse !== undefined;
      entry.responseBody = responseIsJson ? parsedResponse : rec.responseBody;
      entry.responseBodyIsJson = responseIsJson;
      entry.responseBodyTruncated = rec.responseBodyTruncated || undefined;
    }

    if (verdict.keep) {
      flow.push(entry);
      byHost[host] = (byHost[host] || 0) + 1;
      if (rec.status) byStatus[rec.status] = (byStatus[rec.status] || 0) + 1;
    } else if (includeNoise) {
      entry.filteredReason = verdict.reason;
      flow.push(entry);
    } else {
      dropped.push({ method: entry.method, url: rec.url, reason: verdict.reason });
    }
  }

  const kept = flow.filter((e) => !e.filteredReason).length;
  const stats = {
    totalCaptured: records.length,
    kept,
    droppedCount: dropped.length,
    byHost,
    byStatus,
  };
  return { flow, dropped, stats };
}
package/src/session.js ADDED
@@ -0,0 +1,185 @@
1
+ // Orchestrates a recording session: optionally launch a browser, attach the
2
+ // CDP recorder, then on stop normalize + write JSON / HAR / Markdown outputs.
3
+
4
+ import { mkdirSync, writeFileSync, readFileSync, existsSync } from 'node:fs';
5
+ import { join } from 'node:path';
6
+ import { homedir } from 'node:os';
7
+ import net from 'node:net';
8
+ import CDP from 'chrome-remote-interface';
9
+ import { CdpRecorder } from './cdpRecorder.js';
10
+ import { launchBrowser } from './browsers.js';
11
+ import { normalize } from './normalize.js';
12
+ import { toHar } from './exporters/har.js';
13
+ import { toMarkdown } from './exporters/markdown.js';
14
+
15
/**
 * Build a filesystem-safe timestamp from the current time: the ISO-8601 UTC
 * string with every ':' and '.' replaced by '-', so it does not depend on locale.
 *
 * @returns {string} For example `2024-01-02T03-04-05-678Z`.
 */
function stamp() {
  const iso = new Date().toISOString();
  return iso.replace(/[:.]/g, '-');
}
19
+
20
/**
 * Grab an OS-assigned free TCP port on 127.0.0.1. Fixes the #1 launch failure:
 * colliding with something already sitting on the default 9222 debug port.
 *
 * A throwaway server listens on port 0, its assigned port is read, and the
 * server is closed before the promise resolves.
 *
 * @returns {Promise<number>} Resolves with the port; rejects on a server error.
 */
function findFreePort() {
  return new Promise((resolve, reject) => {
    const probe = net.createServer();
    // Do not let the probe keep the process alive.
    probe.unref();
    probe.on('error', reject);

    const onListening = () => {
      const assigned = probe.address().port;
      probe.close(() => resolve(assigned));
    };
    probe.listen(0, '127.0.0.1', onListening);
  });
}
33
+
34
/**
 * Is a CDP endpoint already answering on this port? (i.e. our persistent
 * profile's browser is still open from a previous recording).
 *
 * @param {number} port - Port to probe.
 * @param {string} [host='localhost'] - Host to probe.
 * @returns {Promise<boolean>} True when `CDP.List` succeeds, false when it fails.
 */
async function isCdpAlive(port, host = 'localhost') {
  let alive = true;
  try {
    await CDP.List({ port, host });
  } catch {
    alive = false;
  }
  return alive;
}
44
+
45
/**
 * A persistent, dedicated profile per browser. Separate from the user's normal
 * profile (so it never conflicts with their running browser) and persistent
 * (so a login done once carries over to the next recording).
 *
 * The directory is `<home>/.browser-flow-tracker/profiles/<id>` and is created
 * (with parents) if missing.
 *
 * @param {string} [id] - Browser identifier; falsy values use 'default'.
 * @returns {string} Path of the profile directory.
 */
function profileDir(id) {
  const segments = [homedir(), '.browser-flow-tracker', 'profiles', id || 'default'];
  const dir = join(...segments);
  mkdirSync(dir, { recursive: true });
  return dir;
}
53
+
54
/**
 * One recording session: optionally launches (or reuses) a browser, attaches
 * the CDP recorder, and on stop normalizes the records and writes the
 * JSON / HAR / Markdown outputs.
 */
export class TrackingSession {
  /**
   * @param {object} [opts] - Session options. Read by this class: `launch`,
   *   `browser`, `host`, `port`, `url`, `headless`, `urlMatch`,
   *   `includeNoise`, `redact`, `outDir`, `name`, `title`.
   */
  constructor(opts = {}) {
    this.opts = opts;
    this.recorder = null;
    this.launched = null;
    this.active = false;
    this.finalized = null; // set when files have been written
    this.closedByUser = false; // true if finalized because the browser was closed
    this._stopping = null; // in-flight finalization promise, if any
    this.finalizeError = null; // error from a failed automatic finalization
  }

  /**
   * Launch or attach to a browser and begin recording.
   *
   * @returns {Promise<{attachedTo: *, launched: boolean, reusedExisting: boolean, port: number}>}
   */
  async start() {
    const { launch, browser, host = 'localhost', url, headless, urlMatch } = this.opts;
    // In attach mode, honor the given port (the user started their browser on it).
    let port = this.opts.port || 9222;
    this.reusedExisting = false;

    if (launch) {
      const dir = profileDir(browser);
      const portFile = join(dir, '.bft-port');

      // Reuse an already-open recording window (persistent profile still running)?
      if (existsSync(portFile)) {
        const prev = Number(readFileSync(portFile, 'utf8').trim());
        if (prev && (await isCdpAlive(prev, host))) {
          port = prev;
          this.reusedExisting = true;
        }
      }

      if (!this.reusedExisting) {
        // Fresh launch: a free port + a dedicated persistent profile means this
        // never collides with the user's normal browser or a busy 9222.
        port = await findFreePort();
        this.launched = launchBrowser({ id: browser, port, userDataDir: dir, headless });
        writeFileSync(portFile, String(port));
      }
    }

    const targetFilter = urlMatch
      ? (t) => t.url && t.url.includes(urlMatch)
      : undefined;

    this.recorder = new CdpRecorder({ port, host, targetFilter });
    // If the user closes the browser/tab, auto-finalize and write the files.
    this.recorder.onDisconnect = () => { this._onBrowserClosed(); };
    const info = await this.recorder.start();
    this.active = true;
    this.port = port;

    // Drive the initial navigation ourselves (after we're already listening, so
    // no early requests are missed). The user does the rest of the clicking.
    if (launch && url) {
      await this.recorder.navigate(url);
    }

    return {
      attachedTo: info.target,
      launched: Boolean(this.launched),
      reusedExisting: this.reusedExisting,
      port,
    };
  }

  /**
   * Live snapshot without stopping the session.
   *
   * @returns {object} The normalized records so far, or an empty flow when
   *   the session has not been started.
   */
  snapshot() {
    if (!this.recorder) return { flow: [], stats: {} };
    const raw = this.recorder.getRecords();
    return normalize(raw, {
      includeNoise: this.opts.includeNoise,
      redact: this.opts.redact !== false,
    });
  }

  // Called when the CDP socket drops because the user closed the browser/tab.
  _onBrowserClosed() {
    // Ignore the disconnect when we are not recording, already finalized, or
    // a stop is already in flight (the disconnect may be caused by that stop).
    if (!this.active || this.finalized || this._stopping) return;
    this.closedByUser = true;
    // Fire-and-forget: write files with the session's default output settings.
    this.stop({
      outDir: this.opts.outDir,
      name: this.opts.name,
      closeBrowser: false,
    }).catch((err) => {
      // No caller to propagate to; keep the error so it can be inspected.
      this.finalizeError = err;
    });
  }

  /**
   * Stop recording and (optionally) write the output files.
   *
   * Idempotent: once finalized, the stored result is returned. Calls made
   * while a finalization is still running share that run's promise, so the
   * recorder is stopped and files are written only once; the options of the
   * first call win.
   *
   * @param {object} [options]
   * @param {string} [options.outDir] - Output directory; defaults to `<cwd>/recordings`.
   * @param {string} [options.name] - Base file name; defaults to `flow-<timestamp>`.
   * @param {boolean} [options.write=true] - Write the JSON / HAR / Markdown files.
   * @param {boolean} [options.closeBrowser=false] - Kill the browser this session launched.
   * @returns {Promise<{session: object, files: (object|null), closedByUser: boolean}>}
   * @throws {Error} When the session was never started.
   */
  async stop({ outDir, name, write = true, closeBrowser = false } = {}) {
    if (this.finalized) return this.finalized;
    if (!this.recorder) throw new Error('Session not started.');
    if (!this._stopping) {
      const settings = { outDir, name, write, closeBrowser };
      // Deferred by one microtask so `_stopping` is already assigned if the
      // recorder reports a disconnect synchronously from inside its stop().
      this._stopping = Promise.resolve()
        .then(() => this._finalize(settings))
        // Cleared on failure too, so a later stop() can retry.
        .finally(() => { this._stopping = null; });
    }
    return this._stopping;
  }

  // Does the actual stop work: stop the recorder, normalize, write files.
  async _finalize({ outDir, name, write, closeBrowser }) {
    const raw = await this.recorder.stop();
    this.active = false;

    const norm = normalize(raw.records, {
      includeNoise: this.opts.includeNoise,
      redact: this.opts.redact !== false,
    });

    const session = {
      tool: 'browser-flow-tracker',
      version: '0.1.0',
      startedAt: raw.startedAt,
      stoppedAt: raw.stoppedAt,
      target: raw.target,
      stats: norm.stats,
      flow: norm.flow,
      dropped: norm.dropped,
    };

    let files = null;
    if (write) {
      const dir = outDir || join(process.cwd(), 'recordings');
      mkdirSync(dir, { recursive: true });
      const base = name || `flow-${stamp()}`;
      const jsonPath = join(dir, `${base}.flow.json`);
      const harPath = join(dir, `${base}.har`);
      const mdPath = join(dir, `${base}.md`);
      writeFileSync(jsonPath, JSON.stringify(session, null, 2));
      writeFileSync(harPath, JSON.stringify(toHar(session.flow, session), null, 2));
      writeFileSync(mdPath, toMarkdown({ ...session }, { title: this.opts.title }));
      files = { json: jsonPath, har: harPath, markdown: mdPath };
    }

    if (closeBrowser && this.launched?.child) {
      try { this.launched.child.kill(); } catch { /* ignore */ }
    }

    const result = { session, files, closedByUser: this.closedByUser };
    this.finalized = result;
    return result;
  }
}