@ulpi/browse 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/constants.ts CHANGED
@@ -1,9 +1,11 @@
1
/**
 * Parse a millisecond timeout override from an environment-variable string.
 *
 * @param raw - Raw value such as `process.env.BROWSE_TIMEOUT`; may be undefined.
 * @returns The parsed value when it is a positive integer, otherwise 0
 *   (unset, empty, non-numeric, zero, or negative input).
 */
function parseTimeoutOverride(raw: string | undefined): number {
  const parsed = Number.parseInt(raw ?? '', 10);
  // Negative or NaN values must not leak through: callers use `value || fallback`,
  // and a negative number is truthy, so it would otherwise become the timeout.
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
}

// Optional timeout override in ms read from the BROWSE_TIMEOUT env var.
// 0 means "not set", so `BROWSE_TIMEOUT || <default>` falls back to the default.
const BROWSE_TIMEOUT = parseTimeoutOverride(process.env.BROWSE_TIMEOUT);
2
+
1
3
  export const DEFAULTS = {
2
4
  PORT_RANGE_START: 9400,
3
5
  PORT_RANGE_END: 10400,
4
6
  IDLE_TIMEOUT_MS: 30 * 60 * 1000, // 30 min
5
- COMMAND_TIMEOUT_MS: 15_000, // 15s for navigation
6
- ACTION_TIMEOUT_MS: 5_000, // 5s for clicks/fills
7
+ COMMAND_TIMEOUT_MS: BROWSE_TIMEOUT || 15_000, // 15s for navigation
8
+ ACTION_TIMEOUT_MS: BROWSE_TIMEOUT || 5_000, // 5s for clicks/fills
7
9
  HEALTH_CHECK_TIMEOUT_MS: 2_000,
8
10
  BUFFER_HIGH_WATER_MARK: 50_000,
9
11
  BUFFER_FLUSH_INTERVAL_MS: 1_000,
@@ -0,0 +1,134 @@
1
/**
 * Domain filter — blocks navigation and sub-resource requests outside an allowlist.
 *
 * Supports:
 *   - Exact domain: "example.com" matches only example.com
 *   - Wildcard: "*.example.com" matches example.com AND any subdomain
 *   - Case-insensitive matching
 */
export class DomainFilter {
  /** Allowlist patterns, lower-cased once at construction. */
  private readonly domains: string[];

  /**
   * @param domains - Allowlist patterns; either an exact host ("example.com")
   *   or a wildcard ("*.example.com").
   */
  constructor(domains: string[]) {
    this.domains = domains.map(d => d.toLowerCase());
  }

  /**
   * Check if a URL's domain is in the allowlist.
   *
   * The URL is parsed before its scheme is inspected, so inputs the URL parser
   * normalizes to HTTP(S) — upper-case schemes ("HTTPS://…"), leading
   * whitespace, or "http:host" without slashes — are filtered like any other
   * HTTP(S) URL instead of being mistaken for a non-HTTP scheme.
   *
   * @param url - URL string to check.
   * @returns true if allowed, false if blocked. URLs whose parsed scheme is not
   *   http/https (about:blank, data:, etc.) are always allowed. Strings that
   *   cannot be parsed are blocked when they look like HTTP(S) and allowed
   *   otherwise.
   */
  isAllowed(url: string): boolean {
    let parsed: URL;
    try {
      parsed = new URL(url);
    } catch {
      // Unparseable: block anything that claims to be HTTP(S); other strings
      // keep the historical "non-HTTP is allowed" behavior.
      return !/^\s*https?:/i.test(url);
    }

    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
      return true;
    }

    return this.matchesHost(parsed.hostname.toLowerCase());
  }

  /**
   * Get a human-readable error message for a blocked URL.
   *
   * @param url - The blocked URL; used verbatim in the message if it cannot be parsed.
   * @returns Message naming the hostname and the configured allowlist.
   */
  blockedMessage(url: string): string {
    let hostname = url;
    try {
      hostname = new URL(url).hostname;
    } catch {
      // Best effort: fall back to the raw input when it is not a valid URL.
    }
    return `Domain "${hostname}" is not in the allowed list: ${this.domains.join(', ')}`;
  }

  /**
   * Generate a JS init script that wraps WebSocket, EventSource, and
   * navigator.sendBeacon with domain checks. Playwright's context.route()
   * only covers HTTP — these JS-level APIs bypass it entirely.
   *
   * Injected via context.addInitScript() so it runs before any page JS.
   *
   * The in-page check resolves the URL against the document base before
   * inspecting it, so protocol-relative ("//host/path") and relative URLs, as
   * well as upper-case schemes ("WSS://…"), are checked against the host they
   * will actually connect to.
   *
   * @returns Source text of a self-invoking function.
   */
  generateInitScript(): string {
    const domainsJson = JSON.stringify(this.domains);
    return `(function() {
  var __allowedDomains = ${domainsJson};

  function __isAllowed(url) {
    if (!url) return true;
    // Resolve the way the browser will: relative and protocol-relative URLs
    // are interpreted against the document base URL.
    var base;
    try { base = document.baseURI || location.href; } catch (e) { base = undefined; }
    var parsed;
    try { parsed = new URL(String(url), base); } catch (e) { return false; }
    var scheme = parsed.protocol;
    // Only network schemes are filtered; data:, blob:, etc. are always allowed.
    if (scheme !== 'http:' && scheme !== 'https:' && scheme !== 'ws:' && scheme !== 'wss:') return true;
    var hostname = parsed.hostname.toLowerCase();
    for (var i = 0; i < __allowedDomains.length; i++) {
      var pattern = __allowedDomains[i];
      if (pattern.startsWith('*.')) {
        var base2 = pattern.slice(2);
        if (hostname === base2 || hostname.endsWith('.' + base2)) return true;
      } else {
        if (hostname === pattern) return true;
      }
    }
    return false;
  }

  // Wrap WebSocket
  var OrigWebSocket = window.WebSocket;
  if (OrigWebSocket) {
    window.WebSocket = function(url, protocols) {
      if (!__isAllowed(url)) throw new Error('WebSocket blocked by domain filter: ' + url);
      if (protocols !== undefined) return new OrigWebSocket(url, protocols);
      return new OrigWebSocket(url);
    };
    window.WebSocket.prototype = OrigWebSocket.prototype;
    window.WebSocket.CONNECTING = OrigWebSocket.CONNECTING;
    window.WebSocket.OPEN = OrigWebSocket.OPEN;
    window.WebSocket.CLOSING = OrigWebSocket.CLOSING;
    window.WebSocket.CLOSED = OrigWebSocket.CLOSED;
  }

  // Wrap EventSource
  var OrigEventSource = window.EventSource;
  if (OrigEventSource) {
    window.EventSource = function(url, opts) {
      if (!__isAllowed(url)) throw new Error('EventSource blocked by domain filter: ' + url);
      if (opts !== undefined) return new OrigEventSource(url, opts);
      return new OrigEventSource(url);
    };
    window.EventSource.prototype = OrigEventSource.prototype;
  }

  // Wrap navigator.sendBeacon
  if (navigator.sendBeacon) {
    var origSendBeacon = navigator.sendBeacon.bind(navigator);
    navigator.sendBeacon = function(url, data) {
      if (!__isAllowed(url)) return false;
      return origSendBeacon(url, data);
    };
  }
})();`;
  }

  /** Return true when a lower-cased hostname matches any allowlist pattern. */
  private matchesHost(hostname: string): boolean {
    return this.domains.some(pattern => {
      if (pattern.startsWith('*.')) {
        // Wildcard: *.example.com matches example.com itself AND any subdomain.
        const base = pattern.slice(2);
        return hostname === base || hostname.endsWith('.' + base);
      }
      return hostname === pattern;
    });
  }
}
package/src/har.ts ADDED
@@ -0,0 +1,66 @@
1
+ /**
2
+ * HAR 1.2 export — converts NetworkEntry[] to HTTP Archive format
3
+ */
4
+
5
+ import type { NetworkEntry } from './buffers';
6
+
7
/**
 * Bookkeeping record for a HAR recording.
 */
export interface HarRecording {
  /**
   * Numeric start time of the recording.
   * NOTE(review): presumably epoch milliseconds, comparable to the entry
   * timestamps filtered in formatAsHar — confirm against the caller.
   */
  startTime: number;

  /**
   * Flag for the recording state.
   * NOTE(review): presumably true while a recording is in progress — confirm.
   */
  active: boolean;
}
11
+
12
/**
 * Split a URL's query string into HAR-style name/value pairs.
 *
 * @param url - Absolute URL to parse.
 * @returns One pair per query parameter in document order (values are
 *   percent-decoded by URLSearchParams); an empty array if the URL is invalid.
 */
function parseQueryString(url: string): Array<{ name: string; value: string }> {
  try {
    const u = new URL(url);
    return Array.from(u.searchParams, ([name, value]) => ({ name, value }));
  } catch {
    return [];
  }
}

/**
 * Convert captured network entries into a HAR 1.2 log object.
 *
 * Only entries with `timestamp >= startTime` are included. Fields this module
 * does not capture are emitted as empty arrays/strings or -1 ("not available").
 *
 * @param entries - Captured network entries.
 * @param startTime - Lower bound (inclusive) on entry timestamps, in the same
 *   unit as `entry.timestamp`.
 * @returns An object of the shape `{ log: { version, creator, entries } }`.
 */
export function formatAsHar(entries: NetworkEntry[], startTime: number): object {
  const harEntries = entries
    .filter(e => e.timestamp >= startTime)
    .map(e => ({
      startedDateTime: new Date(e.timestamp).toISOString(),
      time: e.duration || 0,
      request: {
        method: e.method,
        url: e.url,
        httpVersion: 'HTTP/1.1',
        cookies: [],
        headers: [],
        queryString: parseQueryString(e.url),
        headersSize: -1,
        bodySize: -1,
      },
      response: {
        status: e.status || 0,
        statusText: '',
        httpVersion: 'HTTP/1.1',
        cookies: [],
        headers: [],
        content: {
          size: e.size || 0,
          mimeType: '',
        },
        redirectURL: '',
        headersSize: -1,
        // -1 means "not available". A recorded size of 0 is a real empty body
        // and must stay 0 so it agrees with content.size; only a missing size
        // maps to -1.
        bodySize: e.size ?? -1,
      },
      cache: {},
      timings: {
        send: 0,
        // The whole measured duration is attributed to `wait`; send/receive
        // are not measured separately.
        wait: e.duration || 0,
        receive: 0,
      },
    }));

  return {
    log: {
      version: '1.2',
      // NOTE(review): version string is hard-coded; confirm it should not
      // track the published package version.
      creator: { name: '@ulpi/browse', version: '0.2.0' },
      entries: harEntries,
    },
  };
}
package/src/policy.ts ADDED
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Action policy — gate commands via JSON config
3
+ *
4
+ * File: browse-policy.json (project root) or BROWSE_POLICY env var
5
+ * Format: { default: "allow"|"deny", deny?: string[], confirm?: string[], allow?: string[] }
6
+ * Precedence: deny > confirm > allow whitelist > default
7
+ * Hot-reloads on mtime change.
8
+ */
9
+
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+
13
/**
 * Shape of the policy JSON document. Every field is optional.
 */
interface ActionPolicy {
  /** Verdict for commands that no list mentions. */
  default?: 'allow' | 'deny';

  /** Command names that are refused. */
  deny?: string[];

  /** Command names that yield a 'confirm' verdict. */
  confirm?: string[];

  /** Command names that are permitted; when present, acts as a whitelist. */
  allow?: string[];
}
19
+
20
/** Verdict produced by a policy check for a single command. */
export type PolicyResult =
  | 'allow'
  | 'deny'
  | 'confirm';
21
+
22
/**
 * Search for `filename` in the current working directory and then in each
 * ancestor directory in turn.
 *
 * The search inspects at most 20 directories and stops early at the
 * filesystem root.
 *
 * @param filename - File name to look for in each directory.
 * @returns Full path of the first match, or null when none is found.
 */
function findFileUpward(filename: string): string | null {
  let current = process.cwd();
  let remaining = 20;

  while (remaining-- > 0) {
    const probe = path.join(current, filename);
    if (fs.existsSync(probe)) {
      return probe;
    }

    const up = path.dirname(current);
    if (up === current) {
      // dirname() of the root is the root itself: nowhere left to climb.
      return null;
    }
    current = up;
  }

  return null;
}
37
+
38
/**
 * Gates commands against a JSON policy file plus an optional environment
 * override, re-reading the file whenever its mtime changes.
 */
export class PolicyChecker {
  private filePath: string | null = null;
  private lastMtime: number = 0;
  private policy: ActionPolicy | null = null;
  private confirmOverrides: Set<string> | null = null;

  /**
   * @param filePath - Explicit policy file path. When omitted, the
   *   BROWSE_POLICY env var is used, then a `browse-policy.json` found by
   *   walking up from cwd, then `browse-policy.json` relative to cwd.
   */
  constructor(filePath?: string) {
    // Explicit path from env or argument, or walk up from cwd to find browse-policy.json.
    this.filePath = filePath || process.env.BROWSE_POLICY || findFileUpward('browse-policy.json') || 'browse-policy.json';

    // Parse BROWSE_CONFIRM_ACTIONS env var (comma-separated command names).
    const confirmEnv = process.env.BROWSE_CONFIRM_ACTIONS;
    if (confirmEnv) {
      this.confirmOverrides = new Set(
        confirmEnv.split(',').map(s => s.trim()).filter(Boolean)
      );
    }

    this.reload();
  }

  /**
   * Read an optional command list from parsed JSON.
   *
   * @returns The list, or undefined when the field is absent or null.
   * @throws Error when the value is present but not an array of strings.
   */
  private static readList(value: unknown, field: string): string[] | undefined {
    if (value === undefined || value === null) return undefined;
    if (!Array.isArray(value)) {
      throw new Error(`policy "${field}" must be an array of strings`);
    }
    const list: string[] = [];
    for (const item of value) {
      if (typeof item !== 'string') {
        throw new Error(`policy "${field}" must be an array of strings`);
      }
      list.push(item);
    }
    return list;
  }

  /**
   * Read the optional `default` verdict from parsed JSON.
   *
   * @throws Error when the value is present but neither "allow" nor "deny".
   */
  private static readDefault(value: unknown): 'allow' | 'deny' | undefined {
    if (value === undefined || value === null) return undefined;
    if (value === 'allow') return 'allow';
    if (value === 'deny') return 'deny';
    throw new Error('policy "default" must be "allow" or "deny"');
  }

  /**
   * Parse and validate policy file contents.
   *
   * Validating here keeps check() from throwing a TypeError on a non-array
   * list, from substring-matching a string-valued list, and from returning a
   * `default` that is not a valid verdict.
   *
   * @throws SyntaxError on invalid JSON; Error on a structurally invalid policy.
   */
  private static parsePolicy(raw: string): ActionPolicy {
    const data: unknown = JSON.parse(raw);
    if (typeof data !== 'object' || data === null || Array.isArray(data)) {
      throw new Error('policy must be a JSON object');
    }
    const fields = data as Record<string, unknown>;
    return {
      default: PolicyChecker.readDefault(fields.default),
      deny: PolicyChecker.readList(fields.deny, 'deny'),
      confirm: PolicyChecker.readList(fields.confirm, 'confirm'),
      allow: PolicyChecker.readList(fields.allow, 'allow'),
    };
  }

  /** Re-read the policy file if its mtime differs from the last one seen. */
  private reload(): void {
    if (!this.filePath) return;
    try {
      const stat = fs.statSync(this.filePath);
      if (stat.mtimeMs === this.lastMtime) return;
      // Recorded before parsing so an invalid file is not re-parsed on every
      // check() until it changes again.
      this.lastMtime = stat.mtimeMs;

      const raw = fs.readFileSync(this.filePath, 'utf-8');
      this.policy = PolicyChecker.parsePolicy(raw);
    } catch {
      // File missing, unreadable, not valid JSON, or structurally invalid —
      // if a policy was loaded before, keep last-known-good.
      // If none was ever loaded, policy stays null (everything allowed).
    }
  }

  /**
   * Decide what to do with a command.
   *
   * Order of evaluation: BROWSE_CONFIRM_ACTIONS override, then the file
   * policy's deny list, confirm list, allow whitelist, and finally `default`.
   *
   * @param command - Command name, matched exactly against the lists.
   * @returns 'allow', 'deny', or 'confirm'.
   */
  check(command: string): PolicyResult {
    this.reload();

    // Env var overrides take priority for confirm.
    // NOTE(review): this runs before the deny list, so a command listed in both
    // BROWSE_CONFIRM_ACTIONS and `deny` yields 'confirm' — confirm this is intended.
    if (this.confirmOverrides?.has(command)) return 'confirm';

    if (!this.policy) return 'allow';

    // Precedence: deny > confirm > allow whitelist > default
    if (this.policy.deny?.includes(command)) return 'deny';
    if (this.policy.confirm?.includes(command)) return 'confirm';
    if (this.policy.allow) {
      return this.policy.allow.includes(command) ? 'allow' : 'deny';
    }
    return this.policy.default || 'allow';
  }

  /** @returns true when a file policy is loaded or env confirm overrides are set. */
  isActive(): boolean {
    return this.policy !== null || this.confirmOverrides !== null;
  }
}
@@ -0,0 +1,11 @@
1
/**
 * Sanitize a user-supplied name for safe use in file paths.
 *
 * Path separators ("/" and "\"), NUL bytes, and parent-directory references
 * ("..") are each replaced with "_".
 *
 * @param name - Untrusted name to sanitize.
 * @returns The sanitized name.
 * @throws Error when the result is empty or consists only of "." and "_"
 *   characters (e.g. "", ".", "..", "/").
 */
export function sanitizeName(name: string): string {
  // NUL is replaced alongside the separators: Node's fs APIs reject any path
  // containing a null byte, so letting it through would only fail later.
  const sanitized = name.replace(/[\/\\\0]/g, '_').replace(/\.\./g, '_');
  if (!sanitized || /^[._]+$/.test(sanitized)) {
    throw new Error(`Invalid name: "${name}"`);
  }
  return sanitized;
}
+ }