@venturewild/workspace 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,201 @@
1
+ // SessionReporter — the proactive, consented "is this user okay?" feed.
2
+ //
3
+ // WHY: the get-in path is self-serve, but if a real user gets stuck or their
4
+ // install breaks, we were blind unless they ran `operator enable` and messaged
5
+ // us — which makes us a bottleneck (docs/user-experience.md §5). This forwards a
6
+ // LIVE, REDACTED stream of session events + install health to bmo-sync, keyed by
7
+ // account, established at first load — so a stuck/broken user is never invisible
8
+ // and never has to ask.
9
+ //
10
+ // PRIVACY — the load-bearing boundary: this feed carries WHAT happened (a turn
11
+ // ran, tool X fired, an error, token/cost), NEVER the words. Chat text, tool
12
+ // inputs, file contents, and paths are reduced to lengths/counts before anything
13
+ // leaves the machine. Conversation *content* is a separate, separately-consented
14
+ // channel (transcript.mjs). `redactEvent` is an ALLOWLIST projection — any event
15
+ // type we don't explicitly model forwards only `{type, ts, id}`, so a new event can
16
+ // never leak by default. The matching test asserts a secret typed into a chat
17
+ // turn never appears in the payload.
18
+ //
19
+ // Modeled on error-reporter.mjs (fire-and-forget, rate-limited, disable-able).
20
+ // Gated by BOTH consent (user toggle) AND the shared WILD_WORKSPACE_NO_TELEMETRY
21
+ // kill switch; inert without an accountToken (can't key it) or on a localhost
22
+ // bmo-sync URL (dev).
23
+
24
+ import os from 'node:os';
25
+ import { APP_VERSION } from './config.mjs';
26
+
27
/**
 * Reduce a usage record to its numeric counters so nothing else on the object
 * can ride along in the telemetry payload.
 *
 * @param {unknown} u - usage object as emitted on the activity bus.
 * @returns {{input_tokens: number, output_tokens: number, cost_usd?: number} | null}
 *   `null` when `u` is not an object; otherwise the two token counters (0 when
 *   missing or not a finite number) plus `cost_usd` only when it is a finite number.
 */
function sanitizeUsage(u) {
  if (!u || typeof u !== 'object') return null;

  // Coerce exactly like `Number(x) || 0`, but also map Infinity to 0 (it would
  // serialize as JSON `null`) and swallow the TypeError Number() raises for
  // Symbols — telemetry must never throw into the caller's path.
  const toCount = (value) => {
    let n;
    try {
      n = Number(value);
    } catch {
      return 0;
    }
    return Number.isFinite(n) ? n || 0 : 0;
  };

  const out = {
    input_tokens: toCount(u.input_tokens),
    output_tokens: toCount(u.output_tokens),
  };
  // NaN / Infinity are typeof 'number' but are not a usable cost; drop them.
  if (typeof u.cost_usd === 'number' && Number.isFinite(u.cost_usd)) out.cost_usd = u.cost_usd;
  return out;
}
36
+
37
/**
 * Return `value` only when it is a JSON scalar (string, number, boolean, null);
 * anything else — objects, arrays, functions, symbols, bigints — becomes
 * `undefined`. Used so an allowlisted field can never smuggle a nested payload.
 */
function scalarOrUndefined(value) {
  const kind = typeof value;
  if (value === null || kind === 'string' || kind === 'number' || kind === 'boolean') return value;
  return undefined;
}

/**
 * Project one ActivityBus event to a SAFE shape. Allowlist by type; anything not
 * listed forwards only {type, ts, id}. NEVER returns chat text, tool inputs,
 * file paths, or file contents.
 *
 * Every forwarded field is additionally restricted to JSON scalars, so a field
 * that unexpectedly holds an object (e.g. a tool descriptor carrying its input)
 * is dropped instead of being serialized wholesale.
 *
 * @param {unknown} ev - raw event.
 * @returns {object | null} redacted event, or `null` when `ev` is not an object.
 */
export function redactEvent(ev) {
  if (!ev || typeof ev !== 'object') return null;
  const pick = scalarOrUndefined;

  const base = { type: pick(ev.type), ts: pick(ev.ts), id: pick(ev.id) };
  const messageId = pick(ev.messageId);
  if (messageId) base.messageId = messageId;

  switch (ev.type) {
    case 'chat-user':
      // The user's prompt — length only, never the words.
      return { ...base, textLen: typeof ev.text === 'string' ? ev.text.length : 0 };
    case 'chat-stream': {
      const c = ev.chunk || {};
      const safe = { ...base, chunkType: pick(c.type) };
      if (c.type === 'text' && typeof c.text === 'string') safe.textLen = c.text.length;
      // tool NAME is safe ("Edit", "Bash"); the tool INPUT is not — never forward it.
      // `c.tool` is only used when it is itself a scalar name, never an object.
      if (c.type === 'tool-use') safe.tool = typeof c.name === 'string' ? c.name : pick(c.tool) || null;
      if (c.type === 'usage' && c.usage) safe.usage = sanitizeUsage(c.usage);
      if (c.type === 'error') safe.hasError = true; // the flag, not the message
      return safe;
    }
    case 'usage':
      return { ...base, usage: sanitizeUsage(ev.usage) };
    case 'chat-end':
      return { ...base, code: pick(ev.code) };
    case 'identity-changed':
      return { ...base, tone: pick(ev.tone) || null }; // tone only; drop the agent's name
    case 'onboarded':
      return { ...base, at: pick(ev.at) || null };
    case 'agent-changed':
      return { ...base, agentId: pick(ev.agentId) || null };
    case 'daemon-status':
      return { ...base, status: pick(ev.status) || null };
    case 'operator-action':
      return { ...base, action: pick(ev.action) || null }; // action verb, not its detail
    case 'inbox-change':
      return { ...base, count: Array.isArray(ev.snapshot) ? ev.snapshot.length : undefined };
    case 'presence-join':
    case 'presence-leave':
    case 'presence-focus':
      // sessionId + role are safe; `focus` can be a file path → dropped.
      return { ...base, sessionId: pick(ev.sessionId), role: pick(ev.role) };
    default:
      // Unknown event → minimal envelope only. Privacy fails CLOSED.
      return { type: base.type, ts: base.ts, id: base.id };
  }
}
86
+
87
const FLUSH_INTERVAL_MS = 15_000; // batch window
const MAX_BATCH = 50; // flush early once this many buffer
const MAX_BUFFER = 500; // hard cap — drop oldest beyond this

/**
 * True when `rawUrl` points at this machine (dev bmo-sync). Parses the URL so
 * the scheme (http/https), letter case, and IPv6 loopback are all handled, and
 * so a public host that merely starts with "127" or "localhost" is not mistaken
 * for loopback. Falls back to the literal prefix test when the string is not an
 * absolute URL.
 */
function isLoopbackUrl(rawUrl) {
  let host;
  try {
    host = new URL(rawUrl).hostname.toLowerCase();
  } catch {
    return rawUrl.startsWith('http://127') || rawUrl.startsWith('http://localhost');
  }
  return (
    host === 'localhost' ||
    host.endsWith('.localhost') ||
    host === '[::1]' ||
    /^127(?:\.\d{1,3}){3}$/.test(host)
  );
}

/**
 * Buffers redacted session events and forwards them in batches to bmo-sync.
 * Fire-and-forget: no method here throws into the caller's path, and a failed
 * POST drops its batch rather than retrying.
 */
export class SessionReporter {
  /**
   * @param {object} [opts]
   * @param {string} [opts.bmoSyncUrl] - server base URL; one trailing slash is stripped.
   * @param {string} [opts.accountToken] - keys the feed to an account; required to be capable.
   * @param {string|null} [opts.slug]
   * @param {string} [opts.workspaceId]
   * @param {string|null} [opts.sessionId]
   * @param {boolean} [opts.enabled] - initial consent state.
   * @param {string} [opts.endpointPath]
   * @param {number} [opts.flushIntervalMs] - timer period used by start().
   * @param {number} [opts.maxBatch] - buffer size that triggers an immediate flush.
   * @param {Function} [opts.fetchImpl] - injectable fetch (tests).
   * @param {Function} [opts.nowImpl] - injectable clock returning epoch ms (tests).
   */
  constructor({
    bmoSyncUrl,
    accountToken,
    slug = null,
    workspaceId = 'workspace',
    sessionId = null,
    enabled = true,
    endpointPath = '/api/telemetry',
    flushIntervalMs = FLUSH_INTERVAL_MS,
    maxBatch = MAX_BATCH,
    fetchImpl = (...a) => globalThis.fetch(...a),
    nowImpl = () => Date.now(),
  } = {}) {
    // String() so a URL instance is accepted as well as a plain string.
    this.bmoSyncUrl = bmoSyncUrl ? String(bmoSyncUrl).replace(/\/$/, '') : null;
    this.accountToken = accountToken || null;
    this.slug = slug;
    this.workspaceId = workspaceId;
    this.sessionId = sessionId;
    this.endpointPath = endpointPath;
    this.flushIntervalMs = flushIntervalMs;
    this.maxBatch = maxBatch;
    this.fetchImpl = fetchImpl;
    this.nowImpl = nowImpl;
    this.buffer = [];
    this.timer = null;
    // Inert without a token, without a server, or on localhost (dev).
    this._capable =
      Boolean(this.accountToken) &&
      Boolean(this.bmoSyncUrl) &&
      !isLoopbackUrl(this.bmoSyncUrl);
    this.enabled = enabled !== false && this._capable;
  }

  /** Live consent toggle — no restart needed when the user flips observability. */
  setEnabled(on) {
    this.enabled = Boolean(on) && this._capable;
    // Consent withdrawn → anything still buffered is discarded, not sent.
    if (!this.enabled) this.buffer = [];
  }

  /** Feed one ActivityBus event. Redacts + buffers; flushes on size. No-op when off. */
  ingest(ev) {
    if (!this.enabled) return;
    const safe = redactEvent(ev);
    if (!safe) return;
    this.buffer.push(safe);
    const overflow = this.buffer.length - MAX_BUFFER;
    if (overflow > 0) this.buffer.splice(0, overflow); // drop oldest
    if (this.buffer.length >= this.maxBatch) this.flush();
  }

  /**
   * Build the request payload: account/workspace/session identity, app version,
   * OS, a seconds-resolution `sent_at`, and the given events. Does not send
   * anything. Keys in `extra` are merged last and override the defaults.
   */
  envelope(events, extra = {}) {
    return {
      account_token: this.accountToken,
      slug: this.slug,
      workspace_id: this.workspaceId,
      session_id: this.sessionId,
      app_version: APP_VERSION,
      os: `${os.platform()}-${os.arch()}`,
      sent_at: Math.floor(this.nowImpl() / 1000),
      events,
      ...extra,
    };
  }

  /** Fire-and-forget flush of the current buffer. Never throws. */
  flush() {
    if (!this.enabled || !this.buffer.length) return;
    const events = this.buffer;
    this.buffer = [];
    this._post(this.envelope(events));
  }

  /**
   * POST `body` as JSON with a 5 s abort timeout. Synchronous failures
   * (serialization, fetch throwing) and rejections are swallowed by design.
   */
  _post(body) {
    const url = `${this.bmoSyncUrl}${this.endpointPath}`;
    const ctrl = new AbortController();
    const timer = setTimeout(() => ctrl.abort(), 5000);
    if (timer.unref) timer.unref();
    let pending;
    try {
      // Call synchronously so the request is observable without awaiting a tick.
      pending = this.fetchImpl(url, {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify(body),
        signal: ctrl.signal,
      });
    } catch {
      clearTimeout(timer);
      return; // telemetry must never break the user's path
    }
    Promise.resolve(pending)
      // The response is never read; cancel its body so the connection is
      // released promptly instead of waiting for garbage collection.
      .then((res) => res?.body?.cancel?.())
      .catch(() => {})
      .finally(() => clearTimeout(timer));
  }

  /** Start the periodic flush timer. No-op if already started or not capable. */
  start() {
    if (this.timer || !this._capable) return;
    this.timer = setInterval(() => this.flush(), this.flushIntervalMs);
    if (this.timer.unref) this.timer.unref(); // never keep the process alive
  }

  /** Stop the timer and attempt one last flush of whatever is buffered. */
  stop() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
    this.flush();
  }
}
@@ -0,0 +1,217 @@
1
+ // WorkspaceSupervisor — keeps the wild-workspace server alive in the background.
2
+ //
3
+ // The server itself auto-starts the bmo-sync daemon on boot (DaemonSupervisor),
4
+ // so keeping the server up brings the whole local stack — public URL included —
5
+ // back to life. This is the watchdog half of the always-on feature
6
+ // (docs/always-on-design.md); `service.mjs` is the per-OS autostart half that
7
+ // launches this hidden at login via `wild-workspace service run`.
8
+ //
9
+ // Design (all proven on Windows incl. a real reboot, 2026-05-30):
10
+ // - Health-driven: polls GET /api/health and (re)spawns the server only when
11
+ // it is down — so it never fights a server someone else started and handles
12
+ // crash recovery naturally.
13
+ // - Singleton: an exclusive lockfile in the machine-global dir
14
+ // (~/.wild-workspace, NEVER the synced workspace — locked principle #1).
15
+ // A stale lock whose pid is dead is taken over.
16
+ // - Exponential backoff (capped) so a crash-looping server can't spin the CPU.
17
+ // - Everything is logged — silent death is the #1 un-debuggable failure mode.
18
+ //
19
+ // Every external touch-point (spawn, health probe, clock) is an injected seam
20
+ // so the suite never spawns a real process.
21
+
22
+ import { spawn } from 'node:child_process';
23
+ import http from 'node:http';
24
+ import fs from 'node:fs';
25
+ import os from 'node:os';
26
+ import path from 'node:path';
27
+ import { fileURLToPath } from 'node:url';
28
+
29
// Directory containing this module (ES modules have no built-in __dirname).
const thisModuleFile = fileURLToPath(import.meta.url);
const __dirname = path.dirname(thisModuleFile);
// Default server entry point: index.mjs next to this module.
const DEFAULT_SERVER_ENTRY = path.join(__dirname, 'index.mjs');
31
+
32
/**
 * Resolve true iff the local server answers /api/health. Never throws and
 * never rejects.
 *
 * Any HTTP response (whatever its status code) counts as "answering"; a
 * connection error, a timeout, or an unusable port resolves false.
 *
 * @param {number} port - TCP port on 127.0.0.1 to probe.
 * @param {number} [timeoutMs=2500] - socket timeout before giving up.
 * @returns {Promise<boolean>}
 */
export function probeHealth(port, timeoutMs = 2500) {
  return new Promise((resolve) => {
    let req;
    try {
      req = http.get(
        { host: '127.0.0.1', port, path: '/api/health', timeout: timeoutMs },
        (res) => { res.resume(); resolve(res.statusCode > 0); },
      );
    } catch {
      // http.get throws synchronously on invalid options (e.g. an out-of-range
      // port); inside a Promise executor that would become a rejection.
      resolve(false);
      return;
    }
    req.on('error', () => resolve(false));
    // 'timeout' only signals idleness; the request must be destroyed explicitly.
    req.on('timeout', () => { req.destroy(); resolve(false); });
  });
}
43
+
44
/**
 * Watchdog that keeps the workspace server running: polls its health endpoint
 * and (re)spawns it with capped exponential backoff when it is down. A lockfile
 * in `globalDir` ensures only one supervisor runs at a time.
 */
export class WorkspaceSupervisor {
  /**
   * All external touch-points (spawn, probe, clock, env, diagnostics) are
   * injectable so tests never start a real process.
   *
   * @param {object} [opts]
   * @param {string} [opts.serverEntry] - script passed to `node` when spawning.
   * @param {string} [opts.workspaceDir] - cwd for the server and WILD_WORKSPACE_DIR.
   * @param {number} [opts.port] - port probed for health.
   * @param {string} [opts.globalDir] - directory for the lock and log files.
   * @param {string} [opts.node] - node executable used to spawn the server.
   * @param {number} [opts.pollMs] - supervision interval.
   * @param {number} [opts.backoffStartMs] - initial respawn backoff.
   * @param {number} [opts.backoffMaxMs] - backoff cap.
   * @param {number} [opts.probeTimeoutMs] - timeout handed to the probe.
   * @param {Function} [opts.spawnImpl]
   * @param {Function} [opts.probeImpl]
   * @param {Function} [opts.nowImpl]
   * @param {object} [opts.env] - base environment for the spawned server.
   * @param {number} [opts.crashLoopThreshold] - consecutive spawns that count as a crash loop.
   * @param {Function|null} [opts.diagnosticsImpl] - replaces the real diagnostics push.
   */
  constructor({
    serverEntry = DEFAULT_SERVER_ENTRY,
    workspaceDir = process.cwd(),
    port = Number(process.env.WILD_WORKSPACE_PORT || 5173),
    globalDir = path.join(os.homedir(), '.wild-workspace'),
    node = process.execPath,
    pollMs = 3000,
    backoffStartMs = 1000,
    backoffMaxMs = 30000,
    probeTimeoutMs = 2500,
    spawnImpl = spawn,
    probeImpl = probeHealth,
    nowImpl = () => Date.now(),
    env = process.env,
    crashLoopThreshold = 3,
    diagnosticsImpl = null,
  } = {}) {
    Object.assign(this, {
      serverEntry, workspaceDir, port, globalDir, node, pollMs,
      backoffStartMs, backoffMaxMs, probeTimeoutMs, spawnImpl, probeImpl, nowImpl, env,
      crashLoopThreshold, diagnosticsImpl,
    });
    this.logFile = path.join(globalDir, 'supervisor.log');
    this.serverLogFile = path.join(globalDir, 'server.out.log');
    this.lockFile = path.join(globalDir, 'supervisor.lock');
    this.child = null;
    this.backoff = backoffStartMs;
    this.lastSpawn = 0;
    this.timer = null;
    this.spawnCount = 0; // consecutive spawns without becoming healthy
    this.pushedThisEpisode = false; // crash-loop diagnostics pushed once per episode
  }

  /** Append a timestamped line to the supervisor log. Best-effort; never throws. */
  log(msg) {
    try { fs.appendFileSync(this.logFile, `[${new Date().toISOString()}] ${msg}\n`); } catch { /* best-effort */ }
  }

  /** Is a pid alive? EPERM means "exists, not ours" → still alive. */
  pidAlive(pid) {
    // 0 and negative pids address process groups in kill(2), so a corrupted
    // lockfile such as "-1" would otherwise read as a live owner forever.
    if (!Number.isInteger(pid) || pid <= 0) return false;
    try { process.kill(pid, 0); return true; } catch (e) { return e?.code === 'EPERM'; }
  }

  /** Atomically create the lockfile holding our pid. False if it cannot be created. */
  _createLockFile() {
    let fd;
    try { fd = fs.openSync(this.lockFile, 'wx'); } catch { return false; }
    try {
      fs.writeSync(fd, String(process.pid));
      return true;
    } catch {
      return false;
    } finally {
      try { fs.closeSync(fd); } catch { /* nothing more to do */ }
    }
  }

  /**
   * Exclusive lock; take over ONLY a stale lock (recorded pid no longer alive).
   *
   * A stale lock is removed and then re-created with an exclusive open rather
   * than overwritten in place, so when several supervisors notice the same
   * stale lock the losers see the winner's live pid on their retry and back
   * off. (A small window between reading and unlinking remains.)
   *
   * @returns {boolean} true when this process now holds the lock.
   */
  acquireLock() {
    try { fs.mkdirSync(this.globalDir, { recursive: true }); } catch { /* surfaced below */ }
    for (let attempt = 0; attempt < 2; attempt += 1) {
      if (this._createLockFile()) {
        if (attempt > 0) this.log('took over stale lock');
        return true;
      }
      let owner = null;
      try { owner = Number(fs.readFileSync(this.lockFile, 'utf8').trim()); } catch { /* unreadable */ }
      if (owner && this.pidAlive(owner)) {
        this.log(`live supervisor pid=${owner} already running; exiting`);
        return false;
      }
      if (attempt > 0) break; // someone else re-created it mid-takeover; let them have it
      try {
        fs.unlinkSync(this.lockFile);
      } catch (e) {
        if (e?.code === 'ENOENT') continue; // already gone — just retry the exclusive create
        // Cannot delete (e.g. directory not writable): overwrite in place instead.
        try { fs.writeFileSync(this.lockFile, String(process.pid)); this.log('took over stale lock'); return true; }
        catch { return false; }
      }
    }
    return false;
  }

  /** Remove the lockfile, but only if it still records our pid. */
  releaseLock() {
    try {
      if (Number(fs.readFileSync(this.lockFile, 'utf8').trim()) === process.pid) fs.unlinkSync(this.lockFile);
    } catch { /* already gone */ }
  }

  /**
   * Spawn the server with stdout/stderr appended to the server log (or
   * discarded when the log cannot be opened) and track it as `this.child`.
   *
   * @returns {object} the child returned by `spawnImpl`.
   */
  spawnServer() {
    let out = 'ignore';
    try { out = fs.openSync(this.serverLogFile, 'a'); } catch { /* output discarded */ }
    // Recorded before the attempt so a synchronously failing spawn still backs off.
    this.lastSpawn = this.nowImpl();
    let child;
    try {
      child = this.spawnImpl(this.node, [this.serverEntry], {
        cwd: this.workspaceDir,
        windowsHide: true,
        stdio: ['ignore', out, out],
        env: { ...this.env, WILD_WORKSPACE_NO_OPEN: '1', WILD_WORKSPACE_DIR: this.workspaceDir },
      });
    } finally {
      // The child holds its own copy of the fd; always close the parent's, even if spawn threw.
      if (typeof out === 'number') { try { fs.closeSync(out); } catch { /* parent fd */ } }
    }
    this.child = child;
    const pid = child && child.pid;
    this.log(`spawned server pid=${pid} (backoff=${this.backoff}ms)`);
    if (child && child.on) {
      // Only clear the slot if it still refers to THIS child, so a late event
      // from an old child cannot orphan a newer one.
      const release = () => { if (this.child === child) this.child = null; };
      // Without an 'error' listener a failed spawn (ENOENT/EACCES) would raise
      // an unhandled 'error' event and kill the supervisor itself. 'exit' is
      // not guaranteed after a spawn failure, so free the slot here when the
      // process never started (no pid).
      child.on('error', (err) => {
        this.log(`server pid=${pid} error: ${err?.message || err}`);
        if (child.pid == null) release();
      });
      child.on('exit', (code, sig) => { this.log(`server pid=${pid} exited code=${code} sig=${sig}`); release(); });
    }
    return this.child;
  }

  /**
   * One supervision step. Returns its decision (exposed for tests):
   * 'healthy' | 'booting' | 'backoff' | 'spawned'.
   */
  async tick() {
    if (await this.probeImpl(this.port, this.probeTimeoutMs)) {
      this.backoff = this.backoffStartMs; // healthy → reset backoff
      this.spawnCount = 0; // healthy → not a crash loop
      this.pushedThisEpisode = false;
      return 'healthy';
    }
    if (this.child) return 'booting'; // spawned, still coming up
    if (this.nowImpl() - this.lastSpawn < this.backoff) return 'backoff';
    this.spawnServer();
    this.backoff = Math.min(this.backoff * 2, this.backoffMaxMs);
    this.spawnCount += 1;
    // Crash loop: the server won't stay up, so the operator channel (which rides
    // the :5173 server) can't reach this machine at all. Push an install-down
    // `doctor` bundle to bmo-sync ONCE per episode so support sees it anyway —
    // the install-failed-before-server-up case (docs/user-experience.md §5).
    if (this.spawnCount >= this.crashLoopThreshold && !this.pushedThisEpisode) {
      this.pushedThisEpisode = true;
      Promise.resolve(this.pushDiagnostics()).catch((e) => this.log(`diag push error: ${e?.message || e}`));
    }
    return 'spawned';
  }

  /**
   * Push an install-down diagnostic bundle to bmo-sync. Injected (`diagnosticsImpl`)
   * in tests; the real path is consent- + token-gated and never runs under the
   * test runner. Best-effort, never throws into the supervision loop.
   */
  async pushDiagnostics() {
    if (this.diagnosticsImpl) return this.diagnosticsImpl(this);
    if (process.env.VITEST || process.env.NODE_ENV === 'test') return;
    try {
      const [{ buildConfig }, { runDoctor }, { loadObservabilityConsent }] = await Promise.all([
        import('./config.mjs'),
        import('./doctor.mjs'),
        import('./observability.mjs'),
      ]);
      const config = buildConfig({ workspaceDir: this.workspaceDir, port: this.port });
      if (!config.accountToken) return; // can't key it to a user
      if (process.env.WILD_WORKSPACE_NO_TELEMETRY === '1') return; // kill switch
      if (!loadObservabilityConsent(config.dataDir).enabled) return; // consent
      const report = await runDoctor({ config });
      const url = `${config.bmoSyncServerUrl.replace(/\/$/, '')}/api/telemetry`;
      const ctrl = new AbortController();
      const t = setTimeout(() => ctrl.abort(), 5000);
      try {
        const res = await fetch(url, {
          method: 'POST',
          headers: { 'content-type': 'application/json' },
          body: JSON.stringify({
            account_token: config.accountToken,
            slug: config.account?.slug || null,
            workspace_id: config.workspaceId,
            kind: 'install-down',
            doctor: report,
            sent_at: Math.floor(Date.now() / 1000),
          }),
          signal: ctrl.signal,
        });
        // fetch resolves on HTTP error statuses too; don't log those as delivered.
        if (res && res.ok === false) {
          this.log(`diagnostics push failed: HTTP ${res.status}`);
        } else {
          this.log(`pushed install-down diagnostics (fail=${report.summary?.fail})`);
        }
      } finally {
        clearTimeout(t);
      }
    } catch (e) {
      this.log(`diagnostics push failed: ${e?.message || e}`);
    }
  }

  /** Acquire the lock and start the supervision loop. Idempotent across processes. */
  start() {
    if (!this.acquireLock()) return { started: false, reason: 'already-running' };
    // 'exit' handlers must be synchronous; releaseLock is. The signal handlers
    // route through process.exit so that 'exit' handler runs.
    process.on('exit', () => this.releaseLock());
    process.on('SIGTERM', () => process.exit(0));
    process.on('SIGINT', () => process.exit(0));
    this.log(`supervisor start pid=${process.pid} watching http://127.0.0.1:${this.port}/api/health (workspace=${this.workspaceDir})`);
    this.timer = setInterval(() => { this.tick().catch((e) => this.log(`tick error: ${e?.message || e}`)); }, this.pollMs);
    this.tick().catch((e) => this.log(`tick error: ${e?.message || e}`));
    return { started: true };
  }

  /** Stop polling and release the lock. Does not stop a running server child. */
  stop() {
    if (this.timer) { clearInterval(this.timer); this.timer = null; }
    this.releaseLock();
  }
}