freddie 0.0.117 → 0.0.119

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "freddie",
3
- "version": "0.0.117",
3
+ "version": "0.0.119",
4
4
  "type": "module",
5
5
  "description": "Open JS agent harness built on pi-mono, floosie, xstate, and anentrypoint-design",
6
6
  "bin": {
@@ -27,7 +27,7 @@
27
27
  "@mariozechner/pi-ai": "^0.70.6",
28
28
  "@mariozechner/pi-coding-agent": "^0.70.6",
29
29
  "@mariozechner/pi-tui": "^0.70.6",
30
- "acptoapi": "^1.0.112",
30
+ "acptoapi": "^1.0.114",
31
31
  "anentrypoint-design": "^0.0.140",
32
32
  "commander": "^14.0.0",
33
33
  "express": "^5.0.0",
@@ -0,0 +1,49 @@
1
+ // Exposes live persisted machine snapshots over HTTP for the dashboard.
2
+ // GET /api/machines -> { count, kinds:{<kind>:n}, machines:[{kind,key,status,state,machine_id,updated}] }
3
+ // GET /api/machines/:kind -> machines filtered to one kind
4
+ // POST /api/machines/resume -> { ok, summary } : drive resumeAll() on demand
5
+ import { list } from '../../src/machines/snapshot-store.js'
6
+ import { registerDebug } from '../../src/observability/debug.js'
7
+
8
+ // window.__debug.machines() / GET /debug/machines — static pointer to /api/machines plus the known machine kinds.
9
+ registerDebug('machines', () => ({ note: 'GET /api/machines for live snapshots', kinds: ['agent', 'cron', 'batch', 'gateway', 'gateway-msg', 'acp', 'acp-prompt'] }))
10
+
11
+ async function snapshotRows(kind = null) {
12
+ const rows = await list({ kind, status: null })
13
+ return rows.map(r => {
14
+ let state = null
15
+ try { const ps = JSON.parse(r.snapshot_json || 'null'); state = ps?.value ?? null } catch {}
16
+ return { kind: r.kind, key: r.key, status: r.status, state, machine_id: r.machine_id, updated: r.updated }
17
+ })
18
+ }
19
+
20
+ export default {
21
+ name: 'gui-machines', surfaces: 'gui',
22
+ register({ gui }) {
23
+ gui.route('GET', '/api/machines', async (_req, res) => {
24
+ try {
25
+ // list() does not return snapshot_json (truncated for size); read the full rows directly from the table instead.
26
+ const { db } = await import('../../src/db.js')
27
+ const d = await db()
28
+ await d.exec(`CREATE TABLE IF NOT EXISTS machine_snapshots (kind TEXT, key TEXT, schema_version INTEGER, machine_id TEXT, snapshot_json TEXT, status TEXT, updated INTEGER, PRIMARY KEY(kind,key))`)
29
+ const all = await d.prepare(`SELECT kind, key, machine_id, snapshot_json, status, updated FROM machine_snapshots ORDER BY updated DESC`).all()
30
+ const kinds = {}
31
+ const machines = all.map(r => {
32
+ kinds[r.kind] = (kinds[r.kind] || 0) + 1
33
+ let state = null
34
+ try { state = JSON.parse(r.snapshot_json || 'null')?.value ?? null } catch {}
35
+ return { kind: r.kind, key: r.key, status: r.status, state, machine_id: r.machine_id, updated: r.updated }
36
+ })
37
+ res.json({ count: machines.length, kinds, machines })
38
+ } catch (e) { res.status(500).json({ error: String(e.message || e) }) }
39
+ })
40
+ gui.route('GET', '/api/machines/:kind', async (req, res) => {
41
+ try { res.json({ machines: await snapshotRows(req.params.kind) }) }
42
+ catch (e) { res.status(500).json({ error: String(e.message || e) }) }
43
+ })
44
+ gui.route('POST', '/api/machines/resume', async (_req, res) => {
45
+ try { const { resumeAll } = await import('../../src/machines/resume.js'); res.json({ ok: true, summary: await resumeAll() }) }
46
+ catch (e) { res.status(500).json({ error: String(e.message || e) }) }
47
+ })
48
+ },
49
+ }
package/src/acp/main.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { AcpServer } from './server.js'
2
- export function startAcpStdio() {
2
+ export async function startAcpStdio() {
3
+ try { const { resumeAll } = await import('../machines/resume.js'); await resumeAll() } catch (_) {}
3
4
  const srv = new AcpServer()
4
5
  srv.start()
5
6
  process.on('SIGINT', () => { srv.stop(); process.exit(0) })
package/src/acp/server.js CHANGED
@@ -6,9 +6,26 @@ import { logger } from '../observability/log.js'
6
6
  import { Events } from './events.js'
7
7
  import { checkPermission, rememberAllow, rememberDeny } from './permissions.js'
8
8
  import { AcpSessionManager } from './session.js'
9
+ import { createMachine, createActor } from 'xstate'
10
+ import { persist, load, clear } from '../machines/snapshot-store.js'
9
11
 
10
12
  const log = logger('acp')
11
13
 
14
+ // ACP server lifecycle machine: stopped -> running -> stopped. Persisted so an
15
+ // active snapshot on boot signals the server was serving; per-prompt processing
16
+ // is persisted separately under kind=acp-prompt so an interrupted prompt.submit
17
+ // is observable + resumable after a restart.
18
+ export function createAcpMachine() {
19
+ return createMachine({
20
+ id: 'freddie-acp',
21
+ initial: 'stopped',
22
+ states: {
23
+ stopped: { on: { START: 'running' } },
24
+ running: { on: { STOP: 'stopped' } },
25
+ },
26
+ })
27
+ }
28
+
12
29
  const CAPABILITIES = {
13
30
  name: 'freddie', version: '0.4.0',
14
31
  methods: ['initialize', 'session.new', 'session.resume', 'session.list', 'session.end', 'prompt.submit', 'tool.list', 'permission.respond', 'shutdown'],
@@ -21,13 +38,19 @@ export class AcpServer extends EventEmitter {
21
38
  this.in = stdin; this.out = stdout; this.callLLM = callLLM
22
39
  this.sessions = new AcpSessionManager()
23
40
  this._pendingPerm = new Map()
41
+ this.machine = createAcpMachine()
42
+ this.actor = createActor(this.machine)
43
+ this.actor.subscribe(() => { persist('acp', 'lifecycle', this.actor.getPersistedSnapshot()).catch(() => {}) })
44
+ this.actor.start()
24
45
  }
46
+ get state() { return this.actor.getSnapshot().value }
25
47
  start() {
26
48
  const rl = readline.createInterface({ input: this.in, crlfDelay: Infinity })
27
49
  rl.on('line', (l) => this.handle(l).catch(e => this.send({ jsonrpc: '2.0', error: { message: String(e) } })))
28
50
  this.rl = rl
51
+ this.actor.send({ type: 'START' })
29
52
  }
30
- stop() { this.rl?.close() }
53
+ stop() { this.rl?.close(); try { this.actor.send({ type: 'STOP' }) } catch {} }
31
54
  send(o) { this.out.write(JSON.stringify(o) + '\n') }
32
55
  async handle(line) {
33
56
  if (!line.trim()) return
@@ -75,7 +98,12 @@ const METHODS = {
75
98
  if (!srv.sessions.isActive(sessionId)) throw new Error('session not active')
76
99
  srv.sessions.appendUser(sessionId, prompt)
77
100
  Events.messageDelta((o) => srv.send(o), { sessionId, role: 'user', content: prompt })
78
- const out = await runTurn({ prompt, callLLM: srv.callLLM })
101
+ // Persist in-flight prompt under kind=acp-prompt keyed by sessionId so a
102
+ // refresh mid-turn is observable + resumable (the agent snapshot for the
103
+ // turn itself lives under kind=agent via runTurn sessionKey).
104
+ await persist('acp-prompt', sessionId, { status: 'active', value: 'running', context: { sessionId, prompt } })
105
+ const out = await runTurn({ prompt, callLLM: srv.callLLM, sessionKey: 'acp:' + sessionId })
106
+ await clear('acp-prompt', sessionId)
79
107
  srv.sessions.appendAssistant(sessionId, out.result || '')
80
108
  Events.messageComplete((o) => srv.send(o), { sessionId, role: 'assistant', content: out.result || '' })
81
109
  return { result: out.result, error: out.error, iterations: out.iterations }
@@ -121,7 +121,7 @@ function adaptResponse(r) {
121
121
 
122
122
  function tryParseJson(s) { try { return typeof s === 'string' ? JSON.parse(s) : (s || {}) } catch { return {} } }
123
123
 
124
- export async function isReachable(timeoutMs = 2000) {
124
+ export async function isReachable(timeoutMs = 10000) {
125
125
  try {
126
126
  const controller = new AbortController()
127
127
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs)
@@ -3,6 +3,8 @@ import { bootHost } from '../host/index.js'
3
3
  import { getEnabledToolSchemas } from '../toolsets.js'
4
4
  import { logger } from '../observability/log.js'
5
5
  import { resolveCallLLM } from './llm_resolver.js'
6
+ import { createPersistentActor } from '../machines/persistent-actor.js'
7
+ import { randomUUID } from 'node:crypto'
6
8
 
7
9
  const log = logger('agent')
8
10
 
@@ -55,7 +57,18 @@ export function createAgentMachine({ provider, model, maxIterations = 90, callLL
55
57
  input: ({ context }) => ({ messages: context.messages, model: context.model, provider: context.provider, enabledToolsets: context.enabledToolsets, disabledToolsets: context.disabledToolsets }),
56
58
  onDone: [
57
59
  { guard: ({ event }) => Array.isArray(event.output?.tool_calls) && event.output.tool_calls.length > 0, target: 'tool_calls', actions: assign({ messages: ({ context, event }) => [...context.messages, { role: 'assistant', content: event.output.content || '', tool_calls: event.output.tool_calls }] }) },
58
- { target: 'done', actions: assign({ messages: ({ context, event }) => [...context.messages, { role: 'assistant', content: event.output.content || '' }], lastResult: ({ event }) => event.output.content || '' }) },
60
+ { target: 'done', actions: assign({ messages: ({ context, event }) => [...context.messages, { role: 'assistant', content: event.output.content || '' }], lastResult: ({ context, event }) => {
61
+ // Prefer this turn's content, but if the model ended with empty
62
+ // text (it may have put its answer in an earlier turn alongside a
63
+ // tool_call), fall back to the last non-empty assistant message so
64
+ // the caller never gets an empty result after a successful run.
65
+ if (event.output.content && event.output.content.trim()) return event.output.content;
66
+ for (let i = context.messages.length - 1; i >= 0; i--) {
67
+ const m = context.messages[i];
68
+ if (m.role === 'assistant' && typeof m.content === 'string' && m.content.trim()) return m.content;
69
+ }
70
+ return event.output.content || '';
71
+ } }) },
59
72
  ],
60
73
  onError: { target: 'done', actions: assign({ error: ({ event }) => String(event.error?.message || event.error) }) },
61
74
  },
@@ -159,21 +172,14 @@ function mergeHookExtras(messages, r, tag) {
159
172
  return e.length ? [...messages, ...e] : messages
160
173
  }
161
174
 
162
- export async function runTurn({ prompt, messages = [], model, provider, callLLM, enabledToolsets, disabledToolsets, maxIterations = 90, timeoutMs = 30000, cwd, skill, witnessPath } = {}) {
163
- const events = []; const h = await bootHost()
164
- await h.hooks.invoke('onSessionStart', { prompt, model, provider, skill, cwd })
165
- let initMessages = [...messages]; const sysParts = []
166
- if (cwd) sysParts.push(`Working directory: ${cwd}. Always pass cwd="${cwd}" to bash tool calls. When reading or writing files use paths relative to this directory or absolute paths under it.`)
167
- if (skill) { const sd = h.pi.skills.get(skill); if (sd?.content) sysParts.push('Skill context:\n' + sd.content) }
168
- if (sysParts.length) initMessages.unshift({ role: 'user', content: sysParts.join('\n\n') })
169
- const inbound = await h.hooks.invoke('onMessageInbound', { content: prompt })
170
- if (inbound?.behavior === 'block') { await h.hooks.invoke('onSessionEnd', { reason: 'prompt_blocked' }); return { messages: initMessages, result: null, error: 'prompt blocked by plugsdk hook: ' + (inbound.reason || 'denied'), iterations: 0 } }
171
- initMessages = mergeHookExtras(initMessages, inbound, 'onMessageInbound')
172
- const machine = createAgentMachine({ model, provider, callLLM, enabledToolsets, disabledToolsets, maxIterations, events })
173
- const actor = createActor(machine, { input: { messages: initMessages } }); actor.start(); actor.send({ type: 'SUBMIT', prompt })
175
+ // Drive a started persistent agent actor to its final state, wiring timeout +
176
+ // session-end hooks + trajectory. Shared by runTurn (fresh) and resumeTurn
177
+ // (rehydrated from a persisted snapshot after a refresh/restart).
178
+ async function driveAgentActor({ pa, h, events, prompt, provider, model, skill, cwd, witnessPath, timeoutMs }) {
179
+ const { actor } = pa
174
180
  return await new Promise((resolve, reject) => {
175
181
  let sub
176
- const cleanup = () => { try { sub?.unsubscribe() } catch {} try { actor.stop() } catch {} }
182
+ const cleanup = () => { try { sub?.unsubscribe() } catch {} ; pa.flush().catch(() => {}).finally(() => { try { actor.stop() } catch {} }) }
177
183
  const t = setTimeout(() => { cleanup(); reject(new Error('agent turn timeout')) }, timeoutMs)
178
184
  sub = actor.subscribe(snap => { if (snap.status !== 'done') return; clearTimeout(t)
179
185
  ;(async () => {
@@ -183,8 +189,9 @@ export async function runTurn({ prompt, messages = [], model, provider, callLLM,
183
189
  await h.hooks.invoke('onSessionEnd', { reason: out?.error ? 'error' : 'ok', iterations: out?.iterations })
184
190
  const errorStack = out?.error ? (events.find(e => e.type === 'llm_call' && !e.ok)?.stack || null) : null
185
191
  await writeTrajectory(out, { prompt, provider, model, skill, cwd, events, errorStack, witnessPath })
186
- // Unsubscribe + stop the actor once the turn is done — a finished
187
- // actor should not be left running with live subscriptions/handles.
192
+ // Unsubscribe, flush the final snapshot (persistent-actor clears it on
193
+ // the done state), then stop the actor — a finished actor should not be
194
+ // left running with live subscriptions/handles.
188
195
  cleanup()
189
196
  resolve(out)
190
197
  })().catch(e => { cleanup(); reject(e) })
@@ -192,6 +199,39 @@ export async function runTurn({ prompt, messages = [], model, provider, callLLM,
192
199
  })
193
200
  }
194
201
 
202
+ export async function runTurn({ prompt, messages = [], model, provider, callLLM, enabledToolsets, disabledToolsets, maxIterations = 90, timeoutMs = 30000, cwd, skill, witnessPath, sessionKey } = {}) {
203
+ const events = []; const h = await bootHost()
204
+ await h.hooks.invoke('onSessionStart', { prompt, model, provider, skill, cwd })
205
+ let initMessages = [...messages]; const sysParts = []
206
+ if (cwd) sysParts.push(`Working directory: ${cwd}. Always pass cwd="${cwd}" to bash tool calls. When reading or writing files use paths relative to this directory or absolute paths under it.`)
207
+ if (skill) { const sd = h.pi.skills.get(skill); if (sd?.content) sysParts.push('Skill context:\n' + sd.content) }
208
+ if (sysParts.length) initMessages.unshift({ role: 'user', content: sysParts.join('\n\n') })
209
+ const inbound = await h.hooks.invoke('onMessageInbound', { content: prompt })
210
+ if (inbound?.behavior === 'block') { await h.hooks.invoke('onSessionEnd', { reason: 'prompt_blocked' }); return { messages: initMessages, result: null, error: 'prompt blocked by plugsdk hook: ' + (inbound.reason || 'denied'), iterations: 0 } }
211
+ initMessages = mergeHookExtras(initMessages, inbound, 'onMessageInbound')
212
+ const machine = createAgentMachine({ model, provider, callLLM, enabledToolsets, disabledToolsets, maxIterations, events })
213
+ // Persist the turn snapshot under kind=agent so an interrupted turn (process
214
+ // refresh mid-tool-call) resumes exactly where it stopped via resumeTurn.
215
+ const key = sessionKey || randomUUID()
216
+ const pa = await createPersistentActor(machine, { kind: 'agent', key, input: { messages: initMessages } })
217
+ pa.actor.send({ type: 'SUBMIT', prompt })
218
+ return await driveAgentActor({ pa, h, events, prompt, provider, model, skill, cwd, witnessPath, timeoutMs })
219
+ }
220
+
221
+ // Rehydrate an interrupted turn from its persisted snapshot and drive it to
222
+ // completion. Returns null if no live snapshot exists for the key (already
223
+ // completed or never persisted) — caller falls back to a fresh runTurn.
224
+ export async function resumeTurn({ sessionKey, model, provider, callLLM, enabledToolsets, disabledToolsets, maxIterations = 90, timeoutMs = 30000, cwd, skill, witnessPath } = {}) {
225
+ if (!sessionKey) throw new Error('resumeTurn requires sessionKey')
226
+ const { load } = await import('../machines/snapshot-store.js')
227
+ if (!(await load('agent', sessionKey))) return null
228
+ const events = []; const h = await bootHost()
229
+ const machine = createAgentMachine({ model, provider, callLLM, enabledToolsets, disabledToolsets, maxIterations, events })
230
+ const pa = await createPersistentActor(machine, { kind: 'agent', key: sessionKey, input: { messages: [] } })
231
+ if (!pa.resumed) { await pa.forget(); return null }
232
+ return await driveAgentActor({ pa, h, events, prompt: '', provider, model, skill, cwd, witnessPath, timeoutMs })
233
+ }
234
+
195
235
  export async function invokeCompactHooks({ trigger = 'auto', messages = [] } = {}) {
196
236
  const h = await bootHost()
197
237
  const pre = await h.hooks.invoke('onPreCompact', { trigger, messages })
package/src/batch.js CHANGED
@@ -3,30 +3,94 @@ import path from 'node:path'
3
3
  import { runTurn } from './agent/machine.js'
4
4
  import { getFreddieHome } from './home.js'
5
5
  import { randomUUID } from 'node:crypto'
6
+ import { createMachine, assign, fromPromise } from 'xstate'
7
+ import { createPersistentActor } from './machines/persistent-actor.js'
8
+ import { load } from './machines/snapshot-store.js'
6
9
 
7
- export async function runBatch({ prompts = [], concurrency = 4, model, callLLM } = {}) {
10
+ // Run one prompt and append its result to the batch jsonl file.
11
+ async function runOne({ job, model, callLLM, file }) {
12
+ let rec
13
+ try {
14
+ const out = await runTurn({ prompt: job.p, model, callLLM, timeoutMs: 60000 })
15
+ rec = { i: job.i, prompt: job.p, result: out.result, error: out.error }
16
+ } catch (e) {
17
+ rec = { i: job.i, prompt: job.p, error: String(e?.message || e) }
18
+ }
19
+ fs.appendFileSync(file, JSON.stringify(rec) + '\n')
20
+ return rec
21
+ }
22
+
23
+ // xstate batch machine. Context tracks done[] (indices completed) + results so a
24
+ // refreshed batch resumes only the unfinished prompts. running -> running until
25
+ // every index is done, then -> complete (final). The persisted snapshot is keyed
26
+ // by kind=batch key=<batchId>.
27
+ export function createBatchMachine({ prompts, concurrency, model, callLLM, file } = {}) {
28
+ return createMachine({
29
+ id: 'freddie-batch',
30
+ initial: 'running',
31
+ output: ({ context }) => ({ id: context.id, file: context.file, results: context.results }),
32
+ context: ({ input }) => ({
33
+ id: input.id, file: input.file, model: input.model, concurrency: input.concurrency,
34
+ prompts: input.prompts, done: input.done || [], results: input.results || new Array(input.prompts.length).fill(null),
35
+ }),
36
+ states: {
37
+ running: {
38
+ always: { guard: ({ context }) => context.done.length >= context.prompts.length, target: 'complete' },
39
+ invoke: {
40
+ src: fromPromise(async ({ input }) => {
41
+ const { context } = input
42
+ const pending = context.prompts
43
+ .map((p, i) => ({ i, p }))
44
+ .filter(({ i }) => !context.done.includes(i))
45
+ .slice(0, context.concurrency)
46
+ return await Promise.all(pending.map(job => runOne({ job, model: context.model, callLLM, file: context.file })))
47
+ }),
48
+ input: ({ context }) => ({ context }),
49
+ onDone: {
50
+ target: 'running',
51
+ reenter: true,
52
+ actions: assign({
53
+ done: ({ context, event }) => [...context.done, ...event.output.map(r => r.i)],
54
+ results: ({ context, event }) => { const r = [...context.results]; for (const rec of event.output) r[rec.i] = rec; return r },
55
+ }),
56
+ },
57
+ },
58
+ },
59
+ complete: { type: 'final', output: ({ context }) => ({ id: context.id, file: context.file, results: context.results }) },
60
+ },
61
+ })
62
+ }
63
+
64
+ export async function runBatch({ prompts = [], concurrency = 4, model, callLLM, batchId } = {}) {
8
65
  if (!Array.isArray(prompts) || prompts.length === 0) throw new Error('prompts required')
9
- const id = randomUUID()
66
+ const id = batchId || randomUUID()
10
67
  const dir = path.join(getFreddieHome(), 'batches')
11
68
  fs.mkdirSync(dir, { recursive: true })
12
69
  const file = path.join(dir, id + '.jsonl')
13
- const stream = fs.createWriteStream(file, { flags: 'a' })
14
- const queue = prompts.map((p, i) => ({ i, p }))
15
- const results = new Array(prompts.length)
16
- const workers = Array.from({ length: Math.min(concurrency, prompts.length) }, async () => {
17
- while (queue.length) {
18
- const job = queue.shift()
19
- if (!job) break
20
- try {
21
- const out = await runTurn({ prompt: job.p, model, callLLM, timeoutMs: 60000 })
22
- results[job.i] = { i: job.i, prompt: job.p, result: out.result, error: out.error }
23
- } catch (e) {
24
- results[job.i] = { i: job.i, prompt: job.p, error: String(e?.message || e) }
25
- }
26
- stream.write(JSON.stringify(results[job.i]) + '\n')
27
- }
70
+ const machine = createBatchMachine({ model, callLLM })
71
+ const pa = await createPersistentActor(machine, { kind: 'batch', key: id, input: { id, file, model, concurrency, prompts } })
72
+ return await driveBatch(pa)
73
+ }
74
+
75
+ // Resume an interrupted batch from its persisted snapshot — only the prompts not
76
+ // yet in context.done get re-run. Returns null if no live snapshot for the id.
77
+ export async function resumeBatch({ batchId, model, callLLM } = {}) {
78
+ if (!batchId) throw new Error('resumeBatch requires batchId')
79
+ if (!(await load('batch', batchId))) return null
80
+ const machine = createBatchMachine({ model, callLLM })
81
+ const pa = await createPersistentActor(machine, { kind: 'batch', key: batchId, input: { id: batchId, file: '', model, concurrency: 4, prompts: [] } })
82
+ if (!pa.resumed) { await pa.forget(); return null }
83
+ return await driveBatch(pa)
84
+ }
85
+
86
+ function driveBatch(pa) {
87
+ const { actor } = pa
88
+ return new Promise((resolve, reject) => {
89
+ const sub = actor.subscribe(snap => {
90
+ if (snap.status !== 'done') return
91
+ const out = snap.output
92
+ pa.flush().catch(() => {}).finally(() => { try { sub.unsubscribe() } catch {}; try { actor.stop() } catch {}; resolve(out) })
93
+ })
94
+ actor.subscribe({ error: (e) => { try { sub.unsubscribe() } catch {}; reject(e) } })
28
95
  })
29
- await Promise.all(workers)
30
- await new Promise(r => stream.end(r))
31
- return { id, file, results }
32
96
  }
@@ -4,6 +4,8 @@ import { registerBuiltinHooks } from '../gateway/builtin_hooks/index.js'
4
4
  let _gateway = null
5
5
  export async function startGateway({ port = 0, hooks = true } = {}) {
6
6
  if (_gateway) return _gateway
7
+ // Rehydrate interrupted agent turns / batches before the gateway starts taking traffic.
8
+ try { const { resumeAll } = await import('../machines/resume.js'); await resumeAll() } catch (_) {}
7
9
  const wh = await makePlatform('webhook', { port })
8
10
  const api = await makePlatform('api_server', { port: 0 })
9
11
  const gw = new Gateway({ platforms: { webhook: wh, api_server: api } })
@@ -2,11 +2,11 @@ import { db } from '../db.js'
2
2
  import { parseCron, matches } from './cron-parse.js'
3
3
  import { runTurn } from '../agent/machine.js'
4
4
  import { logger } from '../observability/log.js'
5
+ import { createMachine, assign, fromPromise } from 'xstate'
6
+ import { createPersistentActor } from '../machines/persistent-actor.js'
5
7
 
6
8
  const log = logger('cron')
7
9
 
8
- let _interval = null
9
-
10
10
  async function init() {
11
11
  const d = await db()
12
12
  await d.exec(`CREATE TABLE IF NOT EXISTS cron_jobs (id INTEGER PRIMARY KEY AUTOINCREMENT, cron TEXT NOT NULL, prompt TEXT NOT NULL, model TEXT, last_run INTEGER, created INTEGER NOT NULL, enabled INTEGER NOT NULL DEFAULT 1)`)
@@ -52,6 +52,31 @@ export async function tick(now = new Date(), { callLLM = null } = {}) {
52
52
  return fired
53
53
  }
54
54
 
55
+ // xstate scheduler machine: idle -> ticking -> idle (STOP -> stopped). The persisted snapshot
56
+ // carries tickCount + lastTickMs so a refreshed scheduler resumes its cadence;
57
+ // per-job fire-state (last_run minute) lives durably in cron_jobs, so a restart
58
+ // never double-fires a job that already ran this minute.
59
+ export function createCronMachine({ callLLM = null, intervalMs = 30000 } = {}) {
60
+ return createMachine({
61
+ id: 'freddie-cron',
62
+ initial: 'idle',
63
+ context: ({ input }) => ({ tickCount: input?.tickCount || 0, lastTickMs: input?.lastTickMs || 0, intervalMs, lastFired: [] }),
64
+ states: {
65
+ idle: { after: { [intervalMs]: 'ticking' }, on: { TICK_NOW: 'ticking', STOP: 'stopped' } },
66
+ ticking: {
67
+ invoke: {
68
+ src: fromPromise(async () => tick(new Date(), { callLLM })),
69
+ onDone: { target: 'idle', actions: assign({ tickCount: ({ context }) => context.tickCount + 1, lastTickMs: () => Date.now(), lastFired: ({ event }) => (event.output || []).map(j => j.id) }) },
70
+ onError: { target: 'idle', actions: ({ event }) => log.error('cron tick errored', { err: String(event.error) }) },
71
+ },
72
+ },
73
+ stopped: { type: 'final' },
74
+ },
75
+ })
76
+ }
77
+
78
+ // Legacy in-memory interval scheduler (kept for tests + non-resumable callers).
79
+ let _interval = null
55
80
  export function startScheduler({ callLLM = null, intervalMs = 30000 } = {}) {
56
81
  stopScheduler()
57
82
  _interval = setInterval(() => { tick(new Date(), { callLLM }) }, intervalMs)
@@ -61,3 +86,22 @@ export function startScheduler({ callLLM = null, intervalMs = 30000 } = {}) {
61
86
  export function stopScheduler() {
62
87
  if (_interval) { clearInterval(_interval); _interval = null }
63
88
  }
89
+
90
+ // Resumable scheduler: persists its machine snapshot every transition under
91
+ // kind=cron key=scheduler. On boot resume.js rehydrates it and it continues
92
+ // ticking. Returns the persistent-actor handle.
93
+ let _persistentCron = null
94
+ export async function startPersistentScheduler({ callLLM = null, intervalMs = 30000 } = {}) {
95
+ if (_persistentCron) return _persistentCron
96
+ const machine = createCronMachine({ callLLM, intervalMs })
97
+ _persistentCron = await createPersistentActor(machine, { kind: 'cron', key: 'scheduler', input: {} })
98
+ return _persistentCron
99
+ }
100
+
101
+ export async function stopPersistentScheduler() {
102
+ if (!_persistentCron) return
103
+ try { _persistentCron.actor.send({ type: 'STOP' }) } catch {}
104
+ await _persistentCron.flush()
105
+ try { _persistentCron.actor.stop() } catch {}
106
+ _persistentCron = null
107
+ }
@@ -1,30 +1,68 @@
1
1
  import { logger } from '../observability/log.js'
2
2
  import { runTurn } from '../agent/machine.js'
3
+ import { createMachine, assign, fromPromise, createActor } from 'xstate'
4
+ import { persist, load, clear } from '../machines/snapshot-store.js'
5
+ import { randomUUID } from 'node:crypto'
3
6
 
4
7
  const log = logger('gateway')
5
8
 
9
+ // Gateway lifecycle machine: stopped -> starting -> running -> stopping -> stopped.
10
+ // The machine context tracks platform names; lifecycle is the resumable
11
+ // shape. In-flight inbound message processing is persisted separately (per-message
12
+ // snapshot under kind=gateway-msg) so a refresh re-drives messages whose reply was
13
+ // never sent.
14
+ export function createGatewayMachine({ platformNames = [] } = {}) {
15
+ return createMachine({
16
+ id: 'freddie-gateway',
17
+ initial: 'stopped',
18
+ context: ({ input }) => ({ platformNames: input?.platformNames || platformNames }),
19
+ states: {
20
+ stopped: { on: { START: 'starting' } },
21
+ starting: { on: { STARTED: 'running', FAIL: 'stopped' } },
22
+ running: { on: { STOP: 'stopping' } },
23
+ stopping: { on: { STOPPED: 'stopped' } },
24
+ },
25
+ })
26
+ }
27
+
6
28
  export class Gateway {
7
29
  constructor({ platforms = {}, callLLM = null } = {}) {
8
30
  this.platforms = new Map()
9
31
  this.callLLM = callLLM
10
32
  this.hooks = { inbound: [], outbound: [] }
11
33
  for (const [name, adapter] of Object.entries(platforms)) this.register(name, adapter)
34
+ this.machine = createGatewayMachine({ platformNames: [...this.platforms.keys()] })
35
+ this.actor = createActor(this.machine, { input: { platformNames: [...this.platforms.keys()] } })
36
+ // Persist lifecycle transitions so the gateway's state is observable +
37
+ // resumable; an active snapshot on boot means the gateway was running.
38
+ this.actor.subscribe((snap) => { persist('gateway', 'lifecycle', this.actor.getPersistedSnapshot()).catch(() => {}) })
39
+ this.actor.start()
12
40
  }
41
+ get state() { return this.actor.getSnapshot().value }
13
42
  register(name, adapter) {
14
43
  this.platforms.set(name, adapter)
15
44
  adapter.on?.('message', (m) => this.handleInbound(name, m))
16
45
  }
17
46
  addHook(stage, fn) { this.hooks[stage].push(fn) }
18
47
  async start() {
48
+ this.actor.send({ type: 'START' })
19
49
  for (const a of this.platforms.values()) await a.start?.()
50
+ this.actor.send({ type: 'STARTED' })
20
51
  log.info('gateway started', { platforms: [...this.platforms.keys()] })
21
52
  }
22
53
  async stop() {
54
+ this.actor.send({ type: 'STOP' })
23
55
  for (const a of this.platforms.values()) await a.stop?.()
56
+ this.actor.send({ type: 'STOPPED' })
24
57
  log.info('gateway stopped')
25
58
  }
26
59
  async handleInbound(platform, msg) {
27
60
  log.info('inbound', { platform, from: msg.from, len: (msg.text || '').length })
61
+ // Persist the in-flight message under msg.id when the platform supplies one,
62
+ // otherwise under a fresh platform:sender:uuid key (not stable across restarts),
63
+ // so in-flight work is observable. The snapshot is cleared once the reply is sent.
64
+ const msgKey = msg.id || `${platform}:${msg.from}:${randomUUID()}`
65
+ await persist('gateway-msg', msgKey, { status: 'active', value: 'processing', context: { platform, from: msg.from, text: msg.text } })
28
66
  let cur = { ...msg, platform }
29
67
  for (const h of this.hooks.inbound) cur = (await h(cur)) || cur
30
68
  const result = await runTurn({ prompt: cur.text || '', callLLM: this.callLLM })
@@ -32,6 +70,7 @@ export class Gateway {
32
70
  for (const h of this.hooks.outbound) reply = (await h(reply)) || reply
33
71
  const adapter = this.platforms.get(platform)
34
72
  await adapter.send?.(reply)
73
+ await clear('gateway-msg', msgKey)
35
74
  return reply
36
75
  }
37
76
  }
@@ -134,7 +134,13 @@ export function makeCcLoaders(ccHost, env) {
134
134
  }
135
135
  return ccHost.plugins().length
136
136
  }
137
+ // gm-cc must never be auto-discovered as a cc-plugin: it ships the 24
138
+ // deprecated gm-* platform-variant skills under a manifest named 'gm', and the
139
+ // single canonical gm-skill is registered by plugins/gm-skill instead. The
140
+ // package extracts into node_modules under both 'gm-cc' and pnpm/bun temp dirs
141
+ // like '.gm-cc-<hash>', so exclude by basename prefix, not exact match.
137
142
  const CC_EXCLUDE = new Set(['gm-cc'])
143
+ const isExcludedCc = (base) => CC_EXCLUDE.has(base) || /^\.?gm-cc(-|$)/.test(base)
138
144
  async function loadCcFromNodeModules(startDir) {
139
145
  const seen = new Set(ccHost.plugins().map(p => p.root))
140
146
  let cur = path.resolve(startDir)
@@ -146,7 +152,7 @@ export function makeCcLoaders(ccHost, env) {
146
152
  ? fs.readdirSync(path.join(nm, entry.name), { withFileTypes: true }).filter(e => e.isDirectory()).map(e => path.join(nm, entry.name, e.name))
147
153
  : [path.join(nm, entry.name)]
148
154
  for (const d of dirs) {
149
- if (seen.has(d) || !isCcPluginDir(d) || CC_EXCLUDE.has(path.basename(d))) continue
155
+ if (seen.has(d) || !isCcPluginDir(d) || isExcludedCc(path.basename(d))) continue
150
156
  seen.add(d); await useCcDir(d)
151
157
  }
152
158
  }
@@ -0,0 +1,57 @@
1
+ // Persistent xstate actor wrapper.
2
+ //
3
+ // createPersistentActor rehydrates an actor from its last persisted snapshot
4
+ // (if any), auto-persists on every transition, and clears its snapshot the
5
+ // moment it reaches a final/stopped state. This is the single primitive every
6
+ // long-lived freddie subsystem uses to become resumable across a process refresh.
7
+ import { createActor } from 'xstate'
8
+ import { persist, load, clear } from './snapshot-store.js'
9
+ import { logger } from '../observability/log.js'
10
+
11
+ const log = logger('persistent-actor')
12
+
13
/**
 * Create and start an xstate actor whose snapshots are mirrored to the
 * snapshot store under the identity (kind, key).
 *
 * If the store holds a snapshot for (kind, key) the actor is created from it
 * and `input` is ignored; otherwise the actor is created fresh with `input`.
 * Each snapshot delivered to the subscription is persisted while its status is
 * 'active' and cleared from the store for any other status.
 *
 * NOTE(review): only a `next` observer is registered. Confirm against the
 * xstate docs whether error/stop are delivered through `next`; if they are
 * not, rows for errored or stopped actors are never cleared by this wrapper.
 *
 * @param {object} machine - xstate machine handed to createActor.
 * @param {object} [options]
 * @param {string} options.kind - snapshot kind (first half of the store key).
 * @param {string} options.key - snapshot key (second half of the store key).
 * @param {*} [options.input] - actor input, used only on a fresh start.
 * @param {(snap: object) => void} [options.onTransition] - invoked after a
 *   snapshot has been persisted or cleared; skipped when the store write fails.
 * @returns {Promise<{actor: object, resumed: boolean, flush: () => Promise<void>, forget: () => Promise<void>}>}
 * @throws {Error} when kind or key is missing.
 */
export async function createPersistentActor(machine, { kind, key, input, onTransition } = {}) {
  if (!kind || !key) throw new Error('createPersistentActor requires kind and key')
  const machineId = machine?.id || machine?.config?.id || null
  const snapshot = await load(kind, key, { machineId })
  const resumed = !!snapshot
  const actor = snapshot
    ? createActor(machine, { snapshot })
    : createActor(machine, { input })

  let persisting = Promise.resolve()
  const sub = actor.subscribe((snap) => {
    // Chain every write onto the previous one so rapid consecutive transitions
    // reach the store in order; the upsert is keyed by (kind, key), so the
    // last write wins.
    persisting = persisting.then(async () => {
      try {
        if (snap.status === 'active') {
          // Read the persisted form only when it is actually written.
          await persist(kind, key, actor.getPersistedSnapshot(), { machineId })
        } else {
          // Non-active snapshot: clear so a completed actor never resurrects on boot.
          await clear(kind, key)
        }
      } catch (e) {
        log.error('persist failed', { kind, key, err: String(e) })
        return
      }
      // Kept outside the store try/catch so a throwing callback is not
      // misreported as a persistence failure.
      try {
        onTransition?.(snap)
      } catch (e) {
        log.error('onTransition callback failed', { kind, key, err: String(e) })
      }
    })
  })

  // Best-effort unsubscribe shared by flush() and forget().
  const detach = () => { try { sub.unsubscribe() } catch {} }

  if (resumed) log.info('actor resumed from snapshot', { kind, key, machineId })

  actor.start()
  return {
    actor,
    resumed,
    // Await all in-flight persists then unsubscribe — call before stopping so
    // the final snapshot state is durable.
    async flush() { await persisting; detach() },
    // Clear this actor's snapshot explicitly (e.g. on external cancel).
    async forget() { await persisting; detach(); await clear(kind, key) },
  }
}
@@ -0,0 +1,42 @@
1
+ // Boot-time resumability driver.
2
+ //
3
+ // resumeAll() is invoked on process boot (CLI + dashboard server start). It walks
4
+ // every non-final persisted machine snapshot and rehydrates the ones that can be
5
+ // driven to completion headlessly — interrupted agent turns and unfinished
6
+ // batches. Lifecycle snapshots (gateway/acp) and in-flight message markers are
7
+ // reported but not auto-driven, since they require their live host process (open
8
+ // sockets / stdio) to resume meaningfully; resumeAll() surfaces them so the host
9
+ // can decide.
10
+ import { list, sweepDone } from './snapshot-store.js'
11
+ import { logger } from '../observability/log.js'
12
+
13
+ const log = logger('resume')
14
+
15
/**
 * Walk every active persisted snapshot and either drive it headlessly or
 * surface it for the host.
 *
 * Final snapshots are swept first. Rows of kind 'agent' and 'batch' are
 * resumed in the background (their completion is only logged, not awaited)
 * when the matching flag is on; every other row is listed in `surfaced`.
 *
 * @param {object} [options]
 * @param {boolean} [options.driveAgents=true] - resume 'agent' rows via resumeTurn.
 * @param {boolean} [options.driveBatches=true] - resume 'batch' rows via resumeBatch.
 * @returns {Promise<object>} per-kind counts plus `resumed` and `surfaced` lists.
 */
export async function resumeAll({ driveAgents = true, driveBatches = true } = {}) {
  // Drop any final snapshots first so they never resurrect.
  await sweepDone()
  const active = await list({ status: 'active' })
  const summary = { agent: 0, batch: 0, cron: 0, gateway: 0, acp: 0, 'gateway-msg': 0, 'acp-prompt': 0, resumed: [], surfaced: [] }

  // Kinds that can be driven without a live host. Each loader lazily imports
  // its module and returns a function that starts the resume for one key.
  const drivers = new Map([
    ['agent', {
      enabled: driveAgents,
      load: async () => {
        const { resumeTurn } = await import('../agent/machine.js')
        return (key) => resumeTurn({ sessionKey: key })
      },
      done: 'agent turn resumed to completion',
      failed: 'agent resume failed',
    }],
    ['batch', {
      enabled: driveBatches,
      load: async () => {
        const { resumeBatch } = await import('../batch.js')
        return (key) => resumeBatch({ batchId: key })
      },
      done: 'batch resumed to completion',
      failed: 'batch resume failed',
    }],
  ])

  for (const { kind, key, status } of active) {
    summary[kind] = (summary[kind] || 0) + 1
    try {
      const driver = drivers.get(kind)
      if (!driver || !driver.enabled) {
        // Lifecycle + in-flight markers: surfaced for the host to act on.
        summary.surfaced.push({ kind, key, status })
        continue
      }
      const start = await driver.load()
      // Deliberately not awaited: the resume runs in the background and only
      // its outcome is logged.
      start(key)
        .then(() => log.info(driver.done, { key }))
        .catch(e => log.error(driver.failed, { key, err: String(e) }))
      summary.resumed.push({ kind, key })
    } catch (e) {
      log.error('resume dispatch failed', { kind, key, err: String(e) })
    }
  }

  log.info('resumeAll complete', { active: active.length, resumed: summary.resumed.length, surfaced: summary.surfaced.length })
  return summary
}
@@ -0,0 +1,111 @@
1
+ // Durable xstate snapshot store — the resumability backbone.
2
+ //
3
+ // Every long-lived freddie machine (agent turn, cron scheduler, batch runner,
4
+ // gateway, acp) persists its xstate snapshot here on every transition. After a
5
+ // refresh/restart, resume.js rehydrates each non-final snapshot into a fresh
6
+ // actor via createActor(machine, { snapshot }), so the process picks up exactly
7
+ // where it left off.
8
+ //
9
+ // Storage is libsql (shared sessions.db) keyed by (kind, key). Last-write-wins
10
+ // upsert: rapid consecutive transitions only keep the latest snapshot.
11
+ import { db } from '../db.js'
12
+ import { logger } from '../observability/log.js'
13
+
14
+ const log = logger('snapshot-store')
15
+
16
+ // Bump when the persisted-snapshot encoding or any machine definition changes
17
+ // shape incompatibly. load() discards rows whose schema_version mismatches so a
18
+ // stale snapshot from older code never crashes resume.
19
+ export const SNAPSHOT_SCHEMA_VERSION = 1
20
+
21
// Set once the machine_snapshots table has been created in this process.
let _inited = false

/**
 * Resolve the database handle, creating the machine_snapshots table on the
 * first call in this process.
 *
 * @returns {Promise<object>} the handle returned by db().
 */
async function init() {
  const handle = await db()
  if (_inited) return handle

  await handle.exec(`CREATE TABLE IF NOT EXISTS machine_snapshots (
    kind TEXT NOT NULL,
    key TEXT NOT NULL,
    schema_version INTEGER NOT NULL,
    machine_id TEXT,
    snapshot_json TEXT NOT NULL,
    status TEXT NOT NULL,
    updated INTEGER NOT NULL,
    PRIMARY KEY (kind, key)
  )`)
  _inited = true
  return handle
}
39
+
40
/**
 * Upsert a snapshot row keyed by (kind, key).
 *
 * The stored status is taken from `snapshot.status`, falling back to 'active'
 * when it is absent. The row is stamped with the current
 * SNAPSHOT_SCHEMA_VERSION, the given machineId and Date.now().
 *
 * @param {string} kind - snapshot kind.
 * @param {string} key - snapshot key.
 * @param {object} snapshot - value serialized with JSON.stringify.
 * @param {object} [options]
 * @param {string|null} [options.machineId=null] - stored alongside the row so
 *   load() can reject a snapshot written by a different machine.
 * @returns {Promise<{kind: string, key: string, status: string}>}
 * @throws {Error} when kind or key is missing.
 */
export async function persist(kind, key, snapshot, { machineId = null } = {}) {
  if (!(kind && key)) throw new Error('persist requires kind and key')
  const store = await init()
  const status = snapshot?.status || 'active'
  const serialized = JSON.stringify(snapshot)
  const upsert = store.prepare(`INSERT INTO machine_snapshots (kind, key, schema_version, machine_id, snapshot_json, status, updated)
    VALUES (?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(kind, key) DO UPDATE SET
    schema_version = excluded.schema_version,
    machine_id = excluded.machine_id,
    snapshot_json = excluded.snapshot_json,
    status = excluded.status,
    updated = excluded.updated`)
  await upsert.run(kind, key, SNAPSHOT_SCHEMA_VERSION, machineId, serialized, status, Date.now())
  return { kind, key, status }
}
59
+
60
/**
 * Load the persisted snapshot for (kind, key).
 *
 * Returns null when there is no row. A row with a different schema version,
 * a different machine id, or unparseable JSON is deleted and null is returned,
 * so callers can fall back to a fresh actor.
 *
 * @param {string} kind - snapshot kind.
 * @param {string} key - snapshot key.
 * @param {object} [options]
 * @param {string|null} [options.machineId=null] - expected machine id; only
 *   compared when both it and the stored machine_id are set.
 * @returns {Promise<object|null>} the parsed snapshot, or null.
 */
export async function load(kind, key, { machineId = null } = {}) {
  const store = await init()
  const row = await store.prepare(`SELECT * FROM machine_snapshots WHERE kind = ? AND key = ?`).get(kind, key)
  if (!row) return null

  // Delete the unusable row and report "nothing to resume".
  const discard = async () => {
    await clear(kind, key)
    return null
  }

  const schemaMatches = Number(row.schema_version) === SNAPSHOT_SCHEMA_VERSION
  if (!schemaMatches) {
    log.info('discarding stale snapshot (schema mismatch)', { kind, key, had: row.schema_version, want: SNAPSHOT_SCHEMA_VERSION })
    return discard()
  }

  const bothIdsKnown = Boolean(machineId) && Boolean(row.machine_id)
  if (bothIdsKnown && row.machine_id !== machineId) {
    log.info('discarding stale snapshot (machine id mismatch)', { kind, key, had: row.machine_id, want: machineId })
    return discard()
  }

  let parsed
  try {
    parsed = JSON.parse(row.snapshot_json)
  } catch (e) {
    log.error('unparseable snapshot, discarding', { kind, key, err: String(e) })
    return discard()
  }
  return parsed
}
85
+
86
/**
 * Delete the snapshot row for (kind, key), if any.
 *
 * @param {string} kind - snapshot kind.
 * @param {string} key - snapshot key.
 * @returns {Promise<void>}
 */
export async function clear(kind, key) {
  const store = await init()
  const removal = store.prepare(`DELETE FROM machine_snapshots WHERE kind = ? AND key = ?`)
  await removal.run(kind, key)
}
90
+
91
/**
 * List snapshot rows, newest first.
 *
 * Each row carries kind, key, schema_version, machine_id, snapshot_json,
 * status and updated. snapshot_json is part of the projection because the
 * dashboard's machine census parses it to report each machine's current state
 * value; without the column that state is always null.
 *
 * @param {object} [options]
 * @param {string|null} [options.kind=null] - restrict to one kind; any falsy
 *   value disables the filter.
 * @param {string|null} [options.status='active'] - restrict to one status;
 *   pass null (or any falsy value) for all statuses.
 * @returns {Promise<object[]>} matching rows ordered by `updated` descending.
 */
export async function list({ kind = null, status = 'active' } = {}) {
  const d = await init()
  let sql = `SELECT kind, key, schema_version, machine_id, snapshot_json, status, updated FROM machine_snapshots`
  const where = []
  const args = []
  if (kind) { where.push('kind = ?'); args.push(kind) }
  if (status) { where.push('status = ?'); args.push(status) }
  if (where.length) sql += ' WHERE ' + where.join(' AND ')
  sql += ' ORDER BY updated DESC'
  return await d.prepare(sql).all(...args)
}
103
+
104
/**
 * Delete every snapshot row whose status is not 'active', so a completed
 * actor cannot be rehydrated on the next boot.
 *
 * @returns {Promise<{removed: number}>} the `changes` count reported by the
 *   DELETE statement.
 */
export async function sweepDone() {
  const store = await init()
  const { changes } = await store.prepare(`DELETE FROM machine_snapshots WHERE status != 'active'`).run()
  return { removed: changes }
}
package/src/web/server.js CHANGED
@@ -7,6 +7,9 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url))
7
7
 
8
8
  export async function createDashboard({ port = 0 } = {}) {
9
9
  const host = await bootHost()
10
+ // Rehydrate any interrupted machines (agent turns, batches) from their
11
+ // persisted snapshots; surface lifecycle markers. Non-blocking on failure.
12
+ try { const { resumeAll } = await import('../machines/resume.js'); await resumeAll() } catch (_) {}
10
13
  const app = express()
11
14
  app.use(express.json())
12
15
  app.use(express.static(__dirname))