@karmaniverous/jeeves-runner 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,14 @@
1
1
  #!/usr/bin/env node
2
- import { mkdirSync, readFileSync } from 'node:fs';
2
+ import { mkdirSync, existsSync, readFileSync } from 'node:fs';
3
3
  import { dirname, extname, resolve } from 'node:path';
4
4
  import { Command } from 'commander';
5
+ import { Cron, CronPattern } from 'croner';
5
6
  import { DatabaseSync } from 'node:sqlite';
6
7
  import { pino } from 'pino';
7
8
  import Fastify from 'fastify';
9
+ import { request as request$1 } from 'node:http';
8
10
  import { request } from 'node:https';
9
11
  import { spawn } from 'node:child_process';
10
- import { Cron } from 'croner';
11
12
  import { z } from 'zod';
12
13
 
13
14
  /**
@@ -102,9 +103,76 @@ CREATE TABLE IF NOT EXISTS queues (
102
103
 
103
104
  CREATE INDEX IF NOT EXISTS idx_queues_poll ON queues(queue, status, priority DESC, created_at);
104
105
  `;
106
+ /** Migration 002: Rename queues → queue_items, create queues definition table, add dedup support. */
107
+ const MIGRATION_002 = `
108
+ -- Drop old index first (references 'queue' column)
109
+ DROP INDEX IF EXISTS idx_queues_poll;
110
+
111
+ -- Rename existing queues table to queue_items
112
+ ALTER TABLE queues RENAME TO queue_items;
113
+
114
+ -- Create new queues definition table
115
+ CREATE TABLE queues (
116
+ id TEXT PRIMARY KEY,
117
+ name TEXT NOT NULL,
118
+ description TEXT,
119
+ dedup_expr TEXT,
120
+ dedup_scope TEXT DEFAULT 'pending',
121
+ max_attempts INTEGER DEFAULT 1,
122
+ retention_days INTEGER DEFAULT 7,
123
+ created_at TEXT DEFAULT (datetime('now'))
124
+ );
125
+
126
+ -- Add new columns to queue_items
127
+ ALTER TABLE queue_items ADD COLUMN queue_id TEXT;
128
+ ALTER TABLE queue_items ADD COLUMN dedup_key TEXT;
129
+
130
+ -- Migrate existing queue column to queue_id
131
+ UPDATE queue_items SET queue_id = queue;
132
+
133
+ -- Drop old queue column
134
+ ALTER TABLE queue_items DROP COLUMN queue;
135
+
136
+ -- Create dedup lookup index
137
+ CREATE INDEX idx_queue_items_dedup ON queue_items(queue_id, dedup_key, status);
138
+
139
+ -- Create new poll index
140
+ CREATE INDEX idx_queue_items_poll ON queue_items(queue_id, status, priority DESC, created_at);
141
+
142
+ -- Seed queue definitions
143
+ INSERT INTO queues (id, name, description, dedup_expr, dedup_scope, max_attempts, retention_days) VALUES
144
+ ('email-updates', 'Email Update Queue', NULL, NULL, NULL, 1, 7),
145
+ ('email-pending', 'Email Pending', NULL, '$.threadId', 'pending', 1, 7),
146
+ ('x-posts', 'X Post Queue', NULL, '$.id', 'pending', 1, 7),
147
+ ('gh-collabs', 'GH Collab Queue', NULL, '$.full_name', 'pending', 1, 7);
148
+ `;
149
+ /** Migration 003: Rename cursors → state, add state_items table for collection state. */
150
+ const MIGRATION_003 = `
151
+ -- Rename cursors → state
152
+ ALTER TABLE cursors RENAME TO state;
153
+
154
+ -- Rename index
155
+ DROP INDEX IF EXISTS idx_cursors_expires;
156
+ CREATE INDEX idx_state_expires ON state(expires_at) WHERE expires_at IS NOT NULL;
157
+
158
+ -- Create state_items table
159
+ CREATE TABLE state_items (
160
+ namespace TEXT NOT NULL,
161
+ key TEXT NOT NULL,
162
+ item_key TEXT NOT NULL,
163
+ value TEXT,
164
+ created_at TEXT DEFAULT (datetime('now')),
165
+ updated_at TEXT DEFAULT (datetime('now')),
166
+ PRIMARY KEY (namespace, key, item_key),
167
+ FOREIGN KEY (namespace, key) REFERENCES state(namespace, key)
168
+ );
169
+ CREATE INDEX idx_state_items_ns_key ON state_items(namespace, key);
170
+ `;
105
171
  /** Registry of all migrations keyed by version number. */
106
172
  const MIGRATIONS = {
107
173
  1: MIGRATION_001,
174
+ 2: MIGRATION_002,
175
+ 3: MIGRATION_003,
108
176
  };
109
177
  /**
110
178
  * Run all pending migrations. Creates schema_version table if needed, applies migrations in order.
@@ -143,7 +211,11 @@ function registerRoutes(app, deps) {
143
211
  const { db, scheduler } = deps;
144
212
  /** GET /health — Health check. */
145
213
  app.get('/health', () => {
146
- return { ok: true, uptime: process.uptime() };
214
+ return {
215
+ ok: true,
216
+ uptime: process.uptime(),
217
+ failedRegistrations: scheduler.getFailedRegistrations().length,
218
+ };
147
219
  });
148
220
  /** GET /jobs — List all jobs with last run status. */
149
221
  app.get('/jobs', () => {
@@ -194,6 +266,7 @@ function registerRoutes(app, deps) {
194
266
  reply.code(404);
195
267
  return { error: 'Job not found' };
196
268
  }
269
+ scheduler.reconcileNow();
197
270
  return { ok: true };
198
271
  });
199
272
  /** POST /jobs/:id/disable — Disable a job. */
@@ -205,6 +278,7 @@ function registerRoutes(app, deps) {
205
278
  reply.code(404);
206
279
  return { error: 'Job not found' };
207
280
  }
281
+ scheduler.reconcileNow();
208
282
  return { ok: true };
209
283
  });
210
284
  /** GET /stats — Aggregate job statistics. */
@@ -213,6 +287,7 @@ function registerRoutes(app, deps) {
213
287
  .prepare('SELECT COUNT(*) as count FROM jobs')
214
288
  .get();
215
289
  const runningCount = scheduler.getRunningJobs().length;
290
+ const failedCount = scheduler.getFailedRegistrations().length;
216
291
  const okLastHour = db
217
292
  .prepare(`SELECT COUNT(*) as count FROM runs
218
293
  WHERE status = 'ok' AND started_at > datetime('now', '-1 hour')`)
@@ -224,6 +299,7 @@ function registerRoutes(app, deps) {
224
299
  return {
225
300
  totalJobs: totalJobs.count,
226
301
  running: runningCount,
302
+ failedRegistrations: failedCount,
227
303
  okLastHour: okLastHour.count,
228
304
  errorsLastHour: errorsLastHour.count,
229
305
  };
@@ -236,19 +312,21 @@ function registerRoutes(app, deps) {
236
312
  /**
237
313
  * Create and configure the Fastify server. Routes are registered but server is not started.
238
314
  */
239
- function createServer(config, deps) {
315
+ function createServer(deps) {
240
316
  const app = Fastify({
241
- logger: {
242
- level: config.log.level,
243
- ...(config.log.file
244
- ? {
245
- transport: {
246
- target: 'pino/file',
247
- options: { destination: config.log.file },
248
- },
249
- }
250
- : {}),
251
- },
317
+ logger: deps.loggerConfig
318
+ ? {
319
+ level: deps.loggerConfig.level,
320
+ ...(deps.loggerConfig.file
321
+ ? {
322
+ transport: {
323
+ target: 'pino/file',
324
+ options: { destination: deps.loggerConfig.file },
325
+ },
326
+ }
327
+ : {}),
328
+ }
329
+ : false,
252
330
  });
253
331
  registerRoutes(app, deps);
254
332
  return app;
@@ -259,20 +337,36 @@ function createServer(config, deps) {
259
337
  */
260
338
/**
 * Delete runs older than the configured retention period.
 *
 * `runs.started_at` is written with SQLite's `datetime('now')`, which yields
 * UTC text in `YYYY-MM-DD HH:MM:SS` form. The cutoff is rendered in that same
 * form so the textual `<` comparison is chronological. A raw `toISOString()`
 * value (`YYYY-MM-DDTHH:MM:SS.sssZ`) sorts after every space-separated
 * timestamp of the same calendar day and would prune runs up to a day early.
 *
 * @param db - Database handle exposing `prepare(sql).run(...params)`.
 * @param days - Retention period in days.
 * @param logger - Logger exposing `info(obj, msg)`.
 */
function pruneOldRuns(db, days, logger) {
    const cutoffMs = Date.now() - days * 24 * 60 * 60 * 1000;
    // ISO text is always UTC; drop the fraction/zone and swap 'T' for ' '
    // to match the datetime('now') storage format.
    const cutoffDate = new Date(cutoffMs)
        .toISOString()
        .slice(0, 19)
        .replace('T', ' ');
    const result = db
        .prepare(`DELETE FROM runs WHERE started_at < ?`)
        .run(cutoffDate);
    if (result.changes > 0) {
        logger.info({ deleted: result.changes }, 'Pruned old runs');
    }
}
269
/**
 * Remove every row of the `state` table whose `expires_at` is set and already
 * in the past, logging the number of rows removed when there were any.
 */
function cleanExpiredCursors(db, logger) {
    const statement = db.prepare(`DELETE FROM state WHERE expires_at IS NOT NULL AND expires_at < datetime('now')`);
    const { changes } = statement.run();
    if (!(changes > 0)) {
        return;
    }
    logger.info({ deleted: changes }, 'Cleaned expired state entries');
}
357
/**
 * Delete finished queue items ('done' or 'failed') whose `finished_at` is older
 * than the owning queue's `retention_days`. Items whose queue has no definition
 * row fall back to 7 days. Logs the number of rows removed when non-zero.
 */
function pruneOldQueueItems(db, logger) {
    const pruneSql = `DELETE FROM queue_items
WHERE status IN ('done', 'failed')
AND finished_at < datetime('now', '-' ||
COALESCE(
(SELECT retention_days FROM queues WHERE queues.id = queue_items.queue_id),
7
) || ' days')`;
    const { changes } = db.prepare(pruneSql).run();
    if (!(changes > 0)) {
        return;
    }
    logger.info({ deleted: changes }, 'Pruned old queue items');
}
278
372
  /**
@@ -283,6 +377,7 @@ function createMaintenance(db, config, logger) {
283
377
  function runAll() {
284
378
  pruneOldRuns(db, config.runRetentionDays, logger);
285
379
  cleanExpiredCursors(db, logger);
380
+ pruneOldQueueItems(db, logger);
286
381
  }
287
382
  return {
288
383
  start() {
@@ -304,38 +399,128 @@ function createMaintenance(db, config, logger) {
304
399
  }
305
400
 
306
401
  /**
307
- * Slack notification module. Sends job completion/failure messages via Slack Web API (chat.postMessage). Falls back gracefully if no token.
402
+ * Shared HTTP utility for making POST requests.
308
403
  */
309
/**
 * Make an HTTP/HTTPS POST request and resolve with the parsed JSON response body.
 *
 * The transport (`node:http` vs `node:https`) is chosen from the URL protocol.
 * The promise rejects when:
 *  - the URL cannot be parsed,
 *  - the request or response stream errors,
 *  - no socket activity occurs for `timeoutMs` (the request is destroyed),
 *  - the status code is anything other than 200, or
 *  - the response body is not valid JSON.
 *
 * @param url - Absolute http(s) URL to POST to.
 * @param headers - Request headers; `Content-Length` is always set from `body`.
 * @param body - Request payload (string or Buffer).
 * @param timeoutMs - Socket timeout in milliseconds (default 30000).
 * @returns Promise resolving to the parsed JSON response.
 */
function httpPost(url, headers, body, timeoutMs = 30000) {
    return new Promise((resolve, reject) => {
        const parsedUrl = new URL(url);
        const requestFn = parsedUrl.protocol === 'https:' ? request : request$1;
        const req = requestFn({
            hostname: parsedUrl.hostname,
            port: parsedUrl.port,
            path: parsedUrl.pathname + parsedUrl.search,
            method: 'POST',
            headers: {
                ...headers,
                'Content-Length': Buffer.byteLength(body),
            },
            timeout: timeoutMs,
        }, (res) => {
            // Decode through the stream's UTF-8 decoder so a multi-byte character
            // split across two chunks is not turned into replacement characters.
            res.setEncoding('utf8');
            let responseBody = '';
            res.on('data', (chunk) => {
                responseBody += chunk;
            });
            // A response that fails mid-stream would otherwise leave the promise pending.
            res.on('error', reject);
            res.on('end', () => {
                if (res.statusCode !== 200) {
                    reject(new Error(`HTTP ${String(res.statusCode)}: ${responseBody}`));
                    return;
                }
                try {
                    resolve(JSON.parse(responseBody));
                }
                catch {
                    reject(new Error(`Failed to parse JSON response: ${responseBody}`));
                }
            });
        });
        req.on('error', reject);
        req.on('timeout', () => {
            // 'timeout' only signals idleness; the request must be torn down explicitly.
            req.destroy();
            reject(new Error('Request timed out'));
        });
        req.write(body);
        req.end();
    });
}
447
+
448
+ /**
449
+ * OpenClaw Gateway HTTP client for spawning and monitoring sessions.
450
+ */
451
/**
 * POST a tool invocation to the Gateway `/tools/invoke` endpoint.
 *
 * Trailing slashes on the base URL are stripped so that a configured value
 * such as `http://host:1234/` does not produce a `//tools/invoke` path.
 *
 * @param url - Gateway base URL.
 * @param token - Bearer token sent in the `Authorization` header.
 * @param tool - Tool name to invoke.
 * @param args - Tool arguments (serialized as JSON).
 * @param timeoutMs - Request timeout in milliseconds (default 30000).
 * @returns Promise resolving to the parsed JSON response from `httpPost`.
 */
function invokeGateway(url, token, tool, args, timeoutMs = 30000) {
    const baseUrl = url.replace(/\/+$/, '');
    const payload = JSON.stringify({ tool, args });
    return httpPost(`${baseUrl}/tools/invoke`, {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${token}`,
    }, payload, timeoutMs);
}
459
/**
 * Create a Gateway client bound to one Gateway URL and bearer token.
 *
 * Every method issues a single `invokeGateway` call and throws when the
 * response's `ok` flag is falsy. Methods do not rely on `this`, so they keep
 * working when destructured or passed as bare callbacks.
 *
 * @param options - `{ url, token, timeoutMs = 30000 }`.
 * @returns Object with `spawnSession`, `getSessionHistory`, `getSessionInfo`
 *   and `isSessionComplete`.
 */
function createGatewayClient(options) {
    const { url, token, timeoutMs = 30000 } = options;
    const client = {
        /** Spawn a session for `task`; resolves to `{ sessionKey, runId }`. */
        async spawnSession(task, opts) {
            const response = (await invokeGateway(url, token, 'sessions_spawn', {
                task,
                label: opts?.label,
                thinking: opts?.thinking,
                runTimeoutSeconds: opts?.runTimeoutSeconds,
            }, timeoutMs));
            if (!response.ok) {
                throw new Error('Failed to spawn session');
            }
            return {
                sessionKey: response.result.details.childSessionKey,
                runId: response.result.details.runId,
            };
        },
        /** Fetch up to `limit` history entries (tool messages excluded) for a session. */
        async getSessionHistory(sessionKey, limit = 3) {
            const response = (await invokeGateway(url, token, 'sessions_history', { sessionKey, limit, includeTools: false }, timeoutMs));
            if (!response.ok) {
                throw new Error('Failed to get session history');
            }
            return response.result;
        },
        /** Look up token usage, model and transcript path; resolves to null when not found. */
        async getSessionInfo(sessionKey) {
            // Note: sessions_list doesn't support filtering by key, so we fetch recent sessions
            // and search client-side. Consider using sessions_history with limit 1 as alternative,
            // or request a sessions_get tool from Gateway for more efficient single-session lookup.
            const response = (await invokeGateway(url, token, 'sessions_list', { activeMinutes: 120, limit: 500 }, // Increased from 100 to reduce false negatives
            timeoutMs));
            if (!response.ok) {
                throw new Error('Failed to list sessions');
            }
            const session = response.result.find((s) => s.sessionKey === sessionKey);
            if (!session)
                return null;
            return {
                totalTokens: session.totalTokens,
                model: session.model,
                transcriptPath: session.transcriptPath,
            };
        },
        /** True when the newest history entry is an assistant message carrying a `stopReason`. */
        async isSessionComplete(sessionKey) {
            // Go through `client` rather than `this` so an unbound call still works,
            // while a replaced `client.getSessionHistory` is still honoured.
            const history = await client.getSessionHistory(sessionKey, 3);
            if (history.length === 0)
                return false;
            const lastMessage = history[history.length - 1];
            return (lastMessage.role === 'assistant' && lastMessage.stopReason !== undefined);
        },
    };
    return client;
}
512
+
513
+ /**
514
+ * Slack notification module. Sends job completion/failure messages via Slack Web API (chat.postMessage). Falls back gracefully if no token.
515
+ */
516
/**
 * Post a message to Slack via the chat.postMessage API.
 *
 * Slack reports most failures (bad token, unknown channel, ...) as HTTP 200
 * with `ok: false` in the JSON body, so the parsed response is inspected and
 * a non-ok result is turned into a rejection instead of being treated as sent.
 *
 * @param token - Slack bearer token.
 * @param channel - Channel to post to.
 * @param text - Message text.
 * @throws Error when the request fails or Slack answers with `ok !== true`.
 */
async function postToSlack(token, channel, text) {
    const payload = JSON.stringify({ channel, text });
    const response = await httpPost('https://slack.com/api/chat.postMessage', {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${token}`,
    }, payload);
    if (response?.ok !== true) {
        throw new Error(`Slack API error: ${String(response?.error ?? 'unknown_error')}`);
    }
}
339
524
  /**
340
525
  * Create a notifier that sends Slack messages for job events. If no token, logs warning and returns silently.
341
526
  */
@@ -427,12 +612,14 @@ function resolveCommand(script) {
427
612
  * Execute a job script as a child process. Captures output, parses metadata, enforces timeout.
428
613
  */
429
614
  function executeJob(options) {
430
- const { script, dbPath, jobId, runId, timeoutMs } = options;
615
+ const { script, dbPath, jobId, runId, timeoutMs, commandResolver } = options;
431
616
  const startTime = Date.now();
432
617
  return new Promise((resolve) => {
433
618
  const stdoutBuffer = new RingBuffer(100);
434
619
  const stderrBuffer = new RingBuffer(100);
435
- const { command, args } = resolveCommand(script);
620
+ const { command, args } = commandResolver
621
+ ? commandResolver(script)
622
+ : resolveCommand(script);
436
623
  const child = spawn(command, args, {
437
624
  env: {
438
625
  ...process.env,
@@ -527,66 +714,289 @@ function executeJob(options) {
527
714
  });
528
715
  }
529
716
 
717
+ /**
718
+ * Cron registration and reconciliation utilities.
719
+ */
720
/**
 * Create a registry that keeps in-memory cron instances in sync with the
 * enabled rows of the `jobs` table.
 *
 * @param deps - `{ db, logger, onScheduledRun }`. `onScheduledRun(job)` is
 *   invoked on each cron tick with the job row freshly re-read from the DB.
 * @returns `{ reconcile, stopAll, getFailedRegistrations }`.
 */
function createCronRegistry(deps) {
    const { db, logger, onScheduledRun } = deps;
    const crons = new Map();
    const cronSchedules = new Map();
    const failedRegistrations = new Set();
    /** Create and track a cron for `job`; returns false (and records the failure) if construction throws. */
    function registerCron(job) {
        try {
            const jobId = job.id;
            const cron = new Cron(job.schedule, () => {
                // Re-read job from DB to get current configuration
                const currentJob = db
                    .prepare('SELECT * FROM jobs WHERE id = ? AND enabled = 1')
                    .get(jobId);
                if (!currentJob) {
                    logger.warn({ jobId }, 'Job no longer exists or disabled, skipping');
                    return;
                }
                onScheduledRun(currentJob);
            });
            crons.set(job.id, cron);
            cronSchedules.set(job.id, job.schedule);
            failedRegistrations.delete(job.id);
            logger.info({ jobId: job.id, schedule: job.schedule }, 'Scheduled job');
            return true;
        }
        catch (err) {
            logger.error({ jobId: job.id, err }, 'Failed to schedule job');
            failedRegistrations.add(job.id);
            return false;
        }
    }
    /**
     * Bring registered crons in line with the enabled jobs: stop crons for jobs
     * that are gone or disabled, register new jobs, and re-register jobs whose
     * schedule changed. Returns `{ totalEnabled, failedIds }` for this pass.
     */
    function reconcile() {
        const enabledJobs = db
            .prepare('SELECT * FROM jobs WHERE enabled = 1')
            .all();
        const enabledById = new Map(enabledJobs.map((j) => [j.id, j]));
        // Remove disabled/deleted jobs
        for (const [jobId, cron] of crons.entries()) {
            if (!enabledById.has(jobId)) {
                cron.stop();
                crons.delete(jobId);
                cronSchedules.delete(jobId);
            }
        }
        // A job that failed to register has no cron entry, so the loop above never
        // sees it; forget its failure once it is disabled or deleted so the
        // reported failure list does not go stale.
        for (const jobId of failedRegistrations) {
            if (!enabledById.has(jobId)) {
                failedRegistrations.delete(jobId);
            }
        }
        const failedIds = [];
        // Add or update enabled jobs
        for (const job of enabledJobs) {
            const existingCron = crons.get(job.id);
            const existingSchedule = cronSchedules.get(job.id);
            if (!existingCron) {
                if (!registerCron(job))
                    failedIds.push(job.id);
                continue;
            }
            if (existingSchedule !== job.schedule) {
                existingCron.stop();
                crons.delete(job.id);
                cronSchedules.delete(job.id);
                if (!registerCron(job))
                    failedIds.push(job.id);
            }
        }
        return { totalEnabled: enabledJobs.length, failedIds };
    }
    /** Stop every registered cron and forget all schedules. */
    function stopAll() {
        for (const cron of crons.values()) {
            cron.stop();
        }
        crons.clear();
        cronSchedules.clear();
    }
    return {
        reconcile,
        stopAll,
        /** Ids of jobs whose most recent registration attempt failed. */
        getFailedRegistrations() {
            return Array.from(failedRegistrations);
        },
    };
}
799
+
800
+ /**
801
+ * Notification dispatch helper for job completion events.
802
+ */
803
/**
 * Send the success or failure notification for a finished job, if a target
 * is configured for that outcome. A rejected notification is logged and
 * swallowed so it never fails the job itself.
 *
 * @param result - Execution result; `status === 'ok'` counts as success.
 * @param jobName - Job name passed to the notifier and included in error logs.
 * @param onSuccess - Success target (falsy disables success notifications).
 * @param onFailure - Failure target (falsy disables failure notifications).
 * @param notifier - Object exposing `notifySuccess` / `notifyFailure`.
 * @param logger - Logger exposing `error(obj, msg)`.
 */
async function dispatchNotification(result, jobName, onSuccess, onFailure, notifier, logger) {
    const succeeded = result.status === 'ok';
    if (succeeded) {
        if (!onSuccess) {
            return;
        }
        const pending = notifier.notifySuccess(jobName, result.durationMs, onSuccess);
        await pending.catch((err) => {
            logger.error({ jobName, err }, 'Success notification failed');
        });
        return;
    }
    if (!onFailure) {
        return;
    }
    const pending = notifier.notifyFailure(jobName, result.durationMs, result.error, onFailure);
    await pending.catch((err) => {
        logger.error({ jobName, err }, 'Failure notification failed');
    });
}
820
+
821
+ /**
822
+ * Run record repository for managing job execution records.
823
+ */
824
/**
 * Build a small repository over the `runs` table for the given database handle.
 *
 * - `createRun(jobId, trigger)` inserts a row with status 'running' and returns its row id.
 * - `finishRun(runId, execResult)` stores the outcome fields of `execResult` on that row.
 */
function createRunRepository(db) {
    const createRun = (jobId, trigger) => {
        const insert = db.prepare(`INSERT INTO runs (job_id, status, started_at, trigger)
VALUES (?, 'running', datetime('now'), ?)`);
        const { lastInsertRowid } = insert.run(jobId, trigger);
        return lastInsertRowid;
    };
    const finishRun = (runId, execResult) => {
        const { status, durationMs, exitCode, tokens, resultMeta, error, stdoutTail, stderrTail } = execResult;
        const update = db.prepare(`UPDATE runs SET status = ?, finished_at = datetime('now'), duration_ms = ?,
exit_code = ?, tokens = ?, result_meta = ?, error = ?, stdout_tail = ?, stderr_tail = ?
WHERE id = ?`);
        update.run(status, durationMs, exitCode, tokens, resultMeta, error, stdoutTail, stderrTail, runId);
    };
    return { createRun, finishRun };
}
841
+
842
+ /**
843
+ * Session executor for job type='session'. Spawns OpenClaw Gateway sessions and polls for completion.
844
+ */
845
/** Extensions (lower-case, with dot) that mark `script` as an executable script, not a prompt. */
const SCRIPT_EXTENSIONS = ['.js', '.mjs', '.cjs', '.ps1', '.cmd', '.bat'];
/**
 * Turn a job's `script` field into the prompt text for a session.
 *
 * - Script extension: returns null (the caller decides how to handle it).
 * - `.md` / `.txt`: returns the file's UTF-8 contents; throws if the file is missing.
 * - Anything else: the value itself is returned as the prompt.
 */
function resolveTaskPrompt(script) {
    const extension = extname(script).toLowerCase();
    const looksLikeScript = SCRIPT_EXTENSIONS.some((candidate) => candidate === extension);
    if (looksLikeScript) {
        return null;
    }
    const isPromptFile = ['.md', '.txt'].includes(extension);
    if (!isPromptFile) {
        // No recognised extension: the field already holds the prompt text.
        return script;
    }
    if (existsSync(script)) {
        return readFileSync(script, 'utf-8');
    }
    throw new Error(`Prompt file not found: ${script}`);
}
864
/**
 * Poll the Gateway until the session completes or `timeoutMs` elapses.
 *
 * The wait between polls starts at `initialIntervalMs`, grows by 20% after
 * each poll and is capped at 15 seconds. Each wait is also clamped to the time
 * remaining, so the timeout error is raised at the deadline rather than up to
 * one full interval after it.
 *
 * @param gatewayClient - Object exposing `isSessionComplete(sessionKey)`.
 * @param sessionKey - Session to watch.
 * @param timeoutMs - Overall deadline in milliseconds.
 * @param initialIntervalMs - First polling interval (default 5000).
 * @throws Error `Session timed out after <timeoutMs>ms` when the deadline passes.
 */
async function pollCompletion(gatewayClient, sessionKey, timeoutMs, initialIntervalMs = 5000) {
    const startTime = Date.now();
    let interval = initialIntervalMs;
    const maxInterval = 15000;
    while (Date.now() - startTime < timeoutMs) {
        const isComplete = await gatewayClient.isSessionComplete(sessionKey);
        if (isComplete)
            return;
        // Never sleep past the deadline; the poll itself may already have used it up.
        const remaining = Math.max(0, timeoutMs - (Date.now() - startTime));
        const delay = Math.min(interval, remaining);
        await new Promise((resolve) => setTimeout(resolve, delay));
        interval = Math.min(interval * 1.2, maxInterval); // Exponential backoff capped
    }
    throw new Error(`Session timed out after ${String(timeoutMs)}ms`);
}
878
/**
 * Run a session-type job end to end.
 *
 * Resolves the prompt from `script`, spawns a Gateway session labelled with the
 * job id, waits for it to complete, then reads the session's token count.
 * Never rejects: any failure is reported in the returned result, with status
 * 'timeout' when the error message contains "timed out" and 'error' otherwise.
 *
 * @param options - `{ script, jobId, timeoutMs = 300000, gatewayClient, pollIntervalMs }`.
 * @returns Execution result (`status`, `exitCode`, `durationMs`, `tokens`,
 *   `resultMeta`, `stdoutTail`, `stderrTail`, `error`).
 */
async function executeSession(options) {
    const { script, jobId, timeoutMs = 300000, gatewayClient, pollIntervalMs, } = options;
    const startedAt = Date.now();
    const elapsed = () => Date.now() - startedAt;
    try {
        const prompt = resolveTaskPrompt(script);
        if (prompt === null) {
            throw new Error('Session job script has script extension; expected prompt text or .md/.txt file');
        }
        const spawnOptions = {
            label: jobId,
            thinking: 'low',
            runTimeoutSeconds: Math.floor(timeoutMs / 1000),
        };
        const { sessionKey } = await gatewayClient.spawnSession(prompt, spawnOptions);
        await pollCompletion(gatewayClient, sessionKey, timeoutMs, pollIntervalMs);
        // Token usage is only available through the session listing.
        const info = await gatewayClient.getSessionInfo(sessionKey);
        return {
            status: 'ok',
            exitCode: null,
            durationMs: elapsed(),
            tokens: info?.totalTokens ?? null,
            resultMeta: sessionKey,
            stdoutTail: `Session completed: ${sessionKey}`,
            stderrTail: '',
            error: null,
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : 'Unknown session error';
        // Timeouts and other failures differ only in the reported status.
        return {
            status: message.includes('timed out') ? 'timeout' : 'error',
            exitCode: null,
            durationMs: elapsed(),
            tokens: null,
            resultMeta: null,
            stdoutTail: '',
            stderrTail: message,
            error: message,
        };
    }
}
941
+
530
942
  /**
531
943
  * Croner-based job scheduler. Loads enabled jobs, creates cron instances, manages execution, respects overlap policies and concurrency limits.
532
944
  */
945
+ // JobRow is imported from cron-registry
533
946
  /**
534
947
  * Create the job scheduler. Manages cron schedules, job execution, overlap policies, and notifications.
535
948
  */
536
949
  function createScheduler(deps) {
537
- const { db, executor, notifier, config, logger } = deps;
538
- const crons = new Map();
950
+ const { db, executor, notifier, config, logger, gatewayClient } = deps;
539
951
  const runningJobs = new Set();
540
- /** Insert a run record and return its ID. */
541
- function createRun(jobId, trigger) {
542
- const result = db
543
- .prepare(`INSERT INTO runs (job_id, status, started_at, trigger)
544
- VALUES (?, 'running', datetime('now'), ?)`)
545
- .run(jobId, trigger);
546
- return result.lastInsertRowid;
547
- }
548
- /** Update run record with completion data. */
549
- function finishRun(runId, execResult) {
550
- db.prepare(`UPDATE runs SET status = ?, finished_at = datetime('now'), duration_ms = ?,
551
- exit_code = ?, tokens = ?, result_meta = ?, error = ?, stdout_tail = ?, stderr_tail = ?
552
- WHERE id = ?`).run(execResult.status, execResult.durationMs, execResult.exitCode, execResult.tokens, execResult.resultMeta, execResult.error, execResult.stdoutTail, execResult.stderrTail, runId);
553
- }
952
+ const cronRegistry = createCronRegistry({
953
+ db,
954
+ logger,
955
+ onScheduledRun: (job) => {
956
+ void onScheduledRun(job);
957
+ },
958
+ });
959
+ const runRepository = createRunRepository(db);
960
+ let reconcileInterval = null;
554
961
  /** Execute a job: create run record, run script, update record, send notifications. */
555
962
  async function runJob(job, trigger) {
556
- const { id, name, script, timeout_ms, on_success, on_failure } = job;
963
+ const { id, name, script, type, timeout_ms, on_success, on_failure } = job;
557
964
  // Check concurrency limit
558
965
  if (runningJobs.size >= config.maxConcurrency) {
559
966
  logger.warn({ jobId: id }, 'Max concurrency reached, skipping job');
560
967
  throw new Error('Max concurrency reached');
561
968
  }
562
969
  runningJobs.add(id);
563
- const runId = createRun(id, trigger);
564
- logger.info({ jobId: id, runId, trigger }, 'Starting job');
970
+ const runId = runRepository.createRun(id, trigger);
971
+ logger.info({ jobId: id, runId, trigger, type }, 'Starting job');
565
972
  try {
566
- const result = await executor({
567
- script,
568
- dbPath: config.dbPath,
569
- jobId: id,
570
- runId,
571
- timeoutMs: timeout_ms ?? undefined,
572
- });
573
- finishRun(runId, result);
574
- logger.info({ jobId: id, runId, status: result.status }, 'Job finished');
575
- // Send notifications
576
- if (result.status === 'ok' && on_success) {
577
- await notifier
578
- .notifySuccess(name, result.durationMs, on_success)
579
- .catch((err) => {
580
- logger.error({ jobId: id, err }, 'Notification failed');
973
+ let result;
974
+ // Route based on job type
975
+ if (type === 'session') {
976
+ if (!gatewayClient) {
977
+ throw new Error('Session job requires Gateway client (gateway.tokenPath not configured)');
978
+ }
979
+ result = await executeSession({
980
+ script,
981
+ jobId: id,
982
+ timeoutMs: timeout_ms ?? undefined,
983
+ gatewayClient,
581
984
  });
582
985
  }
583
- else if (result.status !== 'ok' && on_failure) {
584
- await notifier
585
- .notifyFailure(name, result.durationMs, result.error, on_failure)
586
- .catch((err) => {
587
- logger.error({ jobId: id, err }, 'Notification failed');
986
+ else {
987
+ // Default to script executor
988
+ result = await executor({
989
+ script,
990
+ dbPath: config.dbPath,
991
+ jobId: id,
992
+ runId,
993
+ timeoutMs: timeout_ms ?? undefined,
588
994
  });
589
995
  }
996
+ runRepository.finishRun(runId, result);
997
+ logger.info({ jobId: id, runId, status: result.status }, 'Job finished');
998
+ // Send notifications
999
+ await dispatchNotification(result, name, on_success, on_failure, notifier, logger);
590
1000
  return result;
591
1001
  }
592
1002
  finally {
@@ -602,63 +1012,53 @@ function createScheduler(deps) {
602
1012
  logger.info({ jobId: id }, 'Job already running, skipping (overlap_policy=skip)');
603
1013
  return;
604
1014
  }
605
- else if (overlap_policy === 'queue') {
606
- logger.info({ jobId: id }, 'Job already running, queueing (overlap_policy=queue)');
607
- // In a real implementation, we'd queue this. For now, just skip.
608
- return;
609
- }
610
1015
  // 'allow' policy: proceed
611
1016
  }
612
1017
  await runJob(job, 'schedule').catch((err) => {
613
1018
  logger.error({ jobId: id, err }, 'Job execution failed');
614
1019
  });
615
1020
  }
1021
+ // Cron registration and reconciliation are handled by cronRegistry.
616
1022
  return {
617
1023
  start() {
618
- // Load all enabled jobs
619
- const jobs = db
620
- .prepare('SELECT * FROM jobs WHERE enabled = 1')
621
- .all();
622
- logger.info({ count: jobs.length }, 'Loading jobs');
623
- for (const job of jobs) {
624
- try {
625
- const jobId = job.id;
626
- const cron = new Cron(job.schedule, () => {
627
- // Re-read job from DB to get current configuration
628
- const currentJob = db
629
- .prepare('SELECT * FROM jobs WHERE id = ? AND enabled = 1')
630
- .get(jobId);
631
- if (!currentJob) {
632
- logger.warn({ jobId }, 'Job no longer exists or disabled, skipping');
633
- return;
634
- }
635
- void onScheduledRun(currentJob);
636
- });
637
- crons.set(job.id, cron);
638
- logger.info({ jobId: job.id, schedule: job.schedule }, 'Scheduled job');
639
- }
640
- catch (err) {
641
- logger.error({ jobId: job.id, err }, 'Failed to schedule job');
1024
+ const { totalEnabled, failedIds } = cronRegistry.reconcile();
1025
+ logger.info({ count: totalEnabled }, 'Loading jobs');
1026
+ if (failedIds.length > 0) {
1027
+ const ok = totalEnabled - failedIds.length;
1028
+ logger.warn({ failed: failedIds.length, total: totalEnabled }, `${String(failedIds.length)} of ${String(totalEnabled)} jobs failed to register`);
1029
+ const message = `⚠️ jeeves-runner started: ${String(ok)}/${String(totalEnabled)} jobs scheduled, ${String(failedIds.length)} failed: ${failedIds.join(', ')}`;
1030
+ const channel = config.notifications.defaultOnFailure;
1031
+ if (channel) {
1032
+ void notifier.notifyFailure('jeeves-runner', 0, message, channel);
642
1033
  }
643
1034
  }
1035
+ if (reconcileInterval === null && config.reconcileIntervalMs > 0) {
1036
+ reconcileInterval = setInterval(() => {
1037
+ try {
1038
+ cronRegistry.reconcile();
1039
+ }
1040
+ catch (err) {
1041
+ logger.error({ err }, 'Reconciliation failed');
1042
+ }
1043
+ }, config.reconcileIntervalMs);
1044
+ }
644
1045
  },
645
- stop() {
1046
+ async stop() {
646
1047
  logger.info('Stopping scheduler');
647
- // Stop all crons
648
- for (const cron of crons.values()) {
649
- cron.stop();
1048
+ if (reconcileInterval) {
1049
+ clearInterval(reconcileInterval);
1050
+ reconcileInterval = null;
650
1051
  }
651
- crons.clear();
652
- // Wait for running jobs (simple poll with timeout)
1052
+ // Stop all crons
1053
+ cronRegistry.stopAll();
1054
+ // Wait for running jobs (with timeout)
653
1055
  const deadline = Date.now() + config.shutdownGraceMs;
654
- const checkInterval = setInterval(() => {
655
- if (runningJobs.size === 0 || Date.now() > deadline) {
656
- clearInterval(checkInterval);
657
- if (runningJobs.size > 0) {
658
- logger.warn({ count: runningJobs.size }, 'Forced shutdown with running jobs');
659
- }
660
- }
661
- }, 100);
1056
+ while (runningJobs.size > 0 && Date.now() < deadline) {
1057
+ await new Promise((resolve) => setTimeout(resolve, 100));
1058
+ }
1059
+ if (runningJobs.size > 0) {
1060
+ logger.warn({ count: runningJobs.size }, 'Forced shutdown with running jobs');
1061
+ }
662
1062
  },
663
1063
  async triggerJob(jobId) {
664
1064
  const job = db.prepare('SELECT * FROM jobs WHERE id = ?').get(jobId);
@@ -666,9 +1066,15 @@ function createScheduler(deps) {
666
1066
  throw new Error(`Job not found: ${jobId}`);
667
1067
  return runJob(job, 'manual');
668
1068
  },
1069
+ reconcileNow() {
1070
+ cronRegistry.reconcile();
1071
+ },
669
1072
  getRunningJobs() {
670
1073
  return Array.from(runningJobs);
671
1074
  },
1075
+ getFailedRegistrations() {
1076
+ return cronRegistry.getFailedRegistrations();
1077
+ },
672
1078
  };
673
1079
  }
674
1080
 
@@ -678,22 +1084,22 @@ function createScheduler(deps) {
678
1084
  /**
679
1085
  * Create the runner. Initializes database, scheduler, API server, and sets up graceful shutdown.
680
1086
  */
681
- function createRunner(config) {
1087
+ function createRunner(config, deps) {
682
1088
  let db = null;
683
1089
  let scheduler = null;
684
1090
  let server = null;
685
1091
  let maintenance = null;
686
1092
  const logger = pino({
687
- level: config.log.level,
688
- ...(config.log.file
689
- ? {
690
- transport: {
691
- target: 'pino/file',
692
- options: { destination: config.log.file },
693
- },
694
- }
695
- : {}),
696
- });
1093
+ level: config.log.level,
1094
+ ...(config.log.file
1095
+ ? {
1096
+ transport: {
1097
+ target: 'pino/file',
1098
+ options: { destination: config.log.file },
1099
+ },
1100
+ }
1101
+ : {}),
1102
+ });
697
1103
  return {
698
1104
  async start() {
699
1105
  logger.info('Starting runner');
@@ -706,6 +1112,19 @@ function createRunner(config) {
706
1112
  ? readFileSync(config.notifications.slackTokenPath, 'utf-8').trim()
707
1113
  : null;
708
1114
  const notifier = createNotifier({ slackToken });
1115
+ // Gateway client (optional, for session-type jobs)
1116
+ const gatewayToken = config.gateway.tokenPath
1117
+ ? readFileSync(config.gateway.tokenPath, 'utf-8').trim()
1118
+ : (process.env.OPENCLAW_GATEWAY_TOKEN ?? null);
1119
+ const gatewayClient = gatewayToken && config.gateway.url
1120
+ ? createGatewayClient({
1121
+ url: config.gateway.url,
1122
+ token: gatewayToken,
1123
+ })
1124
+ : undefined;
1125
+ if (gatewayClient) {
1126
+ logger.info('Gateway client initialized');
1127
+ }
709
1128
  // Maintenance (run retention pruning + cursor cleanup)
710
1129
  maintenance = createMaintenance(db, {
711
1130
  runRetentionDays: config.runRetentionDays,
@@ -720,11 +1139,16 @@ function createRunner(config) {
720
1139
  notifier,
721
1140
  config,
722
1141
  logger,
1142
+ gatewayClient,
723
1143
  });
724
1144
  scheduler.start();
725
1145
  logger.info('Scheduler started');
726
1146
  // API server
727
- server = createServer(config, { db, scheduler });
1147
+ server = createServer({
1148
+ db,
1149
+ scheduler,
1150
+ loggerConfig: { level: config.log.level, file: config.log.file },
1151
+ });
728
1152
  await server.listen({ port: config.port, host: '127.0.0.1' });
729
1153
  logger.info({ port: config.port }, 'API server listening');
730
1154
  // Graceful shutdown
@@ -747,7 +1171,7 @@ function createRunner(config) {
747
1171
  logger.info('Maintenance stopped');
748
1172
  }
749
1173
  if (scheduler) {
750
- scheduler.stop();
1174
+ await scheduler.stop();
751
1175
  logger.info('Scheduler stopped');
752
1176
  }
753
1177
  if (server) {
@@ -769,30 +1193,54 @@ function createRunner(config) {
769
1193
  */
770
1194
  /** Notification configuration sub-schema. */
771
1195
  const notificationsSchema = z.object({
1196
+ /** Path to Slack bot token file. */
772
1197
  slackTokenPath: z.string().optional(),
1198
+ /** Default Slack channel ID for failure notifications. */
773
1199
  defaultOnFailure: z.string().nullable().default(null),
1200
+ /** Default Slack channel ID for success notifications. */
774
1201
  defaultOnSuccess: z.string().nullable().default(null),
775
1202
  });
776
1203
  /** Log configuration sub-schema. */
777
1204
  const logSchema = z.object({
1205
+ /** Log level threshold (trace, debug, info, warn, error, fatal). */
778
1206
  level: z
779
1207
  .enum(['trace', 'debug', 'info', 'warn', 'error', 'fatal'])
780
1208
  .default('info'),
1209
+ /** Optional log file path. */
781
1210
  file: z.string().optional(),
782
1211
  });
1212
+ /** Gateway configuration sub-schema. */
1213
+ const gatewaySchema = z.object({
1214
+ /** OpenClaw Gateway URL. */
1215
+ url: z.string().default('http://127.0.0.1:18789'),
1216
+ /** Path to file containing Gateway auth token. */
1217
+ tokenPath: z.string().optional(),
1218
+ });
783
1219
  /** Full runner configuration schema. Validates and provides defaults. */
784
1220
  const runnerConfigSchema = z.object({
1221
+ /** HTTP server port for the runner API. */
785
1222
  port: z.number().default(3100),
1223
+ /** Path to SQLite database file. */
786
1224
  dbPath: z.string().default('./data/runner.sqlite'),
1225
+ /** Maximum number of concurrent job executions. */
787
1226
  maxConcurrency: z.number().default(4),
1227
+ /** Number of days to retain completed run records. */
788
1228
  runRetentionDays: z.number().default(30),
1229
+ /** Interval in milliseconds for cursor cleanup task. */
789
1230
  cursorCleanupIntervalMs: z.number().default(3600000),
1231
+ /** Grace period in milliseconds for shutdown completion. */
790
1232
  shutdownGraceMs: z.number().default(30000),
1233
+ /** Interval in milliseconds for job reconciliation checks. */
1234
+ reconcileIntervalMs: z.number().default(60000),
1235
+ /** Notification configuration for job completion events. */
791
1236
  notifications: notificationsSchema.default({
792
1237
  defaultOnFailure: null,
793
1238
  defaultOnSuccess: null,
794
1239
  }),
1240
+ /** Logging configuration. */
795
1241
  log: logSchema.default({ level: 'info' }),
1242
+ /** Gateway configuration for session-type jobs. */
1243
+ gateway: gatewaySchema.default({ url: 'http://127.0.0.1:18789' }),
796
1244
  });
797
1245
 
798
1246
  /**
@@ -850,12 +1298,30 @@ program
850
1298
  .option('-t, --type <type>', 'Job type (script|session)', 'script')
851
1299
  .option('-d, --description <desc>', 'Job description')
852
1300
  .option('--timeout <ms>', 'Timeout in ms')
853
- .option('--overlap <policy>', 'Overlap policy (skip|queue|allow)', 'skip')
1301
+ .option('--overlap <policy>', 'Overlap policy (skip|allow)', 'skip')
854
1302
  .option('--on-failure <channel>', 'Slack channel for failure alerts')
855
1303
  .option('--on-success <channel>', 'Slack channel for success alerts')
856
1304
  .option('-c, --config <path>', 'Path to config file')
857
1305
  .action((options) => {
858
1306
  const config = loadConfig(options.config);
1307
+ // Validate schedule expression before inserting
1308
+ try {
1309
+ new CronPattern(options.schedule);
1310
+ }
1311
+ catch (err) {
1312
+ console.error(`Invalid schedule expression: ${err instanceof Error ? err.message : String(err)}`);
1313
+ process.exit(1);
1314
+ }
1315
+ // Validate overlap_policy
1316
+ if (!['skip', 'allow'].includes(options.overlap)) {
1317
+ console.error(`Invalid overlap policy '${options.overlap}'. Supported values: skip, allow`);
1318
+ process.exit(1);
1319
+ }
1320
+ // Validate job type
1321
+ if (!['script', 'session'].includes(options.type)) {
1322
+ console.error(`Invalid job type '${options.type}'. Supported values: script, session`);
1323
+ process.exit(1);
1324
+ }
859
1325
  const db = createConnection(config.dbPath);
860
1326
  runMigrations(db);
861
1327
  db.prepare(`INSERT INTO jobs (id, name, schedule, script, type, description, timeout_ms, overlap_policy, on_failure, on_success)