clementine-agent 1.0.28 → 1.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,9 +12,19 @@
12
12
  import type { AgentProfile, OnTextCallback, OnToolActivityCallback, VerboseLevel } from '../types.js';
13
13
  import { AgentManager } from './agent-manager.js';
14
14
  /**
15
- * Estimate token count using a weighted heuristic.
16
- * BPE tokenizers average ~4 chars/token for prose, but code, punctuation,
17
- * and whitespace-heavy content tokenize differently.
15
+ * Estimate token count for Claude.
16
+ *
17
+ * Anthropic's published rule of thumb is ~3.5 chars/token for English prose.
18
+ * Clementine's prompts blend English guidance with code, JSON, YAML, and
19
+ * structured memory — so we use 3.3 chars/token, slightly denser than pure
20
+ * English, which tracks within ~10% of the SDK's reported input_tokens in
21
+ * practice (see audit.jsonl tokens_in for live calibration).
22
+ *
23
+ * The previous weighted-regex heuristic (words×1.3 + punct×0.8 + lines×0.5)
24
+ * systematically undercounted code and JSON, triggering spurious compactions.
25
+ *
26
+ * Callers that need exact counts should read `usage.input_tokens` from the
27
+ * SDK result; this function is for pre-flight planning only.
18
28
  */
19
29
  export declare function estimateTokens(text: string): number;
20
30
  export interface ProjectMeta {
@@ -144,20 +144,24 @@ function getChannelToolDenyList(channel) {
144
144
  }
145
145
  // ── Token estimation & context window guard ─────────────────────────
146
146
  /**
147
- * Estimate token count using a weighted heuristic.
148
- * BPE tokenizers average ~4 chars/token for prose, but code, punctuation,
149
- * and whitespace-heavy content tokenize differently.
147
+ * Estimate token count for Claude.
148
+ *
149
+ * Anthropic's published rule of thumb is ~3.5 chars/token for English prose.
150
+ * Clementine's prompts blend English guidance with code, JSON, YAML, and
151
+ * structured memory — so we use 3.3 chars/token, slightly denser than pure
152
+ * English, which tracks within ~10% of the SDK's reported input_tokens in
153
+ * practice (see audit.jsonl tokens_in for live calibration).
154
+ *
155
+ * The previous weighted-regex heuristic (words×1.3 + punct×0.8 + lines×0.5)
156
+ * systematically undercounted code and JSON, triggering spurious compactions.
157
+ *
158
+ * Callers that need exact counts should read `usage.input_tokens` from the
159
+ * SDK result; this function is for pre-flight planning only.
150
160
  */
151
161
  export function estimateTokens(text) {
152
162
  if (!text)
153
163
  return 0;
154
- // Count words (sequences of alphanumeric chars) — average ~1.3 tokens per word
155
- const words = text.match(/\b\w+\b/g)?.length ?? 0;
156
- // Count non-word tokens: punctuation, brackets, operators (each is ~1 token)
157
- const punctuation = text.match(/[^\w\s]/g)?.length ?? 0;
158
- // Newlines and indentation: roughly 1 token per line
159
- const lines = text.split('\n').length;
160
- return Math.ceil(words * 1.3 + punctuation * 0.8 + lines * 0.5);
164
+ return Math.ceil(text.length / 3.3);
161
165
  }
162
166
  /**
163
167
  * Strip lone Unicode surrogates (U+D800–U+DFFF) from a string so it can be
@@ -765,6 +769,21 @@ export class PersonalAssistant {
765
769
  try {
766
770
  const data = JSON.parse(fs.readFileSync(SESSIONS_FILE, 'utf-8'));
767
771
  const now = Date.now();
772
+ // Drop old-format Slack session keys that pre-date workspace namespacing
773
+ // (`slack:user:*`, `slack:dm:*`). The new format is
774
+ // `slack:team:{teamId}:user:{userId}`; old keys can't be safely remapped
775
+ // because the originating workspace isn't known, so they're dropped and
776
+ // users rotate into a fresh session on their next message.
777
+ let droppedLegacy = 0;
778
+ for (const key of Object.keys(data)) {
779
+ if (/^slack:(user|dm):/.test(key)) {
780
+ delete data[key];
781
+ droppedLegacy++;
782
+ }
783
+ }
784
+ if (droppedLegacy > 0) {
785
+ logger.info({ dropped: droppedLegacy }, 'Migrated sessions: dropped pre-workspace-namespacing Slack keys');
786
+ }
768
787
  for (const [key, entry] of Object.entries(data)) {
769
788
  const ts = new Date(entry.timestamp);
770
789
  if (now - ts.getTime() > SESSION_EXPIRY_MS)
@@ -59,7 +59,7 @@ export async function startSlack(gateway, dispatcher, slackBotManager) {
59
59
  app.error(async (error) => {
60
60
  logger.error({ err: error }, 'Slack app error — continuing');
61
61
  });
62
- app.message(async ({ message, client }) => {
62
+ app.message(async ({ message, client, context }) => {
63
63
  try {
64
64
  // Type guard: only handle regular user messages
65
65
  if (!('user' in message) || !('text' in message))
@@ -72,6 +72,10 @@ export async function startSlack(gateway, dispatcher, slackBotManager) {
72
72
  if (slackBotManager?.getOwnedChannelIds().includes(message.channel))
73
73
  return;
74
74
  const userId = message.user;
75
+ // Slack user IDs are scoped per-workspace, so a bare `slack:user:{uid}`
76
+ // collides across workspaces. Namespace by team/workspace ID so sessions
77
+ // stay isolated even when the same bot is installed in multiple workspaces.
78
+ const teamId = context.teamId ?? (await client.auth.test().then(r => r.team_id).catch(() => 'unknown'));
75
79
  // Owner-only check
76
80
  if (SLACK_OWNER_USER_ID && userId !== SLACK_OWNER_USER_ID) {
77
81
  logger.warn(`Ignored Slack message from non-owner: ${userId}`);
@@ -93,7 +97,7 @@ export async function startSlack(gateway, dispatcher, slackBotManager) {
93
97
  return;
94
98
  const channel = message.channel;
95
99
  const threadTs = ('thread_ts' in message ? message.thread_ts : undefined) ?? message.ts;
96
- const sessionKey = `slack:user:${userId}`;
100
+ const sessionKey = `slack:team:${teamId}:user:${userId}`;
97
101
  // ── !stop — abort active query (bypasses session lock) ────────────
98
102
  if (text === '!stop' || text === '/stop') {
99
103
  const stopped = gateway.stopSession(sessionKey);
@@ -60,6 +60,7 @@ export declare class CronScheduler {
60
60
  private disabledJobs;
61
61
  private scheduledTasks;
62
62
  private runningJobs;
63
+ private runMetadata;
63
64
  private completedJobs;
64
65
  private watching;
65
66
  readonly runLog: CronRunLog;
@@ -71,7 +72,21 @@ export declare class CronScheduler {
71
72
  private goalTriggerDir;
72
73
  private triggerTimer;
73
74
  private statusChangeListeners;
75
+ private static readonly RUNNING_JOBS_FILE;
74
76
  constructor(gateway: Gateway, dispatcher: NotificationDispatcher);
77
+ /**
78
+ * Atomically persist the current runningJobs set to disk. Uses write-then-
79
+ * rename so a crash mid-write cannot corrupt the file.
80
+ */
81
+ private persistRunningJobs;
82
+ /**
83
+ * On startup, read the persisted running-jobs file. Any entries present
84
+ * represent jobs interrupted by a previous crash. Surface each to audit.jsonl
85
+ * and clear the file. Deliberately do NOT auto-restart — the next scheduled
86
+ * tick handles it, avoiding duplicate external side effects (emails sent,
87
+ * commits pushed, etc.) from a partial prior run.
88
+ */
89
+ private reconcileInterruptedJobs;
75
90
  /** Load job definitions from CRON.md and agent dirs without scheduling tasks. */
76
91
  private loadJobDefinitions;
77
92
  /** Register a listener that fires when system state changes (job start/finish, self-improve, etc). */
@@ -7,7 +7,7 @@
7
7
  * retry helpers, CronRunLog, and daily-note logging utilities used by both schedulers.
8
8
  */
9
9
  import { execSync } from 'node:child_process';
10
- import { appendFileSync, existsSync, mkdirSync, readFileSync, readdirSync, statSync, unlinkSync, watchFile, unwatchFile, writeFileSync, } from 'node:fs';
10
+ import { appendFileSync, existsSync, mkdirSync, readFileSync, readdirSync, renameSync, statSync, unlinkSync, watchFile, unwatchFile, writeFileSync, } from 'node:fs';
11
11
  import path from 'node:path';
12
12
  import cron from 'node-cron';
13
13
  import matter from 'gray-matter';
@@ -17,6 +17,7 @@ import { listAllGoals, findGoalPath, readGoalById } from '../tools/shared.js';
17
17
  import { scanner } from '../security/scanner.js';
18
18
  import { parseAllWorkflows as parseAllWorkflowsSync } from '../agent/workflow-runner.js';
19
19
  import { SelfImproveLoop } from '../agent/self-improve.js';
20
+ import { logAuditJsonl } from '../agent/hooks.js';
20
21
  const logger = pino({ name: 'clementine.cron' });
21
22
  /** Default timeout for standard cron jobs (10 minutes). */
22
23
  const CRON_STANDARD_TIMEOUT_MS = 10 * 60 * 1000;
@@ -332,6 +333,7 @@ export class CronScheduler {
332
333
  disabledJobs = new Set();
333
334
  scheduledTasks = new Map();
334
335
  runningJobs = new Set();
336
+ runMetadata = new Map();
335
337
  completedJobs = new Map(); // jobName → completion timestamp
336
338
  watching = false;
337
339
  runLog;
@@ -346,6 +348,10 @@ export class CronScheduler {
346
348
  triggerTimer = null;
347
349
  // Event-driven status change listeners (used by Discord status embed)
348
350
  statusChangeListeners = [];
351
+ // Disk-backed mirror of runningJobs for crash-safe idempotency. If the
352
+ // daemon dies mid-run, startup reconciliation surfaces the interrupted job
353
+ // to audit.jsonl and clears the file so the next scheduled tick proceeds.
354
+ static RUNNING_JOBS_FILE = path.join(BASE_DIR, 'cron-running.json');
349
355
  constructor(gateway, dispatcher) {
350
356
  this.gateway = gateway;
351
357
  this.dispatcher = dispatcher;
@@ -355,6 +361,65 @@ export class CronScheduler {
355
361
  // query jobs on connect which happens before start().
356
362
  this.loadJobDefinitions();
357
363
  }
364
+ /**
365
+ * Atomically persist the current runningJobs set to disk. Uses write-then-
366
+ * rename so a crash mid-write cannot corrupt the file.
367
+ */
368
+ persistRunningJobs(metaByName) {
369
+ try {
370
+ const entries = [...this.runningJobs].map(name => ({
371
+ jobName: name,
372
+ startedAt: metaByName?.get(name)?.startedAt ?? new Date().toISOString(),
373
+ runId: metaByName?.get(name)?.runId ?? '',
374
+ pid: process.pid,
375
+ }));
376
+ const tmp = CronScheduler.RUNNING_JOBS_FILE + '.tmp';
377
+ writeFileSync(tmp, JSON.stringify(entries, null, 2));
378
+ renameSync(tmp, CronScheduler.RUNNING_JOBS_FILE);
379
+ }
380
+ catch (err) {
381
+ logger.debug({ err }, 'Failed to persist running-jobs file');
382
+ }
383
+ }
384
+ /**
385
+ * On startup, read the persisted running-jobs file. Any entries present
386
+ * represent jobs interrupted by a previous crash. Surface each to audit.jsonl
387
+ * and clear the file. Deliberately do NOT auto-restart — the next scheduled
388
+ * tick handles it, avoiding duplicate external side effects (emails sent,
389
+ * commits pushed, etc.) from a partial prior run.
390
+ */
391
+ reconcileInterruptedJobs() {
392
+ try {
393
+ if (!existsSync(CronScheduler.RUNNING_JOBS_FILE))
394
+ return;
395
+ const raw = readFileSync(CronScheduler.RUNNING_JOBS_FILE, 'utf-8');
396
+ const entries = JSON.parse(raw);
397
+ if (!Array.isArray(entries) || entries.length === 0) {
398
+ unlinkSync(CronScheduler.RUNNING_JOBS_FILE);
399
+ return;
400
+ }
401
+ const detectedAt = new Date().toISOString();
402
+ for (const entry of entries) {
403
+ logger.warn({ ...entry, detectedAt }, 'Interrupted cron job detected on startup');
404
+ logAuditJsonl({
405
+ event_type: 'cron_interrupted',
406
+ jobName: entry.jobName,
407
+ runId: entry.runId,
408
+ startedAt: entry.startedAt,
409
+ detectedAt,
410
+ previousPid: entry.pid,
411
+ });
412
+ }
413
+ unlinkSync(CronScheduler.RUNNING_JOBS_FILE);
414
+ }
415
+ catch (err) {
416
+ logger.warn({ err }, 'Failed to reconcile running-jobs file — starting fresh');
417
+ try {
418
+ unlinkSync(CronScheduler.RUNNING_JOBS_FILE);
419
+ }
420
+ catch { /* ignore */ }
421
+ }
422
+ }
358
423
  /** Load job definitions from CRON.md and agent dirs without scheduling tasks. */
359
424
  loadJobDefinitions() {
360
425
  this.jobs = parseCronJobs();
@@ -376,6 +441,9 @@ export class CronScheduler {
376
441
  }
377
442
  }
378
443
  start() {
444
+ // Surface any jobs that were mid-run when the daemon last died and clear
445
+ // the crash-consistency file before scheduling new ticks.
446
+ this.reconcileInterruptedJobs();
379
447
  this.reloadJobs();
380
448
  this.reloadWorkflows();
381
449
  this.watchCronFile();
@@ -800,6 +868,11 @@ export class CronScheduler {
800
868
  catch { /* non-fatal */ }
801
869
  }
802
870
  this.runningJobs.add(job.name);
871
+ this.runMetadata.set(job.name, {
872
+ startedAt: new Date().toISOString(),
873
+ runId: Math.random().toString(36).slice(2, 10),
874
+ });
875
+ this.persistRunningJobs(this.runMetadata);
803
876
  this.emitStatusChange();
804
877
  try {
805
878
  logger.info(`Running cron job: ${job.name}${job.agentSlug ? ` (agent: ${job.agentSlug})` : ''}`);
@@ -969,6 +1042,8 @@ export class CronScheduler {
969
1042
  }
970
1043
  finally {
971
1044
  this.runningJobs.delete(job.name);
1045
+ this.runMetadata.delete(job.name);
1046
+ this.persistRunningJobs(this.runMetadata);
972
1047
  this.emitStatusChange();
973
1048
  // Fire-and-forget: check if this agent's profile needs self-learning update
974
1049
  if (job.agentSlug) {
@@ -748,6 +748,8 @@ export class Gateway {
748
748
  const isOwnerDm = sessionKey.startsWith('discord:user:') ||
749
749
  sessionKey.startsWith('discord:agent:') ||
750
750
  sessionKey.startsWith('slack:dm:') ||
751
+ // New workspace-namespaced Slack DMs: slack:team:{teamId}:user:{userId}
752
+ /^slack:team:[^:]+:(user|dm):/.test(sessionKey) ||
751
753
  sessionKey.startsWith('telegram:');
752
754
  const shouldBlock = scan.verdict === 'block' && !isOwnerDm;
753
755
  if (shouldBlock) {
@@ -1308,6 +1310,8 @@ export class Gateway {
1308
1310
  const isOwnerDm = sessionKey.startsWith('discord:user:') ||
1309
1311
  sessionKey.startsWith('discord:agent:') ||
1310
1312
  sessionKey.startsWith('slack:dm:') ||
1313
+ // New workspace-namespaced Slack DMs: slack:team:{teamId}:user:{userId}
1314
+ /^slack:team:[^:]+:(user|dm):/.test(sessionKey) ||
1311
1315
  sessionKey.startsWith('telegram:');
1312
1316
  const shouldBlock = scan.verdict === 'block' && !isOwnerDm;
1313
1317
  if (shouldBlock) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.0.28",
3
+ "version": "1.0.29",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",