clementine-agent 1.0.28 → 1.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -12,9 +12,19 @@
|
|
|
12
12
|
import type { AgentProfile, OnTextCallback, OnToolActivityCallback, VerboseLevel } from '../types.js';
|
|
13
13
|
import { AgentManager } from './agent-manager.js';
|
|
14
14
|
/**
|
|
15
|
-
* Estimate token count
|
|
16
|
-
*
|
|
17
|
-
*
|
|
15
|
+
* Estimate token count for Claude.
|
|
16
|
+
*
|
|
17
|
+
* Anthropic's published rule of thumb is ~3.5 chars/token for English prose.
|
|
18
|
+
* Clementine's prompts blend English guidance with code, JSON, YAML, and
|
|
19
|
+
* structured memory — so we use 3.3 chars/token, slightly denser than pure
|
|
20
|
+
* English, which tracks within ~10% of the SDK's reported input_tokens in
|
|
21
|
+
* practice (see audit.jsonl tokens_in for live calibration).
|
|
22
|
+
*
|
|
23
|
+
* The previous weighted-regex heuristic (words×1.3 + punct×0.8 + lines×0.5)
|
|
24
|
+
* systematically undercounted code and JSON, triggering spurious compactions.
|
|
25
|
+
*
|
|
26
|
+
* Callers that need exact counts should read `usage.input_tokens` from the
|
|
27
|
+
* SDK result; this function is for pre-flight planning only.
|
|
18
28
|
*/
|
|
19
29
|
export declare function estimateTokens(text: string): number;
|
|
20
30
|
export interface ProjectMeta {
|
package/dist/agent/assistant.js
CHANGED
|
@@ -144,20 +144,24 @@ function getChannelToolDenyList(channel) {
|
|
|
144
144
|
}
|
|
145
145
|
// ── Token estimation & context window guard ─────────────────────────
|
|
146
146
|
/**
|
|
147
|
-
* Estimate token count
|
|
148
|
-
*
|
|
149
|
-
*
|
|
147
|
+
* Estimate token count for Claude.
|
|
148
|
+
*
|
|
149
|
+
* Anthropic's published rule of thumb is ~3.5 chars/token for English prose.
|
|
150
|
+
* Clementine's prompts blend English guidance with code, JSON, YAML, and
|
|
151
|
+
* structured memory — so we use 3.3 chars/token, slightly denser than pure
|
|
152
|
+
* English, which tracks within ~10% of the SDK's reported input_tokens in
|
|
153
|
+
* practice (see audit.jsonl tokens_in for live calibration).
|
|
154
|
+
*
|
|
155
|
+
* The previous weighted-regex heuristic (words×1.3 + punct×0.8 + lines×0.5)
|
|
156
|
+
* systematically undercounted code and JSON, triggering spurious compactions.
|
|
157
|
+
*
|
|
158
|
+
* Callers that need exact counts should read `usage.input_tokens` from the
|
|
159
|
+
* SDK result; this function is for pre-flight planning only.
|
|
150
160
|
*/
|
|
151
161
|
export function estimateTokens(text) {
|
|
152
162
|
if (!text)
|
|
153
163
|
return 0;
|
|
154
|
-
|
|
155
|
-
const words = text.match(/\b\w+\b/g)?.length ?? 0;
|
|
156
|
-
// Count non-word tokens: punctuation, brackets, operators (each is ~1 token)
|
|
157
|
-
const punctuation = text.match(/[^\w\s]/g)?.length ?? 0;
|
|
158
|
-
// Newlines and indentation: roughly 1 token per line
|
|
159
|
-
const lines = text.split('\n').length;
|
|
160
|
-
return Math.ceil(words * 1.3 + punctuation * 0.8 + lines * 0.5);
|
|
164
|
+
return Math.ceil(text.length / 3.3);
|
|
161
165
|
}
|
|
162
166
|
/**
|
|
163
167
|
* Strip lone Unicode surrogates (U+D800–U+DFFF) from a string so it can be
|
|
@@ -765,6 +769,21 @@ export class PersonalAssistant {
|
|
|
765
769
|
try {
|
|
766
770
|
const data = JSON.parse(fs.readFileSync(SESSIONS_FILE, 'utf-8'));
|
|
767
771
|
const now = Date.now();
|
|
772
|
+
// Drop old-format Slack session keys that pre-date workspace namespacing
|
|
773
|
+
// (`slack:user:*`, `slack:dm:*`). The new format is
|
|
774
|
+
// `slack:team:{teamId}:user:{userId}`; old keys can't be safely remapped
|
|
775
|
+
// because the originating workspace isn't known, so they're dropped and
|
|
776
|
+
// users rotate into a fresh session on their next message.
|
|
777
|
+
let droppedLegacy = 0;
|
|
778
|
+
for (const key of Object.keys(data)) {
|
|
779
|
+
if (/^slack:(user|dm):/.test(key)) {
|
|
780
|
+
delete data[key];
|
|
781
|
+
droppedLegacy++;
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
if (droppedLegacy > 0) {
|
|
785
|
+
logger.info({ dropped: droppedLegacy }, 'Migrated sessions: dropped pre-workspace-namespacing Slack keys');
|
|
786
|
+
}
|
|
768
787
|
for (const [key, entry] of Object.entries(data)) {
|
|
769
788
|
const ts = new Date(entry.timestamp);
|
|
770
789
|
if (now - ts.getTime() > SESSION_EXPIRY_MS)
|
package/dist/channels/slack.js
CHANGED
|
@@ -59,7 +59,7 @@ export async function startSlack(gateway, dispatcher, slackBotManager) {
|
|
|
59
59
|
app.error(async (error) => {
|
|
60
60
|
logger.error({ err: error }, 'Slack app error — continuing');
|
|
61
61
|
});
|
|
62
|
-
app.message(async ({ message, client }) => {
|
|
62
|
+
app.message(async ({ message, client, context }) => {
|
|
63
63
|
try {
|
|
64
64
|
// Type guard: only handle regular user messages
|
|
65
65
|
if (!('user' in message) || !('text' in message))
|
|
@@ -72,6 +72,10 @@ export async function startSlack(gateway, dispatcher, slackBotManager) {
|
|
|
72
72
|
if (slackBotManager?.getOwnedChannelIds().includes(message.channel))
|
|
73
73
|
return;
|
|
74
74
|
const userId = message.user;
|
|
75
|
+
// Slack user IDs are scoped per-workspace, so a bare `slack:user:{uid}`
|
|
76
|
+
// collides across workspaces. Namespace by team/workspace ID so sessions
|
|
77
|
+
// stay isolated even when the same bot is installed in multiple workspaces.
|
|
78
|
+
const teamId = context.teamId ?? (await client.auth.test().then(r => r.team_id).catch(() => 'unknown'));
|
|
75
79
|
// Owner-only check
|
|
76
80
|
if (SLACK_OWNER_USER_ID && userId !== SLACK_OWNER_USER_ID) {
|
|
77
81
|
logger.warn(`Ignored Slack message from non-owner: ${userId}`);
|
|
@@ -93,7 +97,7 @@ export async function startSlack(gateway, dispatcher, slackBotManager) {
|
|
|
93
97
|
return;
|
|
94
98
|
const channel = message.channel;
|
|
95
99
|
const threadTs = ('thread_ts' in message ? message.thread_ts : undefined) ?? message.ts;
|
|
96
|
-
const sessionKey = `slack:user:${userId}`;
|
|
100
|
+
const sessionKey = `slack:team:${teamId}:user:${userId}`;
|
|
97
101
|
// ── !stop — abort active query (bypasses session lock) ────────────
|
|
98
102
|
if (text === '!stop' || text === '/stop') {
|
|
99
103
|
const stopped = gateway.stopSession(sessionKey);
|
|
@@ -60,6 +60,7 @@ export declare class CronScheduler {
|
|
|
60
60
|
private disabledJobs;
|
|
61
61
|
private scheduledTasks;
|
|
62
62
|
private runningJobs;
|
|
63
|
+
private runMetadata;
|
|
63
64
|
private completedJobs;
|
|
64
65
|
private watching;
|
|
65
66
|
readonly runLog: CronRunLog;
|
|
@@ -71,7 +72,21 @@ export declare class CronScheduler {
|
|
|
71
72
|
private goalTriggerDir;
|
|
72
73
|
private triggerTimer;
|
|
73
74
|
private statusChangeListeners;
|
|
75
|
+
private static readonly RUNNING_JOBS_FILE;
|
|
74
76
|
constructor(gateway: Gateway, dispatcher: NotificationDispatcher);
|
|
77
|
+
/**
|
|
78
|
+
* Atomically persist the current runningJobs set to disk. Uses write-then-
|
|
79
|
+
* rename so a crash mid-write cannot corrupt the file.
|
|
80
|
+
*/
|
|
81
|
+
private persistRunningJobs;
|
|
82
|
+
/**
|
|
83
|
+
* On startup, read the persisted running-jobs file. Any entries present
|
|
84
|
+
* represent jobs interrupted by a previous crash. Surface each to audit.jsonl
|
|
85
|
+
* and clear the file. Deliberately do NOT auto-restart — the next scheduled
|
|
86
|
+
* tick handles it, avoiding duplicate external side effects (emails sent,
|
|
87
|
+
* commits pushed, etc.) from a partial prior run.
|
|
88
|
+
*/
|
|
89
|
+
private reconcileInterruptedJobs;
|
|
75
90
|
/** Load job definitions from CRON.md and agent dirs without scheduling tasks. */
|
|
76
91
|
private loadJobDefinitions;
|
|
77
92
|
/** Register a listener that fires when system state changes (job start/finish, self-improve, etc). */
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* retry helpers, CronRunLog, and daily-note logging utilities used by both schedulers.
|
|
8
8
|
*/
|
|
9
9
|
import { execSync } from 'node:child_process';
|
|
10
|
-
import { appendFileSync, existsSync, mkdirSync, readFileSync, readdirSync, statSync, unlinkSync, watchFile, unwatchFile, writeFileSync, } from 'node:fs';
|
|
10
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync, readdirSync, renameSync, statSync, unlinkSync, watchFile, unwatchFile, writeFileSync, } from 'node:fs';
|
|
11
11
|
import path from 'node:path';
|
|
12
12
|
import cron from 'node-cron';
|
|
13
13
|
import matter from 'gray-matter';
|
|
@@ -17,6 +17,7 @@ import { listAllGoals, findGoalPath, readGoalById } from '../tools/shared.js';
|
|
|
17
17
|
import { scanner } from '../security/scanner.js';
|
|
18
18
|
import { parseAllWorkflows as parseAllWorkflowsSync } from '../agent/workflow-runner.js';
|
|
19
19
|
import { SelfImproveLoop } from '../agent/self-improve.js';
|
|
20
|
+
import { logAuditJsonl } from '../agent/hooks.js';
|
|
20
21
|
const logger = pino({ name: 'clementine.cron' });
|
|
21
22
|
/** Default timeout for standard cron jobs (10 minutes). */
|
|
22
23
|
const CRON_STANDARD_TIMEOUT_MS = 10 * 60 * 1000;
|
|
@@ -332,6 +333,7 @@ export class CronScheduler {
|
|
|
332
333
|
disabledJobs = new Set();
|
|
333
334
|
scheduledTasks = new Map();
|
|
334
335
|
runningJobs = new Set();
|
|
336
|
+
runMetadata = new Map();
|
|
335
337
|
completedJobs = new Map(); // jobName → completion timestamp
|
|
336
338
|
watching = false;
|
|
337
339
|
runLog;
|
|
@@ -346,6 +348,10 @@ export class CronScheduler {
|
|
|
346
348
|
triggerTimer = null;
|
|
347
349
|
// Event-driven status change listeners (used by Discord status embed)
|
|
348
350
|
statusChangeListeners = [];
|
|
351
|
+
// Disk-backed mirror of runningJobs for crash-safe idempotency. If the
|
|
352
|
+
// daemon dies mid-run, startup reconciliation surfaces the interrupted job
|
|
353
|
+
// to audit.jsonl and clears the file so the next scheduled tick proceeds.
|
|
354
|
+
static RUNNING_JOBS_FILE = path.join(BASE_DIR, 'cron-running.json');
|
|
349
355
|
constructor(gateway, dispatcher) {
|
|
350
356
|
this.gateway = gateway;
|
|
351
357
|
this.dispatcher = dispatcher;
|
|
@@ -355,6 +361,65 @@ export class CronScheduler {
|
|
|
355
361
|
// query jobs on connect which happens before start().
|
|
356
362
|
this.loadJobDefinitions();
|
|
357
363
|
}
|
|
364
|
+
/**
|
|
365
|
+
* Atomically persist the current runningJobs set to disk. Uses write-then-
|
|
366
|
+
* rename so a crash mid-write cannot corrupt the file.
|
|
367
|
+
*/
|
|
368
|
+
persistRunningJobs(metaByName) {
|
|
369
|
+
try {
|
|
370
|
+
const entries = [...this.runningJobs].map(name => ({
|
|
371
|
+
jobName: name,
|
|
372
|
+
startedAt: metaByName?.get(name)?.startedAt ?? new Date().toISOString(),
|
|
373
|
+
runId: metaByName?.get(name)?.runId ?? '',
|
|
374
|
+
pid: process.pid,
|
|
375
|
+
}));
|
|
376
|
+
const tmp = CronScheduler.RUNNING_JOBS_FILE + '.tmp';
|
|
377
|
+
writeFileSync(tmp, JSON.stringify(entries, null, 2));
|
|
378
|
+
renameSync(tmp, CronScheduler.RUNNING_JOBS_FILE);
|
|
379
|
+
}
|
|
380
|
+
catch (err) {
|
|
381
|
+
logger.debug({ err }, 'Failed to persist running-jobs file');
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
/**
|
|
385
|
+
* On startup, read the persisted running-jobs file. Any entries present
|
|
386
|
+
* represent jobs interrupted by a previous crash. Surface each to audit.jsonl
|
|
387
|
+
* and clear the file. Deliberately do NOT auto-restart — the next scheduled
|
|
388
|
+
* tick handles it, avoiding duplicate external side effects (emails sent,
|
|
389
|
+
* commits pushed, etc.) from a partial prior run.
|
|
390
|
+
*/
|
|
391
|
+
reconcileInterruptedJobs() {
|
|
392
|
+
try {
|
|
393
|
+
if (!existsSync(CronScheduler.RUNNING_JOBS_FILE))
|
|
394
|
+
return;
|
|
395
|
+
const raw = readFileSync(CronScheduler.RUNNING_JOBS_FILE, 'utf-8');
|
|
396
|
+
const entries = JSON.parse(raw);
|
|
397
|
+
if (!Array.isArray(entries) || entries.length === 0) {
|
|
398
|
+
unlinkSync(CronScheduler.RUNNING_JOBS_FILE);
|
|
399
|
+
return;
|
|
400
|
+
}
|
|
401
|
+
const detectedAt = new Date().toISOString();
|
|
402
|
+
for (const entry of entries) {
|
|
403
|
+
logger.warn({ ...entry, detectedAt }, 'Interrupted cron job detected on startup');
|
|
404
|
+
logAuditJsonl({
|
|
405
|
+
event_type: 'cron_interrupted',
|
|
406
|
+
jobName: entry.jobName,
|
|
407
|
+
runId: entry.runId,
|
|
408
|
+
startedAt: entry.startedAt,
|
|
409
|
+
detectedAt,
|
|
410
|
+
previousPid: entry.pid,
|
|
411
|
+
});
|
|
412
|
+
}
|
|
413
|
+
unlinkSync(CronScheduler.RUNNING_JOBS_FILE);
|
|
414
|
+
}
|
|
415
|
+
catch (err) {
|
|
416
|
+
logger.warn({ err }, 'Failed to reconcile running-jobs file — starting fresh');
|
|
417
|
+
try {
|
|
418
|
+
unlinkSync(CronScheduler.RUNNING_JOBS_FILE);
|
|
419
|
+
}
|
|
420
|
+
catch { /* ignore */ }
|
|
421
|
+
}
|
|
422
|
+
}
|
|
358
423
|
/** Load job definitions from CRON.md and agent dirs without scheduling tasks. */
|
|
359
424
|
loadJobDefinitions() {
|
|
360
425
|
this.jobs = parseCronJobs();
|
|
@@ -376,6 +441,9 @@ export class CronScheduler {
|
|
|
376
441
|
}
|
|
377
442
|
}
|
|
378
443
|
start() {
|
|
444
|
+
// Surface any jobs that were mid-run when the daemon last died and clear
|
|
445
|
+
// the crash-consistency file before scheduling new ticks.
|
|
446
|
+
this.reconcileInterruptedJobs();
|
|
379
447
|
this.reloadJobs();
|
|
380
448
|
this.reloadWorkflows();
|
|
381
449
|
this.watchCronFile();
|
|
@@ -800,6 +868,11 @@ export class CronScheduler {
|
|
|
800
868
|
catch { /* non-fatal */ }
|
|
801
869
|
}
|
|
802
870
|
this.runningJobs.add(job.name);
|
|
871
|
+
this.runMetadata.set(job.name, {
|
|
872
|
+
startedAt: new Date().toISOString(),
|
|
873
|
+
runId: Math.random().toString(36).slice(2, 10),
|
|
874
|
+
});
|
|
875
|
+
this.persistRunningJobs(this.runMetadata);
|
|
803
876
|
this.emitStatusChange();
|
|
804
877
|
try {
|
|
805
878
|
logger.info(`Running cron job: ${job.name}${job.agentSlug ? ` (agent: ${job.agentSlug})` : ''}`);
|
|
@@ -969,6 +1042,8 @@ export class CronScheduler {
|
|
|
969
1042
|
}
|
|
970
1043
|
finally {
|
|
971
1044
|
this.runningJobs.delete(job.name);
|
|
1045
|
+
this.runMetadata.delete(job.name);
|
|
1046
|
+
this.persistRunningJobs(this.runMetadata);
|
|
972
1047
|
this.emitStatusChange();
|
|
973
1048
|
// Fire-and-forget: check if this agent's profile needs self-learning update
|
|
974
1049
|
if (job.agentSlug) {
|
package/dist/gateway/router.js
CHANGED
|
@@ -748,6 +748,8 @@ export class Gateway {
|
|
|
748
748
|
const isOwnerDm = sessionKey.startsWith('discord:user:') ||
|
|
749
749
|
sessionKey.startsWith('discord:agent:') ||
|
|
750
750
|
sessionKey.startsWith('slack:dm:') ||
|
|
751
|
+
// New workspace-namespaced Slack DMs: slack:team:{teamId}:user:{userId}
|
|
752
|
+
/^slack:team:[^:]+:(user|dm):/.test(sessionKey) ||
|
|
751
753
|
sessionKey.startsWith('telegram:');
|
|
752
754
|
const shouldBlock = scan.verdict === 'block' && !isOwnerDm;
|
|
753
755
|
if (shouldBlock) {
|
|
@@ -1308,6 +1310,8 @@ export class Gateway {
|
|
|
1308
1310
|
const isOwnerDm = sessionKey.startsWith('discord:user:') ||
|
|
1309
1311
|
sessionKey.startsWith('discord:agent:') ||
|
|
1310
1312
|
sessionKey.startsWith('slack:dm:') ||
|
|
1313
|
+
// New workspace-namespaced Slack DMs: slack:team:{teamId}:user:{userId}
|
|
1314
|
+
/^slack:team:[^:]+:(user|dm):/.test(sessionKey) ||
|
|
1311
1315
|
sessionKey.startsWith('telegram:');
|
|
1312
1316
|
const shouldBlock = scan.verdict === 'block' && !isOwnerDm;
|
|
1313
1317
|
if (shouldBlock) {
|