clementine-agent 1.6.3 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -411,10 +411,24 @@ Routing rule: if the fact is something the agent should *always know* (not just
411
411
  ## Rules:
412
412
  - Only save genuinely NEW facts not already present in the Current Memory above.
413
413
  - If updating an existing topic, use memory_write(action="update_memory") to REPLACE the section, not append duplicates.
414
+ - If a stored fact is now wrong (user corrected it, situation changed), use memory_write(action="supersede", supersedes_chunk_id=N, reason="…") instead of appending — the old chunk becomes invisible to retrieval, provenance is preserved.
414
415
  - If there's nothing new to save, respond "No new facts." and exit — do NOT call any tools.
415
416
  - Use the MCP tools (user_model, memory_write, note_create, task_add, note_take).
416
417
  - NEVER respond to ${OWNER}. You are invisible. Just save facts and exit.
417
418
 
419
+ ## Salience hint, confidence, reason (memory_write):
420
+ Every memory_write call may include \`salience_hint\` (0.5–2.0), \`confidence\` (0–1), and \`reason\` (one short sentence). Use them — retrieval prioritizes high-salience, deprioritizes low-confidence, and reasons make the memory system explainable.
421
+
422
+ salience_hint:
423
+ - 0.5 — tentative, single-mention, may not be durable
424
+ - 1.0 — normal (default; equivalent to omitting)
425
+ - 1.5 — durable preference, decision, or strong stated opinion
426
+ - 2.0 — identity-level fact (rare): role, name, foundational stance
427
+
428
+ confidence: 1.0 = certain (default), 0.7 = probable, 0.5 = uncertain or heard secondhand, 0.3 = tentative. Lowers retrieval ranking without hiding.
429
+
430
+ reason: one sentence answering "why is this worth keeping?" — e.g. "user just stated firm preference for plain .env over keychain after being burned by it." Skip routine cases.
431
+
418
432
  ## Behavioral Correction Detection:
419
433
  If ${OWNER} corrects HOW the assistant behaved (not a factual correction), output a JSON block:
420
434
  \`\`\`json-behavioral
@@ -48,6 +48,8 @@ export interface AuditEvent {
48
48
  */
49
49
  export declare function logAuditJsonl(event: AuditEvent): void;
50
50
  export declare function setHeartbeatMode(active: boolean, tier2Allowed?: boolean): void;
51
+ export declare function resetBrowserHarnessApproval(): void;
52
+ export declare function isBrowserHarnessApproved(): boolean;
51
53
  export declare function setApprovalCallback(cb: ((desc: string) => Promise<boolean>) | null): void;
52
54
  export declare function setProfileTier(tier: number | null): void;
53
55
  export declare function setProfileAllowedTools(tools: string[] | null): void;
@@ -120,6 +120,16 @@ export function setHeartbeatMode(active, tier2Allowed = false) {
120
120
  heartbeatActive = active;
121
121
  heartbeatTier2Allowed = tier2Allowed;
122
122
  }
123
+ // Approval for browser harness T3 actions. The flag below is one module-level
124
+ // boolean, so "session" here means this daemon process: once the user approves, every later T3 call auto-allows.
125
+ // Resets on daemon restart (in-memory) and on explicit revoke.
126
+ let browserHarnessSessionApproved = false;
127
+ export function resetBrowserHarnessApproval() {
128
+ browserHarnessSessionApproved = false;
129
+ }
130
+ export function isBrowserHarnessApproved() {
131
+ return browserHarnessSessionApproved;
132
+ }
123
133
  export function setApprovalCallback(cb) {
124
134
  approvalCallback = cb;
125
135
  }
@@ -197,11 +207,23 @@ export function logToolUse(toolName, toolInput) {
197
207
  // These apply to actual heartbeats and tier-1 cron jobs (read-only).
198
208
  // Tier 2+ cron jobs and unleashed tasks bypass these restrictions.
199
209
  const HEARTBEAT_DISALLOWED_TIER2 = ['Write', 'Edit', 'Bash'];
210
+ // Browser harness write-class tools — drive the user's real Chrome with their
211
+ // live cookies/sessions. NEVER run these without interactive approval. The
212
+ // MCP server name is "browser-harness" so the SDK exposes them as
213
+ // mcp__browser-harness__<tool>.
214
+ const BROWSER_HARNESS_T3_TOOLS = [
215
+ 'mcp__browser-harness__browser_click_xy',
216
+ 'mcp__browser-harness__browser_type_text',
217
+ 'mcp__browser-harness__browser_press_key',
218
+ 'mcp__browser-harness__browser_scroll',
219
+ 'mcp__browser-harness__browser_run_python',
220
+ ];
200
221
  const HEARTBEAT_DISALLOWED_ALWAYS = [
201
222
  'Bash', // No raw shell in low-tier autonomous mode
202
223
  'Task', // No sub-agents in heartbeats (too short to benefit)
203
224
  'Skill', // Skill packs load heavy context and waste turns
204
225
  'TodoWrite', // Internal bookkeeping wastes autonomous turns
226
+ ...BROWSER_HARNESS_T3_TOOLS, // Browser writes never run unsupervised
205
227
  ];
206
228
  export function getHeartbeatDisallowedTools() {
207
229
  const disallowed = [...HEARTBEAT_DISALLOWED_ALWAYS];
@@ -315,6 +337,42 @@ export async function enforceToolPermissions(toolName, toolInput, sourceOverride
315
337
  };
316
338
  }
317
339
  }
340
+ // ── Browser harness T3 — never autonomous, approve once per session ─
341
+ // These tools click/type/scroll/run-python in the user's REAL Chrome
342
+ // with their live cookies. They must never run without explicit consent.
343
+ const effectiveSourceForBrowser = sourceOverride ?? interactionSource;
344
+ if (BROWSER_HARNESS_T3_TOOLS.includes(toolName)) {
345
+ // Hard block whenever a heartbeat is active or the effective source is
346
+ // 'autonomous' (intended to cover cron tier-2, unleashed, heartbeat, and
347
+ // member-channel sources — NOTE(review): confirm each of those reports 'autonomous').
348
+ // Heartbeat block is also handled above via getHeartbeatDisallowedTools, but this catches tier-2 cron and unleashed where heartbeatActive=false.
349
+ if (heartbeatActive || effectiveSourceForBrowser === 'autonomous') {
350
+ appendAuditFile(`[BROWSER-HARNESS] DENIED autonomous: ${toolName}`);
351
+ return {
352
+ behavior: 'deny',
353
+ message: `${toolName} controls your live browser — blocked during autonomous execution. Run interactively instead.`,
354
+ };
355
+ }
356
+ // Interactive: ask once per session. Subsequent T3 calls auto-allow
357
+ // until daemon restart (or explicit revoke via resetBrowserHarnessApproval).
358
+ if (!browserHarnessSessionApproved) {
359
+ if (approvalCallback) {
360
+ const approved = await approvalCallback('Allow Clementine to control your browser this session? Clicks, types, and key presses will run in your real Chrome with your live cookies and logins.');
361
+ if (!approved) {
362
+ return { behavior: 'deny', message: 'Browser control denied by user.' };
363
+ }
364
+ browserHarnessSessionApproved = true;
365
+ appendAuditFile('[BROWSER-HARNESS] Session approval granted');
366
+ }
367
+ else {
368
+ // No approval callback wired — be safe, deny.
369
+ return {
370
+ behavior: 'deny',
371
+ message: 'Browser control requires interactive approval, but no approval callback is set in this context.',
372
+ };
373
+ }
374
+ }
375
+ }
318
376
  // ── Profile tier restrictions (restrict, never elevate) ────────
319
377
  if (activeProfileTier !== null) {
320
378
  if (activeProfileTier < 2 && ['Bash', 'Write', 'Edit'].includes(toolName)) {