watchmyagents 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -3
- package/package.json +8 -6
- package/scripts/agents.js +218 -0
- package/scripts/fetch-anthropic.js +82 -55
- package/scripts/service.js +38 -12
- package/scripts/shield.js +97 -95
- package/src/sources/anthropic-managed.js +18 -0
- package/src/typology-weights.json +88 -0
- package/src/typology.js +398 -0
package/scripts/shield.js
CHANGED
|
@@ -33,10 +33,10 @@ import {
|
|
|
33
33
|
getAgentConfig, detectAlwaysAsk,
|
|
34
34
|
} from '../src/shield/enforce.js';
|
|
35
35
|
import { DecisionLogger } from '../src/shield/decisions.js';
|
|
36
|
-
import { listSessions } from '../src/sources/anthropic-managed.js';
|
|
36
|
+
import { listSessions, listAgents } from '../src/sources/anthropic-managed.js';
|
|
37
37
|
import { FortressPolicySource, postDecision } from '../src/shield/sources/fortress.js';
|
|
38
38
|
import { resolveFortressBase } from '../src/fortress/url.js';
|
|
39
|
-
import { isValidAgentId } from '../src/validate.js';
|
|
39
|
+
import { isValidAgentId, isValidSessionId } from '../src/validate.js';
|
|
40
40
|
|
|
41
41
|
function parseArgs(argv) {
|
|
42
42
|
const out = {};
|
|
@@ -423,127 +423,129 @@ async function main() {
|
|
|
423
423
|
explicitUrl: args['fortress-url'],
|
|
424
424
|
});
|
|
425
425
|
const logDir = resolve(args['log-dir'] || './watchmyagents-logs');
|
|
426
|
+
const allAgents = !!args['all-agents'];
|
|
426
427
|
|
|
427
428
|
if (!apiKey) die('error: --api-key or ANTHROPIC_API_KEY required');
|
|
428
|
-
if (!agentId) die('error: --agent-id required');
|
|
429
|
-
if (
|
|
429
|
+
if (!allAgents && !agentId) die('error: --agent-id required (or --all-agents for fleet mode)');
|
|
430
|
+
if (allAgents && singleSessionId) die('error: --all-agents is incompatible with --session-id');
|
|
431
|
+
if (allAgents && policiesSource !== 'fortress') {
|
|
432
|
+
die('error: --all-agents requires --policies-source fortress (per-agent policies).');
|
|
433
|
+
}
|
|
434
|
+
if (agentId && !isValidAgentId(agentId)) {
|
|
430
435
|
die(`error: --agent-id has invalid format (expected "agent_" + alphanumeric, got "${agentId}")`);
|
|
431
436
|
}
|
|
437
|
+
// --session-id ends up in the Anthropic SSE URL path (src/shield/stream.js).
|
|
438
|
+
// Validate the same way wma-fetch does so a crafted value can't tamper with the URL.
|
|
439
|
+
if (singleSessionId && !isValidSessionId(singleSessionId)) {
|
|
440
|
+
die(`error: --session-id has invalid format (expected "sesn_" + alphanumeric, got "${singleSessionId}")`);
|
|
441
|
+
}
|
|
432
442
|
|
|
433
|
-
//
|
|
434
|
-
|
|
435
|
-
let
|
|
436
|
-
|
|
443
|
+
// Validate the policy source config once (shared across the fleet). For local
|
|
444
|
+
// mode the ruleset is loaded once up front; --all-agents requires fortress, so it only ever serves the single --agent-id agent.
|
|
445
|
+
let sharedLocalRuleset = null;
|
|
437
446
|
if (policiesSource === 'fortress') {
|
|
438
447
|
if (!wmaApiKey) die('error: --policies-source fortress requires --wma-api-key or WMA_API_KEY env');
|
|
439
448
|
if (!fortressBase) die('error: --policies-source fortress requires --fortress-base-url or WMA_FORTRESS_BASE_URL env');
|
|
440
449
|
if (!/^wma_[a-f0-9]{32}$/i.test(wmaApiKey)) warn(`WMA_API_KEY format looks unusual (expected wma_<32hex>).`);
|
|
441
|
-
|
|
442
|
-
fortressPolicies = new FortressPolicySource({
|
|
443
|
-
apiKey: wmaApiKey,
|
|
444
|
-
base: fortressBase,
|
|
445
|
-
anthropicAgentId: agentId,
|
|
446
|
-
refreshIntervalMs: 5 * 60_000,
|
|
447
|
-
onError: (e) => warn(`policy refresh failed (keeping cached): ${e.message}`),
|
|
448
|
-
onRefresh: ({ policies, fetched_at, initial }) => {
|
|
449
|
-
info(`policies ${initial ? 'loaded' : 'refreshed'} from Fortress — ${policies.length} active (fetched_at: ${fetched_at})`);
|
|
450
|
-
},
|
|
451
|
-
});
|
|
452
|
-
try {
|
|
453
|
-
await fortressPolicies.start();
|
|
454
|
-
} catch (e) {
|
|
455
|
-
die(`error fetching policies from Fortress: ${e.message}\n` +
|
|
456
|
-
` Check WMA_FORTRESS_BASE_URL and WMA_API_KEY.`);
|
|
457
|
-
}
|
|
458
|
-
ruleset = fortressPolicies.current();
|
|
459
450
|
} else if (policiesSource === 'local') {
|
|
460
451
|
if (!policyPath) die('error: --policies-source local requires --policy <path-to-policies.json>');
|
|
461
|
-
try {
|
|
462
|
-
|
|
463
|
-
} catch (e) {
|
|
464
|
-
die(`error loading policies: ${e.message}`);
|
|
465
|
-
}
|
|
452
|
+
try { sharedLocalRuleset = await loadPolicies(resolve(policyPath)); }
|
|
453
|
+
catch (e) { die(`error loading policies: ${e.message}`); }
|
|
466
454
|
} else {
|
|
467
455
|
die('error: --policy <path> OR --policies-source fortress required');
|
|
468
456
|
}
|
|
469
457
|
|
|
470
|
-
|
|
471
|
-
let
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
? `Fortress (${fortressBase})`
|
|
481
|
-
: policyPath;
|
|
482
|
-
info(`armed — ${ruleset.policies.length} policies loaded from ${sourceLabel}`);
|
|
483
|
-
info(`default action when no rule matches: ${ruleset.default.action}`);
|
|
484
|
-
info(`agent: ${agentId}${agentMeta?.name ? ` "${agentMeta.name}"` : ''}`);
|
|
485
|
-
info(`enforcement mode: ${mode}`);
|
|
486
|
-
if (mode === 'interrupt') {
|
|
487
|
-
warn('DEGRADED mode — Shield will interrupt AFTER a violating tool runs.');
|
|
488
|
-
warn(`For pre-execution blocking, run: wma-shield --setup-guide --agent-id ${agentId}`);
|
|
458
|
+
// Resolve the agent list: whole fleet (--all-agents) or a single agent.
|
|
459
|
+
let agentIds;
|
|
460
|
+
if (allAgents) {
|
|
461
|
+
info('discovering agents (fleet mode)…');
|
|
462
|
+
const all = await listAgents(apiKey).catch((e) => die(`failed to list agents: ${e.message}`));
|
|
463
|
+
agentIds = all.map((a) => a.id).filter((id) => id && isValidAgentId(id));
|
|
464
|
+
if (agentIds.length === 0) die('error: no agents found under this API key');
|
|
465
|
+
info(`fleet: ${agentIds.length} agent(s)`);
|
|
466
|
+
} else {
|
|
467
|
+
agentIds = [agentId];
|
|
489
468
|
}
|
|
469
|
+
const fleet = agentIds.length > 1;
|
|
490
470
|
|
|
491
|
-
//
|
|
492
|
-
|
|
493
|
-
const
|
|
494
|
-
const
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
return loggers.get(sessionId);
|
|
471
|
+
// Shared infra: one shutdown signal, one fortress-source registry, one pusher.
|
|
472
|
+
const ac = new AbortController();
|
|
473
|
+
const fortressSources = [];
|
|
474
|
+
const shutdown = (sig) => {
|
|
475
|
+
info(`${sig} received, shutting down…`);
|
|
476
|
+
for (const fp of fortressSources) fp.stop();
|
|
477
|
+
ac.abort();
|
|
499
478
|
};
|
|
479
|
+
process.on('SIGINT', () => shutdown('SIGINT'));
|
|
480
|
+
process.on('SIGTERM', () => shutdown('SIGTERM'));
|
|
500
481
|
|
|
501
|
-
// Optional Fortress decision pusher
|
|
502
|
-
//
|
|
503
|
-
// and-forget extra channel if both are set.
|
|
482
|
+
// Optional Fortress decision pusher (each ctx carries its own agent id, so a
|
|
483
|
+
// single shared pusher tags decisions with the right agent).
|
|
504
484
|
const canPushToFortress = !!(wmaApiKey && fortressBase);
|
|
505
485
|
const pushDecisionToFortress = canPushToFortress
|
|
506
486
|
? async (decisionData) => {
|
|
507
|
-
try {
|
|
508
|
-
|
|
509
|
-
} catch (e) {
|
|
510
|
-
warn(`Fortress decision push failed: ${e.message}`);
|
|
511
|
-
}
|
|
487
|
+
try { await postDecision({ apiKey: wmaApiKey, base: fortressBase, decision: decisionData }); }
|
|
488
|
+
catch (e) { warn(`Fortress decision push failed: ${e.message}`); }
|
|
512
489
|
}
|
|
513
490
|
: null;
|
|
514
491
|
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
492
|
+
// Per-agent SETUP (separate from the long-running phase so we can COUNT how
|
|
493
|
+
// many actually armed). In fleet mode a per-agent startup failure is skipped
|
|
494
|
+
// (warn) instead of killing the fleet. Returns the agent's ctx, or null if skipped.
|
|
495
|
+
async function setupAgent(aid) {
|
|
496
|
+
const tag = fleet ? `[${aid.slice(0, 16)}…] ` : '';
|
|
497
|
+
let fortressPolicies = null;
|
|
498
|
+
let ruleset = sharedLocalRuleset;
|
|
499
|
+
if (policiesSource === 'fortress') {
|
|
500
|
+
fortressPolicies = new FortressPolicySource({
|
|
501
|
+
apiKey: wmaApiKey, base: fortressBase, anthropicAgentId: aid, refreshIntervalMs: 5 * 60_000,
|
|
502
|
+
onError: (e) => warn(`${tag}policy refresh failed (keeping cached): ${e.message}`),
|
|
503
|
+
onRefresh: ({ policies, fetched_at, initial }) => info(`${tag}policies ${initial ? 'loaded' : 'refreshed'} from Fortress — ${policies.length} active (fetched_at: ${fetched_at})`),
|
|
504
|
+
});
|
|
505
|
+
try { await fortressPolicies.start(); }
|
|
506
|
+
catch (e) {
|
|
507
|
+
if (fleet) { warn(`${tag}skipped — policy fetch failed: ${e.message}`); return null; }
|
|
508
|
+
die(`error fetching policies from Fortress: ${e.message}\n Check WMA_FORTRESS_BASE_URL and WMA_API_KEY.`);
|
|
509
|
+
}
|
|
510
|
+
fortressSources.push(fortressPolicies);
|
|
511
|
+
ruleset = fortressPolicies.current();
|
|
512
|
+
}
|
|
526
513
|
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
apiKey,
|
|
530
|
-
|
|
531
|
-
get ruleset() {
|
|
532
|
-
return fortressPolicies ? fortressPolicies.current() : ruleset;
|
|
533
|
-
},
|
|
534
|
-
mode,
|
|
535
|
-
decisions,
|
|
536
|
-
pushDecisionToFortress,
|
|
537
|
-
signalsSalt,
|
|
538
|
-
signal: ac.signal,
|
|
539
|
-
};
|
|
514
|
+
let mode = 'interrupt';
|
|
515
|
+
let agentMeta = null;
|
|
516
|
+
try { agentMeta = await getAgentConfig(apiKey, aid); if (detectAlwaysAsk(agentMeta)) mode = 'tool_confirmation'; }
|
|
517
|
+
catch (e) { warn(`${tag}could not fetch agent config (${e.message}). Defaulting to interrupt mode.`); }
|
|
540
518
|
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
519
|
+
info(`${tag}armed — ${ruleset.policies.length} policies · default ${ruleset.default.action} · mode ${mode}${agentMeta?.name ? ` · "${agentMeta.name}"` : ''}`);
|
|
520
|
+
if (mode === 'interrupt' && !fleet) {
|
|
521
|
+
warn('DEGRADED mode — Shield will interrupt AFTER a violating tool runs.');
|
|
522
|
+
warn(`For pre-execution blocking, run: wma-shield --setup-guide --agent-id ${aid}`);
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
const loggers = new Map();
|
|
526
|
+
const decisions = (sessionId) => {
|
|
527
|
+
if (!loggers.has(sessionId)) loggers.set(sessionId, new DecisionLogger({ logDir, agentId: aid, sessionId }));
|
|
528
|
+
return loggers.get(sessionId);
|
|
529
|
+
};
|
|
530
|
+
return {
|
|
531
|
+
apiKey, agentId: aid,
|
|
532
|
+
get ruleset() { return fortressPolicies ? fortressPolicies.current() : ruleset; },
|
|
533
|
+
mode, decisions, pushDecisionToFortress, signalsSalt, signal: ac.signal,
|
|
534
|
+
};
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
// Phase 1: arm every agent. Fail LOUD if none armed (otherwise the process would
|
|
538
|
+
// exit silently and — under launchd/systemd — restart-loop without a clear cause).
|
|
539
|
+
const ctxs = (await Promise.all(agentIds.map(setupAgent))).filter(Boolean);
|
|
540
|
+
if (ctxs.length === 0) {
|
|
541
|
+
die(`error: no agents could be armed (${agentIds.length} discovered; all policy fetches failed). Check WMA_API_KEY / WMA_FORTRESS_BASE_URL.`);
|
|
546
542
|
}
|
|
543
|
+
if (fleet) info(`armed ${ctxs.length}/${agentIds.length} agent(s); watching.`);
|
|
544
|
+
|
|
545
|
+
// Phase 2: run each agent's loop (blocks until SIGINT/SIGTERM).
|
|
546
|
+
await Promise.all(ctxs.map((ctx) => (
|
|
547
|
+
singleSessionId ? runSessionWorker({ sessionId: singleSessionId, ctx }) : runAgentWide(ctx)
|
|
548
|
+
)));
|
|
547
549
|
}
|
|
548
550
|
|
|
549
551
|
main().catch(e => {
|
|
@@ -77,6 +77,24 @@ export async function getAgent(apiKey, agentId) {
|
|
|
77
77
|
return getWithRetry(apiKey, `/v1/agents/${agentId}`);
|
|
78
78
|
}
|
|
79
79
|
|
|
80
|
+
// List every Managed Agent under the API key (paginated). Used for fleet mode
|
|
81
|
+
// (watch/shield/service --all-agents) and agent discovery.
|
|
82
|
+
export async function listAgents(apiKey, { limit = 100 } = {}) {
|
|
83
|
+
const agents = [];
|
|
84
|
+
let after = null;
|
|
85
|
+
while (true) {
|
|
86
|
+
const qs = new URLSearchParams({ limit: String(limit) });
|
|
87
|
+
if (after) qs.set('after_id', after);
|
|
88
|
+
const data = await getWithRetry(apiKey, `/v1/agents?${qs}`);
|
|
89
|
+
const page = data.data || [];
|
|
90
|
+
for (const a of page) agents.push(a);
|
|
91
|
+
if (!data.has_more || page.length === 0) break;
|
|
92
|
+
after = page[page.length - 1]?.id;
|
|
93
|
+
if (!after) break;
|
|
94
|
+
}
|
|
95
|
+
return agents;
|
|
96
|
+
}
|
|
97
|
+
|
|
80
98
|
export async function listSessions(apiKey, { agentId, since, limit = 100 } = {}) {
|
|
81
99
|
const sessions = [];
|
|
82
100
|
let after = null;
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$comment": "WatchMyAgents — typology classifier weights + thresholds (Guardian Core, agent-typology-classification.spec.md §3/§4/§5). INVARIANT: weights and thresholds live HERE, never hardcoded in typology.js ('poids de signature en config, pas en dur'). Calibrate on labelled real traffic. Modèle C: all inputs are anonymized behavioural fractions/flags only.",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"updated_at": "2026-05-29T00:00:00Z",
|
|
5
|
+
|
|
6
|
+
"thresholds": {
|
|
7
|
+
"$comment": "§4 'Seuils par défaut (à calibrer)' + §5 downgrade asymmetry.",
|
|
8
|
+
"n_events_min": 50,
|
|
9
|
+
"confidence_min": 0.70,
|
|
10
|
+
"margin_min": 0.15,
|
|
11
|
+
"stable_windows": 3,
|
|
12
|
+
"downgrade_confidence_min": 0.85,
|
|
13
|
+
"downgrade_windows": 5,
|
|
14
|
+
"untrusted_modifier_min": 0.1,
|
|
15
|
+
"sensitive_modifier_min": 0.0,
|
|
16
|
+
"payment_overlay_min": 0.0,
|
|
17
|
+
"autonomy_modifier_min": 0.5,
|
|
18
|
+
"$comment_tie": "§8 conservative tie-break: when |score(top1)-score(top2)| <= tie_epsilon (a near/exact tie between two REAL types with real signal), select the STRICTER of the two rather than falling to the more-permissive generic — 'dans le doute, on reste sur le plus protecteur'. Set to 0 for exact-tie only.",
|
|
19
|
+
"tie_epsilon": 0.0
|
|
20
|
+
},
|
|
21
|
+
|
|
22
|
+
"confidence_sigmoid": {
|
|
23
|
+
"$comment": "§4 confidence = sigmoid(a·top1.score + b·margin + c·log(n_events)). All three coefficients live in config; a naive impl that only used top1.score would be wrong.",
|
|
24
|
+
"a": 4.0,
|
|
25
|
+
"b": 6.0,
|
|
26
|
+
"c": 0.6,
|
|
27
|
+
"bias": -3.5
|
|
28
|
+
},
|
|
29
|
+
|
|
30
|
+
"strictness_rank": {
|
|
31
|
+
"$comment": "§5 restriction ranking — derived from each template's baseline_policies enforcement severity (isolate>block>require_approval>throttle>monitor>warn). Higher rank = STRICTER. Drives re-classification asymmetry: to a stricter rank = normal threshold; to a looser rank = downgrade gate (conf>=0.85 AND 5 windows). NOT alphabetical.",
|
|
32
|
+
"devops_infra": 10,
|
|
33
|
+
"transactional_financial": 9,
|
|
34
|
+
"workflow_backoffice": 8,
|
|
35
|
+
"coding": 7,
|
|
36
|
+
"orchestrator": 6,
|
|
37
|
+
"browser_web": 5,
|
|
38
|
+
"personal_assistant": 4,
|
|
39
|
+
"data_rag": 3,
|
|
40
|
+
"generic": 2,
|
|
41
|
+
"customer_facing": 1
|
|
42
|
+
},
|
|
43
|
+
|
|
44
|
+
"features": {
|
|
45
|
+
"$comment": "Canonical anonymized feature keys (Modèle C). Fractions f_* in [0,1]; flag_* in {0,1}; aux_* in [0,1]. Order is informational only — scoring is key-addressed.",
|
|
46
|
+
"fractions": ["f_code", "f_browser", "f_database", "f_http", "f_email", "f_payment", "f_secret", "f_search", "f_memory", "f_handoff", "f_user_msg", "f_file"],
|
|
47
|
+
"flags": ["flag_deploy", "flag_internal_sys", "flag_on_behalf"],
|
|
48
|
+
"aux": ["aux_autonomy", "aux_untrusted", "aux_sensitive"]
|
|
49
|
+
},
|
|
50
|
+
|
|
51
|
+
"weights": {
|
|
52
|
+
"$comment": "w[type][feature] — signature weights (§3). Positive = signal for the type; negative = signal against. flag_* are the REQUIRED discriminators for the 3 inseparable pairs (coding/devops, data_rag/workflow, personal_assistant/workflow). 'generic' has no positive weights (pure fallback).",
|
|
53
|
+
|
|
54
|
+
"coding": {
|
|
55
|
+
"f_code": 1.0, "f_file": 0.5, "f_search": 0.3, "f_secret": 0.1,
|
|
56
|
+
"flag_deploy": -0.9
|
|
57
|
+
},
|
|
58
|
+
"devops_infra": {
|
|
59
|
+
"f_code": 0.7, "f_secret": 0.6, "f_file": 0.2,
|
|
60
|
+
"flag_deploy": 1.2
|
|
61
|
+
},
|
|
62
|
+
"data_rag": {
|
|
63
|
+
"f_database": 0.8, "f_search": 0.35, "f_memory": 0.7, "aux_untrusted": 0.2,
|
|
64
|
+
"flag_internal_sys": -0.7
|
|
65
|
+
},
|
|
66
|
+
"customer_facing": {
|
|
67
|
+
"f_user_msg": 1.0, "f_handoff": 0.3, "f_email": 0.2
|
|
68
|
+
},
|
|
69
|
+
"browser_web": {
|
|
70
|
+
"f_browser": 1.0, "f_http": 0.6, "f_search": 0.7
|
|
71
|
+
},
|
|
72
|
+
"orchestrator": {
|
|
73
|
+
"f_handoff": 1.2, "f_code": -0.2, "f_browser": -0.2, "f_database": -0.2
|
|
74
|
+
},
|
|
75
|
+
"workflow_backoffice": {
|
|
76
|
+
"f_database": 0.6, "f_http": 0.5, "f_file": 0.2,
|
|
77
|
+
"flag_internal_sys": 0.9, "flag_on_behalf": -0.6
|
|
78
|
+
},
|
|
79
|
+
"personal_assistant": {
|
|
80
|
+
"f_email": 0.8, "f_file": 0.4, "f_user_msg": 0.3,
|
|
81
|
+
"flag_on_behalf": 1.0
|
|
82
|
+
},
|
|
83
|
+
"transactional_financial": {
|
|
84
|
+
"f_payment": 1.5
|
|
85
|
+
},
|
|
86
|
+
"generic": {}
|
|
87
|
+
}
|
|
88
|
+
}
|