agent-threat-rules 2.1.3 → 2.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/dist/action-executor.d.ts +1 -1
  2. package/dist/action-executor.d.ts.map +1 -1
  3. package/dist/action-executor.js +13 -11
  4. package/dist/action-executor.js.map +1 -1
  5. package/dist/adapters/default-adapter.d.ts +2 -1
  6. package/dist/adapters/default-adapter.d.ts.map +1 -1
  7. package/dist/adapters/default-adapter.js +14 -11
  8. package/dist/adapters/default-adapter.js.map +1 -1
  9. package/dist/adapters/stdio-adapter.d.ts +2 -1
  10. package/dist/adapters/stdio-adapter.d.ts.map +1 -1
  11. package/dist/adapters/stdio-adapter.js +43 -26
  12. package/dist/adapters/stdio-adapter.js.map +1 -1
  13. package/dist/converters/index.d.ts +4 -0
  14. package/dist/converters/index.d.ts.map +1 -1
  15. package/dist/converters/index.js +2 -0
  16. package/dist/converters/index.js.map +1 -1
  17. package/dist/converters/sage-reverse.d.ts +52 -0
  18. package/dist/converters/sage-reverse.d.ts.map +1 -0
  19. package/dist/converters/sage-reverse.js +216 -0
  20. package/dist/converters/sage-reverse.js.map +1 -0
  21. package/dist/converters/sage.d.ts +123 -0
  22. package/dist/converters/sage.d.ts.map +1 -0
  23. package/dist/converters/sage.js +702 -0
  24. package/dist/converters/sage.js.map +1 -0
  25. package/dist/types.d.ts +24 -17
  26. package/dist/types.d.ts.map +1 -1
  27. package/package.json +9 -1
  28. package/rules/context-exfiltration/ATR-2026-00449-spring-ai-chatmemory-cross-user-leak.yaml +196 -0
  29. package/rules/data-poisoning/ATR-2026-00450-spring-ai-prompt-memory-poisoning.yaml +196 -0
  30. package/rules/privilege-escalation/ATR-2026-00451-litellm-admin-sqli-cisa-kev.yaml +204 -0
  31. package/rules/tool-poisoning/ATR-2026-00448-spring-ai-milvus-filter-injection.yaml +193 -0
@@ -0,0 +1,702 @@
1
+ /**
2
+ * ATR → Sage (gendigitalinc/sage) Converter
3
+ *
4
+ * Converts ATR YAML rules into Sage's threat rule schema, suitable for
5
+ * Sage's `threats/agent-layer.yaml`. Sage uses a single-pattern-per-rule
6
+ * schema with `match_on` controlling the artifact channel; ATR uses
7
+ * multi-condition rules with `field`-targeted regexes.
8
+ *
9
+ * Conversion strategy (per research memo):
10
+ * - One ATR rule → one or more Sage rules, grouped by `field` target.
11
+ * Conditions on the same field combine via regex alternation.
12
+ * - ATR `field` values (user_input, agent_output, content, tool_response,
13
+ * tool_args, tool_name, tool_description) collapse to Sage `match_on:
14
+ * content`. Sage's other channels (command, url, file_path, domain) are
15
+ * command-time matchers and do not have ATR equivalents in the current
16
+ * corpus.
17
+ * - ATR `response.actions` (10 values) collapse to Sage `action` (3
18
+ * values: block / require_approval / log) by strongest-wins semantics.
19
+ * - ATR `(?i)` inline flag (PCRE) is extracted into Sage's rule-level
20
+ * `case_insensitive: true` because the Sage runtime regex compiler is
21
+ * JavaScript and does not support inline flag groups.
22
+ * - ATR `detection_tier: semantic` rules are SKIPPED — they require LLM
23
+ * evaluation, not deterministic regex.
24
+ * - License: ATR rules are MIT; Sage's `threats/*.yaml` are DRL 1.1. Per-rule
25
+ * comment in the output preserves upstream attribution per both licenses.
26
+ *
27
+ * Used by: scripts/sync-with-atr.ts (Sage maintainers can run this script
28
+ * to regenerate threats/agent-layer.yaml from a pinned ATR version), and
29
+ * by any downstream tool that consumes ATR but wants Sage-flavoured output.
30
+ *
31
+ * @module agent-threat-rules/converters/sage
32
+ */
33
+ // ── Mappings ───────────────────────────────────────────────────────────────
34
/**
 * Lookup from ATR category names (hyphen-separated) to the category names
 * emitted for Sage (snake_case). Note that `tool-poisoning` is renamed to
 * `mcp_poisoning` rather than merely re-cased.
 */
const CATEGORY_MAP = Object.freeze(Object.fromEntries([
    ['prompt-injection', 'prompt_injection'],
    ['tool-poisoning', 'mcp_poisoning'],
    ['context-exfiltration', 'context_exfiltration'],
    ['agent-manipulation', 'agent_manipulation'],
    ['privilege-escalation', 'privilege_escalation'],
    ['excessive-autonomy', 'excessive_autonomy'],
    ['data-poisoning', 'data_poisoning'],
    ['model-abuse', 'model_abuse'],
    ['skill-compromise', 'skill_compromise'],
    ['model-security', 'model_security'],
]));
/**
 * Lookup from ATR severity to Sage severity. Every level maps to itself
 * except `informational`, which is folded into `low`.
 */
const SEVERITY_MAP = Object.freeze(Object.fromEntries([
    ['critical', 'critical'],
    ['high', 'high'],
    ['medium', 'medium'],
    ['low', 'low'],
    ['informational', 'low'],
]));
/**
 * Relative strength of each Sage action; a larger number is a stronger
 * action. Used to choose a single action when an ATR rule lists several.
 */
const ACTION_STRENGTH = Object.freeze(Object.fromEntries([
    ['block', 3],
    ['require_approval', 2],
    ['log', 1],
]));
70
/**
 * Translate one ATR response action into the Sage action it corresponds to.
 *
 * @param {string} atr - ATR action name.
 * @returns {'block' | 'require_approval' | 'log' | null} The Sage action, or
 *   null when the ATR action has no Sage counterpart (reset_context,
 *   reduce_permissions, or any unrecognised value).
 */
function mapAction(atr) {
    const blocking = ['block_input', 'block_output', 'block_tool', 'quarantine_session', 'kill_agent'];
    if (blocking.includes(atr)) {
        return 'block';
    }
    if (atr === 'escalate') {
        return 'require_approval';
    }
    const loggingOnly = ['alert', 'snapshot', 'shadow'];
    if (loggingOnly.includes(atr)) {
        return 'log';
    }
    // reset_context, reduce_permissions and unknown actions are not
    // representable in Sage.
    return null;
}
100
/**
 * Translate an ATR condition field name into a Sage `match_on` channel.
 *
 * Only the `url` field keeps a dedicated channel. Every other field name,
 * including unrecognised ones, is matched as `content`.
 *
 * @param {string} atrField - ATR field name from a detection condition.
 * @returns {'url' | 'content'} The Sage channel.
 */
function mapField(atrField) {
    return atrField === 'url' ? 'url' : 'content';
}
123
/**
 * Translate an ATR category into a Sage category.
 *
 * Categories listed in CATEGORY_MAP use their mapped name. Any other category
 * passes through with hyphens replaced by underscores.
 *
 * The table is consulted with an own-property check so that category strings
 * that happen to name an inherited Object.prototype member (for example
 * `constructor` or `toString`) are treated as unknown categories instead of
 * returning a function.
 *
 * @param {string} atrCategory - ATR category (hyphenated).
 * @returns {string} Sage category (snake_case).
 */
function mapCategory(atrCategory) {
    if (Object.prototype.hasOwnProperty.call(CATEGORY_MAP, atrCategory)) {
        return CATEGORY_MAP[atrCategory];
    }
    return atrCategory.replace(/-/g, '_');
}
130
+ // ── Regex compatibility ────────────────────────────────────────────────────
131
/**
 * Split a leading `(?flags)` inline modifier off a regex source string.
 *
 * Only a modifier at the very start of the pattern is recognised. The `i`
 * flag is reported through `caseInsensitive`; every other flag character in
 * the modifier is returned in `unsupportedFlags` so the caller can warn that
 * it was removed.
 *
 * @param {string} pattern - Regex source, possibly starting with `(?flags)`.
 * @returns {{pattern: string, caseInsensitive: boolean, unsupportedFlags: string}}
 *   The pattern without the modifier, plus what the modifier contained.
 */
function extractInlineFlags(pattern) {
    const found = pattern.match(/^\(\?([gimsuy]+)\)/);
    if (found === null) {
        return { pattern, caseInsensitive: false, unsupportedFlags: '' };
    }
    const [modifier, flagChars] = found;
    const leftover = [...flagChars].filter((ch) => ch !== 'i').join('');
    return {
        pattern: pattern.slice(modifier.length),
        caseInsensitive: flagChars.includes('i'),
        unsupportedFlags: leftover,
    };
}
153
/**
 * Check whether a regex source compiles with JavaScript's RegExp.
 *
 * @param {string} pattern - Regex source to compile.
 * @param {boolean} caseInsensitive - Compile with the `i` flag when truthy.
 * @returns {string | null} null when compilation succeeds, otherwise the
 *   message of the error thrown by the RegExp constructor.
 */
function validateRegexCompiles(pattern, caseInsensitive) {
    const flags = caseInsensitive ? 'i' : '';
    let failure = null;
    try {
        new RegExp(pattern, flags);
    }
    catch (err) {
        failure = err instanceof Error ? err.message : String(err);
    }
    return failure;
}
167
/**
 * Join several regex sources into one source that matches any of them.
 *
 * A single pattern is returned untouched. Otherwise each pattern is placed in
 * its own non-capturing group before being joined with `|`, so alternations
 * inside one pattern cannot leak into its neighbours.
 *
 * @param {string[]} patterns - Regex sources to combine.
 * @returns {string} Combined regex source (empty string for an empty list).
 */
function alternationCombine(patterns) {
    const wrapNonCapturing = (source) => `(?:${source})`;
    return patterns.length === 1
        ? patterns[0]
        : patterns.map(wrapNonCapturing).join('|');
}
177
+ // ── Confidence ─────────────────────────────────────────────────────────────
178
/**
 * Derive a Sage confidence value (0.0 to 1.0) from an ATR rule.
 *
 * Resolution order:
 *  1. `atr.confidence` when it is a usable number. Values above 1 are read as
 *     percentages and divided by 100; the result is rounded to two decimals
 *     and clamped to [0, 1].
 *  2. `atr.tags.confidence` as an enum: high → 0.9, medium → 0.75, low → 0.6.
 *  3. 0.8 when neither is available.
 *
 * A NaN `confidence` is treated as absent (it would otherwise be returned as
 * NaN and slip past numeric threshold comparisons), and a rule without a
 * `tags` object falls through to the default instead of throwing.
 *
 * @param {{confidence?: number, tags?: {confidence?: string}}} atr - ATR rule.
 * @returns {number} Confidence between 0 and 1 inclusive.
 */
function pickConfidence(atr) {
    const numeric = atr.confidence;
    if (typeof numeric === 'number' && !Number.isNaN(numeric)) {
        // ATR uses 0-100; Sage uses 0.0-1.0
        const normalized = numeric > 1 ? numeric / 100 : numeric;
        return Math.max(0, Math.min(1, Number(normalized.toFixed(2))));
    }
    switch (atr.tags?.confidence) {
        case 'high':
            return 0.9;
        case 'medium':
            return 0.75;
        case 'low':
            return 0.6;
        default:
            return 0.8;
    }
}
198
+ // ── Action picking ─────────────────────────────────────────────────────────
199
/**
 * Reduce an ATR rule's list of response actions to a single Sage action.
 *
 * Each ATR action is translated with mapAction; the translated action with
 * the highest ACTION_STRENGTH wins. ATR actions that have no Sage counterpart
 * are collected so the caller can report them. When nothing maps, the result
 * is `log`.
 *
 * @param {Iterable<string>} atrActions - ATR response actions.
 * @returns {{action: string, droppedActions: string[]}} Chosen Sage action
 *   and the ATR actions that could not be mapped.
 */
function pickAction(atrActions) {
    const droppedActions = [];
    let action = 'log';
    let topStrength = 0;
    for (const candidate of atrActions) {
        const sageAction = mapAction(candidate);
        if (sageAction !== null) {
            const rank = ACTION_STRENGTH[sageAction];
            if (rank > topStrength) {
                action = sageAction;
                topStrength = rank;
            }
        }
        else {
            droppedActions.push(candidate);
        }
    }
    return { action, droppedActions };
}
222
+ // ── Sage id generation ────────────────────────────────────────────────────
223
/**
 * Sage category → short code used as the middle segment of a Sage rule id
 * (`CLT-<PREFIX>-<NNN>`).
 */
const SAGE_PREFIX_MAP = Object.freeze({
    prompt_injection: 'PI',
    mcp_poisoning: 'MCP',
    context_exfiltration: 'CTX',
    agent_manipulation: 'AGM',
    privilege_escalation: 'PRV',
    excessive_autonomy: 'EAU',
    data_poisoning: 'DPS',
    model_abuse: 'MAB',
    skill_compromise: 'SKL',
    model_security: 'MSC',
    supply_chain: 'SUP',
});
/**
 * Hands out sequential Sage rule ids of the form `CLT-<PREFIX>-<NNN>`.
 *
 * Ids are numbered per Sage category in the order `next` is called; they are
 * not derived from the ATR rule id. The number is zero-padded to at least
 * three digits. Categories without an entry in SAGE_PREFIX_MAP use the
 * prefix `GEN`.
 */
export class SageIdAllocator {
    /** Last number handed out (or the configured offset) per Sage category. */
    counters = new Map();
    /**
     * @param {Object<string, number>} [startingOffsets] - For each Sage
     *   category, the highest number that is already in use. The first id
     *   allocated for that category is offset + 1: pass
     *   `{prompt_injection: 7}` when CLT-PI-001..007 already exist so that
     *   allocation starts at CLT-PI-008.
     */
    constructor(startingOffsets = {}) {
        for (const [cat, offset] of Object.entries(startingOffsets)) {
            this.counters.set(cat, offset);
        }
    }
    /**
     * Allocate the next id for a category and advance that category's counter.
     *
     * @param {string} sageCategory - Sage category name (snake_case).
     * @param {string | null} [channelDisambiguator] - Optional suffix appended
     *   as `-<suffix>`; omitted when falsy.
     * @returns {string} The allocated id, e.g. `CLT-PI-008` or `CLT-PI-008-URL`.
     */
    next(sageCategory, channelDisambiguator) {
        // Own-property check: a category named like an Object.prototype member
        // (e.g. "constructor") must fall back to GEN, not stringify a function.
        const prefix = Object.prototype.hasOwnProperty.call(SAGE_PREFIX_MAP, sageCategory)
            ? SAGE_PREFIX_MAP[sageCategory]
            : 'GEN';
        const counter = (this.counters.get(sageCategory) ?? 0) + 1;
        this.counters.set(sageCategory, counter);
        const numericPart = String(counter).padStart(3, '0');
        const suffix = channelDisambiguator ? `-${channelDisambiguator}` : '';
        return `CLT-${prefix}-${numericPart}${suffix}`;
    }
}
270
+ // ── Core conversion ────────────────────────────────────────────────────────
271
/**
 * Bucket an ATR rule's array-format conditions by the Sage `match_on`
 * channel each one maps to (via mapField), rather than by the ATR field name.
 *
 * Skipped conditions:
 *  - no `field` (or a falsy one): warning `condition_without_field`
 *  - operator other than `regex`: warning `regex_compat_issue`
 *  - falsy `value`: dropped silently
 *
 * @param {string} atrId - ATR rule id, recorded on each warning.
 * @param {Iterable<object>} conditions - ATR detection conditions.
 * @param {object[]} warnings - Warning list; appended to in place.
 * @returns {Map<string, object[]>} Sage channel → conditions, in first-seen
 *   channel order.
 */
function groupConditionsBySageChannel(atrId, conditions, warnings) {
    const byChannel = new Map();
    const warn = (kind, detail) => {
        warnings.push({ ruleId: atrId, kind, detail });
    };
    for (const condition of conditions) {
        const hasField = 'field' in condition && Boolean(condition.field);
        if (!hasField) {
            warn('condition_without_field', 'Condition skipped — missing field target');
            continue;
        }
        if (condition.operator !== 'regex') {
            warn('regex_compat_issue', `Operator ${condition.operator} not supported by Sage (only regex)`);
            continue;
        }
        if (!condition.value) {
            continue;
        }
        const channel = mapField(condition.field);
        if (byChannel.has(channel)) {
            byChannel.get(channel).push(condition);
        }
        else {
            byChannel.set(channel, [condition]);
        }
    }
    return byChannel;
}
311
/**
 * Turn all conditions that share one Sage channel into a single Sage rule.
 *
 * Each condition's regex has a leading inline-flag group stripped and is
 * test-compiled; patterns that fail to compile are dropped with a warning.
 * The survivors are OR-ed together. If any survivor was case-insensitive the
 * whole rule becomes case-insensitive (with a warning when the group was
 * mixed). A Sage id is allocated only once the combined regex is known to
 * compile. A `block` action is downgraded to `require_approval` when the
 * rule's confidence is below 0.85.
 *
 * @param {object} atr - Source ATR rule.
 * @param {string} sageChannel - Sage `match_on` channel for this group.
 * @param {object[]} conditions - Conditions mapped to that channel.
 * @param {string | null} channelDisambiguator - Optional id suffix.
 * @param {string} pickedAction - Sage action chosen for the ATR rule.
 * @param {{next: Function}} idAllocator - Source of Sage rule ids.
 * @param {object[]} warnings - Warning list; appended to in place.
 * @returns {object | null} The Sage rule, or null when no usable regex
 *   remains.
 */
function groupToSageRule(atr, sageChannel, conditions, channelDisambiguator, pickedAction, idAllocator, warnings) {
    const compatIssue = (detail) => {
        warnings.push({ ruleId: atr.id, kind: 'regex_compat_issue', detail });
    };
    const usable = [];
    for (const cond of conditions) {
        const flags = extractInlineFlags(cond.value);
        if (flags.unsupportedFlags) {
            compatIssue(`Unsupported inline flags (${flags.unsupportedFlags}) stripped; Sage runtime supports case-insensitive only`);
        }
        const failure = validateRegexCompiles(flags.pattern, flags.caseInsensitive);
        if (failure) {
            compatIssue(`Regex does not compile under JS RegExp: ${failure}`);
            continue;
        }
        usable.push({ regex: flags.pattern, caseInsensitive: flags.caseInsensitive });
    }
    if (usable.length === 0) {
        return null;
    }
    // Sage has a single rule-level case flag, so one case-insensitive
    // sub-pattern makes the whole rule case-insensitive.
    const ruleCaseInsensitive = usable.some((entry) => entry.caseInsensitive);
    const hasCaseSensitive = usable.some((entry) => !entry.caseInsensitive);
    if (ruleCaseInsensitive && hasCaseSensitive) {
        compatIssue('Group mixes case-sensitive and case-insensitive sub-patterns; promoting whole rule to case_insensitive=true');
    }
    const combinedRegex = alternationCombine(usable.map((entry) => entry.regex));
    // Re-check after joining: the alternation as a whole must compile too.
    const combinedFailure = validateRegexCompiles(combinedRegex, ruleCaseInsensitive);
    if (combinedFailure) {
        compatIssue(`Combined regex (alternation) does not compile: ${combinedFailure}`);
        return null;
    }
    const category = mapCategory(atr.tags.category);
    const id = idAllocator.next(category, channelDisambiguator);
    // Titles longer than 100 characters are cut to 97 plus an ellipsis.
    const title = atr.title.length > 100 ? `${atr.title.slice(0, 97)}...` : atr.title;
    const upstreamUrl = `https://github.com/Agent-Threat-Rule/agent-threat-rules/blob/main/rules/${atr.tags.category}/${atr.id}.yaml`;
    const confidence = pickConfidence(atr);
    const downgradeBlock = pickedAction === 'block' && confidence < 0.85;
    return {
        id,
        category,
        severity: SEVERITY_MAP[atr.severity],
        confidence,
        action: downgradeBlock ? 'require_approval' : pickedAction,
        pattern: combinedRegex,
        match_on: sageChannel,
        title,
        expires_at: null,
        revoked: atr.status === 'draft',
        case_insensitive: ruleCaseInsensitive,
        upstream: atr.id,
        upstream_url: upstreamUrl,
        upstream_license: 'MIT',
        detects: extractDetectsSummary(atr.description),
    };
}
403
/**
 * Produce a short one-line summary of an ATR rule description, used for the
 * `# Detects:` comment emitted above a Sage rule.
 *
 * Whitespace (including newlines) is collapsed to single spaces first. The
 * summary is the first sentence, i.e. the shortest prefix of 21-201 characters
 * that ends in `.`, `?` or `!` and is followed by whitespace or the end of
 * the text. When no such sentence exists, the first 150 characters are used,
 * with `...` appended only if text was actually cut off.
 *
 * @param {string | null | undefined} description - ATR rule description.
 * @returns {string} Summary, or an empty string when there is no description.
 */
function extractDetectsSummary(description) {
    if (!description) {
        return '';
    }
    const collapsed = description.replace(/\s+/g, ' ').trim();
    const firstSentence = collapsed.match(/^(.{20,200}?[.?!])(\s|$)/)?.[1];
    if (firstSentence) {
        return firstSentence;
    }
    const FALLBACK_LENGTH = 150;
    // Compare against the full length so a description of exactly 150
    // characters is returned as-is rather than gaining a spurious ellipsis.
    return collapsed.length > FALLBACK_LENGTH
        ? `${collapsed.slice(0, FALLBACK_LENGTH)}...`
        : collapsed;
}
422
/**
 * Convert one ATR rule into zero or more Sage rules.
 *
 * Nothing is produced (only warnings) when the rule is semantic-tier, is
 * deprecated, uses a non-array `detection.conditions`, or has no regex
 * condition with a field target.
 *
 * Otherwise conditions are grouped by Sage channel. Each channel normally
 * becomes one Sage rule whose pattern is the OR of its conditions. If that
 * combined pattern would exceed 500 characters and the channel has more than
 * one condition, the channel is split into one Sage rule per condition; the
 * split rules share a single allocated base id and get letter suffixes
 * (…001a, …001b). When more than one channel is present, ids carry a channel
 * code suffix.
 *
 * Known losses: conditions are always combined as alternatives, only the
 * strongest mappable response action is kept, and draft rules are emitted
 * with `revoked: true`.
 *
 * @param {object} atr - ATR rule.
 * @param {SageIdAllocator} [idAllocator] - Id source; a fresh allocator is
 *   created when omitted.
 * @returns {{rules: object[], warnings: object[]}} Converted rules and the
 *   warnings raised along the way.
 */
export function atrToSage(atr, idAllocator = new SageIdAllocator()) {
    const warnings = [];
    const note = (kind, detail) => {
        warnings.push({ ruleId: atr.id, kind, detail });
    };
    const nothingConverted = () => ({ rules: [], warnings });
    // Semantic-tier rules cannot be expressed as a deterministic regex.
    if (atr.detection_tier === 'semantic') {
        note('semantic_tier_skipped', 'Semantic-tier rules require LLM evaluation; Sage uses deterministic regex');
        return nothingConverted();
    }
    if (atr.status === 'deprecated') {
        note('deprecated_skipped', `Deprecated rule (replaced_by=${atr.replaced_by ?? 'none'})`);
        return nothingConverted();
    }
    // Strongest mappable action wins; unmappable ones are reported.
    const picked = pickAction(atr.response.actions);
    const pickedAction = picked.action;
    for (const unsupported of picked.droppedActions) {
        note('unsupported_action_dropped', `Action ${unsupported} has no Sage equivalent`);
    }
    const declared = atr.detection.conditions;
    if (!Array.isArray(declared)) {
        // The named-map condition format is not converted at all.
        note('no_convertible_conditions', 'Named-map condition format not yet supported');
        return nothingConverted();
    }
    const byChannel = groupConditionsBySageChannel(atr.id, declared, warnings);
    if (byChannel.size === 0) {
        note('no_convertible_conditions', 'No regex conditions with a field target found');
        return nothingConverted();
    }
    const lengthCap = 500;
    // Ids only need a channel suffix when the rule spans several channels.
    const spansChannels = byChannel.size > 1;
    const rules = [];
    for (const [channel, members] of byChannel) {
        const suffix = spansChannels ? channelCode(channel) : null;
        // Preview the combined pattern purely to measure its length.
        const stripped = members
            .map((member) => extractInlineFlags(member.value).pattern)
            .filter((source) => source.length > 0);
        const preview = alternationCombine(stripped);
        const fitsInOneRule = preview.length <= lengthCap || members.length === 1;
        if (fitsInOneRule) {
            const single = groupToSageRule(atr, channel, members, suffix, pickedAction, idAllocator, warnings);
            if (single) {
                rules.push(single);
            }
            continue;
        }
        // Too long: one Sage rule per condition, all sharing one base id.
        note('split_by_length', `Combined regex would be ${preview.length} chars (>${lengthCap}); splitting into ${members.length} Sage rules with letter suffixes`);
        const baseId = idAllocator.next(mapCategory(atr.tags.category), suffix);
        for (const [position, member] of members.entries()) {
            const part = buildSplitSubRule(atr, channel, member, `${baseId}${letterFromIndex(position)}`, pickedAction, warnings);
            if (part) {
                rules.push(part);
            }
        }
    }
    if (atr.status === 'draft') {
        note('draft_marked_revoked', 'ATR draft status → Sage revoked=true so the rule does not load');
    }
    return { rules, warnings };
}
535
/**
 * Build one Sage rule from one ATR condition, for the case where a channel's
 * combined regex was too long and is emitted condition by condition.
 *
 * The id is supplied ready-made by the caller (base id plus letter suffix),
 * so no id is allocated here. A `block` action is downgraded to
 * `require_approval` when the rule's confidence is below 0.85. The `detects`
 * text prefers the condition's own description when it is longer than 10
 * characters, otherwise the summary of the rule-level description.
 *
 * @param {object} atr - Source ATR rule.
 * @param {string} sageChannel - Sage `match_on` channel.
 * @param {object} condition - The single ATR condition to convert.
 * @param {string} id - Pre-built Sage rule id.
 * @param {string} pickedAction - Sage action chosen for the ATR rule.
 * @param {object[]} warnings - Warning list; appended to in place.
 * @returns {object | null} The Sage rule, or null when the pattern does not
 *   compile.
 */
function buildSplitSubRule(atr, sageChannel, condition, id, pickedAction, warnings) {
    const compatIssue = (detail) => {
        warnings.push({ ruleId: atr.id, kind: 'regex_compat_issue', detail });
    };
    const flags = extractInlineFlags(condition.value);
    if (flags.unsupportedFlags) {
        compatIssue(`Unsupported inline flags (${flags.unsupportedFlags}) stripped`);
    }
    const failure = validateRegexCompiles(flags.pattern, flags.caseInsensitive);
    if (failure) {
        compatIssue(`Split sub-pattern does not compile: ${failure}`);
        return null;
    }
    const category = mapCategory(atr.tags.category);
    // Titles longer than 100 characters are cut to 97 plus an ellipsis.
    const title = atr.title.length > 100 ? `${atr.title.slice(0, 97)}...` : atr.title;
    const upstreamUrl = `https://github.com/Agent-Threat-Rule/agent-threat-rules/blob/main/rules/${atr.tags.category}/${atr.id}.yaml`;
    const confidence = pickConfidence(atr);
    const downgradeBlock = pickedAction === 'block' && confidence < 0.85;
    // A sufficiently descriptive per-condition description beats the
    // rule-wide summary, so each sub-rule can explain its own role.
    let detects;
    if (condition.description && condition.description.length > 10) {
        detects = condition.description;
    }
    else {
        detects = extractDetectsSummary(atr.description);
    }
    detects = detects ?? '';
    return {
        id,
        category,
        severity: SEVERITY_MAP[atr.severity],
        confidence,
        action: downgradeBlock ? 'require_approval' : pickedAction,
        pattern: flags.pattern,
        match_on: sageChannel,
        title,
        expires_at: null,
        revoked: atr.status === 'draft',
        case_insensitive: flags.caseInsensitive,
        upstream: atr.id,
        upstream_url: upstreamUrl,
        upstream_license: 'MIT',
        detects,
    };
}
590
/**
 * Letter suffix for the i-th rule produced by splitting one ATR rule
 * (CLT-PRV-001a, CLT-PRV-001b, ...).
 *
 * Uses spreadsheet-column style numbering in lowercase: 0 → a, 25 → z,
 * 26 → aa, 701 → zz, 702 → aaa, and so on, so the suffix stays alphabetic
 * for any non-negative integer index.
 *
 * @param {number} i - Zero-based, non-negative integer index.
 * @returns {string} Lowercase letter suffix.
 */
function letterFromIndex(i) {
    const ALPHABET_SIZE = 26;
    const CODE_A = 97;
    let remaining = i;
    let letters = '';
    // Bijective base-26: emit the least significant letter, then shift down
    // by one "digit" (the -1 accounts for there being no zero letter).
    do {
        letters = String.fromCharCode(CODE_A + (remaining % ALPHABET_SIZE)) + letters;
        remaining = Math.floor(remaining / ALPHABET_SIZE) - 1;
    } while (remaining >= 0);
    return letters;
}
603
/**
 * Short code for a Sage `match_on` channel, appended to a rule id when one
 * ATR rule yields Sage rules on more than one channel.
 *
 * @param {string} channel - Sage channel name.
 * @returns {string | undefined} The code, or undefined for a channel that is
 *   not one of content, url, command, file_path or domain.
 */
function channelCode(channel) {
    const codes = new Map([
        ['content', 'CT'],
        ['url', 'URL'],
        ['command', 'CMD'],
        ['file_path', 'FP'],
        ['domain', 'DOM'],
    ]);
    return codes.get(channel);
}
622
/**
 * Convert a list of ATR rules to Sage rules in one pass.
 *
 * A single id allocator is shared across the whole batch, so ids within each
 * Sage category run sequentially (CLT-PRV-001, CLT-PRV-002, ...) in input
 * order. Rules and warnings from every conversion are concatenated.
 *
 * @param {Iterable<object>} atrRules - ATR rules to convert.
 * @param {Object<string, number>} [startingOffsets] - Per Sage category, the
 *   highest id number already taken; e.g. `{prompt_injection: 7}` makes the
 *   first new prompt-injection rule CLT-PI-008.
 * @returns {{rules: object[], warnings: object[]}} All converted rules and
 *   all warnings.
 */
export function atrToSageBatch(atrRules, startingOffsets = {}) {
    const sharedAllocator = new SageIdAllocator(startingOffsets);
    const collected = { rules: [], warnings: [] };
    for (const atrRule of atrRules) {
        const converted = atrToSage(atrRule, sharedAllocator);
        collected.rules.push(...converted.rules);
        collected.warnings.push(...converted.warnings);
    }
    return collected;
}
644
+ // ── YAML serialization ─────────────────────────────────────────────────────
645
/**
 * Serialize Sage rules to YAML list items, one per rule, separated by a
 * blank line.
 *
 * Each item is preceded by an optional `# Detects:` comment and followed by
 * an optional `# Upstream:` comment. `pattern` and `title` are always
 * double-quoted; the other scalar fields are written as plain values.
 * File-level content (license header, timestamps, ...) is not produced here.
 *
 * @param {object[]} rules - Sage rules to serialize.
 * @returns {string} YAML text (empty string for an empty list).
 */
export function sageRulesToYaml(rules) {
    return rules.map(serializeRule).join('\n\n');
}
/**
 * Serialize one Sage rule to its YAML lines.
 *
 * Free text placed in `#` comments is first collapsed onto a single line: a
 * YAML comment ends at the line break, so an embedded newline (possible when
 * `detects` comes from a multi-line condition description) would otherwise
 * turn the rest of the text into YAML content.
 *
 * @param {object} rule - Sage rule.
 * @returns {string} YAML text for the rule, without a trailing newline.
 */
function serializeRule(rule) {
    const singleLine = (text) => String(text).replace(/\s+/g, ' ').trim();
    const lines = [];
    // Detects summary first — gives the reader the attack-scenario context
    // before they read the regex.
    if (rule.detects) {
        const detects = singleLine(rule.detects);
        if (detects) {
            lines.push(`  # Detects: ${detects}`);
        }
    }
    lines.push(`- id: "${rule.id}"`);
    lines.push(`  category: ${rule.category}`);
    lines.push(`  severity: ${rule.severity}`);
    lines.push(`  confidence: ${rule.confidence}`);
    lines.push(`  action: ${rule.action}`);
    lines.push(`  pattern: ${yamlString(rule.pattern)}`);
    if (Array.isArray(rule.match_on)) {
        lines.push(`  match_on: [${rule.match_on.join(', ')}]`);
    }
    else {
        lines.push(`  match_on: ${rule.match_on}`);
    }
    // case_insensitive is only written when true.
    if (rule.case_insensitive) {
        lines.push(`  case_insensitive: true`);
    }
    lines.push(`  title: ${yamlString(rule.title)}`);
    lines.push(`  expires_at: null`);
    lines.push(`  revoked: ${rule.revoked}`);
    if (rule.upstream) {
        const upstream = singleLine(rule.upstream);
        const license = singleLine(rule.upstream_license ?? 'MIT');
        const url = singleLine(rule.upstream_url ?? '');
        lines.push(`  # Upstream: ${upstream} (${license}) ${url ? '— ' + url : ''}`.trimEnd());
    }
    return lines.join('\n');
}
/**
 * Render a string as a YAML double-quoted scalar.
 *
 * The value is always quoted, using JSON string escaping, because plain
 * (unquoted) YAML scalars are fragile around `: `, leading `-` or `?`, and
 * similar characters that regex patterns and titles can contain.
 *
 * @param {string} s - Text to quote.
 * @returns {string} Double-quoted, escaped representation.
 */
function yamlString(s) {
    return JSON.stringify(s);
}
702
+ //# sourceMappingURL=sage.js.map