agent-threat-rules 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +159 -442
- package/dist/capability-extractor.d.ts +35 -0
- package/dist/capability-extractor.d.ts.map +1 -0
- package/dist/capability-extractor.js +91 -0
- package/dist/capability-extractor.js.map +1 -0
- package/dist/cli.js +173 -15
- package/dist/cli.js.map +1 -1
- package/dist/converters/elastic.d.ts +36 -0
- package/dist/converters/elastic.d.ts.map +1 -0
- package/dist/converters/elastic.js +125 -0
- package/dist/converters/elastic.js.map +1 -0
- package/dist/converters/index.d.ts +28 -0
- package/dist/converters/index.d.ts.map +1 -0
- package/dist/converters/index.js +36 -0
- package/dist/converters/index.js.map +1 -0
- package/dist/converters/splunk.d.ts +19 -0
- package/dist/converters/splunk.d.ts.map +1 -0
- package/dist/converters/splunk.js +148 -0
- package/dist/converters/splunk.js.map +1 -0
- package/dist/embedding/build-corpus.d.ts +15 -0
- package/dist/embedding/build-corpus.d.ts.map +1 -0
- package/dist/embedding/build-corpus.js +105 -0
- package/dist/embedding/build-corpus.js.map +1 -0
- package/dist/embedding/model-loader.d.ts +41 -0
- package/dist/embedding/model-loader.d.ts.map +1 -0
- package/dist/embedding/model-loader.js +90 -0
- package/dist/embedding/model-loader.js.map +1 -0
- package/dist/embedding/vector-store.d.ts +41 -0
- package/dist/embedding/vector-store.d.ts.map +1 -0
- package/dist/embedding/vector-store.js +70 -0
- package/dist/embedding/vector-store.js.map +1 -0
- package/dist/engine.d.ts +23 -22
- package/dist/engine.d.ts.map +1 -1
- package/dist/engine.js +174 -25
- package/dist/engine.js.map +1 -1
- package/dist/eval/corpus.d.ts +42 -0
- package/dist/eval/corpus.d.ts.map +1 -0
- package/dist/eval/corpus.js +427 -0
- package/dist/eval/corpus.js.map +1 -0
- package/dist/eval/eval-harness.d.ts +44 -0
- package/dist/eval/eval-harness.d.ts.map +1 -0
- package/dist/eval/eval-harness.js +296 -0
- package/dist/eval/eval-harness.js.map +1 -0
- package/dist/eval/index.d.ts +13 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +9 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/metrics.d.ts +74 -0
- package/dist/eval/metrics.d.ts.map +1 -0
- package/dist/eval/metrics.js +108 -0
- package/dist/eval/metrics.js.map +1 -0
- package/dist/eval/pint-corpus.d.ts +34 -0
- package/dist/eval/pint-corpus.d.ts.map +1 -0
- package/dist/eval/pint-corpus.js +109 -0
- package/dist/eval/pint-corpus.js.map +1 -0
- package/dist/eval/rule-corpus.d.ts +9 -0
- package/dist/eval/rule-corpus.d.ts.map +1 -0
- package/dist/eval/rule-corpus.js +4780 -0
- package/dist/eval/rule-corpus.js.map +1 -0
- package/dist/eval/rule-metrics.d.ts +34 -0
- package/dist/eval/rule-metrics.d.ts.map +1 -0
- package/dist/eval/rule-metrics.js +92 -0
- package/dist/eval/rule-metrics.js.map +1 -0
- package/dist/eval/run-eval.d.ts +7 -0
- package/dist/eval/run-eval.d.ts.map +1 -0
- package/dist/eval/run-eval.js +11 -0
- package/dist/eval/run-eval.js.map +1 -0
- package/dist/eval/run-pint-benchmark.d.ts +18 -0
- package/dist/eval/run-pint-benchmark.d.ts.map +1 -0
- package/dist/eval/run-pint-benchmark.js +157 -0
- package/dist/eval/run-pint-benchmark.js.map +1 -0
- package/dist/flywheel.d.ts +54 -0
- package/dist/flywheel.d.ts.map +1 -0
- package/dist/flywheel.js +98 -0
- package/dist/flywheel.js.map +1 -0
- package/dist/index.d.ts +30 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +33 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp-server.js +1 -1
- package/dist/mcp-tools/scan.d.ts.map +1 -1
- package/dist/mcp-tools/scan.js +7 -0
- package/dist/mcp-tools/scan.js.map +1 -1
- package/dist/mcp-tools/submit-proposal.d.ts.map +1 -1
- package/dist/mcp-tools/submit-proposal.js +8 -0
- package/dist/mcp-tools/submit-proposal.js.map +1 -1
- package/dist/modules/embedding.d.ts +71 -0
- package/dist/modules/embedding.d.ts.map +1 -0
- package/dist/modules/embedding.js +141 -0
- package/dist/modules/embedding.js.map +1 -0
- package/dist/modules/semantic.d.ts +1 -0
- package/dist/modules/semantic.d.ts.map +1 -1
- package/dist/modules/semantic.js +79 -3
- package/dist/modules/semantic.js.map +1 -1
- package/dist/session-tracker.d.ts +2 -0
- package/dist/session-tracker.d.ts.map +1 -1
- package/dist/session-tracker.js +1 -0
- package/dist/session-tracker.js.map +1 -1
- package/dist/shadow-evaluator.d.ts +48 -0
- package/dist/shadow-evaluator.d.ts.map +1 -0
- package/dist/shadow-evaluator.js +128 -0
- package/dist/shadow-evaluator.js.map +1 -0
- package/dist/skill-fingerprint.d.ts.map +1 -1
- package/dist/skill-fingerprint.js +10 -52
- package/dist/skill-fingerprint.js.map +1 -1
- package/dist/tier0-invariant.d.ts +49 -0
- package/dist/tier0-invariant.d.ts.map +1 -0
- package/dist/tier0-invariant.js +184 -0
- package/dist/tier0-invariant.js.map +1 -0
- package/dist/tier1-blacklist.d.ts +48 -0
- package/dist/tier1-blacklist.d.ts.map +1 -0
- package/dist/tier1-blacklist.js +91 -0
- package/dist/tier1-blacklist.js.map +1 -0
- package/dist/types.d.ts +3 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +9 -2
- package/rules/agent-manipulation/ATR-2026-108-consensus-sybil-attack.yaml +103 -0
- package/rules/context-exfiltration/ATR-2026-102-disguised-analytics-exfiltration.yaml +69 -0
- package/rules/excessive-autonomy/ATR-2026-098-unauthorized-financial-action.yaml +155 -0
- package/rules/excessive-autonomy/ATR-2026-099-high-risk-tool-gate.yaml +174 -0
- package/rules/privilege-escalation/ATR-2026-107-delayed-execution-bypass.yaml +67 -0
- package/rules/prompt-injection/ATR-2026-001-direct-prompt-injection.yaml +200 -12
- package/rules/prompt-injection/ATR-2026-003-jailbreak-attempt.yaml +148 -3
- package/rules/prompt-injection/ATR-2026-004-system-prompt-override.yaml +102 -0
- package/rules/prompt-injection/ATR-2026-080-encoding-evasion.yaml +7 -1
- package/rules/prompt-injection/ATR-2026-081-semantic-multi-turn.yaml +8 -2
- package/rules/prompt-injection/ATR-2026-082-fingerprint-evasion.yaml +6 -0
- package/rules/prompt-injection/ATR-2026-083-indirect-tool-injection.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-084-structured-data-injection.yaml +6 -0
- package/rules/prompt-injection/ATR-2026-085-audit-evasion.yaml +6 -0
- package/rules/prompt-injection/ATR-2026-086-visual-spoofing.yaml +6 -0
- package/rules/prompt-injection/ATR-2026-087-rule-probing.yaml +6 -0
- package/rules/prompt-injection/ATR-2026-088-adaptive-countermeasure.yaml +6 -0
- package/rules/prompt-injection/ATR-2026-089-polymorphic-skill.yaml +9 -3
- package/rules/prompt-injection/ATR-2026-090-threat-intel-exfil.yaml +6 -0
- package/rules/prompt-injection/ATR-2026-091-nested-payload.yaml +6 -0
- package/rules/prompt-injection/ATR-2026-092-consensus-poisoning.yaml +8 -2
- package/rules/prompt-injection/ATR-2026-093-gradual-escalation.yaml +7 -1
- package/rules/prompt-injection/ATR-2026-094-audit-bypass.yaml +7 -1
- package/rules/prompt-injection/ATR-2026-097-cjk-injection-patterns.yaml +322 -0
- package/rules/prompt-injection/ATR-2026-104-persona-hijacking.yaml +72 -0
- package/rules/tool-poisoning/ATR-2026-011-tool-output-injection.yaml +9 -0
- package/rules/tool-poisoning/ATR-2026-012-unauthorized-tool-call.yaml +9 -0
- package/rules/tool-poisoning/ATR-2026-013-tool-ssrf.yaml +11 -2
- package/rules/tool-poisoning/ATR-2026-095-supply-chain-poisoning.yaml +9 -0
- package/rules/tool-poisoning/ATR-2026-096-registry-poisoning.yaml +9 -0
- package/rules/tool-poisoning/ATR-2026-100-consent-bypass-instruction.yaml +80 -0
- package/rules/tool-poisoning/ATR-2026-101-trust-escalation-override.yaml +66 -0
- package/rules/tool-poisoning/ATR-2026-103-hidden-safety-bypass-instruction.yaml +71 -0
- package/rules/tool-poisoning/ATR-2026-105-silent-action-concealment.yaml +67 -0
- package/rules/tool-poisoning/ATR-2026-106-schema-description-contradiction.yaml +66 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ATR-to-Elastic Query DSL Converter
|
|
3
|
+
*
|
|
4
|
+
* Converts ATR YAML rules into Elasticsearch Query DSL (JSON) that
|
|
5
|
+
* a SOC analyst can use in Kibana or the Elasticsearch API.
|
|
6
|
+
*
|
|
7
|
+
* @module agent-threat-rules/converters/elastic
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Escape the characters that an Elasticsearch `wildcard` pattern treats as
 * syntax (`*`, `?`, and the escape character `\`) so that a rule value is
 * matched literally instead of being interpreted as a pattern.
 *
 * @param {unknown} value - Raw rule value; coerced to a string.
 * @returns {string} The value with wildcard metacharacters backslash-escaped.
 */
function escapeElasticWildcard(value) {
    return String(value).replace(/[\\*?]/g, '\\$&');
}
/**
 * Convert a single ATR array condition to an Elastic Query DSL clause.
 *
 * Operator mapping:
 * - `regex`               -> `regexp` (case-insensitive, all flags enabled)
 * - `contains` / unknown  -> `wildcard` on `*value*` (case-insensitive)
 * - `exact`               -> `term` on the raw value
 * - `starts_with`         -> `prefix` (case-insensitive)
 * - `gt|lt|gte|lte`       -> `range` with the value coerced via `Number()`
 * - `eq`                  -> `term` with the value coerced via `Number()`
 *
 * @param {{field: string, operator: string, value: unknown}} cond - ATR condition.
 * @returns {object} A JSON-serializable Query DSL clause keyed by `cond.field`.
 */
function conditionToElastic(cond) {
    const { field, operator, value } = cond;
    switch (operator) {
        case 'regex':
            // NOTE(review): Elasticsearch `regexp` uses Lucene regex syntax and
            // matches against the whole value; patterns authored for another
            // regex engine may need adjusting -- confirm against real rules.
            return {
                regexp: {
                    [field]: { value, flags: 'ALL', case_insensitive: true },
                },
            };
        case 'exact':
            return { term: { [field]: value } };
        case 'starts_with':
            return {
                prefix: {
                    [field]: { value, case_insensitive: true },
                },
            };
        case 'gt':
        case 'lt':
        case 'gte':
        case 'lte':
            // The ATR operator names coincide with the range query keys.
            return { range: { [field]: { [operator]: Number(value) } } };
        case 'eq':
            return { term: { [field]: Number(value) } };
        case 'contains':
        default:
            // Unknown operators fall back to a "contains" match. The value is
            // escaped so that only the surrounding `*` act as wildcards.
            return {
                wildcard: {
                    [field]: {
                        value: `*${escapeElasticWildcard(value)}*`,
                        case_insensitive: true,
                    },
                },
            };
    }
}
|
|
72
|
+
/**
 * Convert an ATR rule to an Elasticsearch Query DSL object.
 *
 * Returns a JSON-serializable object with `_meta` and `query` fields:
 * - "all" condition logic  -> `bool.must`
 * - any other logic value  -> `bool.should` with `minimum_should_match: 1`
 * - named-map (non-array) conditions are not converted; an empty `bool`
 *   placeholder is returned and `_meta.warning` explains why.
 *
 * @param {object} rule - Parsed ATR rule; reads `id`, `title`, `severity`,
 *   `tags.category`, `agent_source.type`, `detection.conditions` and
 *   `detection.condition`.
 * @returns {{_meta: object, query: {bool: object}}} Query plus rule metadata.
 */
export function ruleToElastic(rule) {
    const { conditions, condition: logic } = rule.detection;
    const meta = {
        rule_id: rule.id,
        title: rule.title,
        severity: rule.severity,
        category: rule.tags.category,
        source_type: rule.agent_source.type,
        condition_logic: logic,
    };
    if (!Array.isArray(conditions)) {
        // Named-map format: emit the placeholder query together with the same
        // warning text the Splunk converter produces, so the gap is visible
        // in the serialized output instead of being silent.
        // NOTE(review): Elasticsearch is understood to treat a `bool` query
        // with no clauses as match-all -- confirm that a placeholder matching
        // every document is acceptable here.
        return {
            _meta: {
                ...meta,
                warning: 'Named-map conditions not fully supported. Review manually.',
            },
            query: {
                bool: {
                    should: [],
                    minimum_should_match: 0,
                },
            },
        };
    }
    const clauses = conditions.map(conditionToElastic);
    // "all" requires every clause; anything else is treated as "any".
    const bool = logic === 'all'
        ? { must: clauses }
        : { should: clauses, minimum_should_match: 1 };
    return { _meta: meta, query: { bool } };
}
|
|
125
|
+
//# sourceMappingURL=elastic.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"elastic.js","sourceRoot":"","sources":["../../src/converters/elastic.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAsBH;;GAEG;AACH,SAAS,kBAAkB,CAAC,IAAuB;IACjD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;IACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;IAEzB,QAAQ,IAAI,CAAC,QAAQ,EAAE,CAAC;QACtB,KAAK,OAAO;YACV,OAAO;gBACL,MAAM,EAAE;oBACN,CAAC,KAAK,CAAC,EAAE;wBACP,KAAK,EAAE,KAAK;wBACZ,KAAK,EAAE,KAAK;wBACZ,gBAAgB,EAAE,IAAI;qBACvB;iBACF;aACF,CAAC;QAEJ,KAAK,UAAU;YACb,OAAO;gBACL,QAAQ,EAAE;oBACR,CAAC,KAAK,CAAC,EAAE;wBACP,KAAK,EAAE,IAAI,KAAK,GAAG;wBACnB,gBAAgB,EAAE,IAAI;qBACvB;iBACF;aACF,CAAC;QAEJ,KAAK,OAAO;YACV,OAAO;gBACL,IAAI,EAAE;oBACJ,CAAC,KAAK,CAAC,EAAE,KAAK;iBACf;aACF,CAAC;QAEJ,KAAK,aAAa;YAChB,OAAO;gBACL,MAAM,EAAE;oBACN,CAAC,KAAK,CAAC,EAAE;wBACP,KAAK,EAAE,KAAK;wBACZ,gBAAgB,EAAE,IAAI;qBACvB;iBACF;aACF,CAAC;QAEJ,KAAK,IAAI;YACP,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,EAAE,MAAM,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,CAAC;QAEvD,KAAK,IAAI;YACP,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,EAAE,MAAM,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,CAAC;QAEvD,KAAK,KAAK;YACR,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,CAAC;QAExD,KAAK,KAAK;YACR,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,CAAC;QAExD,KAAK,IAAI;YACP,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC;QAE9C;YACE,8BAA8B;YAC9B,OAAO;gBACL,QAAQ,EAAE;oBACR,CAAC,KAAK,CAAC,EAAE;wBACP,KAAK,EAAE,IAAI,KAAK,GAAG;wBACnB,gBAAgB,EAAE,IAAI;qBACvB;iBACF;aACF,CAAC;IACN,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAAC,IAAa;IACzC,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC;IAC7C,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC;IAEvC,MAAM,IAAI,GAAG;QACX,OAAO,EAAE,IAAI,CAAC,EAAE;QAChB,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ;QAC5B,WAAW,EAAE,IAAI,CAAC,YAAY,CAAC,IAAI;QACnC,eAAe,EAAE,KAAK;KACvB,CAAC;IAEF,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QA
C/B,oDAAoD;QACpD,OAAO;YACL,KAAK,EAAE,IAAI;YACX,KAAK,EAAE;gBACL,IAAI,EAAE;oBACJ,MAAM,EAAE,EAAE;oBACV,oBAAoB,EAAE,CAAC;iBACxB;aACF;SACF,CAAC;IACJ,CAAC;IAED,MAAM,eAAe,GAAG,UAAiC,CAAC;IAC1D,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;IAExD,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;QACpB,OAAO;YACL,KAAK,EAAE,IAAI;YACX,KAAK,EAAE;gBACL,IAAI,EAAE;oBACJ,IAAI,EAAE,OAAO;iBACd;aACF;SACF,CAAC;IACJ,CAAC;IAED,2BAA2B;IAC3B,OAAO;QACL,KAAK,EAAE,IAAI;QACX,KAAK,EAAE;YACL,IAAI,EAAE;gBACJ,MAAM,EAAE,OAAO;gBACf,oBAAoB,EAAE,CAAC;aACxB;SACF;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ATR SIEM Query Converter
|
|
3
|
+
*
|
|
4
|
+
* Converts ATR YAML rules into SIEM-specific query formats
|
|
5
|
+
* (Splunk SPL and Elasticsearch Query DSL).
|
|
6
|
+
*
|
|
7
|
+
* @module agent-threat-rules/converters
|
|
8
|
+
*/
|
|
9
|
+
import type { ATRRule } from '../types.js';
|
|
10
|
+
/** Identifier of the SIEM query language a rule is converted to. */
export type SIEMFormat = 'splunk' | 'elastic';
|
|
11
|
+
/** Result of converting one ATR rule into a SIEM query. */
export interface ConvertedQuery {
|
|
12
|
+
/** Identifier of the source rule. */
readonly ruleId: string;
|
|
13
|
+
/** Title of the source rule. */
readonly title: string;
|
|
14
|
+
/** Severity of the source rule. */
readonly severity: string;
|
|
15
|
+
/** Format the query was generated for. */
readonly format: SIEMFormat;
|
|
16
|
+
/** Generated query text (SPL for 'splunk', serialized Query DSL JSON for 'elastic'). */
readonly query: string;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Convert a single ATR rule to a SIEM query.
|
|
20
|
+
*/
|
|
21
|
+
export declare function convertRule(rule: ATRRule, format: SIEMFormat): ConvertedQuery;
|
|
22
|
+
/**
|
|
23
|
+
* Convert all ATR rules in a directory to SIEM queries.
|
|
24
|
+
*/
|
|
25
|
+
export declare function convertAllRules(rulesDir: string, format: SIEMFormat): readonly ConvertedQuery[];
|
|
26
|
+
export { ruleToSPL } from './splunk.js';
|
|
27
|
+
export { ruleToElastic } from './elastic.js';
|
|
28
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/converters/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAK3C,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,SAAS,CAAC;AAE9C,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,UAAU,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,GAAG,cAAc,CAY7E;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,GAAG,SAAS,cAAc,EAAE,CAG/F;AAED,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ATR SIEM Query Converter
|
|
3
|
+
*
|
|
4
|
+
* Converts ATR YAML rules into SIEM-specific query formats
|
|
5
|
+
* (Splunk SPL and Elasticsearch Query DSL).
|
|
6
|
+
*
|
|
7
|
+
* @module agent-threat-rules/converters
|
|
8
|
+
*/
|
|
9
|
+
import { loadRulesFromDirectory } from '../loader.js';
|
|
10
|
+
import { ruleToSPL } from './splunk.js';
|
|
11
|
+
import { ruleToElastic } from './elastic.js';
|
|
12
|
+
/**
 * Convert a single ATR rule to a SIEM query.
 *
 * For `'splunk'` the query is the SPL text from `ruleToSPL`; for any other
 * format it is the Elastic Query DSL object from `ruleToElastic`, serialized
 * as JSON with two-space indentation.
 *
 * @param {object} rule - Parsed ATR rule.
 * @param {string} format - Target SIEM format.
 * @returns {{ruleId: *, title: *, severity: *, format: string, query: string}}
 */
export function convertRule(rule, format) {
    let query;
    if (format === 'splunk') {
        query = ruleToSPL(rule);
    }
    else {
        const dsl = ruleToElastic(rule);
        query = JSON.stringify(dsl, null, 2);
    }
    const { id: ruleId, title, severity } = rule;
    return { ruleId, title, severity, format, query };
}
|
|
27
|
+
/**
 * Convert all ATR rules in a directory to SIEM queries.
 *
 * Loads the rules with `loadRulesFromDirectory` and converts each one with
 * `convertRule`, preserving the loader's order.
 *
 * @param {string} rulesDir - Directory passed to the rule loader.
 * @param {string} format - Target SIEM format, forwarded to `convertRule`.
 * @returns {Array} One converted query per loaded rule.
 */
export function convertAllRules(rulesDir, format) {
    const converted = [];
    for (const rule of loadRulesFromDirectory(rulesDir)) {
        converted.push(convertRule(rule, format));
    }
    return converted;
}
|
|
34
|
+
export { ruleToSPL } from './splunk.js';
|
|
35
|
+
export { ruleToElastic } from './elastic.js';
|
|
36
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/converters/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AACtD,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAY7C;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAa,EAAE,MAAkB;IAC3D,MAAM,KAAK,GAAG,MAAM,KAAK,QAAQ;QAC/B,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC;QACjB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IAEjD,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,EAAE;QACf,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,MAAM;QACN,KAAK;KACN,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,QAAgB,EAAE,MAAkB;IAClE,MAAM,KAAK,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IAC/C,OAAO,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;AACtD,CAAC;AAED,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ATR-to-Splunk SPL Converter
|
|
3
|
+
*
|
|
4
|
+
* Converts ATR YAML rules into Splunk Search Processing Language (SPL) queries
|
|
5
|
+
* that a SOC analyst can use as a starting point for threat hunting.
|
|
6
|
+
*
|
|
7
|
+
* @module agent-threat-rules/converters/splunk
|
|
8
|
+
*/
|
|
9
|
+
import type { ATRRule } from '../types.js';
|
|
10
|
+
/**
|
|
11
|
+
* Convert an ATR rule to a Splunk SPL query string.
|
|
12
|
+
*
|
|
13
|
+
* The generated query includes:
|
|
14
|
+
* - Comment header with rule metadata
|
|
15
|
+
* - Index/sourcetype base search (generic, analyst should customize)
|
|
16
|
+
* - Condition clauses joined with appropriate logic
|
|
17
|
+
*/
|
|
18
|
+
export declare function ruleToSPL(rule: ATRRule): string;
|
|
19
|
+
//# sourceMappingURL=splunk.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"splunk.d.ts","sourceRoot":"","sources":["../../src/converters/splunk.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAqB,MAAM,aAAa,CAAC;AAoD9D;;;;;;;GAOG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,OAAO,GAAG,MAAM,CAqG/C"}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ATR-to-Splunk SPL Converter
|
|
3
|
+
*
|
|
4
|
+
* Converts ATR YAML rules into Splunk Search Processing Language (SPL) queries
|
|
5
|
+
* that a SOC analyst can use as a starting point for threat hunting.
|
|
6
|
+
*
|
|
7
|
+
* @module agent-threat-rules/converters/splunk
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Escape a value for use inside a Splunk SPL double-quoted string.
 *
 * Backslashes and double quotes are each prefixed with a backslash. The
 * value is coerced with `String()` first so that non-string rule values
 * (for example a number parsed from YAML) do not throw.
 *
 * @param {unknown} value - Raw rule value.
 * @returns {string} The escaped text, without surrounding quotes.
 */
function escapeForSPL(value) {
    return String(value).replace(/[\\"]/g, '\\$&');
}
|
|
15
|
+
/**
 * Convert a single ATR array condition to an SPL pipeline stage.
 *
 * Supports operators: regex, contains, exact, starts_with, gt, lt, gte, lte, eq.
 * Unknown operators are treated as `contains`.
 *
 * Every result starts with `|` so the stages can be emitted one per line in
 * any order. Field/value matches use an explicit `| search` command: a bare
 * `field="value"` term placed after a `| regex` or `| where` line would be
 * read as an argument of that preceding command.
 *
 * @param {{field: string, operator: string, value: unknown}} cond - ATR condition.
 * @returns {string} One SPL pipeline stage.
 */
function conditionToSPL(cond) {
    const { field, operator, value } = cond;
    switch (operator) {
        case 'regex':
            return `| regex ${field}="${escapeForSPL(value)}"`;
        case 'exact':
            return `| search ${field}="${escapeForSPL(value)}"`;
        case 'starts_with':
            return `| search ${field}="${escapeForSPL(value)}*"`;
        case 'gt':
            return `| where ${field} > ${Number(value)}`;
        case 'lt':
            return `| where ${field} < ${Number(value)}`;
        case 'gte':
            return `| where ${field} >= ${Number(value)}`;
        case 'lte':
            return `| where ${field} <= ${Number(value)}`;
        case 'eq':
            return `| where ${field} == ${Number(value)}`;
        case 'contains':
        default:
            // Fallback: treat unknown operators as a contains search
            return `| search ${field}="*${escapeForSPL(value)}*"`;
    }
}
|
|
47
|
+
/**
 * Build the boolean expression for one condition, for use as an operand
 * inside a `| where ( ... OR ... )` clause.
 *
 * Unknown operators are treated as `contains`.
 *
 * @param {{field: string, operator: string, value: unknown}} cond - ATR condition.
 * @returns {string} An expression without any leading pipe or `OR`.
 */
function conditionToWhereExpr(cond) {
    const { field, operator, value } = cond;
    switch (operator) {
        case 'regex':
            return `match(${field}, "${escapeForSPL(value)}")`;
        case 'exact':
            return `${field}="${escapeForSPL(value)}"`;
        case 'starts_with':
            return `like(${field}, "${escapeForSPL(value)}%")`;
        case 'gt':
            return `${field} > ${Number(value)}`;
        case 'lt':
            return `${field} < ${Number(value)}`;
        case 'gte':
            return `${field} >= ${Number(value)}`;
        case 'lte':
            return `${field} <= ${Number(value)}`;
        case 'eq':
            return `${field} == ${Number(value)}`;
        case 'contains':
        default:
            return `like(${field}, "%${escapeForSPL(value)}%")`;
    }
}
/**
 * Convert an ATR rule to a Splunk SPL query string.
 *
 * The generated query includes:
 * - Comment header with rule metadata (triple-backtick SPL comments)
 * - Index/sourcetype base search (generic, analyst should customize)
 * - Condition clauses: one pipeline stage per condition for "all" logic,
 *   or a single `| where ( a OR b ... )` clause for any other logic value
 * - A trailing `| table` listing each distinct condition field once
 *
 * Named-map (non-array) conditions are not converted: the header, the base
 * search and a warning comment are returned. An empty condition array
 * returns the header and base search only.
 *
 * @param {object} rule - Parsed ATR rule; reads `id`, `title`, `severity`,
 *   `tags.category`, `agent_source.type`, `detection.conditions` and
 *   `detection.condition`.
 * @returns {string} Newline-joined SPL text.
 */
export function ruleToSPL(rule) {
    const conditions = rule.detection.conditions;
    const logic = rule.detection.condition; // "any" or "all"
    const lines = [
        // Comment header with rule metadata
        `\`\`\` ATR Rule: ${rule.id} \`\`\``,
        `\`\`\` Title: ${rule.title} \`\`\``,
        `\`\`\` Severity: ${rule.severity} | Category: ${rule.tags.category} \`\`\``,
        `\`\`\` Source: ${rule.agent_source.type} | Condition logic: ${logic} \`\`\``,
        '',
        // Base search -- analyst should adjust index and sourcetype
        'index=ai_agent_logs sourcetype=agent_events',
    ];
    if (!Array.isArray(conditions)) {
        // Named-map format: not common in current rules, emit a placeholder.
        // Uses the same three-backtick comment delimiters as the header; a
        // fourth backtick would be left over outside the comment.
        lines.push('``` Warning: Named-map conditions not fully supported. Review manually. ```');
        return lines.join('\n');
    }
    if (conditions.length === 0) {
        return lines.join('\n');
    }
    if (logic === 'all') {
        // AND logic: chain all conditions sequentially
        for (const cond of conditions) {
            lines.push(conditionToSPL(cond));
        }
    }
    else {
        // OR logic ("any"): a single where clause with OR-joined expressions,
        // one per line for analyst readability.
        lines.push('| where (');
        conditions.forEach((cond, i) => {
            const prefix = i === 0 ? ' ' : ' OR ';
            lines.push(`${prefix}${conditionToWhereExpr(cond)}`);
        });
        lines.push(')');
    }
    // Add a table output for the analyst
    const fields = [...new Set(conditions.map((c) => c.field))];
    lines.push(`| table _time ${fields.join(' ')} source`);
    return lines.join('\n');
}
|
|
148
|
+
//# sourceMappingURL=splunk.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"splunk.js","sourceRoot":"","sources":["../../src/converters/splunk.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH;;GAEG;AACH,SAAS,YAAY,CAAC,KAAa;IACjC,OAAO,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;AAC3D,CAAC;AAED;;;;GAIG;AACH,SAAS,cAAc,CAAC,IAAuB;IAC7C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;IACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;IAEzB,QAAQ,IAAI,CAAC,QAAQ,EAAE,CAAC;QACtB,KAAK,OAAO;YACV,OAAO,WAAW,KAAK,KAAK,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC;QAErD,KAAK,UAAU;YACb,OAAO,GAAG,KAAK,MAAM,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC;QAE/C,KAAK,OAAO;YACV,OAAO,GAAG,KAAK,KAAK,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC;QAE7C,KAAK,aAAa;YAChB,OAAO,GAAG,KAAK,KAAK,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC;QAE9C,KAAK,IAAI;YACP,OAAO,WAAW,KAAK,MAAM,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;QAE/C,KAAK,IAAI;YACP,OAAO,WAAW,KAAK,MAAM,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;QAE/C,KAAK,KAAK;YACR,OAAO,WAAW,KAAK,OAAO,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;QAEhD,KAAK,KAAK;YACR,OAAO,WAAW,KAAK,OAAO,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;QAEhD,KAAK,IAAI;YACP,OAAO,WAAW,KAAK,OAAO,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;QAEhD;YACE,yDAAyD;YACzD,OAAO,GAAG,KAAK,MAAM,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC;IACjD,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,SAAS,CAAC,IAAa;IACrC,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC;IAC7C,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,iBAAiB;IAEzD,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,oCAAoC;IACpC,KAAK,CAAC,IAAI,CAAC,oBAAoB,IAAI,CAAC,EAAE,SAAS,CAAC,CAAC;IACjD,KAAK,CAAC,IAAI,CAAC,iBAAiB,IAAI,CAAC,KAAK,SAAS,CAAC,CAAC;IACjD,KAAK,CAAC,IAAI,CAAC,oBAAoB,IAAI,CAAC,QAAQ,gBAAgB,IAAI,CAAC,IAAI,CAAC,QAAQ,SAAS,CAAC,CAAC;IACzF,KAAK,CAAC,IAAI,CAAC,kBAAkB,IAAI,CAAC,YAAY,CAAC,IAAI,uBAAuB,KAAK,SAAS,CAAC,CAAC;IAC1F,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,4DAA4D;IAC5D,KAAK,CAAC,IAAI,CAAC,6CAA6C,CAAC,CAAC;IAE1D,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QAC/B,oEAAoE;QACpE,KAAK,CAAC,IAAI,CAAC,+EAA+E,CAAC,CAAC;QAC5F,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED,MAAM,eAAe,GAAG,UAAiC,CAAC;IAE1D,IAAI,eAAe,CAAC,MAAM,KAAK,C
AAC,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED,mEAAmE;IACnE,gEAAgE;IAChE,4BAA4B;IAC5B,wEAAwE;IAExE,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;QACpB,+CAA+C;QAC/C,KAAK,MAAM,IAAI,IAAI,eAAe,EAAE,CAAC;YACnC,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;SAAM,CAAC;QACN,iEAAiE;QACjE,6DAA6D;QAC7D,4DAA4D;QAC5D,MAAM,eAAe,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC;QAC5E,MAAM,eAAe,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC;QAE5E,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/D,2CAA2C;YAC3C,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YACxB,MAAM,YAAY,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,MAAM,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC;gBAC5C,OAAO,GAAG,MAAM,SAAS,IAAI,CAAC,KAAK,MAAM,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;YACxE,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,mDAAmD;YACnD,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YACxB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,IAAI,IAAI,eAAe,EAAE,CAAC;gBACnC,QAAQ,IAAI,CAAC,QAAQ,EAAE,CAAC;oBACtB,KAAK,OAAO;wBACV,OAAO,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,KAAK,MAAM,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;wBACpE,MAAM;oBACR,KAAK,UAAU;wBACb,OAAO,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,KAAK,OAAO,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;wBACrE,MAAM;oBACR,KAAK,OAAO;wBACV,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,KAAK,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;wBAC5D,MAAM;oBACR,KAAK,aAAa;wBAChB,OAAO,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,KAAK,MAAM,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;wBACpE,MAAM;oBACR,KAAK,IAAI;wBACP,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,MAAM,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBACtD,MAAM;oBACR,KAAK,IAAI;wBACP,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,MAAM,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBACtD,MAAM;oBACR,KAAK,KAAK;wBACR,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,OAA
O,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBACvD,MAAM;oBACR,KAAK,KAAK;wBACR,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBACvD,MAAM;oBACR,KAAK,IAAI;wBACP,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBACvD,MAAM;oBACR;wBACE,OAAO,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,KAAK,OAAO,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;gBACzE,CAAC;YACH,CAAC;YACD,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;YACrF,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC/D,KAAK,CAAC,IAAI,CAAC,iBAAiB,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAEvD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Build attack embedding corpus from ATR rule test cases.
|
|
4
|
+
*
|
|
5
|
+
* Reads all stable ATR rules, extracts true_positive test cases,
|
|
6
|
+
* encodes them through all-MiniLM-L6-v2, and saves as JSON.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* npx tsx src/embedding/build-corpus.ts
|
|
10
|
+
*
|
|
11
|
+
* Output:
|
|
12
|
+
* data/attack-embeddings.json
|
|
13
|
+
*/
|
|
14
|
+
export {};
|
|
15
|
+
//# sourceMappingURL=build-corpus.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"build-corpus.d.ts","sourceRoot":"","sources":["../../src/embedding/build-corpus.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;GAWG"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Build attack embedding corpus from ATR rule test cases.
|
|
4
|
+
*
|
|
5
|
+
* Reads all stable ATR rules, extracts true_positive test cases,
|
|
6
|
+
* encodes them through all-MiniLM-L6-v2, and saves as JSON.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* npx tsx src/embedding/build-corpus.ts
|
|
10
|
+
*
|
|
11
|
+
* Output:
|
|
12
|
+
* data/attack-embeddings.json
|
|
13
|
+
*/
|
|
14
|
+
import { readFileSync, writeFileSync, mkdirSync, readdirSync } from 'node:fs';
|
|
15
|
+
import { join, resolve } from 'node:path';
|
|
16
|
+
import * as yaml from 'js-yaml';
|
|
17
|
+
// Absolute path of the `rules` directory located two levels above this module's directory.
// When `import.meta.dirname` is null/undefined the path is built from '.' instead, which
// `resolve()` interprets relative to the current working directory.
const RULES_DIR = resolve(join(import.meta.dirname ?? '.', '..', '..', 'rules'));
|
|
18
|
+
// Absolute path of the generated corpus file (`data/attack-embeddings.json`), anchored the
// same way as RULES_DIR.
const OUTPUT_PATH = resolve(join(import.meta.dirname ?? '.', '..', '..', 'data', 'attack-embeddings.json'));
|
|
19
|
+
/** Shortest payload (in UTF-16 code units) considered worth embedding. */
const MIN_PAYLOAD_LENGTH = 10;
/** Payloads are cut to this many code units before being encoded. */
const MAX_PAYLOAD_LENGTH = 512;

/**
 * Recursively list every `.yaml` / `.yml` file below a directory.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} File paths, each built by joining `dir` with the entry name.
 */
function collectRuleFiles(dir) {
    const files = [];
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
        const fullPath = join(dir, entry.name);
        if (entry.isDirectory()) {
            files.push(...collectRuleFiles(fullPath));
        } else if (entry.name.endsWith('.yaml') || entry.name.endsWith('.yml')) {
            files.push(fullPath);
        }
    }
    return files;
}

/**
 * Pick the attack payload out of one true-positive test case.
 *
 * The first non-nullish field among input, content, user_input, tool_response,
 * tool_description and tool_args is used. Anything that is not a string of at
 * least MIN_PAYLOAD_LENGTH code units is rejected, so a structured (non-string)
 * field skips only this test case instead of aborting the whole rule.
 *
 * @param {unknown} testCase - One entry of `test_cases.true_positives`.
 * @returns {string | null} The payload truncated to MAX_PAYLOAD_LENGTH, or null when unusable.
 */
function extractPayloadText(testCase) {
    if (testCase === null || typeof testCase !== 'object') {
        return null;
    }
    const raw = testCase.input
        ?? testCase.content
        ?? testCase.user_input
        ?? testCase.tool_response
        ?? testCase.tool_description
        ?? testCase.tool_args;
    if (typeof raw !== 'string' || raw.length < MIN_PAYLOAD_LENGTH) {
        return null;
    }
    return raw.slice(0, MAX_PAYLOAD_LENGTH);
}

/**
 * Parse one rule file and build an attack record for each usable true positive.
 *
 * @param {string} file - Path of a YAML rule file.
 * @returns {{ id: string, text: string, category: string, severity: string, ruleTitle: string }[]}
 *   Empty when the rule has no id or no `test_cases.true_positives` array.
 * @throws If the file cannot be read or is not valid YAML.
 */
function loadRuleAttacks(file) {
    // NOTE(review): js-yaml's load() is only safe-by-default from v4 onwards -- confirm the
    // installed version before pointing this script at rule files from untrusted sources.
    const rule = yaml.load(readFileSync(file, 'utf-8'));
    const truePositives = rule?.test_cases?.true_positives;
    if (!rule?.id || !Array.isArray(truePositives)) {
        return [];
    }
    const attacks = [];
    for (const testCase of truePositives) {
        const text = extractPayloadText(testCase);
        if (text === null) {
            continue;
        }
        attacks.push({
            id: rule.id,
            text,
            category: rule.tags?.category ?? 'unknown',
            severity: rule.severity ?? 'medium',
            ruleTitle: rule.title ?? rule.id,
        });
    }
    return attacks;
}

/**
 * Build the attack embedding corpus.
 *
 * Loads the embedding model, gathers true-positive payloads from every rule file
 * under RULES_DIR, drops duplicate texts (first occurrence wins), encodes each
 * remaining payload and writes the result as pretty-printed JSON to OUTPUT_PATH.
 *
 * @returns {Promise<void>}
 */
async function main() {
    console.log('Building attack embedding corpus...');
    console.log(`Rules dir: ${RULES_DIR}`);

    // Load model
    console.log('Loading embedding model (first run downloads ~22MB)...');
    const { TransformersJSModel } = await import('./model-loader.js');
    const model = new TransformersJSModel();
    await model.initialize();
    console.log('Model loaded.');

    // Collect all true_positive texts from rules
    const ruleFiles = collectRuleFiles(RULES_DIR);
    console.log(`Found ${ruleFiles.length} rule files.`);

    const attacks = [];
    for (const file of ruleFiles) {
        try {
            attacks.push(...loadRuleAttacks(file));
        } catch (err) {
            // Best effort: one broken rule must not abort the build, but say which one was skipped.
            const reason = err instanceof Error ? err.message : String(err);
            console.warn(`Skipping unparseable rule ${file}: ${reason}`);
        }
    }
    console.log(`Extracted ${attacks.length} attack payloads from ${ruleFiles.length} rules.`);

    // Deduplicate by text
    const seen = new Set();
    const unique = attacks.filter((attack) => {
        if (seen.has(attack.text)) {
            return false;
        }
        seen.add(attack.text);
        return true;
    });
    console.log(`Unique payloads: ${unique.length}`);

    // Encode all payloads, one at a time, rewriting a single progress line on stdout
    console.log('Encoding payloads...');
    const output = [];
    for (let i = 0; i < unique.length; i++) {
        const a = unique[i];
        process.stdout.write(`\r [${i + 1}/${unique.length}] ${a.id}`);
        const vec = await model.encode(a.text);
        output.push({
            // The running index keeps ids unique when one rule contributes several payloads.
            id: `${a.id}-tp${i}`,
            text: a.text,
            vector: Array.from(vec),
            label: `${a.ruleTitle}: ${a.text.slice(0, 80)}`,
            category: a.category,
            severity: a.severity,
        });
    }
    console.log('\n');

    // Save
    mkdirSync(join(OUTPUT_PATH, '..'), { recursive: true });
    const json = JSON.stringify(output, null, 2);
    writeFileSync(OUTPUT_PATH, json);
    console.log(`Saved ${output.length} embeddings to ${OUTPUT_PATH}`);
    // The string is written as UTF-8, so its byte length equals the size of the file on disk.
    console.log(`File size: ${(Buffer.byteLength(json, 'utf8') / 1024).toFixed(0)} KB`);
}

// Script entry point: report any failure and exit with a non-zero status.
main().catch((err) => {
    console.error('Fatal:', err);
    process.exit(1);
});
|
|
105
|
+
//# sourceMappingURL=build-corpus.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"build-corpus.js","sourceRoot":"","sources":["../../src/embedding/build-corpus.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,WAAW,EAAc,MAAM,SAAS,CAAC;AAC1F,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,KAAK,IAAI,MAAM,SAAS,CAAC;AAEhC,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;AACjF,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,wBAAwB,CAAC,CAAC,CAAC;AAuB5G,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;IACnD,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,EAAE,CAAC,CAAC;IAEvC,aAAa;IACb,OAAO,CAAC,GAAG,CAAC,wDAAwD,CAAC,CAAC;IACtE,MAAM,EAAE,mBAAmB,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;IAClE,MAAM,KAAK,GAAG,IAAI,mBAAmB,EAAE,CAAC;IACxC,MAAM,KAAK,CAAC,UAAU,EAAE,CAAC;IACzB,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAE7B,6CAA6C;IAC7C,MAAM,OAAO,GAA+F,EAAE,CAAC;IAE/G,SAAS,OAAO,CAAC,GAAW;QAC1B,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;YAC9D,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;YACvC,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,KAAK,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;YACnC,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBACvE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;IACrC,OAAO,CAAC,GAAG,CAAC,SAAS,SAAS,CAAC,MAAM,cAAc,CAAC,CAAC;IAErD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAC5C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAgB,CAAC;YAC/C,IAAI,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC,IAAI,EAAE,UAAU,EAAE,cAAc;gBAAE,SAAS;YAE7D,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE,CAAC;gBAChD,MAAM,IAAI,GAAG,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,OAAO,IAAI,EAAE,CAAC,UAAU,IAAI,EAAE,CAAC,aAAa,IAAI,EAAE,
CAAC,gBAAgB,IAAI,EAAE,CAAC,SAAS,CAAC;gBAChH,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;oBAAE,SAAS;gBAExC,OAAO,CAAC,IAAI,CAAC;oBACX,EAAE,EAAE,IAAI,CAAC,EAAE;oBACX,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;oBACxB,QAAQ,EAAE,IAAI,CAAC,IAAI,EAAE,QAAQ,IAAI,SAAS;oBAC1C,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,QAAQ;oBACnC,SAAS,EAAE,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,EAAE;iBACjC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,yBAAyB;QAC3B,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,CAAC,MAAM,yBAAyB,SAAS,CAAC,MAAM,SAAS,CAAC,CAAC;IAE3F,sBAAsB;IACtB,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QAClC,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAC;QACnC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACjB,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;IACH,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAEjD,sBAAsB;IACtB,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;IACpC,MAAM,MAAM,GAOP,EAAE,CAAC;IAER,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;QACrB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAChE,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE;YACpB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC;YACvB,KAAK,EAAE,GAAG,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE;YAC/C,QAAQ,EAAE,CAAC,CAAC,QAAQ;YACpB,QAAQ,EAAE,CAAC,CAAC,QAAQ;SACrB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAElB,OAAO;IACP,SAAS,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACxD,aAAa,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC5D,OAAO,CAAC,GAAG,CAAC,SAAS,MAAM,CAAC,MAAM,kBAAkB,WAAW,EAAE,CAAC,CAAC;IACnE,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,C
AAC,KAAK,CAAC,CAAC;AACvF,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC7B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding model loader.
|
|
3
|
+
*
|
|
4
|
+
* Lazy-loads all-MiniLM-L6-v2 via @xenova/transformers (optional dep).
|
|
5
|
+
* Model is ~22MB, cached to disk after first download.
|
|
6
|
+
* Runs in pure JS/WASM -- no native bindings needed.
|
|
7
|
+
*
|
|
8
|
+
* @module agent-threat-rules/embedding/model-loader
|
|
9
|
+
*/
|
|
10
|
+
/** Contract shared by the embedding models declared in this module. */
export interface EmbeddingModel {
|
|
11
|
+
/** Encode one text; resolves to its embedding vector. */
|
|
12
|
+
encode(text: string): Promise<Float32Array>;
|
|
13
|
+
/** Encode several texts; resolves to an array of embedding vectors. */
|
|
14
|
+
encodeBatch(texts: readonly string[]): Promise<Float32Array[]>;
|
|
15
|
+
/** Initialize / load the model. */
|
|
16
|
+
initialize(): Promise<void>;
|
|
17
|
+
/** Model output dimension. */
|
|
18
|
+
readonly dimension: number;
|
|
19
|
+
/** Whether the model is loaded. */
|
|
20
|
+
readonly isLoaded: boolean;
|
|
21
|
+
}
|
|
22
|
+
/**
 * EmbeddingModel implementation; per the module header it lazy-loads
 * all-MiniLM-L6-v2 through the optional @xenova/transformers dependency.
 */
export declare class TransformersJSModel implements EmbeddingModel {
|
|
23
|
+
/** Model output dimension; fixed at 384 for this model. */
readonly dimension = 384;
|
|
24
|
+
/** Internal handle to the loaded model; not part of the public surface. */
private pipeline;
|
|
25
|
+
/** Whether the model is loaded. */
get isLoaded(): boolean;
|
|
26
|
+
/** Lazy-load the model on first use */
|
|
27
|
+
initialize(): Promise<void>;
|
|
28
|
+
/** Encode one text; resolves to its embedding vector. */
encode(text: string): Promise<Float32Array>;
|
|
29
|
+
/** Encode several texts; resolves to an array of embedding vectors. */
encodeBatch(texts: readonly string[]): Promise<Float32Array[]>;
|
|
30
|
+
}
|
|
31
|
+
/**
 * Test double for EmbeddingModel: it always reports itself as loaded and can be
 * seeded with preset vectors keyed by input text.
 */
|
|
32
|
+
export declare class MockEmbeddingModel implements EmbeddingModel {
|
|
33
|
+
/** Model output dimension; fixed at 384. */
readonly dimension = 384;
|
|
34
|
+
/** Always true for the mock. */
readonly isLoaded = true;
|
|
35
|
+
/** Preset vectors keyed by input text. */
private readonly mockVectors;
|
|
36
|
+
/** @param mockVectors Optional preset vectors keyed by the exact input text. */
constructor(mockVectors?: Map<string, Float32Array>);
|
|
37
|
+
/** Present to satisfy EmbeddingModel. */
initialize(): Promise<void>;
|
|
38
|
+
/** Encode one text; resolves to its embedding vector. */
encode(text: string): Promise<Float32Array>;
|
|
39
|
+
/** Encode several texts; resolves to an array of embedding vectors. */
encodeBatch(texts: readonly string[]): Promise<Float32Array[]>;
|
|
40
|
+
}
|
|
41
|
+
//# sourceMappingURL=model-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-loader.d.ts","sourceRoot":"","sources":["../../src/embedding/model-loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,WAAW,cAAc;IAC7B,sCAAsC;IACtC,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IAC5C,sCAAsC;IACtC,WAAW,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IAC/D,kCAAkC;IAClC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5B,6BAA6B;IAC7B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,8BAA8B;IAC9B,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC;CAC5B;AAKD,qBAAa,mBAAoB,YAAW,cAAc;IACxD,QAAQ,CAAC,SAAS,OAAa;IAC/B,OAAO,CAAC,QAAQ,CAAiB;IAEjC,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAoB3B,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAQ3C,WAAW,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;CAYrE;AAED,uCAAuC;AACvC,qBAAa,kBAAmB,YAAW,cAAc;IACvD,QAAQ,CAAC,SAAS,OAAa;IAC/B,QAAQ,CAAC,QAAQ,QAAQ;IACzB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAA4B;gBAE5C,WAAW,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC;IAI7C,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAI3B,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAgB3C,WAAW,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;CAGrE"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding model loader.
|
|
3
|
+
*
|
|
4
|
+
* Lazy-loads all-MiniLM-L6-v2 via @xenova/transformers (optional dep).
|
|
5
|
+
* Model is ~22MB, cached to disk after first download.
|
|
6
|
+
* Runs in pure JS/WASM -- no native bindings needed.
|
|
7
|
+
*
|
|
8
|
+
* @module agent-threat-rules/embedding/model-loader
|
|
9
|
+
*/
|
|
10
|
+
// Model identifier handed to the transformers `pipeline()` factory when the model is loaded.
const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2';
|
|
11
|
+
// Length of every embedding vector produced in this module: pipeline output is cut to this
// many leading values, and the mock model generates vectors of this size.
const DIMENSION = 384;
|
|
12
|
+
/**
 * Embedding model backed by a `feature-extraction` pipeline from the optional
 * `@xenova/transformers` package.
 *
 * The package is imported dynamically, so it is only required once a model is
 * actually initialized.
 */
export class TransformersJSModel {
    /** Length of every vector returned by encode() / encodeBatch(). */
    dimension = DIMENSION;
    /** Pipeline function, or null until initialize() has succeeded. */
    pipeline = null;
    /** In-flight load, so that concurrent initialize() calls share a single model load. */
    #pendingLoad = null;

    /** @returns {boolean} True once the pipeline has been created. */
    get isLoaded() {
        return this.pipeline !== null;
    }

    /**
     * Lazy-load the model on first use. Safe to call repeatedly and concurrently;
     * after a failed load the next call tries again.
     *
     * @returns {Promise<void>}
     * @throws {Error} With an install hint when `@xenova/transformers` cannot be resolved,
     *   otherwise with a "Failed to load embedding model" message. The original error is
     *   attached as `cause` in both cases.
     */
    async initialize() {
        if (this.pipeline) {
            return;
        }
        if (this.#pendingLoad === null) {
            this.#pendingLoad = this.#load().finally(() => {
                this.#pendingLoad = null;
            });
        }
        await this.#pendingLoad;
    }

    /** Import the optional dependency and create the pipeline. */
    async #load() {
        let transformers;
        try {
            // Dynamic import to keep @xenova/transformers optional
            transformers = await import('@xenova/transformers');
        } catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            // ESM resolution failures report code ERR_MODULE_NOT_FOUND with a
            // "Cannot find package/module" message; CommonJS uses MODULE_NOT_FOUND.
            const code = err?.code;
            const isMissing = code === 'ERR_MODULE_NOT_FOUND'
                || code === 'MODULE_NOT_FOUND'
                || msg.includes('Cannot find module')
                || msg.includes('Cannot find package')
                || msg.includes('MODULE_NOT_FOUND');
            if (isMissing) {
                throw new Error('Embedding model requires @xenova/transformers. Install: npm install @xenova/transformers', { cause: err });
            }
            throw new Error(`Failed to load embedding model: ${msg}`, { cause: err });
        }
        try {
            const { pipeline } = transformers;
            this.pipeline = await pipeline('feature-extraction', MODEL_NAME, {
                quantized: true,
            });
        } catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to load embedding model: ${msg}`, { cause: err });
        }
    }

    /**
     * Encode one text with mean pooling and normalization requested from the pipeline.
     *
     * @param {string} text - Text to embed.
     * @returns {Promise<Float32Array>} The first DIMENSION values of the pipeline output.
     */
    async encode(text) {
        await this.initialize();
        const pipelineFn = this.pipeline;
        const output = await pipelineFn([text], { pooling: 'mean', normalize: true });
        return new Float32Array(output.data.slice(0, DIMENSION));
    }

    /**
     * Encode several texts, one after another, in input order.
     *
     * @param {readonly string[]} texts - Texts to embed.
     * @returns {Promise<Float32Array[]>} One vector per input text.
     */
    async encodeBatch(texts) {
        const results = [];
        // Process one at a time to control memory
        for (const text of texts) {
            results.push(await this.encode(text));
        }
        return results;
    }
}
|
|
57
|
+
/**
 * Deterministic stand-in for a real embedding model, intended for tests.
 *
 * It never loads anything: preset vectors can be supplied per input text, and
 * any other text is mapped to a unit-length vector derived from its character codes.
 */
export class MockEmbeddingModel {
    /** Length of every generated vector. */
    dimension = DIMENSION;
    /** The mock is always ready. */
    isLoaded = true;
    /** Preset vectors keyed by the exact input text. */
    mockVectors;

    /**
     * @param {Map<string, Float32Array>} [mockVectors] - Optional preset vectors keyed by text.
     */
    constructor(mockVectors) {
        this.mockVectors = mockVectors ?? new Map();
    }

    /** No-op for mock. */
    async initialize() {
        // Nothing to load.
    }

    /**
     * Return the preset vector for `text` if one was supplied (the same instance, not a
     * copy), otherwise a deterministic unit-length vector computed from the text.
     *
     * @param {string} text - Text to embed; may be empty.
     * @returns {Promise<Float32Array>} A vector of DIMENSION finite values.
     */
    async encode(text) {
        const preset = this.mockVectors.get(text);
        if (preset) {
            return preset;
        }
        // Generate deterministic vector from the text's character codes
        const vec = new Float32Array(DIMENSION);
        if (text.length > 0) {
            for (let i = 0; i < DIMENSION; i++) {
                vec[i] = Math.sin(text.charCodeAt(i % text.length) * (i + 1) * 0.01);
            }
        }
        // Normalize
        let sumOfSquares = 0;
        for (let i = 0; i < DIMENSION; i++) {
            sumOfSquares += vec[i] * vec[i];
        }
        const mag = Math.sqrt(sumOfSquares);
        if (mag === 0) {
            // An all-zero vector (empty text, or only NUL characters) cannot be normalized;
            // fall back to a fixed unit vector instead of dividing by zero and returning NaN.
            vec[0] = 1;
            return vec;
        }
        for (let i = 0; i < DIMENSION; i++) {
            vec[i] /= mag;
        }
        return vec;
    }

    /**
     * Encode several texts.
     *
     * @param {readonly string[]} texts - Texts to embed.
     * @returns {Promise<Float32Array[]>} One vector per input text, in input order.
     */
    async encodeBatch(texts) {
        return Promise.all(texts.map((t) => this.encode(t)));
    }
}
|
|
90
|
+
//# sourceMappingURL=model-loader.js.map
|