agent-threat-rules 0.3.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +190 -54
- package/package.json +3 -1
- package/rules/agent-manipulation/{ATR-2026-030-cross-agent-attack.yaml → ATR-2026-00030-cross-agent-attack.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-032-goal-hijacking.yaml → ATR-2026-00032-goal-hijacking.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-074-cross-agent-privilege-escalation.yaml → ATR-2026-00074-cross-agent-privilege-escalation.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-076-inter-agent-message-spoofing.yaml → ATR-2026-00076-inter-agent-message-spoofing.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-077-human-trust-exploitation.yaml → ATR-2026-00077-human-trust-exploitation.yaml} +3 -1
- package/rules/agent-manipulation/{ATR-2026-108-consensus-sybil-attack.yaml → ATR-2026-00108-consensus-sybil-attack.yaml} +3 -1
- package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml +92 -0
- package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml +92 -0
- package/rules/agent-manipulation/ATR-2026-00118-approval-fatigue.yaml +89 -0
- package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml +89 -0
- package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml +105 -0
- package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml +53 -0
- package/rules/context-exfiltration/{ATR-2026-020-system-prompt-leak.yaml → ATR-2026-00020-system-prompt-leak.yaml} +3 -1
- package/rules/context-exfiltration/{ATR-2026-021-api-key-exposure.yaml → ATR-2026-00021-api-key-exposure.yaml} +3 -1
- package/rules/context-exfiltration/{ATR-2026-075-agent-memory-manipulation.yaml → ATR-2026-00075-agent-memory-manipulation.yaml} +3 -1
- package/rules/context-exfiltration/{ATR-2026-102-disguised-analytics-exfiltration.yaml → ATR-2026-00102-disguised-analytics-exfiltration.yaml} +3 -1
- package/rules/context-exfiltration/ATR-2026-00113-credential-theft.yaml +89 -0
- package/rules/context-exfiltration/ATR-2026-00114-oauth-token-abuse.yaml +89 -0
- package/rules/context-exfiltration/ATR-2026-00115-env-var-harvesting.yaml +90 -0
- package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml +100 -0
- package/rules/context-exfiltration/ATR-2026-00141-example-format-key-leak.yaml +52 -0
- package/rules/context-exfiltration/ATR-2026-00142-piggyback-transition-words.yaml +55 -0
- package/rules/context-exfiltration/ATR-2026-00145-obfuscated-key-disclosure.yaml +49 -0
- package/rules/context-exfiltration/ATR-2026-00146-env-var-existence-probe.yaml +49 -0
- package/rules/data-poisoning/{ATR-2026-070-data-poisoning.yaml → ATR-2026-00070-data-poisoning.yaml} +3 -1
- package/rules/excessive-autonomy/{ATR-2026-050-runaway-agent-loop.yaml → ATR-2026-00050-runaway-agent-loop.yaml} +3 -1
- package/rules/excessive-autonomy/{ATR-2026-051-resource-exhaustion.yaml → ATR-2026-00051-resource-exhaustion.yaml} +3 -1
- package/rules/excessive-autonomy/{ATR-2026-052-cascading-failure.yaml → ATR-2026-00052-cascading-failure.yaml} +3 -1
- package/rules/excessive-autonomy/{ATR-2026-098-unauthorized-financial-action.yaml → ATR-2026-00098-unauthorized-financial-action.yaml} +3 -1
- package/rules/excessive-autonomy/{ATR-2026-099-high-risk-tool-gate.yaml → ATR-2026-00099-high-risk-tool-gate.yaml} +4 -2
- package/rules/model-security/{ATR-2026-072-model-behavior-extraction.yaml → ATR-2026-00072-model-behavior-extraction.yaml} +3 -1
- package/rules/model-security/{ATR-2026-073-malicious-finetuning-data.yaml → ATR-2026-00073-malicious-finetuning-data.yaml} +3 -1
- package/rules/privilege-escalation/{ATR-2026-040-privilege-escalation.yaml → ATR-2026-00040-privilege-escalation.yaml} +3 -1
- package/rules/privilege-escalation/{ATR-2026-041-scope-creep.yaml → ATR-2026-00041-scope-creep.yaml} +3 -1
- package/rules/privilege-escalation/{ATR-2026-107-delayed-execution-bypass.yaml → ATR-2026-00107-delayed-execution-bypass.yaml} +3 -1
- package/rules/privilege-escalation/ATR-2026-00110-eval-injection.yaml +92 -0
- package/rules/privilege-escalation/ATR-2026-00111-shell-escape.yaml +93 -0
- package/rules/privilege-escalation/ATR-2026-00112-dynamic-import-exploitation.yaml +89 -0
- package/rules/privilege-escalation/ATR-2026-00143-casual-privilege-escalation.yaml +53 -0
- package/rules/privilege-escalation/ATR-2026-00144-rationalized-safety-bypass.yaml +49 -0
- package/rules/prompt-injection/{ATR-2026-001-direct-prompt-injection.yaml → ATR-2026-00001-direct-prompt-injection.yaml} +121 -11
- package/rules/prompt-injection/{ATR-2026-002-indirect-prompt-injection.yaml → ATR-2026-00002-indirect-prompt-injection.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-003-jailbreak-attempt.yaml → ATR-2026-00003-jailbreak-attempt.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-004-system-prompt-override.yaml → ATR-2026-00004-system-prompt-override.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-005-multi-turn-injection.yaml → ATR-2026-00005-multi-turn-injection.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-080-encoding-evasion.yaml → ATR-2026-00080-encoding-evasion.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-081-semantic-multi-turn.yaml → ATR-2026-00081-semantic-multi-turn.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-082-fingerprint-evasion.yaml → ATR-2026-00082-fingerprint-evasion.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-083-indirect-tool-injection.yaml → ATR-2026-00083-indirect-tool-injection.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-084-structured-data-injection.yaml → ATR-2026-00084-structured-data-injection.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-085-audit-evasion.yaml → ATR-2026-00085-audit-evasion.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-086-visual-spoofing.yaml → ATR-2026-00086-visual-spoofing.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-087-rule-probing.yaml → ATR-2026-00087-rule-probing.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-088-adaptive-countermeasure.yaml → ATR-2026-00088-adaptive-countermeasure.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-089-polymorphic-skill.yaml → ATR-2026-00089-polymorphic-skill.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-090-threat-intel-exfil.yaml → ATR-2026-00090-threat-intel-exfil.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-091-nested-payload.yaml → ATR-2026-00091-nested-payload.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-092-consensus-poisoning.yaml → ATR-2026-00092-consensus-poisoning.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-093-gradual-escalation.yaml → ATR-2026-00093-gradual-escalation.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-094-audit-bypass.yaml → ATR-2026-00094-audit-bypass.yaml} +3 -1
- package/rules/prompt-injection/{ATR-2026-097-cjk-injection-patterns.yaml → ATR-2026-00097-cjk-injection-patterns.yaml} +18 -1
- package/rules/prompt-injection/{ATR-2026-104-persona-hijacking.yaml → ATR-2026-00104-persona-hijacking.yaml} +3 -1
- package/rules/prompt-injection/ATR-2026-00130-indirect-authority-claim.yaml +103 -0
- package/rules/prompt-injection/ATR-2026-00131-fictional-academic-framing.yaml +99 -0
- package/rules/prompt-injection/ATR-2026-00133-paraphrase-injection.yaml +117 -0
- package/rules/prompt-injection/ATR-2026-00137-authority-claim-injection.yaml +52 -0
- package/rules/prompt-injection/ATR-2026-00138-fictional-framing-bypass.yaml +51 -0
- package/rules/prompt-injection/ATR-2026-00140-indirect-reference-reversal.yaml +52 -0
- package/rules/prompt-injection/ATR-2026-00148-language-switch-injection.yaml +71 -0
- package/rules/skill-compromise/{ATR-2026-060-skill-impersonation.yaml → ATR-2026-00060-skill-impersonation.yaml} +3 -1
- package/rules/skill-compromise/{ATR-2026-061-description-behavior-mismatch.yaml → ATR-2026-00061-description-behavior-mismatch.yaml} +4 -2
- package/rules/skill-compromise/{ATR-2026-062-hidden-capability.yaml → ATR-2026-00062-hidden-capability.yaml} +3 -1
- package/rules/skill-compromise/{ATR-2026-063-skill-chain-attack.yaml → ATR-2026-00063-skill-chain-attack.yaml} +5 -2
- package/rules/skill-compromise/{ATR-2026-064-over-permissioned-skill.yaml → ATR-2026-00064-over-permissioned-skill.yaml} +3 -1
- package/rules/skill-compromise/{ATR-2026-065-skill-update-attack.yaml → ATR-2026-00065-skill-update-attack.yaml} +3 -1
- package/rules/skill-compromise/{ATR-2026-066-parameter-injection.yaml → ATR-2026-00066-parameter-injection.yaml} +3 -1
- package/rules/skill-compromise/ATR-2026-00120-skill-instruction-injection.yaml +121 -0
- package/rules/skill-compromise/ATR-2026-00121-skill-dangerous-script.yaml +165 -0
- package/rules/skill-compromise/ATR-2026-00122-skill-weaponized-instruction.yaml +114 -0
- package/rules/skill-compromise/ATR-2026-00123-skill-overreach-permissions.yaml +118 -0
- package/rules/skill-compromise/ATR-2026-00124-skill-name-squatting.yaml +98 -0
- package/rules/skill-compromise/ATR-2026-00125-context-poisoning-compaction.yaml +93 -0
- package/rules/skill-compromise/ATR-2026-00126-skill-rug-pull-setup.yaml +99 -0
- package/rules/skill-compromise/ATR-2026-00127-subcommand-overflow.yaml +74 -0
- package/rules/skill-compromise/ATR-2026-00128-html-comment-hidden-payload.yaml +79 -0
- package/rules/skill-compromise/ATR-2026-00129-unicode-smuggling.yaml +73 -0
- package/rules/skill-compromise/ATR-2026-00134-fork-claim-impersonation.yaml +93 -0
- package/rules/skill-compromise/ATR-2026-00135-exfil-url-in-instructions.yaml +82 -0
- package/rules/skill-compromise/ATR-2026-00147-fork-impersonation.yaml +48 -0
- package/rules/tool-poisoning/{ATR-2026-010-mcp-malicious-response.yaml → ATR-2026-00010-mcp-malicious-response.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-011-tool-output-injection.yaml → ATR-2026-00011-tool-output-injection.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-012-unauthorized-tool-call.yaml → ATR-2026-00012-unauthorized-tool-call.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-013-tool-ssrf.yaml → ATR-2026-00013-tool-ssrf.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-095-supply-chain-poisoning.yaml → ATR-2026-00095-supply-chain-poisoning.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-096-registry-poisoning.yaml → ATR-2026-00096-registry-poisoning.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-100-consent-bypass-instruction.yaml → ATR-2026-00100-consent-bypass-instruction.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-101-trust-escalation-override.yaml → ATR-2026-00101-trust-escalation-override.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-103-hidden-safety-bypass-instruction.yaml → ATR-2026-00103-hidden-safety-bypass-instruction.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-105-silent-action-concealment.yaml → ATR-2026-00105-silent-action-concealment.yaml} +3 -1
- package/rules/tool-poisoning/{ATR-2026-106-schema-description-contradiction.yaml → ATR-2026-00106-schema-description-contradiction.yaml} +3 -1
- package/spec/atr-schema.yaml +32 -3
- package/dist/action-executor.d.ts +0 -44
- package/dist/action-executor.d.ts.map +0 -1
- package/dist/action-executor.js +0 -130
- package/dist/action-executor.js.map +0 -1
- package/dist/adapters/default-adapter.d.ts +0 -24
- package/dist/adapters/default-adapter.d.ts.map +0 -1
- package/dist/adapters/default-adapter.js +0 -51
- package/dist/adapters/default-adapter.js.map +0 -1
- package/dist/adapters/stdio-adapter.d.ts +0 -30
- package/dist/adapters/stdio-adapter.d.ts.map +0 -1
- package/dist/adapters/stdio-adapter.js +0 -128
- package/dist/adapters/stdio-adapter.js.map +0 -1
- package/dist/capability-extractor.d.ts +0 -35
- package/dist/capability-extractor.d.ts.map +0 -1
- package/dist/capability-extractor.js +0 -91
- package/dist/capability-extractor.js.map +0 -1
- package/dist/cli.d.ts +0 -12
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js +0 -820
- package/dist/cli.js.map +0 -1
- package/dist/converters/elastic.d.ts +0 -36
- package/dist/converters/elastic.d.ts.map +0 -1
- package/dist/converters/elastic.js +0 -125
- package/dist/converters/elastic.js.map +0 -1
- package/dist/converters/index.d.ts +0 -28
- package/dist/converters/index.d.ts.map +0 -1
- package/dist/converters/index.js +0 -36
- package/dist/converters/index.js.map +0 -1
- package/dist/converters/splunk.d.ts +0 -19
- package/dist/converters/splunk.d.ts.map +0 -1
- package/dist/converters/splunk.js +0 -148
- package/dist/converters/splunk.js.map +0 -1
- package/dist/coverage-analyzer.d.ts +0 -43
- package/dist/coverage-analyzer.d.ts.map +0 -1
- package/dist/coverage-analyzer.js +0 -329
- package/dist/coverage-analyzer.js.map +0 -1
- package/dist/embedding/build-corpus.d.ts +0 -15
- package/dist/embedding/build-corpus.d.ts.map +0 -1
- package/dist/embedding/build-corpus.js +0 -105
- package/dist/embedding/build-corpus.js.map +0 -1
- package/dist/embedding/model-loader.d.ts +0 -41
- package/dist/embedding/model-loader.d.ts.map +0 -1
- package/dist/embedding/model-loader.js +0 -90
- package/dist/embedding/model-loader.js.map +0 -1
- package/dist/embedding/vector-store.d.ts +0 -41
- package/dist/embedding/vector-store.d.ts.map +0 -1
- package/dist/embedding/vector-store.js +0 -70
- package/dist/embedding/vector-store.js.map +0 -1
- package/dist/engine.d.ts +0 -163
- package/dist/engine.d.ts.map +0 -1
- package/dist/engine.js +0 -869
- package/dist/engine.js.map +0 -1
- package/dist/eval/corpus.d.ts +0 -42
- package/dist/eval/corpus.d.ts.map +0 -1
- package/dist/eval/corpus.js +0 -427
- package/dist/eval/corpus.js.map +0 -1
- package/dist/eval/eval-harness.d.ts +0 -44
- package/dist/eval/eval-harness.d.ts.map +0 -1
- package/dist/eval/eval-harness.js +0 -296
- package/dist/eval/eval-harness.js.map +0 -1
- package/dist/eval/index.d.ts +0 -13
- package/dist/eval/index.d.ts.map +0 -1
- package/dist/eval/index.js +0 -9
- package/dist/eval/index.js.map +0 -1
- package/dist/eval/metrics.d.ts +0 -74
- package/dist/eval/metrics.d.ts.map +0 -1
- package/dist/eval/metrics.js +0 -108
- package/dist/eval/metrics.js.map +0 -1
- package/dist/eval/pint-corpus.d.ts +0 -34
- package/dist/eval/pint-corpus.d.ts.map +0 -1
- package/dist/eval/pint-corpus.js +0 -109
- package/dist/eval/pint-corpus.js.map +0 -1
- package/dist/eval/rule-corpus.d.ts +0 -9
- package/dist/eval/rule-corpus.d.ts.map +0 -1
- package/dist/eval/rule-corpus.js +0 -4780
- package/dist/eval/rule-corpus.js.map +0 -1
- package/dist/eval/rule-metrics.d.ts +0 -34
- package/dist/eval/rule-metrics.d.ts.map +0 -1
- package/dist/eval/rule-metrics.js +0 -92
- package/dist/eval/rule-metrics.js.map +0 -1
- package/dist/eval/run-eval.d.ts +0 -7
- package/dist/eval/run-eval.d.ts.map +0 -1
- package/dist/eval/run-eval.js +0 -11
- package/dist/eval/run-eval.js.map +0 -1
- package/dist/eval/run-pint-benchmark.d.ts +0 -18
- package/dist/eval/run-pint-benchmark.d.ts.map +0 -1
- package/dist/eval/run-pint-benchmark.js +0 -157
- package/dist/eval/run-pint-benchmark.js.map +0 -1
- package/dist/flywheel.d.ts +0 -54
- package/dist/flywheel.d.ts.map +0 -1
- package/dist/flywheel.js +0 -121
- package/dist/flywheel.js.map +0 -1
- package/dist/hook-handler.d.ts +0 -61
- package/dist/hook-handler.d.ts.map +0 -1
- package/dist/hook-handler.js +0 -178
- package/dist/hook-handler.js.map +0 -1
- package/dist/index.d.ts +0 -62
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -54
- package/dist/index.js.map +0 -1
- package/dist/layer-integration.d.ts +0 -55
- package/dist/layer-integration.d.ts.map +0 -1
- package/dist/layer-integration.js +0 -185
- package/dist/layer-integration.js.map +0 -1
- package/dist/loader.d.ts +0 -21
- package/dist/loader.d.ts.map +0 -1
- package/dist/loader.js +0 -124
- package/dist/loader.js.map +0 -1
- package/dist/mcp-server.d.ts +0 -13
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js +0 -220
- package/dist/mcp-server.js.map +0 -1
- package/dist/mcp-tools/coverage-gaps.d.ts +0 -13
- package/dist/mcp-tools/coverage-gaps.d.ts.map +0 -1
- package/dist/mcp-tools/coverage-gaps.js +0 -55
- package/dist/mcp-tools/coverage-gaps.js.map +0 -1
- package/dist/mcp-tools/list-rules.d.ts +0 -17
- package/dist/mcp-tools/list-rules.d.ts.map +0 -1
- package/dist/mcp-tools/list-rules.js +0 -45
- package/dist/mcp-tools/list-rules.js.map +0 -1
- package/dist/mcp-tools/scan.d.ts +0 -24
- package/dist/mcp-tools/scan.d.ts.map +0 -1
- package/dist/mcp-tools/scan.js +0 -94
- package/dist/mcp-tools/scan.js.map +0 -1
- package/dist/mcp-tools/submit-proposal.d.ts +0 -12
- package/dist/mcp-tools/submit-proposal.d.ts.map +0 -1
- package/dist/mcp-tools/submit-proposal.js +0 -103
- package/dist/mcp-tools/submit-proposal.js.map +0 -1
- package/dist/mcp-tools/threat-summary.d.ts +0 -12
- package/dist/mcp-tools/threat-summary.d.ts.map +0 -1
- package/dist/mcp-tools/threat-summary.js +0 -74
- package/dist/mcp-tools/threat-summary.js.map +0 -1
- package/dist/mcp-tools/validate.d.ts +0 -15
- package/dist/mcp-tools/validate.d.ts.map +0 -1
- package/dist/mcp-tools/validate.js +0 -45
- package/dist/mcp-tools/validate.js.map +0 -1
- package/dist/modules/embedding.d.ts +0 -71
- package/dist/modules/embedding.d.ts.map +0 -1
- package/dist/modules/embedding.js +0 -141
- package/dist/modules/embedding.js.map +0 -1
- package/dist/modules/index.d.ts +0 -144
- package/dist/modules/index.d.ts.map +0 -1
- package/dist/modules/index.js +0 -82
- package/dist/modules/index.js.map +0 -1
- package/dist/modules/semantic.d.ts +0 -106
- package/dist/modules/semantic.d.ts.map +0 -1
- package/dist/modules/semantic.js +0 -359
- package/dist/modules/semantic.js.map +0 -1
- package/dist/modules/session.d.ts +0 -70
- package/dist/modules/session.d.ts.map +0 -1
- package/dist/modules/session.js +0 -128
- package/dist/modules/session.js.map +0 -1
- package/dist/rule-scaffolder.d.ts +0 -53
- package/dist/rule-scaffolder.d.ts.map +0 -1
- package/dist/rule-scaffolder.js +0 -301
- package/dist/rule-scaffolder.js.map +0 -1
- package/dist/session-tracker.d.ts +0 -58
- package/dist/session-tracker.d.ts.map +0 -1
- package/dist/session-tracker.js +0 -176
- package/dist/session-tracker.js.map +0 -1
- package/dist/shadow-evaluator.d.ts +0 -48
- package/dist/shadow-evaluator.d.ts.map +0 -1
- package/dist/shadow-evaluator.js +0 -128
- package/dist/shadow-evaluator.js.map +0 -1
- package/dist/skill-fingerprint.d.ts +0 -85
- package/dist/skill-fingerprint.d.ts.map +0 -1
- package/dist/skill-fingerprint.js +0 -284
- package/dist/skill-fingerprint.js.map +0 -1
- package/dist/tier0-invariant.d.ts +0 -49
- package/dist/tier0-invariant.d.ts.map +0 -1
- package/dist/tier0-invariant.js +0 -184
- package/dist/tier0-invariant.js.map +0 -1
- package/dist/tier1-blacklist.d.ts +0 -48
- package/dist/tier1-blacklist.d.ts.map +0 -1
- package/dist/tier1-blacklist.js +0 -91
- package/dist/tier1-blacklist.js.map +0 -1
- package/dist/types.d.ts +0 -190
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -6
- package/dist/types.js.map +0 -1
- package/dist/verdict.d.ts +0 -26
- package/dist/verdict.d.ts.map +0 -1
- package/dist/verdict.js +0 -127
- package/dist/verdict.js.map +0 -1
|
@@ -1,296 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Evaluation Harness -- orchestrates running the corpus through the ATR engine
|
|
3
|
-
* and produces a structured EvalReport.
|
|
4
|
-
*
|
|
5
|
-
* Supports:
|
|
6
|
-
* - Regex-only evaluation (Tier 2)
|
|
7
|
-
* - Regex + Embedding evaluation (Tier 2 + 2.5)
|
|
8
|
-
* - Full pipeline evaluation (all tiers)
|
|
9
|
-
* - Per-sample latency measurement
|
|
10
|
-
* - Regression check against baseline thresholds
|
|
11
|
-
*
|
|
12
|
-
* @module agent-threat-rules/eval/eval-harness
|
|
13
|
-
*/
|
|
14
|
-
import { resolve, join } from 'node:path';
|
|
15
|
-
import { existsSync, readFileSync, writeFileSync } from 'node:fs';
|
|
16
|
-
import { ATREngine } from '../engine.js';
|
|
17
|
-
import { EmbeddingModule } from '../modules/embedding.js';
|
|
18
|
-
import { EVAL_CORPUS, getCorpusStats } from './corpus.js';
|
|
19
|
-
import { computeEvalReport, checkRegression } from './metrics.js';
|
|
20
|
-
import { computeRuleQuality } from './rule-metrics.js';
|
|
21
|
-
/**
|
|
22
|
-
* Convert a corpus sample to an AgentEvent.
|
|
23
|
-
*/
|
|
24
|
-
function sampleToEvent(sample) {
|
|
25
|
-
return {
|
|
26
|
-
type: sample.eventType,
|
|
27
|
-
content: sample.text,
|
|
28
|
-
timestamp: new Date().toISOString(),
|
|
29
|
-
fields: sample.fields,
|
|
30
|
-
};
|
|
31
|
-
}
|
|
32
|
-
/**
|
|
33
|
-
* Run a single sample through the engine (regex only) and measure results.
|
|
34
|
-
* Catches engine errors so a single bad sample doesn't abort the entire eval.
|
|
35
|
-
*/
|
|
36
|
-
function evaluateSampleRegex(engine, sample) {
|
|
37
|
-
const event = sampleToEvent(sample);
|
|
38
|
-
try {
|
|
39
|
-
const start = performance.now();
|
|
40
|
-
const matches = engine.evaluate(event);
|
|
41
|
-
const latencyMs = performance.now() - start;
|
|
42
|
-
const detected = matches.length > 0;
|
|
43
|
-
const topMatch = matches[0];
|
|
44
|
-
return {
|
|
45
|
-
id: sample.id,
|
|
46
|
-
category: sample.category,
|
|
47
|
-
expectedDetection: sample.expectedDetection,
|
|
48
|
-
actualDetection: detected,
|
|
49
|
-
matchedRules: matches.map((m) => m.rule.id),
|
|
50
|
-
confidence: topMatch?.confidence ?? 0,
|
|
51
|
-
latencyMs,
|
|
52
|
-
difficulty: sample.difficulty,
|
|
53
|
-
tier: sample.tier,
|
|
54
|
-
};
|
|
55
|
-
}
|
|
56
|
-
catch {
|
|
57
|
-
return {
|
|
58
|
-
id: sample.id,
|
|
59
|
-
category: sample.category,
|
|
60
|
-
expectedDetection: sample.expectedDetection,
|
|
61
|
-
actualDetection: false,
|
|
62
|
-
matchedRules: [],
|
|
63
|
-
confidence: 0,
|
|
64
|
-
latencyMs: 0,
|
|
65
|
-
difficulty: sample.difficulty,
|
|
66
|
-
tier: sample.tier,
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
/**
|
|
71
|
-
* Run a single sample through the full pipeline (regex + embedding) and measure results.
|
|
72
|
-
*/
|
|
73
|
-
async function evaluateSampleFull(engine, sample) {
|
|
74
|
-
const event = sampleToEvent(sample);
|
|
75
|
-
try {
|
|
76
|
-
const start = performance.now();
|
|
77
|
-
const { verdict } = await engine.evaluateWithVerdict(event);
|
|
78
|
-
const latencyMs = performance.now() - start;
|
|
79
|
-
const detected = verdict.matchCount > 0;
|
|
80
|
-
return {
|
|
81
|
-
id: sample.id,
|
|
82
|
-
category: sample.category,
|
|
83
|
-
expectedDetection: sample.expectedDetection,
|
|
84
|
-
actualDetection: detected,
|
|
85
|
-
matchedRules: verdict.matches.map((m) => m.rule.id),
|
|
86
|
-
confidence: verdict.highestConfidence,
|
|
87
|
-
latencyMs,
|
|
88
|
-
difficulty: sample.difficulty,
|
|
89
|
-
tier: sample.tier,
|
|
90
|
-
};
|
|
91
|
-
}
|
|
92
|
-
catch {
|
|
93
|
-
return {
|
|
94
|
-
id: sample.id,
|
|
95
|
-
category: sample.category,
|
|
96
|
-
expectedDetection: sample.expectedDetection,
|
|
97
|
-
actualDetection: false,
|
|
98
|
-
matchedRules: [],
|
|
99
|
-
confidence: 0,
|
|
100
|
-
latencyMs: 0,
|
|
101
|
-
difficulty: sample.difficulty,
|
|
102
|
-
tier: sample.tier,
|
|
103
|
-
};
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
/**
|
|
107
|
-
* Try to load the embedding module. Returns null if unavailable.
|
|
108
|
-
*/
|
|
109
|
-
async function tryLoadEmbedding(embeddingsPath) {
|
|
110
|
-
if (!existsSync(embeddingsPath))
|
|
111
|
-
return null;
|
|
112
|
-
try {
|
|
113
|
-
const data = JSON.parse(readFileSync(embeddingsPath, 'utf-8'));
|
|
114
|
-
const module = new EmbeddingModule({
|
|
115
|
-
attackVectorsData: data,
|
|
116
|
-
similarityThreshold: 0.65,
|
|
117
|
-
});
|
|
118
|
-
await module.initialize();
|
|
119
|
-
return module.isAvailable() ? module : null;
|
|
120
|
-
}
|
|
121
|
-
catch {
|
|
122
|
-
return null;
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
/**
|
|
126
|
-
* Run the full evaluation harness.
|
|
127
|
-
* Returns the EvalReport and RegressionCheck.
|
|
128
|
-
*/
|
|
129
|
-
export async function runEval(config) {
|
|
130
|
-
const corpus = config.corpus ?? EVAL_CORPUS;
|
|
131
|
-
const base = resolve(config.rulesDir, '..');
|
|
132
|
-
const embeddingsPath = config.embeddingsPath ?? join(base, 'data', 'attack-embeddings.json');
|
|
133
|
-
// Try to load embedding module
|
|
134
|
-
const shouldEmbed = config.enableEmbedding !== false;
|
|
135
|
-
let embeddingModule = null;
|
|
136
|
-
const tiersUsed = ['tier2-regex'];
|
|
137
|
-
if (shouldEmbed) {
|
|
138
|
-
embeddingModule = await tryLoadEmbedding(embeddingsPath);
|
|
139
|
-
if (embeddingModule) {
|
|
140
|
-
tiersUsed.push('tier2.5-embedding');
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
|
-
// Initialize engine
|
|
144
|
-
const engine = new ATREngine({
|
|
145
|
-
rulesDir: config.rulesDir,
|
|
146
|
-
embeddingModule: embeddingModule ?? undefined,
|
|
147
|
-
});
|
|
148
|
-
const ruleCount = await engine.loadRules();
|
|
149
|
-
if (ruleCount === 0) {
|
|
150
|
-
throw new Error(`No rules loaded from ${config.rulesDir}`);
|
|
151
|
-
}
|
|
152
|
-
// Run all samples
|
|
153
|
-
const results = [];
|
|
154
|
-
const useFullPipeline = embeddingModule !== null;
|
|
155
|
-
for (const sample of corpus) {
|
|
156
|
-
const result = useFullPipeline
|
|
157
|
-
? await evaluateSampleFull(engine, sample)
|
|
158
|
-
: evaluateSampleRegex(engine, sample);
|
|
159
|
-
results.push(result);
|
|
160
|
-
}
|
|
161
|
-
// Cleanup embedding module
|
|
162
|
-
if (embeddingModule) {
|
|
163
|
-
await embeddingModule.destroy();
|
|
164
|
-
}
|
|
165
|
-
// Compute report
|
|
166
|
-
const report = computeEvalReport(results);
|
|
167
|
-
const regression = checkRegression(report, config.thresholds);
|
|
168
|
-
const corpusStats = getCorpusStats();
|
|
169
|
-
// Compute per-rule quality
|
|
170
|
-
const loadedRuleIds = engine.getRules().map((r) => r.id);
|
|
171
|
-
const ruleQuality = computeRuleQuality(results, loadedRuleIds);
|
|
172
|
-
// Save report if output path specified
|
|
173
|
-
if (config.outputPath) {
|
|
174
|
-
const output = {
|
|
175
|
-
report,
|
|
176
|
-
regression,
|
|
177
|
-
corpusStats,
|
|
178
|
-
ruleQuality,
|
|
179
|
-
ruleCount,
|
|
180
|
-
engine: 'ATREngine',
|
|
181
|
-
tiers: tiersUsed,
|
|
182
|
-
};
|
|
183
|
-
writeFileSync(config.outputPath, JSON.stringify(output, null, 2));
|
|
184
|
-
}
|
|
185
|
-
return { report, regression, corpusStats, tiersUsed, ruleQuality };
|
|
186
|
-
}
|
|
187
|
-
// ---------------------------------------------------------------------------
|
|
188
|
-
// CLI entry point
|
|
189
|
-
// ---------------------------------------------------------------------------
|
|
190
|
-
function formatPercent(n) {
|
|
191
|
-
return `${(n * 100).toFixed(1)}%`;
|
|
192
|
-
}
|
|
193
|
-
function formatMs(n) {
|
|
194
|
-
return `${n.toFixed(2)}ms`;
|
|
195
|
-
}
|
|
196
|
-
export async function runEvalCLI() {
|
|
197
|
-
const base = resolve(join(import.meta.dirname ?? '.', '..', '..'));
|
|
198
|
-
const rulesDir = join(base, 'rules');
|
|
199
|
-
const outputPath = join(base, 'data', 'eval-report.json');
|
|
200
|
-
console.log('\n=== ATR Evaluation Harness ===\n');
|
|
201
|
-
const { report, regression, corpusStats, tiersUsed, ruleQuality } = await runEval({
|
|
202
|
-
rulesDir,
|
|
203
|
-
outputPath,
|
|
204
|
-
});
|
|
205
|
-
// Corpus stats
|
|
206
|
-
console.log(`Corpus: ${corpusStats.total} samples (${corpusStats.attacks} attacks, ${corpusStats.benign} benign)`);
|
|
207
|
-
console.log(`Categories: ${Object.keys(corpusStats.byCategory).join(', ')}`);
|
|
208
|
-
console.log(`Tiers: ${tiersUsed.join(' + ')}`);
|
|
209
|
-
// Overall metrics
|
|
210
|
-
console.log(`\n--- Overall ---`);
|
|
211
|
-
console.log(` Precision: ${formatPercent(report.overall.precision)}`);
|
|
212
|
-
console.log(` Recall: ${formatPercent(report.overall.recall)}`);
|
|
213
|
-
console.log(` F1: ${formatPercent(report.overall.f1)}`);
|
|
214
|
-
console.log(` Accuracy: ${formatPercent(report.overall.accuracy)}`);
|
|
215
|
-
console.log(` FP Rate: ${formatPercent(report.overall.fpRate)}`);
|
|
216
|
-
console.log(` Confusion: TP=${report.overall.confusion.tp} FP=${report.overall.confusion.fp} TN=${report.overall.confusion.tn} FN=${report.overall.confusion.fn}`);
|
|
217
|
-
// Latency
|
|
218
|
-
console.log(`\n--- Latency ---`);
|
|
219
|
-
console.log(` P50: ${formatMs(report.latency.p50)}`);
|
|
220
|
-
console.log(` P95: ${formatMs(report.latency.p95)}`);
|
|
221
|
-
console.log(` P99: ${formatMs(report.latency.p99)}`);
|
|
222
|
-
console.log(` Mean: ${formatMs(report.latency.mean)}`);
|
|
223
|
-
console.log(` Max: ${formatMs(report.latency.max)}`);
|
|
224
|
-
// Per category
|
|
225
|
-
console.log(`\n--- By Category ---`);
|
|
226
|
-
for (const cat of report.byCategory) {
|
|
227
|
-
const missed = cat.missedSamples.length > 0 ? ` (missed: ${cat.missedSamples.join(', ')})` : '';
|
|
228
|
-
const fps = cat.falsePositives.length > 0 ? ` (FP: ${cat.falsePositives.join(', ')})` : '';
|
|
229
|
-
console.log(` ${cat.category}: recall=${formatPercent(cat.metrics.recall)} precision=${formatPercent(cat.metrics.precision)} f1=${formatPercent(cat.metrics.f1)}${missed}${fps}`);
|
|
230
|
-
}
|
|
231
|
-
// Per difficulty
|
|
232
|
-
console.log(`\n--- By Difficulty ---`);
|
|
233
|
-
for (const diff of report.byDifficulty) {
|
|
234
|
-
console.log(` ${diff.difficulty}: recall=${formatPercent(diff.metrics.recall)} precision=${formatPercent(diff.metrics.precision)} f1=${formatPercent(diff.metrics.f1)}`);
|
|
235
|
-
}
|
|
236
|
-
// Missed attacks
|
|
237
|
-
if (report.missedAttacks.length > 0) {
|
|
238
|
-
console.log(`\n--- Missed Attacks (${report.missedAttacks.length}) ---`);
|
|
239
|
-
for (const m of report.missedAttacks) {
|
|
240
|
-
console.log(` [${m.id}] ${m.category}/${m.difficulty}/${m.tier}`);
|
|
241
|
-
}
|
|
242
|
-
}
|
|
243
|
-
// False positives
|
|
244
|
-
if (report.falsePositives.length > 0) {
|
|
245
|
-
console.log(`\n--- False Positives (${report.falsePositives.length}) ---`);
|
|
246
|
-
for (const fp of report.falsePositives) {
|
|
247
|
-
console.log(` [${fp.id}] rules: ${fp.matchedRules.join(', ')}`);
|
|
248
|
-
}
|
|
249
|
-
}
|
|
250
|
-
// Rule quality
|
|
251
|
-
console.log(`\n--- Rule Quality ---`);
|
|
252
|
-
console.log(` Total rules loaded: ${ruleQuality.totalRulesEvaluated}`);
|
|
253
|
-
console.log(` Rules fired: ${ruleQuality.rulesFired}`);
|
|
254
|
-
console.log(` Rules never fired: ${ruleQuality.rulesNeverFired}`);
|
|
255
|
-
if (ruleQuality.topRules.length > 0) {
|
|
256
|
-
console.log(`\n Top 10 rules by match count:`);
|
|
257
|
-
for (const rule of ruleQuality.topRules.slice(0, 10)) {
|
|
258
|
-
const precision = rule.matchCount > 0
|
|
259
|
-
? formatPercent(rule.tpCount / rule.matchCount)
|
|
260
|
-
: 'N/A';
|
|
261
|
-
console.log(` ${rule.ruleId}: matches=${rule.matchCount} TP=${rule.tpCount} FP=${rule.fpCount} precision=${precision} avgConf=${rule.avgConfidence.toFixed(2)}`);
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
|
-
if (ruleQuality.weakRules.length > 0) {
|
|
265
|
-
console.log(`\n Weak rules (FP > 0 or matchCount <= 1):`);
|
|
266
|
-
for (const rule of ruleQuality.weakRules.slice(0, 10)) {
|
|
267
|
-
console.log(` ${rule.ruleId}: matches=${rule.matchCount} TP=${rule.tpCount} FP=${rule.fpCount} categories=[${rule.categories.join(', ')}]`);
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
if (ruleQuality.neverFiredRuleIds.length > 0) {
|
|
271
|
-
console.log(`\n Never-fired rules (${ruleQuality.neverFiredRuleIds.length}):`);
|
|
272
|
-
for (const id of ruleQuality.neverFiredRuleIds.slice(0, 20)) {
|
|
273
|
-
console.log(` ${id}`);
|
|
274
|
-
}
|
|
275
|
-
if (ruleQuality.neverFiredRuleIds.length > 20) {
|
|
276
|
-
console.log(` ... and ${ruleQuality.neverFiredRuleIds.length - 20} more`);
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
// Regression check
|
|
280
|
-
console.log(`\n--- Regression Check ---`);
|
|
281
|
-
if (regression.passed) {
|
|
282
|
-
console.log(' PASSED');
|
|
283
|
-
}
|
|
284
|
-
else {
|
|
285
|
-
console.log(' FAILED:');
|
|
286
|
-
for (const v of regression.violations) {
|
|
287
|
-
console.log(` - ${v}`);
|
|
288
|
-
}
|
|
289
|
-
}
|
|
290
|
-
console.log(`\nReport saved to: ${outputPath}`);
|
|
291
|
-
console.log('Done.\n');
|
|
292
|
-
if (!regression.passed) {
|
|
293
|
-
process.exitCode = 1;
|
|
294
|
-
}
|
|
295
|
-
}
|
|
296
|
-
//# sourceMappingURL=eval-harness.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"eval-harness.js","sourceRoot":"","sources":["../../src/eval/eval-harness.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAClE,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAG1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE1D,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAElE,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AAiBvD;;GAEG;AACH,SAAS,aAAa,CAAC,MAAoB;IACzC,OAAO;QACL,IAAI,EAAE,MAAM,CAAC,SAAS;QACtB,OAAO,EAAE,MAAM,CAAC,IAAI;QACpB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,MAAM,EAAE,MAAM,CAAC,MAAM;KACtB,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,SAAS,mBAAmB,CAC1B,MAAiB,EACjB,MAAoB;IAEpB,MAAM,KAAK,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IAEpC,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QACvC,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;QAE5C,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;QACpC,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAE5B,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,QAAQ;YACzB,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,UAAU,EAAE,QAAQ,EAAE,UAAU,IAAI,CAAC;YACrC,SAAS;YACT,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,KAAK;YACtB,YAAY,EAAE,EAAE;YAChB,UAAU,EAAE,CAAC;YACb,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,kBAAkB,CAC/B,MAAiB,EACjB,MAAoB;IAEpB,MAAM,KAAK,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IAEpC,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,KAAK,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,
CAAC;QAE5C,MAAM,QAAQ,GAAG,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC;QAExC,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,QAAQ;YACzB,YAAY,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;YACnD,UAAU,EAAE,OAAO,CAAC,iBAAiB;YACrC,SAAS;YACT,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,KAAK;YACtB,YAAY,EAAE,EAAE;YAChB,UAAU,EAAE,CAAC;YACb,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,gBAAgB,CAAC,cAAsB;IACpD,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC;QAAE,OAAO,IAAI,CAAC;IAE7C,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;YACjC,iBAAiB,EAAE,IAAI;YACvB,mBAAmB,EAAE,IAAI;SAC1B,CAAC,CAAC;QACH,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC;QAC1B,OAAO,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,MAAkB;IAO9C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,WAAW,CAAC;IAC5C,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAC5C,MAAM,cAAc,GAAG,MAAM,CAAC,cAAc,IAAI,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,wBAAwB,CAAC,CAAC;IAE7F,+BAA+B;IAC/B,MAAM,WAAW,GAAG,MAAM,CAAC,eAAe,KAAK,KAAK,CAAC;IACrD,IAAI,eAAe,GAA2B,IAAI,CAAC;IACnD,MAAM,SAAS,GAAa,CAAC,aAAa,CAAC,CAAC;IAE5C,IAAI,WAAW,EAAE,CAAC;QAChB,eAAe,GAAG,MAAM,gBAAgB,CAAC,cAAc,CAAC,CAAC;QACzD,IAAI,eAAe,EAAE,CAAC;YACpB,SAAS,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QAC3B,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,eAAe,EAAE,eAAe,IAAI,SAAS;KAC9C,CAAC,CAAC;IACH,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;IAE3C,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,wBAAwB,MAAM,CAAC,QAAQ,EAAE,
CAAC,CAAC;IAC7D,CAAC;IAED,kBAAkB;IAClB,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,MAAM,eAAe,GAAG,eAAe,KAAK,IAAI,CAAC;IAEjD,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,eAAe;YAC5B,CAAC,CAAC,MAAM,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;YAC1C,CAAC,CAAC,mBAAmB,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,CAAC;IAED,2BAA2B;IAC3B,IAAI,eAAe,EAAE,CAAC;QACpB,MAAM,eAAe,CAAC,OAAO,EAAE,CAAC;IAClC,CAAC;IAED,iBAAiB;IACjB,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,UAAU,GAAG,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAC9D,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IAErC,2BAA2B;IAC3B,MAAM,aAAa,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACzD,MAAM,WAAW,GAAG,kBAAkB,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAE/D,uCAAuC;IACvC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG;YACb,MAAM;YACN,UAAU;YACV,WAAW;YACX,WAAW;YACX,SAAS;YACT,MAAM,EAAE,WAAW;YACnB,KAAK,EAAE,SAAS;SACjB,CAAC;QACF,aAAa,CAAC,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACpE,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;AACrE,CAAC;AAED,8EAA8E;AAC9E,kBAAkB;AAClB,8EAA8E;AAE9E,SAAS,aAAa,CAAC,CAAS;IAC9B,OAAO,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AACpC,CAAC;AAED,SAAS,QAAQ,CAAC,CAAS;IACzB,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IACnE,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACrC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,kBAAkB,CAAC,CAAC;IAE1D,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;IAElD,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,SAAS,EAAE,WAAW,EAAE,GAAG,MAAM,OAAO,CAAC;QAChF,QAAQ;QACR,UAAU;KACX,CAAC,CAAC;IAEH,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,WAAW,WAAW,CAAC,KAAK,aAAa,WAAW,CAAC,OAAO,aAAa,WAAW,CAAC,MAAM,UAAU,CAAC,CAAC;IACnH,OAAO,CAAC,GAAG,CAAC,eAAe,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CA
AC,CAAC;IAC7E,OAAO,CAAC,GAAG,CAAC,UAAU,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAE/C,kBAAkB;IAClB,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IACxE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACrE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;IACjE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACvE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACrE,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,CAAC,CAAC;IAErK,UAAU;IACV,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEvD,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;IACrC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACpC,MAAM,MAAM,GAAG,GAAG,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAChG,MAAM,GAAG,GAAG,GAAG,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAC3F,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,QAAQ,YAAY,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,cAAc,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE,CAAC,CAAC;IACrL,CAAC;IAED,iBAAiB;IACjB,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;IACvC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;QACvC
,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,UAAU,YAAY,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,cAAc,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;IAC5K,CAAC;IAED,iBAAiB;IACjB,IAAI,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,yBAAyB,MAAM,CAAC,aAAa,CAAC,MAAM,OAAO,CAAC,CAAC;QACzE,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;IAED,kBAAkB;IAClB,IAAI,MAAM,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,0BAA0B,MAAM,CAAC,cAAc,CAAC,MAAM,OAAO,CAAC,CAAC;QAC3E,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;YACvC,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;IAED,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;IACtC,OAAO,CAAC,GAAG,CAAC,yBAAyB,WAAW,CAAC,mBAAmB,EAAE,CAAC,CAAC;IACxE,OAAO,CAAC,GAAG,CAAC,kBAAkB,WAAW,CAAC,UAAU,EAAE,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,wBAAwB,WAAW,CAAC,eAAe,EAAE,CAAC,CAAC;IAEnE,IAAI,WAAW,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;QAChD,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YACrD,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,GAAG,CAAC;gBACnC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC;gBAC/C,CAAC,CAAC,KAAK,CAAC;YACV,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,MAAM,aAAa,IAAI,CAAC,UAAU,OAAO,IAAI,CAAC,OAAO,OAAO,IAAI,CAAC,OAAO,cAAc,SAAS,YAAY,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACtK,CAAC;IACH,CAAC;IAED,IAAI,WAAW,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;QAC3D,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YACtD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,MAAM,aAAa,IAAI,CAAC,UAAU,OAAO,IAAI,CAAC,OAAO,OAAO,IAAI,CAAC,OAAO,gBAAgB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjJ,CAAC;IACH,CAAC;IAED,IAA
I,WAAW,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7C,OAAO,CAAC,GAAG,CAAC,0BAA0B,WAAW,CAAC,iBAAiB,CAAC,MAAM,IAAI,CAAC,CAAC;QAChF,KAAK,MAAM,EAAE,IAAI,WAAW,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YAC5D,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QAC3B,CAAC;QACD,IAAI,WAAW,CAAC,iBAAiB,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YAC9C,OAAO,CAAC,GAAG,CAAC,eAAe,WAAW,CAAC,iBAAiB,CAAC,MAAM,GAAG,EAAE,OAAO,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,mBAAmB;IACnB,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;QACtB,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QACzB,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,UAAU,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,sBAAsB,UAAU,EAAE,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAEvB,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC;AACH,CAAC"}
|
package/dist/eval/index.d.ts
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Evaluation framework public API
|
|
3
|
-
* @module agent-threat-rules/eval
|
|
4
|
-
*/
|
|
5
|
-
export { EVAL_CORPUS, getAttackSamples, getBenignSamples, getSamplesByCategory, getSamplesByDifficulty, getCorpusStats } from './corpus.js';
|
|
6
|
-
export type { CorpusSample } from './corpus.js';
|
|
7
|
-
export { computeEvalReport, checkRegression } from './metrics.js';
|
|
8
|
-
export type { SampleResult, ConfusionMatrix, ClassMetrics, LatencyStats, CategoryBreakdown, DifficultyBreakdown, EvalReport, RegressionCheck, BaselineThresholds, } from './metrics.js';
|
|
9
|
-
export { runEval, runEvalCLI } from './eval-harness.js';
|
|
10
|
-
export type { EvalConfig } from './eval-harness.js';
|
|
11
|
-
export { computeRuleQuality } from './rule-metrics.js';
|
|
12
|
-
export type { RuleQuality, RuleQualityReport } from './rule-metrics.js';
|
|
13
|
-
//# sourceMappingURL=index.d.ts.map
|
package/dist/eval/index.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC5I,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAClE,YAAY,EACV,YAAY,EACZ,eAAe,EACf,YAAY,EACZ,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,UAAU,EACV,eAAe,EACf,kBAAkB,GACnB,MAAM,cAAc,CAAC;AAEtB,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACxD,YAAY,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,YAAY,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC"}
|
package/dist/eval/index.js
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Evaluation framework public API
|
|
3
|
-
* @module agent-threat-rules/eval
|
|
4
|
-
*/
|
|
5
|
-
export { EVAL_CORPUS, getAttackSamples, getBenignSamples, getSamplesByCategory, getSamplesByDifficulty, getCorpusStats } from './corpus.js';
|
|
6
|
-
export { computeEvalReport, checkRegression } from './metrics.js';
|
|
7
|
-
export { runEval, runEvalCLI } from './eval-harness.js';
|
|
8
|
-
export { computeRuleQuality } from './rule-metrics.js';
|
|
9
|
-
//# sourceMappingURL=index.js.map
|
package/dist/eval/index.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAG5I,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAalE,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAGxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC"}
|
package/dist/eval/metrics.d.ts
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Evaluation Metrics -- computes precision, recall, F1, confusion matrix,
|
|
3
|
-
* per-category breakdowns, and latency percentiles.
|
|
4
|
-
*
|
|
5
|
-
* All functions are free of side effects and never mutate their inputs; note that computeEvalReport reads the current time to stamp the report.
|
|
6
|
-
*
|
|
7
|
-
* @module agent-threat-rules/eval/metrics
|
|
8
|
-
*/
|
|
9
|
-
/**
 * Outcome of evaluating one corpus sample: the expected verdict, the verdict
 * actually produced, and the measurements recorded while producing it.
 */
export interface SampleResult {
|
|
10
|
-
/** Sample identifier; reports list these ids for missed attacks and false positives. */
readonly id: string;
|
|
11
|
-
/** Grouping key used for the per-category breakdown. */
readonly category: string;
|
|
12
|
-
/** Ground truth: true when the sample is supposed to be detected. */
readonly expectedDetection: boolean;
|
|
13
|
-
/** Observed verdict: true when the sample was detected. */
readonly actualDetection: boolean;
|
|
14
|
-
/** Rules that matched this sample (presumably rule ids -- confirm against the harness). */
readonly matchedRules: readonly string[];
|
|
15
|
-
/** Confidence reported for the detection. NOTE(review): scale/range is not visible here -- confirm. */
readonly confidence: number;
|
|
16
|
-
/** Time taken to evaluate the sample, in milliseconds; feeds the latency statistics. */
readonly latencyMs: number;
|
|
17
|
-
/** Grouping key used for the per-difficulty breakdown. */
readonly difficulty: string;
|
|
18
|
-
/** NOTE(review): not read by the metric computations; meaning comes from the corpus -- confirm. */
readonly tier: string;
|
|
19
|
-
}
|
|
20
|
-
/** Counts of samples in each expected-vs-actual detection quadrant. */
export interface ConfusionMatrix {
|
|
21
|
-
/** True positives: expected to be detected and detected. */
readonly tp: number;
|
|
22
|
-
/** False positives: not expected to be detected but detected. */
readonly fp: number;
|
|
23
|
-
/** True negatives: not expected to be detected and not detected. */
readonly tn: number;
|
|
24
|
-
/** False negatives: expected to be detected but not detected. */
readonly fn: number;
|
|
25
|
-
}
|
|
26
|
-
/** Classification metrics derived from one confusion matrix. All ratios are fractions in [0, 1]. */
export interface ClassMetrics {
|
|
27
|
-
/** tp / (tp + fp); reported as 1 when nothing was detected at all. */
readonly precision: number;
|
|
28
|
-
/** tp / (tp + fn); reported as 1 when no sample was expected to be detected. */
readonly recall: number;
|
|
29
|
-
/** Harmonic mean of precision and recall; 0 when both are 0. */
readonly f1: number;
|
|
30
|
-
/** (tp + tn) / sampleCount; reported as 1 when sampleCount is 0. */
readonly accuracy: number;
|
|
31
|
-
/** fp / (fp + tn); reported as 0 when there are no negative samples. */
readonly fpRate: number;
|
|
32
|
-
/** The raw counts the ratios above were computed from. */
readonly confusion: ConfusionMatrix;
|
|
33
|
-
/** Number of samples the metrics cover. */
readonly sampleCount: number;
|
|
34
|
-
}
|
|
35
|
-
/**
 * Summary of per-sample latencies in milliseconds. Every field is 0 for an
 * empty result set. Percentiles are read from the ascending-sorted latencies
 * at index floor(count * p).
 */
export interface LatencyStats {
|
|
36
|
-
/** 50th percentile latency. */
readonly p50: number;
|
|
37
|
-
/** 95th percentile latency. */
readonly p95: number;
|
|
38
|
-
/** 99th percentile latency. */
readonly p99: number;
|
|
39
|
-
/** Arithmetic mean latency. */
readonly mean: number;
|
|
40
|
-
/** Largest observed latency. */
readonly max: number;
|
|
41
|
-
}
|
|
42
|
-
/** Metrics restricted to the samples of a single category. */
export interface CategoryBreakdown {
|
|
43
|
-
/** The category value shared by every sample in this group. */
readonly category: string;
|
|
44
|
-
/** Metrics computed over this category's samples only. */
readonly metrics: ClassMetrics;
|
|
45
|
-
/** Ids of samples in this category that should have been detected but were not. */
readonly missedSamples: readonly string[];
|
|
46
|
-
/** Ids of samples in this category that were detected but should not have been. */
readonly falsePositives: readonly string[];
|
|
47
|
-
}
|
|
48
|
-
/** Metrics restricted to the samples of a single difficulty level. */
export interface DifficultyBreakdown {
|
|
49
|
-
/** The difficulty value shared by every sample in this group. */
readonly difficulty: string;
|
|
50
|
-
/** Metrics computed over this difficulty's samples only. */
readonly metrics: ClassMetrics;
|
|
51
|
-
}
|
|
52
|
-
/** Full evaluation report produced from a list of sample results. */
export interface EvalReport {
|
|
53
|
-
/** ISO-8601 time at which the report was generated. */
readonly timestamp: string;
|
|
54
|
-
/** Number of sample results the report was computed from. */
readonly corpusSize: number;
|
|
55
|
-
/** Metrics over every sample. */
readonly overall: ClassMetrics;
|
|
56
|
-
/** Latency summary over every sample. */
readonly latency: LatencyStats;
|
|
57
|
-
/** One entry per distinct category, in order of first appearance in the results. */
readonly byCategory: readonly CategoryBreakdown[];
|
|
58
|
-
/** One entry per distinct difficulty, in order of first appearance in the results. */
readonly byDifficulty: readonly DifficultyBreakdown[];
|
|
59
|
-
/** Samples that should have been detected but were not. */
readonly missedAttacks: readonly SampleResult[];
|
|
60
|
-
/** Samples that were detected but should not have been. */
readonly falsePositives: readonly SampleResult[];
|
|
61
|
-
}
|
|
62
|
-
/**
 * Aggregate per-sample results into an evaluation report: overall metrics,
 * latency summary, per-category and per-difficulty breakdowns, and the lists
 * of missed attacks and false positives.
 *
 * @param results - Per-sample outcomes; the array is not modified.
 * @returns The report, stamped with the time of the call.
 */
export declare function computeEvalReport(results: readonly SampleResult[]): EvalReport;
|
|
63
|
-
/** Result of comparing a report against baseline thresholds. */
export interface RegressionCheck {
|
|
64
|
-
/** True exactly when `violations` is empty. */
readonly passed: boolean;
|
|
65
|
-
/** One human-readable message per threshold that was breached. */
readonly violations: readonly string[];
|
|
66
|
-
}
|
|
67
|
-
/** Limits a report must stay within for the regression check to pass. Ratios are fractions, not percentages. */
export interface BaselineThresholds {
|
|
68
|
-
/** Overall recall below this value is a violation. */
readonly minRecall: number;
|
|
69
|
-
/** Overall false-positive rate above this value is a violation. */
readonly maxFpRate: number;
|
|
70
|
-
/** Overall F1 below this value is a violation. */
readonly minF1: number;
|
|
71
|
-
/** A p95 latency above this many milliseconds is a violation. */
readonly maxP95LatencyMs: number;
|
|
72
|
-
}
|
|
73
|
-
/**
 * Compare a report's overall recall, false-positive rate, F1 and p95 latency
 * against baseline thresholds.
 *
 * @param report - Report to check.
 * @param thresholds - Limits to enforce. When omitted, the built-in defaults
 *   apply: min recall 0.60, max FP rate 0.05, min F1 0.70, max p95 latency 50 ms.
 * @returns Whether every limit was met, plus one message per breached limit.
 */
export declare function checkRegression(report: EvalReport, thresholds?: BaselineThresholds): RegressionCheck;
|
|
74
|
-
//# sourceMappingURL=metrics.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"metrics.d.ts","sourceRoot":"","sources":["../../src/eval/metrics.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,iBAAiB,EAAE,OAAO,CAAC;IACpC,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;IAClC,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IACzC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,eAAe,CAAC;IACpC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,aAAa,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1C,QAAQ,CAAC,cAAc,EAAE,SAAS,MAAM,EAAE,CAAC;CAC5C;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;CAChC;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,UAAU,EAAE,SAAS,iBAAiB,EAAE,CAAC;IAClD,QAAQ,CAAC,YAAY,EAAE,SAAS,mBAAmB,EAAE,CAAC;IACtD,QAAQ,CAAC,aAAa,EAAE,SAAS,YAAY,EAAE,CAAC;IAChD,QAAQ,CAAC,cAAc,EAAE,SAAS,YAAY,EAAE,CAAC;CAClD;AAkDD,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,SAAS,YAAY,EAAE,GAAG,UAAU,CAsC9E;AAMD,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC;IACzB,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;CACxC;AAED,MAAM,WAAW,kBAAkB;IACjC,Q
AAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;CAClC;AASD,wBAAgB,eAAe,CAC7B,MAAM,EAAE,UAAU,EAClB,UAAU,GAAE,kBAAuC,GAClD,eAAe,CA4BjB"}
|
package/dist/eval/metrics.js
DELETED
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Evaluation Metrics -- computes precision, recall, F1, confusion matrix,
|
|
3
|
-
* per-category breakdowns, and latency percentiles.
|
|
4
|
-
*
|
|
5
|
-
* All functions are free of side effects and never mutate their inputs; note that computeEvalReport reads the current time to stamp the report.
|
|
6
|
-
*
|
|
7
|
-
* @module agent-threat-rules/eval/metrics
|
|
8
|
-
*/
|
|
9
|
-
// ---------------------------------------------------------------------------
|
|
10
|
-
// Core metric calculations
|
|
11
|
-
// ---------------------------------------------------------------------------
|
|
12
|
-
/**
 * Tally sample results into the four confusion-matrix quadrants.
 *
 * Each result is classified by the truthiness of `expectedDetection`
 * (ground truth) and `actualDetection` (observed verdict).
 *
 * @param results - Iterable of sample results.
 * @returns Counts as `{ tp, fp, tn, fn }`; all zero for empty input.
 */
function buildConfusionMatrix(results) {
    const counts = { tp: 0, fp: 0, tn: 0, fn: 0 };
    for (const sample of results) {
        const detected = Boolean(sample.actualDetection);
        if (sample.expectedDetection) {
            // Should have been flagged: hit or miss.
            counts[detected ? 'tp' : 'fn'] += 1;
        }
        else {
            // Should have been left alone: false alarm or correct pass.
            counts[detected ? 'fp' : 'tn'] += 1;
        }
    }
    return counts;
}
|
|
26
|
-
/**
 * Derive precision, recall, F1, accuracy and false-positive rate from a
 * confusion matrix.
 *
 * Ratios whose denominator is not positive fall back to a fixed value
 * instead of dividing by zero: precision, recall and accuracy fall back
 * to 1, F1 and the false-positive rate fall back to 0.
 *
 * @param cm - Confusion-matrix counts `{ tp, fp, tn, fn }`.
 * @param sampleCount - Number of samples the matrix was built from.
 * @returns The metrics, with `cm` and `sampleCount` echoed back.
 */
function computeClassMetrics(cm, sampleCount) {
    const { tp, fp, tn, fn } = cm;
    // Guarded division shared by every ratio below.
    const ratio = (numerator, denominator, fallback) => denominator > 0 ? numerator / denominator : fallback;
    const precision = ratio(tp, tp + fp, 1);
    const recall = ratio(tp, tp + fn, 1);
    const f1 = ratio(2 * precision * recall, precision + recall, 0);
    const accuracy = ratio(tp + tn, sampleCount, 1);
    const fpRate = ratio(fp, fp + tn, 0);
    return { precision, recall, f1, accuracy, fpRate, confusion: cm, sampleCount };
}
|
|
34
|
-
/**
 * Summarise the `latencyMs` values of a result list.
 *
 * Percentiles are read from the ascending-sorted latencies at index
 * `floor(count * p)`. The input array is left untouched; sorting happens
 * on a private copy.
 *
 * @param results - Sample results carrying a numeric `latencyMs`.
 * @returns `{ p50, p95, p99, mean, max }`; all zero when `results` is empty.
 */
function computeLatency(results) {
    const count = results.length;
    if (count === 0) {
        return { p50: 0, p95: 0, p99: 0, mean: 0, max: 0 };
    }
    const ordered = Array.from(results, (r) => r.latencyMs);
    ordered.sort((x, y) => x - y);
    // Value at the given fraction of the sorted list (0 if the slot is empty).
    const at = (fraction) => ordered[Math.floor(count * fraction)] ?? 0;
    let total = 0;
    for (const ms of ordered) {
        total += ms;
    }
    return {
        p50: at(0.5),
        p95: at(0.95),
        p99: at(0.99),
        mean: total / count,
        max: ordered[count - 1] ?? 0,
    };
}
|
|
48
|
-
// ---------------------------------------------------------------------------
|
|
49
|
-
// Report generation
|
|
50
|
-
// ---------------------------------------------------------------------------
|
|
51
|
-
/**
 * Aggregate per-sample results into a full evaluation report.
 *
 * Produces overall metrics and latency statistics, a breakdown per
 * category and per difficulty (each in order of first appearance in
 * `results`), and the lists of missed attacks and false positives.
 *
 * Samples are bucketed in a single pass per dimension rather than
 * re-filtering the whole result list once for every distinct category
 * and difficulty.
 *
 * @param results - Array of sample results; it is not modified.
 * @returns The report, stamped with the time of the call (ISO-8601).
 */
export function computeEvalReport(results) {
    // Bucket results by a key, keeping first-seen key order and sample order.
    const groupBy = (keyOf) => {
        const groups = new Map();
        for (const sample of results) {
            const key = keyOf(sample);
            const bucket = groups.get(key);
            if (bucket) {
                bucket.push(sample);
            }
            else {
                groups.set(key, [sample]);
            }
        }
        return groups;
    };
    const isMissed = (r) => r.expectedDetection && !r.actualDetection;
    const isFalsePositive = (r) => !r.expectedDetection && r.actualDetection;
    const idOf = (r) => r.id;
    const metricsFor = (subset) => computeClassMetrics(buildConfusionMatrix(subset), subset.length);

    const byCategory = Array.from(groupBy((r) => r.category), ([category, subset]) => ({
        category,
        metrics: metricsFor(subset),
        missedSamples: subset.filter(isMissed).map(idOf),
        falsePositives: subset.filter(isFalsePositive).map(idOf),
    }));
    const byDifficulty = Array.from(groupBy((r) => r.difficulty), ([difficulty, subset]) => ({
        difficulty,
        metrics: metricsFor(subset),
    }));

    return {
        timestamp: new Date().toISOString(),
        corpusSize: results.length,
        overall: metricsFor(results),
        latency: computeLatency(results),
        byCategory,
        byDifficulty,
        missedAttacks: results.filter(isMissed),
        falsePositives: results.filter(isFalsePositive),
    };
}
|
|
86
|
-
const DEFAULT_THRESHOLDS = {
|
|
87
|
-
minRecall: 0.60,
|
|
88
|
-
maxFpRate: 0.05,
|
|
89
|
-
minF1: 0.70,
|
|
90
|
-
maxP95LatencyMs: 50,
|
|
91
|
-
};
|
|
92
|
-
export function checkRegression(report, thresholds = DEFAULT_THRESHOLDS) {
|
|
93
|
-
const violations = [];
|
|
94
|
-
if (report.overall.recall < thresholds.minRecall) {
|
|
95
|
-
violations.push(`Recall ${(report.overall.recall * 100).toFixed(1)}% < minimum ${(thresholds.minRecall * 100).toFixed(1)}%`);
|
|
96
|
-
}
|
|
97
|
-
if (report.overall.fpRate > thresholds.maxFpRate) {
|
|
98
|
-
violations.push(`FP rate ${(report.overall.fpRate * 100).toFixed(3)}% > maximum ${(thresholds.maxFpRate * 100).toFixed(3)}%`);
|
|
99
|
-
}
|
|
100
|
-
if (report.overall.f1 < thresholds.minF1) {
|
|
101
|
-
violations.push(`F1 ${(report.overall.f1 * 100).toFixed(1)}% < minimum ${(thresholds.minF1 * 100).toFixed(1)}%`);
|
|
102
|
-
}
|
|
103
|
-
if (report.latency.p95 > thresholds.maxP95LatencyMs) {
|
|
104
|
-
violations.push(`P95 latency ${report.latency.p95.toFixed(1)}ms > maximum ${thresholds.maxP95LatencyMs}ms`);
|
|
105
|
-
}
|
|
106
|
-
return { passed: violations.length === 0, violations };
|
|
107
|
-
}
|
|
108
|
-
//# sourceMappingURL=metrics.js.map
|
package/dist/eval/metrics.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"metrics.js","sourceRoot":"","sources":["../../src/eval/metrics.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AA8DH,8EAA8E;AAC9E,2BAA2B;AAC3B,8EAA8E;AAE9E,SAAS,oBAAoB,CAAC,OAAgC;IAC5D,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAEnC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe;YAAE,EAAE,EAAE,CAAC;aAC9C,IAAI,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe;YAAE,EAAE,EAAE,CAAC;aACpD,IAAI,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAAC,eAAe;YAAE,EAAE,EAAE,CAAC;;YACrD,EAAE,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC;AAC5B,CAAC;AAED,SAAS,mBAAmB,CAAC,EAAmB,EAAE,WAAmB;IACnE,MAAM,SAAS,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClE,MAAM,MAAM,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,EAAE,GAAG,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,GAAG,MAAM,GAAG,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACtF,MAAM,QAAQ,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;IACrE,MAAM,MAAM,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE/D,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,EAAE,WAAW,EAAE,CAAC;AACjF,CAAC;AAED,SAAS,cAAc,CAAC,OAAgC;IACtD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;IACrD,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1E,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM
,CAAC;IAE1B,OAAO;QACL,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC;QACvC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC;QACxC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC;QACxC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG;QAC7C,GAAG,EAAE,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC;KAC1B,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,oBAAoB;AACpB,8EAA8E;AAE9E,MAAM,UAAU,iBAAiB,CAAC,OAAgC;IAChE,MAAM,SAAS,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,mBAAmB,CAAC,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IAC/D,MAAM,OAAO,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IAExC,cAAc;IACd,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;IAChE,MAAM,UAAU,GAAwB,UAAU,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC7D,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,GAAG,CAAC,CAAC;QAC7D,MAAM,EAAE,GAAG,oBAAoB,CAAC,UAAU,CAAC,CAAC;QAC5C,MAAM,OAAO,GAAG,mBAAmB,CAAC,EAAE,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;QAC3D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACpG,MAAM,GAAG,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACjG,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,cAAc,EAAE,GAAG,EAAE,CAAC;IAChF,CAAC,CAAC,CAAC;IAEH,gBAAgB;IAChB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACpE,MAAM,YAAY,GAA0B,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACpE,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,IAAI,CAAC,CAAC;QACjE,MAAM,EAAE,GAAG,oBAAoB,CAAC,WAAW,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,mBAAmB,CAAC,EAAE,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;QAC5D,OAAO,EAAE,UAA
U,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;IACvF,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC;IAExF,OAAO;QACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,UAAU,EAAE,OAAO,CAAC,MAAM;QAC1B,OAAO;QACP,OAAO;QACP,UAAU;QACV,YAAY;QACZ,aAAa;QACb,cAAc;KACf,CAAC;AACJ,CAAC;AAkBD,MAAM,kBAAkB,GAAuB;IAC7C,SAAS,EAAE,IAAI;IACf,SAAS,EAAE,IAAI;IACf,KAAK,EAAE,IAAI;IACX,eAAe,EAAE,EAAE;CACpB,CAAC;AAEF,MAAM,UAAU,eAAe,CAC7B,MAAkB,EAClB,aAAiC,kBAAkB;IAEnD,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,UAAU,CAAC,SAAS,EAAE,CAAC;QACjD,UAAU,CAAC,IAAI,CACb,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,UAAU,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAC5G,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,UAAU,CAAC,SAAS,EAAE,CAAC;QACjD,UAAU,CAAC,IAAI,CACb,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,UAAU,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAC7G,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,CAAC,EAAE,GAAG,UAAU,CAAC,KAAK,EAAE,CAAC;QACzC,UAAU,CAAC,IAAI,CACb,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,UAAU,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAChG,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,GAAG,UAAU,CAAC,eAAe,EAAE,CAAC;QACpD,UAAU,CAAC,IAAI,CACb,eAAe,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,gBAAgB,UAAU,CAAC,eAAe,IAAI,CAC3F,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,UAAU,EAAE,CAAC;AACzD,CAAC"}
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* PINT Benchmark Corpus Loader
|
|
3
|
-
*
|
|
4
|
-
* Reads the PINT-format dataset (JSON with text/category/label/source/language)
|
|
5
|
-
* built from publicly available prompt injection datasets:
|
|
6
|
-
* - deepset/prompt-injections (HuggingFace)
|
|
7
|
-
* - Lakera/gandalf_ignore_instructions (HuggingFace)
|
|
8
|
-
*
|
|
9
|
-
* Converts each sample into the CorpusSample interface used by the ATR eval
|
|
10
|
-
* harness, allowing the PINT corpus to be evaluated alongside or instead of
|
|
11
|
-
* the built-in hand-crafted corpus.
|
|
12
|
-
*
|
|
13
|
-
* @module agent-threat-rules/eval/pint-corpus
|
|
14
|
-
*/
|
|
15
|
-
import type { CorpusSample } from './corpus.js';
|
|
16
|
-
/**
|
|
17
|
-
* Load the PINT benchmark corpus from a JSON file on disk.
|
|
18
|
-
*
|
|
19
|
-
* @param dataPath - Absolute path to pint-corpus.json
|
|
20
|
-
* @returns Readonly array of CorpusSample for use with runEval()
|
|
21
|
-
* NOTE(review): behaviour for a missing or malformed file is not visible from this declaration -- confirm against the implementation.
*/
|
|
22
|
-
export declare function loadPintCorpus(dataPath: string): readonly CorpusSample[];
|
|
23
|
-
/**
|
|
24
|
-
* Get summary statistics for the loaded PINT corpus.
|
|
25
|
-
*/
|
|
26
|
-
export declare function getPintCorpusStats(corpus: readonly CorpusSample[]): {
|
|
27
|
-
// presumably the number of samples in `corpus` -- TODO confirm against the implementation
readonly total: number;
|
|
28
|
-
// presumably the count of samples labelled as attacks -- TODO confirm
readonly attacks: number;
|
|
29
|
-
// presumably the count of samples labelled as benign -- TODO confirm
readonly benign: number;
|
|
30
|
-
// presumably sample counts keyed by category -- TODO confirm
readonly byCategory: Readonly<Record<string, number>>;
|
|
31
|
-
// presumably sample counts keyed by difficulty -- TODO confirm
readonly byDifficulty: Readonly<Record<string, number>>;
|
|
32
|
-
// presumably sample counts keyed by language -- TODO confirm
readonly byLanguage: Readonly<Record<string, number>>;
|
|
33
|
-
};
|
|
34
|
-
//# sourceMappingURL=pint-corpus.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pint-corpus.d.ts","sourceRoot":"","sources":["../../src/eval/pint-corpus.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAsDhD;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,SAAS,YAAY,EAAE,CAqCxE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,SAAS,YAAY,EAAE,GAAG;IACnE,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACtD,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACxD,QAAQ,CAAC,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACvD,CAoBA"}
|