audrey 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +27 -5
- package/benchmarks/guardbench.js +98 -8
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/guardbench-raw.json +240 -140
- package/benchmarks/output/guardbench-summary.json +350 -224
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/submission-bundle/guardbench-raw.json +240 -140
- package/benchmarks/output/submission-bundle/guardbench-summary.json +350 -224
- package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +21 -1
- package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +23 -2
- package/benchmarks/output/submission-bundle/submission-manifest.json +14 -14
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +56 -56
- package/benchmarks/schemas/guardbench-raw.schema.json +21 -1
- package/benchmarks/schemas/guardbench-summary.schema.json +23 -2
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.js +1 -1
- package/dist/src/audrey.d.ts +10 -0
- package/dist/src/audrey.d.ts.map +1 -1
- package/dist/src/audrey.js +17 -4
- package/dist/src/audrey.js.map +1 -1
- package/dist/src/controller.d.ts +17 -1
- package/dist/src/controller.d.ts.map +1 -1
- package/dist/src/controller.js +52 -13
- package/dist/src/controller.js.map +1 -1
- package/dist/src/index.d.ts +2 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/routes.d.ts.map +1 -1
- package/dist/src/routes.js +4 -1
- package/dist/src/routes.js.map +1 -1
- package/docs/paper/07-evaluation.md +4 -4
- package/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +27 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +240 -140
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +350 -224
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +14 -14
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +67 -67
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +21 -1
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +23 -2
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +2 -2
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +35 -35
- package/package.json +2 -2
- package/scripts/smoke-cli.js +22 -2
- package/scripts/verify-release-readiness.mjs +50 -6
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": "1.0.0",
|
|
3
3
|
"suite": "GuardBench leaderboard",
|
|
4
|
-
"generatedAt": "2026-05-
|
|
4
|
+
"generatedAt": "2026-05-15T17:52:13.304Z",
|
|
5
5
|
"ranking": [
|
|
6
6
|
"verified bundle",
|
|
7
7
|
"adapter conformance",
|
|
@@ -28,9 +28,9 @@
|
|
|
28
28
|
"evidenceRecall": 1,
|
|
29
29
|
"redactionLeaks": 0,
|
|
30
30
|
"latency": {
|
|
31
|
-
"p50Ms":
|
|
32
|
-
"p95Ms":
|
|
33
|
-
"maxMs":
|
|
31
|
+
"p50Ms": 2.465,
|
|
32
|
+
"p95Ms": 30.791,
|
|
33
|
+
"maxMs": 30.791
|
|
34
34
|
}
|
|
35
35
|
},
|
|
36
36
|
"conformance": {
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
},
|
|
42
42
|
"source": {
|
|
43
43
|
"dir": "benchmarks/output/submission-bundle",
|
|
44
|
-
"manifestGeneratedAt": "2026-05-
|
|
44
|
+
"manifestGeneratedAt": "2026-05-15T17:52:13.050Z",
|
|
45
45
|
"fileCount": 17
|
|
46
46
|
},
|
|
47
47
|
"verification": {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# GuardBench Leaderboard
|
|
2
2
|
|
|
3
|
-
Generated: 2026-05-
|
|
3
|
+
Generated: 2026-05-15T17:52:13.304Z
|
|
4
4
|
|
|
5
5
|
| Rank | Subject | Verified | Conformant | Full Contract | Decision Accuracy | Evidence Recall | Redaction Leaks | p95 Latency | Bundle |
|
|
6
6
|
|---:|---|---:|---:|---:|---:|---:|---:|---:|---|
|
|
7
|
-
| 1 | Audrey Guard | yes | yes | 100.0% | 100.0% | 100.0% | 0 |
|
|
7
|
+
| 1 | Audrey Guard | yes | yes | 100.0% | 100.0% | 100.0% | 0 | 30.791ms | benchmarks/output/submission-bundle |
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": "1.0.0",
|
|
3
3
|
"suite": "GuardBench conformance card",
|
|
4
|
-
"generatedAt": "2026-05-
|
|
4
|
+
"generatedAt": "2026-05-15T17:52:13.040Z",
|
|
5
5
|
"sourceDir": "benchmarks/output",
|
|
6
6
|
"manifestVersion": "0.2.0",
|
|
7
7
|
"suiteId": "guardbench-local-comparative",
|
|
@@ -25,9 +25,9 @@
|
|
|
25
25
|
"evidenceRecall": 1,
|
|
26
26
|
"redactionLeaks": 0,
|
|
27
27
|
"latency": {
|
|
28
|
-
"p50Ms":
|
|
29
|
-
"p95Ms":
|
|
30
|
-
"maxMs":
|
|
28
|
+
"p50Ms": 2.465,
|
|
29
|
+
"p95Ms": 30.791,
|
|
30
|
+
"maxMs": 30.791
|
|
31
31
|
}
|
|
32
32
|
},
|
|
33
33
|
"conformance": {
|
|
@@ -39,21 +39,21 @@
|
|
|
39
39
|
"integrity": {
|
|
40
40
|
"artifactHashes": {
|
|
41
41
|
"guardbench-manifest.json": "57636ce19fdaa6e50fc3fc961d9e499a9f43632f588c713a9fefe8e8a6fa724c",
|
|
42
|
-
"guardbench-summary.json": "
|
|
43
|
-
"guardbench-raw.json": "
|
|
42
|
+
"guardbench-summary.json": "21023f230b761f1b43f8ecabe519dd6b320c62ad56f0b6aa28bbcf7a2c8838f5",
|
|
43
|
+
"guardbench-raw.json": "3b78d1a2432e7d72752f96d9ac4b2b49cf6f59eb65548fbadb21ea6adbb86b37"
|
|
44
44
|
},
|
|
45
45
|
"externalRunMetadataHash": null
|
|
46
46
|
},
|
|
47
47
|
"provenance": {
|
|
48
|
-
"generatedAt": "2026-05-
|
|
49
|
-
"gitSha": "
|
|
48
|
+
"generatedAt": "2026-05-15T17:52:12.761Z",
|
|
49
|
+
"gitSha": "82b0e9979680acf751b9e80f6f90f8c6ac74befb",
|
|
50
50
|
"gitDirty": false,
|
|
51
|
-
"node": "v24.
|
|
52
|
-
"v8": "13.6.233.17-node.
|
|
51
|
+
"node": "v24.15.0",
|
|
52
|
+
"v8": "13.6.233.17-node.48",
|
|
53
53
|
"platform": "linux",
|
|
54
54
|
"arch": "x64",
|
|
55
|
-
"osRelease": "6.17.0-
|
|
56
|
-
"cpuModel": "AMD EPYC
|
|
55
|
+
"osRelease": "6.17.0-1013-azure",
|
|
56
|
+
"cpuModel": "AMD EPYC 9V74 80-Core Processor",
|
|
57
57
|
"cpuCount": 4,
|
|
58
58
|
"totalMemoryGb": 15.61,
|
|
59
59
|
"embeddingProvider": "mock",
|