audrey 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +9 -1
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +9 -9
- package/benchmarks/output/guardbench-raw.json +104 -103
- package/benchmarks/output/guardbench-summary.json +167 -165
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +9 -9
- package/benchmarks/output/submission-bundle/guardbench-raw.json +104 -103
- package/benchmarks/output/submission-bundle/guardbench-summary.json +167 -165
- package/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +48 -48
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.js +1 -1
- package/dist/mcp-server/index.d.ts +3 -344
- package/dist/mcp-server/index.d.ts.map +1 -1
- package/dist/mcp-server/index.js +6 -280
- package/dist/mcp-server/index.js.map +1 -1
- package/dist/mcp-server/tool-schemas.d.ts +341 -0
- package/dist/mcp-server/tool-schemas.d.ts.map +1 -0
- package/dist/mcp-server/tool-schemas.js +248 -0
- package/dist/mcp-server/tool-schemas.js.map +1 -0
- package/dist/mcp-server/tool-validation.d.ts +17 -0
- package/dist/mcp-server/tool-validation.d.ts.map +1 -0
- package/dist/mcp-server/tool-validation.js +41 -0
- package/dist/mcp-server/tool-validation.js.map +1 -0
- package/docs/paper/07-evaluation.md +6 -6
- package/docs/paper/audrey-paper-v1.md +6 -6
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +6 -6
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +9 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +9 -9
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +104 -103
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +167 -165
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +60 -60
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +6 -6
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +6 -6
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +6 -6
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +1 -1
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +34 -34
- package/package.json +1 -1
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": "1.0.0",
|
|
3
3
|
"suite": "GuardBench leaderboard",
|
|
4
|
-
"generatedAt": "2026-05-
|
|
4
|
+
"generatedAt": "2026-05-29T13:33:23.840Z",
|
|
5
5
|
"ranking": [
|
|
6
6
|
"verified bundle",
|
|
7
7
|
"adapter conformance",
|
|
@@ -28,9 +28,9 @@
|
|
|
28
28
|
"evidenceRecall": 1,
|
|
29
29
|
"redactionLeaks": 0,
|
|
30
30
|
"latency": {
|
|
31
|
-
"p50Ms":
|
|
32
|
-
"p95Ms":
|
|
33
|
-
"maxMs":
|
|
31
|
+
"p50Ms": 3.09,
|
|
32
|
+
"p95Ms": 28.181,
|
|
33
|
+
"maxMs": 28.181
|
|
34
34
|
}
|
|
35
35
|
},
|
|
36
36
|
"conformance": {
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
},
|
|
42
42
|
"source": {
|
|
43
43
|
"dir": "benchmarks/output/submission-bundle",
|
|
44
|
-
"manifestGeneratedAt": "2026-05-
|
|
44
|
+
"manifestGeneratedAt": "2026-05-29T13:33:23.534Z",
|
|
45
45
|
"fileCount": 17
|
|
46
46
|
},
|
|
47
47
|
"verification": {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# GuardBench Leaderboard
|
|
2
2
|
|
|
3
|
-
Generated: 2026-05-
|
|
3
|
+
Generated: 2026-05-29T13:33:23.840Z
|
|
4
4
|
|
|
5
5
|
| Rank | Subject | Verified | Conformant | Full Contract | Decision Accuracy | Evidence Recall | Redaction Leaks | p95 Latency | Bundle |
|
|
6
6
|
|---:|---|---:|---:|---:|---:|---:|---:|---:|---|
|
|
7
|
-
| 1 | Audrey Guard | yes | yes | 100.0% | 100.0% | 100.0% | 0 |
|
|
7
|
+
| 1 | Audrey Guard | yes | yes | 100.0% | 100.0% | 100.0% | 0 | 28.181ms | benchmarks/output/submission-bundle |
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": "1.0.0",
|
|
3
3
|
"suite": "GuardBench conformance card",
|
|
4
|
-
"generatedAt": "2026-05-
|
|
4
|
+
"generatedAt": "2026-05-29T13:33:23.522Z",
|
|
5
5
|
"sourceDir": "benchmarks/output",
|
|
6
6
|
"manifestVersion": "0.2.0",
|
|
7
7
|
"suiteId": "guardbench-local-comparative",
|
|
@@ -25,9 +25,9 @@
|
|
|
25
25
|
"evidenceRecall": 1,
|
|
26
26
|
"redactionLeaks": 0,
|
|
27
27
|
"latency": {
|
|
28
|
-
"p50Ms":
|
|
29
|
-
"p95Ms":
|
|
30
|
-
"maxMs":
|
|
28
|
+
"p50Ms": 3.09,
|
|
29
|
+
"p95Ms": 28.181,
|
|
30
|
+
"maxMs": 28.181
|
|
31
31
|
}
|
|
32
32
|
},
|
|
33
33
|
"conformance": {
|
|
@@ -39,21 +39,21 @@
|
|
|
39
39
|
"integrity": {
|
|
40
40
|
"artifactHashes": {
|
|
41
41
|
"guardbench-manifest.json": "57636ce19fdaa6e50fc3fc961d9e499a9f43632f588c713a9fefe8e8a6fa724c",
|
|
42
|
-
"guardbench-summary.json": "
|
|
43
|
-
"guardbench-raw.json": "
|
|
42
|
+
"guardbench-summary.json": "91f264dd889e2c639a6fc6d1b867bc228b94c84ed5120345e23dddb79c11ee74",
|
|
43
|
+
"guardbench-raw.json": "66d4b69087258638f3572a40e1fd59bb84067034f899eaa2c27eed2dde554b2b"
|
|
44
44
|
},
|
|
45
45
|
"externalRunMetadataHash": null
|
|
46
46
|
},
|
|
47
47
|
"provenance": {
|
|
48
|
-
"generatedAt": "2026-05-
|
|
49
|
-
"gitSha": "
|
|
48
|
+
"generatedAt": "2026-05-29T13:33:23.189Z",
|
|
49
|
+
"gitSha": "9f771bae94f5ce4cfd5d5425e300a6a440c833d2",
|
|
50
50
|
"gitDirty": false,
|
|
51
51
|
"node": "v24.16.0",
|
|
52
52
|
"v8": "13.6.233.17-node.49",
|
|
53
53
|
"platform": "linux",
|
|
54
54
|
"arch": "x64",
|
|
55
55
|
"osRelease": "6.17.0-1015-azure",
|
|
56
|
-
"cpuModel": "
|
|
56
|
+
"cpuModel": "Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz",
|
|
57
57
|
"cpuCount": 4,
|
|
58
58
|
"totalMemoryGb": 15.61,
|
|
59
59
|
"embeddingProvider": "mock",
|