audrey 0.23.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +101 -15
- package/LICENSE +21 -21
- package/README.md +232 -6
- package/SECURITY.md +2 -1
- package/benchmarks/adapter-kit.mjs +20 -0
- package/benchmarks/adapter-self-test.mjs +166 -0
- package/benchmarks/adapters/example-allow.mjs +28 -0
- package/benchmarks/adapters/mem0-platform.mjs +267 -0
- package/benchmarks/adapters/registry.json +51 -0
- package/benchmarks/adapters/zep-cloud.mjs +280 -0
- package/benchmarks/baselines.js +169 -0
- package/benchmarks/build-leaderboard.mjs +170 -0
- package/benchmarks/cases.js +537 -0
- package/benchmarks/create-conformance-card.mjs +139 -0
- package/benchmarks/create-submission-bundle.mjs +176 -0
- package/benchmarks/dry-run-external-adapters.mjs +165 -0
- package/benchmarks/guardbench.js +1125 -0
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
- package/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
- package/benchmarks/output/external/guardbench-external-evidence.json +56 -0
- package/benchmarks/output/guardbench-conformance-card.json +63 -0
- package/benchmarks/output/guardbench-manifest.json +414 -0
- package/benchmarks/output/guardbench-raw.json +1271 -0
- package/benchmarks/output/guardbench-summary.json +2107 -0
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +63 -0
- package/benchmarks/output/submission-bundle/guardbench-manifest.json +414 -0
- package/benchmarks/output/submission-bundle/guardbench-raw.json +1271 -0
- package/benchmarks/output/submission-bundle/guardbench-summary.json +2107 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-registry.schema.json +69 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-adapter-self-test.schema.json +156 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-conformance-card.schema.json +184 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-external-dry-run.schema.json +74 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-external-evidence.schema.json +108 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-external-run.schema.json +160 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-leaderboard.schema.json +179 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-manifest.schema.json +213 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-publication-verification.schema.json +47 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +184 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-submission-manifest.schema.json +151 -0
- package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +249 -0
- package/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
- package/benchmarks/output/submission-bundle/validation-report.json +31 -0
- package/benchmarks/output/summary.json +2354 -0
- package/benchmarks/perf-snapshot.js +304 -0
- package/benchmarks/perf.bench.js +161 -0
- package/benchmarks/public-paths.mjs +78 -0
- package/benchmarks/reference-results.js +70 -0
- package/benchmarks/report.js +259 -0
- package/benchmarks/run-external-guardbench.mjs +281 -0
- package/benchmarks/run.js +682 -0
- package/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
- package/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
- package/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
- package/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
- package/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
- package/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
- package/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
- package/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
- package/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
- package/benchmarks/schemas/guardbench-raw.schema.json +184 -0
- package/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
- package/benchmarks/schemas/guardbench-summary.schema.json +249 -0
- package/benchmarks/snapshots/perf-0.22.2.json +123 -0
- package/benchmarks/snapshots/perf-0.23.0.json +123 -0
- package/benchmarks/validate-adapter-module.mjs +104 -0
- package/benchmarks/validate-adapter-registry.mjs +134 -0
- package/benchmarks/validate-adapter-self-test.mjs +96 -0
- package/benchmarks/validate-guardbench-artifacts.mjs +343 -0
- package/benchmarks/verify-external-evidence.mjs +296 -0
- package/benchmarks/verify-publication-artifacts.mjs +286 -0
- package/benchmarks/verify-submission-bundle.mjs +167 -0
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.d.ts.map +1 -1
- package/dist/mcp-server/config.js +1 -1
- package/dist/mcp-server/config.js.map +1 -1
- package/dist/mcp-server/index.d.ts +65 -3
- package/dist/mcp-server/index.d.ts.map +1 -1
- package/dist/mcp-server/index.js +675 -157
- package/dist/mcp-server/index.js.map +1 -1
- package/dist/src/action-key.d.ts +9 -0
- package/dist/src/action-key.d.ts.map +1 -0
- package/dist/src/action-key.js +49 -0
- package/dist/src/action-key.js.map +1 -0
- package/dist/src/adaptive.js +5 -5
- package/dist/src/affect.js +8 -8
- package/dist/src/audrey.d.ts +13 -0
- package/dist/src/audrey.d.ts.map +1 -1
- package/dist/src/audrey.js +68 -3
- package/dist/src/audrey.js.map +1 -1
- package/dist/src/capsule.js +4 -4
- package/dist/src/causal.js +3 -3
- package/dist/src/consolidate.js +48 -48
- package/dist/src/controller.d.ts +78 -6
- package/dist/src/controller.d.ts.map +1 -1
- package/dist/src/controller.js +273 -53
- package/dist/src/controller.js.map +1 -1
- package/dist/src/db.js +172 -172
- package/dist/src/decay.js +8 -8
- package/dist/src/embedding.d.ts +2 -1
- package/dist/src/embedding.d.ts.map +1 -1
- package/dist/src/embedding.js +39 -29
- package/dist/src/embedding.js.map +1 -1
- package/dist/src/encode.js +6 -6
- package/dist/src/feedback.d.ts +6 -0
- package/dist/src/feedback.d.ts.map +1 -1
- package/dist/src/feedback.js +6 -0
- package/dist/src/feedback.js.map +1 -1
- package/dist/src/forget.js +12 -12
- package/dist/src/hybrid-recall.js +9 -9
- package/dist/src/impact.js +6 -6
- package/dist/src/import.d.ts +3 -3
- package/dist/src/import.js +41 -41
- package/dist/src/index.d.ts +5 -4
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +3 -3
- package/dist/src/index.js.map +1 -1
- package/dist/src/interference.js +14 -14
- package/dist/src/introspect.js +18 -18
- package/dist/src/preflight.d.ts.map +1 -1
- package/dist/src/preflight.js +41 -0
- package/dist/src/preflight.js.map +1 -1
- package/dist/src/promote.js +7 -7
- package/dist/src/prompts.js +118 -118
- package/dist/src/recall.js +30 -30
- package/dist/src/reflexes.d.ts +1 -0
- package/dist/src/reflexes.d.ts.map +1 -1
- package/dist/src/reflexes.js +3 -0
- package/dist/src/reflexes.js.map +1 -1
- package/dist/src/rollback.js +4 -4
- package/dist/src/routes.d.ts.map +1 -1
- package/dist/src/routes.js +71 -2
- package/dist/src/routes.js.map +1 -1
- package/dist/src/validate.js +25 -25
- package/docs/AUDREY_PAPER_OUTLINE.md +175 -0
- package/docs/MEMORY_BENCHMARKING.md +59 -0
- package/docs/PRODUCTION_BACKLOG.md +304 -0
- package/docs/paper/00-master.md +48 -0
- package/docs/paper/01-introduction.md +27 -0
- package/docs/paper/02-related-work.md +47 -0
- package/docs/paper/03-problem-definition.md +108 -0
- package/docs/paper/04-design.md +164 -0
- package/docs/paper/05-guardbench-spec.md +412 -0
- package/docs/paper/06-implementation.md +113 -0
- package/docs/paper/07-evaluation.md +168 -0
- package/docs/paper/08-discussion-limitations.md +61 -0
- package/docs/paper/09-conclusion.md +11 -0
- package/docs/paper/SUBMISSION_README.md +162 -0
- package/docs/paper/appendix-a-demo-transcript.md +114 -0
- package/docs/paper/arxiv-compile-report.schema.json +116 -0
- package/docs/paper/arxiv-source.schema.json +61 -0
- package/docs/paper/audrey-paper-v1.md +1106 -0
- package/docs/paper/browser-launch-plan.json +209 -0
- package/docs/paper/browser-launch-plan.schema.json +100 -0
- package/docs/paper/browser-launch-results.json +86 -0
- package/docs/paper/browser-launch-results.schema.json +66 -0
- package/docs/paper/claim-register.json +138 -0
- package/docs/paper/claim-register.schema.json +81 -0
- package/docs/paper/evidence-ledger.md +103 -0
- package/docs/paper/output/arxiv/README-arxiv.txt +8 -0
- package/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
- package/docs/paper/output/arxiv/main.tex +949 -0
- package/docs/paper/output/arxiv/references.bib +222 -0
- package/docs/paper/output/arxiv-compile-report.json +24 -0
- package/docs/paper/output/submission-bundle/LICENSE +21 -0
- package/docs/paper/output/submission-bundle/README.md +555 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +50 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +69 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +56 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +63 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-manifest.json +414 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +1271 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +2107 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +93 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +7 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +131 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +31 -0
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +2354 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-registry.schema.json +69 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-adapter-self-test.schema.json +156 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-conformance-card.schema.json +184 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-dry-run.schema.json +74 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-evidence.schema.json +108 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-external-run.schema.json +160 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-leaderboard.schema.json +179 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-manifest.schema.json +213 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-publication-verification.schema.json +47 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +184 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-submission-manifest.schema.json +151 -0
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +249 -0
- package/docs/paper/output/submission-bundle/docs/AUDREY_PAPER_OUTLINE.md +175 -0
- package/docs/paper/output/submission-bundle/docs/paper/00-master.md +48 -0
- package/docs/paper/output/submission-bundle/docs/paper/01-introduction.md +27 -0
- package/docs/paper/output/submission-bundle/docs/paper/02-related-work.md +47 -0
- package/docs/paper/output/submission-bundle/docs/paper/03-problem-definition.md +108 -0
- package/docs/paper/output/submission-bundle/docs/paper/04-design.md +164 -0
- package/docs/paper/output/submission-bundle/docs/paper/05-guardbench-spec.md +412 -0
- package/docs/paper/output/submission-bundle/docs/paper/06-implementation.md +113 -0
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +168 -0
- package/docs/paper/output/submission-bundle/docs/paper/08-discussion-limitations.md +61 -0
- package/docs/paper/output/submission-bundle/docs/paper/09-conclusion.md +11 -0
- package/docs/paper/output/submission-bundle/docs/paper/SUBMISSION_README.md +162 -0
- package/docs/paper/output/submission-bundle/docs/paper/appendix-a-demo-transcript.md +114 -0
- package/docs/paper/output/submission-bundle/docs/paper/arxiv-compile-report.schema.json +116 -0
- package/docs/paper/output/submission-bundle/docs/paper/arxiv-source.schema.json +61 -0
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +1106 -0
- package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.json +209 -0
- package/docs/paper/output/submission-bundle/docs/paper/browser-launch-plan.schema.json +100 -0
- package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.json +86 -0
- package/docs/paper/output/submission-bundle/docs/paper/browser-launch-results.schema.json +66 -0
- package/docs/paper/output/submission-bundle/docs/paper/claim-register.json +138 -0
- package/docs/paper/output/submission-bundle/docs/paper/claim-register.schema.json +81 -0
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +103 -0
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/README-arxiv.txt +8 -0
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +41 -0
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +949 -0
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/references.bib +222 -0
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +24 -0
- package/docs/paper/output/submission-bundle/docs/paper/paper-submission-bundle.schema.json +70 -0
- package/docs/paper/output/submission-bundle/docs/paper/publication-pack.json +81 -0
- package/docs/paper/output/submission-bundle/docs/paper/publication-pack.schema.json +60 -0
- package/docs/paper/output/submission-bundle/docs/paper/references.bib +222 -0
- package/docs/paper/output/submission-bundle/package.json +212 -0
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +379 -0
- package/docs/paper/paper-submission-bundle.schema.json +70 -0
- package/docs/paper/publication-pack.json +81 -0
- package/docs/paper/publication-pack.schema.json +60 -0
- package/docs/paper/references.bib +222 -0
- package/package.json +87 -4
- package/scripts/audit-release-completion.mjs +362 -0
- package/scripts/create-arxiv-source.mjs +362 -0
- package/scripts/create-paper-submission-bundle.mjs +210 -0
- package/scripts/finalize-release.mjs +526 -0
- package/scripts/prepare-release-cut.mjs +269 -0
- package/scripts/publish-release-bundle.mjs +209 -0
- package/scripts/publish-release-github-api.mjs +429 -0
- package/scripts/run-vitest.mjs +34 -0
- package/scripts/smoke-cli.js +92 -0
- package/scripts/sync-paper-artifacts.mjs +109 -0
- package/scripts/verify-arxiv-compile.mjs +440 -0
- package/scripts/verify-arxiv-source.mjs +194 -0
- package/scripts/verify-browser-launch-plan.mjs +237 -0
- package/scripts/verify-browser-launch-results.mjs +285 -0
- package/scripts/verify-paper-artifacts.mjs +338 -0
- package/scripts/verify-paper-claims.mjs +226 -0
- package/scripts/verify-paper-submission-bundle.mjs +207 -0
- package/scripts/verify-publication-pack.mjs +196 -0
- package/scripts/verify-python-package.py +201 -0
- package/scripts/verify-release-readiness.mjs +785 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generatedAt": "2026-05-01T02:15:29.400Z",
|
|
3
|
+
"durationMs": 4600,
|
|
4
|
+
"audreyVersion": null,
|
|
5
|
+
"gitSha": "e2e821b",
|
|
6
|
+
"methodology": {
|
|
7
|
+
"embedding": "mock provider, 64 dimensions (in-process, no network)",
|
|
8
|
+
"llm": "mock provider (in-process)",
|
|
9
|
+
"retrieval": "hybrid (vector + lexical) with limit=5",
|
|
10
|
+
"sizes": [
|
|
11
|
+
100,
|
|
12
|
+
1000,
|
|
13
|
+
5000
|
|
14
|
+
],
|
|
15
|
+
"recallRunsPerSize": 50,
|
|
16
|
+
"notes": "Latency is wall-clock for a single call from a JS caller. Cloud and local 384-dim providers will report higher recall latency dominated by embedding cost and network. Run on your own hardware before quoting."
|
|
17
|
+
},
|
|
18
|
+
"machine": {
|
|
19
|
+
"node": "25.5.0",
|
|
20
|
+
"v8": "14.1.146.11-node.18",
|
|
21
|
+
"platform": "win32",
|
|
22
|
+
"arch": "x64",
|
|
23
|
+
"osRelease": "10.0.26200",
|
|
24
|
+
"cpuCount": 24,
|
|
25
|
+
"cpuModel": "AMD Ryzen 9 7900X3D 12-Core Processor ",
|
|
26
|
+
"memoryGb": 62.9
|
|
27
|
+
},
|
|
28
|
+
"sizes": [
|
|
29
|
+
{
|
|
30
|
+
"corpusSize": 100,
|
|
31
|
+
"encodeMs": {
|
|
32
|
+
"samples": 100,
|
|
33
|
+
"p50": 0.331,
|
|
34
|
+
"p95": 0.589,
|
|
35
|
+
"p99": 7.65,
|
|
36
|
+
"min": 0.214,
|
|
37
|
+
"max": 10.978,
|
|
38
|
+
"mean": 0.577
|
|
39
|
+
},
|
|
40
|
+
"hybridRecallMs": {
|
|
41
|
+
"samples": 50,
|
|
42
|
+
"p50": 0.539,
|
|
43
|
+
"p95": 1.82,
|
|
44
|
+
"p99": 2.712,
|
|
45
|
+
"min": 0.448,
|
|
46
|
+
"max": 2.712,
|
|
47
|
+
"mean": 0.659
|
|
48
|
+
},
|
|
49
|
+
"postEncodeQueueMs": {
|
|
50
|
+
"samples": 100,
|
|
51
|
+
"p50": 0.344,
|
|
52
|
+
"p95": 0.73,
|
|
53
|
+
"p99": 9.715,
|
|
54
|
+
"min": 0.079,
|
|
55
|
+
"max": 13.218,
|
|
56
|
+
"mean": 0.533
|
|
57
|
+
},
|
|
58
|
+
"queueEvents": 100
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"corpusSize": 1000,
|
|
62
|
+
"encodeMs": {
|
|
63
|
+
"samples": 1000,
|
|
64
|
+
"p50": 0.307,
|
|
65
|
+
"p95": 2.147,
|
|
66
|
+
"p99": 9.672,
|
|
67
|
+
"min": 0.183,
|
|
68
|
+
"max": 13.514,
|
|
69
|
+
"mean": 0.639
|
|
70
|
+
},
|
|
71
|
+
"hybridRecallMs": {
|
|
72
|
+
"samples": 50,
|
|
73
|
+
"p50": 1.566,
|
|
74
|
+
"p95": 2.364,
|
|
75
|
+
"p99": 21.177,
|
|
76
|
+
"min": 0.608,
|
|
77
|
+
"max": 21.177,
|
|
78
|
+
"mean": 1.803
|
|
79
|
+
},
|
|
80
|
+
"postEncodeQueueMs": {
|
|
81
|
+
"samples": 1000,
|
|
82
|
+
"p50": 0.321,
|
|
83
|
+
"p95": 1.468,
|
|
84
|
+
"p99": 9.744,
|
|
85
|
+
"min": 0.07,
|
|
86
|
+
"max": 15.61,
|
|
87
|
+
"mean": 0.553
|
|
88
|
+
},
|
|
89
|
+
"queueEvents": 1000
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
"corpusSize": 5000,
|
|
93
|
+
"encodeMs": {
|
|
94
|
+
"samples": 5000,
|
|
95
|
+
"p50": 0.308,
|
|
96
|
+
"p95": 1.838,
|
|
97
|
+
"p99": 10.45,
|
|
98
|
+
"min": 0.171,
|
|
99
|
+
"max": 19.247,
|
|
100
|
+
"mean": 0.653
|
|
101
|
+
},
|
|
102
|
+
"hybridRecallMs": {
|
|
103
|
+
"samples": 50,
|
|
104
|
+
"p50": 2.091,
|
|
105
|
+
"p95": 3.417,
|
|
106
|
+
"p99": 16.58,
|
|
107
|
+
"min": 2.005,
|
|
108
|
+
"max": 16.58,
|
|
109
|
+
"mean": 2.492
|
|
110
|
+
},
|
|
111
|
+
"postEncodeQueueMs": {
|
|
112
|
+
"samples": 5000,
|
|
113
|
+
"p50": 0.313,
|
|
114
|
+
"p95": 1.217,
|
|
115
|
+
"p99": 10.532,
|
|
116
|
+
"min": 0.074,
|
|
117
|
+
"max": 19.378,
|
|
118
|
+
"mean": 0.554
|
|
119
|
+
},
|
|
120
|
+
"queueEvents": 5000
|
|
121
|
+
}
|
|
122
|
+
]
|
|
123
|
+
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generatedAt": "2026-05-05T17:32:45.578Z",
|
|
3
|
+
"durationMs": 1042,
|
|
4
|
+
"audreyVersion": "0.23.0",
|
|
5
|
+
"gitSha": "20cdde0",
|
|
6
|
+
"methodology": {
|
|
7
|
+
"embedding": "mock provider, 64 dimensions (in-process, no network)",
|
|
8
|
+
"llm": "mock provider (in-process)",
|
|
9
|
+
"retrieval": "hybrid (vector + lexical) with limit=5",
|
|
10
|
+
"sizes": [
|
|
11
|
+
100,
|
|
12
|
+
1000,
|
|
13
|
+
5000
|
|
14
|
+
],
|
|
15
|
+
"recallRunsPerSize": 50,
|
|
16
|
+
"notes": "Latency is wall-clock for a single call from a JS caller. Cloud and local 384-dim providers will report higher recall latency dominated by embedding cost and network. Run on your own hardware before quoting."
|
|
17
|
+
},
|
|
18
|
+
"machine": {
|
|
19
|
+
"node": "25.9.0",
|
|
20
|
+
"v8": "14.1.146.11-node.25",
|
|
21
|
+
"platform": "darwin",
|
|
22
|
+
"arch": "arm64",
|
|
23
|
+
"osRelease": "25.4.0",
|
|
24
|
+
"cpuCount": 18,
|
|
25
|
+
"cpuModel": "Apple M5 Max",
|
|
26
|
+
"memoryGb": 64
|
|
27
|
+
},
|
|
28
|
+
"sizes": [
|
|
29
|
+
{
|
|
30
|
+
"corpusSize": 100,
|
|
31
|
+
"encodeMs": {
|
|
32
|
+
"samples": 100,
|
|
33
|
+
"p50": 0.136,
|
|
34
|
+
"p95": 0.246,
|
|
35
|
+
"p99": 1.054,
|
|
36
|
+
"min": 0.084,
|
|
37
|
+
"max": 2.211,
|
|
38
|
+
"mean": 0.178
|
|
39
|
+
},
|
|
40
|
+
"hybridRecallMs": {
|
|
41
|
+
"samples": 50,
|
|
42
|
+
"p50": 0.215,
|
|
43
|
+
"p95": 0.693,
|
|
44
|
+
"p99": 1.285,
|
|
45
|
+
"min": 0.166,
|
|
46
|
+
"max": 1.285,
|
|
47
|
+
"mean": 0.261
|
|
48
|
+
},
|
|
49
|
+
"postEncodeQueueMs": {
|
|
50
|
+
"samples": 100,
|
|
51
|
+
"p50": 0.134,
|
|
52
|
+
"p95": 0.35,
|
|
53
|
+
"p99": 1.079,
|
|
54
|
+
"min": 0.036,
|
|
55
|
+
"max": 1.244,
|
|
56
|
+
"mean": 0.156
|
|
57
|
+
},
|
|
58
|
+
"queueEvents": 100
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"corpusSize": 1000,
|
|
62
|
+
"encodeMs": {
|
|
63
|
+
"samples": 1000,
|
|
64
|
+
"p50": 0.114,
|
|
65
|
+
"p95": 0.187,
|
|
66
|
+
"p99": 0.865,
|
|
67
|
+
"min": 0.074,
|
|
68
|
+
"max": 1.378,
|
|
69
|
+
"mean": 0.139
|
|
70
|
+
},
|
|
71
|
+
"hybridRecallMs": {
|
|
72
|
+
"samples": 50,
|
|
73
|
+
"p50": 0.272,
|
|
74
|
+
"p95": 0.48,
|
|
75
|
+
"p99": 2.081,
|
|
76
|
+
"min": 0.254,
|
|
77
|
+
"max": 2.081,
|
|
78
|
+
"mean": 0.322
|
|
79
|
+
},
|
|
80
|
+
"postEncodeQueueMs": {
|
|
81
|
+
"samples": 1000,
|
|
82
|
+
"p50": 0.122,
|
|
83
|
+
"p95": 0.256,
|
|
84
|
+
"p99": 0.921,
|
|
85
|
+
"min": 0.035,
|
|
86
|
+
"max": 1.49,
|
|
87
|
+
"mean": 0.127
|
|
88
|
+
},
|
|
89
|
+
"queueEvents": 1000
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
"corpusSize": 5000,
|
|
93
|
+
"encodeMs": {
|
|
94
|
+
"samples": 5000,
|
|
95
|
+
"p50": 0.109,
|
|
96
|
+
"p95": 0.174,
|
|
97
|
+
"p99": 0.938,
|
|
98
|
+
"min": 0.07,
|
|
99
|
+
"max": 3.618,
|
|
100
|
+
"mean": 0.136
|
|
101
|
+
},
|
|
102
|
+
"hybridRecallMs": {
|
|
103
|
+
"samples": 50,
|
|
104
|
+
"p50": 0.735,
|
|
105
|
+
"p95": 0.867,
|
|
106
|
+
"p99": 4.228,
|
|
107
|
+
"min": 0.688,
|
|
108
|
+
"max": 4.228,
|
|
109
|
+
"mean": 0.816
|
|
110
|
+
},
|
|
111
|
+
"postEncodeQueueMs": {
|
|
112
|
+
"samples": 5000,
|
|
113
|
+
"p50": 0.116,
|
|
114
|
+
"p95": 0.242,
|
|
115
|
+
"p99": 0.978,
|
|
116
|
+
"min": 0.034,
|
|
117
|
+
"max": 6.272,
|
|
118
|
+
"mean": 0.124
|
|
119
|
+
},
|
|
120
|
+
"queueEvents": 5000
|
|
121
|
+
}
|
|
122
|
+
]
|
|
123
|
+
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { existsSync } from 'node:fs';
|
|
2
|
+
import { basename, resolve } from 'node:path';
|
|
3
|
+
import { fileURLToPath, pathToFileURL } from 'node:url';
|
|
4
|
+
import { validateGuardBenchAdapter } from './guardbench.js';
|
|
5
|
+
import { publicPath } from './public-paths.mjs';
|
|
6
|
+
|
|
7
|
+
// Adapter module validated when --adapter is not supplied.
const DEFAULT_ADAPTER = 'benchmarks/adapters/example-allow.mjs';

/**
 * Parse command-line tokens for the adapter-module validator.
 *
 * Recognized tokens:
 *   --adapter <path>  adapter module to validate (default: DEFAULT_ADAPTER)
 *   --json            request the machine-readable report
 *   --help, -h        request the usage text
 *
 * @param {string[]} [argv] - Argument tokens; defaults to `process.argv.slice(2)`.
 * @returns {{ adapter: string, json: boolean, help?: boolean }} Parsed options.
 *   `help` is only present when --help / -h was passed.
 * @throws {Error} On an unrecognized token, or when --adapter has no value.
 */
export function parseAdapterModuleValidatorArgs(argv = process.argv.slice(2)) {
  const args = {
    adapter: DEFAULT_ADAPTER,
    json: false,
  };

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];
    if (token === '--adapter') {
      const value = argv[i + 1];
      // A missing (or empty) value previously fell through to the generic
      // "Unknown argument" error, which blamed a valid flag. Name the real problem.
      if (!value) throw new Error(`Missing value for argument: ${token}`);
      args.adapter = value;
      i++; // the value token has been consumed
    } else if (token === '--json') {
      args.json = true;
    } else if (token === '--help' || token === '-h') {
      args.help = true;
    } else {
      throw new Error(`Unknown argument: ${token}`);
    }
  }

  return args;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Build the help text printed for --help / -h.
 *
 * The text names the script, lists the --adapter and --json options, and
 * interpolates DEFAULT_ADAPTER as the documented default adapter path.
 *
 * @returns {string} Multi-line usage message.
 */
function usage() {
|
|
27
|
+
return `Usage: node benchmarks/validate-adapter-module.mjs [options]
|
|
28
|
+
|
|
29
|
+
Options:
|
|
30
|
+
--adapter <path> ESM GuardBench adapter module. Default: ${DEFAULT_ADAPTER}.
|
|
31
|
+
--json Print the machine-readable validation report.
|
|
32
|
+
`;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/**
 * Load an adapter module from disk and check it against the GuardBench
 * adapter contract.
 *
 * The module is imported dynamically. If it exports a
 * `createGuardBenchAdapter` function, that factory is awaited to obtain the
 * adapter; otherwise the `default` export is used, falling back to the named
 * `adapter` export. The candidate is then passed to
 * `validateGuardBenchAdapter`. Problems are collected in `failures` rather
 * than thrown.
 *
 * @param {{ adapter?: string }} [options] - `adapter` is the module path;
 *   defaults to DEFAULT_ADAPTER. It is resolved with `path.resolve`.
 * @returns {Promise<{
 *   ok: boolean,
 *   adapterPath: string,
 *   moduleFile: string,
 *   adapter: { name: *, description: *, hasSetup: boolean, hasDecide: boolean, hasCleanup: boolean } | null,
 *   contract: { moduleFormat: string, exports: string[], requiredMethods: string[], optionalMethods: string[] },
 *   failures: string[],
 * }>} Validation report. `ok` is true only when `failures` is empty;
 *   `adapterPath` is the resolved path passed through `publicPath`.
 */
export async function validateAdapterModuleFile(options = {}) {
  const adapterPath = resolve(options.adapter ?? DEFAULT_ADAPTER);
  const failures = [];
  let adapter = null;

  if (!existsSync(adapterPath)) {
    failures.push(`Adapter not found: ${adapterPath}`);
  } else {
    try {
      const mod = await import(pathToFileURL(adapterPath).href);
      const candidate = typeof mod.createGuardBenchAdapter === 'function'
        ? await mod.createGuardBenchAdapter()
        : mod.default ?? mod.adapter;
      adapter = validateGuardBenchAdapter(candidate, adapterPath);
    } catch (error) {
      // The imported module is arbitrary third-party code and JavaScript can
      // throw any value. Reading `.message` blindly would record `undefined`
      // for a thrown string, or crash this handler for a thrown null.
      failures.push(typeof error?.message === 'string' ? error.message : String(error));
    }
  }

  const adapterSummary = adapter
    ? {
        name: adapter.name,
        description: adapter.description ?? null,
        hasSetup: typeof adapter.setup === 'function',
        hasDecide: typeof adapter.decide === 'function',
        hasCleanup: typeof adapter.cleanup === 'function',
      }
    : null;

  return {
    ok: failures.length === 0,
    adapterPath: publicPath(adapterPath),
    moduleFile: basename(adapterPath),
    adapter: adapterSummary,
    // Static description of what an adapter module is expected to provide.
    contract: {
      moduleFormat: 'ESM',
      exports: ['default', 'adapter', 'createGuardBenchAdapter'],
      requiredMethods: ['decide'],
      optionalMethods: ['setup', 'cleanup'],
    },
    failures,
  };
}
|
|
76
|
+
|
|
77
|
+
/**
 * CLI entry point for the adapter-module validator.
 *
 * Parses process arguments, prints the usage text for --help, otherwise runs
 * the validation and prints either the JSON report (--json) or a
 * human-readable summary. A failed validation results in exit status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseAdapterModuleValidatorArgs();
  if (args.help) {
    console.log(usage());
    return;
  }

  const validation = await validateAdapterModuleFile(args);
  if (args.json) {
    console.log(JSON.stringify(validation, null, 2));
  } else if (validation.ok) {
    console.log(`GuardBench adapter module validation passed: ${validation.adapterPath}`);
    console.log(`Adapter: ${validation.adapter.name}`);
    console.log(`Methods: setup=${validation.adapter.hasSetup}, decide=${validation.adapter.hasDecide}, cleanup=${validation.adapter.hasCleanup}`);
  } else {
    console.error('GuardBench adapter module validation failed:');
    for (const failure of validation.failures) console.error(`- ${failure}`);
  }

  // Set the exit code instead of calling process.exit(): exit() terminates
  // immediately and can drop output that is still being written to a pipe,
  // which would truncate the --json report for a consuming process.
  if (!validation.ok) process.exitCode = 1;
}
|
|
98
|
+
|
|
99
|
+
// Run the CLI only when this file is the script Node was started with;
// importing the module for its exports must not trigger main().
if (process.argv[1]) {
  const entryFile = resolve(process.argv[1]);
  if (entryFile === fileURLToPath(import.meta.url)) {
    main().catch((failure) => {
      // Argument errors and unexpected rejections end up here.
      console.error(failure.stack ?? failure.message);
      process.exit(1);
    });
  }
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
2
|
+
import { resolve } from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
import { validateSchema } from './validate-guardbench-artifacts.mjs';
|
|
5
|
+
import { validateAdapterModuleFile } from './validate-adapter-module.mjs';
|
|
6
|
+
import { publicPath } from './public-paths.mjs';
|
|
7
|
+
|
|
8
|
+
// Registry and schema files used when --registry / --schema are not supplied.
const DEFAULT_REGISTRY = 'benchmarks/adapters/registry.json';
const DEFAULT_SCHEMA = 'benchmarks/schemas/guardbench-adapter-registry.schema.json';

/**
 * Parse command-line tokens for the adapter-registry validator.
 *
 * Recognized tokens:
 *   --registry <path>  registry JSON file (default: DEFAULT_REGISTRY)
 *   --schema <path>    registry JSON schema (default: DEFAULT_SCHEMA)
 *   --json             request the machine-readable report
 *   --help, -h         request the usage text
 *
 * @param {string[]} [argv] - Argument tokens; defaults to `process.argv.slice(2)`.
 * @returns {{ registry: string, schema: string, json: boolean, help?: boolean }}
 *   Parsed options. `help` is only present when --help / -h was passed.
 * @throws {Error} On an unrecognized token, or when a value flag has no value.
 */
export function parseAdapterRegistryValidatorArgs(argv = process.argv.slice(2)) {
  const args = {
    registry: DEFAULT_REGISTRY,
    schema: DEFAULT_SCHEMA,
    json: false,
  };

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];
    if (token === '--registry' || token === '--schema') {
      const value = argv[i + 1];
      // A missing (or empty) value previously fell through to the generic
      // "Unknown argument" error, which blamed a valid flag. Name the real problem.
      if (!value) throw new Error(`Missing value for argument: ${token}`);
      if (token === '--registry') args.registry = value;
      else args.schema = value;
      i++; // the value token has been consumed
    } else if (token === '--json') {
      args.json = true;
    } else if (token === '--help' || token === '-h') {
      args.help = true;
    } else {
      throw new Error(`Unknown argument: ${token}`);
    }
  }

  return args;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Build the help text printed for --help / -h.
 *
 * The text names the script, lists the --registry, --schema and --json
 * options, and interpolates DEFAULT_REGISTRY and DEFAULT_SCHEMA as the
 * documented defaults.
 *
 * @returns {string} Multi-line usage message.
 */
function usage() {
|
|
31
|
+
return `Usage: node benchmarks/validate-adapter-registry.mjs [options]
|
|
32
|
+
|
|
33
|
+
Options:
|
|
34
|
+
--registry <path> Adapter registry JSON. Default: ${DEFAULT_REGISTRY}.
|
|
35
|
+
--schema <path> Adapter registry JSON schema. Default: ${DEFAULT_SCHEMA}.
|
|
36
|
+
--json Print the machine-readable validation report.
|
|
37
|
+
`;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
 * Read a UTF-8 file and parse it as JSON.
 *
 * @param {string} path - File to read.
 * @returns {*} The parsed JSON value.
 * @throws {Error} Whatever `readFileSync` throws when the file cannot be read.
 * @throws {SyntaxError} When the content is not valid JSON. The message names
 *   the file and the original parser error is attached as `cause`.
 */
function readJson(path) {
  const raw = readFileSync(path, 'utf-8');
  // JSON.parse rejects a leading byte-order mark, which some editors add
  // when saving UTF-8 files.
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  try {
    return JSON.parse(text);
  } catch (error) {
    // The parser message alone does not say which file was malformed, and the
    // caller reads more than one JSON file into a shared failure list.
    throw new SyntaxError(`Invalid JSON in ${path}: ${error.message}`, { cause: error });
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Validate the adapter registry file and every adapter it lists.
 *
 * Steps, all of which append to a single `failures` list instead of throwing:
 *   1. Read the registry JSON and the schema JSON.
 *   2. Run `validateSchema` on the registry (only if the registry was read).
 *   3. For each registry entry: reject duplicate ids, cross-check
 *      `credentialMode` against `requiredEnv`, require the `moduleValidate`
 *      and `selfTest` commands to mention the adapter path, require the path
 *      to exist, then run `validateAdapterModuleFile` on it and compare the
 *      module's reported name with the registry name.
 *
 * @param {{ registry?: string, schema?: string }} [options] - File paths;
 *   default to DEFAULT_REGISTRY and DEFAULT_SCHEMA.
 * @returns {Promise<{
 *   ok: boolean,
 *   registry: string,
 *   schema: string,
 *   adapters: Array<{ id: *, ok: boolean, adapter: *, credentialMode: *, failures: string[] }>,
 *   failures: string[],
 * }>} Validation report. `ok` is true only when `failures` is empty;
 *   `registry` and `schema` are the resolved paths passed through `publicPath`.
 */
export async function validateAdapterRegistry(options = {}) {
  const registryPath = resolve(options.registry ?? DEFAULT_REGISTRY);
  const schemaPath = resolve(options.schema ?? DEFAULT_SCHEMA);
  const failures = [];
  let registry = null;

  try {
    registry = readJson(registryPath);
  } catch (error) {
    failures.push(error.message);
  }

  try {
    const schema = readJson(schemaPath);
    if (registry) failures.push(...validateSchema(registry, schema, 'guardbench-adapter-registry'));
  } catch (error) {
    failures.push(error.message);
  }

  const ids = new Set();
  const adapterReports = [];

  // Schema violations are recorded above but do not stop processing, so the
  // per-entry checks below must tolerate malformed entries. Otherwise the
  // validator would throw a TypeError instead of returning its report.
  const adapters = Array.isArray(registry?.adapters) ? registry.adapters : [];
  for (const adapter of adapters) {
    if (adapter === null || typeof adapter !== 'object') {
      failures.push(`Adapter entry is not an object: ${JSON.stringify(adapter)}`);
      continue;
    }

    if (ids.has(adapter.id)) failures.push(`Duplicate adapter id: ${adapter.id}`);
    ids.add(adapter.id);

    // A missing requiredEnv counts as "declares none".
    const requiredEnvCount = adapter.requiredEnv?.length ?? 0;
    if (adapter.credentialMode === 'none' && requiredEnvCount !== 0) {
      failures.push(`Adapter ${adapter.id} has credentialMode=none but declares requiredEnv`);
    }
    if (adapter.credentialMode === 'runtime-env' && requiredEnvCount === 0) {
      failures.push(`Adapter ${adapter.id} has credentialMode=runtime-env but declares no requiredEnv`);
    }

    const adapterPath = typeof adapter.path === 'string' ? adapter.path : null;
    for (const [commandName, command] of Object.entries(adapter.commands ?? {})) {
      if (commandName !== 'moduleValidate' && commandName !== 'selfTest') continue;
      const referencesPath = adapterPath !== null
        && typeof command === 'string'
        && command.includes(adapterPath);
      if (!referencesPath) {
        failures.push(`Adapter ${adapter.id} command ${commandName} does not reference ${adapter.path}`);
      }
    }

    if (adapterPath === null || !existsSync(resolve(adapterPath))) {
      failures.push(`Adapter ${adapter.id} path does not exist: ${adapter.path}`);
      continue;
    }

    const report = await validateAdapterModuleFile({ adapter: adapterPath });
    adapterReports.push({
      id: adapter.id,
      ok: report.ok,
      adapter: report.adapter,
      credentialMode: adapter.credentialMode,
      failures: report.failures,
    });
    if (!report.ok) {
      failures.push(`Adapter ${adapter.id} failed module validation: ${report.failures.join('; ')}`);
    }
    if (report.adapter?.name && report.adapter.name !== adapter.name) {
      failures.push(`Adapter ${adapter.id} registry name ${adapter.name} does not match module name ${report.adapter.name}`);
    }
  }

  return {
    ok: failures.length === 0,
    registry: publicPath(registryPath),
    schema: publicPath(schemaPath),
    adapters: adapterReports,
    failures,
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * CLI entry point for the adapter registry validator.
 *
 * Parses the command line, prints the usage text when help is requested,
 * otherwise validates the registry and reports the outcome: the full
 * validation object as JSON when `--json` is set, a short summary on stdout on
 * success, or one line per failure on stderr.
 *
 * @returns {Promise<void>} Settles once reporting is done. A failed validation
 *   is signalled through the process exit code (1).
 */
async function main() {
  const args = parseAdapterRegistryValidatorArgs();
  if (args.help) {
    console.log(usage());
    return;
  }

  const validation = await validateAdapterRegistry(args);
  if (args.json) {
    console.log(JSON.stringify(validation, null, 2));
  } else if (validation.ok) {
    console.log(`GuardBench adapter registry validation passed: ${validation.registry}`);
    console.log(`Adapters: ${validation.adapters.length}`);
  } else {
    console.error('GuardBench adapter registry validation failed:');
    for (const failure of validation.failures) console.error(`- ${failure}`);
  }

  // Set the exit code rather than calling process.exit(): a forced exit can
  // discard stdout/stderr writes that are still pending, which would truncate
  // the JSON report or the failure list when the output is redirected.
  if (!validation.ok) process.exitCode = 1;
}
|
|
128
|
+
|
|
129
|
+
// Run the CLI only when this file is the script Node was started with, so that
// importing the module for its exports has no side effects.
const invokedScriptPath = process.argv[1];
if (invokedScriptPath && resolve(invokedScriptPath) === fileURLToPath(import.meta.url)) {
  main().catch((err) => {
    // Prefer the stack trace; fall back to the bare message when there is none.
    const details = err.stack ?? err.message;
    console.error(details);
    process.exit(1);
  });
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
2
|
+
import { resolve } from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
import { validateAdapterSelfTestReport } from './adapter-self-test.mjs';
|
|
5
|
+
import { publicPath } from './public-paths.mjs';
|
|
6
|
+
|
|
7
|
+
/** Report path used when none is given; resolved against the current working directory when read. */
const DEFAULT_REPORT = 'benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json';

/**
 * Parse command-line arguments for the adapter self-test validator.
 *
 * Recognised flags:
 * - `--report <path>` / `--file <path>`: report to validate.
 * - `--schema <path>`: alternate schema path.
 * - `--json`: request machine-readable output.
 * - `--help` / `-h`: request the usage text.
 *
 * @param {string[]} [argv=process.argv.slice(2)] - Raw argument tokens.
 * @returns {{report: string, schema: (string|undefined), json: boolean, help?: boolean}}
 *   Parsed options; `help` is only present when help was requested.
 * @throws {Error} When a value-taking flag has no value, or a token is not recognised.
 */
export function parseAdapterSelfTestValidatorArgs(argv = process.argv.slice(2)) {
  const args = {
    report: DEFAULT_REPORT,
    schema: undefined,
    json: false,
  };

  // Return the token following position `index`, or fail with a message that
  // names the flag instead of misreporting it as an unknown argument.
  const valueAfter = (index, flag) => {
    const value = argv[index + 1];
    if (!value) throw new Error(`Missing value for ${flag}`);
    return value;
  };

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];
    if (token === '--report' || token === '--file') {
      args.report = valueAfter(i, token);
      i += 1; // skip the value that was just consumed
    } else if (token === '--schema') {
      args.schema = valueAfter(i, token);
      i += 1; // skip the value that was just consumed
    } else if (token === '--json') {
      args.json = true;
    } else if (token === '--help' || token === '-h') {
      args.help = true;
    } else {
      throw new Error(`Unknown argument: ${token}`);
    }
  }

  return args;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Build the command-line help text for the adapter self-test validator.
 *
 * @returns {string} Usage text covering every flag the argument parser
 *   accepts, including the `--file` alias and the help flags.
 */
function usage() {
  return `Usage: node benchmarks/validate-adapter-self-test.mjs [options]

Options:
--report <path> Adapter self-test JSON report. Default: ${DEFAULT_REPORT}.
--file <path> Alias for --report.
--schema <path> Optional alternate schema path.
--json Print the machine-readable validation report.
--help, -h Show this help text.
`;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Read a UTF-8 encoded JSON file and return the parsed value.
 *
 * @param {string} path - File to read.
 * @returns {*} The parsed JSON value.
 * @throws {Error} File-system errors from `readFileSync` are propagated unchanged.
 * @throws {SyntaxError} When the content is not valid JSON; the message names
 *   the file and the original parse error is attached as `cause`.
 */
function readJson(path) {
  const text = readFileSync(path, 'utf-8');
  // A leading byte-order mark is kept by readFileSync but rejected by
  // JSON.parse, so drop it before parsing.
  const body = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
  try {
    return JSON.parse(body);
  } catch (error) {
    // Name the file: the bare parser message gives no hint which input failed.
    throw new SyntaxError(`Invalid JSON in ${path}: ${error.message}`, { cause: error });
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Validate an adapter self-test report stored on disk.
 *
 * The report path is resolved to an absolute path. A missing file, a read or
 * parse error, and every failure returned by `validateAdapterSelfTestReport`
 * are collected as failure strings rather than thrown.
 *
 * @param {{report?: string, schema?: string}} [options] - `report` is the file
 *   to validate (falls back to `DEFAULT_REPORT`); `schema` is forwarded to the
 *   report validator.
 * @returns {{ok: boolean, report: *, adapter: *, scenarios: *,
 *   expectedScenarios: *, lowScoreAllowed: *, failures: string[]}}
 *   Summary of the validation; report-derived fields are `null` when the
 *   report could not be read or does not contain them.
 */
export function validateAdapterSelfTestFile(options = {}) {
  const reportPath = resolve(options.report ?? DEFAULT_REPORT);
  const failures = [];
  let report = null;

  if (existsSync(reportPath)) {
    try {
      // Assign before validating so the summary fields below are still filled
      // in when validation itself throws.
      report = readJson(reportPath);
      const problems = validateAdapterSelfTestReport(report, { schema: options.schema });
      failures.push(...problems);
    } catch (error) {
      failures.push(error.message);
    }
  } else {
    // NOTE(review): this message carries the resolved absolute path while the
    // returned `report` field goes through publicPath() — confirm that is intended.
    failures.push(`Missing adapter self-test report: ${reportPath}`);
  }

  const { adapter, conformance, contract } = report ?? {};

  return {
    ok: failures.length === 0,
    report: publicPath(reportPath),
    adapter: adapter?.name ?? null,
    scenarios: conformance?.scenarios ?? null,
    expectedScenarios: conformance?.expectedScenarios ?? null,
    lowScoreAllowed: contract?.lowScoreAllowed ?? null,
    failures,
  };
}
|
|
68
|
+
|
|
69
|
+
/**
 * CLI entry point for the adapter self-test validator.
 *
 * Parses the command line, prints the usage text when help is requested,
 * otherwise validates the report file and reports the outcome: the full
 * validation object as JSON when `--json` is set, a short summary on stdout on
 * success, or one line per failure on stderr.
 *
 * Kept `async` so that synchronous errors (for example from argument parsing)
 * surface as a rejected promise for the caller's `.catch()` handler.
 *
 * @returns {Promise<void>} Settles once reporting is done. A failed validation
 *   is signalled through the process exit code (1).
 */
async function main() {
  const args = parseAdapterSelfTestValidatorArgs();
  if (args.help) {
    console.log(usage());
    return;
  }

  const validation = validateAdapterSelfTestFile(args);
  if (args.json) {
    console.log(JSON.stringify(validation, null, 2));
  } else if (validation.ok) {
    console.log(`GuardBench adapter self-test validation passed: ${validation.report}`);
    console.log(`Adapter: ${validation.adapter}`);
    console.log(`Rows: ${validation.scenarios}/${validation.expectedScenarios}`);
  } else {
    console.error('GuardBench adapter self-test validation failed:');
    for (const failure of validation.failures) console.error(`- ${failure}`);
  }

  // Set the exit code rather than calling process.exit(): a forced exit can
  // discard stdout/stderr writes that are still pending, which would truncate
  // the JSON report or the failure list when the output is redirected.
  if (!validation.ok) process.exitCode = 1;
}
|
|
90
|
+
|
|
91
|
+
// Run the CLI only when this file is the script Node was started with, so that
// importing the module for its exports has no side effects.
const invokedScriptPath = process.argv[1];
if (invokedScriptPath && resolve(invokedScriptPath) === fileURLToPath(import.meta.url)) {
  main().catch((err) => {
    // Prefer the stack trace; fall back to the bare message when there is none.
    const details = err.stack ?? err.message;
    console.error(details);
    process.exit(1);
  });
}
|