audrey 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +9 -1
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +9 -9
- package/benchmarks/output/guardbench-raw.json +104 -103
- package/benchmarks/output/guardbench-summary.json +167 -165
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +9 -9
- package/benchmarks/output/submission-bundle/guardbench-raw.json +104 -103
- package/benchmarks/output/submission-bundle/guardbench-summary.json +167 -165
- package/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +48 -48
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.js +1 -1
- package/dist/mcp-server/index.d.ts +3 -344
- package/dist/mcp-server/index.d.ts.map +1 -1
- package/dist/mcp-server/index.js +6 -280
- package/dist/mcp-server/index.js.map +1 -1
- package/dist/mcp-server/tool-schemas.d.ts +341 -0
- package/dist/mcp-server/tool-schemas.d.ts.map +1 -0
- package/dist/mcp-server/tool-schemas.js +248 -0
- package/dist/mcp-server/tool-schemas.js.map +1 -0
- package/dist/mcp-server/tool-validation.d.ts +17 -0
- package/dist/mcp-server/tool-validation.d.ts.map +1 -0
- package/dist/mcp-server/tool-validation.js +41 -0
- package/dist/mcp-server/tool-validation.js.map +1 -0
- package/docs/paper/07-evaluation.md +6 -6
- package/docs/paper/audrey-paper-v1.md +6 -6
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +6 -6
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +9 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +9 -9
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +104 -103
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +167 -165
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +60 -60
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +6 -6
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +6 -6
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +6 -6
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +1 -1
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +34 -34
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,32 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.0.3 - 2026-05-28
|
|
4
|
+
|
|
5
|
+
Housekeeping release. Nothing about how Audrey behaves has changed — this is
|
|
6
|
+
all under-the-hood tidying plus a friendlier README. Safe to upgrade from 1.0.2
|
|
7
|
+
without touching anything.
|
|
8
|
+
|
|
9
|
+
### Cleaner code under the hood
|
|
10
|
+
|
|
11
|
+
- Started breaking up the big `mcp-server/index.ts` file (it had grown to ~3,600
|
|
12
|
+
lines that did everything at once). The memory-tool input schemas and the
|
|
13
|
+
shared validation helpers now live in their own small files
|
|
14
|
+
(`tool-schemas.ts`, `tool-validation.ts`). Same behavior, just easier to read
|
|
15
|
+
and work on. More of this tidying will follow.
|
|
16
|
+
|
|
17
|
+
### More reliable tests
|
|
18
|
+
|
|
19
|
+
- The test suite used to need a slow, multi-step "build all the benchmark and
|
|
20
|
+
paper files first" step before it could run. It now sets those up
|
|
21
|
+
automatically, so `npm test` (or a plain `vitest run`) just works from a fresh
|
|
22
|
+
checkout. 785 tests pass with nothing extra to remember.
|
|
23
|
+
|
|
24
|
+
### Friendlier docs
|
|
25
|
+
|
|
26
|
+
- The README now opens with a short "In Plain English" section that explains
|
|
27
|
+
what Audrey is for in everyday language, before diving into the technical
|
|
28
|
+
detail.
|
|
29
|
+
|
|
3
30
|
## 1.0.2 - 2026-05-28
|
|
4
31
|
|
|
5
32
|
Maintenance and engineering-quality release. No runtime behavior change — the
|
package/README.md
CHANGED
|
@@ -15,6 +15,14 @@
|
|
|
15
15
|
</p>
|
|
16
16
|
</div>
|
|
17
17
|
|
|
18
|
+
## In Plain English
|
|
19
|
+
|
|
20
|
+
AI coding assistants are brilliant but forgetful. They'll happily rerun the same broken command they ran yesterday, forget the rules your team agreed on last week, and treat every new session like it's day one.
|
|
21
|
+
|
|
22
|
+
Audrey is the memory they're missing. It quietly keeps track of what worked, what failed, and what you told it — then checks that memory **before** the agent does something, so it can say "hold on, this exact command failed last time, and here's what fixed it" instead of repeating the mistake. Everything lives in one local file on your machine: no cloud, no account, and nothing about your code ever leaves your computer.
|
|
23
|
+
|
|
24
|
+
That's the whole idea. The rest of this README is the detail.
|
|
25
|
+
|
|
18
26
|
## Why Audrey Exists
|
|
19
27
|
|
|
20
28
|
Agents forget the exact mistakes they made yesterday. They repeat broken commands, lose project-specific rules, miss contradictions, and treat every new session like a cold start.
|
|
@@ -296,7 +304,7 @@ output shapes are validated by JSON schemas under `benchmarks/schemas/`.
|
|
|
296
304
|
|
|
297
305
|
Latest local result in this checkout: 10/10 scenarios passed, 100% prevention
|
|
298
306
|
rate, 0% false-block rate, 0 raw secret leaks, 0 published artifact leaks in
|
|
299
|
-
the raw-secret sweep, and
|
|
307
|
+
the raw-secret sweep, and 3.09ms / 28.181ms
|
|
300
308
|
p50/p95 guard latency under the mock-provider methodology.
|
|
301
309
|
|
|
302
310
|
**Methodology caveats, on purpose.** All numbers above are produced against
|
package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": "1.0.0",
|
|
3
3
|
"suite": "GuardBench adapter self-test",
|
|
4
|
-
"generatedAt": "2026-05-
|
|
4
|
+
"generatedAt": "2026-05-29T13:33:27.293Z",
|
|
5
5
|
"ok": true,
|
|
6
6
|
"adapter": {
|
|
7
7
|
"name": "Example Allow Adapter",
|
|
@@ -27,9 +27,9 @@
|
|
|
27
27
|
"evidenceRecall": 0.1,
|
|
28
28
|
"redactionLeaks": 0,
|
|
29
29
|
"latency": {
|
|
30
|
-
"p50Ms": 0.
|
|
31
|
-
"p95Ms": 0.
|
|
32
|
-
"maxMs": 0.
|
|
30
|
+
"p50Ms": 0.012,
|
|
31
|
+
"p95Ms": 0.042,
|
|
32
|
+
"maxMs": 0.042
|
|
33
33
|
}
|
|
34
34
|
},
|
|
35
35
|
"contract": {
|
package/benchmarks/output/external/guardbench-external-dry-run.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": "1.0.0",
|
|
3
3
|
"suite": "GuardBench external adapter dry-run matrix",
|
|
4
|
-
"generatedAt": "2026-05-
|
|
4
|
+
"generatedAt": "2026-05-29T13:33:27.818Z",
|
|
5
5
|
"ok": true,
|
|
6
6
|
"registry": "benchmarks/adapters/registry.json",
|
|
7
7
|
"outRoot": "benchmarks/output/external",
|
package/benchmarks/output/guardbench-conformance-card.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": "1.0.0",
|
|
3
3
|
"suite": "GuardBench conformance card",
|
|
4
|
-
"generatedAt": "2026-05-
|
|
4
|
+
"generatedAt": "2026-05-29T13:33:23.522Z",
|
|
5
5
|
"sourceDir": "benchmarks/output",
|
|
6
6
|
"manifestVersion": "0.2.0",
|
|
7
7
|
"suiteId": "guardbench-local-comparative",
|
|
@@ -25,9 +25,9 @@
|
|
|
25
25
|
"evidenceRecall": 1,
|
|
26
26
|
"redactionLeaks": 0,
|
|
27
27
|
"latency": {
|
|
28
|
-
"p50Ms":
|
|
29
|
-
"p95Ms":
|
|
30
|
-
"maxMs":
|
|
28
|
+
"p50Ms": 3.09,
|
|
29
|
+
"p95Ms": 28.181,
|
|
30
|
+
"maxMs": 28.181
|
|
31
31
|
}
|
|
32
32
|
},
|
|
33
33
|
"conformance": {
|
|
@@ -39,21 +39,21 @@
|
|
|
39
39
|
"integrity": {
|
|
40
40
|
"artifactHashes": {
|
|
41
41
|
"guardbench-manifest.json": "57636ce19fdaa6e50fc3fc961d9e499a9f43632f588c713a9fefe8e8a6fa724c",
|
|
42
|
-
"guardbench-summary.json": "
|
|
43
|
-
"guardbench-raw.json": "
|
|
42
|
+
"guardbench-summary.json": "91f264dd889e2c639a6fc6d1b867bc228b94c84ed5120345e23dddb79c11ee74",
|
|
43
|
+
"guardbench-raw.json": "66d4b69087258638f3572a40e1fd59bb84067034f899eaa2c27eed2dde554b2b"
|
|
44
44
|
},
|
|
45
45
|
"externalRunMetadataHash": null
|
|
46
46
|
},
|
|
47
47
|
"provenance": {
|
|
48
|
-
"generatedAt": "2026-05-
|
|
49
|
-
"gitSha": "
|
|
48
|
+
"generatedAt": "2026-05-29T13:33:23.189Z",
|
|
49
|
+
"gitSha": "9f771bae94f5ce4cfd5d5425e300a6a440c833d2",
|
|
50
50
|
"gitDirty": false,
|
|
51
51
|
"node": "v24.16.0",
|
|
52
52
|
"v8": "13.6.233.17-node.49",
|
|
53
53
|
"platform": "linux",
|
|
54
54
|
"arch": "x64",
|
|
55
55
|
"osRelease": "6.17.0-1015-azure",
|
|
56
|
-
"cpuModel": "
|
|
56
|
+
"cpuModel": "Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz",
|
|
57
57
|
"cpuCount": 4,
|
|
58
58
|
"totalMemoryGb": 15.61,
|
|
59
59
|
"embeddingProvider": "mock",
|