audrey 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +9 -1
  3. package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
  4. package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
  5. package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
  6. package/benchmarks/output/guardbench-conformance-card.json +9 -9
  7. package/benchmarks/output/guardbench-raw.json +104 -103
  8. package/benchmarks/output/guardbench-summary.json +167 -165
  9. package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
  10. package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
  11. package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +9 -9
  12. package/benchmarks/output/submission-bundle/guardbench-raw.json +104 -103
  13. package/benchmarks/output/submission-bundle/guardbench-summary.json +167 -165
  14. package/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
  15. package/benchmarks/output/submission-bundle/validation-report.json +1 -1
  16. package/benchmarks/output/summary.json +48 -48
  17. package/dist/mcp-server/config.d.ts +1 -1
  18. package/dist/mcp-server/config.js +1 -1
  19. package/dist/mcp-server/index.d.ts +3 -344
  20. package/dist/mcp-server/index.d.ts.map +1 -1
  21. package/dist/mcp-server/index.js +6 -280
  22. package/dist/mcp-server/index.js.map +1 -1
  23. package/dist/mcp-server/tool-schemas.d.ts +341 -0
  24. package/dist/mcp-server/tool-schemas.d.ts.map +1 -0
  25. package/dist/mcp-server/tool-schemas.js +248 -0
  26. package/dist/mcp-server/tool-schemas.js.map +1 -0
  27. package/dist/mcp-server/tool-validation.d.ts +17 -0
  28. package/dist/mcp-server/tool-validation.d.ts.map +1 -0
  29. package/dist/mcp-server/tool-validation.js +41 -0
  30. package/dist/mcp-server/tool-validation.js.map +1 -0
  31. package/docs/paper/07-evaluation.md +6 -6
  32. package/docs/paper/audrey-paper-v1.md +6 -6
  33. package/docs/paper/evidence-ledger.md +1 -1
  34. package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
  35. package/docs/paper/output/arxiv/main.tex +6 -6
  36. package/docs/paper/output/arxiv-compile-report.json +3 -3
  37. package/docs/paper/output/submission-bundle/README.md +9 -1
  38. package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +4 -4
  39. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
  40. package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
  41. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +9 -9
  42. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +104 -103
  43. package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +167 -165
  44. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
  45. package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
  46. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +11 -11
  47. package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
  48. package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +60 -60
  49. package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +6 -6
  50. package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +6 -6
  51. package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
  52. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
  53. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +6 -6
  54. package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
  55. package/docs/paper/output/submission-bundle/package.json +1 -1
  56. package/docs/paper/output/submission-bundle/paper-submission-manifest.json +34 -34
  57. package/package.json +1 -1
package/CHANGELOG.md CHANGED
@@ -1,5 +1,32 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.0.3 - 2026-05-28
4
+
5
+ Housekeeping release. Nothing about how Audrey behaves has changed — this is
6
+ all under-the-hood tidying plus a friendlier README. Safe to upgrade from 1.0.2
7
+ without touching anything.
8
+
9
+ ### Cleaner code under the hood
10
+
11
+ - Started breaking up the big `mcp-server/index.ts` file (it had grown to ~3,600
12
+ lines that did everything at once). The memory-tool input schemas and the
13
+ shared validation helpers now live in their own small files
14
+ (`tool-schemas.ts`, `tool-validation.ts`). Same behavior, just easier to read
15
+ and work on. More of this tidying will follow.
16
+
17
+ ### More reliable tests
18
+
19
+ - The test suite used to need a slow, multi-step "build all the benchmark and
20
+ paper files first" step before it could run. It now sets those up
21
+ automatically, so `npm test` (or a plain `vitest run`) just works from a fresh
22
+ checkout. 785 tests pass with nothing extra to remember.
23
+
24
+ ### Friendlier docs
25
+
26
+ - The README now opens with a short "In Plain English" section that explains
27
+ what Audrey is for in everyday language, before diving into the technical
28
+ detail.
29
+
3
30
  ## 1.0.2 - 2026-05-28
4
31
 
5
32
  Maintenance and engineering-quality release. No runtime behavior change — the
package/README.md CHANGED
@@ -15,6 +15,14 @@
15
15
  </p>
16
16
  </div>
17
17
 
18
+ ## In Plain English
19
+
20
+ AI coding assistants are brilliant but forgetful. They'll happily rerun the same broken command they ran yesterday, forget the rules your team agreed on last week, and treat every new session like it's day one.
21
+
22
+ Audrey is the memory they're missing. It quietly keeps track of what worked, what failed, and what you told it — then checks that memory **before** the agent does something, so it can say "hold on, this exact command failed last time, and here's what fixed it" instead of repeating the mistake. Everything lives in one local file on your machine: no cloud, no account, and nothing about your code ever leaves your computer.
23
+
24
+ That's the whole idea. The rest of this README is the detail.
25
+
18
26
  ## Why Audrey Exists
19
27
 
20
28
  Agents forget the exact mistakes they made yesterday. They repeat broken commands, lose project-specific rules, miss contradictions, and treat every new session like a cold start.
@@ -296,7 +304,7 @@ output shapes are validated by JSON schemas under `benchmarks/schemas/`.
296
304
 
297
305
  Latest local result in this checkout: 10/10 scenarios passed, 100% prevention
298
306
  rate, 0% false-block rate, 0 raw secret leaks, 0 published artifact leaks in
299
- the raw-secret sweep, and 2.916ms / 21.17ms
307
+ the raw-secret sweep, and 3.09ms / 28.181ms
300
308
  p50/p95 guard latency under the mock-provider methodology.
301
309
 
302
310
  **Methodology caveats, on purpose.** All numbers above are produced against
package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "schemaVersion": "1.0.0",
3
3
  "suite": "GuardBench adapter self-test",
4
- "generatedAt": "2026-05-29T03:45:40.969Z",
4
+ "generatedAt": "2026-05-29T13:33:27.293Z",
5
5
  "ok": true,
6
6
  "adapter": {
7
7
  "name": "Example Allow Adapter",
@@ -27,9 +27,9 @@
27
27
  "evidenceRecall": 0.1,
28
28
  "redactionLeaks": 0,
29
29
  "latency": {
30
- "p50Ms": 0.01,
31
- "p95Ms": 0.043,
32
- "maxMs": 0.043
30
+ "p50Ms": 0.012,
31
+ "p95Ms": 0.042,
32
+ "maxMs": 0.042
33
33
  }
34
34
  },
35
35
  "contract": {
package/benchmarks/output/external/guardbench-external-dry-run.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "schemaVersion": "1.0.0",
3
3
  "suite": "GuardBench external adapter dry-run matrix",
4
- "generatedAt": "2026-05-29T03:45:41.522Z",
4
+ "generatedAt": "2026-05-29T13:33:27.818Z",
5
5
  "ok": true,
6
6
  "registry": "benchmarks/adapters/registry.json",
7
7
  "outRoot": "benchmarks/output/external",
package/benchmarks/output/external/guardbench-external-evidence.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "schemaVersion": "1.0.0",
3
3
  "suite": "GuardBench external evidence verification",
4
- "generatedAt": "2026-05-29T03:45:41.794Z",
4
+ "generatedAt": "2026-05-29T13:33:28.076Z",
5
5
  "ok": true,
6
6
  "allowPending": true,
7
7
  "registry": "benchmarks/adapters/registry.json",
package/benchmarks/output/guardbench-conformance-card.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "schemaVersion": "1.0.0",
3
3
  "suite": "GuardBench conformance card",
4
- "generatedAt": "2026-05-29T03:45:36.958Z",
4
+ "generatedAt": "2026-05-29T13:33:23.522Z",
5
5
  "sourceDir": "benchmarks/output",
6
6
  "manifestVersion": "0.2.0",
7
7
  "suiteId": "guardbench-local-comparative",
@@ -25,9 +25,9 @@
25
25
  "evidenceRecall": 1,
26
26
  "redactionLeaks": 0,
27
27
  "latency": {
28
- "p50Ms": 2.916,
29
- "p95Ms": 21.17,
30
- "maxMs": 21.17
28
+ "p50Ms": 3.09,
29
+ "p95Ms": 28.181,
30
+ "maxMs": 28.181
31
31
  }
32
32
  },
33
33
  "conformance": {
@@ -39,21 +39,21 @@
39
39
  "integrity": {
40
40
  "artifactHashes": {
41
41
  "guardbench-manifest.json": "57636ce19fdaa6e50fc3fc961d9e499a9f43632f588c713a9fefe8e8a6fa724c",
42
- "guardbench-summary.json": "e8669cd6c80dc3dc849b3c4fcc473ea706eb3a760bced69682d0dc2396b2e233",
43
- "guardbench-raw.json": "15b39fd1a65709a89455fbfcaf815daf364b204fa526d5065cc12fcaed281d28"
42
+ "guardbench-summary.json": "91f264dd889e2c639a6fc6d1b867bc228b94c84ed5120345e23dddb79c11ee74",
43
+ "guardbench-raw.json": "66d4b69087258638f3572a40e1fd59bb84067034f899eaa2c27eed2dde554b2b"
44
44
  },
45
45
  "externalRunMetadataHash": null
46
46
  },
47
47
  "provenance": {
48
- "generatedAt": "2026-05-29T03:45:36.607Z",
49
- "gitSha": "ceed2f51b615175c8bb412b96b5e5a501561189f",
48
+ "generatedAt": "2026-05-29T13:33:23.189Z",
49
+ "gitSha": "9f771bae94f5ce4cfd5d5425e300a6a440c833d2",
50
50
  "gitDirty": false,
51
51
  "node": "v24.16.0",
52
52
  "v8": "13.6.233.17-node.49",
53
53
  "platform": "linux",
54
54
  "arch": "x64",
55
55
  "osRelease": "6.17.0-1015-azure",
56
- "cpuModel": "AMD EPYC 9V74 80-Core Processor",
56
+ "cpuModel": "Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz",
57
57
  "cpuCount": 4,
58
58
  "totalMemoryGb": 15.61,
59
59
  "embeddingProvider": "mock",