audrey 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/README.md +30 -6
- package/benchmarks/adapter-self-test.mjs +6 -2
- package/benchmarks/adapters/example-allow.mjs +5 -2
- package/benchmarks/adapters/mem0-platform.mjs +19 -12
- package/benchmarks/adapters/zep-cloud.mjs +51 -27
- package/benchmarks/baselines.js +11 -6
- package/benchmarks/build-leaderboard.mjs +36 -23
- package/benchmarks/cases.js +24 -12
- package/benchmarks/create-conformance-card.mjs +12 -3
- package/benchmarks/create-submission-bundle.mjs +22 -8
- package/benchmarks/dry-run-external-adapters.mjs +24 -12
- package/benchmarks/guardbench.js +354 -124
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/guardbench-raw.json +243 -144
- package/benchmarks/output/guardbench-summary.json +354 -230
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/submission-bundle/guardbench-raw.json +243 -144
- package/benchmarks/output/submission-bundle/guardbench-summary.json +354 -230
- package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +21 -1
- package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +23 -2
- package/benchmarks/output/submission-bundle/submission-manifest.json +15 -15
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +58 -58
- package/benchmarks/perf-snapshot.js +12 -9
- package/benchmarks/perf.bench.js +14 -6
- package/benchmarks/public-paths.mjs +11 -5
- package/benchmarks/reference-results.js +10 -5
- package/benchmarks/report.js +48 -27
- package/benchmarks/run-external-guardbench.mjs +47 -25
- package/benchmarks/run.js +112 -59
- package/benchmarks/schemas/guardbench-raw.schema.json +21 -1
- package/benchmarks/schemas/guardbench-summary.schema.json +23 -2
- package/benchmarks/validate-adapter-module.mjs +13 -10
- package/benchmarks/validate-adapter-registry.mjs +16 -5
- package/benchmarks/validate-guardbench-artifacts.mjs +76 -19
- package/benchmarks/verify-external-evidence.mjs +86 -31
- package/benchmarks/verify-publication-artifacts.mjs +34 -11
- package/benchmarks/verify-submission-bundle.mjs +9 -4
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.d.ts.map +1 -1
- package/dist/mcp-server/config.js +5 -3
- package/dist/mcp-server/config.js.map +1 -1
- package/dist/mcp-server/index.d.ts +4 -3
- package/dist/mcp-server/index.d.ts.map +1 -1
- package/dist/mcp-server/index.js +479 -172
- package/dist/mcp-server/index.js.map +1 -1
- package/dist/src/action-key.d.ts.map +1 -1
- package/dist/src/action-key.js +6 -2
- package/dist/src/action-key.js.map +1 -1
- package/dist/src/adaptive.d.ts.map +1 -1
- package/dist/src/adaptive.js +4 -2
- package/dist/src/adaptive.js.map +1 -1
- package/dist/src/affect.d.ts.map +1 -1
- package/dist/src/affect.js +8 -5
- package/dist/src/affect.js.map +1 -1
- package/dist/src/audrey.d.ts +11 -1
- package/dist/src/audrey.d.ts.map +1 -1
- package/dist/src/audrey.js +110 -53
- package/dist/src/audrey.js.map +1 -1
- package/dist/src/capsule.d.ts.map +1 -1
- package/dist/src/capsule.js +37 -15
- package/dist/src/capsule.js.map +1 -1
- package/dist/src/causal.d.ts +1 -1
- package/dist/src/causal.d.ts.map +1 -1
- package/dist/src/causal.js +4 -2
- package/dist/src/causal.js.map +1 -1
- package/dist/src/confidence.d.ts.map +1 -1
- package/dist/src/confidence.js +5 -5
- package/dist/src/confidence.js.map +1 -1
- package/dist/src/consolidate.d.ts.map +1 -1
- package/dist/src/consolidate.js +17 -9
- package/dist/src/consolidate.js.map +1 -1
- package/dist/src/context.js +1 -1
- package/dist/src/context.js.map +1 -1
- package/dist/src/controller.d.ts +17 -1
- package/dist/src/controller.d.ts.map +1 -1
- package/dist/src/controller.js +73 -23
- package/dist/src/controller.js.map +1 -1
- package/dist/src/db.d.ts.map +1 -1
- package/dist/src/db.js +78 -27
- package/dist/src/db.js.map +1 -1
- package/dist/src/decay.d.ts +1 -1
- package/dist/src/decay.d.ts.map +1 -1
- package/dist/src/decay.js +1 -1
- package/dist/src/decay.js.map +1 -1
- package/dist/src/embedding.d.ts +12 -4
- package/dist/src/embedding.d.ts.map +1 -1
- package/dist/src/embedding.js +18 -16
- package/dist/src/embedding.js.map +1 -1
- package/dist/src/encode.d.ts.map +1 -1
- package/dist/src/encode.js +5 -4
- package/dist/src/encode.js.map +1 -1
- package/dist/src/events.d.ts +3 -2
- package/dist/src/events.d.ts.map +1 -1
- package/dist/src/events.js +7 -3
- package/dist/src/events.js.map +1 -1
- package/dist/src/export.d.ts.map +1 -1
- package/dist/src/export.js +21 -7
- package/dist/src/export.js.map +1 -1
- package/dist/src/feedback.d.ts.map +1 -1
- package/dist/src/feedback.js +1 -1
- package/dist/src/feedback.js.map +1 -1
- package/dist/src/forget.d.ts.map +1 -1
- package/dist/src/forget.js +12 -6
- package/dist/src/forget.js.map +1 -1
- package/dist/src/fts.d.ts.map +1 -1
- package/dist/src/fts.js +20 -8
- package/dist/src/fts.js.map +1 -1
- package/dist/src/hybrid-recall.d.ts.map +1 -1
- package/dist/src/hybrid-recall.js +12 -6
- package/dist/src/hybrid-recall.js.map +1 -1
- package/dist/src/impact.d.ts.map +1 -1
- package/dist/src/impact.js +26 -10
- package/dist/src/impact.js.map +1 -1
- package/dist/src/import.d.ts.map +1 -1
- package/dist/src/import.js +11 -6
- package/dist/src/import.js.map +1 -1
- package/dist/src/index.d.ts +5 -4
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +4 -4
- package/dist/src/index.js.map +1 -1
- package/dist/src/interference.d.ts.map +1 -1
- package/dist/src/interference.js +10 -5
- package/dist/src/interference.js.map +1 -1
- package/dist/src/introspect.d.ts.map +1 -1
- package/dist/src/introspect.js +12 -6
- package/dist/src/introspect.js.map +1 -1
- package/dist/src/llm.d.ts +2 -2
- package/dist/src/llm.d.ts.map +1 -1
- package/dist/src/llm.js +6 -6
- package/dist/src/llm.js.map +1 -1
- package/dist/src/migrate.d.ts.map +1 -1
- package/dist/src/migrate.js +10 -4
- package/dist/src/migrate.js.map +1 -1
- package/dist/src/preflight.d.ts.map +1 -1
- package/dist/src/preflight.js +6 -8
- package/dist/src/preflight.js.map +1 -1
- package/dist/src/profile.d.ts.map +1 -1
- package/dist/src/profile.js.map +1 -1
- package/dist/src/promote.d.ts.map +1 -1
- package/dist/src/promote.js +16 -7
- package/dist/src/promote.js.map +1 -1
- package/dist/src/prompts.d.ts.map +1 -1
- package/dist/src/prompts.js +1 -2
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/recall.d.ts.map +1 -1
- package/dist/src/recall.js +85 -18
- package/dist/src/recall.js.map +1 -1
- package/dist/src/redact.d.ts.map +1 -1
- package/dist/src/redact.js +9 -4
- package/dist/src/redact.js.map +1 -1
- package/dist/src/reflexes.d.ts.map +1 -1
- package/dist/src/reflexes.js +1 -7
- package/dist/src/reflexes.js.map +1 -1
- package/dist/src/rollback.d.ts.map +1 -1
- package/dist/src/rollback.js +4 -2
- package/dist/src/rollback.js.map +1 -1
- package/dist/src/routes.d.ts.map +1 -1
- package/dist/src/routes.js +37 -14
- package/dist/src/routes.js.map +1 -1
- package/dist/src/rules-compiler.d.ts.map +1 -1
- package/dist/src/rules-compiler.js +24 -2
- package/dist/src/rules-compiler.js.map +1 -1
- package/dist/src/server.js +2 -2
- package/dist/src/server.js.map +1 -1
- package/dist/src/tool-trace.d.ts +2 -2
- package/dist/src/tool-trace.d.ts.map +1 -1
- package/dist/src/tool-trace.js +12 -4
- package/dist/src/tool-trace.js.map +1 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/ulid.js +1 -1
- package/dist/src/ulid.js.map +1 -1
- package/dist/src/utils.d.ts.map +1 -1
- package/dist/src/utils.js.map +1 -1
- package/dist/src/validate.d.ts.map +1 -1
- package/dist/src/validate.js +20 -10
- package/dist/src/validate.js.map +1 -1
- package/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/audrey-paper-v1.md +6 -6
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +6 -6
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +30 -6
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +243 -144
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +354 -230
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +15 -15
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +52 -52
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +21 -1
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +23 -2
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +6 -6
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +6 -6
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +18 -5
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +40 -40
- package/examples/fintech-ops-demo.js +12 -5
- package/examples/healthcare-ops-demo.js +8 -4
- package/examples/ollama-memory-agent.js +41 -13
- package/examples/stripe-demo.js +12 -5
- package/package.json +18 -5
- package/scripts/audit-release-completion.mjs +179 -101
- package/scripts/create-arxiv-source.mjs +20 -14
- package/scripts/create-paper-submission-bundle.mjs +6 -2
- package/scripts/finalize-release.mjs +111 -36
- package/scripts/prepare-release-cut.mjs +14 -6
- package/scripts/publish-release-bundle.mjs +62 -23
- package/scripts/publish-release-github-api.mjs +89 -24
- package/scripts/smoke-cli.js +26 -6
- package/scripts/sync-paper-artifacts.mjs +5 -1
- package/scripts/verify-arxiv-compile.mjs +52 -16
- package/scripts/verify-arxiv-source.mjs +45 -15
- package/scripts/verify-browser-launch-plan.mjs +28 -11
- package/scripts/verify-browser-launch-results.mjs +32 -14
- package/scripts/verify-paper-artifacts.mjs +539 -79
- package/scripts/verify-paper-claims.mjs +48 -20
- package/scripts/verify-paper-submission-bundle.mjs +22 -11
- package/scripts/verify-publication-pack.mjs +23 -9
- package/scripts/verify-release-readiness.mjs +250 -71
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import { existsSync, readFileSync, writeFileSync } from 'node:fs';
|
|
2
2
|
import { createHash } from 'node:crypto';
|
|
3
3
|
import { join, resolve } from 'node:path';
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
computeGuardBenchArtifactHashes,
|
|
6
|
+
validateGuardBenchArtifacts,
|
|
7
|
+
} from './validate-guardbench-artifacts.mjs';
|
|
5
8
|
import { publicArtifactValue, publicPath } from './public-paths.mjs';
|
|
6
9
|
|
|
7
10
|
const CARD_FILE = 'guardbench-conformance-card.json';
|
|
@@ -18,7 +21,9 @@ function sha256File(path) {
|
|
|
18
21
|
function findExternalSubject(summary, requestedAdapter) {
|
|
19
22
|
const externalSubjects = (summary.manifest?.subjects ?? []).filter(subject => subject.external);
|
|
20
23
|
if (requestedAdapter) {
|
|
21
|
-
const requested = externalSubjects.find(
|
|
24
|
+
const requested = externalSubjects.find(
|
|
25
|
+
subject => subject.name === requestedAdapter || subject.id === requestedAdapter,
|
|
26
|
+
);
|
|
22
27
|
if (requested) return requested;
|
|
23
28
|
}
|
|
24
29
|
return externalSubjects.length === 1 ? externalSubjects[0] : null;
|
|
@@ -57,7 +62,11 @@ export function buildGuardBenchConformanceCard(options = {}) {
|
|
|
57
62
|
manifestVersion: summary.manifest?.manifestVersion ?? null,
|
|
58
63
|
suiteId: summary.manifest?.suiteId ?? null,
|
|
59
64
|
subject: {
|
|
60
|
-
name:
|
|
65
|
+
name:
|
|
66
|
+
systemSummary?.system ??
|
|
67
|
+
metadata?.adapterConformance?.adapter ??
|
|
68
|
+
metadata?.adapter ??
|
|
69
|
+
'unknown',
|
|
61
70
|
requestedAdapter: metadata?.adapterConformance?.requestedAdapter ?? metadata?.adapter ?? null,
|
|
62
71
|
external: Boolean(externalSubject?.external ?? metadata),
|
|
63
72
|
},
|
|
@@ -67,7 +67,9 @@ export function writeGuardBenchSubmissionBundle(options = {}) {
|
|
|
67
67
|
writeGuardBenchConformanceCard({ dir: sourceDir });
|
|
68
68
|
const sourceValidation = validateGuardBenchArtifacts({ dir: sourceDir, schemasDir });
|
|
69
69
|
if (!sourceValidation.ok) {
|
|
70
|
-
throw new Error(
|
|
70
|
+
throw new Error(
|
|
71
|
+
`Cannot create GuardBench submission bundle from invalid artifacts: ${sourceValidation.failures.join('; ')}`,
|
|
72
|
+
);
|
|
71
73
|
}
|
|
72
74
|
|
|
73
75
|
rmSync(outDir, { recursive: true, force: true });
|
|
@@ -89,11 +91,19 @@ export function writeGuardBenchSubmissionBundle(options = {}) {
|
|
|
89
91
|
schemasDir: join(outDir, 'schemas'),
|
|
90
92
|
});
|
|
91
93
|
const validationReportPath = join(outDir, 'validation-report.json');
|
|
92
|
-
writeFileSync(
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
94
|
+
writeFileSync(
|
|
95
|
+
validationReportPath,
|
|
96
|
+
`${JSON.stringify(
|
|
97
|
+
{
|
|
98
|
+
generatedAt: new Date().toISOString(),
|
|
99
|
+
sourceValidation,
|
|
100
|
+
bundleValidation,
|
|
101
|
+
},
|
|
102
|
+
null,
|
|
103
|
+
2,
|
|
104
|
+
)}\n`,
|
|
105
|
+
'utf-8',
|
|
106
|
+
);
|
|
97
107
|
copied.push(validationReportPath);
|
|
98
108
|
|
|
99
109
|
const card = readJson(join(outDir, 'guardbench-conformance-card.json'));
|
|
@@ -107,11 +117,15 @@ export function writeGuardBenchSubmissionBundle(options = {}) {
|
|
|
107
117
|
score: card.score,
|
|
108
118
|
conformance: card.conformance,
|
|
109
119
|
validation: bundleValidation,
|
|
110
|
-
files: copied
|
|
120
|
+
files: copied
|
|
121
|
+
.map(path => fileRecord(path, outDir))
|
|
122
|
+
.sort((a, b) => a.path.localeCompare(b.path)),
|
|
111
123
|
};
|
|
112
124
|
writeFileSync(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, 'utf-8');
|
|
113
125
|
|
|
114
|
-
const finalFiles = copied
|
|
126
|
+
const finalFiles = copied
|
|
127
|
+
.map(path => fileRecord(path, outDir))
|
|
128
|
+
.sort((a, b) => a.path.localeCompare(b.path));
|
|
115
129
|
manifest.files = finalFiles;
|
|
116
130
|
writeFileSync(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, 'utf-8');
|
|
117
131
|
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
2
2
|
import { dirname, join, resolve } from 'node:path';
|
|
3
3
|
import { fileURLToPath } from 'node:url';
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
buildExternalGuardBenchRun,
|
|
6
|
+
writeExternalRunMetadata,
|
|
7
|
+
} from './run-external-guardbench.mjs';
|
|
5
8
|
import { validateAdapterRegistry } from './validate-adapter-registry.mjs';
|
|
6
9
|
import { validateSchema } from './validate-guardbench-artifacts.mjs';
|
|
7
10
|
import { publicCommand, publicPath } from './public-paths.mjs';
|
|
@@ -70,9 +73,11 @@ export async function buildExternalAdapterDryRunMatrix(options = {}) {
|
|
|
70
73
|
const outRoot = resolve(options.outRoot ?? DEFAULT_OUT_ROOT);
|
|
71
74
|
const registryValidation = await validateAdapterRegistry({ registry: registryPath });
|
|
72
75
|
const registry = readJson(registryPath);
|
|
73
|
-
const adapters = registry.adapters.filter(
|
|
74
|
-
adapter
|
|
75
|
-
|
|
76
|
+
const adapters = registry.adapters.filter(
|
|
77
|
+
adapter =>
|
|
78
|
+
adapter.status === 'external-system' &&
|
|
79
|
+
(options.includeCredentialFree || adapter.credentialMode === 'runtime-env'),
|
|
80
|
+
);
|
|
76
81
|
const rows = [];
|
|
77
82
|
const failures = [];
|
|
78
83
|
|
|
@@ -81,12 +86,15 @@ export async function buildExternalAdapterDryRunMatrix(options = {}) {
|
|
|
81
86
|
}
|
|
82
87
|
|
|
83
88
|
for (const adapter of adapters) {
|
|
84
|
-
const run = buildExternalGuardBenchRun(
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
89
|
+
const run = buildExternalGuardBenchRun(
|
|
90
|
+
{
|
|
91
|
+
adapter: adapter.id,
|
|
92
|
+
outDir: join(outRoot, adapter.id),
|
|
93
|
+
check: true,
|
|
94
|
+
json: true,
|
|
95
|
+
},
|
|
96
|
+
options.env ?? process.env,
|
|
97
|
+
);
|
|
90
98
|
const metadata = {
|
|
91
99
|
suite: 'GuardBench external adapter run',
|
|
92
100
|
startedAt: new Date().toISOString(),
|
|
@@ -126,7 +134,9 @@ export async function buildExternalAdapterDryRunMatrix(options = {}) {
|
|
|
126
134
|
};
|
|
127
135
|
const schemaFailures = validateExternalAdapterDryRunMatrix(matrix, options);
|
|
128
136
|
if (schemaFailures.length > 0) {
|
|
129
|
-
throw new Error(
|
|
137
|
+
throw new Error(
|
|
138
|
+
`GuardBench external adapter dry-run schema validation failed: ${schemaFailures.join('; ')}`,
|
|
139
|
+
);
|
|
130
140
|
}
|
|
131
141
|
return matrix;
|
|
132
142
|
}
|
|
@@ -143,7 +153,9 @@ async function main() {
|
|
|
143
153
|
if (args.json) {
|
|
144
154
|
console.log(JSON.stringify(matrix, null, 2));
|
|
145
155
|
} else if (matrix.ok) {
|
|
146
|
-
console.log(
|
|
156
|
+
console.log(
|
|
157
|
+
`GuardBench external adapter dry-run matrix passed: ${matrix.adapters.length} adapter(s)`,
|
|
158
|
+
);
|
|
147
159
|
for (const row of matrix.adapters) {
|
|
148
160
|
const missing = row.missingEnv.length ? `missing ${row.missingEnv.join(', ')}` : 'ready';
|
|
149
161
|
console.log(`- ${row.id}: ${missing}; metadata ${row.metadataPath}`);
|