audrey 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/README.md +30 -6
- package/benchmarks/adapter-self-test.mjs +6 -2
- package/benchmarks/adapters/example-allow.mjs +5 -2
- package/benchmarks/adapters/mem0-platform.mjs +19 -12
- package/benchmarks/adapters/zep-cloud.mjs +51 -27
- package/benchmarks/baselines.js +11 -6
- package/benchmarks/build-leaderboard.mjs +36 -23
- package/benchmarks/cases.js +24 -12
- package/benchmarks/create-conformance-card.mjs +12 -3
- package/benchmarks/create-submission-bundle.mjs +22 -8
- package/benchmarks/dry-run-external-adapters.mjs +24 -12
- package/benchmarks/guardbench.js +354 -124
- package/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
- package/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/guardbench-raw.json +243 -144
- package/benchmarks/output/guardbench-summary.json +354 -230
- package/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/benchmarks/output/submission-bundle/guardbench-conformance-card.json +12 -12
- package/benchmarks/output/submission-bundle/guardbench-raw.json +243 -144
- package/benchmarks/output/submission-bundle/guardbench-summary.json +354 -230
- package/benchmarks/output/submission-bundle/schemas/guardbench-raw.schema.json +21 -1
- package/benchmarks/output/submission-bundle/schemas/guardbench-summary.schema.json +23 -2
- package/benchmarks/output/submission-bundle/submission-manifest.json +15 -15
- package/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/benchmarks/output/summary.json +58 -58
- package/benchmarks/perf-snapshot.js +12 -9
- package/benchmarks/perf.bench.js +14 -6
- package/benchmarks/public-paths.mjs +11 -5
- package/benchmarks/reference-results.js +10 -5
- package/benchmarks/report.js +48 -27
- package/benchmarks/run-external-guardbench.mjs +47 -25
- package/benchmarks/run.js +112 -59
- package/benchmarks/schemas/guardbench-raw.schema.json +21 -1
- package/benchmarks/schemas/guardbench-summary.schema.json +23 -2
- package/benchmarks/validate-adapter-module.mjs +13 -10
- package/benchmarks/validate-adapter-registry.mjs +16 -5
- package/benchmarks/validate-guardbench-artifacts.mjs +76 -19
- package/benchmarks/verify-external-evidence.mjs +86 -31
- package/benchmarks/verify-publication-artifacts.mjs +34 -11
- package/benchmarks/verify-submission-bundle.mjs +9 -4
- package/dist/mcp-server/config.d.ts +1 -1
- package/dist/mcp-server/config.d.ts.map +1 -1
- package/dist/mcp-server/config.js +5 -3
- package/dist/mcp-server/config.js.map +1 -1
- package/dist/mcp-server/index.d.ts +4 -3
- package/dist/mcp-server/index.d.ts.map +1 -1
- package/dist/mcp-server/index.js +479 -172
- package/dist/mcp-server/index.js.map +1 -1
- package/dist/src/action-key.d.ts.map +1 -1
- package/dist/src/action-key.js +6 -2
- package/dist/src/action-key.js.map +1 -1
- package/dist/src/adaptive.d.ts.map +1 -1
- package/dist/src/adaptive.js +4 -2
- package/dist/src/adaptive.js.map +1 -1
- package/dist/src/affect.d.ts.map +1 -1
- package/dist/src/affect.js +8 -5
- package/dist/src/affect.js.map +1 -1
- package/dist/src/audrey.d.ts +11 -1
- package/dist/src/audrey.d.ts.map +1 -1
- package/dist/src/audrey.js +110 -53
- package/dist/src/audrey.js.map +1 -1
- package/dist/src/capsule.d.ts.map +1 -1
- package/dist/src/capsule.js +37 -15
- package/dist/src/capsule.js.map +1 -1
- package/dist/src/causal.d.ts +1 -1
- package/dist/src/causal.d.ts.map +1 -1
- package/dist/src/causal.js +4 -2
- package/dist/src/causal.js.map +1 -1
- package/dist/src/confidence.d.ts.map +1 -1
- package/dist/src/confidence.js +5 -5
- package/dist/src/confidence.js.map +1 -1
- package/dist/src/consolidate.d.ts.map +1 -1
- package/dist/src/consolidate.js +17 -9
- package/dist/src/consolidate.js.map +1 -1
- package/dist/src/context.js +1 -1
- package/dist/src/context.js.map +1 -1
- package/dist/src/controller.d.ts +17 -1
- package/dist/src/controller.d.ts.map +1 -1
- package/dist/src/controller.js +73 -23
- package/dist/src/controller.js.map +1 -1
- package/dist/src/db.d.ts.map +1 -1
- package/dist/src/db.js +78 -27
- package/dist/src/db.js.map +1 -1
- package/dist/src/decay.d.ts +1 -1
- package/dist/src/decay.d.ts.map +1 -1
- package/dist/src/decay.js +1 -1
- package/dist/src/decay.js.map +1 -1
- package/dist/src/embedding.d.ts +12 -4
- package/dist/src/embedding.d.ts.map +1 -1
- package/dist/src/embedding.js +18 -16
- package/dist/src/embedding.js.map +1 -1
- package/dist/src/encode.d.ts.map +1 -1
- package/dist/src/encode.js +5 -4
- package/dist/src/encode.js.map +1 -1
- package/dist/src/events.d.ts +3 -2
- package/dist/src/events.d.ts.map +1 -1
- package/dist/src/events.js +7 -3
- package/dist/src/events.js.map +1 -1
- package/dist/src/export.d.ts.map +1 -1
- package/dist/src/export.js +21 -7
- package/dist/src/export.js.map +1 -1
- package/dist/src/feedback.d.ts.map +1 -1
- package/dist/src/feedback.js +1 -1
- package/dist/src/feedback.js.map +1 -1
- package/dist/src/forget.d.ts.map +1 -1
- package/dist/src/forget.js +12 -6
- package/dist/src/forget.js.map +1 -1
- package/dist/src/fts.d.ts.map +1 -1
- package/dist/src/fts.js +20 -8
- package/dist/src/fts.js.map +1 -1
- package/dist/src/hybrid-recall.d.ts.map +1 -1
- package/dist/src/hybrid-recall.js +12 -6
- package/dist/src/hybrid-recall.js.map +1 -1
- package/dist/src/impact.d.ts.map +1 -1
- package/dist/src/impact.js +26 -10
- package/dist/src/impact.js.map +1 -1
- package/dist/src/import.d.ts.map +1 -1
- package/dist/src/import.js +11 -6
- package/dist/src/import.js.map +1 -1
- package/dist/src/index.d.ts +5 -4
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +4 -4
- package/dist/src/index.js.map +1 -1
- package/dist/src/interference.d.ts.map +1 -1
- package/dist/src/interference.js +10 -5
- package/dist/src/interference.js.map +1 -1
- package/dist/src/introspect.d.ts.map +1 -1
- package/dist/src/introspect.js +12 -6
- package/dist/src/introspect.js.map +1 -1
- package/dist/src/llm.d.ts +2 -2
- package/dist/src/llm.d.ts.map +1 -1
- package/dist/src/llm.js +6 -6
- package/dist/src/llm.js.map +1 -1
- package/dist/src/migrate.d.ts.map +1 -1
- package/dist/src/migrate.js +10 -4
- package/dist/src/migrate.js.map +1 -1
- package/dist/src/preflight.d.ts.map +1 -1
- package/dist/src/preflight.js +6 -8
- package/dist/src/preflight.js.map +1 -1
- package/dist/src/profile.d.ts.map +1 -1
- package/dist/src/profile.js.map +1 -1
- package/dist/src/promote.d.ts.map +1 -1
- package/dist/src/promote.js +16 -7
- package/dist/src/promote.js.map +1 -1
- package/dist/src/prompts.d.ts.map +1 -1
- package/dist/src/prompts.js +1 -2
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/recall.d.ts.map +1 -1
- package/dist/src/recall.js +85 -18
- package/dist/src/recall.js.map +1 -1
- package/dist/src/redact.d.ts.map +1 -1
- package/dist/src/redact.js +9 -4
- package/dist/src/redact.js.map +1 -1
- package/dist/src/reflexes.d.ts.map +1 -1
- package/dist/src/reflexes.js +1 -7
- package/dist/src/reflexes.js.map +1 -1
- package/dist/src/rollback.d.ts.map +1 -1
- package/dist/src/rollback.js +4 -2
- package/dist/src/rollback.js.map +1 -1
- package/dist/src/routes.d.ts.map +1 -1
- package/dist/src/routes.js +37 -14
- package/dist/src/routes.js.map +1 -1
- package/dist/src/rules-compiler.d.ts.map +1 -1
- package/dist/src/rules-compiler.js +24 -2
- package/dist/src/rules-compiler.js.map +1 -1
- package/dist/src/server.js +2 -2
- package/dist/src/server.js.map +1 -1
- package/dist/src/tool-trace.d.ts +2 -2
- package/dist/src/tool-trace.d.ts.map +1 -1
- package/dist/src/tool-trace.js +12 -4
- package/dist/src/tool-trace.js.map +1 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/ulid.js +1 -1
- package/dist/src/ulid.js.map +1 -1
- package/dist/src/utils.d.ts.map +1 -1
- package/dist/src/utils.js.map +1 -1
- package/dist/src/validate.d.ts.map +1 -1
- package/dist/src/validate.js +20 -10
- package/dist/src/validate.js.map +1 -1
- package/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/audrey-paper-v1.md +6 -6
- package/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/arxiv/main.tex +6 -6
- package/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/README.md +30 -6
- package/docs/paper/output/submission-bundle/benchmarks/output/adapter-self-test/guardbench-adapter-self-test.json +7 -7
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-dry-run.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/external/guardbench-external-evidence.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-conformance-card.json +12 -12
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-raw.json +243 -144
- package/docs/paper/output/submission-bundle/benchmarks/output/guardbench-summary.json +354 -230
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.json +5 -5
- package/docs/paper/output/submission-bundle/benchmarks/output/leaderboard/guardbench-leaderboard.md +2 -2
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/submission-manifest.json +15 -15
- package/docs/paper/output/submission-bundle/benchmarks/output/submission-bundle/validation-report.json +1 -1
- package/docs/paper/output/submission-bundle/benchmarks/output/summary.json +52 -52
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-raw.schema.json +21 -1
- package/docs/paper/output/submission-bundle/benchmarks/schemas/guardbench-summary.schema.json +23 -2
- package/docs/paper/output/submission-bundle/docs/paper/07-evaluation.md +5 -5
- package/docs/paper/output/submission-bundle/docs/paper/audrey-paper-v1.md +6 -6
- package/docs/paper/output/submission-bundle/docs/paper/evidence-ledger.md +1 -1
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/arxiv-manifest.json +4 -4
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv/main.tex +6 -6
- package/docs/paper/output/submission-bundle/docs/paper/output/arxiv-compile-report.json +3 -3
- package/docs/paper/output/submission-bundle/package.json +18 -5
- package/docs/paper/output/submission-bundle/paper-submission-manifest.json +40 -40
- package/examples/fintech-ops-demo.js +12 -5
- package/examples/healthcare-ops-demo.js +8 -4
- package/examples/ollama-memory-agent.js +41 -13
- package/examples/stripe-demo.js +12 -5
- package/package.json +18 -5
- package/scripts/audit-release-completion.mjs +179 -101
- package/scripts/create-arxiv-source.mjs +20 -14
- package/scripts/create-paper-submission-bundle.mjs +6 -2
- package/scripts/finalize-release.mjs +111 -36
- package/scripts/prepare-release-cut.mjs +14 -6
- package/scripts/publish-release-bundle.mjs +62 -23
- package/scripts/publish-release-github-api.mjs +89 -24
- package/scripts/smoke-cli.js +26 -6
- package/scripts/sync-paper-artifacts.mjs +5 -1
- package/scripts/verify-arxiv-compile.mjs +52 -16
- package/scripts/verify-arxiv-source.mjs +45 -15
- package/scripts/verify-browser-launch-plan.mjs +28 -11
- package/scripts/verify-browser-launch-results.mjs +32 -14
- package/scripts/verify-paper-artifacts.mjs +539 -79
- package/scripts/verify-paper-claims.mjs +48 -20
- package/scripts/verify-paper-submission-bundle.mjs +22 -11
- package/scripts/verify-publication-pack.mjs +23 -9
- package/scripts/verify-release-readiness.mjs +250 -71
|
@@ -102,9 +102,11 @@ function matchesTrustedGitHubRepoUrl(value) {
|
|
|
102
102
|
try {
|
|
103
103
|
const url = new URL(value);
|
|
104
104
|
const pathname = url.pathname.toLowerCase();
|
|
105
|
-
return
|
|
106
|
-
|
|
107
|
-
|
|
105
|
+
return (
|
|
106
|
+
url.protocol === TRUSTED_GITHUB_REPO.protocol &&
|
|
107
|
+
url.hostname === TRUSTED_GITHUB_REPO.hostname &&
|
|
108
|
+
(pathname === TRUSTED_GITHUB_REPO_PATH || pathname.startsWith(`${TRUSTED_GITHUB_REPO_PATH}/`))
|
|
109
|
+
);
|
|
108
110
|
} catch {
|
|
109
111
|
return false;
|
|
110
112
|
}
|
|
@@ -136,7 +138,9 @@ function validateResultTarget(result, planTarget) {
|
|
|
136
138
|
}
|
|
137
139
|
|
|
138
140
|
if (result.platform !== planTarget.platform) {
|
|
139
|
-
failures.push(
|
|
141
|
+
failures.push(
|
|
142
|
+
`${result.id}: platform ${result.platform} does not match launch plan ${planTarget.platform}`,
|
|
143
|
+
);
|
|
140
144
|
}
|
|
141
145
|
if (!isAllowedPlatformUrl(result.platform, result.publicUrl)) {
|
|
142
146
|
failures.push(`${result.id}: publicUrl host is not allowed for ${result.platform}`);
|
|
@@ -144,14 +148,18 @@ function validateResultTarget(result, planTarget) {
|
|
|
144
148
|
if (!isHttpsUrl(result.artifactUrl)) {
|
|
145
149
|
failures.push(`${result.id}: artifactUrl must be null or https`);
|
|
146
150
|
}
|
|
147
|
-
if (text.includes(SEEDED_SECRET))
|
|
151
|
+
if (text.includes(SEEDED_SECRET))
|
|
152
|
+
failures.push(`${result.id}: contains raw seeded GuardBench secret`);
|
|
148
153
|
if (containsLocalPath(text)) failures.push(`${result.id}: contains local absolute path`);
|
|
149
154
|
|
|
150
155
|
if (result.status === 'pending') {
|
|
151
156
|
if (!result.blocker) failures.push(`${result.id}: pending result must record a blocker`);
|
|
152
|
-
if (result.publicUrl !== null)
|
|
153
|
-
|
|
154
|
-
if (result.
|
|
157
|
+
if (result.publicUrl !== null)
|
|
158
|
+
failures.push(`${result.id}: pending result must not record a publicUrl`);
|
|
159
|
+
if (result.submittedAt !== null)
|
|
160
|
+
failures.push(`${result.id}: pending result must not record submittedAt`);
|
|
161
|
+
if (result.operatorVerified)
|
|
162
|
+
failures.push(`${result.id}: pending result must not be operator verified`);
|
|
155
163
|
blockers.push(`${result.id}: ${result.blocker ?? 'pending launch target'}`);
|
|
156
164
|
}
|
|
157
165
|
|
|
@@ -160,8 +168,10 @@ function validateResultTarget(result, planTarget) {
|
|
|
160
168
|
if (planTarget.status === 'blocked-until-artifact-url' && !result.artifactUrl) {
|
|
161
169
|
failures.push(`${result.id}: submitted artifact-url target must record artifactUrl`);
|
|
162
170
|
}
|
|
163
|
-
if (!result.submittedAt)
|
|
164
|
-
|
|
171
|
+
if (!result.submittedAt)
|
|
172
|
+
failures.push(`${result.id}: submitted result must record submittedAt`);
|
|
173
|
+
if (!result.operatorVerified)
|
|
174
|
+
failures.push(`${result.id}: submitted result must be operator verified`);
|
|
165
175
|
if (planTarget.manualRuleCheckRequired && !result.manualRuleCheckCompleted) {
|
|
166
176
|
failures.push(`${result.id}: submitted result must record manual rule check completion`);
|
|
167
177
|
}
|
|
@@ -192,7 +202,9 @@ export async function verifyBrowserLaunchResults(options = {}) {
|
|
|
192
202
|
const planReport = await verifyBrowserLaunchPlan({ plan: planPath });
|
|
193
203
|
const planTargets = new Map((plan.targets ?? []).map(target => [target.id, target]));
|
|
194
204
|
const failures = [
|
|
195
|
-
...validateSchema(results, schema, 'audrey-browser-launch-results').map(
|
|
205
|
+
...validateSchema(results, schema, 'audrey-browser-launch-results').map(
|
|
206
|
+
failure => `browser launch results schema: ${failure}`,
|
|
207
|
+
),
|
|
196
208
|
];
|
|
197
209
|
const blockers = [];
|
|
198
210
|
const seen = new Set();
|
|
@@ -225,7 +237,9 @@ export async function verifyBrowserLaunchResults(options = {}) {
|
|
|
225
237
|
});
|
|
226
238
|
}
|
|
227
239
|
|
|
228
|
-
const planOrder = [...(plan.targets ?? [])]
|
|
240
|
+
const planOrder = [...(plan.targets ?? [])]
|
|
241
|
+
.sort((a, b) => a.order - b.order)
|
|
242
|
+
.map(target => target.id);
|
|
229
243
|
const resultOrder = [...(results.targets ?? [])].map(target => target.id);
|
|
230
244
|
if (resultOrder.join('|') !== planOrder.join('|')) {
|
|
231
245
|
failures.push(`browser launch results order must be ${planOrder.join(', ')}`);
|
|
@@ -234,7 +248,9 @@ export async function verifyBrowserLaunchResults(options = {}) {
|
|
|
234
248
|
if (!seen.has(id)) failures.push(`Missing browser launch result: ${id}`);
|
|
235
249
|
}
|
|
236
250
|
|
|
237
|
-
const notSubmitted = targetReports
|
|
251
|
+
const notSubmitted = targetReports
|
|
252
|
+
.filter(target => target.status !== 'submitted')
|
|
253
|
+
.map(target => target.id);
|
|
238
254
|
const ready = failures.length === 0 && notSubmitted.length === 0;
|
|
239
255
|
if (options.strict === true && notSubmitted.length > 0) {
|
|
240
256
|
failures.push(`strict launch readiness requires submitted targets: ${notSubmitted.join(', ')}`);
|
|
@@ -268,7 +284,9 @@ async function main() {
|
|
|
268
284
|
} else if (report.ok) {
|
|
269
285
|
const submitted = report.targets.filter(target => target.status === 'submitted').length;
|
|
270
286
|
const pending = report.targets.length - submitted;
|
|
271
|
-
console.log(
|
|
287
|
+
console.log(
|
|
288
|
+
`Browser launch results verification passed: ${submitted} submitted, ${pending} pending`,
|
|
289
|
+
);
|
|
272
290
|
} else {
|
|
273
291
|
console.error('Browser launch results verification failed:');
|
|
274
292
|
for (const failure of report.failures) console.error(`- ${failure}`);
|