@qulib/core 0.9.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -11
- package/dist/baseline/baseline.schema.d.ts +26 -26
- package/dist/baseline/baseline.schema.d.ts.map +1 -1
- package/dist/baseline/baseline.schema.js +1 -0
- package/dist/cli/analyze-diff-run.d.ts +77 -0
- package/dist/cli/analyze-diff-run.d.ts.map +1 -0
- package/dist/cli/analyze-diff-run.js +266 -0
- package/dist/cli/baseline-run.d.ts +55 -0
- package/dist/cli/baseline-run.d.ts.map +1 -0
- package/dist/cli/baseline-run.js +259 -0
- package/dist/cli/confidence-run.d.ts.map +1 -1
- package/dist/cli/confidence-run.js +10 -6
- package/dist/cli/index.js +4 -0
- package/dist/cli/score-automation-run.d.ts.map +1 -1
- package/dist/cli/score-automation-run.js +5 -1
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -0
- package/dist/phases/think.d.ts.map +1 -1
- package/dist/phases/think.js +4 -1
- package/dist/reporters/heatmap.d.ts +55 -0
- package/dist/reporters/heatmap.d.ts.map +1 -0
- package/dist/reporters/heatmap.js +148 -0
- package/dist/reporters/markdown-reporter.d.ts.map +1 -1
- package/dist/reporters/markdown-reporter.js +4 -1
- package/dist/schemas/confidence.schema.d.ts +2 -2
- package/dist/schemas/config.schema.d.ts.map +1 -1
- package/dist/schemas/config.schema.js +6 -1
- package/dist/schemas/gap-analysis.schema.d.ts +8 -8
- package/dist/schemas/gap-analysis.schema.js +1 -1
- package/dist/schemas/golden-manifest.schema.d.ts +137 -0
- package/dist/schemas/golden-manifest.schema.d.ts.map +1 -0
- package/dist/schemas/golden-manifest.schema.js +25 -0
- package/dist/schemas/index.d.ts +1 -0
- package/dist/schemas/index.d.ts.map +1 -1
- package/dist/schemas/index.js +1 -0
- package/dist/schemas/public-surface.schema.d.ts +15 -5
- package/dist/schemas/public-surface.schema.d.ts.map +1 -1
- package/dist/schemas/route-inventory.schema.d.ts +20 -0
- package/dist/schemas/route-inventory.schema.d.ts.map +1 -1
- package/dist/schemas/route-inventory.schema.js +4 -0
- package/dist/schemas/views.schema.d.ts +1 -1
- package/dist/tools/scoring/confidence.d.ts.map +1 -1
- package/dist/tools/scoring/confidence.js +140 -14
- package/dist/tools/scoring/prompt-leakage.d.ts +29 -0
- package/dist/tools/scoring/prompt-leakage.d.ts.map +1 -0
- package/dist/tools/scoring/prompt-leakage.js +256 -0
- package/package.json +8 -4
|
@@ -65,6 +65,10 @@ export declare const RouteSchema: z.ZodObject<{
|
|
|
65
65
|
nodeCount: number;
|
|
66
66
|
}>, "many">;
|
|
67
67
|
statusCode: z.ZodOptional<z.ZodNumber>;
|
|
68
|
+
/** Optional: response headers from the page fetch (populated by explorers that capture them). */
|
|
69
|
+
headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
70
|
+
/** Optional: first ~4000 chars of the raw HTML body (populated by explorers that capture it). */
|
|
71
|
+
bodySnippet: z.ZodOptional<z.ZodString>;
|
|
68
72
|
}, "strip", z.ZodTypeAny, {
|
|
69
73
|
path: string;
|
|
70
74
|
pageTitle: string;
|
|
@@ -84,6 +88,8 @@ export declare const RouteSchema: z.ZodObject<{
|
|
|
84
88
|
nodeCount: number;
|
|
85
89
|
}[];
|
|
86
90
|
statusCode?: number | undefined;
|
|
91
|
+
headers?: Record<string, string> | undefined;
|
|
92
|
+
bodySnippet?: string | undefined;
|
|
87
93
|
}, {
|
|
88
94
|
path: string;
|
|
89
95
|
pageTitle: string;
|
|
@@ -103,6 +109,8 @@ export declare const RouteSchema: z.ZodObject<{
|
|
|
103
109
|
nodeCount: number;
|
|
104
110
|
}[];
|
|
105
111
|
statusCode?: number | undefined;
|
|
112
|
+
headers?: Record<string, string> | undefined;
|
|
113
|
+
bodySnippet?: string | undefined;
|
|
106
114
|
}>;
|
|
107
115
|
export declare const RouteInventorySchema: z.ZodObject<{
|
|
108
116
|
scannedAt: z.ZodString;
|
|
@@ -144,6 +152,10 @@ export declare const RouteInventorySchema: z.ZodObject<{
|
|
|
144
152
|
nodeCount: number;
|
|
145
153
|
}>, "many">;
|
|
146
154
|
statusCode: z.ZodOptional<z.ZodNumber>;
|
|
155
|
+
/** Optional: response headers from the page fetch (populated by explorers that capture them). */
|
|
156
|
+
headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
157
|
+
/** Optional: first ~4000 chars of the raw HTML body (populated by explorers that capture it). */
|
|
158
|
+
bodySnippet: z.ZodOptional<z.ZodString>;
|
|
147
159
|
}, "strip", z.ZodTypeAny, {
|
|
148
160
|
path: string;
|
|
149
161
|
pageTitle: string;
|
|
@@ -163,6 +175,8 @@ export declare const RouteInventorySchema: z.ZodObject<{
|
|
|
163
175
|
nodeCount: number;
|
|
164
176
|
}[];
|
|
165
177
|
statusCode?: number | undefined;
|
|
178
|
+
headers?: Record<string, string> | undefined;
|
|
179
|
+
bodySnippet?: string | undefined;
|
|
166
180
|
}, {
|
|
167
181
|
path: string;
|
|
168
182
|
pageTitle: string;
|
|
@@ -182,6 +196,8 @@ export declare const RouteInventorySchema: z.ZodObject<{
|
|
|
182
196
|
nodeCount: number;
|
|
183
197
|
}[];
|
|
184
198
|
statusCode?: number | undefined;
|
|
199
|
+
headers?: Record<string, string> | undefined;
|
|
200
|
+
bodySnippet?: string | undefined;
|
|
185
201
|
}>, "many">;
|
|
186
202
|
pagesSkipped: z.ZodNumber;
|
|
187
203
|
budgetExceeded: z.ZodBoolean;
|
|
@@ -207,6 +223,8 @@ export declare const RouteInventorySchema: z.ZodObject<{
|
|
|
207
223
|
nodeCount: number;
|
|
208
224
|
}[];
|
|
209
225
|
statusCode?: number | undefined;
|
|
226
|
+
headers?: Record<string, string> | undefined;
|
|
227
|
+
bodySnippet?: string | undefined;
|
|
210
228
|
}[];
|
|
211
229
|
pagesSkipped: number;
|
|
212
230
|
budgetExceeded: boolean;
|
|
@@ -232,6 +250,8 @@ export declare const RouteInventorySchema: z.ZodObject<{
|
|
|
232
250
|
nodeCount: number;
|
|
233
251
|
}[];
|
|
234
252
|
statusCode?: number | undefined;
|
|
253
|
+
headers?: Record<string, string> | undefined;
|
|
254
|
+
bodySnippet?: string | undefined;
|
|
235
255
|
}[];
|
|
236
256
|
pagesSkipped: number;
|
|
237
257
|
budgetExceeded: boolean;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"route-inventory.schema.d.ts","sourceRoot":"","sources":["../../src/schemas/route-inventory.schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;EAK9B,CAAC;AAEH,eAAO,MAAM,gBAAgB;;;;;;;;;;;;EAI3B,CAAC;AAEH,eAAO,MAAM,WAAW
|
|
1
|
+
{"version":3,"file":"route-inventory.schema.d.ts","sourceRoot":"","sources":["../../src/schemas/route-inventory.schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;EAK9B,CAAC;AAEH,eAAO,MAAM,gBAAgB;;;;;;;;;;;;EAI3B,CAAC;AAEH,eAAO,MAAM,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAUtB,iGAAiG;;IAEjG,iGAAiG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAEjG,CAAC;AAEH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;QAN/B,iGAAiG;;QAEjG,iGAAiG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAUjG,CAAC;AAEH,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC,CAAC"}
|
|
@@ -20,6 +20,10 @@ export const RouteSchema = z.object({
|
|
|
20
20
|
brokenLinks: z.array(BrokenLinkSchema),
|
|
21
21
|
a11yViolations: z.array(A11yViolationSchema),
|
|
22
22
|
statusCode: z.number().int().optional(),
|
|
23
|
+
/** Optional: response headers from the page fetch (populated by explorers that capture them). */
|
|
24
|
+
headers: z.record(z.string(), z.string()).optional(),
|
|
25
|
+
/** Optional: first ~4000 chars of the raw HTML body (populated by explorers that capture it). */
|
|
26
|
+
bodySnippet: z.string().max(8000).optional(),
|
|
23
27
|
});
|
|
24
28
|
export const RouteInventorySchema = z.object({
|
|
25
29
|
scannedAt: z.string().datetime(),
|
|
@@ -211,8 +211,8 @@ export declare const AuditEntrySchema: z.ZodObject<{
|
|
|
211
211
|
recordHash: z.ZodString;
|
|
212
212
|
}, "strip", z.ZodTypeAny, {
|
|
213
213
|
computedAt: string;
|
|
214
|
-
tenantId: string;
|
|
215
214
|
schemaVersion: 1;
|
|
215
|
+
tenantId: string;
|
|
216
216
|
confidenceScore: number | null;
|
|
217
217
|
verdict: "ship" | "caution" | "hold" | "block";
|
|
218
218
|
blockers: string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"confidence.d.ts","sourceRoot":"","sources":["../../../src/tools/scoring/confidence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EACV,eAAe,
|
|
1
|
+
{"version":3,"file":"confidence.d.ts","sourceRoot":"","sources":["../../../src/tools/scoring/confidence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EACV,eAAe,EAIf,iBAAiB,EAElB,MAAM,oCAAoC,CAAC;AA8L5C;;;;GAIG;AACH,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,eAAe,GAAG,iBAAiB,CAgKlF"}
|
|
@@ -43,6 +43,18 @@ const DEFAULT_WEIGHTS = {
|
|
|
43
43
|
'human-approval': 0.0,
|
|
44
44
|
'agent-evidence': 0.0,
|
|
45
45
|
};
|
|
46
|
+
/** Model sources with non-zero default weight — the full evidence model for partial-run disclosure. */
|
|
47
|
+
const MODEL_SOURCES = Object.entries(DEFAULT_WEIGHTS)
|
|
48
|
+
.filter(([, weight]) => weight > 0)
|
|
49
|
+
.map(([source]) => source);
|
|
50
|
+
const UNCOLLECTED_NEXT_CHECKS = {
|
|
51
|
+
'live-app-quality': 'Run analyze_app against the deployed URL to collect live-app quality evidence.',
|
|
52
|
+
'accessibility': 'Run analyze_app against the deployed URL to evaluate accessibility.',
|
|
53
|
+
'crawl-coverage': 'Run analyze_app against the deployed URL to measure crawl coverage.',
|
|
54
|
+
'test-automation': 'Run qulib score-automation against the repo to score test automation maturity.',
|
|
55
|
+
'api-coverage': 'Run qulib score-api against the repo to measure API test coverage.',
|
|
56
|
+
'ci-results': 'Ingest CI status from your pipeline (ci-results source not yet wired).',
|
|
57
|
+
};
|
|
46
58
|
function resolvePolicy(p) {
|
|
47
59
|
const base = ConfidencePolicySchema.parse(p ?? {});
|
|
48
60
|
return {
|
|
@@ -72,6 +84,93 @@ function buildHonestyNote(item) {
|
|
|
72
84
|
}
|
|
73
85
|
return `${base} has partial or degraded signal.`;
|
|
74
86
|
}
|
|
87
|
+
/**
 * Resolve the raw (pre-normalization) model weight for an evidence source.
 *
 * A policy override wins when the policy object has its OWN numeric entry for
 * the source. Otherwise the built-in default weight is used, and a source with
 * no default resolves to 0.
 *
 * @param source - Evidence source id (e.g. 'accessibility').
 * @param policyWeights - Optional per-source weight overrides; may be undefined.
 * @returns The weight attributed to the source.
 */
function resolveModelWeight(source, policyWeights) {
    const owns = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
    // Own-property + type check: the `in` operator also matches inherited keys
    // (e.g. 'toString') and could hand back a function or undefined as a weight.
    if (policyWeights && owns(policyWeights, source) && typeof policyWeights[source] === 'number') {
        return policyWeights[source];
    }
    // Same guard for the defaults table so inherited members never leak out.
    const fallback = owns(DEFAULT_WEIGHTS, source) ? DEFAULT_WEIGHTS[source] : undefined;
    return fallback ?? 0;
}
|
|
93
|
+
/**
 * Explain why a model source has no evidence in this run.
 *
 * Sources are grouped by the tool that produces them. If a sibling from the
 * same group IS present, the tool evidently ran and the source was simply not
 * collected; if none is present, the whole tool was not run.
 *
 * @param source - The uncollected source id.
 * @param presentSources - Set of source ids that do appear in the evidence.
 * @returns A short reason phrase (no trailing period).
 */
function inferUncollectedReason(source, presentSources) {
    const analyzeGroup = ['live-app-quality', 'accessibility', 'crawl-coverage'];
    const repoGroup = ['test-automation', 'api-coverage'];
    const anyPresent = (group) => group.some((id) => presentSources.has(id));
    if (analyzeGroup.includes(source)) {
        if (anyPresent(analyzeGroup)) {
            return 'not collected in this confidence run';
        }
        return 'app-runtime analysis not run — no url provided';
    }
    if (repoGroup.includes(source)) {
        if (anyPresent(repoGroup)) {
            return 'not collected in this confidence run';
        }
        return 'repo scoring not run — no repo provided';
    }
    if (source === 'ci-results') {
        return 'CI status not ingested — no ci-results source wired';
    }
    return 'not collected';
}
|
|
116
|
+
/**
 * Format the honesty note for a model source that was not collected.
 *
 * @param source - The uncollected source id.
 * @param reason - Reason phrase; a trailing period is appended here.
 * @param rawWeight - Raw model weight as a fraction (0.15 is rendered as 15%).
 * @returns The formatted note.
 */
function buildUncollectedHonestyNote(source, reason, rawWeight) {
    const weightPercent = Math.round(rawWeight * 100);
    const headline = `'${source}' not collected (${weightPercent}% raw model weight)`;
    return `${headline}: ${reason}.`;
}
|
|
120
|
+
/**
 * Format the partial-evidence summary shown when a verdict was computed on
 * only part of the evidence model.
 *
 * @param scoredSourceCount - Number of sources that contributed to the score.
 * @param modelSourceCount - Number of sources in the full model.
 * @param rawWeightScored - Sum of raw weights of the scored sources.
 * @param rawWeightModel - Sum of raw weights of the full model.
 * @returns The summary sentence; coverage is reported as 0% when the model weight is not positive.
 */
function buildCoverageSummaryNote(scoredSourceCount, modelSourceCount, rawWeightScored, rawWeightModel) {
    let coveragePct = 0;
    if (rawWeightModel > 0) {
        coveragePct = Math.round((rawWeightScored / rawWeightModel) * 100);
    }
    const counts = `Partial evidence: verdict computed on ${scoredSourceCount} of ${modelSourceCount} model sources `;
    const weights = `(~${coveragePct}% of raw model weight). Collected weights were renormalized to 100% for the score.`;
    return counts + weights;
}
|
|
125
|
+
/**
 * Decide whether an evidence line reports a success or a neutral metric rather
 * than a risk, so it can be kept out of the risk list.
 *
 * @param text - One evidence string.
 * @returns true when the text matches any known "good news" shape.
 */
function isPositiveEvidence(text) {
    const positiveShapes = [
        /appear covered/i,
        /Automation maturity: L\d/i,
        /No a11y gaps/i,
        /^L\d —/i,
        /^releaseConfidence=/i,
        /^coverageScore=/i,
        /^No .* gaps detected/i,
    ];
    return positiveShapes.some((shape) => shape.test(text));
}
|
|
142
|
+
/**
 * Collect the risk statements contributed by one evidence item.
 *
 * Positive evidence lines are never surfaced as risks. The item is handled by
 * the first matching case:
 *  1. blocking       -> its reason plus every non-positive evidence line;
 *  2. unknown / null -> "<source>: <reason>" plus evidence lines that look like risks;
 *  3. not applicable -> "<source>: <reason>" only;
 *  4. below threshold -> every non-positive evidence line plus a score note;
 *  5. otherwise      -> only evidence lines that look like risks.
 *
 * @param item - Evidence item; reads `blocking`, `reason`, `evidence`, `applicability`, `score`, `source`.
 * @param passThreshold - Score under which an applicable item counts as a risk.
 * @returns Risk strings in the order described above (may be empty).
 */
function extractItemRisks(item, passThreshold) {
    const nonPositive = (entry) => !isPositiveEvidence(entry);
    const risks = [];
    if (item.blocking) {
        if (item.reason)
            risks.push(item.reason);
        risks.push(...item.evidence.filter(nonPositive));
        return risks;
    }
    const applicability = item.applicability ?? 'applicable';
    if (applicability === 'unknown' || item.score === null) {
        if (item.reason)
            risks.push(`${item.source}: ${item.reason}`);
        risks.push(...item.evidence.filter((entry) => nonPositive(entry) &&
            /(gap|critical|high|untested|uncovered|missing|block|fail|warning|auth|blocked)/i.test(entry)));
        return risks;
    }
    if (applicability === 'not_applicable') {
        if (item.reason)
            risks.push(`${item.source}: ${item.reason}`);
        return risks;
    }
    // A null score already returned above, so a single comparison suffices; the
    // original re-tested the same condition inside this branch.
    if (item.score < passThreshold) {
        risks.push(...item.evidence.filter(nonPositive));
        risks.push(`${item.source} scored ${item.score}/100 — below pass threshold (${passThreshold}).`);
        return risks;
    }
    risks.push(...item.evidence.filter((entry) => nonPositive(entry) &&
        /(gap|critical|high|untested|uncovered|missing|block|fail|warning|penalty|below)/i.test(entry)));
    return risks;
}
|
|
75
174
|
/**
|
|
76
175
|
* Compute the fused Release Confidence result from an evidence bundle.
|
|
77
176
|
*
|
|
@@ -137,29 +236,56 @@ export function computeReleaseConfidence(input) {
|
|
|
137
236
|
}
|
|
138
237
|
// Level / label from shared ladder.
|
|
139
238
|
const { level, label } = scoreLevel(confidenceScore ?? 0);
|
|
140
|
-
|
|
239
|
+
const presentSources = new Set(input.evidence.map((item) => item.source));
|
|
240
|
+
const uncollectedSources = MODEL_SOURCES.filter((source) => !presentSources.has(source));
|
|
241
|
+
const modelWeightSum = MODEL_SOURCES.reduce((sum, source) => sum + resolveModelWeight(source, policy.weights), 0);
|
|
242
|
+
// Honesty notes — partial-run summary first, then present-but-excluded sources (must not
|
|
243
|
+
// be truncated by maxListLength), then uncollected model sources.
|
|
141
244
|
const honestyNotes = [];
|
|
245
|
+
if (uncollectedSources.length > 0 || (weightSum > 0 && weightSum < modelWeightSum - 0.001)) {
|
|
246
|
+
honestyNotes.push(buildCoverageSummaryNote(applicable.length, MODEL_SOURCES.length, weightSum, modelWeightSum));
|
|
247
|
+
}
|
|
142
248
|
for (const item of excluded) {
|
|
143
249
|
honestyNotes.push(buildHonestyNote(item));
|
|
144
250
|
}
|
|
145
|
-
|
|
251
|
+
for (const source of uncollectedSources) {
|
|
252
|
+
const rawWeight = resolveModelWeight(source, policy.weights);
|
|
253
|
+
const reason = inferUncollectedReason(source, presentSources);
|
|
254
|
+
honestyNotes.push(buildUncollectedHonestyNote(source, reason, rawWeight));
|
|
255
|
+
}
|
|
146
256
|
for (const item of blockingItems) {
|
|
147
257
|
if ((item.applicability ?? 'applicable') === 'applicable' && item.score !== null) {
|
|
148
258
|
honestyNotes.push(`'${item.source}' is a hard blocker${item.reason ? ': ' + item.reason : ''}.`);
|
|
149
259
|
}
|
|
150
260
|
}
|
|
151
|
-
// Top risks —
|
|
152
|
-
const allRisks = [
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
.
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
261
|
+
// Top risks — gaps and blockers only; never surface coverage successes as risks.
|
|
262
|
+
const allRisks = [];
|
|
263
|
+
for (const source of uncollectedSources) {
|
|
264
|
+
const rawWeight = resolveModelWeight(source, policy.weights);
|
|
265
|
+
if (rawWeight >= 0.10) {
|
|
266
|
+
const reason = inferUncollectedReason(source, presentSources);
|
|
267
|
+
allRisks.push(`Uncollected high-weight evidence: ${source} (${Math.round(rawWeight * 100)}% raw weight) — ${reason}.`);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
for (const item of blockingItems) {
|
|
271
|
+
allRisks.push(...extractItemRisks(item, policy.passThreshold));
|
|
272
|
+
}
|
|
273
|
+
for (const item of [...excluded].sort((a, b) => resolveWeight(a, policy.weights) - resolveWeight(b, policy.weights))) {
|
|
274
|
+
allRisks.push(...extractItemRisks(item, policy.passThreshold));
|
|
275
|
+
}
|
|
276
|
+
for (const item of [...applicable].sort((a, b) => (a.score ?? 0) - (b.score ?? 0))) {
|
|
277
|
+
allRisks.push(...extractItemRisks(item, policy.passThreshold));
|
|
278
|
+
}
|
|
279
|
+
const topRisks = [...new Set(allRisks.filter(Boolean))].slice(0, limit);
|
|
280
|
+
// Recommended next checks — concrete actions for uncollected sources plus per-item recommendations.
|
|
281
|
+
const allRecs = [];
|
|
282
|
+
for (const source of uncollectedSources) {
|
|
283
|
+
const rec = UNCOLLECTED_NEXT_CHECKS[source];
|
|
284
|
+
if (rec)
|
|
285
|
+
allRecs.push(rec);
|
|
286
|
+
}
|
|
287
|
+
allRecs.push(...input.evidence.flatMap((item) => item.recommendations ?? []));
|
|
288
|
+
const recommendedNextChecks = [...new Set(allRecs.filter(Boolean))].slice(0, limit);
|
|
163
289
|
const result = {
|
|
164
290
|
schemaVersion: 1,
|
|
165
291
|
computedAt: now,
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt-leakage detector — gap category `prompt-leakage`.
|
|
3
|
+
*
|
|
4
|
+
* Flags when a web page inadvertently exposes AI system-prompt / agent
|
|
5
|
+
* instructions in its public surface: inline scripts, HTML comments, meta
|
|
6
|
+
* tags, visible text, response headers, or error bodies.
|
|
7
|
+
*
|
|
8
|
+
* CONSERVATIVE design: every signal requires TWO corroborating markers
|
|
9
|
+
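* before generating a Gap, to keep the false-positive rate low. Exceptions: a standalone debug-echo field in a block longer than 100 chars, and a known leaky response-header name, are each flagged on their own.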
|
|
10
|
+
* A page that merely uses the word "AI" or "assistant" will NOT trip.
|
|
11
|
+
*
|
|
12
|
+
* Heuristics are derived from first principles — the structural telltale
|
|
13
|
+
* shapes of an exposed instruction block. No third-party leaked-prompt
|
|
14
|
+
* text or vendor identifiers were used.
|
|
15
|
+
*/
|
|
16
|
+
import type { Gap } from '../../schemas/gap-analysis.schema.js';
|
|
17
|
+
import type { Route } from '../../schemas/route-inventory.schema.js';
|
|
18
|
+
/**
|
|
19
|
+
* Scan a captured page surface for signals that an AI system prompt or agent
|
|
20
|
+
* instructions are exposed in its public surface.
|
|
21
|
+
*
|
|
22
|
+
* Accepts the `Route` shape from `route-inventory.schema.ts`, which now
|
|
23
|
+
* includes the optional `headers` and `bodySnippet` fields.
|
|
24
|
+
*
|
|
25
|
+
* Returns an array of `Gap` objects with `category: 'prompt-leakage'`.
|
|
26
|
+
* Returns an empty array when no signals are found.
|
|
27
|
+
*/
|
|
28
|
+
export declare function detectPromptLeakage(route: Pick<Route, 'path' | 'headers' | 'bodySnippet'>): Gap[];
|
|
29
|
+
//# sourceMappingURL=prompt-leakage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompt-leakage.d.ts","sourceRoot":"","sources":["../../../src/tools/scoring/prompt-leakage.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAGH,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,sCAAsC,CAAC;AAChE,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,yCAAyC,CAAC;AAqLrE;;;;;;;;;GASG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,aAAa,CAAC,GAAG,GAAG,EAAE,CAgGjG"}
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt-leakage detector — gap category `prompt-leakage`.
|
|
3
|
+
*
|
|
4
|
+
* Flags when a web page inadvertently exposes AI system-prompt / agent
|
|
5
|
+
* instructions in its public surface: inline scripts, HTML comments, meta
|
|
6
|
+
* tags, visible text, response headers, or error bodies.
|
|
7
|
+
*
|
|
8
|
+
* CONSERVATIVE design: every signal requires TWO corroborating markers
|
|
9
|
+
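* before generating a Gap, to keep the false-positive rate low. Exceptions: a standalone debug-echo field in a block longer than 100 chars, and a known leaky response-header name, are each flagged on their own.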
|
|
10
|
+
* A page that merely uses the word "AI" or "assistant" will NOT trip.
|
|
11
|
+
*
|
|
12
|
+
* Heuristics are derived from first principles — the structural telltale
|
|
13
|
+
* shapes of an exposed instruction block. No third-party leaked-prompt
|
|
14
|
+
* text or vendor identifiers were used.
|
|
15
|
+
*/
|
|
16
|
+
import { randomUUID } from 'node:crypto';
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Pattern constants — all original heuristics; no vendor identifiers
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
/**
|
|
21
|
+
* Patterns that mark the OPENING of a system-instruction block.
|
|
22
|
+
* These alone are weak — we require corroboration.
|
|
23
|
+
*/
|
|
24
|
+
const ROLE_DIRECTIVE_RE = /\b(?:you\s+are\s+(?:an?\s+)?(?:ai|assistant|agent|bot|helpful|language\s+model)|act\s+as\s+(?:an?\s+)?(?:ai|assistant|agent|bot)|your\s+(?:role|persona|job|task|purpose)\s+is\s+to|i\s+am\s+(?:an?\s+)?(?:ai|assistant|agent|bot)|as\s+(?:an?\s+)?(?:ai|assistant|agent|language\s+model))\b/i;
|
|
25
|
+
/**
|
|
26
|
+
* Patterns that mark instruction-block structural keywords.
|
|
27
|
+
* Typical in system prompts to delineate sections/rules.
|
|
28
|
+
*/
|
|
29
|
+
const INSTRUCTION_KEYWORD_RE = /\b(?:do\s+not\s+(?:reveal|disclose|share|tell|mention|discuss)\s+(?:this|these|your\s+instructions?|the\s+(?:system\s+)?prompt)|never\s+(?:reveal|disclose|share|tell)\s+(?:this|these|your|the)\b|keep\s+(?:this|these|the\s+following)\s+(?:confidential|secret|private|hidden)|do\s+not\s+(?:break|exit|leave)\s+(?:character|role|persona)|stay\s+in\s+character|maintain\s+(?:your\s+)?(?:persona|role|character))\b/i;
|
|
30
|
+
/**
|
|
31
|
+
* Markers that signal a tool/function definition block being echoed back
|
|
32
|
+
* (e.g. an OpenAI-style function spec or a Claude tool_use block).
|
|
33
|
+
*/
|
|
34
|
+
const TOOL_DEFINITION_RE = /(?:"function_call"\s*:|"tool_use"\s*:|"tools"\s*:\s*\[|"tool_name"\s*:|function\s+definitions?\s*:)/i;
|
|
35
|
+
/**
|
|
36
|
+
* Structural markers of a multi-turn instruction payload being echoed:
|
|
37
|
+
* system/user/assistant roles in JSON or XML-style markup.
|
|
38
|
+
*/
|
|
39
|
+
const SYSTEM_ROLE_BLOCK_RE = /(?:"role"\s*:\s*"system"|<\s*system\s*>[\s\S]{10,}<\s*\/\s*system\s*>|<\s*instructions?\s*>[\s\S]{10,}<\s*\/\s*instructions?\s*>|\[\s*INST\s*\][\s\S]{10,}\[\/\s*INST\s*\])/i;
|
|
40
|
+
/**
|
|
41
|
+
* Header names that should never expose agent instructions.
|
|
42
|
+
*/
|
|
43
|
+
const LEAKY_HEADER_NAMES_RE = /^(?:x-system-prompt|x-agent-instructions?|x-llm-prompt|x-ai-context|x-openai-system|x-anthropic-system|x-bot-instructions?)$/i;
|
|
44
|
+
/**
|
|
45
|
+
* Markers that suggest a debug-mode echo of the model's instructions
|
|
46
|
+
* inside an error or JSON response body.
|
|
47
|
+
*/
|
|
48
|
+
const DEBUG_ECHO_RE = /(?:"system_prompt"\s*:|"system_message"\s*:|"instructions"\s*:\s*"[^"]{50,}"|"agent_instructions"\s*:|"prompt_template"\s*:)/i;
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
// Helper utilities
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
/**
 * Reduce an HTML fragment to its visible text.
 *
 * Comments and complete `<script>` / `<style>` elements are dropped together
 * with their contents, since none of that is rendered. Remaining tags are then
 * replaced by spaces and whitespace runs are collapsed.
 *
 * An unterminated `<script>` or `<style>` (e.g. a body snippet cut mid-element)
 * only loses its opening tag, so its text stays available to callers.
 *
 * @param html - Raw HTML text.
 * @returns Trimmed visible text separated by single spaces.
 */
function stripHtml(html) {
    return html
        .replace(/<!--[\s\S]*?-->/g, ' ')
        .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
        .replace(/<[^>]*>/g, ' ')
        .replace(/\s+/g, ' ')
        .trim();
}
|
|
56
|
+
/**
 * Collect the text inside every HTML comment (`<!-- ... -->`).
 *
 * @param html - Raw HTML text.
 * @returns Trimmed comment bodies in document order; empty comments are omitted.
 */
function extractComments(html) {
    const bodies = Array.from(html.matchAll(/<!--([\s\S]*?)-->/g), (hit) => hit[1]?.trim() ?? '');
    return bodies.filter((body) => body.length > 0);
}
|
|
68
|
+
/**
 * Collect the source text of inline `<script>` elements.
 *
 * A script is external (and skipped) only when it has a real `src` attribute,
 * i.e. `src=` preceded by whitespace. Attributes such as `data-src` do not
 * count. The closing tag may contain whitespace before `>`.
 *
 * @param html - Raw HTML text.
 * @returns Trimmed script bodies in document order; empty scripts are omitted.
 */
function extractInlineScripts(html) {
    // `\b` after "script" keeps look-alike element names (e.g. <scripted>) out.
    const inlineScript = /<script\b(?![^>]*\ssrc\s*=)[^>]*>([\s\S]*?)<\/script\s*>/gi;
    const bodies = Array.from(html.matchAll(inlineScript), (hit) => hit[1]?.trim() ?? '');
    return bodies.filter((body) => body.length > 0);
}
|
|
80
|
+
/**
 * Collect `content` attribute values of `<meta>` tags that are at least 30
 * characters long (shorter values are not worth scanning).
 *
 * The value may be wrapped in double or single quotes and may contain the
 * other quote character, so text such as `Don't reveal ...` inside double
 * quotes is captured in full. Only a real `content` attribute matches;
 * `data-content` and similar names are ignored.
 *
 * @param html - Raw HTML text.
 * @returns Trimmed attribute values in document order.
 */
function extractMetaContents(html) {
    const metaContent = /<meta\b[^>]*?\scontent\s*=\s*(?:"([^"]{30,})"|'([^']{30,})')[^>]*>/gi;
    const values = Array.from(html.matchAll(metaContent), (hit) => (hit[1] ?? hit[2] ?? '').trim());
    return values.filter((value) => value.length > 0);
}
|
|
92
|
+
/**
 * Shorten a string for embedding in gap evidence.
 *
 * Strings no longer than `max` UTF-16 code units are returned unchanged;
 * longer ones are cut and suffixed with an ellipsis. The cut never lands
 * between the two halves of a surrogate pair, and a negative `max` is treated
 * as 0 instead of slicing from the end of the string.
 *
 * @param s - Text to shorten.
 * @param max - Maximum number of code units kept before the ellipsis (default 200).
 * @returns The original or shortened text.
 */
function truncate(s, max = 200) {
    if (s.length <= max) {
        return s;
    }
    let end = Math.max(0, max);
    if (end > 0) {
        const last = s.charCodeAt(end - 1);
        // A trailing high surrogate would leave a broken character behind.
        if (last >= 0xd800 && last <= 0xdbff) {
            end -= 1;
        }
    }
    return `${s.slice(0, end)}…`;
}
|
|
96
|
+
// ---------------------------------------------------------------------------
// Corroboration check
//
// Most findings need a role directive together with one structural marker in
// the same text block, so a casual mention of "AI" does not trip the detector.
// A debug-echo field is the one exception: it may also be corroborated by a
// system-role block, or flagged alone when the block is longer than 100 chars.
// ---------------------------------------------------------------------------
/**
 * Inspect one text block for prompt-leakage signals.
 *
 * Checks run in priority order and the first hit wins:
 *  - role directive + confidentiality/instruction keyword -> 'critical'
 *  - role directive + system-role payload block           -> 'high'
 *  - role directive + tool/function definition            -> 'high'
 *  - debug-echo field + (role directive or system block)  -> 'high'
 *  - debug-echo field alone in a block over 100 chars     -> 'medium'
 *
 * @param text - The text block to scan.
 * @param location - Label of where the block came from; embedded in the description.
 * @returns `{ description, evidence, severity }` for the first hit, or null.
 */
function detectInBlock(text, location) {
    const roleHit = text.match(ROLE_DIRECTIVE_RE);
    const systemHit = text.match(SYSTEM_ROLE_BLOCK_RE);
    const debugHit = text.match(DEBUG_ECHO_RE);
    if (roleHit) {
        if (INSTRUCTION_KEYWORD_RE.test(text)) {
            return {
                description: `Role-framing directive with instruction confidentiality keyword in ${location}`,
                evidence: truncate(`${roleHit[0]} … [instruction keyword found]`),
                severity: 'critical',
            };
        }
        if (systemHit) {
            return {
                description: `System-role payload block with role directive in ${location}`,
                evidence: truncate(systemHit[0]),
                severity: 'high',
            };
        }
        const toolHit = text.match(TOOL_DEFINITION_RE);
        if (toolHit) {
            return {
                description: `Tool/function definition block with role directive in ${location}`,
                evidence: truncate(toolHit[0]),
                severity: 'high',
            };
        }
    }
    if (debugHit) {
        // Corroborated echo: the prompt field sits next to role/system content.
        if (roleHit || systemHit) {
            return {
                description: `Debug-mode system-prompt echo in ${location}`,
                evidence: truncate(debugHit[0]),
                severity: 'high',
            };
        }
        // Uncorroborated: the field name alone, accepted only in longer blocks.
        if (text.length > 100) {
            return {
                description: `Possible debug-mode prompt field echo in ${location}`,
                evidence: truncate(debugHit[0]),
                severity: 'medium',
            };
        }
    }
    return null;
}
|
|
153
|
+
// ---------------------------------------------------------------------------
|
|
154
|
+
// Public detector
|
|
155
|
+
// ---------------------------------------------------------------------------
|
|
156
|
+
/**
 * Scan one captured route for exposed AI system-prompt / agent instructions.
 *
 * The body snippet is examined in four contexts, in this order: inline
 * scripts, HTML comments, meta-tag content, and the tag-stripped page body
 * (only when the snippet is non-empty). Response headers are then checked by
 * name against the leaky-header list; a match is always reported as 'critical'.
 *
 * Results are de-duplicated on path + severity + reason, keeping the first
 * occurrence. The reason text includes the context label, so only repeated
 * identical findings within the same context collapse.
 *
 * @param route - Object with `path` and optional `headers` / `bodySnippet`.
 * @returns Gap objects with `category: 'prompt-leakage'`; empty when nothing is found.
 */
export function detectPromptLeakage(route) {
    const { path } = route;
    const html = route.bodySnippet ?? '';
    const surfaces = [
        {
            blocks: extractInlineScripts(html),
            location: 'inline-script',
            prefix: 'Prompt-leakage signal detected in inline JavaScript: ',
            recommendation: 'Remove agent instruction content from client-facing JavaScript. Never embed system prompts in frontend bundles or inline scripts.',
        },
        {
            blocks: extractComments(html),
            location: 'HTML-comment',
            prefix: 'Prompt-leakage signal detected in HTML comment: ',
            recommendation: 'Remove agent instructions from HTML comments. Comments are visible in page source.',
        },
        {
            blocks: extractMetaContents(html),
            location: 'meta-tag',
            prefix: 'Prompt-leakage signal detected in meta tag: ',
            recommendation: 'Remove agent instructions from HTML meta tags. Meta content is public.',
        },
        {
            // The visible body is a single block, and only exists for a non-empty snippet.
            blocks: html.length > 0 ? [stripHtml(html)] : [],
            location: 'page-body',
            prefix: 'Prompt-leakage signal detected in visible page body: ',
            recommendation: 'Ensure agent instructions are never rendered into visible page content. Check debug/error pages.',
        },
    ];
    const found = [];
    for (const surface of surfaces) {
        for (const block of surface.blocks) {
            const signal = detectInBlock(block, surface.location);
            if (!signal)
                continue;
            found.push({
                id: randomUUID(),
                path,
                severity: signal.severity,
                reason: signal.description,
                category: 'prompt-leakage',
                description: `${surface.prefix}${signal.evidence}`,
                recommendation: surface.recommendation,
            });
        }
    }
    for (const [name, value] of Object.entries(route.headers ?? {})) {
        if (!LEAKY_HEADER_NAMES_RE.test(name))
            continue;
        found.push({
            id: randomUUID(),
            path,
            severity: 'critical',
            reason: `Response header "${name}" exposes agent configuration`,
            category: 'prompt-leakage',
            description: `Header "${name}: ${truncate(value, 80)}" should not be sent to clients.`,
            recommendation: `Remove the "${name}" response header. Agent configuration must never be transmitted to the browser.`,
        });
    }
    // Keep the first gap for each (path, severity, reason) triple.
    const seenKeys = new Set();
    const unique = [];
    for (const gap of found) {
        const key = `${gap.path}::${gap.severity}::${gap.reason}`;
        if (seenKeys.has(key))
            continue;
        seenKeys.add(key);
        unique.push(gap);
    }
    return unique;
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@qulib/core",
|
|
3
|
-
"version": "0.9.0",
|
|
3
|
+
"version": "0.10.1",
|
|
4
4
|
"description": "Qulib — release confidence for deployed web apps. Fuses live-app quality, automation maturity, and API coverage into a single ship/caution/hold/block verdict.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Tapesh Nagarwal",
|
|
@@ -23,7 +23,11 @@
|
|
|
23
23
|
"accessibility",
|
|
24
24
|
"playwright",
|
|
25
25
|
"mcp",
|
|
26
|
-
"ai"
|
|
26
|
+
"ai",
|
|
27
|
+
"ci-gate",
|
|
28
|
+
"test-confidence",
|
|
29
|
+
"web-quality",
|
|
30
|
+
"wcag"
|
|
27
31
|
],
|
|
28
32
|
"publishConfig": {
|
|
29
33
|
"access": "public"
|
|
@@ -52,7 +56,7 @@
|
|
|
52
56
|
"build": "tsc",
|
|
53
57
|
"prepack": "npm run build",
|
|
54
58
|
"prepublishOnly": "npm run build",
|
|
55
|
-
"test": "node --import tsx/esm --test src/llm/__tests__/cost-intelligence.test.ts src/llm/__tests__/context-builder.test.ts src/tools/scoring/__tests__/gaps.test.ts src/tools/auth/__tests__/gaps.test.ts src/tools/auth/__tests__/detect.test.ts src/tools/scoring/__tests__/automation-maturity.test.ts src/tools/scoring/__tests__/api-coverage.test.ts src/tools/scoring/__tests__/automation-maturity-with-api.test.ts src/harness/__tests__/state-manager.test.ts src/telemetry/__tests__/redact-url.test.ts src/cli/__tests__/auth-login.test.ts src/cli/__tests__/cli-version.test.ts src/cli/__tests__/bin-shim.test.ts src/cli/__tests__/score-automation.test.ts src/cli/__tests__/scaffold.test.ts src/__tests__/agent-summary.test.ts src/__tests__/cli-agent-summary.test.ts src/__tests__/analyze.storage-state-invalid.test.ts src/__tests__/analyze.fixtures.test.ts src/adapters/__tests__/playwright-adapter.test.ts src/adapters/__tests__/api-adapter.test.ts src/adapters/__tests__/ci-results-adapter.test.ts src/adapters/__tests__/pr-metadata-adapter.test.ts src/adapters/__tests__/validate-specs.test.ts src/tools/repo/__tests__/api-surface.test.ts src/baseline/__tests__/baseline.test.ts evals/runner/__tests__/runner.test.ts evals/judge/__tests__/judge.test.ts src/tools/scoring/__tests__/confidence.test.ts src/tools/scoring/__tests__/confidence-from-qulib.test.ts src/tools/scoring/__tests__/confidence-views.test.ts src/cli/__tests__/confidence.test.ts src/__tests__/notquality-dogfood.test.ts src/cli/__tests__/default-config-fallback.test.ts",
|
|
59
|
+
"test": "node --import tsx/esm --test src/llm/__tests__/cost-intelligence.test.ts src/llm/__tests__/context-builder.test.ts src/tools/scoring/__tests__/gaps.test.ts src/tools/auth/__tests__/gaps.test.ts src/tools/auth/__tests__/detect.test.ts src/tools/scoring/__tests__/automation-maturity.test.ts src/tools/scoring/__tests__/api-coverage.test.ts src/tools/scoring/__tests__/automation-maturity-with-api.test.ts src/harness/__tests__/state-manager.test.ts src/telemetry/__tests__/redact-url.test.ts src/cli/__tests__/auth-login.test.ts src/cli/__tests__/cli-version.test.ts src/cli/__tests__/bin-shim.test.ts src/cli/__tests__/score-automation.test.ts src/cli/__tests__/scaffold.test.ts src/__tests__/agent-summary.test.ts src/__tests__/cli-agent-summary.test.ts src/__tests__/analyze.storage-state-invalid.test.ts src/__tests__/analyze.fixtures.test.ts src/adapters/__tests__/playwright-adapter.test.ts src/adapters/__tests__/api-adapter.test.ts src/adapters/__tests__/ci-results-adapter.test.ts src/adapters/__tests__/pr-metadata-adapter.test.ts src/adapters/__tests__/validate-specs.test.ts src/tools/repo/__tests__/api-surface.test.ts src/baseline/__tests__/baseline.test.ts evals/runner/__tests__/runner.test.ts evals/runner/__tests__/golden-manifest.test.ts evals/judge/__tests__/judge.test.ts src/tools/scoring/__tests__/confidence.test.ts src/tools/scoring/__tests__/confidence-from-qulib.test.ts src/tools/scoring/__tests__/confidence-views.test.ts src/cli/__tests__/confidence.test.ts src/__tests__/notquality-dogfood.test.ts src/cli/__tests__/default-config-fallback.test.ts src/cli/__tests__/baseline.test.ts src/cli/__tests__/naming-aliases.test.ts src/cli/__tests__/analyze-diff.test.ts src/reporters/__tests__/heatmap.test.ts src/tools/scoring/__tests__/prompt-leakage.test.ts",
|
|
56
60
|
"test:integration": "node --import tsx/esm --test src/__tests__/analyze.integration.test.ts",
|
|
57
61
|
"eval": "node --import tsx/esm evals/runner/index.ts",
|
|
58
62
|
"eval:judge": "node --import tsx/esm evals/judge/eval-judge.ts",
|
|
@@ -71,6 +75,6 @@
|
|
|
71
75
|
"devDependencies": {
|
|
72
76
|
"@types/js-yaml": "^4.0.9",
|
|
73
77
|
"@types/node": "^20.0.0",
|
|
74
|
-
"tsx": "^4.
|
|
78
|
+
"tsx": "^4.22.4"
|
|
75
79
|
}
|
|
76
80
|
}
|