@qulib/core 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +11 -11
  2. package/dist/baseline/baseline.schema.d.ts +26 -26
  3. package/dist/baseline/baseline.schema.d.ts.map +1 -1
  4. package/dist/baseline/baseline.schema.js +1 -0
  5. package/dist/cli/analyze-diff-run.d.ts +77 -0
  6. package/dist/cli/analyze-diff-run.d.ts.map +1 -0
  7. package/dist/cli/analyze-diff-run.js +266 -0
  8. package/dist/cli/baseline-run.d.ts +55 -0
  9. package/dist/cli/baseline-run.d.ts.map +1 -0
  10. package/dist/cli/baseline-run.js +259 -0
  11. package/dist/cli/confidence-run.d.ts.map +1 -1
  12. package/dist/cli/confidence-run.js +10 -6
  13. package/dist/cli/index.js +4 -0
  14. package/dist/cli/score-automation-run.d.ts.map +1 -1
  15. package/dist/cli/score-automation-run.js +5 -1
  16. package/dist/index.d.ts +5 -0
  17. package/dist/index.d.ts.map +1 -1
  18. package/dist/index.js +5 -0
  19. package/dist/phases/think.d.ts.map +1 -1
  20. package/dist/phases/think.js +4 -1
  21. package/dist/reporters/heatmap.d.ts +55 -0
  22. package/dist/reporters/heatmap.d.ts.map +1 -0
  23. package/dist/reporters/heatmap.js +148 -0
  24. package/dist/reporters/markdown-reporter.d.ts.map +1 -1
  25. package/dist/reporters/markdown-reporter.js +4 -1
  26. package/dist/schemas/confidence.schema.d.ts +2 -2
  27. package/dist/schemas/config.schema.d.ts.map +1 -1
  28. package/dist/schemas/config.schema.js +6 -1
  29. package/dist/schemas/gap-analysis.schema.d.ts +8 -8
  30. package/dist/schemas/gap-analysis.schema.js +1 -1
  31. package/dist/schemas/golden-manifest.schema.d.ts +137 -0
  32. package/dist/schemas/golden-manifest.schema.d.ts.map +1 -0
  33. package/dist/schemas/golden-manifest.schema.js +25 -0
  34. package/dist/schemas/index.d.ts +1 -0
  35. package/dist/schemas/index.d.ts.map +1 -1
  36. package/dist/schemas/index.js +1 -0
  37. package/dist/schemas/public-surface.schema.d.ts +15 -5
  38. package/dist/schemas/public-surface.schema.d.ts.map +1 -1
  39. package/dist/schemas/route-inventory.schema.d.ts +20 -0
  40. package/dist/schemas/route-inventory.schema.d.ts.map +1 -1
  41. package/dist/schemas/route-inventory.schema.js +4 -0
  42. package/dist/schemas/views.schema.d.ts +1 -1
  43. package/dist/tools/scoring/confidence.d.ts.map +1 -1
  44. package/dist/tools/scoring/confidence.js +140 -14
  45. package/dist/tools/scoring/prompt-leakage.d.ts +29 -0
  46. package/dist/tools/scoring/prompt-leakage.d.ts.map +1 -0
  47. package/dist/tools/scoring/prompt-leakage.js +256 -0
  48. package/package.json +8 -4
@@ -65,6 +65,10 @@ export declare const RouteSchema: z.ZodObject<{
65
65
  nodeCount: number;
66
66
  }>, "many">;
67
67
  statusCode: z.ZodOptional<z.ZodNumber>;
68
+ /** Optional: response headers from the page fetch (populated by explorers that capture them). */
69
+ headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
70
+ /** Optional: first ~4000 chars of the raw HTML body (populated by explorers that capture it). */
71
+ bodySnippet: z.ZodOptional<z.ZodString>;
68
72
  }, "strip", z.ZodTypeAny, {
69
73
  path: string;
70
74
  pageTitle: string;
@@ -84,6 +88,8 @@ export declare const RouteSchema: z.ZodObject<{
84
88
  nodeCount: number;
85
89
  }[];
86
90
  statusCode?: number | undefined;
91
+ headers?: Record<string, string> | undefined;
92
+ bodySnippet?: string | undefined;
87
93
  }, {
88
94
  path: string;
89
95
  pageTitle: string;
@@ -103,6 +109,8 @@ export declare const RouteSchema: z.ZodObject<{
103
109
  nodeCount: number;
104
110
  }[];
105
111
  statusCode?: number | undefined;
112
+ headers?: Record<string, string> | undefined;
113
+ bodySnippet?: string | undefined;
106
114
  }>;
107
115
  export declare const RouteInventorySchema: z.ZodObject<{
108
116
  scannedAt: z.ZodString;
@@ -144,6 +152,10 @@ export declare const RouteInventorySchema: z.ZodObject<{
144
152
  nodeCount: number;
145
153
  }>, "many">;
146
154
  statusCode: z.ZodOptional<z.ZodNumber>;
155
+ /** Optional: response headers from the page fetch (populated by explorers that capture them). */
156
+ headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
157
+ /** Optional: first ~4000 chars of the raw HTML body (populated by explorers that capture it). */
158
+ bodySnippet: z.ZodOptional<z.ZodString>;
147
159
  }, "strip", z.ZodTypeAny, {
148
160
  path: string;
149
161
  pageTitle: string;
@@ -163,6 +175,8 @@ export declare const RouteInventorySchema: z.ZodObject<{
163
175
  nodeCount: number;
164
176
  }[];
165
177
  statusCode?: number | undefined;
178
+ headers?: Record<string, string> | undefined;
179
+ bodySnippet?: string | undefined;
166
180
  }, {
167
181
  path: string;
168
182
  pageTitle: string;
@@ -182,6 +196,8 @@ export declare const RouteInventorySchema: z.ZodObject<{
182
196
  nodeCount: number;
183
197
  }[];
184
198
  statusCode?: number | undefined;
199
+ headers?: Record<string, string> | undefined;
200
+ bodySnippet?: string | undefined;
185
201
  }>, "many">;
186
202
  pagesSkipped: z.ZodNumber;
187
203
  budgetExceeded: z.ZodBoolean;
@@ -207,6 +223,8 @@ export declare const RouteInventorySchema: z.ZodObject<{
207
223
  nodeCount: number;
208
224
  }[];
209
225
  statusCode?: number | undefined;
226
+ headers?: Record<string, string> | undefined;
227
+ bodySnippet?: string | undefined;
210
228
  }[];
211
229
  pagesSkipped: number;
212
230
  budgetExceeded: boolean;
@@ -232,6 +250,8 @@ export declare const RouteInventorySchema: z.ZodObject<{
232
250
  nodeCount: number;
233
251
  }[];
234
252
  statusCode?: number | undefined;
253
+ headers?: Record<string, string> | undefined;
254
+ bodySnippet?: string | undefined;
235
255
  }[];
236
256
  pagesSkipped: number;
237
257
  budgetExceeded: boolean;
@@ -1 +1 @@
1
- {"version":3,"file":"route-inventory.schema.d.ts","sourceRoot":"","sources":["../../src/schemas/route-inventory.schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;EAK9B,CAAC;AAEH,eAAO,MAAM,gBAAgB;;;;;;;;;;;;EAI3B,CAAC;AAEH,eAAO,MAAM,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAUtB,CAAC;AAEH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAM/B,CAAC;AAEH,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC,CAAC"}
1
+ {"version":3,"file":"route-inventory.schema.d.ts","sourceRoot":"","sources":["../../src/schemas/route-inventory.schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;EAK9B,CAAC;AAEH,eAAO,MAAM,gBAAgB;;;;;;;;;;;;EAI3B,CAAC;AAEH,eAAO,MAAM,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAUtB,iGAAiG;;IAEjG,iGAAiG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAEjG,CAAC;AAEH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;QAN/B,iGAAiG;;QAEjG,iGAAiG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAUjG,CAAC;AAEH,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC,CAAC"}
@@ -20,6 +20,10 @@ export const RouteSchema = z.object({
20
20
  brokenLinks: z.array(BrokenLinkSchema),
21
21
  a11yViolations: z.array(A11yViolationSchema),
22
22
  statusCode: z.number().int().optional(),
23
+ /** Optional: response headers from the page fetch (populated by explorers that capture them). */
24
+ headers: z.record(z.string(), z.string()).optional(),
25
+ /** Optional: first ~4000 chars of the raw HTML body (populated by explorers that capture it). */
26
+ bodySnippet: z.string().max(8000).optional(),
23
27
  });
24
28
  export const RouteInventorySchema = z.object({
25
29
  scannedAt: z.string().datetime(),
@@ -211,8 +211,8 @@ export declare const AuditEntrySchema: z.ZodObject<{
211
211
  recordHash: z.ZodString;
212
212
  }, "strip", z.ZodTypeAny, {
213
213
  computedAt: string;
214
- tenantId: string;
215
214
  schemaVersion: 1;
215
+ tenantId: string;
216
216
  confidenceScore: number | null;
217
217
  verdict: "ship" | "caution" | "hold" | "block";
218
218
  blockers: string[];
@@ -1 +1 @@
1
- {"version":3,"file":"confidence.d.ts","sourceRoot":"","sources":["../../../src/tools/scoring/confidence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EACV,eAAe,EAGf,iBAAiB,EAElB,MAAM,oCAAoC,CAAC;AAiE5C;;;;GAIG;AACH,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,eAAe,GAAG,iBAAiB,CA8HlF"}
1
+ {"version":3,"file":"confidence.d.ts","sourceRoot":"","sources":["../../../src/tools/scoring/confidence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EACV,eAAe,EAIf,iBAAiB,EAElB,MAAM,oCAAoC,CAAC;AA8L5C;;;;GAIG;AACH,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,eAAe,GAAG,iBAAiB,CAgKlF"}
@@ -43,6 +43,18 @@ const DEFAULT_WEIGHTS = {
43
43
  'human-approval': 0.0,
44
44
  'agent-evidence': 0.0,
45
45
  };
46
+ /** Model sources with non-zero default weight — the full evidence model for partial-run disclosure. */
47
+ const MODEL_SOURCES = Object.entries(DEFAULT_WEIGHTS)
48
+ .filter(([, weight]) => weight > 0)
49
+ .map(([source]) => source);
50
+ const UNCOLLECTED_NEXT_CHECKS = {
51
+ 'live-app-quality': 'Run analyze_app against the deployed URL to collect live-app quality evidence.',
52
+ 'accessibility': 'Run analyze_app against the deployed URL to evaluate accessibility.',
53
+ 'crawl-coverage': 'Run analyze_app against the deployed URL to measure crawl coverage.',
54
+ 'test-automation': 'Run qulib score-automation against the repo to score test automation maturity.',
55
+ 'api-coverage': 'Run qulib score-api against the repo to measure API test coverage.',
56
+ 'ci-results': 'Ingest CI status from your pipeline (ci-results source not yet wired).',
57
+ };
46
58
  function resolvePolicy(p) {
47
59
  const base = ConfidencePolicySchema.parse(p ?? {});
48
60
  return {
@@ -72,6 +84,93 @@ function buildHonestyNote(item) {
72
84
  }
73
85
  return `${base} has partial or degraded signal.`;
74
86
  }
87
+ function resolveModelWeight(source, policyWeights) {
88
+ if (policyWeights && source in policyWeights) {
89
+ return policyWeights[source];
90
+ }
91
+ return DEFAULT_WEIGHTS[source] ?? 0;
92
+ }
93
+ function inferUncollectedReason(source, presentSources) {
94
+ const hasAnalyzeEvidence = presentSources.has('live-app-quality') ||
95
+ presentSources.has('accessibility') ||
96
+ presentSources.has('crawl-coverage');
97
+ const hasRepoEvidence = presentSources.has('test-automation') || presentSources.has('api-coverage');
98
+ switch (source) {
99
+ case 'live-app-quality':
100
+ case 'accessibility':
101
+ case 'crawl-coverage':
102
+ return hasAnalyzeEvidence
103
+ ? 'not collected in this confidence run'
104
+ : 'app-runtime analysis not run — no url provided';
105
+ case 'test-automation':
106
+ case 'api-coverage':
107
+ return hasRepoEvidence
108
+ ? 'not collected in this confidence run'
109
+ : 'repo scoring not run — no repo provided';
110
+ case 'ci-results':
111
+ return 'CI status not ingested — no ci-results source wired';
112
+ default:
113
+ return 'not collected';
114
+ }
115
+ }
116
+ function buildUncollectedHonestyNote(source, reason, rawWeight) {
117
+ const pct = Math.round(rawWeight * 100);
118
+ return `'${source}' not collected (${pct}% raw model weight): ${reason}.`;
119
+ }
120
+ function buildCoverageSummaryNote(scoredSourceCount, modelSourceCount, rawWeightScored, rawWeightModel) {
121
+ const coveragePct = rawWeightModel > 0 ? Math.round((rawWeightScored / rawWeightModel) * 100) : 0;
122
+ return (`Partial evidence: verdict computed on ${scoredSourceCount} of ${modelSourceCount} model sources ` +
123
+ `(~${coveragePct}% of raw model weight). Collected weights were renormalized to 100% for the score.`);
124
+ }
125
+ function isPositiveEvidence(text) {
126
+ if (/appear covered/i.test(text))
127
+ return true;
128
+ if (/Automation maturity: L\d/i.test(text))
129
+ return true;
130
+ if (/No a11y gaps/i.test(text))
131
+ return true;
132
+ if (/^L\d —/i.test(text))
133
+ return true;
134
+ if (/^releaseConfidence=/i.test(text))
135
+ return true;
136
+ if (/^coverageScore=/i.test(text))
137
+ return true;
138
+ if (/^No .* gaps detected/i.test(text))
139
+ return true;
140
+ return false;
141
+ }
142
+ function extractItemRisks(item, passThreshold) {
143
+ const risks = [];
144
+ if (item.blocking) {
145
+ if (item.reason)
146
+ risks.push(item.reason);
147
+ risks.push(...item.evidence.filter((entry) => !isPositiveEvidence(entry)));
148
+ return risks;
149
+ }
150
+ const applicability = item.applicability ?? 'applicable';
151
+ if (applicability === 'unknown' || item.score === null) {
152
+ if (item.reason)
153
+ risks.push(`${item.source}: ${item.reason}`);
154
+ risks.push(...item.evidence.filter((entry) => !isPositiveEvidence(entry) && /(gap|critical|high|untested|uncovered|missing|block|fail|warning|auth|blocked)/i.test(entry)));
155
+ return risks;
156
+ }
157
+ if (applicability === 'not_applicable') {
158
+ if (item.reason)
159
+ risks.push(`${item.source}: ${item.reason}`);
160
+ return risks;
161
+ }
162
+ if (item.score !== null && item.score < passThreshold) {
163
+ risks.push(...item.evidence.filter((entry) => !isPositiveEvidence(entry)));
164
+ if (item.score < passThreshold) {
165
+ risks.push(`${item.source} scored ${item.score}/100 — below pass threshold (${passThreshold}).`);
166
+ }
167
+ }
168
+ else {
169
+ risks.push(...item.evidence.filter((entry) => !isPositiveEvidence(entry) &&
170
+ /(gap|critical|high|untested|uncovered|missing|block|fail|warning|penalty|below)/i.test(entry)));
171
+ }
172
+ return risks;
173
+ }
75
174
  /**
76
175
  * Compute the fused Release Confidence result from an evidence bundle.
77
176
  *
@@ -137,29 +236,56 @@ export function computeReleaseConfidence(input) {
137
236
  }
138
237
  // Level / label from shared ladder.
139
238
  const { level, label } = scoreLevel(confidenceScore ?? 0);
140
- // Honesty notes one per degraded/excluded source.
239
+ const presentSources = new Set(input.evidence.map((item) => item.source));
240
+ const uncollectedSources = MODEL_SOURCES.filter((source) => !presentSources.has(source));
241
+ const modelWeightSum = MODEL_SOURCES.reduce((sum, source) => sum + resolveModelWeight(source, policy.weights), 0);
242
+ // Honesty notes — partial-run summary first, then present-but-excluded sources (must not
243
+ // be truncated by maxListLength), then uncollected model sources.
141
244
  const honestyNotes = [];
245
+ if (uncollectedSources.length > 0 || (weightSum > 0 && weightSum < modelWeightSum - 0.001)) {
246
+ honestyNotes.push(buildCoverageSummaryNote(applicable.length, MODEL_SOURCES.length, weightSum, modelWeightSum));
247
+ }
142
248
  for (const item of excluded) {
143
249
  honestyNotes.push(buildHonestyNote(item));
144
250
  }
145
- // Also note any blocking items that aren't in the excluded set.
251
+ for (const source of uncollectedSources) {
252
+ const rawWeight = resolveModelWeight(source, policy.weights);
253
+ const reason = inferUncollectedReason(source, presentSources);
254
+ honestyNotes.push(buildUncollectedHonestyNote(source, reason, rawWeight));
255
+ }
146
256
  for (const item of blockingItems) {
147
257
  if ((item.applicability ?? 'applicable') === 'applicable' && item.score !== null) {
148
258
  honestyNotes.push(`'${item.source}' is a hard blocker${item.reason ? ': ' + item.reason : ''}.`);
149
259
  }
150
260
  }
151
- // Top risks — merge evidence across sources, severity-sorted by position.
152
- const allRisks = [
153
- ...blockingItems.flatMap((item) => item.evidence),
154
- ...input.evidence
155
- .filter((item) => (item.applicability ?? 'applicable') === 'applicable')
156
- .sort((a, b) => (a.score ?? 0) - (b.score ?? 0))
157
- .flatMap((item) => item.evidence),
158
- ];
159
- const topRisks = [...new Set(allRisks)].slice(0, limit);
160
- // Recommended next checks merge and deduplicate.
161
- const allRecs = input.evidence.flatMap((item) => item.recommendations ?? []);
162
- const recommendedNextChecks = [...new Set(allRecs)].slice(0, limit);
261
+ // Top risks — gaps and blockers only; never surface coverage successes as risks.
262
+ const allRisks = [];
263
+ for (const source of uncollectedSources) {
264
+ const rawWeight = resolveModelWeight(source, policy.weights);
265
+ if (rawWeight >= 0.10) {
266
+ const reason = inferUncollectedReason(source, presentSources);
267
+ allRisks.push(`Uncollected high-weight evidence: ${source} (${Math.round(rawWeight * 100)}% raw weight) — ${reason}.`);
268
+ }
269
+ }
270
+ for (const item of blockingItems) {
271
+ allRisks.push(...extractItemRisks(item, policy.passThreshold));
272
+ }
273
+ for (const item of [...excluded].sort((a, b) => resolveWeight(a, policy.weights) - resolveWeight(b, policy.weights))) {
274
+ allRisks.push(...extractItemRisks(item, policy.passThreshold));
275
+ }
276
+ for (const item of [...applicable].sort((a, b) => (a.score ?? 0) - (b.score ?? 0))) {
277
+ allRisks.push(...extractItemRisks(item, policy.passThreshold));
278
+ }
279
+ const topRisks = [...new Set(allRisks.filter(Boolean))].slice(0, limit);
280
+ // Recommended next checks — concrete actions for uncollected sources plus per-item recommendations.
281
+ const allRecs = [];
282
+ for (const source of uncollectedSources) {
283
+ const rec = UNCOLLECTED_NEXT_CHECKS[source];
284
+ if (rec)
285
+ allRecs.push(rec);
286
+ }
287
+ allRecs.push(...input.evidence.flatMap((item) => item.recommendations ?? []));
288
+ const recommendedNextChecks = [...new Set(allRecs.filter(Boolean))].slice(0, limit);
163
289
  const result = {
164
290
  schemaVersion: 1,
165
291
  computedAt: now,
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Prompt-leakage detector — gap category `prompt-leakage`.
3
+ *
4
+ * Flags when a web page inadvertently exposes AI system-prompt / agent
5
+ * instructions in its public surface: inline scripts, HTML comments, meta
6
+ * tags, visible text, response headers, or error bodies.
7
+ *
8
+ * CONSERVATIVE design: every signal requires TWO corroborating markers
9
+ * before generating a Gap, to keep the false-positive rate low.
10
+ * A page that merely uses the word "AI" or "assistant" will NOT trip.
11
+ *
12
+ * Heuristics are derived from first principles — the structural telltale
13
+ * shapes of an exposed instruction block. No third-party leaked-prompt
14
+ * text or vendor identifiers were used.
15
+ */
16
+ import type { Gap } from '../../schemas/gap-analysis.schema.js';
17
+ import type { Route } from '../../schemas/route-inventory.schema.js';
18
+ /**
19
+ * Scan a captured page surface for signals that an AI system prompt or agent
20
+ * instructions are exposed in its public surface.
21
+ *
22
+ * Accepts the `Route` shape from `route-inventory.schema.ts`, which now
23
+ * includes the optional `headers` and `bodySnippet` fields.
24
+ *
25
+ * Returns an array of `Gap` objects with `category: 'prompt-leakage'`.
26
+ * Returns an empty array when no signals are found.
27
+ */
28
+ export declare function detectPromptLeakage(route: Pick<Route, 'path' | 'headers' | 'bodySnippet'>): Gap[];
29
+ //# sourceMappingURL=prompt-leakage.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt-leakage.d.ts","sourceRoot":"","sources":["../../../src/tools/scoring/prompt-leakage.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAGH,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,sCAAsC,CAAC;AAChE,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,yCAAyC,CAAC;AAqLrE;;;;;;;;;GASG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,aAAa,CAAC,GAAG,GAAG,EAAE,CAgGjG"}
@@ -0,0 +1,256 @@
1
+ /**
2
+ * Prompt-leakage detector — gap category `prompt-leakage`.
3
+ *
4
+ * Flags when a web page inadvertently exposes AI system-prompt / agent
5
+ * instructions in its public surface: inline scripts, HTML comments, meta
6
+ * tags, visible text, response headers, or error bodies.
7
+ *
8
+ * CONSERVATIVE design: every signal requires TWO corroborating markers
9
+ * before generating a Gap, to keep the false-positive rate low.
10
+ * A page that merely uses the word "AI" or "assistant" will NOT trip.
11
+ *
12
+ * Heuristics are derived from first principles — the structural telltale
13
+ * shapes of an exposed instruction block. No third-party leaked-prompt
14
+ * text or vendor identifiers were used.
15
+ */
16
+ import { randomUUID } from 'node:crypto';
17
+ // ---------------------------------------------------------------------------
18
+ // Pattern constants — all original heuristics; no vendor identifiers
19
+ // ---------------------------------------------------------------------------
20
+ /**
21
+ * Patterns that mark the OPENING of a system-instruction block.
22
+ * These alone are weak — we require corroboration.
23
+ */
24
+ const ROLE_DIRECTIVE_RE = /\b(?:you\s+are\s+(?:an?\s+)?(?:ai|assistant|agent|bot|helpful|language\s+model)|act\s+as\s+(?:an?\s+)?(?:ai|assistant|agent|bot)|your\s+(?:role|persona|job|task|purpose)\s+is\s+to|i\s+am\s+(?:an?\s+)?(?:ai|assistant|agent|bot)|as\s+(?:an?\s+)?(?:ai|assistant|agent|language\s+model))\b/i;
25
+ /**
26
+ * Patterns that mark instruction-block structural keywords.
27
+ * Typical in system prompts to delineate sections/rules.
28
+ */
29
+ const INSTRUCTION_KEYWORD_RE = /\b(?:do\s+not\s+(?:reveal|disclose|share|tell|mention|discuss)\s+(?:this|these|your\s+instructions?|the\s+(?:system\s+)?prompt)|never\s+(?:reveal|disclose|share|tell)\s+(?:this|these|your|the)\b|keep\s+(?:this|these|the\s+following)\s+(?:confidential|secret|private|hidden)|do\s+not\s+(?:break|exit|leave)\s+(?:character|role|persona)|stay\s+in\s+character|maintain\s+(?:your\s+)?(?:persona|role|character))\b/i;
30
+ /**
31
+ * Markers that signal a tool/function definition block being echoed back
32
+ * (e.g. an OpenAI-style function spec or a Claude tool_use block).
33
+ */
34
+ const TOOL_DEFINITION_RE = /(?:"function_call"\s*:|"tool_use"\s*:|"tools"\s*:\s*\[|"tool_name"\s*:|function\s+definitions?\s*:)/i;
35
+ /**
36
+ * Structural markers of a multi-turn instruction payload being echoed:
37
+ * system/user/assistant roles in JSON or XML-style markup.
38
+ */
39
+ const SYSTEM_ROLE_BLOCK_RE = /(?:"role"\s*:\s*"system"|<\s*system\s*>[\s\S]{10,}<\s*\/\s*system\s*>|<\s*instructions?\s*>[\s\S]{10,}<\s*\/\s*instructions?\s*>|\[\s*INST\s*\][\s\S]{10,}\[\/\s*INST\s*\])/i;
40
+ /**
41
+ * Header names that should never expose agent instructions.
42
+ */
43
+ const LEAKY_HEADER_NAMES_RE = /^(?:x-system-prompt|x-agent-instructions?|x-llm-prompt|x-ai-context|x-openai-system|x-anthropic-system|x-bot-instructions?)$/i;
44
+ /**
45
+ * Markers that suggest a debug-mode echo of the model's instructions
46
+ * inside an error or JSON response body.
47
+ */
48
+ const DEBUG_ECHO_RE = /(?:"system_prompt"\s*:|"system_message"\s*:|"instructions"\s*:\s*"[^"]{50,}"|"agent_instructions"\s*:|"prompt_template"\s*:)/i;
49
+ // ---------------------------------------------------------------------------
50
+ // Helper utilities
51
+ // ---------------------------------------------------------------------------
52
+ /** Strip HTML tags, returning visible text only. */
53
+ function stripHtml(html) {
54
+ return html.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
55
+ }
56
+ /** Extract content of HTML comments. */
57
+ function extractComments(html) {
58
+ const results = [];
59
+ const re = /<!--([\s\S]*?)-->/g;
60
+ let m;
61
+ while ((m = re.exec(html)) !== null) {
62
+ const content = m[1]?.trim() ?? '';
63
+ if (content.length > 0)
64
+ results.push(content);
65
+ }
66
+ return results;
67
+ }
68
+ /** Extract inline <script> content (non-src scripts). */
69
+ function extractInlineScripts(html) {
70
+ const results = [];
71
+ const re = /<script(?![^>]+\bsrc\s*=)[^>]*>([\s\S]*?)<\/script>/gi;
72
+ let m;
73
+ while ((m = re.exec(html)) !== null) {
74
+ const content = m[1]?.trim() ?? '';
75
+ if (content.length > 0)
76
+ results.push(content);
77
+ }
78
+ return results;
79
+ }
80
+ /** Extract <meta> tag content values. */
81
+ function extractMetaContents(html) {
82
+ const results = [];
83
+ const re = /<meta[^>]+content\s*=\s*["']([^"']{30,})["'][^>]*>/gi;
84
+ let m;
85
+ while ((m = re.exec(html)) !== null) {
86
+ const content = m[1]?.trim() ?? '';
87
+ if (content.length > 0)
88
+ results.push(content);
89
+ }
90
+ return results;
91
+ }
92
+ /** Truncate a string for embedding in gap evidence. */
93
+ function truncate(s, max = 200) {
94
+ return s.length <= max ? s : `${s.slice(0, max)}…`;
95
+ }
96
+ // ---------------------------------------------------------------------------
97
+ // Two-signal corroboration check
98
+ //
99
+ // A "leak" is flagged only when BOTH a role-directive AND at least one of the
100
+ // structural markers co-occur in the same text block. This prevents a single
101
+ // casual mention of "AI" from tripping the detector.
102
+ // ---------------------------------------------------------------------------
103
+ function detectInBlock(text, location) {
104
+ const hasRoleDirective = ROLE_DIRECTIVE_RE.test(text);
105
+ const hasToolDef = TOOL_DEFINITION_RE.test(text);
106
+ const hasSystemRoleBlock = SYSTEM_ROLE_BLOCK_RE.test(text);
107
+ const hasInstructionKeyword = INSTRUCTION_KEYWORD_RE.test(text);
108
+ const hasDebugEcho = DEBUG_ECHO_RE.test(text);
109
+ // Highest confidence: a role directive + an explicit secrecy/instruction keyword
110
+ if (hasRoleDirective && hasInstructionKeyword) {
111
+ const match = text.match(ROLE_DIRECTIVE_RE)?.[0] ?? '';
112
+ return {
113
+ description: `Role-framing directive with instruction confidentiality keyword in ${location}`,
114
+ evidence: truncate(`${match} … [instruction keyword found]`),
115
+ severity: 'critical',
116
+ };
117
+ }
118
+ // High confidence: system-role JSON/XML block containing a role directive
119
+ if (hasSystemRoleBlock && hasRoleDirective) {
120
+ return {
121
+ description: `System-role payload block with role directive in ${location}`,
122
+ evidence: truncate(text.match(SYSTEM_ROLE_BLOCK_RE)?.[0] ?? text),
123
+ severity: 'high',
124
+ };
125
+ }
126
+ // High confidence: tool/function definition echoed in page surface with role directive
127
+ if (hasToolDef && hasRoleDirective) {
128
+ return {
129
+ description: `Tool/function definition block with role directive in ${location}`,
130
+ evidence: truncate(text.match(TOOL_DEFINITION_RE)?.[0] ?? text),
131
+ severity: 'high',
132
+ };
133
+ }
134
+ // Medium confidence: debug echo of system prompt field in JSON
135
+ if (hasDebugEcho && (hasRoleDirective || hasSystemRoleBlock)) {
136
+ return {
137
+ description: `Debug-mode system-prompt echo in ${location}`,
138
+ evidence: truncate(text.match(DEBUG_ECHO_RE)?.[0] ?? text),
139
+ severity: 'high',
140
+ };
141
+ }
142
+ // Lower confidence: standalone debug echo field (without corroborating role directive)
143
+ // Still worth flagging if the field name alone is a strong indicator
144
+ if (hasDebugEcho && text.length > 100) {
145
+ return {
146
+ description: `Possible debug-mode prompt field echo in ${location}`,
147
+ evidence: truncate(text.match(DEBUG_ECHO_RE)?.[0] ?? text),
148
+ severity: 'medium',
149
+ };
150
+ }
151
+ return null;
152
+ }
153
+ // ---------------------------------------------------------------------------
154
+ // Public detector
155
+ // ---------------------------------------------------------------------------
156
+ /**
157
+ * Scan a captured page surface for signals that an AI system prompt or agent
158
+ * instructions are exposed in its public surface.
159
+ *
160
+ * Accepts the `Route` shape from `route-inventory.schema.ts`, which now
161
+ * includes the optional `headers` and `bodySnippet` fields.
162
+ *
163
+ * Returns an array of `Gap` objects with `category: 'prompt-leakage'`.
164
+ * Returns an empty array when no signals are found.
165
+ */
166
+ export function detectPromptLeakage(route) {
167
+ const gaps = [];
168
+ const path = route.path;
169
+ const html = route.bodySnippet ?? '';
170
+ // 1. Check inline scripts
171
+ for (const script of extractInlineScripts(html)) {
172
+ const signal = detectInBlock(script, 'inline-script');
173
+ if (signal) {
174
+ gaps.push({
175
+ id: randomUUID(),
176
+ path,
177
+ severity: signal.severity,
178
+ reason: signal.description,
179
+ category: 'prompt-leakage',
180
+ description: `Prompt-leakage signal detected in inline JavaScript: ${signal.evidence}`,
181
+ recommendation: 'Remove agent instruction content from client-facing JavaScript. Never embed system prompts in frontend bundles or inline scripts.',
182
+ });
183
+ }
184
+ }
185
+ // 2. Check HTML comments
186
+ for (const comment of extractComments(html)) {
187
+ const signal = detectInBlock(comment, 'HTML-comment');
188
+ if (signal) {
189
+ gaps.push({
190
+ id: randomUUID(),
191
+ path,
192
+ severity: signal.severity,
193
+ reason: signal.description,
194
+ category: 'prompt-leakage',
195
+ description: `Prompt-leakage signal detected in HTML comment: ${signal.evidence}`,
196
+ recommendation: 'Remove agent instructions from HTML comments. Comments are visible in page source.',
197
+ });
198
+ }
199
+ }
200
+ // 3. Check meta tag content
201
+ for (const content of extractMetaContents(html)) {
202
+ const signal = detectInBlock(content, 'meta-tag');
203
+ if (signal) {
204
+ gaps.push({
205
+ id: randomUUID(),
206
+ path,
207
+ severity: signal.severity,
208
+ reason: signal.description,
209
+ category: 'prompt-leakage',
210
+ description: `Prompt-leakage signal detected in meta tag: ${signal.evidence}`,
211
+ recommendation: 'Remove agent instructions from HTML meta tags. Meta content is public.',
212
+ });
213
+ }
214
+ }
215
+ // 4. Check visible body text (stripped of tags)
216
+ if (html.length > 0) {
217
+ const visible = stripHtml(html);
218
+ const signal = detectInBlock(visible, 'page-body');
219
+ if (signal) {
220
+ gaps.push({
221
+ id: randomUUID(),
222
+ path,
223
+ severity: signal.severity,
224
+ reason: signal.description,
225
+ category: 'prompt-leakage',
226
+ description: `Prompt-leakage signal detected in visible page body: ${signal.evidence}`,
227
+ recommendation: 'Ensure agent instructions are never rendered into visible page content. Check debug/error pages.',
228
+ });
229
+ }
230
+ }
231
+ // 5. Check response headers
232
+ const headers = route.headers ?? {};
233
+ for (const [name, value] of Object.entries(headers)) {
234
+ if (LEAKY_HEADER_NAMES_RE.test(name)) {
235
+ gaps.push({
236
+ id: randomUUID(),
237
+ path,
238
+ severity: 'critical',
239
+ reason: `Response header "${name}" exposes agent configuration`,
240
+ category: 'prompt-leakage',
241
+ description: `Header "${name}: ${truncate(value, 80)}" should not be sent to clients.`,
242
+ recommendation: `Remove the "${name}" response header. Agent configuration must never be transmitted to the browser.`,
243
+ });
244
+ }
245
+ }
246
+ // Deduplicate by (path + severity + reason) to avoid double-counting when
247
+ // the same signal appears in multiple extraction contexts.
248
+ const seen = new Set();
249
+ return gaps.filter((g) => {
250
+ const key = `${g.path}::${g.severity}::${g.reason}`;
251
+ if (seen.has(key))
252
+ return false;
253
+ seen.add(key);
254
+ return true;
255
+ });
256
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@qulib/core",
3
- "version": "0.9.0",
3
+ "version": "0.10.1",
4
4
  "description": "Qulib — release confidence for deployed web apps. Fuses live-app quality, automation maturity, and API coverage into a single ship/caution/hold/block verdict.",
5
5
  "license": "MIT",
6
6
  "author": "Tapesh Nagarwal",
@@ -23,7 +23,11 @@
23
23
  "accessibility",
24
24
  "playwright",
25
25
  "mcp",
26
- "ai"
26
+ "ai",
27
+ "ci-gate",
28
+ "test-confidence",
29
+ "web-quality",
30
+ "wcag"
27
31
  ],
28
32
  "publishConfig": {
29
33
  "access": "public"
@@ -52,7 +56,7 @@
52
56
  "build": "tsc",
53
57
  "prepack": "npm run build",
54
58
  "prepublishOnly": "npm run build",
55
- "test": "node --import tsx/esm --test src/llm/__tests__/cost-intelligence.test.ts src/llm/__tests__/context-builder.test.ts src/tools/scoring/__tests__/gaps.test.ts src/tools/auth/__tests__/gaps.test.ts src/tools/auth/__tests__/detect.test.ts src/tools/scoring/__tests__/automation-maturity.test.ts src/tools/scoring/__tests__/api-coverage.test.ts src/tools/scoring/__tests__/automation-maturity-with-api.test.ts src/harness/__tests__/state-manager.test.ts src/telemetry/__tests__/redact-url.test.ts src/cli/__tests__/auth-login.test.ts src/cli/__tests__/cli-version.test.ts src/cli/__tests__/bin-shim.test.ts src/cli/__tests__/score-automation.test.ts src/cli/__tests__/scaffold.test.ts src/__tests__/agent-summary.test.ts src/__tests__/cli-agent-summary.test.ts src/__tests__/analyze.storage-state-invalid.test.ts src/__tests__/analyze.fixtures.test.ts src/adapters/__tests__/playwright-adapter.test.ts src/adapters/__tests__/api-adapter.test.ts src/adapters/__tests__/ci-results-adapter.test.ts src/adapters/__tests__/pr-metadata-adapter.test.ts src/adapters/__tests__/validate-specs.test.ts src/tools/repo/__tests__/api-surface.test.ts src/baseline/__tests__/baseline.test.ts evals/runner/__tests__/runner.test.ts evals/judge/__tests__/judge.test.ts src/tools/scoring/__tests__/confidence.test.ts src/tools/scoring/__tests__/confidence-from-qulib.test.ts src/tools/scoring/__tests__/confidence-views.test.ts src/cli/__tests__/confidence.test.ts src/__tests__/notquality-dogfood.test.ts src/cli/__tests__/default-config-fallback.test.ts",
59
+ "test": "node --import tsx/esm --test src/llm/__tests__/cost-intelligence.test.ts src/llm/__tests__/context-builder.test.ts src/tools/scoring/__tests__/gaps.test.ts src/tools/auth/__tests__/gaps.test.ts src/tools/auth/__tests__/detect.test.ts src/tools/scoring/__tests__/automation-maturity.test.ts src/tools/scoring/__tests__/api-coverage.test.ts src/tools/scoring/__tests__/automation-maturity-with-api.test.ts src/harness/__tests__/state-manager.test.ts src/telemetry/__tests__/redact-url.test.ts src/cli/__tests__/auth-login.test.ts src/cli/__tests__/cli-version.test.ts src/cli/__tests__/bin-shim.test.ts src/cli/__tests__/score-automation.test.ts src/cli/__tests__/scaffold.test.ts src/__tests__/agent-summary.test.ts src/__tests__/cli-agent-summary.test.ts src/__tests__/analyze.storage-state-invalid.test.ts src/__tests__/analyze.fixtures.test.ts src/adapters/__tests__/playwright-adapter.test.ts src/adapters/__tests__/api-adapter.test.ts src/adapters/__tests__/ci-results-adapter.test.ts src/adapters/__tests__/pr-metadata-adapter.test.ts src/adapters/__tests__/validate-specs.test.ts src/tools/repo/__tests__/api-surface.test.ts src/baseline/__tests__/baseline.test.ts evals/runner/__tests__/runner.test.ts evals/runner/__tests__/golden-manifest.test.ts evals/judge/__tests__/judge.test.ts src/tools/scoring/__tests__/confidence.test.ts src/tools/scoring/__tests__/confidence-from-qulib.test.ts src/tools/scoring/__tests__/confidence-views.test.ts src/cli/__tests__/confidence.test.ts src/__tests__/notquality-dogfood.test.ts src/cli/__tests__/default-config-fallback.test.ts src/cli/__tests__/baseline.test.ts src/cli/__tests__/naming-aliases.test.ts src/cli/__tests__/analyze-diff.test.ts src/reporters/__tests__/heatmap.test.ts src/tools/scoring/__tests__/prompt-leakage.test.ts",
56
60
  "test:integration": "node --import tsx/esm --test src/__tests__/analyze.integration.test.ts",
57
61
  "eval": "node --import tsx/esm evals/runner/index.ts",
58
62
  "eval:judge": "node --import tsx/esm evals/judge/eval-judge.ts",
@@ -71,6 +75,6 @@
71
75
  "devDependencies": {
72
76
  "@types/js-yaml": "^4.0.9",
73
77
  "@types/node": "^20.0.0",
74
- "tsx": "^4.11.0"
78
+ "tsx": "^4.22.4"
75
79
  }
76
80
  }