@qulib/core 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/dist/baseline/baseline.schema.d.ts +26 -26
  2. package/dist/baseline/baseline.schema.d.ts.map +1 -1
  3. package/dist/baseline/baseline.schema.js +1 -0
  4. package/dist/cli/confidence-run.js +5 -5
  5. package/dist/index.d.ts +1 -0
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +1 -0
  8. package/dist/phases/think.d.ts.map +1 -1
  9. package/dist/phases/think.js +4 -1
  10. package/dist/reporters/heatmap.d.ts +1 -1
  11. package/dist/reporters/heatmap.d.ts.map +1 -1
  12. package/dist/reporters/heatmap.js +2 -0
  13. package/dist/schemas/confidence.schema.d.ts +2 -2
  14. package/dist/schemas/gap-analysis.schema.d.ts +8 -8
  15. package/dist/schemas/gap-analysis.schema.js +1 -1
  16. package/dist/schemas/golden-manifest.schema.d.ts +137 -0
  17. package/dist/schemas/golden-manifest.schema.d.ts.map +1 -0
  18. package/dist/schemas/golden-manifest.schema.js +25 -0
  19. package/dist/schemas/index.d.ts +1 -0
  20. package/dist/schemas/index.d.ts.map +1 -1
  21. package/dist/schemas/index.js +1 -0
  22. package/dist/schemas/public-surface.schema.d.ts +15 -5
  23. package/dist/schemas/public-surface.schema.d.ts.map +1 -1
  24. package/dist/schemas/route-inventory.schema.d.ts +20 -0
  25. package/dist/schemas/route-inventory.schema.d.ts.map +1 -1
  26. package/dist/schemas/route-inventory.schema.js +4 -0
  27. package/dist/schemas/views.schema.d.ts +1 -1
  28. package/dist/tools/scoring/confidence.d.ts.map +1 -1
  29. package/dist/tools/scoring/confidence.js +140 -14
  30. package/dist/tools/scoring/prompt-leakage.d.ts +29 -0
  31. package/dist/tools/scoring/prompt-leakage.d.ts.map +1 -0
  32. package/dist/tools/scoring/prompt-leakage.js +256 -0
  33. package/package.json +2 -2
@@ -0,0 +1,256 @@
1
+ /**
2
+ * Prompt-leakage detector — gap category `prompt-leakage`.
3
+ *
4
+ * Flags when a web page inadvertently exposes AI system-prompt / agent
5
+ * instructions in its public surface: inline scripts, HTML comments, meta
6
+ * tags, visible text, response headers, or error bodies.
7
+ *
8
+ * CONSERVATIVE design: every signal requires TWO corroborating markers
9
+ * before generating a Gap, to keep the false-positive rate low.
10
+ * A page that merely uses the word "AI" or "assistant" will NOT trip.
11
+ *
12
+ * Heuristics are derived from first principles — the structural telltale
13
+ * shapes of an exposed instruction block. No third-party leaked-prompt
14
+ * text or vendor identifiers were used.
15
+ */
16
+ import { randomUUID } from 'node:crypto';
17
+ // ---------------------------------------------------------------------------
18
+ // Pattern constants — all original heuristics; no vendor identifiers
19
+ // ---------------------------------------------------------------------------
20
+ /**
21
+ * Patterns that mark the OPENING of a system-instruction block.
22
+ * These alone are weak — we require corroboration.
23
+ */
24
+ const ROLE_DIRECTIVE_RE = /\b(?:you\s+are\s+(?:an?\s+)?(?:ai|assistant|agent|bot|helpful|language\s+model)|act\s+as\s+(?:an?\s+)?(?:ai|assistant|agent|bot)|your\s+(?:role|persona|job|task|purpose)\s+is\s+to|i\s+am\s+(?:an?\s+)?(?:ai|assistant|agent|bot)|as\s+(?:an?\s+)?(?:ai|assistant|agent|language\s+model))\b/i;
25
+ /**
26
+ * Patterns that mark instruction-block structural keywords.
27
+ * Typical in system prompts to delineate sections/rules.
28
+ */
29
+ const INSTRUCTION_KEYWORD_RE = /\b(?:do\s+not\s+(?:reveal|disclose|share|tell|mention|discuss)\s+(?:this|these|your\s+instructions?|the\s+(?:system\s+)?prompt)|never\s+(?:reveal|disclose|share|tell)\s+(?:this|these|your|the)\b|keep\s+(?:this|these|the\s+following)\s+(?:confidential|secret|private|hidden)|do\s+not\s+(?:break|exit|leave)\s+(?:character|role|persona)|stay\s+in\s+character|maintain\s+(?:your\s+)?(?:persona|role|character))\b/i;
30
+ /**
31
+ * Markers that signal a tool/function definition block being echoed back
32
+ * (e.g. an OpenAI-style function spec or a Claude tool_use block).
33
+ */
34
+ const TOOL_DEFINITION_RE = /(?:"function_call"\s*:|"tool_use"\s*:|"tools"\s*:\s*\[|"tool_name"\s*:|function\s+definitions?\s*:)/i;
35
+ /**
36
+ * Structural markers of a multi-turn instruction payload being echoed:
37
+ * system/user/assistant roles in JSON or XML-style markup.
38
+ */
39
+ const SYSTEM_ROLE_BLOCK_RE = /(?:"role"\s*:\s*"system"|<\s*system\s*>[\s\S]{10,}<\s*\/\s*system\s*>|<\s*instructions?\s*>[\s\S]{10,}<\s*\/\s*instructions?\s*>|\[\s*INST\s*\][\s\S]{10,}\[\/\s*INST\s*\])/i;
40
+ /**
41
+ * Header names that should never expose agent instructions.
42
+ */
43
+ const LEAKY_HEADER_NAMES_RE = /^(?:x-system-prompt|x-agent-instructions?|x-llm-prompt|x-ai-context|x-openai-system|x-anthropic-system|x-bot-instructions?)$/i;
44
+ /**
45
+ * Markers that suggest a debug-mode echo of the model's instructions
46
+ * inside an error or JSON response body.
47
+ */
48
+ const DEBUG_ECHO_RE = /(?:"system_prompt"\s*:|"system_message"\s*:|"instructions"\s*:\s*"[^"]{50,}"|"agent_instructions"\s*:|"prompt_template"\s*:)/i;
49
+ // ---------------------------------------------------------------------------
50
+ // Helper utilities
51
+ // ---------------------------------------------------------------------------
52
+ /** Strip HTML tags, returning visible text only. */
53
+ function stripHtml(html) {
54
+ return html.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
55
+ }
56
+ /** Extract content of HTML comments. */
57
+ function extractComments(html) {
58
+ const results = [];
59
+ const re = /<!--([\s\S]*?)-->/g;
60
+ let m;
61
+ while ((m = re.exec(html)) !== null) {
62
+ const content = m[1]?.trim() ?? '';
63
+ if (content.length > 0)
64
+ results.push(content);
65
+ }
66
+ return results;
67
+ }
68
+ /** Extract inline <script> content (non-src scripts). */
69
+ function extractInlineScripts(html) {
70
+ const results = [];
71
+ const re = /<script(?![^>]+\bsrc\s*=)[^>]*>([\s\S]*?)<\/script>/gi;
72
+ let m;
73
+ while ((m = re.exec(html)) !== null) {
74
+ const content = m[1]?.trim() ?? '';
75
+ if (content.length > 0)
76
+ results.push(content);
77
+ }
78
+ return results;
79
+ }
80
+ /** Extract <meta> tag content values. */
81
+ function extractMetaContents(html) {
82
+ const results = [];
83
+ const re = /<meta[^>]+content\s*=\s*["']([^"']{30,})["'][^>]*>/gi;
84
+ let m;
85
+ while ((m = re.exec(html)) !== null) {
86
+ const content = m[1]?.trim() ?? '';
87
+ if (content.length > 0)
88
+ results.push(content);
89
+ }
90
+ return results;
91
+ }
92
+ /** Truncate a string for embedding in gap evidence. */
93
+ function truncate(s, max = 200) {
94
+ return s.length <= max ? s : `${s.slice(0, max)}…`;
95
+ }
96
+ // ---------------------------------------------------------------------------
97
+ // Two-signal corroboration check
98
+ //
99
+ // A "leak" is flagged only when BOTH a role-directive AND at least one of the
100
+ // structural markers co-occur in the same text block. This prevents a single
101
+ // casual mention of "AI" from tripping the detector.
102
+ // ---------------------------------------------------------------------------
103
+ function detectInBlock(text, location) {
104
+ const hasRoleDirective = ROLE_DIRECTIVE_RE.test(text);
105
+ const hasToolDef = TOOL_DEFINITION_RE.test(text);
106
+ const hasSystemRoleBlock = SYSTEM_ROLE_BLOCK_RE.test(text);
107
+ const hasInstructionKeyword = INSTRUCTION_KEYWORD_RE.test(text);
108
+ const hasDebugEcho = DEBUG_ECHO_RE.test(text);
109
+ // Highest confidence: a role directive + an explicit secrecy/instruction keyword
110
+ if (hasRoleDirective && hasInstructionKeyword) {
111
+ const match = text.match(ROLE_DIRECTIVE_RE)?.[0] ?? '';
112
+ return {
113
+ description: `Role-framing directive with instruction confidentiality keyword in ${location}`,
114
+ evidence: truncate(`${match} … [instruction keyword found]`),
115
+ severity: 'critical',
116
+ };
117
+ }
118
+ // High confidence: system-role JSON/XML block containing a role directive
119
+ if (hasSystemRoleBlock && hasRoleDirective) {
120
+ return {
121
+ description: `System-role payload block with role directive in ${location}`,
122
+ evidence: truncate(text.match(SYSTEM_ROLE_BLOCK_RE)?.[0] ?? text),
123
+ severity: 'high',
124
+ };
125
+ }
126
+ // High confidence: tool/function definition echoed in page surface with role directive
127
+ if (hasToolDef && hasRoleDirective) {
128
+ return {
129
+ description: `Tool/function definition block with role directive in ${location}`,
130
+ evidence: truncate(text.match(TOOL_DEFINITION_RE)?.[0] ?? text),
131
+ severity: 'high',
132
+ };
133
+ }
134
+ // Medium confidence: debug echo of system prompt field in JSON
135
+ if (hasDebugEcho && (hasRoleDirective || hasSystemRoleBlock)) {
136
+ return {
137
+ description: `Debug-mode system-prompt echo in ${location}`,
138
+ evidence: truncate(text.match(DEBUG_ECHO_RE)?.[0] ?? text),
139
+ severity: 'high',
140
+ };
141
+ }
142
+ // Lower confidence: standalone debug echo field (without corroborating role directive)
143
+ // Still worth flagging if the field name alone is a strong indicator
144
+ if (hasDebugEcho && text.length > 100) {
145
+ return {
146
+ description: `Possible debug-mode prompt field echo in ${location}`,
147
+ evidence: truncate(text.match(DEBUG_ECHO_RE)?.[0] ?? text),
148
+ severity: 'medium',
149
+ };
150
+ }
151
+ return null;
152
+ }
153
+ // ---------------------------------------------------------------------------
154
+ // Public detector
155
+ // ---------------------------------------------------------------------------
156
+ /**
157
+ * Scan a captured page surface for signals that an AI system prompt or agent
158
+ * instructions are exposed in its public surface.
159
+ *
160
+ * Accepts the `Route` shape from `route-inventory.schema.ts`, which now
161
+ * includes the optional `headers` and `bodySnippet` fields.
162
+ *
163
+ * Returns an array of `Gap` objects with `category: 'prompt-leakage'`.
164
+ * Returns an empty array when no signals are found.
165
+ */
166
+ export function detectPromptLeakage(route) {
167
+ const gaps = [];
168
+ const path = route.path;
169
+ const html = route.bodySnippet ?? '';
170
+ // 1. Check inline scripts
171
+ for (const script of extractInlineScripts(html)) {
172
+ const signal = detectInBlock(script, 'inline-script');
173
+ if (signal) {
174
+ gaps.push({
175
+ id: randomUUID(),
176
+ path,
177
+ severity: signal.severity,
178
+ reason: signal.description,
179
+ category: 'prompt-leakage',
180
+ description: `Prompt-leakage signal detected in inline JavaScript: ${signal.evidence}`,
181
+ recommendation: 'Remove agent instruction content from client-facing JavaScript. Never embed system prompts in frontend bundles or inline scripts.',
182
+ });
183
+ }
184
+ }
185
+ // 2. Check HTML comments
186
+ for (const comment of extractComments(html)) {
187
+ const signal = detectInBlock(comment, 'HTML-comment');
188
+ if (signal) {
189
+ gaps.push({
190
+ id: randomUUID(),
191
+ path,
192
+ severity: signal.severity,
193
+ reason: signal.description,
194
+ category: 'prompt-leakage',
195
+ description: `Prompt-leakage signal detected in HTML comment: ${signal.evidence}`,
196
+ recommendation: 'Remove agent instructions from HTML comments. Comments are visible in page source.',
197
+ });
198
+ }
199
+ }
200
+ // 3. Check meta tag content
201
+ for (const content of extractMetaContents(html)) {
202
+ const signal = detectInBlock(content, 'meta-tag');
203
+ if (signal) {
204
+ gaps.push({
205
+ id: randomUUID(),
206
+ path,
207
+ severity: signal.severity,
208
+ reason: signal.description,
209
+ category: 'prompt-leakage',
210
+ description: `Prompt-leakage signal detected in meta tag: ${signal.evidence}`,
211
+ recommendation: 'Remove agent instructions from HTML meta tags. Meta content is public.',
212
+ });
213
+ }
214
+ }
215
+ // 4. Check visible body text (stripped of tags)
216
+ if (html.length > 0) {
217
+ const visible = stripHtml(html);
218
+ const signal = detectInBlock(visible, 'page-body');
219
+ if (signal) {
220
+ gaps.push({
221
+ id: randomUUID(),
222
+ path,
223
+ severity: signal.severity,
224
+ reason: signal.description,
225
+ category: 'prompt-leakage',
226
+ description: `Prompt-leakage signal detected in visible page body: ${signal.evidence}`,
227
+ recommendation: 'Ensure agent instructions are never rendered into visible page content. Check debug/error pages.',
228
+ });
229
+ }
230
+ }
231
+ // 5. Check response headers
232
+ const headers = route.headers ?? {};
233
+ for (const [name, value] of Object.entries(headers)) {
234
+ if (LEAKY_HEADER_NAMES_RE.test(name)) {
235
+ gaps.push({
236
+ id: randomUUID(),
237
+ path,
238
+ severity: 'critical',
239
+ reason: `Response header "${name}" exposes agent configuration`,
240
+ category: 'prompt-leakage',
241
+ description: `Header "${name}: ${truncate(value, 80)}" should not be sent to clients.`,
242
+ recommendation: `Remove the "${name}" response header. Agent configuration must never be transmitted to the browser.`,
243
+ });
244
+ }
245
+ }
246
+ // Deduplicate by (path + severity + reason) to avoid double-counting when
247
+ // the same signal appears in multiple extraction contexts.
248
+ const seen = new Set();
249
+ return gaps.filter((g) => {
250
+ const key = `${g.path}::${g.severity}::${g.reason}`;
251
+ if (seen.has(key))
252
+ return false;
253
+ seen.add(key);
254
+ return true;
255
+ });
256
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@qulib/core",
3
- "version": "0.10.0",
3
+ "version": "0.10.1",
4
4
  "description": "Qulib — release confidence for deployed web apps. Fuses live-app quality, automation maturity, and API coverage into a single ship/caution/hold/block verdict.",
5
5
  "license": "MIT",
6
6
  "author": "Tapesh Nagarwal",
@@ -56,7 +56,7 @@
56
56
  "build": "tsc",
57
57
  "prepack": "npm run build",
58
58
  "prepublishOnly": "npm run build",
59
- "test": "node --import tsx/esm --test src/llm/__tests__/cost-intelligence.test.ts src/llm/__tests__/context-builder.test.ts src/tools/scoring/__tests__/gaps.test.ts src/tools/auth/__tests__/gaps.test.ts src/tools/auth/__tests__/detect.test.ts src/tools/scoring/__tests__/automation-maturity.test.ts src/tools/scoring/__tests__/api-coverage.test.ts src/tools/scoring/__tests__/automation-maturity-with-api.test.ts src/harness/__tests__/state-manager.test.ts src/telemetry/__tests__/redact-url.test.ts src/cli/__tests__/auth-login.test.ts src/cli/__tests__/cli-version.test.ts src/cli/__tests__/bin-shim.test.ts src/cli/__tests__/score-automation.test.ts src/cli/__tests__/scaffold.test.ts src/__tests__/agent-summary.test.ts src/__tests__/cli-agent-summary.test.ts src/__tests__/analyze.storage-state-invalid.test.ts src/__tests__/analyze.fixtures.test.ts src/adapters/__tests__/playwright-adapter.test.ts src/adapters/__tests__/api-adapter.test.ts src/adapters/__tests__/ci-results-adapter.test.ts src/adapters/__tests__/pr-metadata-adapter.test.ts src/adapters/__tests__/validate-specs.test.ts src/tools/repo/__tests__/api-surface.test.ts src/baseline/__tests__/baseline.test.ts evals/runner/__tests__/runner.test.ts evals/judge/__tests__/judge.test.ts src/tools/scoring/__tests__/confidence.test.ts src/tools/scoring/__tests__/confidence-from-qulib.test.ts src/tools/scoring/__tests__/confidence-views.test.ts src/cli/__tests__/confidence.test.ts src/__tests__/notquality-dogfood.test.ts src/cli/__tests__/default-config-fallback.test.ts src/cli/__tests__/baseline.test.ts src/cli/__tests__/naming-aliases.test.ts src/cli/__tests__/analyze-diff.test.ts src/reporters/__tests__/heatmap.test.ts",
59
+ "test": "node --import tsx/esm --test src/llm/__tests__/cost-intelligence.test.ts src/llm/__tests__/context-builder.test.ts src/tools/scoring/__tests__/gaps.test.ts src/tools/auth/__tests__/gaps.test.ts src/tools/auth/__tests__/detect.test.ts src/tools/scoring/__tests__/automation-maturity.test.ts src/tools/scoring/__tests__/api-coverage.test.ts src/tools/scoring/__tests__/automation-maturity-with-api.test.ts src/harness/__tests__/state-manager.test.ts src/telemetry/__tests__/redact-url.test.ts src/cli/__tests__/auth-login.test.ts src/cli/__tests__/cli-version.test.ts src/cli/__tests__/bin-shim.test.ts src/cli/__tests__/score-automation.test.ts src/cli/__tests__/scaffold.test.ts src/__tests__/agent-summary.test.ts src/__tests__/cli-agent-summary.test.ts src/__tests__/analyze.storage-state-invalid.test.ts src/__tests__/analyze.fixtures.test.ts src/adapters/__tests__/playwright-adapter.test.ts src/adapters/__tests__/api-adapter.test.ts src/adapters/__tests__/ci-results-adapter.test.ts src/adapters/__tests__/pr-metadata-adapter.test.ts src/adapters/__tests__/validate-specs.test.ts src/tools/repo/__tests__/api-surface.test.ts src/baseline/__tests__/baseline.test.ts evals/runner/__tests__/runner.test.ts evals/runner/__tests__/golden-manifest.test.ts evals/judge/__tests__/judge.test.ts src/tools/scoring/__tests__/confidence.test.ts src/tools/scoring/__tests__/confidence-from-qulib.test.ts src/tools/scoring/__tests__/confidence-views.test.ts src/cli/__tests__/confidence.test.ts src/__tests__/notquality-dogfood.test.ts src/cli/__tests__/default-config-fallback.test.ts src/cli/__tests__/baseline.test.ts src/cli/__tests__/naming-aliases.test.ts src/cli/__tests__/analyze-diff.test.ts src/reporters/__tests__/heatmap.test.ts src/tools/scoring/__tests__/prompt-leakage.test.ts",
60
60
  "test:integration": "node --import tsx/esm --test src/__tests__/analyze.integration.test.ts",
61
61
  "eval": "node --import tsx/esm evals/runner/index.ts",
62
62
  "eval:judge": "node --import tsx/esm evals/judge/eval-judge.ts",