@qulib/core 0.8.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/README.md +38 -13
  2. package/bin/qulib.js +2 -3
  3. package/dist/__tests__/playwright-available.d.ts +32 -0
  4. package/dist/__tests__/playwright-available.d.ts.map +1 -0
  5. package/dist/__tests__/playwright-available.js +35 -0
  6. package/dist/adapters/ci-results-adapter.d.ts +67 -0
  7. package/dist/adapters/ci-results-adapter.d.ts.map +1 -0
  8. package/dist/adapters/ci-results-adapter.js +143 -0
  9. package/dist/adapters/cypress-e2e-adapter.d.ts.map +1 -1
  10. package/dist/adapters/cypress-e2e-adapter.js +25 -2
  11. package/dist/adapters/playwright-adapter.d.ts.map +1 -1
  12. package/dist/adapters/playwright-adapter.js +25 -2
  13. package/dist/adapters/pr-metadata-adapter.d.ts +75 -0
  14. package/dist/adapters/pr-metadata-adapter.d.ts.map +1 -0
  15. package/dist/adapters/pr-metadata-adapter.js +146 -0
  16. package/dist/adapters/validate-specs.d.ts +55 -0
  17. package/dist/adapters/validate-specs.d.ts.map +1 -0
  18. package/dist/adapters/validate-specs.js +67 -0
  19. package/dist/baseline/baseline.d.ts +54 -0
  20. package/dist/baseline/baseline.d.ts.map +1 -0
  21. package/dist/baseline/baseline.js +252 -0
  22. package/dist/baseline/baseline.schema.d.ts +233 -0
  23. package/dist/baseline/baseline.schema.d.ts.map +1 -0
  24. package/dist/baseline/baseline.schema.js +59 -0
  25. package/dist/cli/analyze-diff-run.d.ts +77 -0
  26. package/dist/cli/analyze-diff-run.d.ts.map +1 -0
  27. package/dist/cli/analyze-diff-run.js +266 -0
  28. package/dist/cli/baseline-run.d.ts +55 -0
  29. package/dist/cli/baseline-run.d.ts.map +1 -0
  30. package/dist/cli/baseline-run.js +259 -0
  31. package/dist/cli/confidence-run.d.ts +16 -0
  32. package/dist/cli/confidence-run.d.ts.map +1 -0
  33. package/dist/cli/confidence-run.js +162 -0
  34. package/dist/cli/index.d.ts +11 -1
  35. package/dist/cli/index.d.ts.map +1 -1
  36. package/dist/cli/index.js +84 -4
  37. package/dist/cli/scaffold-run.d.ts +86 -0
  38. package/dist/cli/scaffold-run.d.ts.map +1 -0
  39. package/dist/cli/scaffold-run.js +232 -0
  40. package/dist/cli/score-automation-run.d.ts +25 -0
  41. package/dist/cli/score-automation-run.d.ts.map +1 -0
  42. package/dist/cli/score-automation-run.js +127 -0
  43. package/dist/examples/notquality-dogfood/fixture.d.ts +166 -0
  44. package/dist/examples/notquality-dogfood/fixture.d.ts.map +1 -0
  45. package/dist/examples/notquality-dogfood/fixture.js +174 -0
  46. package/dist/examples/notquality-dogfood/run.d.ts +34 -0
  47. package/dist/examples/notquality-dogfood/run.d.ts.map +1 -0
  48. package/dist/examples/notquality-dogfood/run.js +139 -0
  49. package/dist/index.d.ts +18 -1
  50. package/dist/index.d.ts.map +1 -1
  51. package/dist/index.js +15 -0
  52. package/dist/recipes/a11y.d.ts +36 -0
  53. package/dist/recipes/a11y.d.ts.map +1 -0
  54. package/dist/recipes/a11y.js +118 -0
  55. package/dist/recipes/auth.d.ts +38 -0
  56. package/dist/recipes/auth.d.ts.map +1 -0
  57. package/dist/recipes/auth.js +156 -0
  58. package/dist/recipes/index.d.ts +26 -0
  59. package/dist/recipes/index.d.ts.map +1 -0
  60. package/dist/recipes/index.js +41 -0
  61. package/dist/recipes/nav.d.ts +34 -0
  62. package/dist/recipes/nav.d.ts.map +1 -0
  63. package/dist/recipes/nav.js +128 -0
  64. package/dist/recipes/seed.d.ts +34 -0
  65. package/dist/recipes/seed.d.ts.map +1 -0
  66. package/dist/recipes/seed.js +87 -0
  67. package/dist/reporters/heatmap.d.ts +55 -0
  68. package/dist/reporters/heatmap.d.ts.map +1 -0
  69. package/dist/reporters/heatmap.js +146 -0
  70. package/dist/reporters/markdown-reporter.d.ts.map +1 -1
  71. package/dist/reporters/markdown-reporter.js +4 -1
  72. package/dist/scaffold-tests.d.ts +21 -0
  73. package/dist/scaffold-tests.d.ts.map +1 -1
  74. package/dist/scaffold-tests.js +12 -2
  75. package/dist/schemas/confidence.schema.d.ts +526 -0
  76. package/dist/schemas/confidence.schema.d.ts.map +1 -0
  77. package/dist/schemas/confidence.schema.js +161 -0
  78. package/dist/schemas/config.schema.d.ts.map +1 -1
  79. package/dist/schemas/config.schema.js +6 -1
  80. package/dist/schemas/index.d.ts +3 -0
  81. package/dist/schemas/index.d.ts.map +1 -1
  82. package/dist/schemas/index.js +3 -0
  83. package/dist/schemas/recipe.schema.d.ts +66 -0
  84. package/dist/schemas/recipe.schema.d.ts.map +1 -0
  85. package/dist/schemas/recipe.schema.js +45 -0
  86. package/dist/schemas/views.schema.d.ts +234 -0
  87. package/dist/schemas/views.schema.d.ts.map +1 -0
  88. package/dist/schemas/views.schema.js +82 -0
  89. package/dist/tools/scoring/confidence-from-qulib.d.ts +34 -0
  90. package/dist/tools/scoring/confidence-from-qulib.d.ts.map +1 -0
  91. package/dist/tools/scoring/confidence-from-qulib.js +206 -0
  92. package/dist/tools/scoring/confidence-views.d.ts +40 -0
  93. package/dist/tools/scoring/confidence-views.d.ts.map +1 -0
  94. package/dist/tools/scoring/confidence-views.js +163 -0
  95. package/dist/tools/scoring/confidence.d.ts +32 -0
  96. package/dist/tools/scoring/confidence.d.ts.map +1 -0
  97. package/dist/tools/scoring/confidence.js +180 -0
  98. package/dist/tools/scoring/levels.d.ts +15 -0
  99. package/dist/tools/scoring/levels.d.ts.map +1 -0
  100. package/dist/tools/scoring/levels.js +21 -0
  101. package/package.json +18 -8
@@ -0,0 +1,34 @@
1
+ /**
2
+ * qulib-native adapter — maps qulib collector outputs to EvidenceItem[].
3
+ *
4
+ * P3 — qulib Confidence Layer v1.
5
+ *
6
+ * This is the THIN WIRING layer, not the pure scorer. It translates:
7
+ * AnalyzeResult → live-app-quality + accessibility + crawl-coverage EvidenceItems
8
+ * AutomationMaturity → test-automation EvidenceItem
9
+ * ApiCoverageResult → api-coverage EvidenceItem
10
+ *
11
+ * Honesty rules (mirrors agent-summary.ts and the spec §2.5):
12
+ * - auth-required scan → applicability='unknown' (never silent pass)
13
+ * - blocked scan → blocking=true (hard blocker)
14
+ * - low-coverage → crawl-coverage applicability='unknown'
15
+ * - 0-endpoint API → api-coverage carries its own not_applicable (passed through verbatim)
16
+ *
17
+ * Pure function: no I/O.
18
+ */
19
+ import type { AnalyzeResult } from '../../analyze.js';
20
+ import type { AutomationMaturity } from '../../schemas/automation-maturity.schema.js';
21
+ import type { ApiCoverageResult } from './api-coverage.js';
22
+ import type { ConfidenceInput, ConfidenceSubject } from '../../schemas/confidence.schema.js';
23
+ /**
24
+ * Build a ConfidenceInput from qulib's own collector outputs: `analyze` yields the live-app-quality, accessibility and crawl-coverage items, `maturity` the test-automation item, `apiCoverage` the api-coverage item.
25
+ * Pass whichever collectors you have; omitted collectors produce no evidence item. `subject` and `policy` are copied onto the result unchanged.
26
+ */
27
+ export declare function buildConfidenceInputFromQulib(args: {
28
+ analyze?: AnalyzeResult;
29
+ maturity?: AutomationMaturity;
30
+ apiCoverage?: ApiCoverageResult;
31
+ subject: ConfidenceSubject;
32
+ policy?: ConfidenceInput['policy'];
33
+ }): ConfidenceInput;
34
+ //# sourceMappingURL=confidence-from-qulib.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"confidence-from-qulib.d.ts","sourceRoot":"","sources":["../../../src/tools/scoring/confidence-from-qulib.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,6CAA6C,CAAC;AACtF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAC3D,OAAO,KAAK,EAEV,eAAe,EACf,iBAAiB,EAClB,MAAM,oCAAoC,CAAC;AAS5C;;;GAGG;AACH,wBAAgB,6BAA6B,CAAC,IAAI,EAAE;IAClD,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAC9B,WAAW,CAAC,EAAE,iBAAiB,CAAC;IAChC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,MAAM,CAAC,EAAE,eAAe,CAAC,QAAQ,CAAC,CAAC;CACpC,GAAG,eAAe,CAgMlB"}
@@ -0,0 +1,206 @@
1
+ /**
2
+ * qulib-native adapter — maps qulib collector outputs to EvidenceItem[].
3
+ *
4
+ * P3 — qulib Confidence Layer v1.
5
+ *
6
+ * This is the THIN WIRING layer, not the pure scorer. It translates:
7
+ * AnalyzeResult → live-app-quality + accessibility + crawl-coverage EvidenceItems
8
+ * AutomationMaturity → test-automation EvidenceItem
9
+ * ApiCoverageResult → api-coverage EvidenceItem
10
+ *
11
+ * Honesty rules (mirrors agent-summary.ts and the spec §2.5):
12
+ * - auth-required scan → applicability='unknown' (never silent pass)
13
+ * - blocked scan → blocking=true (hard blocker)
14
+ * - low-coverage → crawl-coverage applicability='unknown'
15
+ * - 0-endpoint API → api-coverage carries its own not_applicable (passed through verbatim)
16
+ *
17
+ * Pure function: no I/O.
18
+ */
19
// Default weights for the qulib-native sources (match confidence.ts DEFAULT_WEIGHTS).
const W_LIVE_APP = 0.30;
const W_TEST_AUTOMATION = 0.22;
const W_API_COVERAGE = 0.15;
const W_ACCESSIBILITY = 0.13;
const W_CRAWL_COVERAGE = 0.10;
// Points deducted from the accessibility score per a11y gap, keyed by severity.
// A Map is used instead of an object literal so that inherited property names
// (e.g. 'constructor') can never be mistaken for a known severity.
const A11Y_PENALTY_BY_SEVERITY = new Map([
    ['critical', 30],
    ['high', 20],
    ['medium', 10],
    ['low', 5],
]);
// Penalty applied when a gap's severity is not in the table above.
const A11Y_FALLBACK_PENALTY = 5;
/**
 * Build the 'live-app-quality' evidence item from an AnalyzeResult.
 *
 * @param r - the analyze result (reads status, releaseConfidence, gaps, gapAnalysis.analyzedAt).
 * @param authRequired - true when the scan hit an auth wall; score becomes null and applicability 'unknown'.
 * @param isBlocked - true when the scan status is 'blocked'; score becomes null and the item is blocking.
 */
function liveAppQualityItem(r, authRequired, isBlocked) {
    const criticalGaps = r.gaps.filter((gap) => gap.severity === 'critical');
    const highGaps = r.gaps.filter((gap) => gap.severity === 'high');
    const recommendations = [];
    if (authRequired) {
        recommendations.push('Provide auth credentials (form login or storage state) and re-run to evaluate the protected surface.');
    }
    if (criticalGaps.length > 0) {
        recommendations.push(`Fix ${criticalGaps.length} critical gap(s) before shipping.`);
    }
    else if (highGaps.length > 0) {
        recommendations.push(`Address ${highGaps.length} high-severity gap(s).`);
    }
    const evidence = [];
    if (isBlocked) {
        evidence.push('Scan was blocked before producing a meaningful evaluation.');
    }
    else if (authRequired) {
        evidence.push('Auth wall prevented scanning the protected surface.');
    }
    else {
        evidence.push(`releaseConfidence=${r.releaseConfidence ?? 'null'}, status=${r.status}, gaps=${r.gaps.length}`);
        if (criticalGaps.length > 0) {
            evidence.push(`Critical gaps: ${criticalGaps.map((gap) => gap.path).join(', ')}`);
        }
    }
    // NOTE(review): evidence prefers the "blocked" wording while reason prefers the
    // "auth wall" wording when both flags are set; kept as in the original.
    return {
        source: 'live-app-quality',
        score: isBlocked || authRequired ? null : (r.releaseConfidence ?? null),
        weight: W_LIVE_APP,
        applicability: authRequired ? 'unknown' : 'applicable',
        // A blocked scan or any critical gap is a hard blocker.
        blocking: isBlocked || criticalGaps.length > 0,
        evidence,
        recommendations,
        reason: authRequired
            ? 'Auth wall prevented scanning — confidence score would be dishonest without the protected surface.'
            : isBlocked
                ? 'Scan was blocked; no evaluable surface.'
                : undefined,
        collectedAt: r.gapAnalysis.analyzedAt,
        collector: {
            tool: 'analyze_app',
            inputRef: undefined,
        },
    };
}
/**
 * Build the 'accessibility' evidence item from an AnalyzeResult.
 *
 * The score is 100 minus the summed per-gap penalty, floored at 0, over gaps
 * whose category is 'a11y'; it is null when the scan was blocked or auth-required.
 */
function accessibilityItem(r, authRequired, isBlocked) {
    const a11yGaps = r.gaps.filter((gap) => gap.category === 'a11y');
    const a11yPenalty = a11yGaps.reduce((total, gap) => total + (A11Y_PENALTY_BY_SEVERITY.get(gap.severity) ?? A11Y_FALLBACK_PENALTY), 0);
    const evaluable = !isBlocked && !authRequired;
    let evidence;
    if (!evaluable) {
        evidence = ['Accessibility could not be evaluated (scan blocked or auth-required).'];
    }
    else if (a11yGaps.length === 0) {
        evidence = ['No a11y gaps detected.'];
    }
    else {
        evidence = [`${a11yGaps.length} a11y gap(s) — penalty ${a11yPenalty} pts.`];
    }
    return {
        source: 'accessibility',
        score: evaluable ? Math.max(0, 100 - a11yPenalty) : null,
        weight: W_ACCESSIBILITY,
        applicability: authRequired ? 'unknown' : 'applicable',
        blocking: false,
        evidence,
        recommendations: a11yGaps.length > 0
            ? ['Fix a11y violations flagged by the qulib scan (see gaps[].category=\'a11y\').']
            : [],
        reason: authRequired
            ? 'Auth wall prevented a11y evaluation.'
            : isBlocked
                ? 'Scan blocked; no a11y signal.'
                : undefined,
        collectedAt: r.gapAnalysis.analyzedAt,
        collector: {
            tool: 'analyze_app',
            inputRef: undefined,
        },
    };
}
/**
 * Build the 'crawl-coverage' evidence item from an AnalyzeResult.
 *
 * Applicability is 'unknown' for auth-required scans and for scans whose
 * gapAnalysis.coverageWarning is 'low-coverage'.
 */
function crawlCoverageItem(r, authRequired, isBlocked) {
    const g = r.gapAnalysis;
    const lowCoverage = g.coverageWarning === 'low-coverage';
    // NOTE(review): unlike the other two analyze items, a blocked scan gets no
    // `reason` here even though its score is null; kept as in the original.
    return {
        source: 'crawl-coverage',
        score: !isBlocked && !authRequired ? (r.coverageScore ?? null) : null,
        weight: W_CRAWL_COVERAGE,
        applicability: authRequired || lowCoverage ? 'unknown' : 'applicable',
        blocking: false,
        evidence: [
            `coverageScore=${r.coverageScore ?? 'null'}, pagesScanned=${g.coveragePagesScanned}`,
            ...(g.coverageWarning ? [`coverageWarning: ${g.coverageWarning}`] : []),
        ],
        recommendations: lowCoverage
            ? ['Increase crawl budget or supply deeper entry URLs to raise coverage above the floor.']
            : [],
        reason: authRequired
            ? 'Auth-required scan; coverage limited to pre-auth pages.'
            : lowCoverage
                ? 'Coverage was below the confidence floor; treating as unknown signal.'
                : undefined,
        collectedAt: g.analyzedAt,
        collector: {
            tool: 'analyze_app',
            inputRef: undefined,
        },
    };
}
/** Build the 'test-automation' evidence item from an AutomationMaturity result. */
function testAutomationItem(m) {
    return {
        source: 'test-automation',
        score: m.overallScore,
        weight: W_TEST_AUTOMATION,
        applicability: 'applicable',
        blocking: false,
        evidence: [`Automation maturity: ${m.label} (score ${m.overallScore})`],
        // Only the first three recommendations are carried forward.
        recommendations: m.topRecommendations.slice(0, 3),
        collectedAt: m.computedAt,
        collector: {
            tool: 'qulib_score_automation',
            inputRef: m.repoPath,
        },
    };
}
/**
 * Build the 'api-coverage' evidence item from an ApiCoverageResult.
 *
 * The dimension's score, evidence, recommendations and reason are passed
 * through verbatim; any applicability other than 'not_applicable' or
 * 'unknown' is reported as 'applicable'.
 */
function apiCoverageItem(coverage) {
    const d = coverage.dimension;
    const passThrough = d.applicability === 'not_applicable' || d.applicability === 'unknown';
    return {
        source: 'api-coverage',
        score: d.score,
        weight: W_API_COVERAGE,
        applicability: passThrough ? d.applicability : 'applicable',
        blocking: false,
        evidence: d.evidence,
        recommendations: d.recommendations,
        reason: d.reason,
        // The dimension is not read for a timestamp, so stamp the item at build time.
        collectedAt: new Date().toISOString(),
        collector: {
            tool: 'qulib_score_api',
            inputRef: undefined,
        },
    };
}
/**
 * Build a ConfidenceInput from qulib's own collector outputs.
 * Pass whichever collectors you have; omitted collectors produce no evidence item.
 *
 * Evidence order is fixed: live-app-quality, accessibility, crawl-coverage
 * (all three from `analyze`), then test-automation, then api-coverage.
 *
 * @param args.analyze - optional AnalyzeResult.
 * @param args.maturity - optional AutomationMaturity.
 * @param args.apiCoverage - optional ApiCoverageResult.
 * @param args.subject - copied onto the result unchanged.
 * @param args.policy - copied onto the result unchanged (may be undefined).
 * @returns `{ subject, evidence, policy }`.
 */
export function buildConfidenceInputFromQulib(args) {
    const items = [];
    if (args.analyze) {
        const r = args.analyze;
        const g = r.gapAnalysis;
        // Determine if auth-required (honest: never silently pass).
        const authRequired = g.mode === 'auth-required' || g.coverageWarning === 'auth-required';
        const isBlocked = r.status === 'blocked';
        items.push(liveAppQualityItem(r, authRequired, isBlocked), accessibilityItem(r, authRequired, isBlocked), crawlCoverageItem(r, authRequired, isBlocked));
    }
    if (args.maturity) {
        items.push(testAutomationItem(args.maturity));
    }
    if (args.apiCoverage) {
        items.push(apiCoverageItem(args.apiCoverage));
    }
    return {
        subject: args.subject,
        evidence: items,
        policy: args.policy,
    };
}
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Pure view projections for the qulib Confidence Layer (Views 2–5).
3
+ *
4
+ * P3 — qulib Confidence Layer v1.
5
+ *
6
+ * All functions are pure (no I/O). Persistence sinks (file/db) are deferred to P4.
7
+ * View 1 (Release Confidence) IS the ReleaseConfidence object from the scorer.
8
+ *
9
+ * View 2 — diffConfidence: build a DeliveryTrafficPoint from two consecutive verdicts.
10
+ * View 3 — deriveInbox: extract human-judgment items from a verdict.
11
+ * View 4 — buildReplay: construct the provenance trace from input + result.
12
+ * View 5 — toAuditEntry: serialize a verdict to a tamper-evident audit record.
13
+ */
14
+ import type { ReleaseConfidence, ConfidenceInput } from '../../schemas/confidence.schema.js';
15
+ import type { DeliveryTrafficPoint, InboxItem, ReplayTrace, AuditEntry } from '../../schemas/views.schema.js';
16
+ /**
17
+ * Build a DeliveryTrafficPoint from the current verdict and an optional prior verdict.
18
+ * deltaFromPrev is current minus prior confidenceScore; it is null when there is no prior point or when either confidenceScore is null.
19
+ */
20
+ export declare function diffConfidence(current: ReleaseConfidence, prior: ReleaseConfidence | null): DeliveryTrafficPoint;
21
+ /**
22
+ * Derive human-judgment inbox items from a verdict.
23
+ * Raises items for:
24
+ * - every blocking evidence item (kind 'blocker'; takes precedence over the 'unknown' rule for the same item)
25
+ * - every non-blocking evidence item with applicability 'unknown' whose source is listed in policy.requiredSources (kind 'unknown-signal')
26
+ * - 'block' verdict with a null score when no evidence item is blocking (kind 'approval-needed'; nothing evaluable)
27
+ */
28
+ export declare function deriveInbox(rc: ReleaseConfidence, input: ConfidenceInput): InboxItem[];
29
+ /**
30
+ * Build the provenance trace from the scorer input + result.
31
+ * Steps are ordered by their appearance in the input evidence array,
32
+ * with provenance fields (tool, inputRef, durationMs, cost) carried from EvidenceItem.collector. Each step is paired with rc.contributions by index, so `rc` should be the result computed from this same `input`.
33
+ */
34
+ export declare function buildReplay(input: ConfidenceInput, rc: ReleaseConfidence): ReplayTrace;
35
+ /**
36
+ * Serialize a verdict to a tamper-evident audit record.
37
+ * recordHash is the hex SHA-256 of the canonical record (sorted blockers, computedAt, confidenceScore, evidenceSourceCount, schemaVersion, subjectRef, tenantId, verdict) — it changes when any of those fields changes.
38
+ */
39
+ export declare function toAuditEntry(rc: ReleaseConfidence, evidenceSourceCount: number): AuditEntry;
40
+ //# sourceMappingURL=confidence-views.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"confidence-views.d.ts","sourceRoot":"","sources":["../../../src/tools/scoring/confidence-views.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAIH,OAAO,KAAK,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,oCAAoC,CAAC;AAC7F,OAAO,KAAK,EACV,oBAAoB,EACpB,SAAS,EACT,WAAW,EACX,UAAU,EACX,MAAM,+BAA+B,CAAC;AAYvC;;;GAGG;AACH,wBAAgB,cAAc,CAC5B,OAAO,EAAE,iBAAiB,EAC1B,KAAK,EAAE,iBAAiB,GAAG,IAAI,GAC9B,oBAAoB,CAgBtB;AAMD;;;;;;GAMG;AACH,wBAAgB,WAAW,CACzB,EAAE,EAAE,iBAAiB,EACrB,KAAK,EAAE,eAAe,GACrB,SAAS,EAAE,CAwDb;AAMD;;;;GAIG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,eAAe,EAAE,EAAE,EAAE,iBAAiB,GAAG,WAAW,CAsBtF;AA0BD;;;GAGG;AACH,wBAAgB,YAAY,CAAC,EAAE,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,GAAG,UAAU,CAe3F"}
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Pure view projections for the qulib Confidence Layer (Views 2–5).
3
+ *
4
+ * P3 — qulib Confidence Layer v1.
5
+ *
6
+ * All functions are pure (no I/O). Persistence sinks (file/db) are deferred to P4.
7
+ * View 1 (Release Confidence) IS the ReleaseConfidence object from the scorer.
8
+ *
9
+ * View 2 — diffConfidence: build a DeliveryTrafficPoint from two consecutive verdicts.
10
+ * View 3 — deriveInbox: extract human-judgment items from a verdict.
11
+ * View 4 — buildReplay: construct the provenance trace from input + result.
12
+ * View 5 — toAuditEntry: serialize a verdict to a tamper-evident audit record.
13
+ */
14
+ import { createHash } from 'node:crypto';
15
+ import { randomUUID } from 'node:crypto';
16
+ import { DeliveryTrafficPointSchema, InboxItemSchema, ReplayTraceSchema, AuditEntrySchema, } from '../../schemas/views.schema.js';
17
+ // ---------------------------------------------------------------------------
18
+ // View 2 — Delivery Traffic
19
+ // ---------------------------------------------------------------------------
20
/**
 * View 2: project the current verdict onto a DeliveryTrafficPoint.
 *
 * deltaFromPrev is `current.confidenceScore - prior.confidenceScore`, and is
 * null when `prior` is null or when either score is null.
 *
 * @param current - the verdict being plotted.
 * @param prior - the previous verdict for the same subject, or null.
 * @returns the point as returned by DeliveryTrafficPointSchema.parse().
 */
export function diffConfidence(current, prior) {
    let deltaFromPrev = null;
    const bothScored = prior !== null &&
        current.confidenceScore !== null &&
        prior.confidenceScore !== null;
    if (bothScored) {
        deltaFromPrev = current.confidenceScore - prior.confidenceScore;
    }
    const point = {
        subjectRef: current.subject.ref,
        tenantId: current.subject.tenantId,
        computedAt: current.computedAt,
        confidenceScore: current.confidenceScore,
        verdict: current.verdict,
        deltaFromPrev,
    };
    return DeliveryTrafficPointSchema.parse(point);
}
39
+ // ---------------------------------------------------------------------------
40
+ // View 3 — Inbox
41
+ // ---------------------------------------------------------------------------
42
/**
 * View 3: derive the human-judgment inbox for a verdict.
 *
 * One item is raised per evidence entry that is blocking ('blocker'), or,
 * failing that, per entry whose applicability is 'unknown' and whose source
 * appears in policy.requiredSources ('unknown-signal'). A final
 * 'approval-needed' item is added when the verdict is 'block', the score is
 * null and no evidence entry is blocking.
 *
 * @param rc - the computed verdict (subject, computedAt, verdict, confidenceScore are read).
 * @param input - the scorer input (evidence and optional policy.requiredSources are read).
 * @returns inbox items, each passed through InboxItemSchema.parse() with a fresh random UUID.
 */
export function deriveInbox(rc, input) {
    const raisedAt = rc.computedAt;
    const required = input.policy?.requiredSources ?? [];
    // Every item shares the same envelope; only kind/source/summary vary.
    const raise = (kind, source, summary) => InboxItemSchema.parse({
        id: randomUUID(),
        subjectRef: rc.subject.ref,
        tenantId: rc.subject.tenantId,
        kind,
        source,
        summary,
        raisedAt,
    });
    const inbox = [];
    for (const entry of input.evidence) {
        // Prefer the collector's own reason; otherwise use the per-kind fallback text.
        const summarize = (fallback) => (entry.reason ? `${entry.source}: ${entry.reason}` : fallback);
        if (entry.blocking) {
            inbox.push(raise('blocker', entry.source, summarize(`${entry.source} is a hard blocker.`)));
            continue;
        }
        const isUnknown = (entry.applicability ?? 'applicable') === 'unknown';
        if (isUnknown && required.includes(entry.source)) {
            inbox.push(raise('unknown-signal', entry.source, summarize(`${entry.source} could not produce a reliable score and is a required source.`)));
        }
    }
    // A block with no score and no blocker means nothing was evaluable at all.
    const nothingEvaluable = rc.verdict === 'block' &&
        rc.confidenceScore === null &&
        !input.evidence.some((entry) => entry.blocking);
    if (nothingEvaluable) {
        inbox.push(raise('approval-needed', 'human-approval', 'No applicable evidence produced a score — manual review required before shipping.'));
    }
    return inbox;
}
96
+ // ---------------------------------------------------------------------------
97
+ // View 4 — Replay
98
+ // ---------------------------------------------------------------------------
99
/**
 * Build one replay step from an evidence item and the contribution found at
 * the same index in the verdict (may be undefined).
 */
function toReplayStep(item, contribution) {
    const { tool, inputRef, durationMs, cost } = item.collector;
    return {
        source: item.source,
        tool,
        inputRef,
        score: item.score,
        // Prefer the weight the scorer actually used; fall back to the item's own.
        weight: contribution?.weight ?? item.weight,
        effectiveWeight: contribution?.effectiveWeight ?? 0,
        durationMs,
        cost,
    };
}
/**
 * View 4: build the provenance trace from the scorer input and its result.
 *
 * Steps follow the order of input.evidence; step i is paired with
 * rc.contributions[i].
 *
 * @param input - the scorer input whose evidence items supply provenance.
 * @param rc - the verdict (subject.ref, computedAt, contributions, scoreFormula, verdict are read).
 * @returns the trace as returned by ReplayTraceSchema.parse().
 */
export function buildReplay(input, rc) {
    const steps = input.evidence.map((item, idx) => toReplayStep(item, rc.contributions[idx]));
    const trace = {
        subjectRef: rc.subject.ref,
        computedAt: rc.computedAt,
        steps,
        formula: rc.scoreFormula,
        finalVerdict: rc.verdict,
    };
    return ReplayTraceSchema.parse(trace);
}
126
+ // ---------------------------------------------------------------------------
127
+ // View 5 — Audit Trail
128
+ // ---------------------------------------------------------------------------
129
/**
 * Serialize the hashed portion of an audit record to a canonical JSON string.
 *
 * Keys are written in alphabetical order and blockers are sorted on a copy,
 * so the string does not depend on the order of rc.blockers.
 *
 * @param rc - the verdict (blockers, computedAt, confidenceScore, subject, verdict are read).
 * @param evidenceSourceCount - number of evidence sources behind the verdict.
 * @returns the canonical JSON text.
 */
function canonicalRecord(rc, evidenceSourceCount) {
    const sortedBlockers = Array.from(rc.blockers);
    sortedBlockers.sort();
    const { ref: subjectRef, tenantId } = rc.subject;
    const record = {
        blockers: sortedBlockers,
        computedAt: rc.computedAt,
        confidenceScore: rc.confidenceScore,
        evidenceSourceCount,
        schemaVersion: 1,
        subjectRef,
        tenantId,
        verdict: rc.verdict,
    };
    return JSON.stringify(record);
}
145
/**
 * View 5: serialize a verdict to a tamper-evident audit record.
 *
 * recordHash is the hex SHA-256 digest of canonicalRecord(rc, evidenceSourceCount).
 *
 * @param rc - the verdict being recorded.
 * @param evidenceSourceCount - number of evidence sources behind the verdict.
 * @returns the entry as returned by AuditEntrySchema.parse().
 */
export function toAuditEntry(rc, evidenceSourceCount) {
    const hasher = createHash('sha256');
    hasher.update(canonicalRecord(rc, evidenceSourceCount));
    const recordHash = hasher.digest('hex');
    const { subject, computedAt, confidenceScore, verdict, blockers } = rc;
    const entry = {
        tenantId: subject.tenantId,
        subjectRef: subject.ref,
        computedAt,
        confidenceScore,
        verdict,
        evidenceSourceCount,
        blockers,
        schemaVersion: 1,
        recordHash,
    };
    return AuditEntrySchema.parse(entry);
}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Release Confidence Aggregator — pure scorer.
3
+ *
4
+ * P3 — qulib Confidence Layer v1.
5
+ *
6
+ * Pure function: no I/O, no side effects. All I/O (CLI, MCP) lives in the wiring layer.
7
+ * Algorithm mirrors computeAutomationMaturity's denominator-renormalization math, generalized
8
+ * to operate over a heterogeneous evidence bundle.
9
+ *
10
+ * Score formula:
11
+ * confidenceScore = round( Σ score_i * weight_i / Σ weight_i )
12
+ * where i ∈ { applicable items with score !== null }
13
+ *
14
+ * Excluded from denominator: not_applicable | unknown | score === null items.
15
+ * Each excluded item is reported in contributions + narrated in honestyNotes.
16
+ *
17
+ * Verdict ladder (mirrors agent-summary.ts deriveGate, lifted to fused score):
18
+ * any blocking item → block
19
+ * confidenceScore === null → block (nothing evaluable; honesty floor)
20
+ * confidenceScore < failThreshold → hold
21
+ * unknown on a requiredSource OR
22
+ * confidenceScore < passThreshold → caution
23
+ * else → ship
24
+ */
25
+ import type { ConfidenceInput, ReleaseConfidence } from '../../schemas/confidence.schema.js';
26
+ /**
27
+ * Compute the fused Release Confidence result from an evidence bundle.
28
+ *
29
+ * The result is derived from the input alone, apart from computedAt, which is stamped with the current time at call.
30
+ */
31
+ export declare function computeReleaseConfidence(input: ConfidenceInput): ReleaseConfidence;
32
+ //# sourceMappingURL=confidence.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"confidence.d.ts","sourceRoot":"","sources":["../../../src/tools/scoring/confidence.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EACV,eAAe,EAGf,iBAAiB,EAElB,MAAM,oCAAoC,CAAC;AAiE5C;;;;GAIG;AACH,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,eAAe,GAAG,iBAAiB,CA8HlF"}
@@ -0,0 +1,180 @@
1
+ /**
2
+ * Release Confidence Aggregator — pure scorer.
3
+ *
4
+ * P3 — qulib Confidence Layer v1.
5
+ *
6
+ * Pure function: no I/O, no side effects. All I/O (CLI, MCP) lives in the wiring layer.
7
+ * Algorithm mirrors computeAutomationMaturity's denominator-renormalization math, generalized
8
+ * to operate over a heterogeneous evidence bundle.
9
+ *
10
+ * Score formula:
11
+ * confidenceScore = round( Σ score_i * weight_i / Σ weight_i )
12
+ * where i ∈ { applicable items with score !== null }
13
+ *
14
+ * Excluded from denominator: not_applicable | unknown | score === null items.
15
+ * Each excluded item is reported in contributions + narrated in honestyNotes.
16
+ *
17
+ * Verdict ladder (mirrors agent-summary.ts deriveGate, lifted to fused score):
18
+ * any blocking item → block
19
+ * confidenceScore === null → block (nothing evaluable; honesty floor)
20
+ * confidenceScore < failThreshold → hold
21
+ * unknown on a requiredSource OR
22
+ * confidenceScore < passThreshold → caution
23
+ * else → ship
24
+ */
25
+ import { ReleaseConfidenceSchema, ConfidencePolicySchema } from '../../schemas/confidence.schema.js';
26
+ import { scoreLevel } from './levels.js';
27
+ // ---------------------------------------------------------------------------
28
+ // Default per-source weights, used by resolveWeight() only when an item has no policy override and no positive weight of its own. The five qulib-native sources sum to 0.90 and 'ci-results' adds 0.10; weights are renormalized over the scored items at runtime.
29
+ // Rationale grounded in §2.4 of the P3 spec.
30
+ // ---------------------------------------------------------------------------
31
+ const DEFAULT_WEIGHTS = {
32
+ 'live-app-quality': 0.30,
33
+ 'test-automation': 0.22,
34
+ 'api-coverage': 0.15,
35
+ 'accessibility': 0.13,
36
+ 'crawl-coverage': 0.10,
37
+ 'ci-results': 0.10,
38
+ // External sources reserved for P4 — zero weight until wired:
39
+ 'deploy-metadata': 0.0,
40
+ 'error-telemetry': 0.0,
41
+ 'feature-flags': 0.0,
42
+ 'doc-health': 0.0,
43
+ 'human-approval': 0.0,
44
+ 'agent-evidence': 0.0,
45
+ };
46
/**
 * Normalize an optional policy through ConfidencePolicySchema and keep only
 * the five fields the scorer reads.
 *
 * @param p - caller-supplied policy, or null/undefined to parse an empty object.
 * @returns `{ passThreshold, failThreshold, maxListLength, requiredSources, weights }`.
 */
function resolvePolicy(p) {
    const { passThreshold, failThreshold, maxListLength, requiredSources, weights, } = ConfidencePolicySchema.parse(p ?? {});
    return { passThreshold, failThreshold, maxListLength, requiredSources, weights };
}
56
/**
 * Resolve the weight to use for an evidence item.
 *
 * Precedence:
 *   1. a numeric override for item.source in `policyWeights` (0 is honored);
 *   2. the item's own weight, when it is greater than 0;
 *   3. the DEFAULT_WEIGHTS entry for the source, else 0.10.
 *
 * Only own properties count as overrides or defaults: the `in` operator and a
 * bare index lookup both walk the prototype chain, so a source named like an
 * inherited member (e.g. 'toString') would otherwise resolve to a function. An
 * override that is present but not a number (e.g. explicitly undefined) is
 * ignored, so it cannot turn the weight sum into NaN.
 *
 * @param item - evidence item; `source` and `weight` are read.
 * @param policyWeights - optional map of source name to weight.
 * @returns the resolved weight.
 */
function resolveWeight(item, policyWeights) {
    const owns = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
    if (policyWeights && owns(policyWeights, item.source)) {
        const override = policyWeights[item.source];
        if (typeof override === 'number') {
            return override;
        }
    }
    if (item.weight > 0) {
        return item.weight;
    }
    return owns(DEFAULT_WEIGHTS, item.source) ? DEFAULT_WEIGHTS[item.source] : 0.10;
}
62
/**
 * Produce the one-sentence honesty note for an evidence item.
 *
 * The wording is chosen by applicability first ('not_applicable', then
 * 'unknown'), then by a null score; any other item gets the generic
 * "partial or degraded signal" sentence. A truthy item.reason is appended
 * after a colon.
 *
 * @param item - evidence item; `source`, `applicability`, `score`, `reason` are read.
 * @returns the note text.
 */
function buildHonestyNote(item) {
    const subject = `'${item.source}' source`;
    const detail = item.reason ? `: ${item.reason}` : '';
    switch (item.applicability) {
        case 'not_applicable':
            return `${subject} is not applicable${detail || ' for this subject'}.`;
        case 'unknown':
            return `${subject} could not produce a reliable score${detail}.`;
        default:
            break;
    }
    return item.score === null
        ? `${subject} ran but returned a null score${detail}.`
        : `${subject} has partial or degraded signal.`;
}
75
/**
 * Compute the fused Release Confidence result from an evidence bundle.
 *
 * Scoring: confidenceScore = round(Σ score·weight / Σ weight) over evidence
 * items that are applicable, have a non-null score and are not blocking. It
 * stays null when that weight sum is not positive.
 *
 * Verdict ladder (first match wins):
 *   any blocking item                                → 'block'
 *   confidenceScore === null                         → 'block'
 *   confidenceScore < policy.failThreshold           → 'hold'
 *   an 'unknown' item on a required source, or
 *   confidenceScore < policy.passThreshold           → 'caution'
 *   otherwise                                        → 'ship'
 *
 * The result depends only on the input, apart from computedAt, which is
 * stamped with the current time.
 *
 * @param input - `{ subject, evidence, policy? }`; policy is normalized by resolvePolicy().
 * @returns the result as returned by ReleaseConfidenceSchema.parse().
 */
export function computeReleaseConfidence(input) {
    const policy = resolvePolicy(input.policy);
    const now = new Date().toISOString();
    const limit = policy.maxListLength;
    // A missing applicability is treated as 'applicable' throughout.
    const applicabilityOf = (item) => item.applicability ?? 'applicable';
    // Single definition of "counts toward the score", shared by the sum and the contributions.
    const countsTowardScore = (item) => applicabilityOf(item) === 'applicable' && item.score !== null && !item.blocking;
    // Resolve each weight once; index-aligned with input.evidence.
    const weights = input.evidence.map((item) => resolveWeight(item, policy.weights));
    let weightSum = 0;
    let numerator = 0;
    input.evidence.forEach((item, idx) => {
        if (countsTowardScore(item)) {
            weightSum += weights[idx];
            numerator += (item.score ?? 0) * weights[idx];
        }
    });
    const confidenceScore = weightSum > 0 ? Math.round(numerator / weightSum) : null;
    // Items reported in honestyNotes: anything not applicable, or without a score.
    const excluded = input.evidence.filter((item) => applicabilityOf(item) !== 'applicable' || item.score === null);
    // Blocking items are evaluated separately from the score.
    const blockingItems = input.evidence.filter((item) => item.blocking);
    // Contributions cover all evidence, not just the scored subset.
    const contributions = input.evidence.map((item, idx) => ({
        source: item.source,
        score: item.score,
        weight: weights[idx],
        effectiveWeight: countsTowardScore(item) && weightSum > 0 ? weights[idx] / weightSum : 0,
        applicability: applicabilityOf(item),
        blocking: item.blocking ?? false,
    }));
    // Determine verdict.
    let verdict = 'ship';
    const blockers = [];
    if (blockingItems.length > 0) {
        verdict = 'block';
        for (const b of blockingItems) {
            blockers.push(`'${b.source}' is a hard blocker${b.reason ? ': ' + b.reason : ''}.`);
        }
    }
    else if (confidenceScore === null) {
        verdict = 'block';
        blockers.push('No applicable evidence produced a score — nothing evaluable (honesty floor).');
    }
    else if (confidenceScore < policy.failThreshold) {
        verdict = 'hold';
    }
    else {
        // NOTE(review): only required sources that are present in input.evidence are
        // inspected; a required source that is missing entirely does not trigger caution.
        const hasUnknownRequired = input.evidence.some((item) => policy.requiredSources.includes(item.source) &&
            applicabilityOf(item) === 'unknown');
        if (hasUnknownRequired || confidenceScore < policy.passThreshold) {
            verdict = 'caution';
        }
    }
    // Level / label from shared ladder (a null score is levelled as 0).
    const { level, label } = scoreLevel(confidenceScore ?? 0);
    // Honesty notes — one per degraded/excluded source.
    const honestyNotes = excluded.map((item) => buildHonestyNote(item));
    // Also note any blocking items that aren't in the excluded set.
    for (const item of blockingItems) {
        if (applicabilityOf(item) === 'applicable' && item.score !== null) {
            honestyNotes.push(`'${item.source}' is a hard blocker${item.reason ? ': ' + item.reason : ''}.`);
        }
    }
    // Top risks: blocking items' evidence first, then applicable items' evidence
    // in ascending score order. A missing evidence array contributes nothing
    // (same guard as recommendations below) instead of injecting `undefined`.
    const allRisks = [
        ...blockingItems.flatMap((item) => item.evidence ?? []),
        ...input.evidence
            .filter((item) => applicabilityOf(item) === 'applicable')
            .sort((a, b) => (a.score ?? 0) - (b.score ?? 0))
            .flatMap((item) => item.evidence ?? []),
    ];
    const topRisks = [...new Set(allRisks)].slice(0, limit);
    // Recommended next checks — merge and deduplicate.
    const allRecs = input.evidence.flatMap((item) => item.recommendations ?? []);
    const recommendedNextChecks = [...new Set(allRecs)].slice(0, limit);
    const result = {
        schemaVersion: 1,
        computedAt: now,
        subject: input.subject,
        confidenceScore,
        verdict,
        level,
        label,
        contributions,
        topRisks,
        recommendedNextChecks,
        honestyNotes: honestyNotes.slice(0, limit),
        blockers,
        scoreFormula: 'confidenceScore = round( Σ (score * weight) / Σ weight ) for applicable, non-null, non-blocking evidence only. ' +
            'not_applicable, unknown, and null-score items are excluded from the denominator but reported in contributions and honestyNotes.',
    };
    return ReleaseConfidenceSchema.parse(result);
}