task-summary-extractor 8.3.0 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +38 -0
- package/ARCHITECTURE.md +99 -3
- package/EXPLORATION.md +148 -89
- package/QUICK_START.md +5 -2
- package/README.md +51 -7
- package/bin/taskex.js +11 -4
- package/package.json +38 -5
- package/src/config.js +52 -3
- package/src/modes/focused-reanalysis.js +2 -1
- package/src/modes/progress-updater.js +1 -1
- package/src/phases/_shared.js +43 -0
- package/src/phases/compile.js +101 -0
- package/src/phases/deep-dive.js +118 -0
- package/src/phases/discover.js +178 -0
- package/src/phases/init.js +192 -0
- package/src/phases/output.js +238 -0
- package/src/phases/process-media.js +633 -0
- package/src/phases/services.js +104 -0
- package/src/phases/summary.js +86 -0
- package/src/pipeline.js +431 -1463
- package/src/renderers/docx.js +531 -0
- package/src/renderers/html.js +672 -0
- package/src/renderers/markdown.js +15 -183
- package/src/renderers/pdf.js +90 -0
- package/src/renderers/shared.js +211 -0
- package/src/schemas/analysis-compiled.schema.json +381 -0
- package/src/schemas/analysis-segment.schema.json +380 -0
- package/src/services/doc-parser.js +346 -0
- package/src/services/gemini.js +101 -44
- package/src/services/video.js +123 -8
- package/src/utils/adaptive-budget.js +6 -4
- package/src/utils/checkpoint.js +2 -1
- package/src/utils/cli.js +131 -110
- package/src/utils/colors.js +83 -0
- package/src/utils/confidence-filter.js +138 -0
- package/src/utils/diff-engine.js +2 -1
- package/src/utils/global-config.js +6 -5
- package/src/utils/health-dashboard.js +11 -9
- package/src/utils/json-parser.js +4 -2
- package/src/utils/learning-loop.js +3 -2
- package/src/utils/progress-bar.js +286 -0
- package/src/utils/quality-gate.js +4 -2
- package/src/utils/retry.js +3 -1
- package/src/utils/schema-validator.js +314 -0
|
@@ -14,6 +14,8 @@
|
|
|
14
14
|
|
|
15
15
|
'use strict';
|
|
16
16
|
|
|
17
|
+
const { c } = require('./colors');
|
|
18
|
+
|
|
17
19
|
// ======================== QUALITY THRESHOLDS ========================
|
|
18
20
|
|
|
19
21
|
const THRESHOLDS = {
|
|
@@ -374,9 +376,9 @@ function buildRetryHints(analysis, issues) {
|
|
|
374
376
|
* @returns {string}
|
|
375
377
|
*/
|
|
376
378
|
function formatQualityLine(report, segmentName) {
|
|
377
|
-
const icon = report.grade === 'PASS' ?
|
|
379
|
+
const icon = report.grade === 'PASS' ? c.success : report.grade === 'WARN' ? c.warn : c.error;
|
|
378
380
|
const dims = report.dimensions;
|
|
379
|
-
return ` ${icon
|
|
381
|
+
return ` ${icon(`Quality: ${report.score}/100 (${report.grade})`)} — ` +
|
|
380
382
|
`struct:${dims.structure.score} density:${dims.density.score} ` +
|
|
381
383
|
`integrity:${dims.integrity.score} xref:${dims.crossRef.score}`;
|
|
382
384
|
}
|
package/src/utils/retry.js
CHANGED
|
@@ -10,6 +10,8 @@
|
|
|
10
10
|
|
|
11
11
|
'use strict';
|
|
12
12
|
|
|
13
|
+
const { c } = require('./colors');
|
|
14
|
+
|
|
13
15
|
/** Default retry attempts — overridable via opts.maxRetries */
|
|
14
16
|
const DEFAULT_MAX_RETRIES = 3;
|
|
15
17
|
/** Default base delay in ms — overridable via opts.baseDelay */
|
|
@@ -94,7 +96,7 @@ async function withRetry(fn, opts = {}) {
|
|
|
94
96
|
onRetry(attempt + 1, delay, err);
|
|
95
97
|
} else {
|
|
96
98
|
const msg = err.message || String(err);
|
|
97
|
-
console.warn(`
|
|
99
|
+
console.warn(` ${c.warn(`${label} failed (attempt ${attempt + 1}/${maxRetries + 1}): ${msg.slice(0, 120)}`)}`);
|
|
98
100
|
console.warn(` → Retrying in ${(delay / 1000).toFixed(1)}s...`);
|
|
99
101
|
}
|
|
100
102
|
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Schema Validator — validates AI analysis output against JSON schemas
|
|
3
|
+
* using Ajv. Provides human-readable error messages and retry hints.
|
|
4
|
+
*
|
|
5
|
+
* @module schema-validator
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
'use strict';
|
|
9
|
+
|
|
10
|
+
const Ajv = require('ajv');
|
|
11
|
+
const { c } = require('./colors');
|
|
12
|
+
|
|
13
|
+
// Load schemas (require works for JSON files)
|
|
14
|
+
const segmentSchema = require('../schemas/analysis-segment.schema.json');
|
|
15
|
+
const compiledSchema = require('../schemas/analysis-compiled.schema.json');
|
|
16
|
+
|
|
17
|
+
// ======================== AJV INSTANCE ========================
|
|
18
|
+
|
|
19
|
+
// Options for the single Ajv instance shared by both validators below.
const AJV_OPTIONS = {
  allErrors: true,       // Collect every violation instead of stopping at the first
  verbose: true,         // Attach the failing data to each error object (read as err.data)
  strict: false,         // Allow draft-07 features without strict-mode warnings
  coerceTypes: false,    // Never coerce values; type mismatches must be reported
  allowUnionTypes: true, // Permit union types such as type: ["string", "null"]
};

const ajv = new Ajv(AJV_OPTIONS);

// Both schemas are compiled once, when this module is first loaded.
const validateSegment = ajv.compile(segmentSchema);
const validateCompiled = ajv.compile(compiledSchema);
|
|
30
|
+
|
|
31
|
+
// ======================== PUBLIC API ========================
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* @typedef {Object} SchemaReport
|
|
35
|
+
* @property {boolean} valid - Whether the data passed schema validation
|
|
36
|
+
* @property {number} errorCount - Number of schema errors
|
|
37
|
+
* @property {Array<SchemaError>} errors - Human-readable error descriptions
|
|
38
|
+
* @property {string[]} retryHints - Actionable hints for Gemini retry prompts
|
|
39
|
+
* @property {string} summary - Single line summary (for logging)
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* @typedef {Object} SchemaError
|
|
44
|
+
* @property {string} path - JSON path to the error (e.g. "/tickets/0/ticket_id")
|
|
45
|
+
* @property {string} message - Human-readable error message
|
|
46
|
+
* @property {string} keyword - Ajv keyword that failed (e.g. "required", "type", "enum")
|
|
47
|
+
* @property {*} [actual] - Actual value (only for type/enum mismatches)
|
|
48
|
+
* @property {*} [expected] - Expected value/type
|
|
49
|
+
*/
|
|
50
|
+
|
|
51
|
+
/**
 * Check a parsed analysis object against the segment or the compiled schema
 * and describe the outcome as a SchemaReport.
 *
 * Two cases are answered without running Ajv:
 *   - `data` is falsy or not an object → FAIL report with one synthetic error;
 *   - `data` carries a truthy `error` or `rawResponse` property → SKIP report
 *     (`valid: false` with `errorCount: 0`).
 *
 * @param {object} data - The parsed analysis output to validate
 * @param {'segment'|'compiled'} type - Schema selector; any value other than
 *   'compiled' selects the segment validator
 * @returns {SchemaReport}
 */
function validateAnalysis(data, type = 'segment') {
  const isObject = Boolean(data) && typeof data === 'object';
  if (!isObject) {
    return {
      valid: false,
      errorCount: 1,
      errors: [{ path: '/', message: 'Analysis is null or not an object', keyword: 'type' }],
      retryHints: ['Your response could not be parsed as a valid JSON object. Return ONLY valid JSON starting with { and ending with }.'],
      summary: 'Schema: FAIL — analysis is null/non-object',
    };
  }

  // Error placeholders (segments that failed Gemini) are not schema-checked.
  const isErrorPlaceholder = Boolean(data.error || data.rawResponse);
  if (isErrorPlaceholder) {
    return {
      valid: false,
      errorCount: 0,
      errors: [],
      retryHints: [],
      summary: 'Schema: SKIP — error/raw response object',
    };
  }

  const validator = type === 'compiled' ? validateCompiled : validateSegment;
  const passed = validator(data);

  if (passed) {
    return {
      valid: true,
      errorCount: 0,
      errors: [],
      retryHints: [],
      summary: `Schema: PASS (${type})`,
    };
  }

  // Translate the raw Ajv errors into readable records and retry hints.
  const rawErrors = validator.errors || [];
  const errors = formatErrors(rawErrors);
  const retryHints = buildSchemaRetryHints(rawErrors, type);
  const errorCount = errors.length;

  // The summary quotes at most three messages and counts the remainder.
  const preview = errors.slice(0, 3).map(({ message }) => message).join('; ');
  const overflow = errorCount > 3 ? ` (+${errorCount - 3} more)` : '';

  return {
    valid: passed,
    errorCount,
    errors,
    retryHints,
    summary: `Schema: ${errorCount} error(s) in ${type} output — ${preview}${overflow}`,
  };
}
|
|
106
|
+
|
|
107
|
+
// ======================== ERROR FORMATTING ========================
|
|
108
|
+
|
|
109
|
+
/**
 * Convert Ajv error objects to human-readable SchemaError objects.
 *
 * Errors are de-duplicated on (path, keyword, property name), where the
 * property name is the missing or unexpected property when the error has one.
 * Root-level errors (empty `instancePath`) are reported with the path "/".
 *
 * @param {import('ajv').ErrorObject[]} ajvErrors - Raw errors from a validator;
 *   null/undefined is treated as an empty list
 * @returns {SchemaError[]}
 */
function formatErrors(ajvErrors) {
  if (!ajvErrors || ajvErrors.length === 0) return [];

  // `typeof` reports both null and arrays as "object"; name them as JSON
  // Schema does so a message never reads 'expected "object" but got "object"'.
  const jsonTypeOf = (value) => {
    if (value === null) return 'null';
    if (Array.isArray(value)) return 'array';
    return typeof value;
  };

  const seen = new Set();
  const errors = [];

  for (const err of ajvErrors) {
    const dataPath = err.instancePath || '';
    // The root instance has an empty path; show it as "/" in every message.
    const location = dataPath || '/';

    let message;
    let actual;
    let expected;

    switch (err.keyword) {
      case 'required': {
        const field = err.params.missingProperty;
        message = `Missing required field "${field}" at ${location}`;
        expected = field;
        break;
      }
      case 'type': {
        const expType = err.params.type;
        const actType = jsonTypeOf(err.data);
        message = `Expected type "${expType}" but got "${actType}" at ${location}`;
        actual = actType;
        expected = expType;
        break;
      }
      case 'enum': {
        const allowed = err.params.allowedValues;
        actual = err.data;
        expected = allowed;
        message = `Invalid value "${err.data}" at ${location} — allowed: ${allowed.join(', ')}`;
        break;
      }
      case 'minLength': {
        message = `Value at ${location} is too short (minimum length: ${err.params.limit})`;
        actual = typeof err.data === 'string' ? err.data.length : 0;
        expected = err.params.limit;
        break;
      }
      case 'additionalProperties': {
        // Shouldn't fire since we allow additional properties, but just in case
        message = `Unexpected property "${err.params.additionalProperty}" at ${location}`;
        break;
      }
      default: {
        message = `Validation error at ${location}: ${err.message}`;
        break;
      }
    }

    // Include the property name so two different missing/unexpected
    // properties at the same path are kept as separate errors.
    const property = err.params?.missingProperty || err.params?.additionalProperty || '';
    const key = `${dataPath}:${err.keyword}:${property}`;
    if (seen.has(key)) continue;
    seen.add(key);

    errors.push({
      path: location,
      message,
      keyword: err.keyword,
      ...(actual !== undefined && { actual }),
      ...(expected !== undefined && { expected }),
    });
  }

  return errors;
}
|
|
181
|
+
|
|
182
|
+
// ======================== RETRY HINTS ========================
|
|
183
|
+
|
|
184
|
+
/**
 * Build actionable retry hints from schema errors to inject into Gemini
 * retry prompts. Errors are grouped by category (missing required fields,
 * type mismatches, enum violations) so that each category yields at most one
 * hint, or two for missing fields (top-level and nested). Other keywords
 * produce no hint.
 *
 * @param {import('ajv').ErrorObject[]} ajvErrors - Raw errors from a validator;
 *   null/undefined/empty yields no hints
 * @param {'segment'|'compiled'} type - Accepted for interface compatibility;
 *   not read by this function
 * @returns {string[]}
 */
function buildSchemaRetryHints(ajvErrors, type) {
  if (!ajvErrors || ajvErrors.length === 0) return [];

  // `typeof` reports both null and arrays as "object", which would tell the
  // model "expected object, got object" when it sent an array. Name the
  // actual JSON type instead.
  const jsonTypeOf = (value) => {
    if (value === null) return 'null';
    if (Array.isArray(value)) return 'array';
    return typeof value;
  };

  const hints = [];
  const missingFields = new Set();
  const typeMismatches = new Set();
  const enumViolations = new Set();

  for (const err of ajvErrors) {
    const dataPath = err.instancePath || '';
    // The root instance has an empty path; show it as "/".
    const location = dataPath || '/';

    switch (err.keyword) {
      case 'required':
        missingFields.add(`${dataPath}/${err.params.missingProperty}`);
        break;
      case 'type':
        typeMismatches.add(`${location} (expected ${err.params.type}, got ${jsonTypeOf(err.data)})`);
        break;
      case 'enum':
        enumViolations.add(`${location} = "${err.data}" (allowed: ${err.params.allowedValues.join('|')})`);
        break;
      default:
        break;
    }
  }

  // Missing required fields
  if (missingFields.size > 0) {
    // "/field" splits into two parts; anything deeper belongs to a nested object.
    const allMissing = [...missingFields];
    const topLevel = allMissing.filter((f) => f.split('/').length <= 2);
    const nested = allMissing.filter((f) => f.split('/').length > 2);

    if (topLevel.length > 0) {
      hints.push(
        `CRITICAL: Missing required top-level fields: ${topLevel.map((f) => f.replace(/^\//, '')).join(', ')}. ` +
        `You MUST include ALL of these in your response. Use empty arrays [] if no items exist.`
      );
    }

    if (nested.length > 0) {
      // Group by parent path for readability
      const groups = new Map();
      for (const field of nested) {
        const parts = field.split('/');
        const parent = parts.slice(0, -1).join('/');
        const child = parts[parts.length - 1];
        if (!groups.has(parent)) groups.set(parent, []);
        groups.get(parent).push(child);
      }

      const summaries = [...groups]
        .slice(0, 5) // Don't overwhelm the retry prompt
        .map(([parent, fields]) => `${parent}: needs ${fields.join(', ')}`);

      hints.push(
        `Missing required fields in nested objects: ${summaries.join(' | ')}. ` +
        `Each item must have all required properties.`
      );
    }
  }

  // Type mismatches
  if (typeMismatches.size > 0) {
    const examples = [...typeMismatches].slice(0, 3).join('; ');
    hints.push(
      `Type errors found: ${examples}. ` +
      `Ensure arrays are [], objects are {}, strings are quoted, and null is used for missing optional values.`
    );
  }

  // Enum violations
  if (enumViolations.size > 0) {
    const examples = [...enumViolations].slice(0, 3).join('; ');
    hints.push(
      `Invalid enum values: ${examples}. ` +
      `Use ONLY the allowed values specified in the output structure. Check status, type, priority, and confidence fields.`
    );
  }

  return hints;
}
|
|
270
|
+
|
|
271
|
+
// ======================== QUALITY GATE INTEGRATION ========================
|
|
272
|
+
|
|
273
|
+
/**
 * Compute a schema quality score for integration with quality-gate.js.
 * Returns a number 0-100 where 100 = no schema errors.
 *
 * Scale, by `report.errorCount` of an invalid report:
 *   0 (skipped / raw response) → 100
 *   1                          → 85
 *   2-3                        → 60
 *   4-5                        → 40
 *   6-10                       → 20
 *   11 or more                 → 0
 * A valid report always scores 100.
 *
 * @param {SchemaReport} report
 * @returns {number} Schema score (0-100)
 */
function schemaScore(report) {
  if (report.valid) return 100;
  if (report.errorCount === 0) return 100; // skip/raw response

  // Penalty tiers as [largest error count in the tier, score], in ascending order.
  const tiers = [
    [1, 85],
    [3, 60],
    [5, 40],
    [10, 20],
  ];

  for (const [maxErrors, score] of tiers) {
    if (report.errorCount <= maxErrors) return score;
  }

  // More errors than the last tier covers.
  return 0;
}
|
|
292
|
+
|
|
293
|
+
/**
 * Render a SchemaReport as one line of console text.
 *
 * Three shapes are produced: a "valid" line passed through `c.success`, a
 * plain "skipped" line for invalid reports with zero errors, and an error
 * line passed through `c.warn` that quotes at most the first two messages.
 *
 * @param {SchemaReport} report
 * @returns {string}
 */
function formatSchemaLine(report) {
  const { valid, errorCount } = report;

  // Real schema failures: invalid and at least one recorded error.
  if (!valid && errorCount !== 0) {
    const preview = report.errors
      .slice(0, 2)
      .map(({ message }) => message)
      .join('; ');
    return ` ${c.warn(`Schema: ${errorCount} error(s) — ${preview}`)}`;
  }

  // Otherwise the report is either valid or was skipped.
  return valid
    ? ` ${c.success('Schema: valid')}`
    : ' ○ Schema: skipped (error/raw response)';
}
|
|
308
|
+
|
|
309
|
+
module.exports = {
|
|
310
|
+
validateAnalysis,
|
|
311
|
+
buildSchemaRetryHints,
|
|
312
|
+
schemaScore,
|
|
313
|
+
formatSchemaLine,
|
|
314
|
+
};
|