task-summary-extractor 8.3.0 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/.env.example +38 -0
  2. package/ARCHITECTURE.md +99 -3
  3. package/EXPLORATION.md +148 -89
  4. package/QUICK_START.md +5 -2
  5. package/README.md +51 -7
  6. package/bin/taskex.js +11 -4
  7. package/package.json +38 -5
  8. package/src/config.js +52 -3
  9. package/src/modes/focused-reanalysis.js +2 -1
  10. package/src/modes/progress-updater.js +1 -1
  11. package/src/phases/_shared.js +43 -0
  12. package/src/phases/compile.js +101 -0
  13. package/src/phases/deep-dive.js +118 -0
  14. package/src/phases/discover.js +178 -0
  15. package/src/phases/init.js +192 -0
  16. package/src/phases/output.js +238 -0
  17. package/src/phases/process-media.js +633 -0
  18. package/src/phases/services.js +104 -0
  19. package/src/phases/summary.js +86 -0
  20. package/src/pipeline.js +431 -1463
  21. package/src/renderers/docx.js +531 -0
  22. package/src/renderers/html.js +672 -0
  23. package/src/renderers/markdown.js +15 -183
  24. package/src/renderers/pdf.js +90 -0
  25. package/src/renderers/shared.js +211 -0
  26. package/src/schemas/analysis-compiled.schema.json +381 -0
  27. package/src/schemas/analysis-segment.schema.json +380 -0
  28. package/src/services/doc-parser.js +346 -0
  29. package/src/services/gemini.js +101 -44
  30. package/src/services/video.js +123 -8
  31. package/src/utils/adaptive-budget.js +6 -4
  32. package/src/utils/checkpoint.js +2 -1
  33. package/src/utils/cli.js +131 -110
  34. package/src/utils/colors.js +83 -0
  35. package/src/utils/confidence-filter.js +138 -0
  36. package/src/utils/diff-engine.js +2 -1
  37. package/src/utils/global-config.js +6 -5
  38. package/src/utils/health-dashboard.js +11 -9
  39. package/src/utils/json-parser.js +4 -2
  40. package/src/utils/learning-loop.js +3 -2
  41. package/src/utils/progress-bar.js +286 -0
  42. package/src/utils/quality-gate.js +4 -2
  43. package/src/utils/retry.js +3 -1
  44. package/src/utils/schema-validator.js +314 -0
@@ -14,6 +14,8 @@
14
14
 
15
15
  'use strict';
16
16
 
17
+ const { c } = require('./colors');
18
+
17
19
  // ======================== QUALITY THRESHOLDS ========================
18
20
 
19
21
  const THRESHOLDS = {
@@ -374,9 +376,9 @@ function buildRetryHints(analysis, issues) {
374
376
  * @returns {string}
375
377
  */
376
378
  function formatQualityLine(report, segmentName) {
377
- const icon = report.grade === 'PASS' ? '✓' : report.grade === 'WARN' ? '⚠' : '✗';
379
+ const icon = report.grade === 'PASS' ? c.success : report.grade === 'WARN' ? c.warn : c.error;
378
380
  const dims = report.dimensions;
379
- return ` ${icon} Quality: ${report.score}/100 (${report.grade}) — ` +
381
+ return ` ${icon(`Quality: ${report.score}/100 (${report.grade})`)} — ` +
380
382
  `struct:${dims.structure.score} density:${dims.density.score} ` +
381
383
  `integrity:${dims.integrity.score} xref:${dims.crossRef.score}`;
382
384
  }
@@ -10,6 +10,8 @@
10
10
 
11
11
  'use strict';
12
12
 
13
+ const { c } = require('./colors');
14
+
13
15
  /** Default retry attempts — overridable via opts.maxRetries */
14
16
  const DEFAULT_MAX_RETRIES = 3;
15
17
  /** Default base delay in ms — overridable via opts.baseDelay */
@@ -94,7 +96,7 @@ async function withRetry(fn, opts = {}) {
94
96
  onRetry(attempt + 1, delay, err);
95
97
  } else {
96
98
  const msg = err.message || String(err);
97
- console.warn(` ${label} failed (attempt ${attempt + 1}/${maxRetries + 1}): ${msg.slice(0, 120)}`);
99
+ console.warn(` ${c.warn(`${label} failed (attempt ${attempt + 1}/${maxRetries + 1}): ${msg.slice(0, 120)}`)}`);
98
100
  console.warn(` → Retrying in ${(delay / 1000).toFixed(1)}s...`);
99
101
  }
100
102
 
@@ -0,0 +1,314 @@
1
+ /**
2
+ * Schema Validator — validates AI analysis output against JSON schemas
3
+ * using Ajv. Provides human-readable error messages and retry hints.
4
+ *
5
+ * @module schema-validator
6
+ */
7
+
8
+ 'use strict';
9
+
10
+ const Ajv = require('ajv');
11
+ const { c } = require('./colors');
12
+
13
+ // Load schemas (require works for JSON files)
14
+ const segmentSchema = require('../schemas/analysis-segment.schema.json');
15
+ const compiledSchema = require('../schemas/analysis-compiled.schema.json');
16
+
17
+ // ======================== AJV INSTANCE ========================
18
+
19
+ const ajv = new Ajv({
20
+ allErrors: true, // Report ALL errors, not just the first — formatErrors/buildSchemaRetryHints iterate the full list
21
+ verbose: true, // Include failing data in error objects (read later in this module as err.data)
22
+ strict: false, // Allow draft-07 features without strict-mode warnings
23
+ coerceTypes: false, // Don't coerce types — report mismatches
24
+ allowUnionTypes: true, // Support type: ["string", "null"]
25
+ });
26
+
27
+ // Compile validators once at module load; validateAnalysis() picks one of the two per call
28
+ const validateSegment = ajv.compile(segmentSchema);
29
+ const validateCompiled = ajv.compile(compiledSchema);
30
+
31
+ // ======================== PUBLIC API ========================
32
+
33
+ /**
34
+ * @typedef {Object} SchemaReport
35
+ * @property {boolean} valid - Whether the data passed schema validation
36
+ * @property {number} errorCount - Number of schema errors
37
+ * @property {Array<SchemaError>} errors - Human-readable error descriptions
38
+ * @property {string[]} retryHints - Actionable hints for Gemini retry prompts
39
+ * @property {string} summary - Single line summary (for logging)
40
+ */
41
+
42
+ /**
43
+ * @typedef {Object} SchemaError
44
+ * @property {string} path - JSON path to the error (e.g. "/tickets/0/ticket_id")
45
+ * @property {string} message - Human-readable error message
46
+ * @property {string} keyword - Ajv keyword that failed (e.g. "required", "type", "enum")
47
+ * @property {*} [actual] - Actual value (only for type/enum mismatches)
48
+ * @property {*} [expected] - Expected value/type
49
+ */
50
+
/**
 * Check a parsed analysis payload against the segment or compiled JSON schema
 * and describe the outcome as a SchemaReport.
 *
 * Outcomes, in the order they are tested:
 *  - null / non-object input: one synthetic "type" error at "/";
 *  - an object carrying a truthy `error` or `rawResponse`: reported as skipped
 *    (valid: false, errorCount: 0) without running the validator;
 *  - the validator accepts the payload: a clean PASS report;
 *  - otherwise: the validator's errors are converted via formatErrors() and
 *    buildSchemaRetryHints(), and the summary previews the first three messages.
 *
 * @param {object} data - The parsed analysis output to validate
 * @param {'segment'|'compiled'} type - Which schema to validate against;
 *   any value other than 'compiled' selects the segment validator
 * @returns {SchemaReport}
 */
function validateAnalysis(data, type = 'segment') {
  const isObjectLike = Boolean(data) && typeof data === 'object';
  if (!isObjectLike) {
    return {
      valid: false,
      errorCount: 1,
      errors: [{ path: '/', message: 'Analysis is null or not an object', keyword: 'type' }],
      retryHints: ['Your response could not be parsed as a valid JSON object. Return ONLY valid JSON starting with { and ending with }.'],
      summary: 'Schema: FAIL — analysis is null/non-object',
    };
  }

  // Error placeholders (segments that failed Gemini) are not schema-checked.
  const isErrorPlaceholder = Boolean(data.error || data.rawResponse);
  if (isErrorPlaceholder) {
    return {
      valid: false,
      errorCount: 0,
      errors: [],
      retryHints: [],
      summary: 'Schema: SKIP — error/raw response object',
    };
  }

  const validator = type === 'compiled' ? validateCompiled : validateSegment;
  const passed = validator(data);

  if (passed) {
    return {
      valid: true,
      errorCount: 0,
      errors: [],
      retryHints: [],
      summary: `Schema: PASS (${type})`,
    };
  }

  // Translate the raw Ajv errors into readable errors and prompt hints.
  const rawErrors = validator.errors || [];
  const errors = formatErrors(rawErrors);
  const retryHints = buildSchemaRetryHints(rawErrors, type);
  const errorCount = errors.length;

  const preview = errors.slice(0, 3).map((entry) => entry.message).join('; ');
  const overflow = errorCount > 3 ? ` (+${errorCount - 3} more)` : '';

  return {
    valid: passed,
    errorCount,
    errors,
    retryHints,
    summary: `Schema: ${errorCount} error(s) in ${type} output — ${preview}${overflow}`,
  };
}
106
+
107
+ // ======================== ERROR FORMATTING ========================
108
+
/**
 * Convert Ajv error objects to human-readable SchemaError objects.
 *
 * Errors are deduplicated on (instance path, keyword, missing property), so
 * only the first error for a given combination is kept. Root-level errors
 * (empty instance path) are reported at "/" in both `path` and `message`.
 *
 * For `type` errors the actual type is derived from `err.data`, distinguishing
 * `null` and arrays from plain objects (a bare `typeof` would call all three
 * "object"). `err.data` is read as-is from the error object.
 *
 * @param {import('ajv').ErrorObject[]} ajvErrors
 * @returns {SchemaError[]}
 */
function formatErrors(ajvErrors) {
  const seen = new Set();
  const errors = [];

  for (const err of ajvErrors) {
    const dataPath = err.instancePath || '';
    // Display form of the location: the document root has an empty pointer.
    const where = dataPath || '/';

    // Dedupe first so duplicates cost nothing beyond the key lookup.
    const key = `${dataPath}:${err.keyword}:${err.params?.missingProperty || ''}`;
    if (seen.has(key)) continue;
    seen.add(key);

    let message;
    let actual;
    let expected;

    switch (err.keyword) {
      case 'required': {
        const field = err.params.missingProperty;
        message = `Missing required field "${field}" at ${where}`;
        expected = field;
        break;
      }
      case 'type': {
        const expType = err.params.type;
        let actType;
        if (err.data === null) {
          actType = 'null'; // typeof null is "object", which would mislead
        } else if (Array.isArray(err.data)) {
          actType = 'array';
        } else {
          actType = typeof err.data;
        }
        message = `Expected type "${expType}" but got "${actType}" at ${where}`;
        actual = actType;
        expected = expType;
        break;
      }
      case 'enum': {
        const allowed = err.params.allowedValues;
        actual = err.data;
        expected = allowed;
        message = `Invalid value "${err.data}" at ${where} — allowed: ${allowed.join(', ')}`;
        break;
      }
      case 'minLength': {
        message = `Value at ${where} is too short (minimum length: ${err.params.limit})`;
        actual = typeof err.data === 'string' ? err.data.length : 0;
        expected = err.params.limit;
        break;
      }
      case 'additionalProperties': {
        // Shouldn't fire since we allow additional properties, but just in case
        message = `Unexpected property "${err.params.additionalProperty}" at ${where}`;
        break;
      }
      default: {
        message = `Validation error at ${where}: ${err.message}`;
        break;
      }
    }

    errors.push({
      path: where,
      message,
      keyword: err.keyword,
      // Only attach actual/expected when the keyword produced them.
      ...(actual !== undefined && { actual }),
      ...(expected !== undefined && { expected }),
    });
  }

  return errors;
}
181
+
182
+ // ======================== RETRY HINTS ========================
183
+
/**
 * Build actionable retry hints from schema errors to inject into Gemini
 * retry prompts. Errors are grouped by category so the prompt stays short:
 * missing top-level fields, missing nested fields (at most 5 parent paths),
 * type mismatches (first 3) and enum violations (first 3), in that order.
 * Keywords other than `required`, `type` and `enum` produce no hint.
 *
 * The actual type in a mismatch is derived from `err.data`, reporting `null`
 * and arrays by name rather than as "object". Whether a missing field is
 * top-level is decided from the error's instance path (empty = root), not by
 * splitting a joined string, so property names containing "/" are filed
 * correctly.
 *
 * @param {import('ajv').ErrorObject[]} ajvErrors
 * @param {'segment'|'compiled'} type - Accepted for API symmetry; not read by this function
 * @returns {string[]}
 */
function buildSchemaRetryHints(ajvErrors, type) {
  if (!ajvErrors || ajvErrors.length === 0) return [];

  const hints = [];
  const topLevelMissing = new Set();
  const nestedMissing = new Map(); // parent path → Set of missing child names
  const typeMismatches = new Set();
  const enumViolations = new Set();

  for (const err of ajvErrors) {
    const dataPath = err.instancePath || '';
    // Display form of the location: the document root has an empty pointer.
    const where = dataPath || '/';

    switch (err.keyword) {
      case 'required': {
        const field = err.params.missingProperty;
        if (dataPath === '') {
          topLevelMissing.add(field);
        } else {
          if (!nestedMissing.has(dataPath)) nestedMissing.set(dataPath, new Set());
          nestedMissing.get(dataPath).add(field);
        }
        break;
      }
      case 'type': {
        let actual;
        if (err.data === null) {
          actual = 'null'; // typeof null is "object", which would mislead the model
        } else if (Array.isArray(err.data)) {
          actual = 'array';
        } else {
          actual = typeof err.data;
        }
        typeMismatches.add(`${where} (expected ${err.params.type}, got ${actual})`);
        break;
      }
      case 'enum':
        enumViolations.add(`${where} = "${err.data}" (allowed: ${err.params.allowedValues.join('|')})`);
        break;
      default:
        break;
    }
  }

  // Missing required fields at the document root
  if (topLevelMissing.size > 0) {
    hints.push(
      `CRITICAL: Missing required top-level fields: ${[...topLevelMissing].join(', ')}. ` +
      `You MUST include ALL of these in your response. Use empty arrays [] if no items exist.`
    );
  }

  // Missing required fields inside nested objects, grouped by parent path
  if (nestedMissing.size > 0) {
    const summaries = [...nestedMissing]
      .slice(0, 5) // Don't overwhelm the retry prompt
      .map(([parent, fields]) => `${parent}: needs ${[...fields].join(', ')}`);

    hints.push(
      `Missing required fields in nested objects: ${summaries.join(' | ')}. ` +
      `Each item must have all required properties.`
    );
  }

  // Type mismatches
  if (typeMismatches.size > 0) {
    const examples = [...typeMismatches].slice(0, 3).join('; ');
    hints.push(
      `Type errors found: ${examples}. ` +
      `Ensure arrays are [], objects are {}, strings are quoted, and null is used for missing optional values.`
    );
  }

  // Enum violations
  if (enumViolations.size > 0) {
    const examples = [...enumViolations].slice(0, 3).join('; ');
    hints.push(
      `Invalid enum values: ${examples}. ` +
      `Use ONLY the allowed values specified in the output structure. Check status, type, priority, and confidence fields.`
    );
  }

  return hints;
}
270
+
271
+ // ======================== QUALITY GATE INTEGRATION ========================
272
+
/**
 * Map a schema report onto a 0-100 score for use alongside quality-gate.js.
 *
 * A valid report, or one with zero errors (the skip / raw-response case),
 * scores 100. Otherwise the score steps down with the error count:
 *   1 error → 85, 2–3 → 60, 4–5 → 40, 6–10 → 20, 11 or more → 0.
 *
 * @param {SchemaReport} report
 * @returns {number} Schema score (0-100)
 */
function schemaScore(report) {
  if (report.valid || report.errorCount === 0) return 100;

  // [inclusive upper bound on errorCount, score], checked from smallest bound up.
  const tiers = [
    [1, 85],
    [3, 60],
    [5, 40],
    [10, 20],
  ];

  for (const [ceiling, score] of tiers) {
    if (report.errorCount <= ceiling) return score;
  }
  return 0;
}
292
+
/**
 * Render a SchemaReport as one console line.
 *
 * Valid reports go through `c.success`, reports with zero errors are shown as
 * skipped (plain text, no colour helper), and everything else goes through
 * `c.warn` with the error count and the first two error messages.
 *
 * @param {SchemaReport} report
 * @returns {string}
 */
function formatSchemaLine(report) {
  if (report.valid) return ` ${c.success('Schema: valid')}`;

  const wasSkipped = report.errorCount === 0;
  if (wasSkipped) return ' ○ Schema: skipped (error/raw response)';

  const preview = report.errors
    .slice(0, 2)
    .map(({ message }) => message)
    .join('; ');
  const text = `Schema: ${report.errorCount} error(s) — ${preview}`;
  return ` ${c.warn(text)}`;
}
308
+
309
+ module.exports = {
310
+ validateAnalysis,
311
+ buildSchemaRetryHints,
312
+ schemaScore,
313
+ formatSchemaLine,
314
+ };