task-summary-extractor 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,245 @@
1
+ /**
2
+ * Robust JSON parsing — handles markdown fences, invalid escapes,
3
+ * duplicate blocks, truncated output, mid-output malformations,
4
+ * and other Gemini output quirks.
5
+ */
6
+
7
+ 'use strict';
8
+
9
/**
 * Gemini sometimes produces invalid JSON escape sequences (e.g. \d, \s, \w from regex patterns).
 * Fix them by doubling every backslash that does not start a valid JSON escape.
 *
 * Valid escapes (\" \\ \/ \b \f \n \r \t and \u followed by four hex digits) are
 * consumed as whole units, so the second backslash of an already-escaped pair
 * (e.g. `\\d`) is never mistaken for the start of a new escape. A `\u` that is not
 * followed by four hex digits is treated as invalid and doubled as well.
 *
 * @param {string} text - Raw JSON-like text
 * @returns {string} Text in which every backslash belongs to a valid JSON escape
 */
function sanitizeJsonEscapes(text) {
  return text.replace(
    /\\(["\\/bfnrt]|u[0-9a-fA-F]{4})?/g,
    // `validTail` is undefined when the backslash is not followed by a legal escape.
    (match, validTail) => (validTail === undefined ? '\\\\' : match),
  );
}
16
+
17
/**
 * Fix common mid-output JSON malformations that Gemini produces:
 * - Runs of commas: `, ,` or `,,,` → `,`
 * - Trailing commas before closing: `,}` → `}`   `,]` → `]`
 * - Lone commas after opening: `{,` → `{`   `[,` → `[`
 *
 * The text is split on complete double-quoted string literals and only the
 * segments between them are rewritten, so string content is left untouched.
 * Doubled closers (`}}`, `]]`) are NOT handled here; that is fixDoubledClosers' job.
 *
 * @param {string} text - JSON-like text
 * @returns {string} Text with the comma malformations removed
 */
function sanitizeMalformedJson(text) {
  // The capture group makes split() keep the string literals: odd indices are
  // string literals, even indices are the structural text between them.
  const parts = text.split(/("(?:[^"\\]|\\.)*")/);
  for (let i = 0; i < parts.length; i += 2) {
    parts[i] = parts[i]
      .replace(/,(?:\s*,)+/g, ',') // any run of commas collapses to a single comma
      .replace(/,\s*([}\]])/g, '$1') // trailing comma before close: ,} → }
      .replace(/([{[])\s*,/g, '$1'); // comma after open: {, → {   [, → [
  }
  return parts.join('');
}
64
+
65
/**
 * Collapse adjacent closing braces/brackets (`}}`, `] ]`) into a single closer.
 * Only text outside complete double-quoted string literals is rewritten.
 *
 * This is deliberately blunt: it also collapses legitimately nested closers, so
 * it is meant to be used only after ordinary parsing has already failed.
 *
 * @param {string} text - JSON-like text
 * @returns {string} Text with each adjacent pair of identical closers reduced to one
 */
function fixDoubledClosers(text) {
  const collapseClosers = (segment) =>
    segment.replace(/\}\s*\}/g, '}').replace(/\]\s*\]/g, ']');

  return text
    .split(/("(?:[^"\\]|\\.)*")/)
    // Odd indices are the captured string literals; leave them as they are.
    .map((segment, index) => (index % 2 === 1 ? segment : collapseClosers(segment)))
    .join('');
}
84
+
85
/**
 * Parse JSON without throwing.
 *
 * @param {string} text - Candidate JSON text
 * @returns {*} The parsed value, or undefined when JSON.parse throws
 */
function tryParse(text) {
  let value;
  try {
    value = JSON.parse(text);
  } catch {
    value = undefined;
  }
  return value;
}
88
+
89
/**
 * Parse JSON, falling back to progressively heavier sanitization:
 *   1. the text as-is
 *   2. with invalid escape sequences fixed
 *   3. with comma malformations fixed
 *   4. with both fixes applied
 *
 * @param {string} text - Candidate JSON text
 * @returns {*} The first successfully parsed value, or undefined if every attempt fails
 */
function tryParseWithSanitize(text) {
  let structurallyFixed;
  // Candidates are built lazily so a later sanitizer only runs when the
  // earlier attempts have failed.
  const candidates = [
    () => text,
    () => sanitizeJsonEscapes(text),
    () => {
      structurallyFixed = sanitizeMalformedJson(text);
      return structurallyFixed;
    },
    () => sanitizeJsonEscapes(structurallyFixed),
  ];

  for (const buildCandidate of candidates) {
    const value = tryParse(buildCandidate());
    if (value !== undefined) return value;
  }
  return undefined;
}
103
+
104
/**
 * Attempt to repair truncated JSON.
 * When Gemini hits the output token limit, the JSON is cut mid-way.
 * We try to close any open structures so we can recover the data we have.
 *
 * Steps:
 *   1. Strip markdown fences and discard everything before the first `{`.
 *   2. Apply sanitizeMalformedJson (doubled / trailing / leading commas).
 *   3. Scan once, tracking string state and the stack of open `{` / `[`.
 *   4. Drop a string that was cut off mid-way, or a complete object key that
 *      never received a value; turn a dangling `:` into `: null`; drop a
 *      dangling `,`.
 *   5. Append the missing closers in reverse order and parse.
 *
 * The partial string is located by the scanner rather than by a regex, so
 * escaped quotes inside the cut-off string are handled and complete strings
 * are never rewritten.
 *
 * @param {string} text - Raw model output
 * @returns {*} Parsed value, or undefined when there is no `{` or repair fails
 */
function repairTruncatedJson(text) {
  // Strip markdown fences
  let json = text.replace(/```json\s*/gi, '').replace(/```\s*/g, '').trim();

  const firstBrace = json.indexOf('{');
  if (firstBrace === -1) return undefined;

  // Apply mid-output sanitization FIRST (fix doubled commas, trailing commas, etc.)
  json = sanitizeMalformedJson(json.substring(firstBrace));

  // Track the STACK of open structures in order, so we can close them in reverse,
  // and remember where the most recent string literal started and ended.
  const stack = []; // '{' or '['
  let inString = false;
  let escaped = false;
  let stringStart = -1;
  let stringEnd = -1;
  for (let i = 0; i < json.length; i++) {
    const ch = json[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') { inString = false; stringEnd = i; }
      continue;
    }
    if (ch === '"') {
      inString = true;
      stringStart = i;
    } else if (ch === '{' || ch === '[') {
      stack.push(ch);
    } else if (ch === '}' || ch === ']') {
      // A closer that does not match the innermost opener is ignored.
      const opener = ch === '}' ? '{' : '[';
      if (stack[stack.length - 1] === opener) stack.pop();
    }
  }

  if (inString) {
    // Output was cut inside a string: discard the partial string entirely.
    json = json.slice(0, stringStart);
  } else if (
    stack[stack.length - 1] === '{' &&
    stringEnd !== -1 &&
    json.slice(stringEnd + 1).trim() === '' &&
    /[{,]\s*$/.test(json.slice(0, stringStart))
  ) {
    // Output ends with a complete object key that has no ':' and value yet.
    json = json.slice(0, stringStart);
  }

  // Remove trailing comma; give a dangling key a null value.
  json = json.replace(/,\s*$/, '').replace(/:\s*$/, ': null');

  // Close open structures in reverse order
  for (let i = stack.length - 1; i >= 0; i--) {
    json += stack[i] === '{' ? '}' : ']';
  }

  return tryParseWithSanitize(json);
}
155
+
156
/**
 * Aggressive repair for output with doubled closers.
 * Strips markdown fences, keeps the text from the first `{` onward, fixes comma
 * malformations, then collapses doubled closers repeatedly (at most 10 passes,
 * stopping early once a pass changes nothing). If the result still does not
 * parse, it is handed to the truncation repair.
 *
 * @param {string} text - Raw model output
 * @returns {*} Parsed value, or undefined when there is no `{` or repair fails
 */
function repairDoubledClosers(text) {
  const unfenced = text.replace(/```json\s*/gi, '').replace(/```\s*/g, '').trim();
  const start = unfenced.indexOf('{');
  if (start === -1) return undefined;

  // Sanitize mid-output issues before touching the closers.
  let candidate = sanitizeMalformedJson(unfenced.substring(start));

  // Collapse doubled closers until the text stops changing.
  for (let pass = 0; pass < 10; pass++) {
    const collapsed = fixDoubledClosers(candidate);
    if (collapsed === candidate) break;
    candidate = collapsed;
  }

  const parsed = tryParseWithSanitize(candidate);
  if (parsed !== undefined) return parsed;

  // Still failing: combine with truncation repair.
  return repairTruncatedJson(candidate);
}
185
+
186
/**
 * Extract JSON from raw AI output using multiple strategies, in this order:
 * 1. Strip markdown fences and parse (with escape + malformation sanitization)
 * 2. Brace-matching for the first complete JSON object
 * 3. Regex extraction of the first fenced block
 * 4. Truncation repair — close open structures and recover partial data
 * 5. Doubled-closer repair (fix }}, ]] etc.) — aggressive, last resort
 *
 * Strategies 4 and 5 log a warning through console.warn when they succeed.
 *
 * @param {string} rawText - Raw model output
 * @returns {*} Parsed value (normally an object), or null when nothing could be
 *   recovered or rawText is not a string
 */
function extractJson(rawText) {
  // A missing or blocked model response arrives as null/undefined; honour the
  // documented "or null" contract instead of throwing on .replace().
  if (typeof rawText !== 'string') return null;

  // Strategy 1: Strip all markdown fences and try to parse
  const cleaned = rawText.replace(/```json\s*/gi, '').replace(/```\s*/g, '').trim();
  let parsed = tryParseWithSanitize(cleaned);
  if (parsed !== undefined) return parsed;

  // Strategy 2: Extract first complete JSON object using brace matching
  const firstBrace = rawText.indexOf('{');
  if (firstBrace !== -1) {
    let depth = 0;
    let end = -1;
    let inStr = false;
    let esc = false;
    for (let ci = firstBrace; ci < rawText.length; ci++) {
      const c = rawText[ci];
      if (esc) { esc = false; continue; }
      if (c === '\\' && inStr) { esc = true; continue; }
      if (c === '"') { inStr = !inStr; continue; }
      if (inStr) continue;
      if (c === '{') {
        depth++;
      } else if (c === '}') {
        depth--;
        // Depth returns to zero at the brace that closes the first object.
        if (depth === 0) { end = ci; break; }
      }
    }
    if (end !== -1) {
      parsed = tryParseWithSanitize(rawText.substring(firstBrace, end + 1));
      if (parsed !== undefined) return parsed;
    }
  }

  // Strategy 3: Regex extraction of JSON block between fences
  const fenceMatch = rawText.match(/```(?:json)?\s*([\s\S]*?)```/i);
  if (fenceMatch) {
    parsed = tryParseWithSanitize(fenceMatch[1].trim());
    if (parsed !== undefined) return parsed;
  }

  // Strategy 4: Truncation repair — Gemini hit token limit mid-JSON
  // This is common for large compilation outputs (safest repair, stack-based)
  parsed = repairTruncatedJson(rawText);
  if (parsed !== undefined) {
    console.warn(' ⚠ JSON was truncated — recovered partial data by closing open structures');
    return parsed;
  }

  // Strategy 5: Fix doubled closers and mid-output structural errors (aggressive, last resort)
  parsed = repairDoubledClosers(rawText);
  if (parsed !== undefined) {
    console.warn(' ⚠ JSON had structural errors (doubled braces/commas) — repaired');
    return parsed;
  }

  return null;
}
244
+
245
+ module.exports = { extractJson, sanitizeJsonEscapes, sanitizeMalformedJson, fixDoubledClosers, tryParse, tryParseWithSanitize, repairTruncatedJson, repairDoubledClosers };
@@ -0,0 +1,301 @@
1
+ /**
2
+ * Learning Loop — stores pipeline execution history and uses it to
3
+ * auto-adjust quality thresholds, thinking budgets, and extraction strategies.
4
+ *
5
+ * After each run, the health report + key metrics are appended to history.json.
6
+ * Before each run, historical data is analyzed to produce recommendations
7
+ * for the current execution.
8
+ *
9
+ * This creates a feedback loop: each run gets smarter based on past performance.
10
+ */
11
+
12
+ 'use strict';
13
+
14
+ const fs = require('fs');
15
+ const path = require('path');
16
+
17
+ const HISTORY_FILE = 'history.json';
18
+ const MAX_HISTORY_ENTRIES = 50; // Keep last 50 runs
19
+
20
+ // ======================== HISTORY I/O ========================
21
+
22
/**
 * Read the run history stored in the project root.
 *
 * A missing file, unreadable file, invalid JSON, or JSON that is not an array
 * all yield an empty history; read/parse errors are reported via console.warn.
 *
 * @param {string} projectRoot - Project root directory
 * @returns {Array} Array of historical run entries (possibly empty)
 */
function loadHistory(projectRoot) {
  const historyPath = path.join(projectRoot, HISTORY_FILE);
  try {
    if (!fs.existsSync(historyPath)) return [];
    const stored = JSON.parse(fs.readFileSync(historyPath, 'utf8'));
    if (Array.isArray(stored)) return stored;
  } catch (err) {
    console.warn(` ⚠ Could not load history: ${err.message}`);
  }
  return [];
}
40
+
41
/**
 * Append one run entry to the history file, keeping only the most recent
 * MAX_HISTORY_ENTRIES entries. Failures are reported via console.warn and
 * never thrown.
 *
 * @param {string} projectRoot - Project root directory
 * @param {object} entry - Run entry to append
 */
function saveHistory(projectRoot, entry) {
  const historyPath = path.join(projectRoot, HISTORY_FILE);
  try {
    // Existing entries plus the new one, cut down to the newest MAX_HISTORY_ENTRIES.
    const retained = [...loadHistory(projectRoot), entry].slice(-MAX_HISTORY_ENTRIES);
    const serialized = JSON.stringify(retained, null, 2);
    fs.writeFileSync(historyPath, serialized, 'utf8');
  } catch (err) {
    console.warn(` ⚠ Could not save history: ${err.message}`);
  }
}
60
+
61
+ // ======================== RUN ENTRY BUILDER ========================
62
+
63
/**
 * Condense the data of one pipeline execution into a compact history entry.
 * Missing report sections are treated as empty and missing/falsy metrics as 0.
 *
 * @param {object} params
 * @param {string} params.callName - Name of the call
 * @param {object} params.healthReport - Health report (summary / extraction / retry / efficiency sections)
 * @param {object} [params.costSummary] - Cost summary (totalTokens, totalCost)
 * @param {number} [params.segmentCount] - Number of segments
 * @param {object} [params.compilationQuality] - Quality report for compilation (score, grade)
 * @param {number} [params.baseBudget] - Thinking budget used
 * @param {number} [params.compilationBudget] - Compilation budget used
 * @param {boolean} [params.hadFocusedPasses] - Whether focused re-analysis was used
 * @returns {object} Compact history entry stamped with the current ISO time
 */
function buildHistoryEntry(params) {
  const {
    callName,
    healthReport,
    costSummary = {},
    segmentCount = 0,
    compilationQuality = null,
    baseBudget = 0,
    compilationBudget = 0,
    hadFocusedPasses = false,
  } = params;

  const report = healthReport || {};
  const section = (key) => report[key] || {};
  const orZero = (value) => value || 0;

  const summary = section('summary');
  const extracted = section('extraction');
  const retries = section('retry');
  const efficiency = section('efficiency');

  let compilation = null;
  if (compilationQuality) {
    compilation = {
      score: compilationQuality.score,
      grade: compilationQuality.grade,
    };
  }

  return {
    timestamp: new Date().toISOString(),
    callName,
    segmentCount,
    quality: {
      avgScore: orZero(summary.avgQualityScore),
      minScore: orZero(summary.minQualityScore),
      maxScore: orZero(summary.maxQualityScore),
      grades: summary.grades || {},
      parseSuccessRate: orZero(summary.parseSuccessRate),
    },
    extraction: {
      totalItems: orZero(extracted.totalItems),
      tickets: orZero(extracted.totalTickets),
      crs: orZero(extracted.totalChangeRequests),
      actions: orZero(extracted.totalActionItems),
      blockers: orZero(extracted.totalBlockers),
      scopes: orZero(extracted.totalScopeChanges),
    },
    cost: {
      totalTokens: orZero(costSummary.totalTokens),
      totalCost: orZero(costSummary.totalCost),
      tokensPerItem: orZero(efficiency.tokensPerExtractedItem),
    },
    retry: {
      segmentsRetried: orZero(retries.segmentsRetried),
      retriesImproved: orZero(retries.retriesImproved),
    },
    budgets: {
      baseBudget,
      compilationBudget,
    },
    compilation,
    focusedPasses: hadFocusedPasses,
  };
}
134
+
135
+ // ======================== TREND ANALYSIS ========================
136
+
137
/**
 * Analyze historical trends and produce recommendations for the next run.
 *
 * Only the last 10 entries are examined. Entries that are not objects (for
 * example `null` left behind by a hand-edited history file) are skipped rather
 * than crashing the analysis. A non-array or empty history yields the
 * "no data" result.
 *
 * @param {Array} history - Array of historical run entries
 * @returns {object} Recommendations: { hasData, recommendations, budgetAdjustment,
 *   compilationBudgetAdjustment, qualityThresholdAdjustment, avgQuality, trend, runCount }
 */
function analyzeHistory(history) {
  if (!Array.isArray(history) || history.length === 0) {
    return {
      hasData: false,
      recommendations: [],
      budgetAdjustment: 0,
      compilationBudgetAdjustment: 0,
      qualityThresholdAdjustment: 0,
      avgQuality: 0,
      trend: 'none',
      runCount: 0,
    };
  }

  const mean = (values) => values.reduce((a, b) => a + b, 0) / values.length;

  // Last 10 runs, ignoring malformed (non-object) entries.
  const recent = history
    .slice(-10)
    .filter((run) => run !== null && typeof run === 'object');
  const recommendations = [];

  // Quality trend — runs without a positive score are left out of the average.
  const qualities = recent.map((r) => r.quality?.avgScore || 0).filter((q) => q > 0);
  const avgQuality = qualities.length > 0 ? mean(qualities) : 0;

  // Determine trend direction: compare older half against newer half.
  let trend = 'stable';
  if (qualities.length >= 3) {
    const midpoint = Math.floor(qualities.length / 2);
    const firstAvg = mean(qualities.slice(0, midpoint));
    const secondAvg = mean(qualities.slice(midpoint));
    if (secondAvg > firstAvg + 5) trend = 'improving';
    else if (secondAvg < firstAvg - 5) trend = 'declining';
  }

  // Budget adjustment — if quality is consistently low, boost budget
  let budgetAdjustment = 0;
  let compilationBudgetAdjustment = 0;
  let qualityThresholdAdjustment = 0;

  if (avgQuality < 45 && qualities.length >= 3) {
    budgetAdjustment = 4096; // +4K tokens
    recommendations.push(
      `Low average quality (${avgQuality.toFixed(0)}/100) across ${qualities.length} runs — boosting thinking budget by +4096 tokens`
    );
  } else if (avgQuality > 80 && qualities.length >= 3) {
    budgetAdjustment = -2048; // Save tokens if quality is great
    recommendations.push(
      `High average quality (${avgQuality.toFixed(0)}/100) — reducing thinking budget by 2048 tokens to save cost`
    );
  }

  // Retry effectiveness
  const retryRuns = recent.filter((r) => r.retry?.segmentsRetried > 0);
  if (retryRuns.length > 0) {
    const totalRetried = retryRuns.reduce((s, r) => s + (r.retry?.segmentsRetried || 0), 0);
    const totalImproved = retryRuns.reduce((s, r) => s + (r.retry?.retriesImproved || 0), 0);
    // Whole-number percentage, kept numeric so the comparison below needs no coercion.
    const retrySuccessRate = totalRetried > 0
      ? Number((totalImproved / totalRetried * 100).toFixed(0))
      : 0;

    if (retrySuccessRate < 30 && totalRetried >= 3) {
      recommendations.push(
        `Retry success rate is low (${retrySuccessRate}% of ${totalRetried} retries improved) — consider increasing base thinking budget instead of relying on retries`
      );
      // Ineffective retries take precedence over any earlier budget reduction.
      budgetAdjustment = Math.max(budgetAdjustment, 2048);
    }
  }

  // Cost efficiency
  const costs = recent.map((r) => r.cost?.tokensPerItem || 0).filter((c) => c > 0);
  if (costs.length >= 3) {
    const avgCostPerItem = mean(costs);
    if (avgCostPerItem > 50000) {
      recommendations.push(
        `High token usage per item (${avgCostPerItem.toFixed(0)} tokens/item) — extraction may be inefficient`
      );
    }
  }

  // Compilation quality
  const compilationScores = recent.map((r) => r.compilation?.score || 0).filter((s) => s > 0);
  if (compilationScores.length >= 2) {
    const avgCompScore = mean(compilationScores);
    if (avgCompScore < 50) {
      compilationBudgetAdjustment = 4096;
      recommendations.push(
        `Low compilation quality (avg ${avgCompScore.toFixed(0)}/100) — boosting compilation budget by +4096`
      );
    }
  }

  // Focused pass effectiveness
  const focusedRuns = recent.filter((r) => r.focusedPasses);
  if (focusedRuns.length > 0 && focusedRuns.length < recent.length * 0.3) {
    recommendations.push(
      `Focused re-analysis was used in ${focusedRuns.length}/${recent.length} runs — system is self-correcting effectively`
    );
  }

  // Quality threshold — if everything consistently passes, tighten threshold
  const failRuns = recent.filter((r) => r.quality?.grades?.FAIL > 0);
  if (failRuns.length === 0 && recent.length >= 5 && avgQuality > 70) {
    qualityThresholdAdjustment = 5; // Raise PASS threshold by 5
    recommendations.push(
      `No quality failures in last ${recent.length} runs (avg ${avgQuality.toFixed(0)}) — consider raising quality threshold`
    );
  }

  return {
    hasData: true,
    recommendations,
    budgetAdjustment,
    compilationBudgetAdjustment,
    qualityThresholdAdjustment,
    avgQuality,
    trend,
    runCount: history.length,
  };
}
260
+
261
+ // ======================== PRINT INSIGHTS ========================
262
+
263
/**
 * Write the learning insights to the console via console.log.
 * Prints nothing when the insights carry no historical data.
 *
 * @param {object} insights - Result of analyzeHistory()
 */
function printLearningInsights(insights) {
  if (!insights.hasData) return;

  // Positive adjustments get an explicit '+'; negative ones already carry '-'.
  const signed = (amount) => `${amount > 0 ? '+' : ''}${amount}`;
  const { budgetAdjustment, compilationBudgetAdjustment, recommendations } = insights;

  console.log('');
  console.log(' 📈 Learning Insights:');
  console.log(` Historical runs : ${insights.runCount}`);
  console.log(` Quality trend : ${insights.trend} (avg: ${insights.avgQuality.toFixed(0)}/100)`);

  if (budgetAdjustment !== 0) {
    console.log(` Budget adjust : ${signed(budgetAdjustment)} tokens (analysis)`);
  }
  if (compilationBudgetAdjustment !== 0) {
    console.log(` Budget adjust : ${signed(compilationBudgetAdjustment)} tokens (compilation)`);
  }

  if (recommendations.length > 0) {
    console.log(' Recommendations :');
    recommendations.forEach((rec) => {
      console.log(` • ${rec}`);
    });
  }
  console.log('');
}
293
+
294
+ module.exports = {
295
+ loadHistory,
296
+ saveHistory,
297
+ buildHistoryEntry,
298
+ analyzeHistory,
299
+ printLearningInsights,
300
+ MAX_HISTORY_ENTRIES,
301
+ };