task-summary-extractor 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,317 @@
1
+ /**
2
+ * Focused Re-Analysis — performs targeted second-pass extraction
3
+ * when the quality gate identifies specific weak dimensions.
4
+ *
5
+ * Instead of re-running the full analysis, this sends a focused prompt
6
+ * to Gemini targeting ONLY the weak areas (e.g., missing blockers,
7
+ * sparse action items, low confidence items).
8
+ *
9
+ * The results are then intelligently merged with the original analysis.
10
+ */
11
+
12
+ 'use strict';
13
+
14
+ const { extractJson } = require('./json-parser');
15
+ const { withRetry } = require('./retry');
16
+ const config = require('../config');
17
+ // Access config.GEMINI_MODEL / config.GEMINI_CONTEXT_WINDOW at call time for runtime model changes.
18
+
19
+ // ======================== WEAKNESS DETECTION ========================
20
+
21
+ /**
22
+ * Analyze a quality report + analysis to identify specific extraction weaknesses.
23
+ *
24
+ * @param {object} qualityReport - From assessQuality()
25
+ * @param {object} analysis - The parsed analysis
26
+ * @returns {{ weakAreas: string[], focusPrompt: string|null, shouldReanalyze: boolean }}
27
+ */
28
+ function identifyWeaknesses(qualityReport, analysis) {
29
+ if (!qualityReport || !analysis) {
30
+ return { weakAreas: [], focusPrompt: null, shouldReanalyze: false };
31
+ }
32
+
33
+ const weakAreas = [];
34
+ const focusInstructions = [];
35
+
36
+ // Check each dimension for specific weaknesses
37
+ const dims = qualityReport.dimensions || {};
38
+
39
+ // Low density score — dig into what's sparse
40
+ if (dims.density && dims.density.score < 50) {
41
+ const tickets = analysis.tickets || [];
42
+ const actions = analysis.action_items || [];
43
+ const crs = analysis.change_requests || [];
44
+ const blockers = analysis.blockers || [];
45
+ const scopes = analysis.scope_changes || [];
46
+
47
+ if (tickets.length === 0) {
48
+ weakAreas.push('tickets');
49
+ focusInstructions.push(
50
+ 'FOCUS: TICKET EXTRACTION — Your previous analysis found no tickets. ' +
51
+ 'Re-examine the video carefully for any work items, bugs, features, tasks, ' +
52
+ 'or CR numbers discussed. Even brief mentions count. ' +
53
+ 'Extract at minimum: ticket_id, title, status, and a brief summary.'
54
+ );
55
+ }
56
+
57
+ if (actions.length === 0) {
58
+ weakAreas.push('action_items');
59
+ focusInstructions.push(
60
+ 'FOCUS: ACTION ITEMS — Your previous analysis found no action items. ' +
61
+ 'Listen for any task assignments, next steps, follow-ups, or commitments made. ' +
62
+ 'Include who is responsible and what they need to do.'
63
+ );
64
+ }
65
+
66
+ if (blockers.length === 0 && tickets.length > 0) {
67
+ weakAreas.push('blockers');
68
+ focusInstructions.push(
69
+ 'FOCUS: BLOCKERS — Tickets were found but no blockers. ' +
70
+ 'Re-examine: are there any pending decisions, DB prerequisites, ' +
71
+ 'external dependencies, or items waiting on someone? ' +
72
+ 'Even implicit blockers (waiting for a response, needing approval) should be captured.'
73
+ );
74
+ }
75
+
76
+ if (scopes.length === 0 && tickets.length > 1) {
77
+ weakAreas.push('scope_changes');
78
+ focusInstructions.push(
79
+ 'FOCUS: SCOPE CHANGES — Multiple tickets were discussed but no scope changes detected. ' +
80
+ 'Check if anything was added, removed, deferred, or had its approach changed ' +
81
+ 'compared to what the context documents say.'
82
+ );
83
+ }
84
+
85
+ // Check for sparse action items (present but no assignees)
86
+ if (actions.length > 0) {
87
+ const unassigned = actions.filter(a => !a.assigned_to);
88
+ if (unassigned.length > actions.length * 0.5) {
89
+ weakAreas.push('action_item_assignees');
90
+ focusInstructions.push(
91
+ `FOCUS: ACTION ITEM ASSIGNEES — ${unassigned.length}/${actions.length} action items have no assignee. ` +
92
+ 'Re-examine who was asked to do each task. Use speaker identification and context clues.'
93
+ );
94
+ }
95
+ }
96
+ }
97
+
98
+ // Low confidence coverage
99
+ const allItems = [
100
+ ...(analysis.tickets || []),
101
+ ...(analysis.action_items || []),
102
+ ...(analysis.change_requests || []),
103
+ ...(analysis.blockers || []),
104
+ ...(analysis.scope_changes || []),
105
+ ];
106
+ const withConf = allItems.filter(i => i.confidence && ['HIGH', 'MEDIUM', 'LOW'].includes(i.confidence));
107
+ if (allItems.length > 0 && withConf.length < allItems.length * 0.5) {
108
+ weakAreas.push('confidence');
109
+ focusInstructions.push(
110
+ 'FOCUS: CONFIDENCE SCORING — Most items are missing confidence fields. ' +
111
+ 'For every item, set confidence to HIGH (explicit + corroborated), ' +
112
+ 'MEDIUM (partial evidence), or LOW (inferred). Include confidence_reason.'
113
+ );
114
+ }
115
+
116
+ // Check for low-confidence items that might benefit from re-examination
117
+ const lowConfItems = allItems.filter(i => i.confidence === 'LOW');
118
+ if (lowConfItems.length >= 3) {
119
+ weakAreas.push('low_confidence_verification');
120
+ focusInstructions.push(
121
+ `FOCUS: LOW-CONFIDENCE VERIFICATION — ${lowConfItems.length} items were marked LOW confidence. ` +
122
+ 'Re-examine these specific items against the video and context documents. ' +
123
+ 'Either upgrade their confidence with supporting evidence, or remove them if truly unsupported: ' +
124
+ lowConfItems.slice(0, 5).map(i => `"${i.id || i.ticket_id || i.description?.slice(0, 50)}"`).join(', ')
125
+ );
126
+ }
127
+
128
+ // Cross-reference issues
129
+ if (dims.crossRef && dims.crossRef.score < 70) {
130
+ weakAreas.push('cross_references');
131
+ focusInstructions.push(
132
+ 'FOCUS: CROSS-REFERENCES — There are consistency issues between items. ' +
133
+ 'Verify that all ticket IDs referenced in change_requests and action_items ' +
134
+ 'actually exist in the tickets array. Fix any orphaned references.'
135
+ );
136
+ }
137
+
138
+ const shouldReanalyze = focusInstructions.length > 0 &&
139
+ qualityReport.score < 60 && // Only re-analyze if quality is truly lacking
140
+ weakAreas.length >= 2; // At least 2 weak areas to justify the cost
141
+
142
+ const focusPrompt = focusInstructions.length > 0
143
+ ? focusInstructions.join('\n\n')
144
+ : null;
145
+
146
+ return { weakAreas, focusPrompt, shouldReanalyze };
147
+ }
148
+
149
+ // ======================== FOCUSED RE-ANALYSIS ========================
150
+
151
/**
 * Run a focused second pass on a segment, targeting specific weak areas.
 *
 * Sends the first-pass JSON (truncated) plus the focus instructions back to
 * Gemini and asks for ONLY new or corrected items. Best-effort: any failure
 * (API error, unparseable output, explicit "_no_new_items") yields null so
 * the caller can fall back to the original analysis.
 *
 * @param {object} ai - Gemini AI instance
 * @param {object} originalAnalysis - The first-pass analysis
 * @param {string} focusPrompt - Specific focus instructions
 * @param {object} segmentOpts - { videoUri, videoMime, segmentIndex, totalSegments, thinkingBudget }
 * @returns {object|null} Additional/corrected extraction, or null if failed
 */
async function runFocusedPass(ai, originalAnalysis, focusPrompt, segmentOpts = {}) {
  // Defaults treat the input as a single whole segment; thinkingBudget is
  // forwarded verbatim to Gemini's thinkingConfig below.
  const {
    videoUri,
    videoMime = 'video/mp4',
    segmentIndex = 0,
    totalSegments = 1,
    thinkingBudget = 12288,
  } = segmentOpts;

  // Build the focused prompt. The first-pass JSON is truncated to 8000 chars
  // to bound prompt size; "_focused_pass" markers let the merger distinguish
  // second-pass items later.
  const promptText = `You are performing a FOCUSED RE-ANALYSIS of a video segment.

A first-pass analysis was already done but had gaps. Your job is to fill ONLY the gaps — do NOT repeat items already extracted correctly.

FIRST-PASS RESULT (for reference — do not duplicate these):
${JSON.stringify(originalAnalysis, null, 2).slice(0, 8000)}

${focusPrompt}

INSTRUCTIONS:
- Output ONLY the items that are NEW or CORRECTED compared to the first pass.
- Use the same JSON structure as a normal analysis.
- For corrections to existing items, include the original item's ID with updated fields.
- For new items, use new sequential IDs that don't conflict with the first pass.
- Every item MUST have "confidence" (HIGH/MEDIUM/LOW) and "confidence_reason".
- Set "_focused_pass": true on every item you produce so the merger knows these are second-pass items.
- If you find NO new items after careful re-examination, return: {"_no_new_items": true}

Output ONLY valid JSON.`;

  const contentParts = [];

  // Include video reference if available — a previously uploaded file URI;
  // without it the pass runs on the first-pass JSON alone.
  if (videoUri) {
    contentParts.push({ fileData: { mimeType: videoMime, fileUri: videoUri } });
  }

  contentParts.push({ text: promptText });

  // temperature 0 keeps the correction pass as deterministic as the API
  // allows; model name is read from config at call time (see header note).
  const requestPayload = {
    model: config.GEMINI_MODEL,
    contents: [{ role: 'user', parts: contentParts }],
    config: {
      systemInstruction: 'You are a focused re-extraction agent. Find ONLY missing or incorrect items from the first pass. Output valid JSON only.',
      maxOutputTokens: 32768,
      temperature: 0,
      thinkingConfig: { thinkingBudget },
    },
  };

  try {
    // Single retry with a 3s base delay — a failed focused pass is non-fatal,
    // so we don't spend the full retry budget here.
    const response = await withRetry(
      () => ai.models.generateContent(requestPayload),
      { label: `Focused re-analysis (seg ${segmentIndex + 1}/${totalSegments})`, maxRetries: 1, baseDelay: 3000 }
    );

    const rawText = response.text;
    const parsed = extractJson(rawText);

    if (!parsed) return null;
    // Model explicitly reported nothing new — treat like an empty pass.
    if (parsed._no_new_items) return null;

    // Extract token usage for cost tracking (field names follow the SDK's
    // usageMetadata shape — NOTE(review): confirm against the pinned SDK version).
    const usage = response.usageMetadata || {};
    parsed._focusedPassMeta = {
      inputTokens: usage.promptTokenCount || 0,
      outputTokens: usage.candidatesTokenCount || 0,
      totalTokens: usage.totalTokenCount || 0,
      thoughtTokens: usage.thoughtsTokenCount || 0,
    };

    return parsed;
  } catch (err) {
    // Best-effort: degrade to "no extra items" instead of aborting the pipeline.
    console.warn(`  ⚠ Focused re-analysis failed: ${err.message}`);
    return null;
  }
}
237
+
238
+ // ======================== MERGE LOGIC ========================
239
+
240
/**
 * Merge focused pass results into the original analysis.
 *
 * Items whose ID already exists in the original are treated as corrections:
 * their non-null fields overwrite the first-pass fields in place. Items with
 * a new (or missing) ID are appended. The original analysis object is never
 * mutated — it is deep-cloned first — but appended focused items are tagged
 * in place with `_from_focused_pass`.
 *
 * @param {object} original - First-pass analysis
 * @param {object} focused - Focused pass results (may be null or `{_no_new_items: true}`)
 * @returns {object} Merged analysis, or `original` unchanged when there is nothing to merge
 */
function mergeFocusedResults(original, focused) {
  if (!focused || focused._no_new_items) return original;

  // Deep clone via JSON round-trip: the analysis came from extractJson, so it
  // is plain JSON data and the round-trip is lossless here.
  const merged = JSON.parse(JSON.stringify(original));

  // Array fields to merge, each with the property used as its identity key.
  const arrayFields = [
    { key: 'tickets', idField: 'ticket_id' },
    { key: 'action_items', idField: 'id' },
    { key: 'change_requests', idField: 'id' },
    { key: 'blockers', idField: 'id' },
    { key: 'scope_changes', idField: 'id' },
    { key: 'file_references', idField: 'resolved_path' },
  ];

  for (const { key, idField } of arrayFields) {
    const baseArr = merged[key] || [];
    const incoming = focused[key] || [];

    if (incoming.length === 0) continue;

    // Index existing items by (truthy) ID once, instead of running findIndex
    // per incoming item — keeps the merge O(n + m) rather than O(n * m).
    const indexById = new Map();
    baseArr.forEach((item, idx) => {
      const id = item[idField];
      if (id && !indexById.has(id)) indexById.set(id, idx);
    });

    for (const focusedItem of incoming) {
      const itemId = focusedItem[idField];

      if (itemId && indexById.has(itemId)) {
        // Correction — focused fields override, but null/undefined values
        // never clobber first-pass data, and the `_focused_pass` marker
        // itself is not copied onto the merged item.
        const target = baseArr[indexById.get(itemId)];
        for (const [field, val] of Object.entries(focusedItem)) {
          if (val !== null && val !== undefined && field !== '_focused_pass') {
            target[field] = val;
          }
        }
        target._enhanced_by_focused_pass = true;
      } else {
        // New item — append, and register its ID so a later duplicate in the
        // same focused batch merges into it instead of appending twice.
        focusedItem._from_focused_pass = true;
        baseArr.push(focusedItem);
        if (itemId) indexById.set(itemId, baseArr.length - 1);
      }
    }

    merged[key] = baseArr;
  }

  // Append a meaningful focused summary rather than replacing the original.
  if (focused.summary && focused.summary.length > 20) {
    if (merged.summary) {
      merged.summary += '\n\n[Focused re-analysis addition]: ' + focused.summary;
    } else {
      merged.summary = focused.summary;
    }
  }

  // Bookkeeping so downstream consumers know a second pass was applied.
  merged._focused_pass_applied = true;
  merged._focused_pass_meta = focused._focusedPassMeta || null;

  return merged;
}
312
+
313
+ module.exports = {
314
+ identifyWeaknesses,
315
+ runFocusedPass,
316
+ mergeFocusedResults,
317
+ };
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Formatting helpers.
3
+ */
4
+
5
+ 'use strict';
6
+
7
/** Format seconds → "M:SS" (e.g. 65 → "1:05"); missing/falsy non-zero input → "unknown". */
function fmtDuration(sec) {
  if (sec !== 0 && !sec) return 'unknown';
  const minutes = Math.floor(sec / 60);
  const seconds = String(Math.floor(sec % 60)).padStart(2, '0');
  return `${minutes}:${seconds}`;
}
14
+
15
/** Format seconds → "HH:MM:SS" (shared across services to avoid duplication). */
function formatHMS(sec) {
  if (sec == null) return '??:??:??';
  const pad2 = (n) => String(n).padStart(2, '0');
  const hours = Math.floor(sec / 3600);
  const minutes = Math.floor((sec % 3600) / 60);
  const seconds = Math.floor(sec % 60);
  return [hours, minutes, seconds].map(pad2).join(':');
}
23
+
24
/** Format bytes → human-readable string like "12.3 MB" (1 decimal for KB, 2 for MB/GB). */
function fmtBytes(bytes) {
  const KB = 1024;
  const MB = KB * 1024;
  const GB = MB * 1024;
  if (bytes < KB) return `${bytes} B`;
  if (bytes < MB) return `${(bytes / KB).toFixed(1)} KB`;
  if (bytes < GB) return `${(bytes / MB).toFixed(2)} MB`;
  return `${(bytes / GB).toFixed(2)} GB`;
}
31
+
32
+ module.exports = { fmtDuration, formatHMS, fmtBytes };
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Filesystem utilities — recursive doc finder, etc.
3
+ */
4
+
5
+ 'use strict';
6
+
7
+ const fs = require('fs');
8
+ const path = require('path');
9
+
10
/**
 * Directories to always skip when scanning recursively — package installs,
 * VCS metadata, and output/log directories this tool generates itself.
 * Matched by bare directory name at any depth (see findDocsRecursive).
 */
const SKIP_DIRS = new Set(['node_modules', '.git', 'compressed', 'logs', 'gemini_runs', 'runs']);
12
+
13
/**
 * Recursively find all files matching given extensions under a directory.
 *
 * Extension comparison is case-insensitive; directories listed in SKIP_DIRS
 * (node_modules, .git, build output, …) are pruned. Unreadable directories
 * are silently skipped.
 *
 * @param {string} baseDir - Root directory of the scan
 * @param {string[]} exts - Lowercase extensions incl. dot, e.g. ['.md', '.txt']
 * @param {string} _relBase - Internal recursion cursor; callers omit it
 * @returns {{ absPath: string, relPath: string }[]} relPath uses forward slashes
 */
function findDocsRecursive(baseDir, exts, _relBase = '') {
  let dirEntries;
  try {
    dirEntries = fs.readdirSync(path.join(baseDir, _relBase), { withFileTypes: true });
  } catch {
    // Missing or unreadable directory — contribute nothing.
    return [];
  }

  const found = [];
  for (const dirent of dirEntries) {
    const rel = _relBase === '' ? dirent.name : path.join(_relBase, dirent.name);
    if (dirent.isDirectory()) {
      if (SKIP_DIRS.has(dirent.name)) continue;
      for (const hit of findDocsRecursive(baseDir, exts, rel)) {
        found.push(hit);
      }
    } else if (exts.includes(path.extname(dirent.name).toLowerCase())) {
      // Normalize Windows separators so relPath is stable across platforms.
      found.push({ absPath: path.join(baseDir, rel), relPath: rel.replace(/\\/g, '/') });
    }
  }
  return found;
}
38
+
39
+ module.exports = { findDocsRecursive };