task-summary-extractor 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,466 @@
1
+ /**
2
+ * Change Detector — correlates git changes and document updates with
3
+ * items from a previous call analysis.
4
+ *
5
+ * Uses multiple matching strategies:
6
+ * 1. File path matching — git changed files vs. analysis file_references/code_changes
7
+ * 2. ID matching — ticket/CR IDs in commit messages
8
+ * 3. Keyword matching — semantic overlap between item descriptions and commit messages
9
+ * 4. Document change detection — mtime comparison for docs in the call folder
10
+ *
11
+ * Produces a unified change report consumed by progress-updater.js.
12
+ */
13
+
14
+ 'use strict';
15
+
16
+ const fs = require('fs');
17
+ const path = require('path');
18
+ const {
19
+ isGitAvailable,
20
+ findGitRoot,
21
+ getCommitsWithFiles,
22
+ getChangedFilesSince,
23
+ getDiffSummary,
24
+ getWorkingTreeChanges,
25
+ getCurrentBranch,
26
+ normPath,
27
+ } = require('../services/git');
28
+
29
+ // ======================== STOP WORDS ========================
30
+
31
/**
 * Words that extractKeywords() drops from its output.
 *
 * Besides ordinary English function words, the list also contains a few
 * code-flavoured tokens ('string', 'null', 'true', 'false') and generic verbs
 * ('add', 'use', 'fix', ...). Entries are lowercase because lookups are made
 * on lowercased tokens.
 */
const STOP_WORDS = new Set(
  [
    'the a an is are was were be been being',
    'have has had do does did will would could',
    'should may might shall can need must to of',
    'in for on with at by from as into about',
    'and but or not no nor so if then than',
    'that this these those it its we our they',
    'them he she his her you your i my me',
    'string null true false new each all any',
    'add use get set make update change fix',
  ]
    .join(' ')
    .split(' '),
);
42
+
43
+ // ======================== ITEM EXTRACTION ========================
44
+
45
/**
 * Flatten a compiled analysis into one list of trackable items with the
 * metadata the correlation step searches on.
 *
 * Sections are read in a fixed order: `tickets`, `change_requests`,
 * `action_items`, `blockers`, `scope_changes`. Afterwards every entry of
 * `file_references` that has a `resolved_path` is attached to the items named
 * in its `mentioned_in_tickets` / `mentioned_in_changes` lists.
 *
 * Malformed input is tolerated rather than fatal: a section that is not an
 * array is treated as empty, and null/undefined entries inside a section are
 * skipped.
 *
 * @param {object} analysis - Compiled analysis
 * @returns {Array<{id: string, type: string, title: string, description: string, status: string, assignee: (string|null), keywords: string[], fileRefs: string[], confidence: *}>}
 *   Empty array when `analysis` is falsy.
 */
function extractTrackableItems(analysis) {
  if (!analysis) return [];

  // Only real arrays are iterated; null/undefined entries would otherwise
  // throw on the first property access.
  const listOf = (value) => (Array.isArray(value) ? value.filter((entry) => entry != null) : []);

  const items = [];

  // Tickets
  for (const t of listOf(analysis.tickets)) {
    const fileRefs = [];
    for (const cc of listOf(t.code_changes)) {
      if (cc.file_path) fileRefs.push(normPath(cc.file_path));
    }
    items.push({
      // Fallback ids use the running item count, so they are unique within the result.
      id: t.ticket_id || t.id || `ticket_${items.length}`,
      type: 'ticket',
      title: t.title || '',
      description: t.discussed_state?.summary || t.description || '',
      status: t.status || 'unknown',
      assignee: t.assignee || null,
      keywords: extractKeywords(`${t.title || ''} ${t.discussed_state?.summary || ''} ${t.description || ''}`),
      fileRefs,
      confidence: t.confidence || null,
    });
  }

  // Change requests
  for (const cr of listOf(analysis.change_requests)) {
    const fileRefs = [];
    if (cr.where?.file_path) fileRefs.push(normPath(cr.where.file_path));
    items.push({
      id: cr.id || `cr_${items.length}`,
      type: 'change_request',
      title: cr.title || '',
      description: cr.what || cr.description || '',
      status: cr.status || 'unknown',
      assignee: cr.assigned_to || null,
      keywords: extractKeywords(`${cr.title || ''} ${cr.what || ''} ${cr.how || ''} ${cr.where?.component || ''}`),
      fileRefs,
      confidence: cr.confidence || null,
    });
  }

  // Action items
  for (const ai of listOf(analysis.action_items)) {
    items.push({
      id: ai.id || `ai_${items.length}`,
      type: 'action_item',
      title: ai.description || '',
      description: ai.description || '',
      status: ai.status || 'unknown',
      assignee: ai.assigned_to || null,
      keywords: extractKeywords(ai.description || ''),
      fileRefs: [],
      confidence: ai.confidence || null,
    });
  }

  // Blockers
  for (const b of listOf(analysis.blockers)) {
    items.push({
      id: b.id || `blk_${items.length}`,
      type: 'blocker',
      title: b.description || '',
      description: b.description || '',
      status: b.status || 'open',
      assignee: b.owner || null,
      keywords: extractKeywords(b.description || ''),
      fileRefs: [],
      confidence: b.confidence || null,
    });
  }

  // Scope changes
  for (const sc of listOf(analysis.scope_changes)) {
    items.push({
      id: sc.id || `sc_${items.length}`,
      type: 'scope_change',
      title: sc.new_scope || sc.description || '',
      description: `${sc.original_scope || ''} → ${sc.new_scope || ''}`,
      status: 'noted',
      assignee: sc.decided_by || null,
      keywords: extractKeywords(`${sc.original_scope || ''} ${sc.new_scope || ''}`),
      fileRefs: [],
      confidence: sc.confidence || null,
    });
  }

  // Index items by id once instead of scanning the list for every reference.
  // The first item wins on duplicate ids, which is what a linear find() returns.
  const firstItemById = new Map();
  for (const item of items) {
    if (!firstItemById.has(item.id)) firstItemById.set(item.id, item);
  }

  const attachRef = (ids, normRef) => {
    for (const id of listOf(ids)) {
      const item = firstItemById.get(id);
      if (item && !item.fileRefs.includes(normRef)) {
        item.fileRefs.push(normRef);
      }
    }
  };

  // File references — add to existing items' fileRefs
  for (const fr of listOf(analysis.file_references)) {
    if (!fr.resolved_path) continue;
    const normRef = normPath(fr.resolved_path);
    attachRef(fr.mentioned_in_tickets, normRef);
    attachRef(fr.mentioned_in_changes, normRef);
  }

  return items;
}
161
+
162
/**
 * Extract meaningful keywords from text.
 *
 * The text is lowercased, every character outside `a-z 0-9 - _ . / \` is
 * turned into a separator, and the result is split on whitespace. Because
 * `.`, `-`, `/` and `\` are kept so that file paths and ids survive, sentence
 * punctuation would otherwise stay glued to words ("page."); those characters
 * are therefore stripped from the END of each token. Tokens are then dropped
 * when they are shorter than three characters, contain no letter or digit, or
 * are stop words. Duplicates are removed, first occurrence order is kept.
 *
 * @param {string} text - Free text; other truthy values are stringified.
 * @returns {string[]} Unique lowercase keywords; empty array for falsy input.
 */
function extractKeywords(text) {
  if (!text) return [];

  // Plain scan instead of a /[...]+$/ regex, which can backtrack badly on
  // long punctuation runs.
  const stripTrailingPunctuation = (token) => {
    let end = token.length;
    while (end > 0 && '.-/\\'.includes(token[end - 1])) end -= 1;
    return token.slice(0, end);
  };

  const words = String(text)
    .toLowerCase()
    .replace(/[^a-z0-9\-_.\/\\]/g, ' ')
    .split(/\s+/)
    .map(stripTrailingPunctuation)
    .filter((w) => w.length > 2 && /[a-z0-9]/.test(w) && !STOP_WORDS.has(w));

  return [...new Set(words)];
}
178
+
179
+ // ======================== CORRELATION ENGINE ========================
180
+
181
/**
 * Escape regular-expression metacharacters so `text` is matched literally.
 *
 * @param {string} text
 * @returns {string}
 */
function escapeRegExpLiteral(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Build the case-insensitive pattern used to spot an item id in a commit
 * message. `-` and `_` in the id match an optional `-`, `_` or whitespace, so
 * "CR-12" also matches "CR 12" and "CR12". The id must not be glued to other
 * letters or digits, so "CR-1" does not match inside "CR-12".
 *
 * @param {*} id - Item id; stringified before use.
 * @returns {RegExp|null} null when the id has no content to match on.
 */
function buildItemIdRegex(id) {
  const parts = String(id ?? '').split(/[-_]/);
  if (parts.every((part) => part.trim() === '')) return null;
  const body = parts.map(escapeRegExpLiteral).join('[-_\\s]?');
  return new RegExp(`(?:^|[^a-z0-9])${body}(?![a-z0-9])`, 'i');
}

/**
 * True when two lowercased paths are equal or one is a trailing part of the
 * other, cut at a `/` boundary ("src/a.js" vs "repo/src/a.js"). Empty paths
 * never match.
 *
 * @param {string} a
 * @param {string} b
 * @returns {boolean}
 */
function isSamePathOrSuffix(a, b) {
  if (!a || !b) return false;
  return a === b || a.endsWith(`/${b}`) || b.endsWith(`/${a}`);
}

/**
 * Match analysis items against git changes using multiple strategies and
 * score each item.
 *
 * Score contributions (summed, then capped at 1.0):
 *   - file_match           +0.40 per changed file whose path equals or is a suffix/prefix-trimmed form of a referenced path
 *   - file_name_match      +0.20 per changed file that only shares the file name
 *   - working_tree         +0.10 per referenced file with uncommitted changes
 *   - id_in_commit         +0.50 per commit whose message contains the item id
 *   - keyword_match        +0.05 per keyword (4+ chars) found in commit text, at most +0.30
 *   - commit_file_overlap  +0.15 per commit touching a referenced file, unless that commit already matched by id
 *
 * `localAssessment` is 'DONE' from 0.6, 'IN_PROGRESS' from 0.25, otherwise
 * 'NOT_STARTED'; `localConfidence` is 'MEDIUM' from 0.6, otherwise 'LOW'.
 *
 * Path comparisons are case-insensitive and assume `/` separators.
 *
 * @param {Array} items - From extractTrackableItems(); reads id, type, keywords, fileRefs
 * @param {object} gitData - { commits, changedFiles, workingChanges }.
 *   Commits are read for hash, message, files; changed files for path, status,
 *   changes; working changes for path. Missing lists are treated as empty.
 * @returns {Map<string, object>} Map of itemId → correlation data
 */
function correlateItemsWithChanges(items, gitData) {
  const hasPath = (entry) => typeof entry?.path === 'string';
  const toPathIndex = (entry) => {
    const lower = entry.path.toLowerCase();
    return { entry, lower, base: path.basename(lower) };
  };

  // Lowercase everything once up front so all comparisons are case-consistent.
  const commits = gitData?.commits || [];
  const changedIndex = (gitData?.changedFiles || []).filter(hasPath).map(toPathIndex);
  const workingIndex = (gitData?.workingChanges || []).filter(hasPath).map(toPathIndex);
  const commitIndex = commits.map((commit) => ({
    commit,
    message: String(commit.message ?? ''),
    files: Array.isArray(commit.files) ? commit.files.map((f) => String(f).toLowerCase()) : null,
  }));
  const allCommitText = commitIndex
    .map(({ message, files }) => `${message} ${(files || []).join(' ')}`)
    .join(' ')
    .toLowerCase();

  const correlations = new Map();

  for (const item of items || []) {
    const evidence = [];
    let score = 0;
    const refs = (item.fileRefs || []).map((ref) => {
      const lower = String(ref).toLowerCase();
      return { ref, lower, base: path.basename(lower) };
    });

    // Strategy 1: File path matching
    for (const { ref, lower: refNorm, base: refBase } of refs) {
      for (const { entry: changed, lower: changedNorm, base: changedBase } of changedIndex) {
        if (isSamePathOrSuffix(changedNorm, refNorm)) {
          score += 0.4;
          evidence.push({
            type: 'file_match',
            detail: `${changed.path} (${changed.status}, touched ${changed.changes}x)`,
            confidence: 'high',
          });
        } else if (refBase !== '' && refBase === changedBase) {
          // Same filename, different path
          score += 0.2;
          evidence.push({
            type: 'file_name_match',
            detail: `${changed.path} — same filename as referenced ${ref}`,
            confidence: 'medium',
          });
        }
      }

      // Also check working tree
      const hasUncommittedChanges = workingIndex.some(
        ({ lower, base }) => lower === refNorm || (refBase !== '' && base === refBase),
      );
      if (hasUncommittedChanges) {
        score += 0.1;
        evidence.push({
          type: 'working_tree',
          detail: `${ref} has uncommitted changes`,
          confidence: 'medium',
        });
      }
    }

    // Strategy 2: ID matching in commit messages
    const idRegex = buildItemIdRegex(item.id);
    const commitsMatchedById = new Set();
    if (idRegex) {
      for (const { commit, message } of commitIndex) {
        if (idRegex.test(message)) {
          commitsMatchedById.add(commit);
          score += 0.5;
          evidence.push({
            type: 'id_in_commit',
            detail: `Commit ${commit.hash}: "${commit.message}"`,
            confidence: 'high',
          });
        }
      }
    }

    // Strategy 3: Keyword matching in commits
    const matchedKeywords = (item.keywords || []).filter(
      (kw) => kw.length >= 4 && allCommitText.includes(kw), // skip very short keywords
    );
    if (matchedKeywords.length > 0) {
      score += Math.min(0.3, matchedKeywords.length * 0.05);
      evidence.push({
        type: 'keyword_match',
        detail: `Keywords found in commits: ${matchedKeywords.slice(0, 8).join(', ')}`,
        confidence: matchedKeywords.length >= 3 ? 'medium' : 'low',
      });
    }

    // Strategy 4: Per-commit file overlap
    for (const { commit, files } of commitIndex) {
      if (!files) continue;
      // A commit already credited through its id is not credited again.
      if (commitsMatchedById.has(commit)) continue;
      const overlapCount = refs.filter(({ lower }) => files.some((f) => isSamePathOrSuffix(f, lower))).length;
      if (overlapCount > 0) {
        score += 0.15;
        evidence.push({
          type: 'commit_file_overlap',
          detail: `Commit ${commit.hash}: "${commit.message}" touches ${overlapCount} referenced file(s)`,
          confidence: 'medium',
        });
      }
    }

    // Clamp score to [0, 1]
    score = Math.min(1.0, score);

    correlations.set(item.id, {
      itemId: item.id,
      itemType: item.type,
      score,
      evidence,
      localAssessment: score >= 0.6 ? 'DONE' : score >= 0.25 ? 'IN_PROGRESS' : 'NOT_STARTED',
      localConfidence: score >= 0.6 ? 'MEDIUM' : 'LOW',
    });
  }

  return correlations;
}
306
+
307
+ // ======================== DOCUMENT CHANGES ========================
308
+
309
/**
 * List document files under the call folder whose modification time is later
 * than the given timestamp.
 *
 * The folder is walked recursively. Directories named node_modules, .git,
 * compressed, runs, gemini_runs or logs are not entered, and only files with
 * a known document extension are considered. Directories or entries that
 * cannot be read are skipped silently.
 *
 * @param {string} callDir - The call folder path
 * @param {string} sinceISO - ISO timestamp of the previous analysis
 * @returns {Array<{path: string, relPath: string, modified: string, status: string}>}
 *   One entry per changed document; empty when `sinceISO` is not a valid date.
 */
function detectDocumentChanges(callDir, sinceISO) {
  const cutoffMs = new Date(sinceISO).getTime();
  if (Number.isNaN(cutoffMs)) return [];

  const trackedExtensions = new Set(['.md', '.txt', '.vtt', '.srt', '.csv', '.json', '.pdf', '.docx']);
  const ignoredDirs = new Set(['node_modules', '.git', 'compressed', 'runs', 'gemini_runs', 'logs']);
  const found = [];

  const visit = (folder) => {
    let names;
    try {
      names = fs.readdirSync(folder);
    } catch {
      // Unreadable or missing folder: nothing to report from here.
      return;
    }

    names.forEach((name) => {
      const absolute = path.join(folder, name);

      let info;
      try {
        info = fs.statSync(absolute);
      } catch {
        return;
      }

      if (info.isDirectory()) {
        if (!ignoredDirs.has(name)) visit(absolute);
        return;
      }
      if (!info.isFile()) return;
      if (!trackedExtensions.has(path.extname(name).toLowerCase())) return;

      const modifiedMs = info.mtimeMs;
      if (!(modifiedMs > cutoffMs)) return;

      found.push({
        path: absolute,
        relPath: path.relative(callDir, absolute),
        modified: new Date(modifiedMs).toISOString(),
        status: 'modified',
      });
    });
  };

  visit(callDir);
  return found;
}
353
+
354
+ // ======================== MAIN DETECTION ========================
355
+
356
/**
 * Build the complete change report: trackable items from the analysis, git
 * activity since the previous analysis, changed documents in the call folder,
 * and the item ↔ change correlations.
 *
 * The git repository is looked up from `repoPath` first, then `callDir`, then
 * the current working directory. Correlations are only computed when a
 * repository was found and it has at least one commit in range; otherwise the
 * correlation map stays empty and every item counts as "without matches".
 * An item counts as matched when its correlation score is above 0.1.
 *
 * @param {object} opts
 * @param {string} [opts.repoPath] - Git repo path (auto-detected if not provided)
 * @param {string} opts.callDir - Call folder path
 * @param {string} opts.sinceISO - Timestamp of the previous analysis
 * @param {object} opts.analysis - The compiled analysis to check progress for
 * @returns {object} Change report ({ timestamp, sinceTimestamp, git, documents, items, correlations, totals })
 */
function detectAllChanges({ repoPath, callDir, sinceISO, analysis }) {
  const git = {
    available: false,
    repoPath: null,
    branch: null,
    commits: [],
    changedFiles: [],
    workingChanges: [],
    summary: 'Git not available',
  };
  const documents = { changes: [] };
  const totals = {
    commits: 0,
    filesChanged: 0,
    docsChanged: 0,
    itemsWithMatches: 0,
    itemsWithoutMatches: 0,
  };
  const report = {
    timestamp: new Date().toISOString(),
    sinceTimestamp: sinceISO,
    git,
    documents,
    items: extractTrackableItems(analysis),
    correlations: new Map(),
    totals,
  };

  // Git: resolve the repository (explicit > callDir > cwd) and collect its state.
  if (isGitAvailable()) {
    const repoRoot =
      (repoPath ? findGitRoot(path.resolve(repoPath)) : null) ||
      findGitRoot(callDir) ||
      findGitRoot(process.cwd());

    if (!repoRoot) {
      git.summary = 'No git repository found';
    } else {
      git.available = true;
      git.repoPath = repoRoot;
      git.branch = getCurrentBranch(repoRoot);
      git.commits = getCommitsWithFiles(repoRoot, sinceISO, 100);
      git.changedFiles = getChangedFilesSince(repoRoot, sinceISO);
      git.workingChanges = getWorkingTreeChanges(repoRoot);
      git.summary = getDiffSummary(repoRoot, sinceISO);

      totals.commits = git.commits.length;
      totals.filesChanged = git.changedFiles.length;
    }
  }

  // Documents in the call folder.
  documents.changes = detectDocumentChanges(callDir, sinceISO);
  totals.docsChanged = documents.changes.length;

  // Correlate only when there is commit history to correlate against.
  const hasCommits = git.available && git.commits.length > 0;
  if (hasCommits) {
    report.correlations = correlateItemsWithChanges(report.items, git);
  }

  // Totals: an item is "matched" when its correlation score exceeds 0.1.
  const matchedCount = report.items.filter((item) => {
    const correlation = report.correlations.get(item.id);
    return Boolean(correlation) && correlation.score > 0.1;
  }).length;
  totals.itemsWithMatches = matchedCount;
  totals.itemsWithoutMatches = report.items.length - matchedCount;

  return report;
}
444
+
445
/**
 * Produce a JSON-friendly copy of a change report.
 *
 * The copy is shallow: every field is carried over as-is except
 * `correlations`, whose Map is replaced by a plain object keyed by item id.
 * The input report is not modified.
 *
 * @param {object} report - From detectAllChanges()
 * @returns {object} JSON-serializable report
 */
function serializeReport(report) {
  // Copy first so `correlations` keeps its original position among the keys.
  const serialized = { ...report };
  serialized.correlations = Object.fromEntries(report.correlations);
  return serialized;
}
458
+
459
+ module.exports = {
460
+ detectAllChanges,
461
+ serializeReport,
462
+ extractTrackableItems,
463
+ extractKeywords,
464
+ correlateItemsWithChanges,
465
+ detectDocumentChanges,
466
+ };