task-summary-extractor 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,549 @@
1
+ /**
2
+ * Deep Dive — AI-powered explanatory document generation.
3
+ *
4
+ * After the normal extraction pipeline, this module takes the compiled analysis
5
+ * and asks Gemini to identify topics that warrant deeper explanation, then
6
+ * generates a set of standalone Markdown documents — one per topic.
7
+ *
8
+ * Use cases:
9
+ * - Technical concepts discussed in a dev call → architecture docs
10
+ * - Client requirements → detailed requirement breakdowns
11
+ * - Decisions made → decision records with rationale
12
+ * - Processes discussed → step-by-step guides
13
+ * - Any complex topic → accessible explanations
14
+ *
15
+ * Two-phase approach:
16
+ * Phase 1: Topic Discovery — AI identifies what can be explained
17
+ * Phase 2: Document Generation — AI writes each document in parallel batches
18
+ */
19
+
20
+ 'use strict';
21
+
22
+ const fs = require('fs');
23
+ const path = require('path');
24
+ const config = require('../config');
25
+ // Access config.GEMINI_MODEL / config.GEMINI_CONTEXT_WINDOW at call time for runtime model changes.
26
+ const { extractJson } = require('./json-parser');
27
+ const { withRetry } = require('./retry');
28
+
29
+ // ======================== TOPIC DISCOVERY ========================
30
+
31
+ /**
32
+ * Ask Gemini to identify topics from the compiled analysis that can be
33
+ * expanded into standalone explanatory documents.
34
+ *
35
+ * @param {object} ai - GoogleGenAI instance
36
+ * @param {object} compiledAnalysis - The compiled analysis from the pipeline
37
+ * @param {object} options
38
+ * @param {string} options.callName - Name of the call/meeting
39
+ * @param {string} options.userName - Current user's name
40
+ * @param {number} options.thinkingBudget - Thinking tokens
41
+ * @param {string[]} [options.contextSnippets] - Optional raw document snippets for richer context
42
+ * @returns {Promise<{topics: Array<{id: string, title: string, category: string, description: string, relevance: string, source_items: string[]}>, raw: string}>}
43
+ */
44
+ async function discoverTopics(ai, compiledAnalysis, options = {}) {
45
+ const { callName = 'meeting', userName = '', thinkingBudget = 16384, contextSnippets = [] } = options;
46
+
47
+ const analysisStr = JSON.stringify(compiledAnalysis, null, 2);
48
+
49
+ let contextSection = '';
50
+ if (contextSnippets.length > 0) {
51
+ contextSection = `\n\nADDITIONAL CONTEXT FROM MEETING DOCUMENTS:\n${contextSnippets.join('\n---\n')}`;
52
+ }
53
+
54
+ const prompt = `You are an expert technical writer and knowledge analyst. You have the complete analysis of a recorded meeting/call.
55
+
56
+ MEETING: "${callName}"
57
+ USER: "${userName}"
58
+
59
+ COMPILED ANALYSIS:
60
+ ${analysisStr}${contextSection}
61
+
62
+ YOUR TASK:
63
+ Identify topics, concepts, decisions, processes, or systems discussed in this meeting that would benefit from a deeper explanatory document. Think about what a team member who wasn't on the call would need to understand.
64
+
65
+ TOPIC CATEGORIES (use these exact category names):
66
+ - "concept" — Technical concepts, patterns, or architectures discussed
67
+ - "decision" — Key decisions made with rationale (ADR-style)
68
+ - "process" — Workflows, procedures, or step-by-step processes discussed
69
+ - "system" — Systems, services, or components explained or referenced
70
+ - "requirement" — Requirements, specs, or acceptance criteria discussed
71
+ - "guide" — How-to guides or implementation approaches covered
72
+ - "context" — Background context, history, or domain knowledge shared
73
+ - "action-plan" — Detailed expansion of complex action items or ticket work
74
+
75
+ RULES:
76
+ 1. Identify 3-10 topics depending on meeting complexity. More topics for richer meetings.
77
+ 2. Each topic should be substantial enough for a standalone 200-500 word document.
78
+ 3. Don't create topics for trivial items or simple status updates.
79
+ 4. DO create topics for anything that needed explanation during the call.
80
+ 5. Focus on what was DISCUSSED and EXPLAINED, not just mentioned in passing.
81
+ 6. Link each topic back to the specific tickets, CRs, action items, or discussion points that inspired it.
82
+ 7. Order by relevance — most important topics first.
83
+
84
+ RESPOND WITH ONLY VALID JSON — no markdown fences, no extra text:
85
+
86
+ {
87
+ "topics": [
88
+ {
89
+ "id": "DD-01",
90
+ "title": "Clear, descriptive title for the document",
91
+ "category": "concept|decision|process|system|requirement|guide|context|action-plan",
92
+ "description": "2-3 sentence description of what this document should cover",
93
+ "relevance": "Why this topic needs a deeper explanation",
94
+ "source_items": ["TICKET-123", "CR-45", "AI-3"]
95
+ }
96
+ ]
97
+ }`;
98
+
99
+ const requestPayload = {
100
+ model: config.GEMINI_MODEL,
101
+ contents: [{ role: 'user', parts: [{ text: prompt }] }],
102
+ config: {
103
+ systemInstruction: 'You are a knowledge analyst. Identify topics from meeting analysis that warrant deeper explanatory documentation. Respond with valid JSON only.',
104
+ maxOutputTokens: 16384,
105
+ temperature: 0.3,
106
+ thinkingConfig: { thinkingBudget },
107
+ },
108
+ };
109
+
110
+ const t0 = Date.now();
111
+ const response = await withRetry(
112
+ () => ai.models.generateContent(requestPayload),
113
+ { label: 'Deep dive topic discovery', maxRetries: 2, baseDelay: 3000 }
114
+ );
115
+ const durationMs = Date.now() - t0;
116
+ const rawText = response.text;
117
+
118
+ const parsed = extractJson(rawText);
119
+ const topics = parsed?.topics || [];
120
+
121
+ const usage = response.usageMetadata || {};
122
+ const tokenUsage = {
123
+ inputTokens: usage.promptTokenCount || 0,
124
+ outputTokens: usage.candidatesTokenCount || 0,
125
+ totalTokens: usage.totalTokenCount || 0,
126
+ thoughtTokens: usage.thoughtsTokenCount || 0,
127
+ };
128
+
129
+ return { topics, raw: rawText, durationMs, tokenUsage };
130
+ }
131
+
132
+ // ======================== DOCUMENT GENERATION ========================
133
+
134
+ /**
135
+ * Generate a single deep-dive Markdown document for a given topic.
136
+ *
137
+ * @param {object} ai - GoogleGenAI instance
138
+ * @param {object} topic - Topic object from discoverTopics
139
+ * @param {object} compiledAnalysis - Full compiled analysis for context
140
+ * @param {object} options
141
+ * @returns {Promise<{markdown: string, raw: string, durationMs: number, tokenUsage: object}>}
142
+ */
143
+ async function generateDocument(ai, topic, compiledAnalysis, options = {}) {
144
+ const { callName = 'meeting', userName = '', thinkingBudget = 16384, contextSnippets = [] } = options;
145
+
146
+ // Extract relevant items from analysis based on source_items
147
+ const relevantContext = extractRelevantItems(compiledAnalysis, topic.source_items);
148
+
149
+ let contextSection = '';
150
+ if (contextSnippets.length > 0) {
151
+ contextSection = `\n\nRELEVANT MEETING DOCUMENTS:\n${contextSnippets.slice(0, 3).join('\n---\n')}`;
152
+ }
153
+
154
+ const categoryGuidance = getCategoryGuidance(topic.category);
155
+
156
+ const prompt = `You are an expert technical writer creating a deep-dive explanatory document based on a meeting discussion.
157
+
158
+ MEETING: "${callName}"
159
+ DOCUMENT TO WRITE: "${topic.title}"
160
+ CATEGORY: ${topic.category}
161
+ DESCRIPTION: ${topic.description}
162
+
163
+ RELEVANT ITEMS FROM THE MEETING ANALYSIS:
164
+ ${JSON.stringify(relevantContext, null, 2)}${contextSection}
165
+
166
+ ${categoryGuidance}
167
+
168
+ WRITING RULES:
169
+ 1. Write in clear, professional Markdown.
170
+ 2. Target 300-800 words depending on complexity.
171
+ 3. Use headers (##, ###), bullet points, tables, and code blocks where appropriate.
172
+ 4. Include a "Background" section explaining context from the meeting.
173
+ 5. Include a "Details" section with the deep explanation.
174
+ 6. Include a "Next Steps" or "Implications" section where relevant.
175
+ 7. Reference specific items (tickets, CRs, action items) from the meeting using their IDs.
176
+ 8. Write for someone who WASN'T on the call — they should understand the topic fully.
177
+ 9. Be factual — only include information that was discussed or can be inferred from the analysis.
178
+ 10. DO NOT include YAML frontmatter or metadata blocks — start directly with the title.
179
+
180
+ START YOUR RESPONSE DIRECTLY WITH THE MARKDOWN CONTENT (no fences, no preamble):`;
181
+
182
+ const requestPayload = {
183
+ model: config.GEMINI_MODEL,
184
+ contents: [{ role: 'user', parts: [{ text: prompt }] }],
185
+ config: {
186
+ systemInstruction: 'You are a technical writer creating explanatory documentation from meeting analysis. Write clear, well-structured Markdown. Start directly with the content.',
187
+ maxOutputTokens: 8192,
188
+ temperature: 0.4,
189
+ thinkingConfig: { thinkingBudget },
190
+ },
191
+ };
192
+
193
+ const t0 = Date.now();
194
+ const response = await withRetry(
195
+ () => ai.models.generateContent(requestPayload),
196
+ { label: `Deep dive doc: ${topic.title}`, maxRetries: 2, baseDelay: 3000 }
197
+ );
198
+ const durationMs = Date.now() - t0;
199
+ const rawText = response.text;
200
+
201
+ // Clean up — strip markdown fences if the model wrapped it
202
+ let markdown = rawText.trim();
203
+ if (markdown.startsWith('```markdown')) {
204
+ markdown = markdown.replace(/^```markdown\s*\n?/, '').replace(/\n?```\s*$/, '');
205
+ } else if (markdown.startsWith('```md')) {
206
+ markdown = markdown.replace(/^```md\s*\n?/, '').replace(/\n?```\s*$/, '');
207
+ } else if (markdown.startsWith('```')) {
208
+ markdown = markdown.replace(/^```\s*\n?/, '').replace(/\n?```\s*$/, '');
209
+ }
210
+
211
+ const usage = response.usageMetadata || {};
212
+ const tokenUsage = {
213
+ inputTokens: usage.promptTokenCount || 0,
214
+ outputTokens: usage.candidatesTokenCount || 0,
215
+ totalTokens: usage.totalTokenCount || 0,
216
+ thoughtTokens: usage.thoughtsTokenCount || 0,
217
+ };
218
+
219
+ return { markdown, raw: rawText, durationMs, tokenUsage };
220
+ }
221
+
222
+ // ======================== BATCH GENERATION ========================
223
+
224
+ /**
225
+ * Generate all deep-dive documents in parallel batches.
226
+ *
227
+ * @param {object} ai - GoogleGenAI instance
228
+ * @param {Array} topics - Topics from discoverTopics
229
+ * @param {object} compiledAnalysis - Full compiled analysis
230
+ * @param {object} options
231
+ * @param {number} [options.concurrency=2] - Max parallel document generations
232
+ * @param {Function} [options.onProgress] - Callback(completed, total, topic) for progress
233
+ * @returns {Promise<Array<{topic: object, markdown: string, durationMs: number, tokenUsage: object, error?: string}>>}
234
+ */
235
+ async function generateAllDocuments(ai, topics, compiledAnalysis, options = {}) {
236
+ const { concurrency = 2, onProgress, ...docOptions } = options;
237
+
238
+ const results = [];
239
+ const queue = [...topics];
240
+ let completed = 0;
241
+
242
+ // Process in batches
243
+ while (queue.length > 0) {
244
+ const batch = queue.splice(0, concurrency);
245
+ const batchResults = await Promise.allSettled(
246
+ batch.map(topic =>
247
+ generateDocument(ai, topic, compiledAnalysis, docOptions)
248
+ .then(result => {
249
+ completed++;
250
+ if (onProgress) onProgress(completed, topics.length, topic);
251
+ return { topic, ...result };
252
+ })
253
+ )
254
+ );
255
+
256
+ for (let i = 0; i < batchResults.length; i++) {
257
+ const r = batchResults[i];
258
+ if (r.status === 'fulfilled') {
259
+ results.push(r.value);
260
+ } else {
261
+ completed++;
262
+ if (onProgress) onProgress(completed, topics.length, batch[i]);
263
+ results.push({
264
+ topic: batch[i],
265
+ markdown: null,
266
+ raw: null,
267
+ durationMs: 0,
268
+ tokenUsage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, thoughtTokens: 0 },
269
+ error: r.reason?.message || 'Unknown error',
270
+ });
271
+ }
272
+ }
273
+ }
274
+
275
+ return results;
276
+ }
277
+
278
+ // ======================== OUTPUT ========================
279
+
280
+ /**
281
+ * Write all generated documents to disk and create an index.
282
+ *
283
+ * @param {string} deepDiveDir - Output directory for deep-dive docs
284
+ * @param {Array} documents - Results from generateAllDocuments
285
+ * @param {object} meta - Metadata for the index
286
+ * @returns {{ indexPath: string, docPaths: string[], stats: object }}
287
+ */
288
+ function writeDeepDiveOutput(deepDiveDir, documents, meta = {}) {
289
+ fs.mkdirSync(deepDiveDir, { recursive: true });
290
+
291
+ const docPaths = [];
292
+ const successful = documents.filter(d => d.markdown);
293
+ const failed = documents.filter(d => !d.markdown);
294
+
295
+ // Write individual documents
296
+ for (const doc of successful) {
297
+ const slug = slugify(doc.topic.title);
298
+ const fileName = `${doc.topic.id.toLowerCase()}-${slug}.md`;
299
+ const filePath = path.join(deepDiveDir, fileName);
300
+ fs.writeFileSync(filePath, doc.markdown, 'utf8');
301
+ docPaths.push(filePath);
302
+ doc._fileName = fileName;
303
+ }
304
+
305
+ // Build index
306
+ const indexLines = [
307
+ `# Deep Dive — ${meta.callName || 'Meeting Analysis'}`,
308
+ '',
309
+ `> Generated ${successful.length} explanatory document(s) from the meeting discussion.`,
310
+ `> Run: ${meta.timestamp || new Date().toISOString()}`,
311
+ '',
312
+ ];
313
+
314
+ // Group by category
315
+ const categories = {};
316
+ for (const doc of successful) {
317
+ const cat = doc.topic.category || 'other';
318
+ if (!categories[cat]) categories[cat] = [];
319
+ categories[cat].push(doc);
320
+ }
321
+
322
+ const categoryLabels = {
323
+ 'concept': 'Concepts & Architecture',
324
+ 'decision': 'Decisions',
325
+ 'process': 'Processes & Workflows',
326
+ 'system': 'Systems & Components',
327
+ 'requirement': 'Requirements',
328
+ 'guide': 'Guides & How-To',
329
+ 'context': 'Background & Context',
330
+ 'action-plan': 'Action Plans',
331
+ };
332
+
333
+ for (const [cat, docs] of Object.entries(categories)) {
334
+ indexLines.push(`## ${categoryLabels[cat] || cat}`);
335
+ indexLines.push('');
336
+ for (const doc of docs) {
337
+ indexLines.push(`- **[${doc.topic.title}](${doc._fileName})** — ${doc.topic.description}`);
338
+ }
339
+ indexLines.push('');
340
+ }
341
+
342
+ // Stats
343
+ const totalTokens = documents.reduce((s, d) => s + (d.tokenUsage?.totalTokens || 0), 0);
344
+ const totalDuration = documents.reduce((s, d) => s + (d.durationMs || 0), 0);
345
+
346
+ indexLines.push('---');
347
+ indexLines.push('');
348
+ indexLines.push(`*${successful.length} documents generated | ${totalTokens.toLocaleString()} tokens | ${(totalDuration / 1000).toFixed(1)}s*`);
349
+
350
+ if (failed.length > 0) {
351
+ indexLines.push('');
352
+ indexLines.push(`> ⚠ ${failed.length} document(s) failed to generate:`);
353
+ for (const doc of failed) {
354
+ indexLines.push(`> - ${doc.topic.title}: ${doc.error}`);
355
+ }
356
+ }
357
+
358
+ const indexPath = path.join(deepDiveDir, 'INDEX.md');
359
+ fs.writeFileSync(indexPath, indexLines.join('\n'), 'utf8');
360
+ docPaths.unshift(indexPath);
361
+
362
+ // Write metadata JSON
363
+ const metaPath = path.join(deepDiveDir, 'deep-dive.json');
364
+ fs.writeFileSync(metaPath, JSON.stringify({
365
+ timestamp: meta.timestamp,
366
+ callName: meta.callName,
367
+ topicCount: successful.length,
368
+ failedCount: failed.length,
369
+ totalTokens,
370
+ totalDurationMs: totalDuration,
371
+ topics: documents.map(d => ({
372
+ id: d.topic.id,
373
+ title: d.topic.title,
374
+ category: d.topic.category,
375
+ fileName: d._fileName || null,
376
+ success: !!d.markdown,
377
+ error: d.error || null,
378
+ tokens: d.tokenUsage?.totalTokens || 0,
379
+ durationMs: d.durationMs,
380
+ })),
381
+ }, null, 2), 'utf8');
382
+ docPaths.push(metaPath);
383
+
384
+ return {
385
+ indexPath,
386
+ docPaths,
387
+ stats: {
388
+ total: documents.length,
389
+ successful: successful.length,
390
+ failed: failed.length,
391
+ totalTokens,
392
+ totalDurationMs: totalDuration,
393
+ },
394
+ };
395
+ }
396
+
397
+ // ======================== HELPERS ========================
398
+
399
+ /**
400
+ * Extract items from compiled analysis that match the given source_items IDs.
401
+ */
402
+ function extractRelevantItems(analysis, sourceItems = []) {
403
+ if (!sourceItems || sourceItems.length === 0) return analysis;
404
+
405
+ const ids = new Set(sourceItems.map(id => id.toLowerCase()));
406
+ const relevant = {};
407
+
408
+ // Tickets
409
+ if (analysis.tickets) {
410
+ const matched = analysis.tickets.filter(t =>
411
+ ids.has((t.ticket_id || '').toLowerCase()) ||
412
+ ids.has((t.id || '').toLowerCase())
413
+ );
414
+ if (matched.length > 0) relevant.tickets = matched;
415
+ }
416
+
417
+ // Change requests
418
+ if (analysis.change_requests) {
419
+ const matched = analysis.change_requests.filter(cr =>
420
+ ids.has((cr.id || '').toLowerCase()) ||
421
+ sourceItems.some(si => (cr.WHAT || '').toLowerCase().includes(si.toLowerCase()))
422
+ );
423
+ if (matched.length > 0) relevant.change_requests = matched;
424
+ }
425
+
426
+ // Action items
427
+ if (analysis.action_items) {
428
+ const matched = analysis.action_items.filter(ai =>
429
+ ids.has((ai.id || '').toLowerCase()) ||
430
+ ids.has((ai.action_id || '').toLowerCase())
431
+ );
432
+ if (matched.length > 0) relevant.action_items = matched;
433
+ }
434
+
435
+ // Blockers
436
+ if (analysis.blockers) {
437
+ const matched = analysis.blockers.filter(b =>
438
+ ids.has((b.id || '').toLowerCase()) ||
439
+ ids.has((b.blocker_id || '').toLowerCase())
440
+ );
441
+ if (matched.length > 0) relevant.blockers = matched;
442
+ }
443
+
444
+ // Scope changes
445
+ if (analysis.scope_changes) {
446
+ const matched = analysis.scope_changes.filter(sc =>
447
+ ids.has((sc.id || '').toLowerCase())
448
+ );
449
+ if (matched.length > 0) relevant.scope_changes = matched;
450
+ }
451
+
452
+ // Include summary for context
453
+ if (analysis.summary) relevant.summary = analysis.summary;
454
+ if (analysis.your_tasks) relevant.your_tasks = analysis.your_tasks;
455
+
456
+ // If nothing matched specifically, return the full analysis as context
457
+ const hasMatches = Object.keys(relevant).some(k => k !== 'summary' && k !== 'your_tasks');
458
+ return hasMatches ? relevant : analysis;
459
+ }
460
+
461
+ /**
462
+ * Get category-specific writing guidance for the AI.
463
+ */
464
+ function getCategoryGuidance(category) {
465
+ const guides = {
466
+ 'concept': `CATEGORY GUIDANCE — CONCEPT DOCUMENT:
467
+ Write an explanatory document about this technical concept or pattern.
468
+ - Start with a "What Is It?" section for someone unfamiliar
469
+ - Explain HOW it works and WHY it's used in this context
470
+ - Include diagrams (as text descriptions) if helpful
471
+ - Connect it to the specific implementation discussed in the meeting`,
472
+
473
+ 'decision': `CATEGORY GUIDANCE — DECISION RECORD:
474
+ Write this as an Architecture Decision Record (ADR) style document.
475
+ - "Context" — what situation led to this decision
476
+ - "Decision" — what was decided and by whom
477
+ - "Rationale" — why this option was chosen over alternatives
478
+ - "Consequences" — what this means going forward
479
+ - "Alternatives Considered" if they were discussed`,
480
+
481
+ 'process': `CATEGORY GUIDANCE — PROCESS DOCUMENT:
482
+ Write a clear step-by-step process or workflow guide.
483
+ - Use numbered steps for sequential processes
484
+ - Include who is responsible for each step
485
+ - Note any prerequisites or dependencies
486
+ - Highlight decision points or branching paths
487
+ - Include any tools or systems involved`,
488
+
489
+ 'system': `CATEGORY GUIDANCE — SYSTEM OVERVIEW:
490
+ Write an overview of this system, service, or component.
491
+ - What it does and its role in the larger architecture
492
+ - Key interfaces or integration points
493
+ - Configuration or setup considerations
494
+ - Known limitations or technical debt discussed
495
+ - How it relates to other systems mentioned`,
496
+
497
+ 'requirement': `CATEGORY GUIDANCE — REQUIREMENT BREAKDOWN:
498
+ Write a detailed requirement specification.
499
+ - Clear statement of what is needed
500
+ - Acceptance criteria if discussed
501
+ - Technical constraints or dependencies
502
+ - Scope boundaries — what's in and what's out
503
+ - Priority and timeline if mentioned`,
504
+
505
+ 'guide': `CATEGORY GUIDANCE — HOW-TO GUIDE:
506
+ Write a practical implementation guide.
507
+ - Prerequisites and setup
508
+ - Step-by-step instructions
509
+ - Code snippets or configuration examples if relevant
510
+ - Common pitfalls or gotchas mentioned
511
+ - Testing or verification steps`,
512
+
513
+ 'context': `CATEGORY GUIDANCE — BACKGROUND CONTEXT:
514
+ Write a context document for team knowledge sharing.
515
+ - Historical context — how we got here
516
+ - Current state of affairs
517
+ - Key stakeholders and their perspectives
518
+ - Relevant constraints or dependencies
519
+ - Why this context matters for current work`,
520
+
521
+ 'action-plan': `CATEGORY GUIDANCE — ACTION PLAN:
522
+ Write a detailed action plan expanding on the discussed items.
523
+ - Break down complex action items into sub-tasks
524
+ - Identify dependencies between tasks
525
+ - Suggest implementation order
526
+ - Highlight risks or blockers for each step
527
+ - Include rough estimates if discussed`,
528
+ };
529
+
530
+ return guides[category] || `CATEGORY GUIDANCE: Write a clear, well-structured explanatory document about this topic.`;
531
+ }
532
+
533
+ /**
534
+ * Convert a title to a URL-safe slug.
535
+ */
536
+ function slugify(text) {
537
+ return text
538
+ .toLowerCase()
539
+ .replace(/[^a-z0-9]+/g, '-')
540
+ .replace(/^-+|-+$/g, '')
541
+ .slice(0, 60);
542
+ }
543
+
544
+ module.exports = {
545
+ discoverTopics,
546
+ generateDocument,
547
+ generateAllDocuments,
548
+ writeDeepDiveOutput,
549
+ };