task-summary-extractor 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,266 @@
1
+ /**
2
+ * Adaptive Thinking Budget — dynamically scales Gemini thinking tokens
3
+ * based on segment complexity analysis.
4
+ *
5
+ * Factors considered:
6
+ * - Segment position in call (later segments need more cross-referencing)
7
+ * - VTT transcript density (more dialogue = more complex)
8
+ * - Context document count and relevance
9
+ * - Previous analysis density (accumulated ticket/CR count)
10
+ * - Segment boundary context (mid-conversation segments need more thought)
11
+ *
12
+ * Returns exact thinking budget per segment rather than using a flat value.
13
+ */
14
+
15
+ 'use strict';
16
+
17
+ // ======================== BUDGET RANGES ========================
18
+
19
/**
 * Thinking-budget limits (in tokens) shared by the segment and compilation
 * calculators. Frozen so that a consumer of the exported object cannot
 * accidentally change the clamp limits for every other caller; per-call
 * overrides go through the `baseBudget` arguments instead.
 */
const BUDGET = Object.freeze({
  /** Absolute minimum thinking budget */
  MIN: 8192,
  /** Base thinking budget for a simple segment */
  BASE: 16384,
  /** Maximum thinking budget per segment (avoid eating output token pool) */
  MAX: 32768,
  /** Base compilation thinking budget */
  COMPILATION_BASE: 10240,
  /** Max compilation thinking budget */
  COMPILATION_MAX: 24576,
});
31
+
32
+ // ======================== COMPLEXITY ANALYSIS ========================
33
+
34
/**
 * Analyze VTT/SRT transcript content to estimate segment complexity.
 *
 * The score starts at 20 and is raised by word count, number of distinct
 * speakers, technical vocabulary, code/file references, ticket mentions and
 * the number of timestamped lines; it is capped at 100.
 *
 * @param {string} vttContent - Raw VTT/SRT transcript text
 * @returns {{ speakerCount: number, cueCount: number, wordCount: number,
 *             hasTechnicalTerms: boolean, hasCodeReferences: boolean,
 *             topicDensity: number, complexityScore: number }}
 *   All-zero / all-false metrics when `vttContent` is empty or not a string.
 */
function analyzeTranscriptComplexity(vttContent) {
  if (!vttContent || typeof vttContent !== 'string') {
    return {
      speakerCount: 0, cueCount: 0, wordCount: 0,
      hasTechnicalTerms: false, hasCodeReferences: false,
      topicDensity: 0, complexityScore: 0,
    };
  }

  // WebVTT allows LF, CRLF and bare CR line terminators; splitting on '\n'
  // alone would collapse a CR-only file into a single line.
  const lines = vttContent.split(/\r\n|\r|\n/);

  // Count cues (lines containing an mm:ss-style timestamp)
  const cueCount = lines.filter((l) => /\d{2}:\d{2}/.test(l)).length;

  // Text lines: trimmed, non-empty, not a WEBVTT/NOTE header, not a bare
  // cue number and not a cue timing line.
  const textLines = lines
    .map((l) => l.trim())
    .filter((t) => t && !t.startsWith('WEBVTT') && !t.startsWith('NOTE') &&
      !/^\d+$/.test(t) && !/-->/.test(t));

  // Detect speakers (name prefixes like "Mohamed Elhadi: " or "<v Mohamed>")
  const speakers = new Set();
  for (const line of textLines) {
    // WebVTT voice tags, optionally with classes (<v.loud Name>); a line may
    // carry more than one voice span, so collect every tag.
    for (const [, name] of line.matchAll(/<v(?:\.[^\s>]+)?\s+([^>]+)>/g)) {
      speakers.add(name.toLowerCase().trim());
    }
    // Colon-separated speakers: "Name: text" (or a bare "Name:" label line)
    const colonMatch = line.match(/^([A-Z][a-zA-Z\s]{2,30}):(?:\s|$)/);
    if (colonMatch) speakers.add(colonMatch[1].toLowerCase().trim());
  }
  const speakerCount = speakers.size;

  // Strip cue markup (voice/class/style tags, inline timestamps) so that tag
  // names and speaker names inside tags are not counted as spoken words or
  // mistaken for dotted code references.
  const cueMarkup = /<\/?(?:c|i|b|u|v|ruby|rt|lang|font)(?:[.\s][^>]*)?>|<\d{2,}:\d{2}[^>]*>/g;
  const fullText = textLines.map((t) => t.replace(cueMarkup, '')).join(' ');
  const wordCount = fullText.split(/\s+/).filter(Boolean).length;

  const countMatches = (pattern) => (fullText.match(pattern) || []).length;

  // Detect technical terms
  const techPatterns = /\b(API|backend|frontend|endpoint|database|migration|deploy|merge|branch|commit|sprint|bug|regression|hotfix|release|staging|production|microservice|dockerfile|kubernetes|nginx|redis|elasticsearch|JWT|OAuth|CORS|webhook|CI\/CD|pipeline|schema|query|index|enum|interface|repository)\b/gi;
  const hasTechnicalTerms = countMatches(techPatterns) > 3;

  // Detect code/file references
  const codePatterns = /\b([A-Z][a-z]+(?:Service|Controller|Repository|Component|Module|Factory|Provider|Handler|Middleware|DTO|Entity|Model|Mapper|Resolver|Guard|Interceptor|Filter|Pipe|Directive))\b|\b(\.cs|\.ts|\.js|\.html|\.scss|\.json|\.yaml|\.xml)\b|[a-zA-Z]+\.[a-zA-Z]+\.[a-zA-Z]+/g;
  const hasCodeReferences = countMatches(codePatterns) > 2;

  // Topic density: rough estimate of distinct topics via ticket-style mentions
  const ticketPatterns = /\b(CR\s*\d+|ticket\s*#?\d+|bug\s*#?\d+|task\s*#?\d+|item\s*#?\d+|issue\s*#?\d+)\b/gi;
  const ticketMentions = countMatches(ticketPatterns);

  // Complexity score (0-100)
  let complexityScore = 20; // base

  // Word count factor: more words = more complex
  if (wordCount > 2000) complexityScore += 20;
  else if (wordCount > 1000) complexityScore += 10;
  else if (wordCount > 500) complexityScore += 5;

  // Speaker count: more speakers = more complex
  if (speakerCount >= 4) complexityScore += 15;
  else if (speakerCount >= 2) complexityScore += 8;

  // Technical density
  if (hasTechnicalTerms) complexityScore += 10;
  if (hasCodeReferences) complexityScore += 10;

  // Ticket mentions
  if (ticketMentions >= 5) complexityScore += 15;
  else if (ticketMentions >= 2) complexityScore += 8;
  else if (ticketMentions >= 1) complexityScore += 3;

  // Cue density (more cues = more conversation = more complex)
  if (cueCount > 100) complexityScore += 10;
  else if (cueCount > 50) complexityScore += 5;

  return {
    speakerCount,
    cueCount,
    wordCount,
    hasTechnicalTerms,
    hasCodeReferences,
    topicDensity: ticketMentions,
    complexityScore: Math.min(100, complexityScore),
  };
}
126
+
127
/**
 * Calculate thinking budget for a segment based on multiple complexity factors.
 *
 * Starting from the base budget, boosts are added for: segment position
 * (up to +6144 for the last segment), items accumulated in earlier analyses,
 * the number of context documents, transcript complexity, and a fixed +2048
 * for the first segment of a multi-segment call. The result is clamped to
 * [BUDGET.MIN, BUDGET.MAX].
 *
 * @param {object} [params] - Omitted or null is treated as `{}`
 * @param {number} [params.segmentIndex=0] - 0-based segment index
 * @param {number} [params.totalSegments=1] - Total number of segments
 * @param {Array} [params.previousAnalyses] - All prior segment analyses;
 *   null/undefined entries are ignored
 * @param {Array} [params.contextDocs] - Available context documents
 * @param {string} [params.vttContent] - VTT transcript content for this segment
 * @param {number} [params.baseBudget] - Override base budget from config;
 *   null or non-numeric values fall back to BUDGET.BASE
 * @returns {{ budget: number, reason: string, complexity: object }}
 */
function calculateThinkingBudget(params = {}) {
  const {
    segmentIndex = 0,
    totalSegments = 1,
    previousAnalyses = [],
    contextDocs = [],
    vttContent = '',
    baseBudget = BUDGET.BASE,
  } = params ?? {};

  // Coerce the override to a number so a config-supplied string cannot turn
  // the additions below into string concatenation, and so NaN/null cannot
  // leak through the clamp (Math.min/Math.max propagate NaN).
  const requestedBase = baseBudget === null ? NaN : Number(baseBudget);
  let budget = Number.isFinite(requestedBase) ? requestedBase : BUDGET.BASE;
  const reasons = [];

  // 1. Segment position scaling — later segments accumulate more cross-references.
  // The ratio is capped at 1 so an out-of-range index cannot exceed the +6K cap.
  const positionRatio = totalSegments > 1
    ? Math.min(1, segmentIndex / (totalSegments - 1))
    : 0;
  const positionBoost = Math.round(positionRatio * 6144); // up to +6K for last segment
  if (positionBoost > 0) {
    budget += positionBoost;
    reasons.push(`+${positionBoost} position (seg ${segmentIndex + 1}/${totalSegments})`);
  }

  // 2. Previous analysis density — more accumulated items = more cross-referencing needed
  const priorAnalyses = Array.isArray(previousAnalyses) ? previousAnalyses : [];
  const itemKeys = ['tickets', 'action_items', 'change_requests', 'blockers'];
  let totalItems = 0;
  for (const prev of priorAnalyses) {
    for (const key of itemKeys) {
      totalItems += (prev?.[key]?.length || 0);
    }
  }
  if (totalItems > 20) {
    const crossRefBoost = Math.min(4096, Math.round(totalItems * 100));
    budget += crossRefBoost;
    reasons.push(`+${crossRefBoost} cross-ref (${totalItems} accumulated items)`);
  } else if (totalItems > 8) {
    const crossRefBoost = Math.min(2048, Math.round(totalItems * 80));
    budget += crossRefBoost;
    reasons.push(`+${crossRefBoost} cross-ref (${totalItems} items)`);
  }

  // 3. Context document complexity
  const docCount = Array.isArray(contextDocs) ? contextDocs.length : 0;
  if (docCount > 5) {
    const docBoost = Math.min(3072, docCount * 256);
    budget += docBoost;
    reasons.push(`+${docBoost} docs (${docCount} context docs)`);
  }

  // 4. Transcript complexity analysis
  const txComplexity = analyzeTranscriptComplexity(vttContent);
  if (txComplexity.complexityScore > 60) {
    const txBoost = Math.round((txComplexity.complexityScore - 40) * 80);
    budget += txBoost;
    reasons.push(`+${txBoost} transcript (complexity: ${txComplexity.complexityScore}/100)`);
  } else if (txComplexity.complexityScore > 30) {
    const txBoost = Math.round((txComplexity.complexityScore - 30) * 40);
    budget += txBoost;
    reasons.push(`+${txBoost} transcript (complexity: ${txComplexity.complexityScore}/100)`);
  }

  // 5. First segment bonus — the first segment sets the context foundation
  if (segmentIndex === 0 && totalSegments > 1) {
    budget += 2048;
    reasons.push('+2048 first-segment foundation');
  }

  // Clamp
  budget = Math.max(BUDGET.MIN, Math.min(BUDGET.MAX, budget));

  return {
    budget,
    reason: reasons.length > 0 ? reasons.join(', ') : 'base budget',
    complexity: txComplexity,
  };
}
213
+
214
/**
 * Calculate compilation thinking budget based on total analysis size.
 *
 * Starting from the base budget, boosts are added when more than four
 * segments are compiled and when the combined number of tickets, action
 * items, change requests, blockers and scope changes is large. The result is
 * clamped to [BUDGET.COMPILATION_BASE, BUDGET.COMPILATION_MAX], so an
 * override below BUDGET.COMPILATION_BASE is raised to that floor.
 *
 * @param {Array} allSegmentAnalyses - All segment analyses to compile; a
 *   missing or non-array value is treated as empty, null entries are ignored
 * @param {number} [baseBudget] - Override base; null or non-numeric values
 *   fall back to BUDGET.COMPILATION_BASE
 * @returns {{ budget: number, reason: string }}
 */
function calculateCompilationBudget(allSegmentAnalyses, baseBudget = BUDGET.COMPILATION_BASE) {
  const analyses = Array.isArray(allSegmentAnalyses) ? allSegmentAnalyses : [];

  // Coerce the override so a string cannot cause concatenation below and so
  // NaN/null cannot slip through the clamp.
  const requestedBase = baseBudget === null ? NaN : Number(baseBudget);
  let budget = Number.isFinite(requestedBase) ? requestedBase : BUDGET.COMPILATION_BASE;
  const reasons = [];

  // Scale with segment count
  const segCount = analyses.length;
  if (segCount > 4) {
    const segBoost = Math.min(8192, (segCount - 4) * 2048);
    budget += segBoost;
    reasons.push(`+${segBoost} segments (${segCount} to compile)`);
  }

  // Scale with total item count
  const itemKeys = ['tickets', 'action_items', 'change_requests', 'blockers', 'scope_changes'];
  let totalItems = 0;
  for (const analysis of analyses) {
    for (const key of itemKeys) {
      totalItems += (analysis?.[key]?.length || 0);
    }
  }
  if (totalItems > 30) {
    const itemBoost = Math.min(6144, Math.round(totalItems * 100));
    budget += itemBoost;
    reasons.push(`+${itemBoost} items (${totalItems} total to dedup)`);
  } else if (totalItems > 10) {
    const itemBoost = Math.min(3072, Math.round(totalItems * 80));
    budget += itemBoost;
    reasons.push(`+${itemBoost} items (${totalItems} total)`);
  }

  // Clamp
  budget = Math.max(BUDGET.COMPILATION_BASE, Math.min(BUDGET.COMPILATION_MAX, budget));

  return {
    budget,
    reason: reasons.length > 0 ? reasons.join(', ') : 'base budget',
  };
}
260
+
261
+ module.exports = {
262
+ analyzeTranscriptComplexity,
263
+ calculateThinkingBudget,
264
+ calculateCompilationBudget,
265
+ BUDGET,
266
+ };