task-summary-extractor 8.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +605 -0
- package/EXPLORATION.md +451 -0
- package/QUICK_START.md +272 -0
- package/README.md +544 -0
- package/bin/taskex.js +64 -0
- package/package.json +63 -0
- package/process_and_upload.js +107 -0
- package/prompt.json +265 -0
- package/setup.js +505 -0
- package/src/config.js +327 -0
- package/src/logger.js +355 -0
- package/src/pipeline.js +2006 -0
- package/src/renderers/markdown.js +968 -0
- package/src/services/firebase.js +106 -0
- package/src/services/gemini.js +779 -0
- package/src/services/git.js +329 -0
- package/src/services/video.js +305 -0
- package/src/utils/adaptive-budget.js +266 -0
- package/src/utils/change-detector.js +466 -0
- package/src/utils/cli.js +415 -0
- package/src/utils/context-manager.js +499 -0
- package/src/utils/cost-tracker.js +156 -0
- package/src/utils/deep-dive.js +549 -0
- package/src/utils/diff-engine.js +315 -0
- package/src/utils/dynamic-mode.js +567 -0
- package/src/utils/focused-reanalysis.js +317 -0
- package/src/utils/format.js +32 -0
- package/src/utils/fs.js +39 -0
- package/src/utils/global-config.js +315 -0
- package/src/utils/health-dashboard.js +216 -0
- package/src/utils/inject-cli-flags.js +58 -0
- package/src/utils/json-parser.js +245 -0
- package/src/utils/learning-loop.js +301 -0
- package/src/utils/progress-updater.js +451 -0
- package/src/utils/progress.js +166 -0
- package/src/utils/prompt.js +32 -0
- package/src/utils/quality-gate.js +429 -0
- package/src/utils/retry.js +129 -0
|
@@ -0,0 +1,499 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context manager — intelligent context prioritization, VTT slicing,
|
|
3
|
+
* and token-budget-aware document selection for Gemini AI calls.
|
|
4
|
+
*
|
|
5
|
+
* Problem solved: sending 69 docs (~1.5MB) to each segment wastes tokens
|
|
6
|
+
* on general reference docs, diluting AI focus from task/ticket extraction.
|
|
7
|
+
*
|
|
8
|
+
* Solution: 5-tier priority system (P0–P4) + VTT time-slicing + budget management.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
'use strict';
|
|
12
|
+
|
|
13
|
+
const { formatHMS } = require('./format');
|
|
14
|
+
|
|
15
|
+
// ════════════════════════════════════════════════════════════
|
|
16
|
+
// Token Estimation
|
|
17
|
+
// ════════════════════════════════════════════════════════════
|
|
18
|
+
|
|
19
|
+
/**
 * Rough token estimate for a piece of text.
 *
 * Applies a flat ratio of 0.3 tokens per unit of `text.length` (the ratio was
 * chosen for mixed English/Arabic markdown) and rounds up.
 *
 * @param {string} text - Text to measure; any falsy value counts as empty.
 * @returns {number} Estimated token count (0 for empty/falsy input).
 */
function estimateTokens(text) {
  const charCount = text ? text.length : 0;
  return Math.ceil(charCount * 0.3);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Estimate the token cost of a prepared context document.
 *
 * @param {{type: string, content?: string}} doc - Prepared context doc.
 * @returns {number} Text-based estimate for `inlineText` docs, a flat 2000 for
 *   `fileData` docs (PDFs — rough, actual varies), and a flat 500 otherwise.
 */
function estimateDocTokens(doc) {
  switch (doc.type) {
    case 'inlineText':
      return estimateTokens(doc.content);
    case 'fileData':
      return 2000;
    default:
      return 500;
  }
}
|
|
31
|
+
|
|
32
|
+
// ════════════════════════════════════════════════════════════
|
|
33
|
+
// Priority Classification
|
|
34
|
+
// ════════════════════════════════════════════════════════════
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Priority tiers for context documents:
|
|
38
|
+
* P0 — CRITICAL: VTT subtitle, execution plans, checklists (always include)
|
|
39
|
+
* P1 — HIGH: file maps (.robot/ top-level), code-maps from .tasks (always include)
|
|
40
|
+
* P2 — MEDIUM: .docs/summary/ (condensed reference), sub-tickets, business-req docs, call transcripts, other root-level .md/.txt docs
|
|
41
|
+
* P3 — LOW: .robot/core/ patterns, remaining .tasks non-essential docs
|
|
42
|
+
* P4 — BACKGROUND: .docs/ full deep-dives (only if budget allows)
|
|
43
|
+
*/
|
|
44
|
+
// Frozen: this lookup is exported and shared, so tiers must not be renumbered or extended at runtime.
const PRIORITY = Object.freeze({ CRITICAL: 0, HIGH: 1, MEDIUM: 2, LOW: 3, BACKGROUND: 4 });
|
|
45
|
+
|
|
46
|
+
/**
 * Map a document path to one of the PRIORITY tiers.
 *
 * The path is lower-cased and back-slashes are converted to forward slashes
 * before matching, so the checks are case-insensitive and separator-agnostic.
 * Rules are evaluated top to bottom and the first match wins, so their order
 * is significant.
 *
 * @param {string} fileName - Document path or file name.
 * @returns {number} One of the PRIORITY values.
 */
function classifyDocPriority(fileName) {
  const path = fileName.toLowerCase().replace(/\\/g, '/');
  const has = (fragment) => path.includes(fragment);
  const endsWith = (suffix) => path.endsWith(suffix);
  const inTasks = has('.tasks/');

  // Ordered [matches, tier] pairs — the first truthy entry decides the tier.
  const rules = [
    // P0 — subtitle files, execution plans, checklists
    [endsWith('.vtt') || endsWith('.srt'), PRIORITY.CRITICAL],
    [
      inTasks && (has('execution-plan') || has('checklist') || has('remaining-work')),
      PRIORITY.CRITICAL,
    ],
    // P1 — code maps and .robot/ files outside any /core/ directory
    [inTasks && has('code-map'), PRIORITY.HIGH],
    [has('.robot/') && !has('/core/'), PRIORITY.HIGH],
    // P2 — summaries, sub-tickets, business docs, call transcripts
    [has('.docs/summary/'), PRIORITY.MEDIUM],
    [inTasks && has('sub-tickets/'), PRIORITY.MEDIUM],
    [inTasks && has('business-requirements'), PRIORITY.MEDIUM],
    [inTasks && has('call-transcript'), PRIORITY.MEDIUM],
    // P3 — robot core patterns and every remaining .tasks doc
    [has('.robot/core/'), PRIORITY.LOW],
    [inTasks, PRIORITY.LOW],
    // P4 — full .docs deep-dives
    [has('.docs/'), PRIORITY.BACKGROUND],
    // Any other markdown / text document
    [endsWith('.md') || endsWith('.txt'), PRIORITY.MEDIUM],
  ];

  const firstHit = rules.find(([matches]) => matches);
  return firstHit ? firstHit[1] : PRIORITY.LOW;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Pick the context documents to send for a segment, honouring a token budget.
 *
 * Documents are ranked by priority tier (lowest number first) and, within a
 * tier, by estimated token cost (cheapest first). They are then taken in that
 * order while they fit in the remaining budget. CRITICAL and HIGH documents
 * are taken even when they push the total over the budget.
 *
 * @param {Array} allDocs - All prepared context docs [{type, fileName, content?, ...}]
 * @param {number} tokenBudget - Max tokens to allocate for documents
 * @param {object} [opts] - Options
 * @param {number} [opts.segmentIndex] - Current segment index (echoed in stats)
 * @returns {{ selected: Array, excluded: Array, stats: object }}
 *   `selected` holds the chosen docs, `excluded` holds
 *   `{fileName, priority, tokens}` records for the rest.
 */
function selectDocsByBudget(allDocs, tokenBudget, opts = {}) {
  const ranked = allDocs.map((doc) => ({
    doc,
    priority: classifyDocPriority(doc.fileName),
    tokens: estimateDocTokens(doc),
  }));
  ranked.sort((left, right) => {
    if (left.priority !== right.priority) {
      return left.priority - right.priority;
    }
    return left.tokens - right.tokens;
  });

  const selected = [];
  const excluded = [];
  let usedTokens = 0;

  for (const { doc, priority, tokens } of ranked) {
    const fitsBudget = usedTokens + tokens <= tokenBudget;
    const alwaysIncluded = priority <= PRIORITY.HIGH; // P0 and P1 ignore the budget
    if (!fitsBudget && !alwaysIncluded) {
      excluded.push({ fileName: doc.fileName, priority, tokens });
      continue;
    }
    selected.push(doc);
    usedTokens += tokens;
  }

  return {
    selected,
    excluded,
    stats: {
      totalDocs: allDocs.length,
      selectedDocs: selected.length,
      excludedDocs: excluded.length,
      estimatedTokens: usedTokens,
      tokenBudget,
      segmentIndex: opts.segmentIndex,
    },
  };
}
|
|
127
|
+
|
|
128
|
+
// ════════════════════════════════════════════════════════════
|
|
129
|
+
// VTT Time-Slicing
|
|
130
|
+
// ════════════════════════════════════════════════════════════
|
|
131
|
+
|
|
132
|
+
/**
 * Parse subtitle text (WebVTT, or SRT-style timing lines) into cue objects.
 *
 * The content is split into blank-line-separated blocks. In each block the
 * first line containing a `start --> end` timing is located and every line
 * after it becomes the cue text. Blocks without a timing line (the `WEBVTT`
 * header, notes, ...) and cues with empty text are skipped.
 *
 * Accepted timestamp forms:
 *   - `HH:MM:SS.mmm` (full form)
 *   - `MM:SS.mmm`    (hours omitted, which WebVTT allows)
 *   - `,` instead of `.` before the milliseconds (SRT style)
 *
 * Line endings are normalised first, so CRLF files do not leave stray `\r`
 * characters inside cue text.
 *
 * @param {string} vttContent - Raw subtitle file content
 * @returns {Array<{startSec: number, endSec: number, text: string}>}
 *   Cues in file order; an empty array when the input is not a non-empty
 *   string or contains no parsable cue.
 */
function parseVttCues(vttContent) {
  if (typeof vttContent !== 'string' || vttContent === '') return [];

  // No /g flag on purpose: the regex is reused across lines and must stay stateless.
  const timingPattern =
    /(?:(\d+):)?(\d{2}):(\d{2})[.,](\d{3})\s*-->\s*(?:(\d+):)?(\d{2}):(\d{2})[.,](\d{3})/;
  const toSeconds = (hours, minutes, seconds, millis) =>
    Number(hours || 0) * 3600 + Number(minutes) * 60 + Number(seconds) + Number(millis) / 1000;

  const cues = [];
  const blocks = vttContent.replace(/\r\n?/g, '\n').split(/\n\s*\n/);

  for (const block of blocks) {
    const lines = block.trim().split('\n');

    for (let i = 0; i < lines.length; i++) {
      const match = timingPattern.exec(lines[i]);
      if (!match) continue;

      const text = lines.slice(i + 1).join('\n').trim();
      if (text) {
        cues.push({
          startSec: toSeconds(match[1], match[2], match[3], match[4]),
          endSec: toSeconds(match[5], match[6], match[7], match[8]),
          text,
        });
      }
      break; // only the first timing line of a block is considered
    }
  }

  return cues;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Reduce a transcript to the cues that overlap one segment's time window.
 *
 * The window is `[segStartSec - overlapSec, segEndSec + overlapSec]`, with the
 * lower bound clamped at 0. A cue is kept when any part of it falls inside
 * that window. The result is a fresh `WEBVTT` document whose header states the
 * segment range and the cue range shown.
 *
 * @param {string} vttContent - Full VTT content
 * @param {number} segStartSec - Segment start time in seconds
 * @param {number} segEndSec - Segment end time in seconds
 * @param {number} [overlapSec=30] - Margin added on both sides of the segment
 * @returns {string} Sliced VTT content, or `vttContent` unchanged when no cue
 *   could be parsed or none overlaps the window.
 */
function sliceVttForSegment(vttContent, segStartSec, segEndSec, overlapSec = 30) {
  const allCues = parseVttCues(vttContent);
  if (allCues.length === 0) {
    return vttContent; // nothing parsable — hand back the full transcript
  }

  const windowStart = Math.max(0, segStartSec - overlapSec);
  const windowEnd = segEndSec + overlapSec;

  const cuesInWindow = [];
  for (const cue of allCues) {
    const overlapsWindow = cue.endSec >= windowStart && cue.startSec <= windowEnd;
    if (overlapsWindow) {
      cuesInWindow.push(cue);
    }
  }
  if (cuesInWindow.length === 0) {
    return vttContent; // no cue in range — hand back the full transcript
  }

  const header = `WEBVTT\n\n[Segment transcript: ${formatHMS(segStartSec)} — ${formatHMS(segEndSec)}]\n[Showing cues from ${formatHMS(windowStart)} to ${formatHMS(windowEnd)} with ${overlapSec}s overlap]\n`;

  const renderedCues = cuesInWindow.map(
    (cue) => `${formatVttTime(cue.startSec)} --> ${formatVttTime(cue.endSec)}\n${cue.text}`,
  );

  return `${header}\n${renderedCues.join('\n\n')}`;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Format a time offset as a WebVTT timestamp (`HH:MM:SS.mmm`).
 *
 * The value is rounded to whole milliseconds first and then split into
 * fields, so a fractional second that rounds up carries into the next
 * second/minute/hour instead of producing an invalid `...:60.000` field.
 *
 * @param {number} sec - Non-negative offset in seconds.
 * @returns {string} Zero-padded timestamp; hours grow beyond two digits when needed.
 */
function formatVttTime(sec) {
  const totalMs = Math.round(sec * 1000);
  const millis = totalMs % 1000;
  const totalSeconds = (totalMs - millis) / 1000;

  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  const seconds = totalSeconds % 60;

  const pad = (value, width) => String(value).padStart(width, '0');
  return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(millis, 3)}`;
}
|
|
200
|
+
|
|
201
|
+
// ════════════════════════════════════════════════════════════
|
|
202
|
+
// Previous-Segment Context Compression
|
|
203
|
+
// ════════════════════════════════════════════════════════════
|
|
204
|
+
|
|
205
|
+
/**
 * Build compressed previous-analyses context with progressive detail:
 * - The two most recent segments: FULL detail (all fields)
 * - Older segments: COMPRESSED (IDs + statuses only, no verbose descriptions)
 *
 * This prevents unbounded growth of previous-segment context.
 *
 * @param {Array} previousAnalyses - All prior segment analyses, oldest first
 * @param {string} userName - Current user name
 * @returns {string|null} Context text, or null when there are no previous analyses
 */
function buildProgressiveContext(previousAnalyses, userName) {
  if (!previousAnalyses || previousAnalyses.length === 0) return null;

  const parts = [
    'PREVIOUS SEGMENT ANALYSES — maintain continuity, reuse REAL ticket IDs, continue numbering.',
    'Rules: DO NOT repeat already-extracted items. Only add NEW information or STATE CHANGES.',
    `Track tasks for "${userName}" consistently across segments.\n`,
  ];

  // Index of the first segment that still gets full detail (the last two do).
  const firstRecentIdx = previousAnalyses.length - 2;

  previousAnalyses.forEach((prev, idx) => {
    parts.push(
      idx >= firstRecentIdx
        ? buildFullSegmentSummary(prev, idx, userName)
        : buildCompressedSegmentSummary(prev, idx),
    );
  });

  return parts.join('\n');
}

/**
 * Full detail summary for a segment (recent segments).
 *
 * @param {object} prev - Analysis of the segment
 * @param {number} idx - 0-based segment index (rendered 1-based)
 * @param {string} [userName] - Label for the "User Tasks" heading when the
 *   analysis' `your_tasks` carries no `user_name` of its own. It has to be
 *   passed in: this function has no other access to the caller's user name.
 * @returns {string} Multi-line summary ending with a newline
 */
function buildFullSegmentSummary(prev, idx, userName) {
  const lines = [`=== SEGMENT ${idx + 1} (FULL DETAIL) ===`];

  // Summary
  if (prev.summary) lines.push(`Summary: ${prev.summary}`);

  // Tickets
  if (prev.tickets?.length > 0) {
    lines.push('Tickets:');
    for (const t of prev.tickets) {
      lines.push(` ${t.ticket_id} (${t.status}) — ${t.title || 'untitled'}`);
      if (t.documented_state?.plan_status) lines.push(` Doc state: ${t.documented_state.plan_status}`);
      if (t.discussed_state?.summary) lines.push(` Discussed: ${t.discussed_state.summary.substring(0, 200)}`);
    }
  }

  // Change requests
  if (prev.change_requests?.length > 0) {
    lines.push('Change Requests:');
    for (const cr of prev.change_requests) {
      lines.push(` ${cr.id}: ${cr.title || cr.what} [${cr.status}] → ${cr.assigned_to || 'unassigned'}`);
      if (cr.where?.file_path) lines.push(` File: ${cr.where.file_path}`);
      if (cr.blocked_by) lines.push(` Blocked by: ${cr.blocked_by}`);
    }
  }

  // Action items
  if (prev.action_items?.length > 0) {
    lines.push('Action Items:');
    for (const ai of prev.action_items) {
      lines.push(` ${ai.id}: ${ai.description} → ${ai.assigned_to} [${ai.status}]`);
    }
  }

  // Scope changes
  if (prev.scope_changes?.length > 0) {
    lines.push('Scope Changes:');
    for (const sc of prev.scope_changes) {
      lines.push(` ${sc.id} (${sc.type}): ${sc.new_scope} [${sc.impact}]`);
    }
  }

  // Blockers
  if (prev.blockers?.length > 0) {
    lines.push('Blockers:');
    for (const b of prev.blockers) {
      lines.push(` ${b.id}: ${b.description} → ${b.owner} [${b.status}]`);
    }
  }

  // User tasks summary
  if (prev.your_tasks) {
    const yt = prev.your_tasks;
    const owner = yt.user_name || userName || 'unknown user';
    lines.push(`User Tasks (${owner}):`);
    lines.push(` Todo: ${(yt.tasks_todo || []).length} | Waiting: ${(yt.tasks_waiting_on_others || []).length} | Completed: ${(yt.completed_in_call || []).length}`);
    if (yt.summary) lines.push(` Focus: ${yt.summary.substring(0, 200)}`);
  }

  // File references (compact, single line)
  if (prev.file_references?.length > 0) {
    const fileList = prev.file_references
      .map(f => `${f.file_name} (${f.role})${f.resolved_path ? ' → ' + f.resolved_path : ''}`)
      .join(', ');
    lines.push(`Files: ${fileList}`);
  }

  return lines.join('\n') + '\n';
}
|
|
309
|
+
|
|
310
|
+
/**
 * Compressed summary for older segments — IDs and states only.
 *
 * The free-text summary is cut to 150 characters; the `...` marker is added
 * only when something was actually cut, so a short summary is not presented
 * as truncated.
 *
 * @param {object} prev - Analysis of the segment
 * @param {number} idx - 0-based segment index (rendered 1-based)
 * @returns {string} Multi-line summary ending with a newline
 */
function buildCompressedSegmentSummary(prev, idx) {
  const SUMMARY_LIMIT = 150;
  const lines = [`=== SEGMENT ${idx + 1} (COMPRESSED — older segment) ===`];

  // One-line summary
  if (prev.summary) {
    const shortSummary = prev.summary.length > SUMMARY_LIMIT
      ? `${prev.summary.substring(0, SUMMARY_LIMIT)}...`
      : prev.summary;
    lines.push(`Summary: ${shortSummary}`);
  }

  // Tickets: just IDs and statuses
  if (prev.tickets?.length > 0) {
    const ticketList = prev.tickets.map(t => `${t.ticket_id}(${t.status})`).join(', ');
    lines.push(`Tickets: [${ticketList}]`);
  }

  // CRs: just IDs and statuses
  if (prev.change_requests?.length > 0) {
    const crList = prev.change_requests.map(cr => `${cr.id}[${cr.status}]`).join(', ');
    lines.push(`CRs: [${crList}]`);
  }

  // Action items: IDs, assignee and status
  if (prev.action_items?.length > 0) {
    const aiList = prev.action_items.map(ai => `${ai.id}→${ai.assigned_to}[${ai.status}]`).join(', ');
    lines.push(`Actions: [${aiList}]`);
  }

  // Blockers: IDs and statuses
  if (prev.blockers?.length > 0) {
    const bList = prev.blockers.map(b => `${b.id}[${b.status}]`).join(', ');
    lines.push(`Blockers: [${bList}]`);
  }

  // Scope changes: count only
  if (prev.scope_changes?.length > 0) {
    lines.push(`Scope changes: ${prev.scope_changes.length} recorded`);
  }

  return lines.join('\n') + '\n';
}
|
|
348
|
+
|
|
349
|
+
// ════════════════════════════════════════════════════════════
|
|
350
|
+
// Segment Focus Instructions
|
|
351
|
+
// ════════════════════════════════════════════════════════════
|
|
352
|
+
|
|
353
|
+
/**
 * Generate dynamic per-segment focus instructions.
 * Tells the AI what's been found so far and what to look for.
 *
 * The first segment gets baseline-establishing instructions. Every later
 * segment gets the de-duplicated IDs of tickets, change requests, action items
 * and blockers seen so far, followed by a numbered focus list; the final
 * segment additionally gets wrap-up instructions.
 *
 * @param {number} segmentIndex - Current segment (0-based)
 * @param {number} totalSegments - Total segments
 * @param {Array} [previousAnalyses] - Analyses from prior segments; a missing
 *   value is treated as an empty list
 * @param {string} userName - Current user name
 * @returns {string} Focus instructions text
 */
function buildSegmentFocus(segmentIndex, totalSegments, previousAnalyses, userName) {
  const isFirst = segmentIndex === 0;
  const isLast = segmentIndex === totalSegments - 1;

  // "First" wins over "last" when there is only one segment.
  let position = 'MIDDLE — track changes & new items';
  if (isFirst) {
    position = 'FIRST — establish baseline';
  } else if (isLast) {
    position = 'LAST — capture final decisions & wrap-up tasks';
  }

  const lines = [`SEGMENT POSITION: ${segmentIndex + 1} of ${totalSegments} (${position})`];

  if (isFirst) {
    lines.push('FOCUS: Identify ALL tickets, participants, and initial task assignments.');
    lines.push('Establish the baseline state for each ticket. Cross-reference everything against task documents.');
    lines.push(`Pay special attention to tasks assigned to "${userName}".`);
    return lines.join('\n');
  }

  const priorAnalyses = Array.isArray(previousAnalyses) ? previousAnalyses : [];

  // Collect the distinct IDs of one item kind across all prior analyses, in first-seen order.
  const collectIds = (listKey, idKey) => {
    const ids = new Set();
    for (const prev of priorAnalyses) {
      for (const item of prev[listKey] || []) ids.add(item[idKey]);
    }
    return [...ids];
  };

  const ticketIds = collectIds('tickets', 'ticket_id');
  const crIds = collectIds('change_requests', 'id');
  const actionIds = collectIds('action_items', 'id');
  const blockerIds = collectIds('blockers', 'id');

  lines.push(`ALREADY FOUND in previous segments:`);
  if (ticketIds.length > 0) lines.push(` Tickets: ${ticketIds.join(', ')}`);
  if (crIds.length > 0) {
    // CR lists can get long, so only the first 20 are spelled out.
    const overflow = crIds.length > 20 ? ` (+${crIds.length - 20} more)` : '';
    lines.push(` CRs: ${crIds.slice(0, 20).join(', ')}${overflow}`);
  }
  if (actionIds.length > 0) lines.push(` Actions: ${actionIds.join(', ')}`);
  if (blockerIds.length > 0) lines.push(` Blockers: ${blockerIds.join(', ')}`);

  lines.push('');
  lines.push('FOCUS for this segment:');
  lines.push('1. DETECT NEW tickets, CRs, action items, blockers not yet found');
  lines.push('2. TRACK STATE CHANGES to already-known items (status updates, new decisions, scope changes)');
  lines.push('3. CAPTURE any tasks assigned, re-assigned, or completed during this segment');
  lines.push(`4. UPDATE ${userName}'s task list — any new assignments, completions, or blockers`);
  lines.push('5. NOTE discussion depth — conversations with detailed decisions have HIGH task relevance');

  if (isLast) {
    lines.push('');
    lines.push('LAST SEGMENT SPECIAL:');
    lines.push('- Capture all FINAL DECISIONS and wrap-up action items');
    lines.push('- Note any "next steps" or "follow-up" items mentioned');
    lines.push('- Identify items that were discussed but NOT resolved — these become blockers/waiting items');
  }

  return lines.join('\n');
}
|
|
417
|
+
|
|
418
|
+
// ════════════════════════════════════════════════════════════
|
|
419
|
+
// Smart Boundary Overlap Detection
|
|
420
|
+
// ════════════════════════════════════════════════════════════
|
|
421
|
+
|
|
422
|
+
/**
 * Analyze the VTT content near segment boundaries to detect if a
 * conversation was cut mid-sentence or mid-topic. This helps Gemini
 * understand that continuity context is especially important.
 *
 * Signals that are checked:
 *   - a cue starts within the first 5s of the segment;
 *   - the last cue belonging to THIS segment ends less than 3s before the
 *     segment end, or runs past it (cues of later segments are ignored,
 *     otherwise every non-final segment would trigger this);
 *   - the previous analysis has open / in-progress tickets, open blockers,
 *     or a summary containing a carry-over keyword.
 *
 * Returns a boundary context note to inject into the prompt, or null.
 *
 * @param {string} vttContent - Full VTT content
 * @param {number} segmentStartSec - This segment's start time (in call time)
 * @param {number} segmentEndSec - This segment's end time
 * @param {number} segmentIndex - 0-based segment index
 * @param {object} [previousAnalysis] - The analysis from the previous segment
 * @returns {string|null} Boundary context note, or null for the first segment,
 *   for empty/unparsable VTT content, or when no signal fires
 */
function detectBoundaryContext(vttContent, segmentStartSec, segmentEndSec, segmentIndex, previousAnalysis) {
  if (segmentIndex === 0 || !vttContent) return null;

  const cues = parseVttCues(vttContent);
  if (cues.length === 0) return null;

  const notes = [];

  // Speech within the first 5s suggests the conversation was already ongoing.
  const startsMidConversation = cues.some(
    c => c.startSec >= segmentStartSec && c.startSec < segmentStartSec + 5
  );
  if (startsMidConversation) {
    notes.push('This segment starts MID-CONVERSATION — a discussion was already in progress.');
  }

  // Latest end time among the cues that overlap this segment. vttContent holds
  // the whole call, so cues that start at or after the segment end are excluded.
  let lastEndInSegment = null;
  for (const c of cues) {
    const overlapsSegment = c.startSec < segmentEndSec && c.endSec > segmentStartSec;
    if (overlapsSegment && (lastEndInSegment === null || c.endSec > lastEndInSegment)) {
      lastEndInSegment = c.endSec;
    }
  }
  if (lastEndInSegment !== null && segmentEndSec - lastEndInSegment < 3) {
    notes.push('This segment ends MID-CONVERSATION — the discussion likely continues in the next segment.');
  }

  // If previous analysis exists, check for open topics
  if (previousAnalysis) {
    // Open tickets still being discussed
    const openTickets = (previousAnalysis.tickets || [])
      .filter(t => t.status === 'in_progress' || t.status === 'open');
    if (openTickets.length > 0) {
      const openIds = openTickets.map(t => t.ticket_id).join(', ');
      notes.push(`Previous segment had ${openTickets.length} open ticket(s) that may continue here: ${openIds}`);
    }

    // Unresolved blockers
    const openBlockers = (previousAnalysis.blockers || [])
      .filter(b => b.status === 'open');
    if (openBlockers.length > 0) {
      notes.push(`Previous segment had ${openBlockers.length} unresolved blocker(s) — check if they're addressed in this segment.`);
    }

    // Keywords in the previous summary that hint at an ongoing discussion
    const prevSummary = (previousAnalysis.summary || '').toLowerCase();
    const carryOverHints = ['continu', 'next', 'follow', 'مستمر'];
    if (carryOverHints.some(hint => prevSummary.includes(hint))) {
      notes.push('Previous segment\'s summary suggests topics carry over into this segment.');
    }
  }

  if (notes.length === 0) return null;

  return `SEGMENT BOUNDARY CONTEXT:\n${notes.map(n => `• ${n}`).join('\n')}\n→ Pay special attention to continuity — pick up where the previous segment left off. Do NOT re-extract items that were already captured in previous segments unless their status changed.`;
}
|
|
487
|
+
|
|
488
|
+
module.exports = {
|
|
489
|
+
PRIORITY,
|
|
490
|
+
estimateTokens,
|
|
491
|
+
estimateDocTokens,
|
|
492
|
+
classifyDocPriority,
|
|
493
|
+
selectDocsByBudget,
|
|
494
|
+
parseVttCues,
|
|
495
|
+
sliceVttForSegment,
|
|
496
|
+
buildProgressiveContext,
|
|
497
|
+
buildSegmentFocus,
|
|
498
|
+
detectBoundaryContext,
|
|
499
|
+
};
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CostTracker — tracks Gemini API token usage and estimates cost.
|
|
3
|
+
*
|
|
4
|
+
* Pricing is model-specific — passed from config.getActiveModelPricing()
|
|
5
|
+
* at construction time based on the user's selected Gemini model.
|
|
6
|
+
*
|
|
7
|
+
* Output pricing includes thinking tokens (unified rate since 2.5+ models).
|
|
8
|
+
* Falls back to Gemini 2.5 Flash defaults if no pricing is provided.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
'use strict';
|
|
12
|
+
|
|
13
|
+
// Default pricing per million tokens (Gemini 2.5 Flash — Feb 2026)
const DEFAULT_PRICING = {
  inputPerM: 0.30,
  inputLongPerM: 0.30, // flat rate (no long context tier)
  outputPerM: 2.50, // includes thinking tokens
  outputLongPerM: 2.50,
  thinkingPerM: 2.50, // same as output (unified pricing)
  longContextThreshold: 200_000,
};

/**
 * Accumulates token usage per analysed segment plus one optional compilation
 * step, and converts the totals into estimated USD cost using per-million
 * token rates.
 */
class CostTracker {
  /**
   * @param {object} [pricing] - Rates overriding DEFAULT_PRICING key by key
   */
  constructor(pricing = {}) {
    this.pricing = Object.assign({}, DEFAULT_PRICING, pricing);
    this.segments = [];
    this.compilation = null;
  }

  /**
   * Record token usage for a segment analysis. Calls without usage data are ignored.
   * @param {string} segmentName - e.g. "segment_00.mp4"
   * @param {object} tokenUsage - { inputTokens, outputTokens, thoughtTokens, totalTokens }
   * @param {number} durationMs - Wall-clock time
   * @param {boolean} [cached=false] - Whether this was loaded from cache
   */
  addSegment(segmentName, tokenUsage, durationMs, cached = false) {
    if (!tokenUsage) {
      return;
    }
    const { inputTokens, outputTokens, thoughtTokens, totalTokens } = tokenUsage;
    this.segments.push({
      name: segmentName,
      input: inputTokens || 0,
      output: outputTokens || 0,
      thinking: thoughtTokens || 0,
      total: totalTokens || 0,
      durationMs: durationMs || 0,
      cached,
    });
  }

  /**
   * Record token usage for the final compilation step, replacing any earlier
   * record. Calls without usage data are ignored.
   * @param {object} tokenUsage - { inputTokens, outputTokens, thoughtTokens, totalTokens }
   * @param {number} durationMs - Wall-clock time
   */
  addCompilation(tokenUsage, durationMs) {
    if (!tokenUsage) {
      return;
    }
    const { inputTokens, outputTokens, thoughtTokens, totalTokens } = tokenUsage;
    this.compilation = {
      input: inputTokens || 0,
      output: outputTokens || 0,
      thinking: thoughtTokens || 0,
      total: totalTokens || 0,
      durationMs: durationMs || 0,
    };
  }

  /**
   * Calculate cost for a given token count and rate.
   * @param {number} tokens - Tokens to price
   * @param {number} inputTokens - Input tokens of the same request; above
   *   `longContextThreshold` the long-context input/output rates apply
   * @param {'input'|'output'|'thinking'} type - Any other value is priced at 0
   * @returns {number} Cost in USD
   */
  _calcCost(tokens, inputTokens, type) {
    if (tokens === 0) {
      return 0;
    }

    const rates = this.pricing;
    const longContext = inputTokens > rates.longContextThreshold;

    let ratePerM = 0;
    if (type === 'input') {
      ratePerM = longContext ? rates.inputLongPerM : rates.inputPerM;
    } else if (type === 'output') {
      ratePerM = longContext ? rates.outputLongPerM : rates.outputPerM;
    } else if (type === 'thinking') {
      ratePerM = rates.thinkingPerM; // single rate, no long-context tier
    }

    return (tokens / 1_000_000) * ratePerM;
  }

  /**
   * Price the three token kinds of one recorded entry.
   * @param {{input: number, output: number, thinking: number}} entry
   * @returns {{input: number, output: number, thinking: number}} Costs in USD
   */
  _costBreakdown(entry) {
    return {
      input: this._calcCost(entry.input, entry.input, 'input'),
      output: this._calcCost(entry.output, entry.input, 'output'),
      thinking: this._calcCost(entry.thinking, entry.input, 'thinking'),
    };
  }

  /**
   * Get aggregated summary of all tracked usage.
   * @returns {object} Summary with token counts, costs, and per-segment breakdown.
   */
  getSummary() {
    const entries = this.compilation
      ? this.segments.concat([this.compilation])
      : this.segments.slice();

    let inputTokens = 0;
    let outputTokens = 0;
    let thinkingTokens = 0;
    let totalTokens = 0;
    let totalDurationMs = 0;
    let inputCost = 0;
    let outputCost = 0;
    let thinkingCost = 0;

    // Costs are priced entry by entry because each request has its own context size.
    for (const entry of entries) {
      inputTokens += entry.input;
      outputTokens += entry.output;
      thinkingTokens += entry.thinking;
      totalTokens += entry.total;
      totalDurationMs += entry.durationMs;

      const costs = this._costBreakdown(entry);
      inputCost += costs.input;
      outputCost += costs.output;
      thinkingCost += costs.thinking;
    }

    let cachedSegments = 0;
    const perSegment = [];
    for (const segment of this.segments) {
      if (segment.cached) {
        cachedSegments += 1;
      }
      const costs = this._costBreakdown(segment);
      perSegment.push({
        name: segment.name,
        tokens: segment.total,
        cost: costs.input + costs.output + costs.thinking,
        durationMs: segment.durationMs,
        cached: segment.cached,
      });
    }

    return {
      inputTokens,
      outputTokens,
      thinkingTokens,
      totalTokens,
      totalDurationMs,
      inputCost,
      outputCost,
      thinkingCost,
      totalCost: inputCost + outputCost + thinkingCost,
      segmentCount: this.segments.length,
      cachedSegments,
      freshSegments: this.segments.length - cachedSegments,
      hasCompilation: !!this.compilation,
      perSegment,
    };
  }
}
|
|
155
|
+
|
|
156
|
+
module.exports = CostTracker;
|