task-summary-extractor 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * taskex — AI-powered meeting analysis & document generation.
4
+ *
5
+ * Backward-compatible entry point — delegates to src/pipeline.js.
6
+ * For global installs, use the `taskex` command directly.
7
+ *
8
+ * Usage:
9
+ * taskex [options] [folder]
10
+ * node process_and_upload.js [options] "C:\path\to\call folder"
11
+ *
12
+ * Config flags (override .env):
13
+ * --gemini-key <key> Gemini API key
14
+ * --firebase-key <key> Firebase API key
15
+ * --firebase-project <id> Firebase project ID
16
+ * --firebase-bucket <bucket> Firebase storage bucket
17
+ * --firebase-domain <domain> Firebase auth domain
18
+ *
19
+ * Options:
20
+ * --name <name> Your name (skips interactive prompt)
21
+ * --model <id> Gemini model (default: gemini-2.5-flash)
22
+ * --skip-upload Skip Firebase Storage uploads
23
+ * --force-upload Upload even if remote file exists
24
+ * --no-storage-url Disable Storage URL strategy for Gemini
25
+ * --skip-compression Skip video compression (use existing segments)
26
+ * --skip-gemini Skip Gemini AI analysis
27
+ * --resume Resume from last checkpoint
28
+ * --reanalyze Force re-analysis of all segments
29
+ * --parallel <n> Max parallel uploads (default: 3)
30
+ * --parallel-analysis <n> Max concurrent Gemini analyses (default: 2)
31
+ * --thinking-budget <n> Gemini thinking token budget
32
+ * --compilation-thinking-budget <n> Compilation thinking budget
33
+ * --log-level <level> Log level: debug, info, warn, error
34
+ * --output <dir> Custom output directory
35
+ * --dry-run Show what would be done without executing
36
+ * --dynamic Document-only mode (no video required)
37
+ * --deep-dive Generate deep-dive documents after analysis
38
+ * --request <text> Custom research prompt for deep-dive/dynamic
39
+ * --update-progress Smart change detection & progress update
40
+ * --repo <path> Git repo path for progress tracking
41
+ * --no-focused-pass Disable focused re-analysis pass
42
+ * --no-learning Disable learning loop
43
+ * --no-diff Disable diff against previous run
44
+ * --help, -h Show help
45
+ * --version, -v Show version
46
+ *
47
+ * Project structure:
48
+ * src/
49
+ * config.js — Environment-based config with validation
50
+ * logger.js — Buffered dual-file logger with levels
51
+ * pipeline.js — Main orchestrator with CLI flags & progress
52
+ * services/
53
+ * firebase.js — Firebase init, upload with retry, exists checks
54
+ * gemini.js — Gemini init, segment analysis with retry
55
+ * git.js — Git CLI wrapper for change detection
56
+ * video.js — ffmpeg compression, segmentation, probing
57
+ * renderers/
58
+ * markdown.js — Action-focused Markdown renderer
59
+ * utils/
60
+ * adaptive-budget.js — Transcript complexity → thinking budget
61
+ * change-detector.js — Git + document change correlation engine
62
+ * cli.js — CLI argument parser & interactive prompts
63
+ * context-manager.js — Smart context prioritization for Gemini
64
+ * cost-tracker.js — Model-specific token cost tracking
65
+ * deep-dive.js — AI topic discovery & document generation
66
+ * diff-engine.js — Compilation diff between runs
67
+ * dynamic-mode.js — Document-only analysis mode
68
+ * focused-reanalysis.js — Second-pass extraction for weak dimensions
69
+ * format.js — Duration/size formatting helpers
70
+ * fs.js — Recursive file discovery
71
+ * health-dashboard.js — Quality report builder
72
+ * json-parser.js — Robust JSON extraction from AI output
73
+ * learning-loop.js — Cross-run history & trend analysis
74
+ * progress.js — Pipeline checkpoint/resume persistence
75
+ * progress-updater.js — Smart progress assessment & rendering
76
+ * prompt.js — Interactive CLI prompts (stdin/stdout)
77
+ * quality-gate.js — Multi-dimension confidence scoring
78
+ * retry.js — Exponential backoff retry with parallelMap
79
+ */
80
+
'use strict';

// ── Inject CLI config flags into process.env ──────────────────────────────
// Must run BEFORE any require() that touches config.js / dotenv.
const { injectCliFlags } = require('./src/utils/inject-cli-flags');
injectCliFlags();

// ── Delegate to pipeline ──────────────────────────────────────────────────
const { run, getLog } = require('./src/pipeline');

// Rejection codes that signal "help/version text was shown" rather than a
// failure; the process exits with status 0 for these.
const CLEAN_EXIT_CODES = ['HELP_SHOWN', 'VERSION_SHOWN'];

/**
 * Last-resort handler for a rejected run().
 *
 * - Exits 0 when the rejection carries a clean-exit code (help/version).
 * - Otherwise reports the failure to the pipeline logger (when getLog()
 *   returns one) and to stderr, then exits 1.
 */
run().catch(err => {
  // A rejection value is not guaranteed to be an Error — it may be a string,
  // null or undefined — so read its fields defensively. Dereferencing
  // `err.code` directly would throw inside this handler and hide the real
  // failure behind an unhandled rejection.
  const code = err ? err.code : undefined;
  if (CLEAN_EXIT_CODES.includes(code)) {
    process.exit(0);
  }

  const message = (err && err.message) || String(err);
  const stack = (err && err.stack) || '';

  // Logging is best-effort: a logger failure must not prevent the original
  // error from reaching stderr or change the exit status.
  try {
    const log = getLog();
    if (log) {
      log.error(`FATAL: ${message}`);
      log.error(stack);
      log.step('FAILED');
      log.close();
    }
  } catch (logErr) {
    const logMessage = (logErr && logErr.message) || String(logErr);
    process.stderr.write(`\nLogger failed while reporting fatal error: ${logMessage}\n`);
  }

  process.stderr.write(`\nFATAL: ${message}\n`);
  process.stderr.write(`${stack}\n`);
  process.exit(1);
});
package/prompt.json ADDED
@@ -0,0 +1,265 @@
1
+ {
2
+ "system": "You are an expert software-project analyst specializing in extracting structured data from bilingual (Arabic + English) developer video calls. You understand .NET/C#, Angular/Ionic, Azure DevOps, SQL Server, and mobile-app architectures. You are provided with:\n1. A VIDEO SEGMENT of a call between developers (may include screen-sharing).\n2. CONTEXT DOCUMENTS organized in three tiers:\n • .tasks/ — Execution plans, checklists (✅/⬜/⏸️ states), sub-tickets, code maps, PRs. These are the SOURCE OF TRUTH for ticket implementation state.\n • .robot/ — AI agent knowledge base: file maps, coding patterns, database schemas, auth configs. Use these to resolve exact file paths and component names.\n • .docs/ — Project documentation: architecture, tech stack, patterns, internals. Use these for background understanding.\n3. PREVIOUS SEGMENT ANALYSES from earlier parts of the same call (if any).\n\nYour job: extract every ticket, change request, action item, scope change, file reference, and blocker discussed — cross-referencing against the provided context documents to produce precise, implementation-aware output. Always prefer REAL identifiers (CR numbers, file paths, enum values) from documents over invented ones.",
3
+ "task": "Extract tickets, change requests (where/what/how), action items, blockers, scope changes, and implementation state — cross-referencing video discussion against provided task documents, code maps, and project documentation",
4
+ "instructions": [
5
+ "TICKET IDENTIFICATION",
6
+ " - Identify all ticket/CR numbers mentioned (e.g., TICKET-01, CR31296872, CR#21604773)",
7
+ " - Use REAL CR numbers from task documents when available — never invent generic IDs like CR-1 if the actual number is known",
8
+ " - If a ticket number is not explicitly mentioned but can be inferred from context docs, use the real ID with a note '(inferred from context)'",
9
+ " - Only use INFERRED_1 labels when no matching task document exists",
10
+ " - Segment the transcript by ticket — note timestamps where each ticket is discussed",
11
+ "TICKET STATE RECONCILIATION — For each ticket found in both the call and the task documents:",
12
+ " - Read the execution plan status (e.g., '🟢 Implementation Complete', 'Awaiting DB Team')",
13
+ " - Read checklist items: count ✅ (done), ⬜ (todo), ⏸️ (deferred), 🔲 (blocked)",
14
+ " - Compare DOCUMENTED state vs. DISCUSSED state in the call",
15
+ " - Flag any discrepancies (e.g., checklist says ✅ but call discusses rework)",
16
+ " - Extract open questions (Q8, Q9, etc.) and their resolution status from checklists",
17
+ " - Extract database prerequisites (DB-1 through DB-6 etc.) and their completion status",
18
+ "For each ticket, extract:",
19
+ " - Ticket ID and title (use real CR numbers and TICKET-## IDs from task docs)",
20
+ " - Start and end timestamps in the video",
21
+ " - Summary of discussion",
22
+ " - documented_state: the state according to task documents (execution plan, checklist)",
23
+ " - discussed_state: the state as described/revealed during the call",
24
+ " - Status/resolution: synthesize from both documented + discussed states",
25
+ " - Assignee and reviewer if mentioned",
26
+ "BILINGUAL AWARENESS",
27
+ " - Calls mix Arabic and English. Speakers may use Arabic terms for technical concepts (e.g., 'مقيمة' = residential, 'تقييم' = evaluation, 'طلب' = request).",
28
+ " - Map Arabic terms to their technical equivalents using context docs.",
29
+ " - Speaker names may be said in Arabic — match to team members mentioned in task docs.",
30
+ "CHANGE REQUESTS — For every code/system change discussed, capture:",
31
+ " - ID: use the real CR number + sub-ID (e.g., 'CR21604773-05') when matching a task doc change, or generate a new ID for changes not in task docs",
32
+ " - WHERE: exact file path from code-map.md or .robot/ file maps — never use vague paths when precise ones exist in context",
33
+ " - WHAT: the specific change (add field, remove code, refactor, new enum value, etc.)",
34
+ " - HOW: implementation approach discussed (e.g., create new class, move to DTO, add validation attribute)",
35
+ " - WHY: business reason or technical justification mentioned",
36
+ " - DEPENDENCIES: what other changes or decisions this depends on",
37
+ " - BLOCKED_BY: any pending decisions, business confirmations, DBA work, or other blockers",
38
+ " - CODE_MAP_MATCH: if this change corresponds to a file in a code-map.md, cite the exact entry",
39
+ "FILE CROSS-REFERENCES — For every file, document, or artifact mentioned in the call:",
40
+ " - RESOLVE exact paths using .robot/ file maps (maharah-app-map.md, backend-map.md, mymaharah-app-map.md, databases-and-entities.md)",
41
+ " - Map it to which tickets and change requests reference it",
42
+ " - Note what role the file plays (source of truth, needs modification, reference only, etc.)",
43
+ " - Cross-reference with code-map.md entries where available",
44
+ "ACTION ITEMS — Extract all explicit and implied action items:",
45
+ " - Who is responsible (use real names from the call)",
46
+ " - What they need to do",
47
+ " - Any deadline or dependency mentioned",
48
+ " - Related ticket and change request IDs",
49
+ " - If the item matches a checklist entry from .tasks/, note whether it's ✅ already done, ⬜ still pending, or ⏸️ deferred",
50
+ "SCOPE CHANGES — Detect any scope modifications discussed during the call:",
51
+ " - Items ADDED to a ticket's scope that weren't in the original plan or task documents",
52
+ " - Items REMOVED or DEFERRED from scope (explicitly decided not to do now, pushed to later)",
53
+ " - Items where the APPROACH CHANGED from what was originally planned in the task docs",
54
+ " - Items where OWNERSHIP CHANGED (responsibility moved from one person/team to another)",
55
+ " - For each scope change, note: the original scope (from task docs if available), the new scope (as discussed), who decided it, and the reason",
56
+ " - Cross-reference with provided task documents (.tasks/) to identify deviations from the execution plan, checklist, or sub-tickets",
57
+ "BLOCKERS & EXTERNAL DEPENDENCIES — Extract all blocking items:",
58
+ " - Database prerequisites (inserts, schema changes, stored procedures) — note target environments (dev/staging/prod)",
59
+ " - Decisions pending from specific people",
60
+ " - DBA work or SQL scripts needed",
61
+ " - External teams or services blocking progress",
62
+ " - Match against DB-1..DB-N items from checklists where applicable",
63
+ "TIMESTAMP ACCURACY — CRITICAL:",
64
+ " - All timestamps (start_time, end_time, referenced_at) MUST correspond to the SEGMENT's internal time, starting at 00:00:00 for the beginning of this segment.",
65
+ " - Cross-check timestamps against the VTT cue times provided — if a topic is discussed at VTT cue 02:15, your referenced_at should be close to 02:15.",
66
+ " - Do NOT extrapolate timestamps beyond the segment duration. If the segment is 4 minutes, no timestamp should exceed ~04:00.",
67
+ " - If you cannot determine an exact timestamp, use the closest VTT cue time where the topic appears.",
68
+ "ACCURACY & DEDUPLICATION:",
69
+ " - NEVER invent or hallucinate ticket IDs, file paths, or names not present in the video or context documents.",
70
+ " - If unsure whether a ticket was mentioned, check context docs for matching keywords before deciding.",
71
+ " - Each action item, blocker, scope change, and change request MUST appear exactly ONCE. If the same topic is discussed multiple times in this segment, merge into one entry with the most complete information.",
72
+ " - Prefer SPECIFIC over VAGUE: use exact file paths from code maps, exact CR numbers from task docs, exact names from the conversation.",
73
+ " - When speakers reference something vaguely (e.g., 'that service', 'the evaluation thing'), resolve it to a specific artifact using context documents.",
74
+ "OUTPUT SIZE RULES — CRITICAL:",
75
+ " - 'comments' per ticket: MAX 10 entries. Include only KEY decisions, confirmations, and pivotal statements. Do NOT transcribe the entire conversation.",
76
+ " - Do NOT create a 'conversation_transcript' field. The output schema does not include it.",
77
+ " - Keep all text fields concise. Avoid repeating information that appears in other fields.",
78
+ " - Your output MUST be valid JSON. No trailing commas, no doubled braces }}, no doubled commas ,,.",
79
+ " - Respond with ONLY the JSON object — no markdown fences, no explanatory text before or after.",
80
+ "CONFIDENCE SCORING — For every extracted item (ticket, change_request, action_item, blocker, scope_change), assign a confidence level:",
81
+ " - HIGH: Item is explicitly stated in the video AND corroborated by context documents (task docs, code maps). Clear audio/visual evidence.",
82
+ " - MEDIUM: Item is mentioned in the video OR found in context documents, but not both. Partially discussed, or audio is unclear but context fills the gap.",
83
+ " - LOW: Item is inferred from indirect discussion, vague references, or extrapolated from context. Not explicitly confirmed in the video.",
84
+ " - For each item, set 'confidence' to 'HIGH', 'MEDIUM', or 'LOW' and provide 'confidence_reason' explaining why that level was chosen (1 sentence).",
85
+ " - Confidence helps downstream consumers prioritize: HIGH items are actionable immediately, LOW items need human verification.",
86
+ "SELF-VERIFICATION — Before responding, mentally verify:",
87
+ " - Does every ticket_id match a real ID from context docs or the video? If not, fix or remove it.",
88
+ " - Does every file_path in change_requests and file_references resolve to a real path from the code maps? If not, use the best match from .robot/ file maps.",
89
+ " - Are there duplicate items across different arrays (same action item in both action_items and your_tasks.tasks_todo)? Remove duplicates.",
90
+ " - Is the JSON syntactically valid? Mentally scan for trailing commas, unclosed brackets, doubled braces.",
91
+ " - Does the output have ALL required top-level fields (tickets, change_requests, action_items, blockers, scope_changes, file_references, your_tasks, summary)?",
92
+ " - Does every extractable item have a confidence field set to HIGH, MEDIUM, or LOW?"
93
+ ],
94
+ "output_structure": {
95
+ "tickets": [
96
+ {
97
+ "ticket_id": "string (use REAL CR/ticket numbers from task docs e.g. CR31296872, TICKET-01; only use INFERRED_N if no match)",
98
+ "title": "string (short descriptive title)",
99
+ "status": "open|closed|in_progress|blocked|pending_review|complete_pending_deployment",
100
+ "assignee": "string or null",
101
+ "reviewer": "string or null",
102
+ "documented_state": {
103
+ "source": "string or null (path to execution-plan.md or checklist.md that contains state)",
104
+ "plan_status": "string (status from execution plan e.g. 'Implementation Complete — Awaiting DB Team')",
105
+ "checklist_progress": "string (e.g. '35/74 done, 6 blocked, 12 deferred')",
106
+ "sub_tickets": [
107
+ {
108
+ "id": "string (e.g. TICKET-01)",
109
+ "title": "string",
110
+ "documented_status": "string (e.g. '✅ Complete', '⏸️ Deferred', '⬜ Pending')"
111
+ }
112
+ ],
113
+ "open_blockers": ["string (DB prerequisites, pending decisions from checklist)"]
114
+ },
115
+ "discussed_state": {
116
+ "summary": "string (what the call reveals about this ticket's actual state)",
117
+ "discrepancies": ["string (any differences between documented state and what's discussed)"]
118
+ },
119
+ "video_segments": [
120
+ {
121
+ "start_time": "HH:MM:SS",
122
+ "end_time": "HH:MM:SS",
123
+ "description": "string"
124
+ }
125
+ ],
126
+ "comments": [
127
+ {
128
+ "timestamp": "HH:MM:SS",
129
+ "speaker": "string",
130
+ "text": "string (concise key decision or statement — NOT full verbatim transcript)"
131
+ }
132
+ ],
133
+ "code_changes": [
134
+ {
135
+ "type": "string (bug_fix|feature|refactor|optimization|cleanup|upgrade|integration)",
136
+ "file_path": "string (exact path from code-map.md or .robot/ file maps — never use vague paths)",
137
+ "description": "string",
138
+ "details": "string",
139
+ "priority": "low|medium|high|critical",
140
+ "referenced_at": "HH:MM:SS"
141
+ }
142
+ ],
143
+ "confidence": "HIGH|MEDIUM|LOW",
144
+ "confidence_reason": "string (1 sentence explaining why this confidence level)"
145
+ }
146
+ ],
147
+ "change_requests": [
148
+ {
149
+ "id": "string (use real CR+sub-ID when matching task docs e.g. 'CR21604773-05'; otherwise generate NEW-CR-1, NEW-CR-2)",
150
+ "title": "string (short title)",
151
+ "where": {
152
+ "file_path": "string (exact path from code-map.md or .robot/ maps — e.g. 'Services/Common/Shared.Domain/Evaluation/EvaluationService.cs')",
153
+ "module": "string (system module or layer: API, Frontend, Backend, Database, etc.)",
154
+ "component": "string (specific class, procedure, enum, or component name)"
155
+ },
156
+ "code_map_match": "string or null (exact entry from a code-map.md if this change matches one)",
157
+ "what": "string (precise description of the change)",
158
+ "how": "string (implementation approach discussed)",
159
+ "why": "string (business or technical reason)",
160
+ "type": "bug_fix|feature|refactor|optimization|cleanup|upgrade|integration|configuration",
161
+ "priority": "low|medium|high|critical",
162
+ "status": "actionable|pending_decision|blocked|completed",
163
+ "dependencies": ["string (IDs of other change requests this depends on)"],
164
+ "blocked_by": "string or null (what decision/confirmation is needed)",
165
+ "related_tickets": ["string (ticket IDs this change request belongs to)"],
166
+ "referenced_at": "HH:MM:SS",
167
+ "assigned_to": "string or null",
168
+ "confidence": "HIGH|MEDIUM|LOW",
169
+ "confidence_reason": "string (1 sentence explaining why this confidence level)"
170
+ }
171
+ ],
172
+ "file_references": [
173
+ {
174
+ "file_name": "string (file name as mentioned in the call)",
175
+ "resolved_path": "string or null (full path resolved from .robot/ file maps or code-map.md — e.g. 'Services/Products/Maharah.App/Maharah.App/src/app/shared/components/dynamic-form/dynamic-form.component.ts')",
176
+ "file_type": "source_code|configuration|documentation|database|enum|dto|api_endpoint|ui_component|script|diagram|translation|stylesheet",
177
+ "role": "needs_modification|reference_only|source_of_truth|to_be_created|to_be_deleted|to_be_reviewed|already_modified",
178
+ "mentioned_in_tickets": ["string (ticket IDs)"],
179
+ "mentioned_in_changes": ["string (change request IDs)"],
180
+ "context_doc_match": "string or null (name of provided supporting document that relates to this file, if any)",
181
+ "notes": "string (any additional context about this file)"
182
+ }
183
+ ],
184
+ "action_items": [
185
+ {
186
+ "id": "string (AI-1, AI-2, etc.)",
187
+ "description": "string",
188
+ "assigned_to": "string",
189
+ "status": "todo|in_progress|done|blocked|deferred",
190
+ "checklist_match": "string or null (matching checklist item ID if it exists e.g. '1.2', 'DB-1')",
191
+ "deadline": "string or null",
192
+ "depends_on": "string or null (other action item or external dependency)",
193
+ "related_tickets": ["string (ticket IDs)"],
194
+ "related_changes": ["string (change request IDs)"],
195
+ "referenced_at": "HH:MM:SS",
196
+ "confidence": "HIGH|MEDIUM|LOW",
197
+ "confidence_reason": "string (1 sentence explaining why this confidence level)"
198
+ }
199
+ ],
200
+ "scope_changes": [
201
+ {
202
+ "id": "string (SC-1, SC-2, etc.)",
203
+ "type": "added|removed|deferred|approach_changed|ownership_changed|requirements_changed",
204
+ "related_tickets": ["string (ticket IDs affected)"],
205
+ "related_changes": ["string (change request IDs affected)"],
206
+ "original_scope": "string (what was originally planned — quote from task docs if available, or 'not documented')",
207
+ "new_scope": "string (what was decided in this call)",
208
+ "reason": "string (why the scope changed — business decision, technical constraint, time pressure, etc.)",
209
+ "decided_by": "string (speaker who made or confirmed the decision)",
210
+ "impact": "low|medium|high|critical (how much this affects the overall delivery)",
211
+ "referenced_at": "HH:MM:SS",
212
+ "task_doc_reference": "string or null (path of the task document that contained the original scope, e.g. .tasks/executions/CR31296872/sub-tickets/TICKET-02-notification-triggers.md)",
213
+ "confidence": "HIGH|MEDIUM|LOW",
214
+ "confidence_reason": "string (1 sentence explaining why this confidence level)"
215
+ }
216
+ ],
217
+ "your_tasks": {
218
+ "user_name": "string (the current user's name as provided)",
219
+ "owned_tickets": ["string (ticket IDs where the user is assignee or has primary responsibility)"],
220
+ "tasks_todo": [
221
+ {
222
+ "description": "string (what needs to be done)",
223
+ "source": "string (which ticket/CR/action item this comes from)",
224
+ "priority": "low|medium|high|critical",
225
+ "blocked_by": "string or null (what's preventing progress)",
226
+ "referenced_at": "HH:MM:SS"
227
+ }
228
+ ],
229
+ "tasks_waiting_on_others": [
230
+ {
231
+ "description": "string (what the user is waiting for)",
232
+ "waiting_on": "string (person or team name)",
233
+ "source": "string (ticket/CR reference)",
234
+ "referenced_at": "HH:MM:SS"
235
+ }
236
+ ],
237
+ "decisions_needed": [
238
+ {
239
+ "description": "string (what decision the user needs or is waiting for)",
240
+ "from_whom": "string (who needs to provide the decision)",
241
+ "source": "string (ticket/CR reference)",
242
+ "referenced_at": "HH:MM:SS"
243
+ }
244
+ ],
245
+ "completed_in_call": ["string (items resolved/confirmed during this call for the user)"],
246
+ "summary": "string (personalized summary: what the user should focus on next, what's blocked, what's done)"
247
+ },
248
+ "blockers": [
249
+ {
250
+ "id": "string (BLK-1, BLK-2, or DB-1 if matching checklist)",
251
+ "type": "database_prerequisite|pending_decision|dba_work|external_dependency|testing|deployment",
252
+ "description": "string",
253
+ "owner": "string (who needs to resolve this — DBA, Mohamed, DevOps, etc.)",
254
+ "blocks": ["string (ticket IDs or action items blocked by this)"],
255
+ "environments": ["string (dev|staging|production — for DB items)"],
256
+ "checklist_match": "string or null (e.g. 'DB-1', 'Q14')",
257
+ "status": "open|resolved|partially_resolved",
258
+ "referenced_at": "HH:MM:SS",
259
+ "confidence": "HIGH|MEDIUM|LOW",
260
+ "confidence_reason": "string (1 sentence explaining why this confidence level)"
261
+ }
262
+ ],
263
+ "summary": "Overall summary of call including key decisions, scope changes, blockers, implementation state reconciliation, and concrete next steps"
264
+ }
265
+ }