task-summary-extractor 8.1.0 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/.env.example +38 -0
  2. package/ARCHITECTURE.md +116 -15
  3. package/EXPLORATION.md +164 -101
  4. package/QUICK_START.md +5 -2
  5. package/README.md +70 -18
  6. package/bin/taskex.js +11 -4
  7. package/package.json +39 -6
  8. package/process_and_upload.js +7 -100
  9. package/prompt.json +199 -131
  10. package/src/config.js +52 -3
  11. package/src/{utils → modes}/deep-dive.js +2 -2
  12. package/src/{utils → modes}/dynamic-mode.js +2 -2
  13. package/src/{utils → modes}/focused-reanalysis.js +4 -3
  14. package/src/{utils → modes}/progress-updater.js +2 -2
  15. package/src/phases/_shared.js +43 -0
  16. package/src/phases/compile.js +101 -0
  17. package/src/phases/deep-dive.js +118 -0
  18. package/src/phases/discover.js +178 -0
  19. package/src/phases/init.js +192 -0
  20. package/src/phases/output.js +238 -0
  21. package/src/phases/process-media.js +633 -0
  22. package/src/phases/services.js +104 -0
  23. package/src/phases/summary.js +86 -0
  24. package/src/pipeline.js +431 -1462
  25. package/src/renderers/docx.js +531 -0
  26. package/src/renderers/html.js +672 -0
  27. package/src/renderers/markdown.js +15 -183
  28. package/src/renderers/pdf.js +90 -0
  29. package/src/renderers/shared.js +211 -0
  30. package/src/schemas/analysis-compiled.schema.json +381 -0
  31. package/src/schemas/analysis-segment.schema.json +380 -0
  32. package/src/services/doc-parser.js +346 -0
  33. package/src/services/gemini.js +105 -48
  34. package/src/services/git.js +0 -29
  35. package/src/services/video.js +123 -8
  36. package/src/utils/adaptive-budget.js +6 -6
  37. package/src/utils/{progress.js → checkpoint.js} +2 -1
  38. package/src/utils/cli.js +161 -113
  39. package/src/utils/colors.js +83 -0
  40. package/src/utils/confidence-filter.js +138 -0
  41. package/src/utils/context-manager.js +0 -4
  42. package/src/utils/diff-engine.js +2 -4
  43. package/src/utils/global-config.js +6 -5
  44. package/src/utils/health-dashboard.js +11 -9
  45. package/src/utils/json-parser.js +5 -3
  46. package/src/utils/learning-loop.js +3 -3
  47. package/src/utils/progress-bar.js +286 -0
  48. package/src/utils/quality-gate.js +4 -8
  49. package/src/utils/retry.js +13 -5
  50. package/src/utils/schema-validator.js +314 -0
  51. package/src/utils/prompt.js +0 -32
  52. /package/src/{utils → modes}/change-detector.js +0 -0
package/.env.example ADDED
@@ -0,0 +1,38 @@
1
+ # ======================== FIREBASE ========================
2
+ FIREBASE_API_KEY=your_firebase_api_key
3
+ FIREBASE_AUTH_DOMAIN=your-project.firebaseapp.com
4
+ FIREBASE_PROJECT_ID=your-project
5
+ FIREBASE_STORAGE_BUCKET=your-project.appspot.com
6
+ FIREBASE_MESSAGING_SENDER_ID=1234567890
7
+ FIREBASE_APP_ID=1:1234567890:web:abc123
8
+ FIREBASE_MEASUREMENT_ID=G-XXXXXXXXXX
9
+
10
+ # ======================== GEMINI AI ========================
11
+ GEMINI_API_KEY=your_gemini_api_key
12
+ GEMINI_MODEL=gemini-2.5-flash
13
+
14
+ # ======================== VIDEO PROCESSING ========================
15
+ # Speed multiplier (default: 1.5)
16
+ VIDEO_SPEED=1.5
17
+ # Segment duration in seconds (default: 280)
18
+ VIDEO_SEGMENT_TIME=280
19
+ # ffmpeg preset: ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow
20
+ VIDEO_PRESET=slow
21
+
22
+ # ======================== PIPELINE ========================
23
+ # Log level: debug, info, warn, error (default: info)
24
+ LOG_LEVEL=info
25
+ # Max concurrent uploads (default: 3)
26
+ MAX_PARALLEL_UPLOADS=3
27
+ # Max retries for API calls (default: 3)
28
+ MAX_RETRIES=3
29
+ # Retry base delay in ms (default: 2000)
30
+ RETRY_BASE_DELAY_MS=2000
31
+
32
+ # ======================== GEMINI TUNING ========================
33
+ # Thinking token budget per segment analysis (default: 24576)
34
+ THINKING_BUDGET=24576
35
+ # Thinking token budget for final compilation (default: 10240)
36
+ COMPILATION_THINKING_BUDGET=10240
37
+ # Max polling time for Gemini File API processing in ms (default: 300000 = 5 min)
38
+ GEMINI_POLL_TIMEOUT_MS=300000
package/ARCHITECTURE.md CHANGED
@@ -74,26 +74,34 @@ flowchart TB
74
74
  FB["firebase.js"]
75
75
  VID["video.js"]
76
76
  GIT["git.js"]
77
+ DP["doc-parser.js"]
77
78
  end
78
79
 
79
- subgraph Utils["Utilities — 19 modules"]
80
+ subgraph Utils["Utilities"]
80
81
  QG["quality-gate"]
81
- FR["focused-reanalysis"]
82
82
  LL["learning-loop"]
83
83
  DE["diff-engine"]
84
- CD["change-detector"]
85
- PU["progress-updater"]
86
84
  CM["context-manager"]
87
85
  JP["json-parser"]
88
86
  AB["adaptive-budget"]
89
87
  HD["health-dashboard"]
88
+ OT["+ 7 more"]
89
+ end
90
+
91
+ subgraph Modes["Modes — AI pipeline phases"]
92
+ FR["focused-reanalysis"]
93
+ CD["change-detector"]
94
+ PU["progress-updater"]
90
95
  DD["deep-dive"]
91
96
  DM["dynamic-mode"]
92
- OT["+ 7 more"]
93
97
  end
94
98
 
95
99
  subgraph Renderers["Renderers"]
96
100
  MD["markdown.js"]
101
+ HTML["html.js"]
102
+ PDF["pdf.js"]
103
+ DOCX["docx.js"]
104
+ SHARED["shared.js"]
97
105
  end
98
106
 
99
107
  EP --> Pipeline
@@ -101,6 +109,7 @@ flowchart TB
101
109
  P1 -.->|"--dynamic"| DYN
102
110
  Pipeline --> Services
103
111
  Pipeline --> Utils
112
+ Pipeline --> Modes
104
113
  Pipeline --> Renderers
105
114
  UP --> GIT
106
115
  UP --> CD
@@ -115,11 +124,11 @@ flowchart TB
115
124
  | Phase | Name | What Happens |
116
125
  |-------|------|-------------|
117
126
  | 1 | **Init** | CLI parsing, interactive folder selection (if no arg), config validation, logger setup, load learning insights, route to dynamic/progress mode |
118
- | 2 | **Discover** | Find videos, discover documents, resolve user name, check resume state |
127
+ | 2 | **Discover** | Find videos/audio, discover documents, resolve user name, check resume state |
119
128
  | 3 | **Services** | Firebase auth, Gemini init, prepare document parts |
120
129
  | 4 | **Process** | Compress → Upload → Analyze → Quality Gate → Retry → Focused Pass |
121
130
  | 5 | **Compile** | Cross-segment compilation, diff engine comparison |
122
- | 6 | **Output** | Write JSON, render Markdown, upload to Firebase |
131
+ | 6 | **Output** | Write JSON, render Markdown + HTML, upload to Firebase |
123
132
  | 7 | **Health** | Quality metrics dashboard, cost breakdown |
124
133
  | 8 | **Summary** | Save learning history, print run summary |
125
134
  | 9 | **Deep Dive** | (optional, `--deep-dive`) Topic discovery + explanatory document generation |
@@ -166,6 +175,7 @@ flowchart LR
166
175
  subgraph P6["Phase 6: Output"]
167
176
  JSON["results.json"]
168
177
  MDR["results.md"]
178
+ HTMLR["results.html"]
169
179
  FBU["Firebase upload"]
170
180
  end
171
181
 
@@ -294,17 +304,17 @@ flowchart TB
294
304
 
295
305
  ## Extraction Schema
296
306
 
297
- The AI extracts 6 structured categories from each meeting. The categories are content-adaptive the AI populates whichever fields are relevant to the actual discussion.
307
+ The AI extracts 6 structured categories from any content source (video, audio, documents, or mixed). The prompt auto-detects the input type and adapts: temporal content (video/audio) gets timestamps; document-only content uses section references and null timestamps. All field names remain identical regardless of input type for backward compatibility.
298
308
 
299
309
  ### Categories
300
310
 
301
311
  | Category | Key Fields | Adapts To |
302
312
  |----------|-----------|----------|
303
- | **Tickets / Items** | `ticket_id`, `title`, `status`, `assignee`, `reviewer`, `video_segments` with timestamps, `speaker_comments`, `details` with priority, confidence | Sprint items, requirements, interview topics, incident items |
304
- | **Change Requests** | `WHERE` (target: file, system, process, scope), `WHAT` (specific change), `HOW` (approach), `WHY` (justification), `dependencies`, `blocked_by`, confidence | Code changes, requirement changes, process changes, scope adjustments |
305
- | **References** | `name`, `type`, `role`, cross-refs to tickets & CRs, `context_doc_match` | Files, documents, URLs, tools, systems, resources mentioned |
306
- | **Action Items** | `description`, `assigned_to`, `status`, `deadline`, `dependencies`, related tickets & CRs, confidence | Any follow-up work discussed |
307
- | **Blockers** | `description`, `severity`, `owner`, `status`, `proposed_resolution`, confidence | Technical blockers, approval gates, resource constraints |
313
+ | **Tickets / Items** | `ticket_id`, `title`, `status`, `assignee`, `reviewer`, `video_segments` with timestamps (or null for docs), `speaker_comments`, `details` with priority, confidence | Sprint items, requirements, interview topics, incident items, legal matters, deals |
314
+ | **Change Requests** | `WHERE` (target: file, system, process, scope), `WHAT` (specific change), `HOW` (approach), `WHY` (justification), `dependencies`, `blocked_by`, confidence | Code changes, requirement changes, process changes, scope adjustments, contract revisions, policy updates |
315
+ | **References** | `name`, `type`, `role`, cross-refs to tickets & CRs, `context_doc_match` | Files, documents, URLs, tools, systems, resources, contracts, reports mentioned |
316
+ | **Action Items** | `description`, `assigned_to`, `status`, `deadline`, `dependencies`, related tickets & CRs, confidence | Any follow-up work discussed or documented |
317
+ | **Blockers** | `description`, `severity`, `owner`, `status`, `proposed_resolution`, confidence | Technical blockers, approval gates, resource constraints, legal reviews, budget approvals |
308
318
  | **Scope Changes** | `type` (added/removed/deferred), `original` vs `new` scope, `decided_by`, `impact`, confidence | Feature scope, project scope, contract scope, training scope |
309
319
 
310
320
  ### Personalized Task Section
@@ -500,7 +510,11 @@ taskex --dynamic --request "Document this microservices architecture"
500
510
  |-----------|--------|-------------|
501
511
  | `.vtt` `.srt` `.txt` `.md` `.csv` | Inline text | Read and passed directly as text parts |
502
512
  | `.pdf` | Gemini File API | Uploaded as binary, Gemini processes natively |
503
- | `.docx` `.doc` | Firebase only | Uploaded for archival, not processable by Gemini |
513
+ | `.mp3` `.wav` `.ogg` `.m4a` | Gemini File API | Uploaded as audio, Gemini processes natively |
514
+ | `.docx` | Doc parser (mammoth) | Converted to plain text, sent as inline text |
515
+ | `.xlsx` `.xls` | Doc parser (xlsx) | Converted to pipe-delimited tables, sent as inline text |
516
+ | `.doc` `.pptx` `.ppt` `.odt` `.odp` `.ods` `.rtf` `.epub` | Doc parser (officeparser) | Converted to plain text, sent as inline text |
517
+ | `.html` `.htm` | Doc parser (built-in) | HTML tags stripped, sent as inline text |
504
518
 
505
519
  Directories skipped during recursive discovery: `node_modules`, `.git`, `compressed`, `logs`, `gemini_runs`, `runs`
506
520
 
@@ -541,10 +555,15 @@ JSONL structured format includes phase spans with timing metrics for observabili
541
555
  | **Gemini AI** | `@google/genai@^1.42.0` | Video analysis, File API, 1M context window |
542
556
  | **Firebase** | `firebase@^12.9.0` | Anonymous auth + Cloud Storage uploads |
543
557
  | **dotenv** | `dotenv@^17.3.1` | Environment variable loading |
558
+ | **puppeteer** | `puppeteer` | HTML → PDF conversion for PDF output format |
559
+ | **docx** | `docx` | Programmatic Word document generation for DOCX output format |
560
+ | **mammoth** | `mammoth` | DOCX → plain text conversion |
561
+ | **xlsx** | `xlsx` | Excel spreadsheet parsing (XLSX/XLS) |
562
+ | **officeparser** | `officeparser` | DOC, PPTX, PPT, ODT, ODP, ODS, RTF, EPUB text extraction |
544
563
  | **ffmpeg** | System binary | H.264 video compression + segmentation |
545
564
  | **Git** | System binary | Change detection for progress tracking |
546
565
 
547
- **Codebase: 31 files · ~10,300 lines** · npm package: `task-summary-extractor` · CLI: `taskex`
566
+ **Codebase: ~45 files · ~13,000 lines** · npm package: `task-summary-extractor` · CLI: `taskex`
548
567
 
549
568
  ---
550
569
 
@@ -596,6 +615,47 @@ When `usedExternalUrl` is `true`, the `fileUri` contains the Firebase Storage do
596
615
 
597
616
  ---
598
617
 
618
+ ## JSON Schema Validation
619
+
620
+ All AI output is validated against JSON Schema definitions in `src/schemas/`:
621
+
622
+ | Schema | File | Purpose |
623
+ |--------|------|---------|
624
+ | Segment analysis | `analysis-segment.schema.json` | Validates each segment's extracted data |
625
+ | Compiled analysis | `analysis-compiled.schema.json` | Validates the final cross-segment compilation |
626
+
627
+ Validation is performed by `src/utils/schema-validator.js` using [ajv](https://ajv.js.org/). Validation errors are reported as warnings with contextual hints for the retry/focused-pass cycle — they do not hard-fail the pipeline but are injected as corrective hints when the quality gate triggers a retry.
628
+
629
+ ---
630
+
631
+ ## Test Suite
632
+
633
+ The project includes a comprehensive test suite using [vitest](https://vitest.dev/):
634
+
635
+ | Metric | Value |
636
+ |--------|-------|
637
+ | Test files | 13 |
638
+ | Total tests | 285 |
639
+ | Framework | vitest v4.x |
640
+ | Coverage | `@vitest/coverage-v8` |
641
+
642
+ **Test categories:**
643
+
644
+ | Directory | What's Tested |
645
+ |-----------|---------------|
646
+ | `tests/utils/` | Utility modules: adaptive-budget, cli, confidence-filter, context-manager, diff-engine, format, json-parser, progress-bar, quality-gate, retry, schema-validator |
647
+ | `tests/renderers/` | Renderer modules: html, markdown |
648
+
649
+ **Commands:**
650
+
651
+ ```bash
652
+ npm test # Run all tests
653
+ npm run test:watch # Watch mode
654
+ npm run test:coverage # Coverage report
655
+ ```
656
+
657
+ ---
658
+
599
659
  ## See Also
600
660
 
601
661
  | Doc | What's In It |
@@ -603,3 +663,44 @@ When `usedExternalUrl` is `true`, the `fileUri` contains the Firebase Storage do
603
663
  | 📖 [README.md](README.md) | Setup, CLI flags, configuration, features |
604
664
  | 📖 [QUICK_START.md](QUICK_START.md) | Step-by-step first-time walkthrough |
605
665
  | 🔭 [EXPLORATION.md](EXPLORATION.md) | Module map, line counts, future roadmap |
666
+
667
+ ---
668
+
669
+ ## JSON Schema Validation
670
+
671
+ All AI output is validated against JSON Schema definitions in `src/schemas/`:
672
+
673
+ | Schema | File | Purpose |
674
+ |--------|------|---------|
675
+ | Segment analysis | `analysis-segment.schema.json` | Validates each segment's extracted data |
676
+ | Compiled analysis | `analysis-compiled.schema.json` | Validates the final cross-segment compilation |
677
+
678
+ Validation is performed by `src/utils/schema-validator.js` using [ajv](https://ajv.js.org/). Validation errors are reported as warnings with contextual hints for the retry/focused-pass cycle — they do not hard-fail the pipeline but are injected as corrective hints when the quality gate triggers a retry.
679
+
680
+ ---
681
+
682
+ ## Test Suite
683
+
684
+ The project includes a comprehensive test suite using [vitest](https://vitest.dev/):
685
+
686
+ | Metric | Value |
687
+ |--------|-------|
688
+ | Test files | 13 |
689
+ | Total tests | 285 |
690
+ | Framework | vitest v4.x |
691
+ | Coverage | `@vitest/coverage-v8` |
692
+
693
+ **Test categories:**
694
+
695
+ | Directory | What's Tested |
696
+ |-----------|---------------|
697
+ | `tests/utils/` | Utility modules: adaptive-budget, cli, confidence-filter, context-manager, diff-engine, format, json-parser, progress-bar, quality-gate, retry, schema-validator |
698
+ | `tests/renderers/` | Renderer modules: html, markdown |
699
+
700
+ **Commands:**
701
+
702
+ ```bash
703
+ npm test # Run all tests
704
+ npm run test:watch # Watch mode
705
+ npm run test:coverage # Coverage report
706
+ ```