task-summary-extractor 9.3.1 → 9.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +1 -1
- package/ARCHITECTURE.md +43 -37
- package/QUICK_START.md +5 -3
- package/README.md +50 -13
- package/package.json +1 -1
- package/src/modes/deep-summary.js +51 -20
- package/src/phases/discover.js +1 -0
- package/src/phases/init.js +63 -1
- package/src/phases/process-media.js +32 -5
- package/src/pipeline.js +22 -4
- package/src/services/gemini.js +139 -14
- package/src/services/video.js +116 -25
- package/src/utils/cli.js +12 -2
- package/src/utils/context-manager.js +31 -4
package/.env.example
CHANGED
|
@@ -9,7 +9,7 @@ FIREBASE_MEASUREMENT_ID=G-XXXXXXXXXX
|
|
|
9
9
|
|
|
10
10
|
# ======================== GEMINI AI ========================
|
|
11
11
|
GEMINI_API_KEY=your_gemini_api_key
|
|
12
|
-
GEMINI_MODEL=gemini-2.5-flash
|
|
12
|
+
GEMINI_MODEL=gemini-2.5-flash-lite
|
|
13
13
|
|
|
14
14
|
# ======================== VIDEO PROCESSING ========================
|
|
15
15
|
# Speed multiplier (default: 1.6)
|
package/ARCHITECTURE.md
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
# Architecture & Technical Deep Dive
|
|
2
2
|
|
|
3
3
|
> Internal reference for the pipeline's architecture, processing flows, and design decisions.
|
|
4
|
-
> For setup instructions, see [README.md](README.md) · [Quick Start](QUICK_START.md)
|
|
5
|
-
> For module map and roadmap, see [EXPLORATION.md](EXPLORATION.md)
|
|
4
|
+
> For setup instructions, see [README.md](README.md) · [Quick Start](QUICK_START.md)
|
|
6
5
|
|
|
7
6
|
---
|
|
8
7
|
|
|
@@ -126,6 +125,7 @@ flowchart TB
|
|
|
126
125
|
| 1 | **Init** | CLI parsing, interactive folder selection (if no arg), config validation, logger setup, load learning insights, route to dynamic/progress mode |
|
|
127
126
|
| 2 | **Discover** | Find videos/audio, discover documents, resolve user name, check resume state |
|
|
128
127
|
| 3 | **Services** | Firebase auth, Gemini init, prepare document parts |
|
|
128
|
+
| 3.5 | **Deep Summary** | (optional) Pre-summarize context docs with Gemini — 60-80% token savings |
|
|
129
129
|
| 4 | **Process** | Compress → Upload → Analyze → Quality Gate → Retry → Focused Pass |
|
|
130
130
|
| 5 | **Compile** | Cross-segment compilation, diff engine comparison |
|
|
131
131
|
| 6 | **Output** | Write JSON, render Markdown + HTML, upload to Firebase |
|
|
@@ -199,7 +199,7 @@ Each video segment goes through this flow (Phase 4 detail):
|
|
|
199
199
|
|
|
200
200
|
```mermaid
|
|
201
201
|
flowchart TB
|
|
202
|
-
START(["Segment N"]) --> COMPRESS["ffmpeg compress\nH.264 CRF 24, 1.
|
|
202
|
+
START(["Segment N"]) --> COMPRESS["ffmpeg compress\nH.264 CRF 24, 1.6x speed"]
|
|
203
203
|
COMPRESS --> VERIFY["Verify segment integrity"]
|
|
204
204
|
VERIFY --> UPLOAD_FB["Upload to Firebase Storage\n→ download URL"]
|
|
205
205
|
|
|
@@ -525,6 +525,7 @@ Directories skipped during recursive discovery: `node_modules`, `.git`, `compres
|
|
|
525
525
|
| Stage | Skip Condition |
|
|
526
526
|
|-------|----------------|
|
|
527
527
|
| **Compression** | `compressed/{video}/segment_*.mp4` exist on disk |
|
|
528
|
+
| **No-compress split** | `--no-compress` flag: raw keyframe split via `ffmpeg -c copy` (no re-encoding) |
|
|
528
529
|
| **Firebase upload** | File already exists at `calls/{name}/segments/{video}/` (bypassed by `--force-upload`) |
|
|
529
530
|
| **Storage URL → Gemini** | Firebase download URL available (bypassed by `--no-storage-url`) |
|
|
530
531
|
| **Gemini analysis** | Run file exists in `gemini_runs/` AND user chooses not to re-analyze |
|
|
@@ -563,7 +564,7 @@ JSONL structured format includes phase spans with timing metrics for observabili
|
|
|
563
564
|
| **ffmpeg** | System binary | H.264 video compression + segmentation |
|
|
564
565
|
| **Git** | System binary | Change detection for progress tracking |
|
|
565
566
|
|
|
566
|
-
**Codebase: ~
|
|
567
|
+
**Codebase: ~48 files · 13,600+ lines** · npm package: `task-summary-extractor` · CLI: `taskex`
|
|
567
568
|
|
|
568
569
|
---
|
|
569
570
|
|
|
@@ -577,6 +578,11 @@ JSONL structured format includes phase spans with timing metrics for observabili
|
|
|
577
578
|
| Sharpening | `unsharp=3:3:0.3` | Preserve text clarity |
|
|
578
579
|
| x264 params | `aq-mode=3:deblock=-1,-1:psy-rd=1.0,0.0` | Text readability |
|
|
579
580
|
| Audio | AAC, 64–128k, original sample rate | Clear speech |
|
|
581
|
+
| Speed | 1.6× default (`--speed` flag, env `VIDEO_SPEED`) | Reduce tokens per segment |
|
|
582
|
+
| Segment Duration | 280s default, compress mode only (`--segment-time` flag) | Context budget per segment |
|
|
583
|
+
| No-Compress Mode | Off by default (`--no-compress` flag) | Stream-copy split at 1200s (20 min), no re-encoding |
|
|
584
|
+
|
|
585
|
+
> **Google Gemini constraints:** ~300 tokens/sec (default res), ~100 tok/sec (low res). File API: 2 GB max per file, 20 GB total storage per project. Max ~1 hour at default res per 1M context window.
|
|
580
586
|
|
|
581
587
|
---
|
|
582
588
|
|
|
@@ -634,8 +640,8 @@ The project includes a comprehensive test suite using [vitest](https://vitest.de
|
|
|
634
640
|
|
|
635
641
|
| Metric | Value |
|
|
636
642
|
|--------|-------|
|
|
637
|
-
| Test files |
|
|
638
|
-
| Total tests |
|
|
643
|
+
| Test files | 15 |
|
|
644
|
+
| Total tests | 331 |
|
|
639
645
|
| Framework | vitest v4.x |
|
|
640
646
|
| Coverage | `@vitest/coverage-v8` |
|
|
641
647
|
|
|
@@ -662,45 +668,45 @@ npm run test:coverage # Coverage report
|
|
|
662
668
|
|-----|-------------|
|
|
663
669
|
| 📖 [README.md](README.md) | Setup, CLI flags, configuration, features |
|
|
664
670
|
| 📖 [QUICK_START.md](QUICK_START.md) | Step-by-step first-time walkthrough |
|
|
665
|
-
| 🔭 [EXPLORATION.md](EXPLORATION.md) | Module map, line counts, future roadmap |
|
|
666
671
|
|
|
667
672
|
---
|
|
668
673
|
|
|
669
|
-
##
|
|
674
|
+
## Deep Summary
|
|
670
675
|
|
|
671
|
-
|
|
676
|
+
The `--deep-summary` flag (or interactive prompt when many docs are detected) pre-summarizes context documents before segment analysis:
|
|
672
677
|
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
678
|
+
```mermaid
|
|
679
|
+
flowchart TB
|
|
680
|
+
START(["Context Docs"]) --> PARTITION["Partition: summarize vs. keep full"]
|
|
681
|
+
PARTITION --> SKIP["Skip tiny docs (<500 chars)"]
|
|
682
|
+
PARTITION --> EXCL["Excluded docs → keep full fidelity"]
|
|
683
|
+
PARTITION --> TO_SUM["Docs to summarize"]
|
|
684
|
+
TO_SUM --> TRUNC["Truncate oversized docs (>900K chars)"]
|
|
685
|
+
TRUNC --> BATCH["Group into batches\n(≤600K chars each)"]
|
|
686
|
+
BATCH --> AI["Gemini summarization\n(per batch)"]
|
|
687
|
+
AI --> REPLACE["Replace full content\nwith condensed summaries"]
|
|
688
|
+
REPLACE --> OUT(["Token-efficient\ncontext docs"])
|
|
689
|
+
```
|
|
681
690
|
|
|
682
|
-
|
|
691
|
+
| Constant | Value | Purpose |
|
|
692
|
+
|----------|-------|---------|
|
|
693
|
+
| `BATCH_MAX_CHARS` | 600,000 | Max input chars per summarization batch |
|
|
694
|
+
| `MAX_DOC_CHARS` | 900,000 | Hard cap per-document before truncation |
|
|
695
|
+
| `SUMMARY_MAX_OUTPUT` | 16,384 | Max output tokens per summarization call |
|
|
696
|
+
| `MIN_SUMMARIZE_LENGTH` | 500 | Docs below this skip summarization |
|
|
683
697
|
|
|
684
|
-
The
|
|
698
|
+
Typical savings: 60-80% reduction in per-segment context tokens. The user can exclude specific docs from summarization via `--exclude-docs` or the interactive picker.
|
|
685
699
|
|
|
686
|
-
|
|
687
|
-
|--------|-------|
|
|
688
|
-
| Test files | 13 |
|
|
689
|
-
| Total tests | 285 |
|
|
690
|
-
| Framework | vitest v4.x |
|
|
691
|
-
| Coverage | `@vitest/coverage-v8` |
|
|
692
|
-
|
|
693
|
-
**Test categories:**
|
|
700
|
+
---
|
|
694
701
|
|
|
695
|
-
|
|
696
|
-
|-----------|---------------|
|
|
697
|
-
| `tests/utils/` | Utility modules: adaptive-budget, cli, confidence-filter, context-manager, diff-engine, format, json-parser, progress-bar, quality-gate, retry, schema-validator |
|
|
698
|
-
| `tests/renderers/` | Renderer modules: html, markdown |
|
|
702
|
+
## Context Window Safety
|
|
699
703
|
|
|
700
|
-
|
|
704
|
+
Safeguards to prevent context window overflow:
|
|
701
705
|
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
706
|
+
| Safeguard | Where | What It Does |
|
|
707
|
+
|-----------|-------|-------------|
|
|
708
|
+
| **P0/P1 hard cap** | `context-manager.js` | Critical docs can't exceed 2× the token budget |
|
|
709
|
+
| **VTT fallback cap** | `context-manager.js` | Full VTT fallback capped at 500K chars |
|
|
710
|
+
| **Doc truncation** | `deep-summary.js` | Oversized docs truncated to 900K chars before summarization |
|
|
711
|
+
| **Compilation pre-flight** | `gemini.js` | Estimates tokens before compilation; trims middle segments if >80% of context |
|
|
712
|
+
| **RESOURCE_EXHAUSTED recovery** | `gemini.js` | On quota/context errors: waits 30s, sheds docs, retries with reduced input |
|
package/QUICK_START.md
CHANGED
|
@@ -150,8 +150,8 @@ taskex --name "Your Name" --skip-upload "my-meeting"
|
|
|
150
150
|
### What happens
|
|
151
151
|
|
|
152
152
|
The pipeline will:
|
|
153
|
-
1. **Compress** the video (~30s)
|
|
154
|
-
2. **Segment** it into ≤5 min chunks
|
|
153
|
+
1. **Compress** the video (~30s) — or **split raw** with `--no-compress`
|
|
154
|
+
2. **Segment** it into ≤5 min chunks (configurable with `--segment-time` in compress mode)
|
|
155
155
|
3. **Upload** segments to Firebase Storage (if configured)
|
|
156
156
|
4. **Analyze** each segment with Gemini AI — uses Firebase Storage URL directly when available (skips separate Gemini upload)
|
|
157
157
|
5. **Quality check** — retry weak segments automatically (reuses file reference — no re-upload)
|
|
@@ -162,6 +162,8 @@ The pipeline will:
|
|
|
162
162
|
|
|
163
163
|
> **Tip:** Use `--force-upload` to re-upload files that already exist in Storage. Use `--no-storage-url` to bypass Storage URL optimization and force Gemini File API uploads.
|
|
164
164
|
|
|
165
|
+
> **Tip:** Use `--no-compress` to skip re-encoding (auto-splits at 20 min). Use `--speed 2.0` to speed up compressed playback (saves tokens), or `--segment-time 600` for longer compressed segments.
|
|
166
|
+
|
|
165
167
|
This takes **~2-5 minutes** depending on video length.
|
|
166
168
|
|
|
167
169
|
---
|
|
@@ -223,6 +225,7 @@ my-project/runs/{timestamp}/
|
|
|
223
225
|
| **Force Gemini File API** | `taskex --no-storage-url "my-meeting"` |
|
|
224
226
|
| **Preview without running** | `taskex --dry-run "my-meeting"` |
|
|
225
227
|
| **Deep dive docs** | `taskex --deep-dive "my-meeting"` |
|
|
228
|
+
| **Pre-summarize docs** | `taskex --deep-summary "my-meeting"` |
|
|
226
229
|
| **Generate docs (no video)** | `taskex --dynamic "my-project"` |
|
|
227
230
|
| **Track progress via git** | `taskex --update-progress --repo "C:\project" "my-meeting"` |
|
|
228
231
|
| **Debug mode** | `taskex --log-level debug "my-meeting"` |
|
|
@@ -272,4 +275,3 @@ Your recordings, `.env`, logs — everything local is `.gitignore`d and safe.
|
|
|
272
275
|
|------|-------|
|
|
273
276
|
| Full feature list, all CLI flags, configuration | [README.md](README.md) |
|
|
274
277
|
| How the pipeline works internally | [ARCHITECTURE.md](ARCHITECTURE.md) |
|
|
275
|
-
| Module map, line counts, roadmap | [EXPLORATION.md](EXPLORATION.md) |
|
package/README.md
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# Task Summary Extractor
|
|
2
2
|
|
|
3
|
-
> **v9.
|
|
3
|
+
> **v9.5.0** — AI-powered content analysis CLI — meetings, recordings, documents, or any mix. Install globally, run anywhere.
|
|
4
4
|
|
|
5
5
|
<p align="center">
|
|
6
6
|
<img src="https://img.shields.io/badge/node-%3E%3D18.0.0-green" alt="Node.js" />
|
|
7
7
|
<img src="https://img.shields.io/badge/gemini-2.5--flash-blue" alt="Gemini" />
|
|
8
8
|
<img src="https://img.shields.io/badge/firebase-11.x-orange" alt="Firebase" />
|
|
9
|
-
<img src="https://img.shields.io/badge/version-9.
|
|
10
|
-
<img src="https://img.shields.io/badge/tests-
|
|
9
|
+
<img src="https://img.shields.io/badge/version-9.5.0-brightgreen" alt="Version" />
|
|
10
|
+
<img src="https://img.shields.io/badge/tests-331%20passing-brightgreen" alt="Tests" />
|
|
11
11
|
<img src="https://img.shields.io/badge/npm-task--summary--extractor-red" alt="npm" />
|
|
12
12
|
</p>
|
|
13
13
|
|
|
@@ -62,6 +62,20 @@ taskex --update-progress --repo "C:\my-project" "my-meeting"
|
|
|
62
62
|
|
|
63
63
|
> **v7.2.3**: If the call folder isn't a git repo, the tool auto-initializes one for baseline tracking.
|
|
64
64
|
|
|
65
|
+
### ⚡ Deep Summary (`--deep-summary`)
|
|
66
|
+
|
|
67
|
+
Pre-summarize context documents to reduce per-segment token usage by 60-80%:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
taskex --deep-summary --name "Jane" "my-meeting"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Exclude specific docs from summarization (keep at full fidelity):
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
taskex --deep-summary --exclude-docs "code-map.md,sprint.md" "my-meeting"
|
|
77
|
+
```
|
|
78
|
+
|
|
65
79
|
> See all modes explained with diagrams → [ARCHITECTURE.md](ARCHITECTURE.md#pipeline-phases)
|
|
66
80
|
|
|
67
81
|
---
|
|
@@ -172,6 +186,8 @@ These are the ones you'll actually use:
|
|
|
172
186
|
| `--format <type>` | Output format: `md`, `html`, `json`, `pdf`, `docx`, `all` (default: `md`) | `--format html` |
|
|
173
187
|
| `--min-confidence <level>` | Filter items by confidence: `high`, `medium`, `low` | `--min-confidence high` |
|
|
174
188
|
| `--no-html` | Suppress HTML report generation | `--no-html` |
|
|
189
|
+
| `--deep-summary` | Pre-summarize context docs (60-80% token savings) | `--deep-summary` |
|
|
190
|
+
| `--exclude-docs <list>` | Docs to keep full during deep-summary (comma-separated) | `--exclude-docs "code-map.md"` |
|
|
175
191
|
|
|
176
192
|
**Typical usage:**
|
|
177
193
|
|
|
@@ -198,6 +214,7 @@ Choose what the tool does. Only use one at a time:
|
|
|
198
214
|
| *(none)* | **Content analysis** | `results.md` + `results.html` — structured task document |
|
|
199
215
|
| `--dynamic` | **Doc generation** | `INDEX.md` + 3–15 topic documents |
|
|
200
216
|
| `--deep-dive` | **Topic explainers** | `INDEX.md` + per-topic deep-dive docs |
|
|
217
|
+
| `--deep-summary` | **Token-efficient analysis** | Same as content analysis, but context docs pre-summarized (60-80% savings) |
|
|
201
218
|
| `--update-progress` | **Progress check** | `progress.md` — item status via git |
|
|
202
219
|
|
|
203
220
|
**Dynamic mode** also uses:
|
|
@@ -224,6 +241,23 @@ Skip parts of the pipeline you don't need:
|
|
|
224
241
|
| `--skip-compression` | Video compression | You already compressed/segmented the video |
|
|
225
242
|
| `--skip-gemini` | AI analysis entirely | You just want to compress & upload |
|
|
226
243
|
|
|
244
|
+
### Video Processing Flags
|
|
245
|
+
|
|
246
|
+
Control how video is processed before AI analysis:
|
|
247
|
+
|
|
248
|
+
| Flag | Default | Description |
|
|
249
|
+
|------|---------|-------------|
|
|
250
|
+
| `--no-compress` | off | Skip re-encoding — pass raw video to Gemini (auto-splits at 20 min) |
|
|
251
|
+
| `--speed <n>` | `1.6` | Playback speed multiplier (compress mode only) |
|
|
252
|
+
| `--segment-time <n>` | `280` | Segment duration in seconds, compress mode only (30–3600) |
|
|
253
|
+
|
|
254
|
+
**Duration constraints** (per [Google Gemini docs](https://ai.google.dev/gemini-api/docs/vision#video)):
|
|
255
|
+
- Default resolution: ~300 tokens/sec → max ~55 min/segment (recommended: ≤20 min)
|
|
256
|
+
- File API limit: 2 GB per file / 20 GB total storage per project
|
|
257
|
+
- Supported formats: mp4, mpeg, mov, avi, x-flv, mpg, webm, wmv, 3gpp
|
|
258
|
+
|
|
259
|
+
> **Tip:** Use `--no-compress` for large, high-quality recordings that you want to analyze at original quality. Raw video is auto-split at 20-minute intervals via `ffmpeg -c copy` (stream-copy). `--speed` and `--segment-time` only apply to compression mode.
|
|
260
|
+
|
|
227
261
|
### Tuning Flags
|
|
228
262
|
|
|
229
263
|
**You probably don't need these.** The defaults work well. These are for power users:
|
|
@@ -259,12 +293,13 @@ taskex [flags] [folder]
|
|
|
259
293
|
|
|
260
294
|
CONFIG --gemini-key --firebase-key --firebase-project
|
|
261
295
|
--firebase-bucket --firebase-domain
|
|
262
|
-
MODES --dynamic --deep-dive --update-progress
|
|
296
|
+
MODES --dynamic --deep-dive --deep-summary --update-progress
|
|
263
297
|
CORE --name --model --skip-upload --resume --reanalyze --dry-run
|
|
264
298
|
OUTPUT --format <md|html|json|pdf|docx|all> --min-confidence <high|medium|low>
|
|
265
299
|
--no-html
|
|
266
300
|
UPLOAD --force-upload --no-storage-url
|
|
267
301
|
SKIP --skip-compression --skip-gemini
|
|
302
|
+
VIDEO --no-compress --speed <n> --segment-time <n>
|
|
268
303
|
DYNAMIC --request <text>
|
|
269
304
|
PROGRESS --repo <path>
|
|
270
305
|
TUNING --thinking-budget --compilation-thinking-budget --parallel
|
|
@@ -394,7 +429,7 @@ GEMINI_API_KEY=your-key-here
|
|
|
394
429
|
|
|
395
430
|
# Optional — uncomment to customize
|
|
396
431
|
# GEMINI_MODEL=gemini-2.5-flash
|
|
397
|
-
# VIDEO_SPEED=1.
|
|
432
|
+
# VIDEO_SPEED=1.6
|
|
398
433
|
# THINKING_BUDGET=24576
|
|
399
434
|
# LOG_LEVEL=info
|
|
400
435
|
|
|
@@ -413,7 +448,7 @@ GEMINI_API_KEY=your-key-here
|
|
|
413
448
|
|
|
414
449
|
| Feature | Description |
|
|
415
450
|
|---------|-------------|
|
|
416
|
-
| **Video/Audio Compression** | H.264 CRF 24, text-optimized sharpening,
|
|
451
|
+
| **Video/Audio Compression** | H.264 CRF 24, text-optimized sharpening, 1.6× speed |
|
|
417
452
|
| **Smart Segmentation** | ≤5 min chunks with boundary-aware splitting |
|
|
418
453
|
| **Cross-Segment Continuity** | Ticket IDs, names, and context carry forward |
|
|
419
454
|
| **Document Discovery** | Auto-finds docs in all subfolders |
|
|
@@ -434,6 +469,8 @@ GEMINI_API_KEY=your-key-here
|
|
|
434
469
|
| **HTML Report** | Self-contained HTML report with collapsible sections, filtering, dark mode |
|
|
435
470
|
| **JSON Schema Validation** | Validates AI output against JSON Schema (segment + compiled) |
|
|
436
471
|
| **Confidence Filter** | `--min-confidence` flag to exclude low-confidence items from output |
|
|
472
|
+
| **Deep Summary** | `--deep-summary` pre-summarizes context docs, 60-80% token savings per segment |
|
|
473
|
+
| **Context Window Safety** | Auto-truncation, pre-flight token checks, RESOURCE_EXHAUSTED recovery |
|
|
437
474
|
| **Multi-Format Output** | `--format` flag: Markdown, HTML, JSON, PDF, DOCX, or all formats at once |
|
|
438
475
|
| **Interactive CLI** | Run with no args → guided experience |
|
|
439
476
|
| **Resume / Checkpoint** | `--resume` continues interrupted runs |
|
|
@@ -507,6 +544,7 @@ task-summary-extractor/
|
|
|
507
544
|
│ │ ├── git.js Git CLI wrapper
|
|
508
545
|
│ │ └── doc-parser.js Document text extraction (DOCX, XLSX, PPTX, etc.)
|
|
509
546
|
│ ├── modes/ AI-heavy pipeline phase modules
|
|
547
|
+
│ │ ├── deep-summary.js Pre-summarize context docs (deep-summary feature)
|
|
510
548
|
│ │ ├── deep-dive.js Topic discovery & deep-dive doc generation
|
|
511
549
|
│ │ ├── dynamic-mode.js Dynamic document planning & generation
|
|
512
550
|
│ │ ├── focused-reanalysis.js Targeted reanalysis of weak segments
|
|
@@ -528,17 +566,14 @@ task-summary-extractor/
|
|
|
528
566
|
│ ├── schema-validator.js JSON Schema validation (ajv)
|
|
529
567
|
│ └── ... (15 more utility modules)
|
|
530
568
|
│
|
|
531
|
-
├── tests/ Test suite —
|
|
569
|
+
├── tests/ Test suite — 331 tests across 15 files (vitest)
|
|
532
570
|
│ ├── utils/ Utility module tests
|
|
533
571
|
│ └── renderers/ Renderer tests
|
|
534
572
|
│
|
|
535
573
|
├── QUICK_START.md Step-by-step setup guide
|
|
536
|
-
|
|
537
|
-
└── EXPLORATION.md Roadmap & future features
|
|
574
|
+
└── ARCHITECTURE.md Technical deep dive
|
|
538
575
|
```
|
|
539
576
|
|
|
540
|
-
> Full module map with line counts → [EXPLORATION.md](EXPLORATION.md#full-module-map)
|
|
541
|
-
|
|
542
577
|
---
|
|
543
578
|
|
|
544
579
|
## npm Scripts
|
|
@@ -551,7 +586,7 @@ task-summary-extractor/
|
|
|
551
586
|
| `npm run check` | Validate environment |
|
|
552
587
|
| `npm start` | Run the pipeline |
|
|
553
588
|
| `npm run help` | Show CLI help |
|
|
554
|
-
| `npm test` | Run test suite (
|
|
589
|
+
| `npm test` | Run test suite (331 tests) |
|
|
555
590
|
| `npm run test:watch` | Run tests in watch mode |
|
|
556
591
|
| `npm run test:coverage` | Run tests with coverage report |
|
|
557
592
|
|
|
@@ -561,6 +596,9 @@ task-summary-extractor/
|
|
|
561
596
|
|
|
562
597
|
| Version | Highlights |
|
|
563
598
|
|---------|-----------|
|
|
599
|
+
| **v9.4.0** | **Context window safety** — pre-flight token checks, auto-truncation for oversized docs/VTTs, RESOURCE_EXHAUSTED recovery with automatic doc shedding, chunked compilation for large segment sets, P0/P1 hard cap (2× budget) prevents context overflow, improved deep-summary prompt quality |
|
|
600
|
+
| **v9.3.1** | **Audit & polish** — VIDEO_SPEED 1.5→1.6, `--exclude-docs` flag for non-interactive deep-summary exclusion, friendlier Gemini error messages, dead code removal, DRY RUN_PRESETS |
|
|
601
|
+
| **v9.3.0** | **Deep summary** — `--deep-summary` pre-summarizes context documents (60-80% token savings), interactive doc picker, `--exclude-docs` for CLI automation, batch processing |
|
|
564
602
|
| **v9.0.0** | **CLI UX upgrade** — colors & progress bar, HTML reports, PDF & DOCX output (via puppeteer and docx npm package), JSON Schema validation, confidence filter (`--min-confidence`), pipeline decomposition (`src/phases/` — 9 modules), test suite (285 tests via vitest), multi-format output (`--format`: md/html/json/pdf/docx/all), doc-parser service, shared renderer utilities |
|
|
565
603
|
| **v8.3.0** | **Universal content analysis** — prompt v4.0.0 supports video, audio, documents, and mixed content; input type auto-detection; timestamps conditional on content type; gemini.js bridge text generalized; all markdown docs updated |
|
|
566
604
|
| **v8.2.0** | **Architecture cleanup** — `src/modes/` for AI pipeline phases, `retry.js` self-contained defaults, dead code removal, export trimming, `process_and_upload.js` slim shim, `progress.js` → `checkpoint.js`, merged `prompt.js` into `cli.js` |
|
|
@@ -587,7 +625,6 @@ task-summary-extractor/
|
|
|
587
625
|
|-----|-------------|-------------|
|
|
588
626
|
| 📖 **[QUICK_START.md](QUICK_START.md)** | Full setup walkthrough, examples, troubleshooting | First time using the tool |
|
|
589
627
|
| 🏗️ **[ARCHITECTURE.md](ARCHITECTURE.md)** | Pipeline phases, algorithms, Mermaid diagrams | Understanding how it works |
|
|
590
|
-
| 🔭 **[EXPLORATION.md](EXPLORATION.md)** | Module map, line counts, future roadmap | Contributing or extending |
|
|
591
628
|
|
|
592
629
|
---
|
|
593
630
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "task-summary-extractor",
|
|
3
|
-
"version": "9.
|
|
3
|
+
"version": "9.5.0",
|
|
4
4
|
"description": "AI-powered meeting analysis & document generation CLI — video + document processing, deep dive docs, dynamic mode, interactive CLI with model selection, confidence scoring, learning loop, git progress tracking",
|
|
5
5
|
"main": "process_and_upload.js",
|
|
6
6
|
"bin": {
|
|
@@ -36,6 +36,13 @@ const BATCH_MAX_CHARS = 600000;
|
|
|
36
36
|
/** Minimum content length (chars) to bother summarizing — below this, keep full */
|
|
37
37
|
const MIN_SUMMARIZE_LENGTH = 500;
|
|
38
38
|
|
|
39
|
+
/**
|
|
40
|
+
* Hard cap per-document chars before sending to Gemini.
|
|
41
|
+
* Gemini context = 1M tokens; prompt overhead ~50K tokens; at 0.3 tok/char
|
|
42
|
+
* 900K chars ≈ 270K tokens — safe with prompt + thinking overhead.
|
|
43
|
+
*/
|
|
44
|
+
const MAX_DOC_CHARS = 900000;
|
|
45
|
+
|
|
39
46
|
// ======================== BATCH BUILDER ========================
|
|
40
47
|
|
|
41
48
|
/**
|
|
@@ -51,8 +58,22 @@ function buildBatches(docs, maxChars = BATCH_MAX_CHARS) {
|
|
|
51
58
|
let currentBatch = [];
|
|
52
59
|
let currentChars = 0;
|
|
53
60
|
|
|
54
|
-
for (
|
|
55
|
-
|
|
61
|
+
for (let doc of docs) {
|
|
62
|
+
let docChars = doc.content ? doc.content.length : 0;
|
|
63
|
+
|
|
64
|
+
// Truncate extremely large docs to avoid exceeding the context window.
|
|
65
|
+
// Any single doc beyond MAX_DOC_CHARS is capped (tail is dropped) and a
|
|
66
|
+
// warning is prepended so the summariser knows the content is incomplete.
|
|
67
|
+
if (docChars > MAX_DOC_CHARS) {
|
|
68
|
+
const truncated = doc.content.substring(0, MAX_DOC_CHARS);
|
|
69
|
+
doc = {
|
|
70
|
+
...doc,
|
|
71
|
+
content: `[TRUNCATED — original ${(docChars / 1024).toFixed(0)} KB exceeded the ${(MAX_DOC_CHARS / 1024).toFixed(0)} KB limit; only the first ${(MAX_DOC_CHARS / 1024).toFixed(0)} KB is included]\n\n${truncated}`,
|
|
72
|
+
_truncatedFrom: docChars,
|
|
73
|
+
};
|
|
74
|
+
docChars = doc.content.length;
|
|
75
|
+
console.warn(` ${c.warn(`${doc.fileName} truncated from ${(doc._truncatedFrom / 1024).toFixed(0)} KB to ${(MAX_DOC_CHARS / 1024).toFixed(0)} KB for deep summary`)}`);
|
|
76
|
+
}
|
|
56
77
|
|
|
57
78
|
// If this single doc exceeds the batch limit, it gets its own batch
|
|
58
79
|
if (docChars > maxChars) {
|
|
@@ -120,23 +141,35 @@ async function summarizeBatch(ai, docs, opts = {}) {
|
|
|
120
141
|
|
|
121
142
|
const promptText = `You are a precision document summarizer for a meeting analysis pipeline.
|
|
122
143
|
|
|
123
|
-
Your job: read ALL documents below and produce a CONDENSED version of each that preserves
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
144
|
+
Your job: read ALL documents below and produce a CONDENSED version of each that preserves every piece of actionable information.
|
|
145
|
+
|
|
146
|
+
WHAT TO PRESERVE (in order of importance):
|
|
147
|
+
1. IDENTIFIERS — Every ticket ID, task ID, CR number, PR number, JIRA key, GitHub issue, reference number, version number. Copy these VERBATIM — do not paraphrase or abbreviate IDs.
|
|
148
|
+
2. PEOPLE — All assignees, reviewers, approvers, requesters, and responsible parties. Use full names exactly as they appear.
|
|
149
|
+
3. STATUSES & STATES — All statuses (open, closed, in_progress, blocked, deferred, etc.) and state markers (✅, ⬜, ⏸️, 🔲). Preserve the exact status vocabulary used in the document.
|
|
150
|
+
4. ACTION ITEMS — Every action item, commitment, and deliverable with its owner, deadline, and dependency chain.
|
|
151
|
+
5. BLOCKERS & DEPENDENCIES — What is blocked, by whom, what it blocks downstream.
|
|
152
|
+
6. DECISIONS & RATIONALE — Key decisions and WHY they were made (not just what).
|
|
153
|
+
7. CROSS-REFERENCES — When Document A references something from Document B, preserve that linkage. If ticket X is mentioned in a code-map entry, keep both the ticket ID and the code-map path.
|
|
154
|
+
8. TECHNICAL SPECIFICS — File paths, code references, API endpoints, database tables, configuration keys, environment names (dev/staging/prod).
|
|
155
|
+
9. NUMERICAL DATA — Percentages, counts, dates, deadlines, version numbers, sizes.
|
|
156
|
+
10. CHECKLISTS & PROGRESS — Preserve checklist items with their completion status markers. Include progress ratios (e.g., "35/74 done, 6 blocked").
|
|
157
|
+
|
|
158
|
+
WHAT TO REMOVE:
|
|
134
159
|
- Verbose explanations of well-known concepts
|
|
135
|
-
- Redundant phrasing
|
|
136
|
-
- Formatting-only content (decorative headers,
|
|
137
|
-
- Boilerplate/template text that adds no information
|
|
160
|
+
- Redundant phrasing, filler text, throat-clearing sentences
|
|
161
|
+
- Formatting-only content (decorative headers, horizontal rules, empty sections)
|
|
162
|
+
- Boilerplate/template text that adds no project-specific information
|
|
163
|
+
- Repeated definitions or glossary entries that don't change across documents
|
|
138
164
|
${focusSection}
|
|
139
165
|
|
|
166
|
+
QUALITY REQUIREMENTS:
|
|
167
|
+
- Aim for 70-80% size reduction while preserving ALL actionable information.
|
|
168
|
+
- Every ID, every name, every status MUST survive the summarization.
|
|
169
|
+
- If two documents reference the same entity (ticket, file, person), ensure the summary preserves enough context in BOTH summaries for downstream consumers to make the connection.
|
|
170
|
+
- When a document contains a table, preserve the table structure (header + key rows). Omit empty or low-value rows.
|
|
171
|
+
- When a document has nested structure (subsections, indented lists), preserve the hierarchy — use indentation or numbering.
|
|
172
|
+
|
|
140
173
|
OUTPUT FORMAT:
|
|
141
174
|
Return valid JSON with this structure:
|
|
142
175
|
{
|
|
@@ -151,9 +184,6 @@ Return valid JSON with this structure:
|
|
|
151
184
|
}
|
|
152
185
|
}
|
|
153
186
|
|
|
154
|
-
Aim for 70-80% size reduction while preserving ALL actionable information.
|
|
155
|
-
Every ID, every name, every status must survive the summarization.
|
|
156
|
-
|
|
157
187
|
DOCUMENTS TO SUMMARIZE (${docEntries.length} documents):
|
|
158
188
|
|
|
159
189
|
${docEntries.join('\n\n')}`;
|
|
@@ -162,7 +192,7 @@ ${docEntries.join('\n\n')}`;
|
|
|
162
192
|
model: config.GEMINI_MODEL,
|
|
163
193
|
contents: [{ role: 'user', parts: [{ text: promptText }] }],
|
|
164
194
|
config: {
|
|
165
|
-
systemInstruction: 'You are a lossless information compressor. Preserve every ID, name, status, assignment, and actionable detail. Output valid JSON only.',
|
|
195
|
+
systemInstruction: 'You are a lossless information compressor specialized in engineering and business documents. Preserve every ID, name, status, assignment, dependency, file path, decision rationale, and actionable detail. Maintain cross-document references (when doc A mentions entity from doc B, keep both sides). Output valid JSON only.',
|
|
166
196
|
maxOutputTokens: SUMMARY_MAX_OUTPUT,
|
|
167
197
|
temperature: 0,
|
|
168
198
|
thinkingConfig: { thinkingBudget },
|
|
@@ -372,4 +402,5 @@ module.exports = {
|
|
|
372
402
|
SUMMARY_MAX_OUTPUT,
|
|
373
403
|
BATCH_MAX_CHARS,
|
|
374
404
|
MIN_SUMMARIZE_LENGTH,
|
|
405
|
+
MAX_DOC_CHARS,
|
|
375
406
|
};
|
package/src/phases/discover.js
CHANGED
|
@@ -80,6 +80,7 @@ async function phaseDiscover(ctx) {
|
|
|
80
80
|
if (opts.skipUpload) activeFlags.push('skip-upload');
|
|
81
81
|
if (opts.forceUpload) activeFlags.push('force-upload');
|
|
82
82
|
if (opts.noStorageUrl) activeFlags.push('no-storage-url');
|
|
83
|
+
if (opts.noCompress) activeFlags.push('no-compress');
|
|
83
84
|
if (opts.skipCompression) activeFlags.push('skip-compression');
|
|
84
85
|
if (opts.skipGemini) activeFlags.push('skip-gemini');
|
|
85
86
|
if (opts.resume) activeFlags.push('resume');
|
package/src/phases/init.js
CHANGED
|
@@ -50,7 +50,7 @@ async function phaseInit() {
|
|
|
50
50
|
skipUpload: !!flags['skip-upload'],
|
|
51
51
|
forceUpload: !!flags['force-upload'],
|
|
52
52
|
noStorageUrl: !!flags['no-storage-url'],
|
|
53
|
-
skipCompression: !!flags['skip-compression'],
|
|
53
|
+
skipCompression: !!flags['skip-compression'], // DEPRECATED — use --no-compress
|
|
54
54
|
skipGemini: !!flags['skip-gemini'],
|
|
55
55
|
resume: !!flags.resume,
|
|
56
56
|
reanalyze: !!flags.reanalyze,
|
|
@@ -66,6 +66,10 @@ async function phaseInit() {
|
|
|
66
66
|
disableLearning: !!flags['no-learning'],
|
|
67
67
|
disableDiff: !!flags['no-diff'],
|
|
68
68
|
noHtml: !!flags['no-html'],
|
|
69
|
+
// Video processing flags
|
|
70
|
+
noCompress: !!flags['no-compress'],
|
|
71
|
+
speed: flags.speed ? parseFloat(flags.speed) : null,
|
|
72
|
+
segmentTime: flags['segment-time'] ? parseInt(flags['segment-time'], 10) : null,
|
|
69
73
|
deepDive: !!flags['deep-dive'],
|
|
70
74
|
deepSummary: !!flags['deep-summary'],
|
|
71
75
|
deepSummaryExclude: typeof flags['exclude-docs'] === 'string'
|
|
@@ -123,6 +127,55 @@ async function phaseInit() {
|
|
|
123
127
|
}
|
|
124
128
|
}
|
|
125
129
|
|
|
130
|
+
// --- Validate video processing flags ---
|
|
131
|
+
if (opts.noCompress) {
|
|
132
|
+
// --no-compress: raw passthrough — speed and segment-time are not user-configurable
|
|
133
|
+
if (opts.speed !== null) {
|
|
134
|
+
console.log(c.warn(' ⚠ --speed is ignored with --no-compress (raw video is not re-encoded)'));
|
|
135
|
+
opts.speed = null;
|
|
136
|
+
}
|
|
137
|
+
if (opts.segmentTime !== null) {
|
|
138
|
+
console.log(c.warn(' ⚠ --segment-time is ignored with --no-compress (auto: 1200s / 20 min per segment)'));
|
|
139
|
+
opts.segmentTime = null;
|
|
140
|
+
}
|
|
141
|
+
if (opts.skipCompression) {
|
|
142
|
+
console.log(c.warn(' ⚠ --skip-compression is redundant with --no-compress — ignoring'));
|
|
143
|
+
opts.skipCompression = false;
|
|
144
|
+
}
|
|
145
|
+
} else {
|
|
146
|
+
if (opts.speed !== null) {
|
|
147
|
+
if (Number.isNaN(opts.speed) || opts.speed < 0.1 || opts.speed > 10) {
|
|
148
|
+
throw new Error(`Invalid --speed "${flags.speed}". Must be between 0.1 and 10.`);
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
if (opts.segmentTime !== null) {
|
|
152
|
+
if (Number.isNaN(opts.segmentTime) || opts.segmentTime < 30 || opts.segmentTime > 3600) {
|
|
153
|
+
throw new Error(`Invalid --segment-time "${flags['segment-time']}". Must be between 30 and 3600 seconds.`);
|
|
154
|
+
}
|
|
155
|
+
// Duration-aware validation (Google Gemini: ~300 tokens/sec at default resolution)
|
|
156
|
+
const TOKENS_PER_SEC = 300;
|
|
157
|
+
const CONTEXT_LIMIT = 1_048_576;
|
|
158
|
+
const SAFE_VIDEO_BUDGET = CONTEXT_LIMIT * 0.6; // 60% for video, rest for prompt+docs+output
|
|
159
|
+
const effectiveSpeed = opts.speed || 1.0;
|
|
160
|
+
const effectiveVideoSec = opts.segmentTime / effectiveSpeed;
|
|
161
|
+
const estimatedTokens = Math.round(effectiveVideoSec * TOKENS_PER_SEC);
|
|
162
|
+
|
|
163
|
+
if (estimatedTokens > CONTEXT_LIMIT) {
|
|
164
|
+
throw new Error(
|
|
165
|
+
`--segment-time ${opts.segmentTime}s exceeds Gemini context window! ` +
|
|
166
|
+
`Estimated ${(estimatedTokens / 1000).toFixed(0)}K tokens/segment (limit: 1,048K). ` +
|
|
167
|
+
`Reduce to ≤${Math.floor((CONTEXT_LIMIT / TOKENS_PER_SEC) * effectiveSpeed)}s.`
|
|
168
|
+
);
|
|
169
|
+
}
|
|
170
|
+
if (estimatedTokens > SAFE_VIDEO_BUDGET) {
|
|
171
|
+
console.log(c.warn(
|
|
172
|
+
` ⚠ --segment-time ${opts.segmentTime}s is very large (~${(estimatedTokens / 1000).toFixed(0)}K tokens/segment). ` +
|
|
173
|
+
`Recommended: ≤${Math.floor((SAFE_VIDEO_BUDGET / TOKENS_PER_SEC) * effectiveSpeed)}s to leave room for prompt & output.`
|
|
174
|
+
));
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
126
179
|
// --- Validate min-confidence level ---
|
|
127
180
|
if (opts.minConfidence) {
|
|
128
181
|
const { validateConfidenceLevel } = require('../utils/confidence-filter');
|
|
@@ -318,6 +371,15 @@ function _printRunSummary(opts, modelId, models, targetDir) {
|
|
|
318
371
|
console.log(` ${c.dim('Disabled:')} ${disabled.join(c.dim(' · '))}`);
|
|
319
372
|
}
|
|
320
373
|
|
|
374
|
+
// Video processing settings
|
|
375
|
+
const { SPEED, SEG_TIME } = require('../config');
|
|
376
|
+
const effectiveSpeed = opts.noCompress ? 1.0 : (opts.speed || SPEED);
|
|
377
|
+
const effectiveSegTime = opts.noCompress ? 1200 : (opts.segmentTime || SEG_TIME);
|
|
378
|
+
const videoMode = opts.noCompress
|
|
379
|
+
? c.cyan('raw (stream-copy, auto-split at 20 min)')
|
|
380
|
+
: c.green(`compress × ${effectiveSpeed}x | ${effectiveSegTime}s segments`);
|
|
381
|
+
console.log(` ${c.dim('Video:')} ${videoMode}`);
|
|
382
|
+
|
|
321
383
|
if (opts.runMode) {
|
|
322
384
|
console.log(` ${c.dim('Run mode:')} ${c.bold(opts.runMode)}`);
|
|
323
385
|
}
|
|
@@ -10,7 +10,7 @@ const { AUDIO_EXTS, SPEED } = config;
|
|
|
10
10
|
// --- Services ---
|
|
11
11
|
const { uploadToStorage, storageExists } = require('../services/firebase');
|
|
12
12
|
const { processWithGemini, cleanupGeminiFiles } = require('../services/gemini');
|
|
13
|
-
const { compressAndSegment, compressAndSegmentAudio, probeFormat, verifySegment } = require('../services/video');
|
|
13
|
+
const { compressAndSegment, compressAndSegmentAudio, splitOnly, probeFormat, verifySegment } = require('../services/video');
|
|
14
14
|
|
|
15
15
|
// --- Utils ---
|
|
16
16
|
const { fmtDuration, fmtBytes } = require('../utils/format');
|
|
@@ -60,6 +60,12 @@ async function phaseProcessVideo(ctx, videoPath, videoIndex) {
|
|
|
60
60
|
? fs.readdirSync(segmentDir).filter(f => f.startsWith('segment_') && (f.endsWith('.mp4') || f.endsWith('.m4a'))).sort()
|
|
61
61
|
: [];
|
|
62
62
|
|
|
63
|
+
// Build video processing options from CLI flags
|
|
64
|
+
// --no-compress uses hardcoded 1200s (splitOnly default); --segment-time only for compress mode
|
|
65
|
+
const videoOpts = {};
|
|
66
|
+
if (!opts.noCompress && opts.segmentTime) videoOpts.segTime = opts.segmentTime;
|
|
67
|
+
if (!opts.noCompress && opts.speed) videoOpts.speed = opts.speed;
|
|
68
|
+
|
|
63
69
|
if (opts.skipCompression || opts.dryRun) {
|
|
64
70
|
if (existingSegments.length > 0) {
|
|
65
71
|
segments = existingSegments.map(f => path.join(segmentDir, f));
|
|
@@ -70,18 +76,23 @@ async function phaseProcessVideo(ctx, videoPath, videoIndex) {
|
|
|
70
76
|
console.log(` ${c.dim(`[DRY-RUN] Would compress "${path.basename(videoPath)}" into segments`)}`);
|
|
71
77
|
return { fileResult: null, segmentAnalyses: [] };
|
|
72
78
|
}
|
|
73
|
-
segments = compressAndSegment(videoPath, segmentDir);
|
|
79
|
+
segments = compressAndSegment(videoPath, segmentDir, videoOpts);
|
|
74
80
|
log.step(`Compressed → ${segments.length} segment(s)`);
|
|
75
81
|
}
|
|
76
82
|
} else if (existingSegments.length > 0) {
|
|
77
83
|
segments = existingSegments.map(f => path.join(segmentDir, f));
|
|
78
84
|
log.step(`SKIP compression — ${segments.length} segment(s) already on disk`);
|
|
79
85
|
console.log(` ${c.success(`Skipped compression \u2014 ${c.highlight(segments.length)} segment(s) already exist`)}`);
|
|
86
|
+
} else if (opts.noCompress) {
|
|
87
|
+
// --no-compress: split raw video at keyframes, no re-encoding
|
|
88
|
+
segments = splitOnly(videoPath, segmentDir, videoOpts);
|
|
89
|
+
log.step(`Split (raw) → ${segments.length} segment(s)`);
|
|
90
|
+
console.log(` \u2192 ${c.highlight(segments.length)} raw segment(s) created`);
|
|
80
91
|
} else {
|
|
81
92
|
if (isAudio) {
|
|
82
|
-
segments = compressAndSegmentAudio(videoPath, segmentDir);
|
|
93
|
+
segments = compressAndSegmentAudio(videoPath, segmentDir, videoOpts);
|
|
83
94
|
} else {
|
|
84
|
-
segments = compressAndSegment(videoPath, segmentDir);
|
|
95
|
+
segments = compressAndSegment(videoPath, segmentDir, videoOpts);
|
|
85
96
|
}
|
|
86
97
|
log.step(`Compressed → ${segments.length} segment(s)`);
|
|
87
98
|
console.log(` \u2192 ${c.highlight(segments.length)} segment(s) created`);
|
|
@@ -90,6 +101,20 @@ async function phaseProcessVideo(ctx, videoPath, videoIndex) {
|
|
|
90
101
|
progress.markCompressed(baseName, segments.length);
|
|
91
102
|
const origSize = fs.statSync(videoPath).size;
|
|
92
103
|
log.step(`original=${(origSize / 1048576).toFixed(2)}MB (${fmtBytes(origSize)}) | ${segments.length} segment(s)`);
|
|
104
|
+
|
|
105
|
+
// Duration-aware warnings for raw segments
|
|
106
|
+
if (opts.noCompress && segments.length > 0) {
|
|
107
|
+
const totalSegSize = segments.reduce((s, p) => s + fs.statSync(p).size, 0);
|
|
108
|
+
const avgSegMB = totalSegSize / segments.length / 1048576;
|
|
109
|
+
if (avgSegMB > 500) {
|
|
110
|
+
console.warn(` ${c.warn(`Avg segment ~${avgSegMB.toFixed(0)} MB — large raw segments take longer to upload.`)}`);
|
|
111
|
+
console.warn(` ${c.dim(' Tip: remove --no-compress to re-encode into smaller segments.')}`);
|
|
112
|
+
}
|
|
113
|
+
// All raw segments must use Gemini File API (>20 MB external URL limit)
|
|
114
|
+
if (avgSegMB > 20) {
|
|
115
|
+
console.log(` ${c.dim('Raw segments >20 MB — will use Gemini File API upload (not storage URLs).')}`);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
93
118
|
console.log('');
|
|
94
119
|
|
|
95
120
|
const fileResult = {
|
|
@@ -178,10 +203,12 @@ async function phaseProcessVideo(ctx, videoPath, videoIndex) {
|
|
|
178
203
|
}
|
|
179
204
|
|
|
180
205
|
// Calculate cumulative time offsets for VTT time-slicing
|
|
206
|
+
// When --no-compress is active, segments play at real time (speed = 1.0)
|
|
207
|
+
const effectiveSpeed = opts.noCompress ? 1.0 : (opts.speed || SPEED);
|
|
181
208
|
let cumulativeTimeSec = 0;
|
|
182
209
|
for (const meta of segmentMeta) {
|
|
183
210
|
meta.startTimeSec = cumulativeTimeSec;
|
|
184
|
-
meta.endTimeSec = cumulativeTimeSec + (meta.durSec || 0) *
|
|
211
|
+
meta.endTimeSec = cumulativeTimeSec + (meta.durSec || 0) * effectiveSpeed;
|
|
185
212
|
cumulativeTimeSec = meta.endTimeSec;
|
|
186
213
|
}
|
|
187
214
|
|
package/src/pipeline.js
CHANGED
|
@@ -46,7 +46,7 @@ const phaseDeepDive = require('./phases/deep-dive');
|
|
|
46
46
|
// --- Utils (for run orchestration + alt modes) ---
|
|
47
47
|
const { c } = require('./utils/colors');
|
|
48
48
|
const { findDocsRecursive } = require('./utils/fs');
|
|
49
|
-
const { promptUserText, selectDocsToExclude } = require('./utils/cli');
|
|
49
|
+
const { promptUser, promptUserText, selectDocsToExclude } = require('./utils/cli');
|
|
50
50
|
const { createProgressBar } = require('./utils/progress-bar');
|
|
51
51
|
const { buildHealthReport, printHealthDashboard } = require('./utils/health-dashboard');
|
|
52
52
|
const { saveHistory, buildHistoryEntry } = require('./utils/learning-loop');
|
|
@@ -96,6 +96,23 @@ async function run() {
|
|
|
96
96
|
bar.tick('Services ready');
|
|
97
97
|
|
|
98
98
|
// Phase 3.5 (optional): Deep Summary — pre-summarize context docs
|
|
99
|
+
// If user didn't pass --deep-summary but has many context docs, offer it interactively
|
|
100
|
+
if (!fullCtx.opts.deepSummary && process.stdin.isTTY && fullCtx.ai && fullCtx.contextDocs.length >= 3) {
|
|
101
|
+
const inlineDocs = fullCtx.contextDocs.filter(d => d.type === 'inlineText' && d.content);
|
|
102
|
+
const totalChars = inlineDocs.reduce((sum, d) => sum + d.content.length, 0);
|
|
103
|
+
const totalTokensEstimate = Math.ceil(totalChars * 0.3);
|
|
104
|
+
// Only offer when context is large enough to benefit (>100K tokens)
|
|
105
|
+
if (totalTokensEstimate > 100000) {
|
|
106
|
+
console.log('');
|
|
107
|
+
console.log(` ${c.cyan('You have')} ${c.highlight(inlineDocs.length)} ${c.cyan('context docs')} (~${c.highlight((totalTokensEstimate / 1000).toFixed(0) + 'K')} ${c.cyan('tokens)')}`);
|
|
108
|
+
console.log(` ${c.dim('Deep summary can reduce per-segment context by 60-80%, saving time and cost.')}`);
|
|
109
|
+
const wantDeepSummary = await promptUser(` ${c.cyan('Enable deep summary?')} [y/N] `);
|
|
110
|
+
if (wantDeepSummary) {
|
|
111
|
+
fullCtx.opts.deepSummary = true;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
99
116
|
if (fullCtx.opts.deepSummary && fullCtx.ai && fullCtx.contextDocs.length > 0) {
|
|
100
117
|
// Interactive picker: let user choose docs to keep at full fidelity
|
|
101
118
|
if (process.stdin.isTTY && fullCtx.opts.deepSummaryExclude.length === 0) {
|
|
@@ -119,9 +136,10 @@ async function run() {
|
|
|
119
136
|
userName: fullCtx.userName,
|
|
120
137
|
inputMode: ctx.inputMode,
|
|
121
138
|
settings: {
|
|
122
|
-
speed: SPEED,
|
|
123
|
-
segmentTimeSec: SEG_TIME,
|
|
124
|
-
|
|
139
|
+
speed: fullCtx.opts.noCompress ? 1.0 : (fullCtx.opts.speed || SPEED),
|
|
140
|
+
segmentTimeSec: fullCtx.opts.noCompress ? 1200 : (fullCtx.opts.segmentTime || SEG_TIME),
|
|
141
|
+
noCompress: !!fullCtx.opts.noCompress,
|
|
142
|
+
...(fullCtx.opts.noCompress ? {} : { preset: PRESET }),
|
|
125
143
|
geminiModel: config.GEMINI_MODEL,
|
|
126
144
|
thinkingBudget: fullCtx.opts.thinkingBudget,
|
|
127
145
|
},
|
package/src/services/gemini.js
CHANGED
|
@@ -459,16 +459,53 @@ async function processWithGemini(ai, filePath, displayName, contextDocs = [], pr
|
|
|
459
459
|
throw reuploadErr;
|
|
460
460
|
}
|
|
461
461
|
} else {
|
|
462
|
-
//
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
462
|
+
// Handle RESOURCE_EXHAUSTED specifically — shed lower-priority docs and retry
|
|
463
|
+
if (errMsg.includes('RESOURCE_EXHAUSTED') || errMsg.includes('429') || errMsg.includes('quota')) {
|
|
464
|
+
console.warn(` ${c.warn('Context window or quota exceeded — shedding docs and retrying after 30s...')}`);
|
|
465
|
+
await new Promise(r => setTimeout(r, 30000));
|
|
466
|
+
// Rebuild with half the doc budget
|
|
467
|
+
const reducedBudget = Math.floor(docBudget * 0.5);
|
|
468
|
+
const { selected: reducedDocs } = selectDocsByBudget(contextDocs, reducedBudget, { segmentIndex });
|
|
469
|
+
const reducedParts = [contentParts[0]]; // keep video
|
|
470
|
+
for (const doc of reducedDocs) {
|
|
471
|
+
if (doc.type === 'inlineText') {
|
|
472
|
+
let content = doc.content;
|
|
473
|
+
const isVtt = doc.fileName.toLowerCase().endsWith('.vtt') || doc.fileName.toLowerCase().endsWith('.srt');
|
|
474
|
+
if (isVtt && segmentStartSec != null && segmentEndSec != null) {
|
|
475
|
+
content = sliceVttForSegment(content, segmentStartSec, segmentEndSec);
|
|
476
|
+
}
|
|
477
|
+
reducedParts.push({ text: `=== Document: ${doc.fileName} ===\n${content}` });
|
|
478
|
+
} else if (doc.type === 'fileData') {
|
|
479
|
+
reducedParts.push({ fileData: { mimeType: doc.mimeType, fileUri: doc.fileUri } });
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
// Re-add prompt/context parts (last 3-5 parts are prompt, focus, etc.)
|
|
483
|
+
const nonDocParts = contentParts.slice(1 + selectedDocs.length);
|
|
484
|
+
reducedParts.push(...nonDocParts);
|
|
485
|
+
requestPayload.contents[0].parts = reducedParts;
|
|
486
|
+
console.log(` Reduced to ${reducedDocs.length} docs (budget: ${(reducedBudget / 1000).toFixed(0)}K tokens)`);
|
|
487
|
+
try {
|
|
488
|
+
response = await withRetry(
|
|
489
|
+
() => ai.models.generateContent(requestPayload),
|
|
490
|
+
{ label: `Gemini segment analysis — reduced docs (${displayName})`, maxRetries: 1, baseDelay: 5000 }
|
|
491
|
+
);
|
|
492
|
+
console.log(` ${c.success('Reduced-context retry succeeded')}`);
|
|
493
|
+
} catch (reduceErr) {
|
|
494
|
+
console.error(` ${c.error(`Reduced-context retry also failed: ${reduceErr.message}`)}`);
|
|
495
|
+
throw reduceErr;
|
|
496
|
+
}
|
|
497
|
+
} else {
|
|
498
|
+
// Log request diagnostics for other errors to aid debugging
|
|
499
|
+
const partSummary = contentParts.map((p, i) => {
|
|
500
|
+
if (p.fileData) return ` [${i}] fileData: ${p.fileData.mimeType} → ${(p.fileData.fileUri || '').substring(0, 120)}`;
|
|
501
|
+
if (p.text) return ` [${i}] text: ${p.text.length} chars → ${p.text.substring(0, 80).replace(/\n/g, ' ')}...`;
|
|
502
|
+
return ` [${i}] unknown part`;
|
|
503
|
+
});
|
|
504
|
+
console.error(` ${c.error('Request diagnostics:')}`);
|
|
505
|
+
console.error(` Model: ${config.GEMINI_MODEL} | Parts: ${contentParts.length} | maxOutput: 65536`);
|
|
506
|
+
partSummary.forEach(s => console.error(` ${s}`));
|
|
507
|
+
throw apiErr;
|
|
508
|
+
}
|
|
472
509
|
}
|
|
473
510
|
}
|
|
474
511
|
const durationMs = Date.now() - t0;
|
|
@@ -628,6 +665,60 @@ ${segmentDumps}`;
|
|
|
628
665
|
|
|
629
666
|
const contentParts = [{ text: compilationPrompt }];
|
|
630
667
|
|
|
668
|
+
// ------- Pre-flight context window check -------
|
|
669
|
+
const estimatedInputTokens = estimateTokens(compilationPrompt);
|
|
670
|
+
const safeLimit = Math.floor(config.GEMINI_CONTEXT_WINDOW * 0.80); // 80% of context window
|
|
671
|
+
if (estimatedInputTokens > safeLimit) {
|
|
672
|
+
console.warn(` ${c.warn(`Compilation input (~${(estimatedInputTokens / 1000).toFixed(0)}K tokens) exceeds 80% of context window (${(safeLimit / 1000).toFixed(0)}K). Trimming older segment detail...`)}`);
|
|
673
|
+
// Re-build segment dumps with aggressive compression: keep only first & last 2 segments
|
|
674
|
+
// at full detail, compress the middle ones to IDs + statuses only.
|
|
675
|
+
const trimmedDumps = allSegmentAnalyses.map((analysis, idx) => {
|
|
676
|
+
const clean = { ...analysis };
|
|
677
|
+
delete clean._geminiMeta;
|
|
678
|
+
delete clean.seg;
|
|
679
|
+
delete clean.conversation_transcript;
|
|
680
|
+
const isEdge = idx < 2 || idx >= allSegmentAnalyses.length - 2;
|
|
681
|
+
if (!isEdge) {
|
|
682
|
+
// Aggressive compression for middle segments
|
|
683
|
+
if (clean.tickets) {
|
|
684
|
+
clean.tickets = clean.tickets.map(t => ({
|
|
685
|
+
ticket_id: t.ticket_id, status: t.status, title: t.title,
|
|
686
|
+
assignee: t.assignee, source_segment: t.source_segment,
|
|
687
|
+
}));
|
|
688
|
+
}
|
|
689
|
+
if (clean.change_requests) {
|
|
690
|
+
clean.change_requests = clean.change_requests.map(cr => ({
|
|
691
|
+
id: cr.id, status: cr.status, title: cr.title,
|
|
692
|
+
assigned_to: cr.assigned_to, source_segment: cr.source_segment,
|
|
693
|
+
}));
|
|
694
|
+
}
|
|
695
|
+
if (clean.action_items) {
|
|
696
|
+
clean.action_items = clean.action_items.map(ai => ({
|
|
697
|
+
id: ai.id, description: ai.description, assigned_to: ai.assigned_to,
|
|
698
|
+
status: ai.status, source_segment: ai.source_segment,
|
|
699
|
+
}));
|
|
700
|
+
}
|
|
701
|
+
delete clean.file_references;
|
|
702
|
+
clean.summary = (clean.summary || '').substring(0, 200);
|
|
703
|
+
} else {
|
|
704
|
+
if (clean.tickets) {
|
|
705
|
+
clean.tickets = clean.tickets.map(t => {
|
|
706
|
+
const tc = { ...t };
|
|
707
|
+
if (tc.comments && tc.comments.length > 5) {
|
|
708
|
+
tc.comments = tc.comments.slice(0, 5);
|
|
709
|
+
tc.comments.push({ note: `...${t.comments.length - 5} more comments omitted` });
|
|
710
|
+
}
|
|
711
|
+
return tc;
|
|
712
|
+
});
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
return `=== SEGMENT ${idx + 1} OF ${allSegmentAnalyses.length} ===\n${JSON.stringify(clean, null, 2)}`;
|
|
716
|
+
}).join('\n\n');
|
|
717
|
+
contentParts[0] = { text: compilationPrompt.replace(segmentDumps, trimmedDumps) };
|
|
718
|
+
const newEstimate = estimateTokens(contentParts[0].text);
|
|
719
|
+
console.log(` Trimmed compilation input to ~${(newEstimate / 1000).toFixed(0)}K tokens`);
|
|
720
|
+
}
|
|
721
|
+
|
|
631
722
|
const requestPayload = {
|
|
632
723
|
model: config.GEMINI_MODEL,
|
|
633
724
|
contents: [{ role: 'user', parts: contentParts }],
|
|
@@ -640,10 +731,44 @@ ${segmentDumps}`;
|
|
|
640
731
|
|
|
641
732
|
const t0 = Date.now();
|
|
642
733
|
console.log(` Compiling with ${config.GEMINI_MODEL}...`);
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
734
|
+
let response;
|
|
735
|
+
try {
|
|
736
|
+
response = await withRetry(
|
|
737
|
+
() => ai.models.generateContent(requestPayload),
|
|
738
|
+
{ label: 'Gemini final compilation', maxRetries: 2, baseDelay: 5000 }
|
|
739
|
+
);
|
|
740
|
+
} catch (compileErr) {
|
|
741
|
+
const errMsg = compileErr.message || '';
|
|
742
|
+
if (errMsg.includes('RESOURCE_EXHAUSTED') || errMsg.includes('429') || errMsg.includes('quota')) {
|
|
743
|
+
console.warn(` ${c.warn('Context window or quota exceeded during compilation — waiting 30s and retrying with reduced input...')}`);
|
|
744
|
+
await new Promise(r => setTimeout(r, 30000));
|
|
745
|
+
// Halve the compilation prompt by keeping only edge segments
|
|
746
|
+
const miniDumps = allSegmentAnalyses.map((analysis, idx) => {
|
|
747
|
+
const clean = { tickets: (analysis.tickets || []).map(t => ({ ticket_id: t.ticket_id, status: t.status, title: t.title, assignee: t.assignee })),
|
|
748
|
+
change_requests: (analysis.change_requests || []).map(cr => ({ id: cr.id, status: cr.status, title: cr.title })),
|
|
749
|
+
action_items: (analysis.action_items || []).map(ai => ({ id: ai.id, description: ai.description, assigned_to: ai.assigned_to, status: ai.status })),
|
|
750
|
+
blockers: (analysis.blockers || []).map(b => ({ id: b.id, description: b.description, status: b.status })),
|
|
751
|
+
scope_changes: analysis.scope_changes || [],
|
|
752
|
+
your_tasks: analysis.your_tasks || {},
|
|
753
|
+
summary: (analysis.summary || '').substring(0, 300),
|
|
754
|
+
};
|
|
755
|
+
return `=== SEGMENT ${idx + 1} OF ${allSegmentAnalyses.length} ===\n${JSON.stringify(clean, null, 2)}`;
|
|
756
|
+
}).join('\n\n');
|
|
757
|
+
requestPayload.contents[0].parts = [{ text: compilationPrompt.replace(/SEGMENT ANALYSES:\n[\s\S]*$/, `SEGMENT ANALYSES:\n${miniDumps}`) }];
|
|
758
|
+
try {
|
|
759
|
+
response = await withRetry(
|
|
760
|
+
() => ai.models.generateContent(requestPayload),
|
|
761
|
+
{ label: 'Gemini compilation (reduced)', maxRetries: 1, baseDelay: 5000 }
|
|
762
|
+
);
|
|
763
|
+
console.log(` ${c.success('Reduced compilation succeeded')}`);
|
|
764
|
+
} catch (reduceErr) {
|
|
765
|
+
console.error(` ${c.error(`Reduced compilation also failed: ${reduceErr.message}`)}`);
|
|
766
|
+
throw reduceErr;
|
|
767
|
+
}
|
|
768
|
+
} else {
|
|
769
|
+
throw compileErr;
|
|
770
|
+
}
|
|
771
|
+
}
|
|
647
772
|
const durationMs = Date.now() - t0;
|
|
648
773
|
const rawText = response.text;
|
|
649
774
|
|
package/src/services/video.js
CHANGED
|
@@ -13,7 +13,7 @@ const { execSync, spawnSync } = require('child_process');
|
|
|
13
13
|
const fs = require('fs');
|
|
14
14
|
const path = require('path');
|
|
15
15
|
const { SPEED, SEG_TIME, PRESET } = require('../config');
|
|
16
|
-
const { fmtDuration } = require('../utils/format');
|
|
16
|
+
const { fmtDuration, fmtBytes } = require('../utils/format');
|
|
17
17
|
const { c } = require('../utils/colors');
|
|
18
18
|
|
|
19
19
|
// ======================== BINARY DETECTION ========================
|
|
@@ -103,17 +103,19 @@ function verifySegment(segPath) {
|
|
|
103
103
|
|
|
104
104
|
/**
|
|
105
105
|
* Build the common ffmpeg encoding args (video + audio filters/codecs).
|
|
106
|
+
* @param {string} inputFile
|
|
107
|
+
* @param {{ speed?: number }} [overrides]
|
|
106
108
|
* Returns { encodingArgs, effectiveDuration }.
|
|
107
109
|
*/
|
|
108
|
-
function buildEncodingArgs(inputFile) {
|
|
110
|
+
function buildEncodingArgs(inputFile, { speed = SPEED } = {}) {
|
|
109
111
|
const width = parseInt(probe(inputFile, 'v:0', 'width') || '0');
|
|
110
112
|
const channels = parseInt(probe(inputFile, 'a:0', 'channels') || '1');
|
|
111
113
|
const sampleRate = probe(inputFile, 'a:0', 'sample_rate') || '16000';
|
|
112
114
|
const duration = probeFormat(inputFile, 'duration');
|
|
113
115
|
const durationSec = duration ? parseFloat(duration) : null;
|
|
114
|
-
const effectiveDuration = durationSec ? durationSec /
|
|
116
|
+
const effectiveDuration = durationSec ? durationSec / speed : null;
|
|
115
117
|
|
|
116
|
-
let vf = `setpts=PTS/${
|
|
118
|
+
let vf = `setpts=PTS/${speed}`;
|
|
117
119
|
let crf = 24;
|
|
118
120
|
let tune = ['-tune', 'stillimage'];
|
|
119
121
|
let profile = ['-profile:v', 'main'];
|
|
@@ -122,21 +124,21 @@ function buildEncodingArgs(inputFile) {
|
|
|
122
124
|
|
|
123
125
|
if (width > 1920) {
|
|
124
126
|
// 4K+ → scale to 1080p
|
|
125
|
-
vf = `scale=1920:1080,unsharp=3:3:0.3,setpts=PTS/${
|
|
127
|
+
vf = `scale=1920:1080,unsharp=3:3:0.3,setpts=PTS/${speed}`;
|
|
126
128
|
crf = 20;
|
|
127
129
|
tune = [];
|
|
128
130
|
profile = ['-profile:v', 'high'];
|
|
129
131
|
audioBr = '128k';
|
|
130
132
|
} else if (width > 0) {
|
|
131
133
|
// Meeting / screenshare
|
|
132
|
-
vf = `unsharp=3:3:0.3,setpts=PTS/${
|
|
134
|
+
vf = `unsharp=3:3:0.3,setpts=PTS/${speed}`;
|
|
133
135
|
}
|
|
134
136
|
|
|
135
137
|
if (channels === 2) audioBr = '128k';
|
|
136
138
|
|
|
137
139
|
const encodingArgs = [
|
|
138
140
|
'-vf', vf,
|
|
139
|
-
'-af', `atempo=${
|
|
141
|
+
'-af', `atempo=${speed}`,
|
|
140
142
|
'-c:v', 'libx264', '-crf', String(crf), '-preset', PRESET,
|
|
141
143
|
...tune,
|
|
142
144
|
'-x264-params', x264p,
|
|
@@ -146,7 +148,7 @@ function buildEncodingArgs(inputFile) {
|
|
|
146
148
|
'-movflags', '+faststart',
|
|
147
149
|
];
|
|
148
150
|
|
|
149
|
-
return { encodingArgs, effectiveDuration, width, crf, audioBr, duration };
|
|
151
|
+
return { encodingArgs, effectiveDuration, width, crf, audioBr, duration, speed };
|
|
150
152
|
}
|
|
151
153
|
|
|
152
154
|
/**
|
|
@@ -155,27 +157,28 @@ function buildEncodingArgs(inputFile) {
|
|
|
155
157
|
* - Long videos → segment muxer for splitting.
|
|
156
158
|
* - Post-compression validation: verifies each output has a valid moov atom.
|
|
157
159
|
* Corrupt segments are re-encoded individually with the regular MP4 muxer.
|
|
160
|
+
* @param {{ segTime?: number, speed?: number }} [opts]
|
|
158
161
|
* Returns sorted array of segment file paths.
|
|
159
162
|
*/
|
|
160
|
-
function compressAndSegment(inputFile, outputDir) {
|
|
161
|
-
const { encodingArgs, effectiveDuration, width, crf, audioBr, duration } = buildEncodingArgs(inputFile);
|
|
163
|
+
function compressAndSegment(inputFile, outputDir, { segTime = SEG_TIME, speed = SPEED } = {}) {
|
|
164
|
+
const { encodingArgs, effectiveDuration, width, crf, audioBr, duration } = buildEncodingArgs(inputFile, { speed });
|
|
162
165
|
|
|
163
166
|
fs.mkdirSync(outputDir, { recursive: true });
|
|
164
167
|
|
|
165
168
|
console.log(` Resolution : ${width > 0 ? width + 'p' : 'unknown'}`);
|
|
166
|
-
console.log(` Duration : ${duration ? fmtDuration(parseFloat(duration)) : 'unknown'}${effectiveDuration ? ` (${fmtDuration(effectiveDuration)} at ${
|
|
167
|
-
console.log(` CRF ${crf} | ${audioBr} audio | ${
|
|
169
|
+
console.log(` Duration : ${duration ? fmtDuration(parseFloat(duration)) : 'unknown'}${effectiveDuration ? ` (${fmtDuration(effectiveDuration)} at ${speed}x)` : ''}`);
|
|
170
|
+
console.log(` CRF ${crf} | ${audioBr} audio | ${speed}x speed`);
|
|
168
171
|
|
|
169
172
|
// Decide: single output vs segmented
|
|
170
|
-
const needsSegmentation = effectiveDuration === null || effectiveDuration >
|
|
173
|
+
const needsSegmentation = effectiveDuration === null || effectiveDuration > segTime;
|
|
171
174
|
|
|
172
175
|
if (needsSegmentation) {
|
|
173
|
-
console.log(` Compressing (segmented, ${
|
|
176
|
+
console.log(` Compressing (segmented, ${segTime}s chunks)...`);
|
|
174
177
|
const args = [
|
|
175
178
|
'-y', '-err_detect', 'ignore_err', '-fflags', '+genpts+discardcorrupt',
|
|
176
179
|
'-i', inputFile,
|
|
177
180
|
...encodingArgs,
|
|
178
|
-
'-f', 'segment', '-segment_time', String(
|
|
181
|
+
'-f', 'segment', '-segment_time', String(segTime), '-reset_timestamps', '1',
|
|
179
182
|
'-map', '0:v:0', '-map', '0:a:0',
|
|
180
183
|
path.join(outputDir, 'segment_%02d.mp4'),
|
|
181
184
|
];
|
|
@@ -248,7 +251,7 @@ function compressAndSegment(inputFile, outputDir) {
|
|
|
248
251
|
const rsArgs = [
|
|
249
252
|
'-y', '-i', fallbackPath,
|
|
250
253
|
'-c', 'copy',
|
|
251
|
-
'-f', 'segment', '-segment_time', String(
|
|
254
|
+
'-f', 'segment', '-segment_time', String(segTime), '-reset_timestamps', '1',
|
|
252
255
|
'-movflags', '+faststart',
|
|
253
256
|
path.join(reSegDir, 'segment_%02d.mp4'),
|
|
254
257
|
];
|
|
@@ -302,34 +305,34 @@ function compressAndSegment(inputFile, outputDir) {
|
|
|
302
305
|
*
|
|
303
306
|
* Returns sorted array of segment file paths.
|
|
304
307
|
*/
|
|
305
|
-
function compressAndSegmentAudio(inputFile, outputDir) {
|
|
308
|
+
function compressAndSegmentAudio(inputFile, outputDir, { segTime = SEG_TIME, speed = SPEED } = {}) {
|
|
306
309
|
fs.mkdirSync(outputDir, { recursive: true });
|
|
307
310
|
|
|
308
311
|
const duration = probeFormat(inputFile, 'duration');
|
|
309
312
|
const durationSec = duration ? parseFloat(duration) : null;
|
|
310
|
-
const effectiveDuration = durationSec ? durationSec /
|
|
313
|
+
const effectiveDuration = durationSec ? durationSec / speed : null;
|
|
311
314
|
const channels = parseInt(probe(inputFile, 'a:0', 'channels') || '1', 10);
|
|
312
315
|
const sampleRate = probe(inputFile, 'a:0', 'sample_rate') || '16000';
|
|
313
316
|
const audioBr = channels >= 2 ? '128k' : '64k';
|
|
314
317
|
|
|
315
|
-
console.log(` Duration : ${duration ? fmtDuration(parseFloat(duration)) : 'unknown'}${effectiveDuration ? ` (${fmtDuration(effectiveDuration)} at ${
|
|
316
|
-
console.log(` Audio-only mode | ${
|
|
318
|
+
console.log(` Duration : ${duration ? fmtDuration(parseFloat(duration)) : 'unknown'}${effectiveDuration ? ` (${fmtDuration(effectiveDuration)} at ${speed}x)` : ''}`);
|
|
319
|
+
console.log(` Audio-only mode | ${speed}x speed | ${audioBr} bitrate`);
|
|
317
320
|
|
|
318
321
|
const encodingArgs = [
|
|
319
|
-
'-af', `atempo=${
|
|
322
|
+
'-af', `atempo=${speed}`,
|
|
320
323
|
'-c:a', 'aac', '-b:a', audioBr, '-ar', sampleRate, '-ac', String(channels),
|
|
321
324
|
'-vn', // no video
|
|
322
325
|
'-movflags', '+faststart',
|
|
323
326
|
];
|
|
324
327
|
|
|
325
|
-
const needsSegmentation = effectiveDuration === null || effectiveDuration >
|
|
328
|
+
const needsSegmentation = effectiveDuration === null || effectiveDuration > segTime;
|
|
326
329
|
|
|
327
330
|
if (needsSegmentation) {
|
|
328
|
-
console.log(` Compressing (segmented, ${
|
|
331
|
+
console.log(` Compressing (segmented, ${segTime}s chunks)...`);
|
|
329
332
|
const args = [
|
|
330
333
|
'-y', '-i', inputFile,
|
|
331
334
|
...encodingArgs,
|
|
332
|
-
'-f', 'segment', '-segment_time', String(
|
|
335
|
+
'-f', 'segment', '-segment_time', String(segTime), '-reset_timestamps', '1',
|
|
333
336
|
path.join(outputDir, 'segment_%02d.m4a'),
|
|
334
337
|
];
|
|
335
338
|
const result = spawnSync(getFFmpeg(), args, { stdio: 'inherit' });
|
|
@@ -383,7 +386,7 @@ function compressAndSegmentAudio(inputFile, outputDir) {
|
|
|
383
386
|
const rsArgs = [
|
|
384
387
|
'-y', '-i', fallbackPath,
|
|
385
388
|
'-c', 'copy', '-vn',
|
|
386
|
-
'-f', 'segment', '-segment_time', String(
|
|
389
|
+
'-f', 'segment', '-segment_time', String(segTime), '-reset_timestamps', '1',
|
|
387
390
|
path.join(reSegDir, 'segment_%02d.m4a'),
|
|
388
391
|
];
|
|
389
392
|
spawnSync(getFFmpeg(), rsArgs, { stdio: 'inherit' });
|
|
@@ -408,12 +411,100 @@ function compressAndSegmentAudio(inputFile, outputDir) {
|
|
|
408
411
|
return segments;
|
|
409
412
|
}
|
|
410
413
|
|
|
414
|
+
/**
|
|
415
|
+
* Split a media file into segments WITHOUT re-encoding (stream copy).
|
|
416
|
+
* No compression, no speed-up — just fast keyframe-aligned splitting.
|
|
417
|
+
* For use with --no-compress: passes raw video to Gemini via File API.
|
|
418
|
+
*
|
|
419
|
+
* @param {string} inputFile - Path to input media file
|
|
420
|
+
* @param {string} outputDir - Directory for output segments
|
|
421
|
+
* @param {{ segTime?: number }} opts - Options (segTime defaults to 1200s for raw mode)
|
|
422
|
+
* @returns {string[]} Sorted array of segment file paths
|
|
423
|
+
*/
|
|
424
|
+
function splitOnly(inputFile, outputDir, { segTime = 1200 } = {}) {
|
|
425
|
+
fs.mkdirSync(outputDir, { recursive: true });
|
|
426
|
+
|
|
427
|
+
const duration = probeFormat(inputFile, 'duration');
|
|
428
|
+
const durationSec = duration ? parseFloat(duration) : null;
|
|
429
|
+
const ext = path.extname(inputFile).toLowerCase();
|
|
430
|
+
const isAudio = ['.mp3', '.wav', '.m4a', '.ogg', '.flac', '.aac', '.wma'].includes(ext);
|
|
431
|
+
const outExt = isAudio ? '.m4a' : '.mp4';
|
|
432
|
+
const width = isAudio ? 0 : parseInt(probe(inputFile, 'v:0', 'width') || '0');
|
|
433
|
+
|
|
434
|
+
console.log(` Mode : ${c.cyan('raw split')} (no re-encoding, no speed-up)`);
|
|
435
|
+
if (!isAudio) console.log(` Resolution : ${width > 0 ? width + 'p' : 'unknown'}`);
|
|
436
|
+
console.log(` Duration : ${duration ? fmtDuration(durationSec) : 'unknown'}`);
|
|
437
|
+
console.log(` File size: ${fmtBytes(fs.statSync(inputFile).size)}`);
|
|
438
|
+
|
|
439
|
+
const needsSegmentation = durationSec === null || durationSec > segTime;
|
|
440
|
+
|
|
441
|
+
if (needsSegmentation) {
|
|
442
|
+
console.log(` Splitting at keyframes (~${segTime}s chunks)...`);
|
|
443
|
+
const args = [
|
|
444
|
+
'-y', '-err_detect', 'ignore_err', '-fflags', '+genpts+discardcorrupt',
|
|
445
|
+
'-i', inputFile,
|
|
446
|
+
'-c', 'copy',
|
|
447
|
+
'-f', 'segment', '-segment_time', String(segTime), '-reset_timestamps', '1',
|
|
448
|
+
...(isAudio ? ['-vn'] : ['-map', '0:v:0', '-map', '0:a:0']),
|
|
449
|
+
'-movflags', '+faststart',
|
|
450
|
+
path.join(outputDir, `segment_%02d${outExt}`),
|
|
451
|
+
];
|
|
452
|
+
const result = spawnSync(getFFmpeg(), args, { stdio: 'inherit' });
|
|
453
|
+
if (result.status !== 0) {
|
|
454
|
+
console.warn(` ${c.warn(`ffmpeg exited with code ${result.status} (output may still be usable)`)}`);
|
|
455
|
+
}
|
|
456
|
+
} else {
|
|
457
|
+
console.log(` Single segment (duration ${fmtDuration(durationSec)} ≤ ${segTime}s) — copying...`);
|
|
458
|
+
const outPath = path.join(outputDir, `segment_00${outExt}`);
|
|
459
|
+
const args = [
|
|
460
|
+
'-y', '-err_detect', 'ignore_err', '-fflags', '+genpts+discardcorrupt',
|
|
461
|
+
'-i', inputFile,
|
|
462
|
+
'-c', 'copy',
|
|
463
|
+
...(isAudio ? ['-vn'] : ['-map', '0:v:0', '-map', '0:a:0']),
|
|
464
|
+
'-movflags', '+faststart',
|
|
465
|
+
outPath,
|
|
466
|
+
];
|
|
467
|
+
const result = spawnSync(getFFmpeg(), args, { stdio: 'inherit' });
|
|
468
|
+
if (result.status !== 0) {
|
|
469
|
+
console.warn(` ${c.warn(`ffmpeg exited with code ${result.status}`)}`);
|
|
470
|
+
}
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
// Collect segments
|
|
474
|
+
const segments = fs.readdirSync(outputDir)
|
|
475
|
+
.filter(f => f.startsWith('segment_') && (f.endsWith('.mp4') || f.endsWith('.m4a')))
|
|
476
|
+
.sort()
|
|
477
|
+
.map(f => path.join(outputDir, f));
|
|
478
|
+
|
|
479
|
+
// Validate
|
|
480
|
+
const corrupt = segments.filter(s => !verifySegment(s));
|
|
481
|
+
if (corrupt.length > 0) {
|
|
482
|
+
console.warn(` ${c.warn(`${corrupt.length} segment(s) may be corrupt (no moov atom):`)}`);
|
|
483
|
+
corrupt.forEach(s => console.warn(` ${c.error(path.basename(s))}`));
|
|
484
|
+
console.warn(` ${c.dim('Stream-copy splits at keyframes — some containers may need re-mux.')}`);
|
|
485
|
+
console.warn(` ${c.dim('Remove --no-compress to re-encode instead.')}`);
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
// Duration validation: warn if any segment exceeds 1 hour (Gemini sweet spot)
|
|
489
|
+
for (const seg of segments) {
|
|
490
|
+
const dur = probeFormat(seg, 'duration');
|
|
491
|
+
if (dur && parseFloat(dur) > 3600) {
|
|
492
|
+
console.warn(` ${c.warn(`${path.basename(seg)} is ${fmtDuration(parseFloat(dur))} — very long segments use more Gemini tokens.`)}`);
|
|
493
|
+
console.warn(` ${c.dim(' Consider removing --no-compress to re-encode into shorter segments.')}`);
|
|
494
|
+
break; // warn once
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
return segments;
|
|
499
|
+
}
|
|
500
|
+
|
|
411
501
|
module.exports = {
|
|
412
502
|
findBin,
|
|
413
503
|
probe,
|
|
414
504
|
probeFormat,
|
|
415
505
|
compressAndSegment,
|
|
416
506
|
compressAndSegmentAudio,
|
|
507
|
+
splitOnly,
|
|
417
508
|
verifySegment,
|
|
418
509
|
getFFmpeg,
|
|
419
510
|
getFFprobe,
|
package/src/utils/cli.js
CHANGED
|
@@ -33,7 +33,7 @@ function parseArgs(argv) {
|
|
|
33
33
|
const BOOLEAN_FLAGS = new Set([
|
|
34
34
|
'help', 'h', 'version', 'v',
|
|
35
35
|
'skip-upload', 'force-upload', 'no-storage-url',
|
|
36
|
-
'skip-compression', 'skip-gemini',
|
|
36
|
+
'skip-compression', 'skip-gemini', 'no-compress',
|
|
37
37
|
'resume', 'reanalyze', 'dry-run',
|
|
38
38
|
'dynamic', 'deep-dive', 'deep-summary', 'update-progress',
|
|
39
39
|
'no-focused-pass', 'no-learning', 'no-diff',
|
|
@@ -381,12 +381,22 @@ ${f('--format <type>', 'Output: md, html, json, pdf, docx, all — comma-separat
|
|
|
381
381
|
${f('--min-confidence <level>', 'Filter: high, medium, low (default: all)')}
|
|
382
382
|
${f('--output <dir>', 'Custom output directory for results')}
|
|
383
383
|
${f('--skip-upload', 'Skip Firebase Storage uploads')}
|
|
384
|
-
${f('--skip-compression', 'Use existing segments (
|
|
384
|
+
${f('--skip-compression', 'Use existing segments from previous run (deprecated: auto-detected)')}
|
|
385
385
|
${f('--skip-gemini', 'Skip AI analysis')}
|
|
386
386
|
${f('--resume', 'Resume from last checkpoint')}
|
|
387
387
|
${f('--reanalyze', 'Force re-analysis of all segments')}
|
|
388
388
|
${f('--dry-run', 'Preview without executing')}
|
|
389
389
|
|
|
390
|
+
${h('VIDEO PROCESSING')}
|
|
391
|
+
${f('--no-compress', 'Skip re-encoding — pass raw video to Gemini (fast, no quality loss)')}
|
|
392
|
+
${f2('Auto-splits at 20 min (1200s) if needed. --speed and --segment-time are ignored.')}
|
|
393
|
+
${f2('Gemini File API: up to 2 GB/file, ~300 tok/sec at default resolution.')}
|
|
394
|
+
${f('--speed <n>', 'Playback speed multiplier for compression mode (default: 1.6)')}
|
|
395
|
+
${f('--segment-time <n>', 'Segment duration in seconds for compression mode (default: 280)')}
|
|
396
|
+
${f2('Duration constraints (per Google Gemini docs):')}
|
|
397
|
+
${f2(' • Default res: ~300 tok/sec → max ~55 min/segment (safe: ≤20 min)')}
|
|
398
|
+
${f2(' • File API limit: 2 GB (free) / 20 GB (paid) per file')}
|
|
399
|
+
|
|
390
400
|
${h('TUNING')}
|
|
391
401
|
${f('--parallel <n>', 'Max parallel uploads (default: 3)')}
|
|
392
402
|
${f('--parallel-analysis <n>', 'Concurrent analysis batches (default: 2)')}
|
|
@@ -29,6 +29,14 @@ function estimateDocTokens(doc) {
|
|
|
29
29
|
return 500;
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
+
/**
|
|
33
|
+
* Hard character limit for VTT fallback.
|
|
34
|
+
* When VTT parsing fails (0 cues), the full VTT is returned.
|
|
35
|
+
* Cap it so a huge transcript can't blow the context window.
|
|
36
|
+
* 500K chars ≈ 150K tokens — leaves plenty of room for docs + prompt.
|
|
37
|
+
*/
|
|
38
|
+
const VTT_FALLBACK_MAX_CHARS = 500000;
|
|
39
|
+
|
|
32
40
|
// ════════════════════════════════════════════════════════════
|
|
33
41
|
// Priority Classification
|
|
34
42
|
// ════════════════════════════════════════════════════════════
|
|
@@ -100,12 +108,16 @@ function selectDocsByBudget(allDocs, tokenBudget, opts = {}) {
|
|
|
100
108
|
const excluded = [];
|
|
101
109
|
let usedTokens = 0;
|
|
102
110
|
|
|
111
|
+
// Hard cap: even P0/P1 docs may not exceed 2× the budget.
|
|
112
|
+
// This prevents a handful of huge critical docs from blowing the context window.
|
|
113
|
+
const hardCap = tokenBudget * 2;
|
|
114
|
+
|
|
103
115
|
for (const item of classified) {
|
|
104
116
|
if (usedTokens + item.tokens <= tokenBudget) {
|
|
105
117
|
selected.push(item.doc);
|
|
106
118
|
usedTokens += item.tokens;
|
|
107
|
-
} else if (item.priority <= PRIORITY.HIGH) {
|
|
108
|
-
// P0 and P1 are always included even if over budget
|
|
119
|
+
} else if (item.priority <= PRIORITY.HIGH && usedTokens + item.tokens <= hardCap) {
|
|
120
|
+
// P0 and P1 are always included even if over budget, up to the hard cap
|
|
109
121
|
selected.push(item.doc);
|
|
110
122
|
usedTokens += item.tokens;
|
|
111
123
|
} else {
|
|
@@ -171,14 +183,28 @@ function parseVttCues(vttContent) {
|
|
|
171
183
|
*/
|
|
172
184
|
function sliceVttForSegment(vttContent, segStartSec, segEndSec, overlapSec = 30) {
|
|
173
185
|
const cues = parseVttCues(vttContent);
|
|
174
|
-
if (cues.length === 0)
|
|
186
|
+
if (cues.length === 0) {
|
|
187
|
+
// Fallback: return full VTT but cap size to avoid context window overflow
|
|
188
|
+
if (vttContent.length > VTT_FALLBACK_MAX_CHARS) {
|
|
189
|
+
return vttContent.substring(0, VTT_FALLBACK_MAX_CHARS) +
|
|
190
|
+
`\n\n[TRUNCATED — original VTT was ${(vttContent.length / 1024).toFixed(0)} KB; capped at ${(VTT_FALLBACK_MAX_CHARS / 1024).toFixed(0)} KB]`;
|
|
191
|
+
}
|
|
192
|
+
return vttContent;
|
|
193
|
+
}
|
|
175
194
|
|
|
176
195
|
const rangeStart = Math.max(0, segStartSec - overlapSec);
|
|
177
196
|
const rangeEnd = segEndSec + overlapSec;
|
|
178
197
|
|
|
179
198
|
const filtered = cues.filter(c => c.endSec >= rangeStart && c.startSec <= rangeEnd);
|
|
180
199
|
|
|
181
|
-
if (filtered.length === 0)
|
|
200
|
+
if (filtered.length === 0) {
|
|
201
|
+
// Fallback with cap
|
|
202
|
+
if (vttContent.length > VTT_FALLBACK_MAX_CHARS) {
|
|
203
|
+
return vttContent.substring(0, VTT_FALLBACK_MAX_CHARS) +
|
|
204
|
+
`\n\n[TRUNCATED — original VTT was ${(vttContent.length / 1024).toFixed(0)} KB; capped at ${(VTT_FALLBACK_MAX_CHARS / 1024).toFixed(0)} KB]`;
|
|
205
|
+
}
|
|
206
|
+
return vttContent;
|
|
207
|
+
}
|
|
182
208
|
|
|
183
209
|
const header = `WEBVTT\n\n[Segment transcript: ${formatHMS(segStartSec)} — ${formatHMS(segEndSec)}]\n[Showing cues from ${formatHMS(rangeStart)} to ${formatHMS(rangeEnd)} with ${overlapSec}s overlap]\n`;
|
|
184
210
|
|
|
@@ -492,4 +518,5 @@ module.exports = {
|
|
|
492
518
|
buildProgressiveContext,
|
|
493
519
|
buildSegmentFocus,
|
|
494
520
|
detectBoundaryContext,
|
|
521
|
+
VTT_FALLBACK_MAX_CHARS,
|
|
495
522
|
};
|