wogiflow 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.workflow/agents/reviewer.md +81 -0
- package/.workflow/agents/security.md +94 -0
- package/.workflow/agents/story-writer.md +58 -0
- package/.workflow/bridges/base-bridge.js +395 -0
- package/.workflow/bridges/claude-bridge.js +434 -0
- package/.workflow/bridges/index.js +130 -0
- package/.workflow/lib/assumption-detector.js +481 -0
- package/.workflow/lib/config-substitution.js +371 -0
- package/.workflow/lib/failure-categories.js +478 -0
- package/.workflow/state/app-map.md.template +15 -0
- package/.workflow/state/architecture.md.template +24 -0
- package/.workflow/state/component-index.json.template +5 -0
- package/.workflow/state/decisions.md.template +15 -0
- package/.workflow/state/feedback-patterns.md.template +9 -0
- package/.workflow/state/knowledge-sync.json.template +6 -0
- package/.workflow/state/progress.md.template +14 -0
- package/.workflow/state/ready.json.template +7 -0
- package/.workflow/state/request-log.md.template +14 -0
- package/.workflow/state/session-state.json.template +11 -0
- package/.workflow/state/stack.md.template +33 -0
- package/.workflow/state/testing.md.template +36 -0
- package/.workflow/templates/claude-md.hbs +257 -0
- package/.workflow/templates/correction-report.md +67 -0
- package/.workflow/templates/gemini-md.hbs +52 -0
- package/README.md +1802 -0
- package/bin/flow +205 -0
- package/lib/index.js +33 -0
- package/lib/installer.js +467 -0
- package/lib/release-channel.js +269 -0
- package/lib/skill-registry.js +526 -0
- package/lib/upgrader.js +401 -0
- package/lib/utils.js +305 -0
- package/package.json +64 -0
- package/scripts/flow +985 -0
- package/scripts/flow-adaptive-learning.js +1259 -0
- package/scripts/flow-aggregate.js +488 -0
- package/scripts/flow-archive +133 -0
- package/scripts/flow-auto-context.js +1015 -0
- package/scripts/flow-auto-learn.js +615 -0
- package/scripts/flow-bridge.js +223 -0
- package/scripts/flow-browser-suggest.js +316 -0
- package/scripts/flow-bug.js +247 -0
- package/scripts/flow-cascade.js +711 -0
- package/scripts/flow-changelog +85 -0
- package/scripts/flow-checkpoint.js +483 -0
- package/scripts/flow-cli.js +403 -0
- package/scripts/flow-code-intelligence.js +760 -0
- package/scripts/flow-complexity.js +502 -0
- package/scripts/flow-config-set.js +152 -0
- package/scripts/flow-constants.js +157 -0
- package/scripts/flow-context +152 -0
- package/scripts/flow-context-init.js +482 -0
- package/scripts/flow-context-monitor.js +384 -0
- package/scripts/flow-context-scoring.js +886 -0
- package/scripts/flow-correct.js +458 -0
- package/scripts/flow-damage-control.js +985 -0
- package/scripts/flow-deps +101 -0
- package/scripts/flow-diff.js +700 -0
- package/scripts/flow-done +151 -0
- package/scripts/flow-done.js +489 -0
- package/scripts/flow-durable-session.js +1541 -0
- package/scripts/flow-entropy-monitor.js +345 -0
- package/scripts/flow-export-profile +349 -0
- package/scripts/flow-export-scanner.js +1046 -0
- package/scripts/flow-figma-confirm.js +400 -0
- package/scripts/flow-figma-extract.js +496 -0
- package/scripts/flow-figma-generate.js +683 -0
- package/scripts/flow-figma-index.js +909 -0
- package/scripts/flow-figma-match.js +617 -0
- package/scripts/flow-figma-mcp-server.js +518 -0
- package/scripts/flow-figma-pipeline.js +414 -0
- package/scripts/flow-file-ops.js +301 -0
- package/scripts/flow-gate-confidence.js +825 -0
- package/scripts/flow-guided-edit.js +659 -0
- package/scripts/flow-health +185 -0
- package/scripts/flow-health.js +413 -0
- package/scripts/flow-hooks.js +556 -0
- package/scripts/flow-http-client.js +249 -0
- package/scripts/flow-hybrid-detect.js +167 -0
- package/scripts/flow-hybrid-interactive.js +591 -0
- package/scripts/flow-hybrid-test.js +152 -0
- package/scripts/flow-import-profile +439 -0
- package/scripts/flow-init +253 -0
- package/scripts/flow-instruction-richness.js +827 -0
- package/scripts/flow-jira-integration.js +579 -0
- package/scripts/flow-knowledge-router.js +522 -0
- package/scripts/flow-knowledge-sync.js +589 -0
- package/scripts/flow-linear-integration.js +631 -0
- package/scripts/flow-links.js +774 -0
- package/scripts/flow-log-manager.js +559 -0
- package/scripts/flow-loop-enforcer.js +1246 -0
- package/scripts/flow-loop-retry-learning.js +630 -0
- package/scripts/flow-lsp.js +923 -0
- package/scripts/flow-map-index +348 -0
- package/scripts/flow-map-sync +201 -0
- package/scripts/flow-memory-blocks.js +668 -0
- package/scripts/flow-memory-compactor.js +350 -0
- package/scripts/flow-memory-db.js +1110 -0
- package/scripts/flow-memory-sync.js +484 -0
- package/scripts/flow-metrics.js +353 -0
- package/scripts/flow-migrate-ids.js +370 -0
- package/scripts/flow-model-adapter.js +802 -0
- package/scripts/flow-model-router.js +884 -0
- package/scripts/flow-models.js +1231 -0
- package/scripts/flow-morning.js +517 -0
- package/scripts/flow-multi-approach.js +660 -0
- package/scripts/flow-new-feature +86 -0
- package/scripts/flow-onboard +1042 -0
- package/scripts/flow-orchestrate-llm.js +459 -0
- package/scripts/flow-orchestrate.js +3592 -0
- package/scripts/flow-output.js +123 -0
- package/scripts/flow-parallel-detector.js +399 -0
- package/scripts/flow-parallel-dispatch.js +987 -0
- package/scripts/flow-parallel.js +428 -0
- package/scripts/flow-pattern-enforcer.js +600 -0
- package/scripts/flow-prd-manager.js +282 -0
- package/scripts/flow-progress.js +323 -0
- package/scripts/flow-project-analyzer.js +975 -0
- package/scripts/flow-prompt-composer.js +487 -0
- package/scripts/flow-providers.js +1381 -0
- package/scripts/flow-queue.js +308 -0
- package/scripts/flow-ready +82 -0
- package/scripts/flow-ready.js +189 -0
- package/scripts/flow-regression.js +396 -0
- package/scripts/flow-response-parser.js +450 -0
- package/scripts/flow-resume.js +284 -0
- package/scripts/flow-rules-sync.js +439 -0
- package/scripts/flow-run-trace.js +718 -0
- package/scripts/flow-safety.js +587 -0
- package/scripts/flow-search +104 -0
- package/scripts/flow-security.js +481 -0
- package/scripts/flow-session-end +106 -0
- package/scripts/flow-session-end.js +437 -0
- package/scripts/flow-session-state.js +671 -0
- package/scripts/flow-setup-hooks +216 -0
- package/scripts/flow-setup-hooks.js +377 -0
- package/scripts/flow-skill-create.js +329 -0
- package/scripts/flow-skill-creator.js +572 -0
- package/scripts/flow-skill-generator.js +1046 -0
- package/scripts/flow-skill-learn.js +880 -0
- package/scripts/flow-skill-matcher.js +578 -0
- package/scripts/flow-spec-generator.js +820 -0
- package/scripts/flow-stack-wizard.js +895 -0
- package/scripts/flow-standup +162 -0
- package/scripts/flow-start +74 -0
- package/scripts/flow-start.js +235 -0
- package/scripts/flow-status +110 -0
- package/scripts/flow-status.js +301 -0
- package/scripts/flow-step-browser.js +83 -0
- package/scripts/flow-step-changelog.js +217 -0
- package/scripts/flow-step-comments.js +306 -0
- package/scripts/flow-step-complexity.js +234 -0
- package/scripts/flow-step-coverage.js +218 -0
- package/scripts/flow-step-knowledge.js +193 -0
- package/scripts/flow-step-pr-tests.js +364 -0
- package/scripts/flow-step-regression.js +89 -0
- package/scripts/flow-step-review.js +516 -0
- package/scripts/flow-step-security.js +162 -0
- package/scripts/flow-step-silent-failures.js +290 -0
- package/scripts/flow-step-simplifier.js +346 -0
- package/scripts/flow-story +105 -0
- package/scripts/flow-story.js +500 -0
- package/scripts/flow-suspend.js +252 -0
- package/scripts/flow-sync-daemon.js +654 -0
- package/scripts/flow-task-analyzer.js +606 -0
- package/scripts/flow-team-dashboard.js +748 -0
- package/scripts/flow-team-sync.js +752 -0
- package/scripts/flow-team.js +977 -0
- package/scripts/flow-tech-options.js +528 -0
- package/scripts/flow-templates.js +812 -0
- package/scripts/flow-tiered-learning.js +728 -0
- package/scripts/flow-trace +204 -0
- package/scripts/flow-transcript-chunking.js +1106 -0
- package/scripts/flow-transcript-digest.js +7918 -0
- package/scripts/flow-transcript-language.js +465 -0
- package/scripts/flow-transcript-parsing.js +1085 -0
- package/scripts/flow-transcript-stories.js +2194 -0
- package/scripts/flow-update-map +224 -0
- package/scripts/flow-utils.js +2242 -0
- package/scripts/flow-verification.js +644 -0
- package/scripts/flow-verify.js +1177 -0
- package/scripts/flow-voice-input.js +638 -0
- package/scripts/flow-watch +168 -0
- package/scripts/flow-workflow-steps.js +521 -0
- package/scripts/flow-workflow.js +1029 -0
- package/scripts/flow-worktree.js +489 -0
- package/scripts/hooks/adapters/base-adapter.js +102 -0
- package/scripts/hooks/adapters/claude-code.js +359 -0
- package/scripts/hooks/adapters/index.js +79 -0
- package/scripts/hooks/core/component-check.js +341 -0
- package/scripts/hooks/core/index.js +35 -0
- package/scripts/hooks/core/loop-check.js +241 -0
- package/scripts/hooks/core/session-context.js +294 -0
- package/scripts/hooks/core/task-gate.js +177 -0
- package/scripts/hooks/core/validation.js +230 -0
- package/scripts/hooks/entry/claude-code/post-tool-use.js +65 -0
- package/scripts/hooks/entry/claude-code/pre-tool-use.js +89 -0
- package/scripts/hooks/entry/claude-code/session-end.js +87 -0
- package/scripts/hooks/entry/claude-code/session-start.js +46 -0
- package/scripts/hooks/entry/claude-code/stop.js +43 -0
- package/scripts/postinstall.js +139 -0
- package/templates/browser-test-flow.json +56 -0
- package/templates/bug-report.md +43 -0
- package/templates/component-detail.md +42 -0
- package/templates/component.stories.tsx +49 -0
- package/templates/context/constraints.md +83 -0
- package/templates/context/conventions.md +177 -0
- package/templates/context/stack.md +60 -0
- package/templates/correction-report.md +90 -0
- package/templates/feature-proposal.md +35 -0
- package/templates/hybrid/_base.md +254 -0
- package/templates/hybrid/_patterns.md +45 -0
- package/templates/hybrid/create-component.md +127 -0
- package/templates/hybrid/create-file.md +56 -0
- package/templates/hybrid/create-hook.md +145 -0
- package/templates/hybrid/create-service.md +70 -0
- package/templates/hybrid/fix-bug.md +33 -0
- package/templates/hybrid/modify-file.md +55 -0
- package/templates/story.md +68 -0
- package/templates/task.json +56 -0
- package/templates/trace.md +69 -0
|
@@ -0,0 +1,1106 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Wogi Flow - Transcript Chunking Module
|
|
5
|
+
*
|
|
6
|
+
* Extracted from flow-transcript-digest.js for maintainability.
|
|
7
|
+
* Handles durable session persistence (E5-S3) and large transcript chunking (E5-S4).
|
|
8
|
+
*
|
|
9
|
+
* Dependencies: Requires core functions from flow-transcript-digest.js
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const fs = require('fs');
|
|
13
|
+
const path = require('path');
|
|
14
|
+
|
|
15
|
+
// Core functions are injected via init() to avoid circular dependencies
|
|
16
|
+
// Injected core API; stays null until init() is called, which requireInit() treats as "not initialized".
let digestCore = null;
|
|
17
|
+
|
|
18
|
+
/**
 * Inject the core digest API that this module delegates to.
 *
 * Has to run before any helper that goes through the core proxies
 * (loadActiveDigest, saveActiveDigest, countWords, now); until then those
 * proxies throw via requireInit().
 *
 * @param {object} core - Core functions from flow-transcript-digest.js
 */
function init(core) {
  digestCore = core;
}
|
|
25
|
+
|
|
26
|
+
// Guard for the core proxies: fail fast when init() has not supplied the core API yet.
function requireInit() {
  if (digestCore) {
    return;
  }
  throw new Error('flow-transcript-chunking not initialized. Call init() first.');
}
|
|
32
|
+
|
|
33
|
+
// Thin delegates to the injected core module. Each one checks that init() ran
// before forwarding the call unchanged.

/** Forward to core.loadActiveDigest(). */
function loadActiveDigest() {
  requireInit();
  return digestCore.loadActiveDigest();
}

/** Forward to core.saveActiveDigest(d). */
function saveActiveDigest(d) {
  requireInit();
  return digestCore.saveActiveDigest(d);
}

/** Forward to core.countWords(t). */
function countWords(t) {
  requireInit();
  return digestCore.countWords(t);
}

/** Forward to core.now(). */
function now() {
  requireInit();
  return digestCore.now();
}
|
|
38
|
+
|
|
39
|
+
// Paths
|
|
40
|
+
// Resolved against process.cwd() once, when this module is loaded.
// NOTE(review): STATE_DIR is not referenced in the part of the file reviewed here — confirm it is used further down.
const STATE_DIR = path.join(process.cwd(), '.workflow', 'state', 'digests');
|
|
41
|
+
|
|
42
|
+
// ==========================================================================
|
|
43
|
+
// E5-S3: Durable Digest Session Persistence
|
|
44
|
+
// ==========================================================================
|
|
45
|
+
|
|
46
|
+
// Registry file holding all durable sessions plus the active-session pointer; resolved against process.cwd() at load time.
const DURABLE_DIGEST_PATH = path.join(process.cwd(), '.workflow', 'state', 'durable-digest.json');
|
|
47
|
+
// Value written to the `version` field of a freshly created (empty) registry.
const DURABLE_DIGEST_VERSION = '1.0';
|
|
48
|
+
|
|
49
|
+
/**
 * Load the durable digest session registry (E5-S3).
 *
 * Best-effort by design: a missing, unreadable or unparseable registry file
 * yields a fresh empty registry instead of throwing. Parsed content is
 * normalized so that `sessions` is always an array and `active_session_id`
 * is never undefined, which is what every caller in this module relies on.
 *
 * @returns {{version: string, sessions: Array, active_session_id: (string|null)}}
 */
function loadDurableSessions() {
  const emptyRegistry = () => ({
    version: DURABLE_DIGEST_VERSION,
    sessions: [],
    active_session_id: null
  });

  if (!fs.existsSync(DURABLE_DIGEST_PATH)) {
    return emptyRegistry();
  }

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(DURABLE_DIGEST_PATH, 'utf8'));
  } catch (err) {
    // Deliberate best-effort: a corrupt registry must not block the CLI.
    return emptyRegistry();
  }

  // Valid JSON that is not a plain object (null, array, number, ...) cannot
  // be a registry; returning it as-is would crash callers on `.sessions`.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return emptyRegistry();
  }

  return {
    ...emptyRegistry(),
    ...parsed,
    sessions: Array.isArray(parsed.sessions) ? parsed.sessions : [],
    active_session_id: parsed.active_session_id ?? null
  };
}
|
|
71
|
+
|
|
72
|
+
/**
 * Persist the durable digest session registry (E5-S3).
 *
 * The JSON is written to a temporary sibling file and then renamed over the
 * registry, so an interrupted write cannot leave a truncated registry behind
 * (which loadDurableSessions() would silently treat as empty).
 *
 * @param {object} data - Registry object to serialize
 * @returns {object} The same `data` object, for chaining
 * @throws {Error} Any filesystem error from creating the directory or writing
 */
function saveDurableSessions(data) {
  // With `recursive: true` this is a no-op when the directory already exists.
  fs.mkdirSync(path.dirname(DURABLE_DIGEST_PATH), { recursive: true });

  const tmpPath = `${DURABLE_DIGEST_PATH}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmpPath, JSON.stringify(data, null, 2));
    fs.renameSync(tmpPath, DURABLE_DIGEST_PATH);
  } catch (err) {
    // Do not leave a stray temp file next to the registry on failure.
    fs.rmSync(tmpPath, { force: true });
    throw err;
  }

  return data;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Insert a session into the durable registry, or merge it into the entry
 * that already has the same `id` (E5-S3).
 *
 * New entries get `created_at` and `updated_at`; merged entries keep their
 * existing fields, take overrides from `sessionData`, and get a fresh
 * `updated_at`.
 *
 * @param {object} sessionData - Session fields; `id` selects the entry
 * @returns {object} The `sessionData` argument as passed in (not the stored
 *   record, so it carries no timestamps added here)
 */
function upsertDurableSession(sessionData) {
  const registry = loadDurableSessions();
  const position = registry.sessions.findIndex(entry => entry.id === sessionData.id);

  if (position < 0) {
    // Unknown id: append as a new record.
    registry.sessions.push({
      ...sessionData,
      created_at: now(),
      updated_at: now()
    });
  } else {
    // Known id: later spreads win, so sessionData overrides stored fields.
    const previous = registry.sessions[position];
    registry.sessions[position] = { ...previous, ...sessionData, updated_at: now() };
  }

  saveDurableSessions(registry);
  return sessionData;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Derive a progress summary for a digest from the artefact files present in
 * its directory (E5-S3).
 *
 * Files are inspected in pipeline order (topics, statement map, orphans,
 * clarifications, stories, queue); `phase` ends up as the latest stage whose
 * file was found. Reading is best-effort: a missing, unreadable or invalid
 * file is skipped as a whole, and list fields that are not arrays count as
 * empty, so one malformed file never leaves the summary half-updated.
 *
 * @param {string} digestPath - Directory holding the digest artefacts
 * @returns {object} Progress with `phase`, `passes_completed` and the
 *   topic / statement / question / story counters
 */
function getSessionProgress(digestPath) {
  const progress = {
    phase: 'unknown',
    passes_completed: [],
    topics_count: 0,
    statements_count: 0,
    questions_total: 0,
    questions_answered: 0,
    stories_generated: 0,
    stories_approved: 0
  };

  // Without a usable directory there is nothing to inspect.
  if (typeof digestPath !== 'string' || digestPath.length === 0) {
    return progress;
  }

  const artefactPath = (fileName) => path.join(digestPath, fileName);

  // Returns the parsed object, or undefined when the file is absent,
  // unreadable, not valid JSON, or not a JSON object/array.
  const readArtefact = (fileName) => {
    const filePath = artefactPath(fileName);
    if (!fs.existsSync(filePath)) {
      return undefined;
    }
    try {
      const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
      return parsed !== null && typeof parsed === 'object' ? parsed : undefined;
    } catch (err) {
      // Deliberate best-effort: a broken artefact counts as "pass not done".
      return undefined;
    }
  };

  const listOf = (value) => (Array.isArray(value) ? value : []);

  const topics = readArtefact('topics.json');
  if (topics) {
    progress.topics_count = listOf(topics.topics).length;
    progress.passes_completed.push('topics');
    progress.phase = 'topics';
  }

  const statementMap = readArtefact('statement-map.json');
  if (statementMap) {
    progress.statements_count = listOf(statementMap.statements).length;
    progress.passes_completed.push('statements');
    progress.phase = 'statements';
  }

  // The orphans pass is detected by file presence only; content is not read.
  if (fs.existsSync(artefactPath('orphans.json'))) {
    progress.passes_completed.push('orphans');
    progress.phase = 'orphans';
  }

  const clarifications = readArtefact('clarifications.json');
  if (clarifications) {
    const questions = listOf(clarifications.questions);
    progress.passes_completed.push('contradictions');
    progress.questions_total = questions.length;
    progress.questions_answered = questions.filter(q => q?.status === 'answered').length;
    progress.phase = 'clarification';
  }

  const storyFile = readArtefact('stories.json');
  if (storyFile) {
    const stories = listOf(storyFile.stories);
    progress.stories_generated = stories.length;
    progress.stories_approved = stories.filter(s => s?.approval_status === 'approved').length;
    progress.phase = 'stories';
  }

  // A queue file marks the presentation phase regardless of its content.
  if (fs.existsSync(artefactPath('queue.json'))) {
    progress.phase = 'presentation';
  }

  return progress;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Record a new digest session in the durable registry and make it the
 * active one (E5-S3).
 *
 * @param {string} sessionId - Session identifier; its last 8 characters are
 *   used for the fallback display name
 * @param {string} digestPath - Directory holding the digest artefacts
 * @param {object} [transcriptInfo={}] - Optional `name`, `source`,
 *   `word_count`, `language` and `format` of the transcript
 * @returns {object} The session object that was registered
 */
function registerDurableSession(sessionId, digestPath, transcriptInfo = {}) {
  const displayName = transcriptInfo.name || `Digest ${sessionId.slice(-8)}`;

  const transcript = {
    source: transcriptInfo.source || 'unknown',
    word_count: transcriptInfo.word_count || 0,
    language: transcriptInfo.language || null,
    format: transcriptInfo.format || null
  };

  const recoveryContext = {
    last_action: 'created',
    last_question_id: null,
    pending_questions: []
  };

  const session = {
    id: sessionId,
    name: displayName,
    status: 'active',
    digest_path: digestPath,
    transcript,
    progress: getSessionProgress(digestPath),
    checkpoints: [],
    recovery_context: recoveryContext
  };

  upsertDurableSession(session);

  // Reload after the upsert so the active pointer is set on the saved state.
  const registry = loadDurableSessions();
  registry.active_session_id = sessionId;
  saveDurableSessions(registry);

  return session;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Recompute and persist the stored progress of a durable session (E5-S3).
 *
 * @param {string|null} [sessionId=null] - Session to refresh; falls back to
 *   the registry's active session when falsy
 * @returns {object|null} The updated session, or null when there is no id to
 *   use or no session with that id
 */
function updateDurableProgress(sessionId = null) {
  const registry = loadDurableSessions();
  const targetId = sessionId || registry.active_session_id;

  const target = targetId
    ? registry.sessions.find(entry => entry.id === targetId)
    : undefined;
  if (!target) {
    return null;
  }

  target.progress = getSessionProgress(target.digest_path);
  target.updated_at = now();

  saveDurableSessions(registry);
  return target;
}
|
|
241
|
+
|
|
242
|
+
/**
 * Append a checkpoint to the active durable session (E5-S3).
 *
 * @param {string} phase - Phase label stored on the checkpoint
 * @param {string} [reason='manual'] - Why the checkpoint was taken
 * @returns {object|null} The new checkpoint, or null when no session is
 *   active or the active id is not in the registry
 */
function createDurableCheckpoint(phase, reason = 'manual') {
  const registry = loadDurableSessions();
  const activeId = registry.active_session_id;
  if (!activeId) {
    return null;
  }

  const active = registry.sessions.find(entry => entry.id === activeId);
  if (!active) {
    return null;
  }

  // The id is the current epoch milliseconds in base 36, prefixed with "cp-".
  const checkpoint = {
    id: `cp-${Date.now().toString(36)}`,
    phase,
    created_at: now(),
    reason
  };

  if (!active.checkpoints) {
    active.checkpoints = [];
  }
  active.checkpoints.push(checkpoint);
  active.updated_at = now();

  saveDurableSessions(registry);
  return checkpoint;
}
|
|
266
|
+
|
|
267
|
+
/**
 * List durable sessions, most recently updated first (E5-S3).
 *
 * Sessions whose `updated_at` is missing or unparseable are ordered as the
 * oldest, so the comparator never returns NaN and the order stays consistent.
 *
 * @param {object} [options={}]
 * @param {string} [options.status] - When set, keep only sessions with this status
 * @returns {{sessions: Array, active_id: (string|null), total: number}}
 *   `total` counts the sessions remaining after filtering
 */
function listDurableSessions(options = {}) {
  const durable = loadDurableSessions();

  // Invalid or absent timestamps map to 0 (epoch) so they sort last.
  const toMillis = (value) => {
    const ms = new Date(value).getTime();
    return Number.isNaN(ms) ? 0 : ms;
  };

  // filter() returns a new array, so the sort below never reorders
  // durable.sessions itself.
  const sessions = durable.sessions
    .filter(s => !options.status || s.status === options.status)
    .sort((a, b) => toMillis(b.updated_at) - toMillis(a.updated_at));

  return {
    sessions,
    active_id: durable.active_session_id,
    total: sessions.length
  };
}
|
|
288
|
+
|
|
289
|
+
/**
 * Look up one durable session, with progress recomputed from the files in
 * its digest directory (E5-S3).
 *
 * Nothing is written back to the registry; the fresh progress only appears
 * on the returned copy.
 *
 * @param {string} sessionId - Session identifier
 * @returns {object|null} Copy of the session plus `is_active`, or null when
 *   the id is unknown
 */
function getDurableSession(sessionId) {
  const registry = loadDurableSessions();
  const match = registry.sessions.find(entry => entry.id === sessionId);
  if (!match) {
    return null;
  }

  return {
    ...match,
    progress: getSessionProgress(match.digest_path),
    is_active: registry.active_session_id === sessionId
  };
}
|
|
306
|
+
|
|
307
|
+
/**
 * Make another durable session the active one (E5-S3).
 *
 * The previously active session, if still marked 'active', is demoted to
 * 'in_progress'. The registry is saved first, then the active digest pointer
 * from the core module is updated to the new session.
 *
 * @param {string} sessionId - Session to activate
 * @returns {object} The activated session record
 * @throws {Error} When the session is unknown or its digest path does not exist
 */
function switchDurableSession(sessionId) {
  const registry = loadDurableSessions();
  const target = registry.sessions.find(entry => entry.id === sessionId);

  if (!target) {
    throw new Error(`Session not found: ${sessionId}`);
  }
  if (!fs.existsSync(target.digest_path)) {
    throw new Error(`Session data not found at: ${target.digest_path}`);
  }

  // Demote the session that was active until now (unless it is the same one).
  const previousId = registry.active_session_id;
  if (previousId && previousId !== sessionId) {
    const previous = registry.sessions.find(entry => entry.id === previousId);
    if (previous?.status === 'active') {
      previous.status = 'in_progress';
      previous.updated_at = now();
    }
  }

  registry.active_session_id = sessionId;
  target.status = 'active';
  target.updated_at = now();
  saveDurableSessions(registry);

  // NOTE(review): assumes loadActiveDigest() returns an object with a
  // `session` object — confirm against flow-transcript-digest.js. If it does
  // not, this throws after the registry has already been saved.
  const pointer = loadActiveDigest();
  pointer.session.id = sessionId;
  pointer.session.digest_path = target.digest_path;
  saveActiveDigest(pointer);

  return target;
}
|
|
347
|
+
|
|
348
|
+
/**
 * Merge fields into the recovery context of the active durable session and
 * persist the registry (E5-S3).
 *
 * @param {object} contextUpdate - Fields to overlay on the stored context
 * @returns {object|null} The merged recovery context, or null when no
 *   session is active or the active id is not in the registry
 */
function updateRecoveryContext(contextUpdate) {
  const registry = loadDurableSessions();
  const activeId = registry.active_session_id;
  if (!activeId) {
    return null;
  }

  const active = registry.sessions.find(entry => entry.id === activeId);
  if (!active) {
    return null;
  }

  // Shallow merge into a new object: update fields win over stored ones.
  active.recovery_context = Object.assign({}, active.recovery_context, contextUpdate);
  active.updated_at = now();

  saveDurableSessions(registry);
  return active.recovery_context;
}
|
|
367
|
+
|
|
368
|
+
/**
 * Build a compact recovery summary for one durable session (E5-S3).
 *
 * @param {string} sessionId - Session identifier
 * @returns {object} Summary with identity, status, human-readable last
 *   activity, progress counters, suggested next action and checkpoint count;
 *   or `{ error: 'Session not found' }` for an unknown id
 */
function generateRecoverySummaryForSession(sessionId) {
  const session = getDurableSession(sessionId);
  if (!session) {
    return { error: 'Session not found' };
  }

  const { progress } = session;
  const lastActive = getTimeSince(session.updated_at);

  const questions = {
    answered: progress.questions_answered,
    total: progress.questions_total,
    pending: progress.questions_total - progress.questions_answered
  };
  const stories = {
    generated: progress.stories_generated,
    approved: progress.stories_approved
  };

  return {
    session_id: session.id,
    name: session.name,
    status: session.status,
    last_active: lastActive,
    progress: {
      phase: progress.phase,
      topics: progress.topics_count,
      statements: progress.statements_count,
      questions,
      stories
    },
    next_action: determineNextAction(session),
    checkpoints_count: session.checkpoints?.length || 0
  };
}
|
|
405
|
+
|
|
406
|
+
/**
 * Format how long ago a timestamp was, in the largest whole unit (E5-S3).
 *
 * @param {string|number|Date} timestamp - Anything `new Date()` can parse
 * @returns {string} "N day(s) ago", "N hour(s) ago", "N minute(s) ago",
 *   "just now" for under a minute (and for timestamps in the future), or
 *   "unknown" when the timestamp is missing or cannot be parsed
 */
function getTimeSince(timestamp) {
  // `new Date(null)` is the epoch and `new Date(undefined)` is invalid;
  // neither is a real "last active" time, so report both as unknown.
  if (timestamp == null) {
    return 'unknown';
  }
  const then = new Date(timestamp).getTime();
  if (Number.isNaN(then)) {
    return 'unknown';
  }

  const minutes = Math.floor((Date.now() - then) / 60000);
  const hours = Math.floor(minutes / 60);
  const days = Math.floor(hours / 24);

  const phrase = (count, unit) => `${count} ${unit}${count > 1 ? 's' : ''} ago`;

  if (days > 0) return phrase(days, 'day');
  if (hours > 0) return phrase(hours, 'hour');
  if (minutes > 0) return phrase(minutes, 'minute');
  return 'just now';
}
|
|
420
|
+
|
|
421
|
+
/**
 * Suggest the next step for a session from its progress (E5-S3).
 *
 * Checks run in pipeline order: the four analysis passes, then unanswered
 * clarification questions, then story generation, then story review, and
 * finally "finalize". A session without progress data is treated as not
 * started instead of throwing.
 *
 * @param {object} session - Session whose `progress` is inspected
 * @returns {{action: string, command: string, pending: (number|undefined)}}
 *   `pending` is only present for answer_questions and review_stories
 */
function determineNextAction(session) {
  const progress = session?.progress ?? {};
  const completed = Array.isArray(progress.passes_completed)
    ? progress.passes_completed
    : [];

  // [pass name, action, command], in the order the passes must be run.
  const passSteps = [
    ['topics', 'extract_topics', 'topics'],
    ['statements', 'associate_statements', 'pass2'],
    ['orphans', 'check_orphans', 'pass3'],
    ['contradictions', 'resolve_contradictions', 'pass4']
  ];
  for (const [pass, action, command] of passSteps) {
    if (!completed.includes(pass)) {
      return { action, command };
    }
  }

  const questionsTotal = progress.questions_total ?? 0;
  const questionsAnswered = progress.questions_answered ?? 0;
  const storiesGenerated = progress.stories_generated ?? 0;
  const storiesApproved = progress.stories_approved ?? 0;
  const topicsCount = progress.topics_count ?? 0;

  if (questionsTotal > 0 && questionsAnswered < questionsTotal) {
    return {
      action: 'answer_questions',
      command: 'show-questions',
      pending: questionsTotal - questionsAnswered
    };
  }

  if (storiesGenerated === 0 && topicsCount > 0) {
    return { action: 'generate_stories', command: 'generate-stories' };
  }

  if (storiesGenerated > storiesApproved) {
    return {
      action: 'review_stories',
      command: 'present',
      pending: storiesGenerated - storiesApproved
    };
  }

  return { action: 'finalize', command: 'finalize' };
}
|
|
465
|
+
|
|
466
|
+
/**
 * Mark a durable session as archived; if it was the active session, the
 * active pointer is cleared (E5-S3).
 *
 * @param {string} sessionId - Session to archive
 * @returns {object} The archived session record
 * @throws {Error} When no session has this id
 */
function archiveDurableSession(sessionId) {
  const registry = loadDurableSessions();
  const target = registry.sessions.find(entry => entry.id === sessionId);
  if (!target) {
    throw new Error(`Session not found: ${sessionId}`);
  }

  Object.assign(target, { status: 'archived', updated_at: now() });

  const wasActive = registry.active_session_id === sessionId;
  if (wasActive) {
    registry.active_session_id = null;
  }

  saveDurableSessions(registry);
  return target;
}
|
|
488
|
+
|
|
489
|
+
/**
 * Remove a durable session from the registry, optionally deleting its
 * digest directory as well (E5-S3).
 *
 * Files are removed before the registry is saved. If the session was the
 * active one, the active pointer is cleared.
 *
 * @param {string} sessionId - Session to delete
 * @param {boolean} [deleteFiles=false] - Also delete the digest path from disk
 * @returns {{deleted: boolean, id: string}}
 * @throws {Error} When no session has this id
 */
function deleteDurableSession(sessionId, deleteFiles = false) {
  const registry = loadDurableSessions();
  const position = registry.sessions.findIndex(entry => entry.id === sessionId);
  if (position === -1) {
    throw new Error(`Session not found: ${sessionId}`);
  }

  const { digest_path: digestPath } = registry.sessions[position];

  // NOTE(review): this recursively removes whatever path the registry file
  // holds, with no check that it lies inside the workflow state directory —
  // confirm that is acceptable for a hand-editable JSON file.
  const shouldRemoveFiles = deleteFiles && digestPath && fs.existsSync(digestPath);
  if (shouldRemoveFiles) {
    fs.rmSync(digestPath, { recursive: true, force: true });
  }

  registry.sessions.splice(position, 1);

  if (registry.active_session_id === sessionId) {
    registry.active_session_id = null;
  }

  saveDurableSessions(registry);
  return { deleted: true, id: sessionId };
}
|
|
518
|
+
|
|
519
|
+
/**
 * Mark a durable session as completed and stamp its completion time (E5-S3).
 *
 * The registry's active-session pointer is left as it is.
 *
 * @param {string|null} [sessionId=null] - Session to complete; falls back to
 *   the active session when falsy
 * @returns {object|null} The completed session, or null when there is no id
 *   to use or no session with that id
 */
function completeDurableSession(sessionId = null) {
  const registry = loadDurableSessions();
  const targetId = sessionId || registry.active_session_id;

  const target = targetId
    ? registry.sessions.find(entry => entry.id === targetId)
    : undefined;
  if (!target) {
    return null;
  }

  Object.assign(target, {
    status: 'completed',
    completed_at: now(),
    updated_at: now()
  });

  saveDurableSessions(registry);
  return target;
}
|
|
538
|
+
|
|
539
|
+
// ==========================================================================
|
|
540
|
+
// E5-S4: Large Transcript Chunking
|
|
541
|
+
// ==========================================================================
|
|
542
|
+
|
|
543
|
+
/**
 * Default chunking settings (E5-S4).
 *
 * - `thresholds`: input sizes above which needsChunking() reports that
 *   chunking is needed (exceeding any one of them is enough).
 * - `targetChunk*` / `maxChunk*`: target and maximum chunk sizes, in words
 *   and tokens.
 * - `overlap*`: overlap carried between chunks for context preservation.
 */
const CHUNKING_DEFAULTS = {
  thresholds: { words: 10000, tokens: 15000, chars: 50000 },

  targetChunkWords: 3000,
  targetChunkTokens: 4500,
  maxChunkWords: 5000,
  maxChunkTokens: 7500,

  overlapWords: 200,
  overlapSentences: 5
};
|
|
562
|
+
|
|
563
|
+
/**
 * Regular expressions that recognise a speaker marker, used for boundary
 * detection (E5-S4). The array is iterated in this order by
 * findNaturalBoundary().
 */
const SPEAKER_BOUNDARY_PATTERNS = [
  // Line starting with a capitalised name and a colon: "John Smith: "
  /^([A-Z][a-zA-Z\s'-]+):\s/m,
  // Line starting with a bracketed label followed by whitespace: "[Speaker] "
  /^\[([^\]]+)\]\s/m,
  // WebVTT voice tag: "<v Name>"
  /<v\s+([^>]+)>/,
  // Zoom chat format: "From Alice to ..."
  /^From\s+(.+?)\s+to\s+/m,
  // Zoom chat with leading timestamp and tab(s): "10:15:02<TAB>From"
  /^\d{1,2}:\d{2}(:\d{2})?\t+From/m
];
|
|
573
|
+
|
|
574
|
+
/**
 * Decide whether a transcript is large enough to require chunking (E5-S4).
 *
 * The text is measured with measureInputMetrics() and compared against the
 * word, token and character thresholds; exceeding any one is enough.
 * Thresholds from `options.thresholds` are merged over the defaults key by
 * key, so overriding one threshold keeps the other two in force.
 *
 * @param {string} text - Transcript text
 * @param {object} [options={}]
 * @param {object} [options.thresholds] - Partial or full override of
 *   `words`, `tokens`, `chars`
 * @returns {{needed: boolean, reason: (string|null), metrics: object}}
 *   `reason` names the first exceeded limit in the order word_count,
 *   token_count, char_count; `metrics` echoes the measurements and the
 *   thresholds that were applied
 */
function needsChunking(text, options = {}) {
  const thresholds = {
    ...CHUNKING_DEFAULTS.thresholds,
    ...options?.thresholds
  };
  const metrics = measureInputMetrics(text);

  // Ordered by reporting priority: the first exceeded limit is the reason.
  const checks = [
    ['word_count', metrics.wordCount > thresholds.words],
    ['token_count', metrics.estimatedTokens > thresholds.tokens],
    ['char_count', metrics.charCount > thresholds.chars]
  ];
  const firstExceeded = checks.find(([, exceeded]) => exceeded);

  return {
    needed: Boolean(firstExceeded),
    reason: firstExceeded ? firstExceeded[0] : null,
    metrics: {
      words: metrics.wordCount,
      tokens: metrics.estimatedTokens,
      chars: metrics.charCount,
      thresholds
    }
  };
}
|
|
598
|
+
|
|
599
|
+
/**
 * Break text into sentences (E5-S4).
 *
 * The split happens on whitespace that directly follows '.', '!' or '?';
 * the punctuation stays attached to its sentence. Pieces that are empty or
 * whitespace-only are dropped.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Sentences in their original order.
 */
function splitIntoSentences(text) {
  const sentences = [];
  for (const piece of text.split(/(?<=[.!?])\s+/)) {
    if (piece.trim() !== '') {
      sentences.push(piece);
    }
  }
  return sentences;
}
|
|
607
|
+
|
|
608
|
+
/**
 * Find a natural place to cut the text close to a target position (E5-S4).
 *
 * Only the window [targetPos - searchRange, targetPos + searchRange] is
 * inspected, clipped to the text and to minBoundary. Boundary kinds are
 * tried in priority order, and within a kind the candidate nearest to
 * targetPos wins (the earlier one on a tie):
 *   1. speaker change  - start of a SPEAKER_BOUNDARY_PATTERNS match; the
 *                        patterns are tried in list order
 *   2. paragraph break - just after a run of two or more newlines
 *   3. newline         - just after a single newline
 *   4. sentence end    - just after '.', '!' or '?' plus trailing whitespace
 * If nothing qualifies, the target position itself is used ('forced').
 *
 * @param {string} text - Full text to search in
 * @param {number} targetPos - Target position for boundary
 * @param {Object} options - Options including searchRange and minBoundary
 * @param {number} [options.searchRange=500] - Characters to look at on each
 *   side of targetPos (a value of 0 falls back to 500)
 * @param {number} [options.minBoundary=0] - Returned positions are strictly
 *   greater than this offset
 * @returns {{position: number, type: string}} Cut offset into `text` and the
 *   kind of boundary: 'speaker_change', 'paragraph', 'newline', 'sentence'
 *   or 'forced'
 */
function findNaturalBoundary(text, targetPos, options = {}) {
  const searchRange = options.searchRange || 500; // Search +/- 500 chars
  const minBoundary = options.minBoundary || 0; // Minimum valid boundary position
  const searchStart = Math.max(minBoundary, targetPos - searchRange);
  const searchEnd = Math.min(text.length, targetPos + searchRange);
  // substring() silently swaps inverted bounds, which would make the
  // positions computed below meaningless; treat that case as an empty window.
  const searchArea = searchStart < searchEnd ? text.substring(searchStart, searchEnd) : '';

  // Returns the candidate position nearest to targetPos for one pattern, or
  // null. `atMatchEnd` selects whether the cut goes after the match
  // (separators) or before it (speaker labels).
  const nearestBoundary = (pattern, atMatchEnd) => {
    const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
    let best = null;
    let bestDist = Infinity;

    // matchAll advances past zero-width matches on its own.
    for (const match of searchArea.matchAll(new RegExp(pattern.source, flags))) {
      const pos = searchStart + match.index + (atMatchEnd ? match[0].length : 0);
      if (pos <= minBoundary) continue; // would not make forward progress

      const dist = Math.abs(pos - targetPos);
      if (dist < bestDist) {
        bestDist = dist;
        best = pos;
      }
    }
    return best;
  };

  // Priority 1: speaker change
  for (const pattern of SPEAKER_BOUNDARY_PATTERNS) {
    const position = nearestBoundary(pattern, false);
    if (position !== null) {
      return { position, type: 'speaker_change' };
    }
  }

  // Priorities 2-4: separators, cutting right after the separator
  const separators = [
    { pattern: /\n{2,}/g, type: 'paragraph' },
    { pattern: /\n/g, type: 'newline' },
    { pattern: /[.!?]\s+/g, type: 'sentence' }
  ];
  for (const { pattern, type } of separators) {
    const position = nearestBoundary(pattern, true);
    if (position !== null) {
      return { position, type };
    }
  }

  // Fallback: use target position or end of text if target is beyond
  const fallbackPos = Math.max(minBoundary + 1, Math.min(targetPos, text.length));
  return { position: fallbackPos, type: 'forced' };
}
|
|
699
|
+
|
|
700
|
+
/**
 * Plan how a transcript will be divided into chunks (E5-S4).
 *
 * The number of chunks is estimated from the word count and
 * `targetChunkWords`. The text is then walked in equal character strides,
 * and each stride end is moved to a natural boundary found by
 * findNaturalBoundary. The final chunk always runs to the end of the text.
 * Only offsets and metrics are planned; chunk text is not stored.
 *
 * @param {string} text - Full transcript text.
 * @param {Object} [options] - Overrides for CHUNKING_DEFAULTS.
 * @returns {{total_chunks: number, total_words: number, total_tokens: number,
 *   avg_chunk_words: number, chunks: Object[]}} The plan. When the text has
 *   no words, the plan has zero chunks and `avg_chunk_words` is 0.
 */
function planChunks(text, options = {}) {
  const config = { ...CHUNKING_DEFAULTS, ...options };
  const metrics = measureInputMetrics(text);

  // Calculate number of chunks needed
  const targetWords = config.targetChunkWords;
  const estimatedChunks = Math.ceil(metrics.wordCount / targetWords);

  // Approximate chars per chunk; with zero chunks the loop never runs, so
  // avoid computing 0/0 here.
  const charsPerChunk = estimatedChunks > 0 ? Math.ceil(text.length / estimatedChunks) : 0;

  const chunks = [];
  let currentPos = 0;

  for (let i = 0; i < estimatedChunks && currentPos < text.length; i++) {
    const targetEndPos = Math.min(currentPos + charsPerChunk, text.length);
    // A remainder of 50 chars or fewer is folded into the current chunk.
    const isLastChunk = (i === estimatedChunks - 1) || (targetEndPos >= text.length - 50);

    let endPos;
    let boundaryType;

    if (isLastChunk) {
      // For the last chunk, just use the end of text
      endPos = text.length;
      boundaryType = 'document_end';
    } else {
      // Find natural boundary near target, but not before currentPos
      const boundary = findNaturalBoundary(text, targetEndPos, { searchRange: 500, minBoundary: currentPos });
      // Ensure we always make forward progress
      endPos = Math.max(currentPos + 1, Math.min(boundary.position, text.length));
      boundaryType = boundary.type;
    }

    // Metrics describe the trimmed content; offsets describe the raw span.
    const content = text.substring(currentPos, endPos).trim();
    const chunkMetrics = measureInputMetrics(content);

    chunks.push({
      chunk_id: `chunk-${String(i + 1).padStart(3, '0')}`,
      index: i,
      start_offset: currentPos,
      end_offset: endPos,
      word_count: chunkMetrics.wordCount,
      token_estimate: chunkMetrics.estimatedTokens,
      char_count: content.length,
      boundary_type: boundaryType
    });

    currentPos = endPos;
  }

  // The real count is only known now; stamp it on every chunk.
  const totalChunks = chunks.length;
  for (const chunk of chunks) {
    chunk.total_chunks = totalChunks;
  }

  return {
    total_chunks: totalChunks,
    total_words: metrics.wordCount,
    total_tokens: metrics.estimatedTokens,
    // Guard the division so an empty plan reports 0 instead of NaN.
    avg_chunk_words: totalChunks > 0 ? Math.round(metrics.wordCount / totalChunks) : 0,
    chunks
  };
}
|
|
767
|
+
|
|
768
|
+
/**
 * Build the actual chunks for a transcript (E5-S4).
 *
 * Runs planChunks and then attaches to every planned chunk its trimmed text
 * plus, for every chunk after the first, an `overlap` record holding the
 * text that directly precedes the chunk. The overlap length is approximated
 * as `overlapWords * 5` characters.
 *
 * @param {string} text - Full transcript text.
 * @param {Object} [options] - Overrides for CHUNKING_DEFAULTS.
 * @returns {Object} The plan returned by planChunks, with `chunks` replaced
 *   by entries that also carry `content`, `has_overlap` and `overlap`.
 */
function createChunks(text, options = {}) {
  const plan = planChunks(text, options);
  const { overlapWords } = { ...CHUNKING_DEFAULTS, ...options };

  const chunks = plan.chunks.map((chunkPlan, position) => {
    const { start_offset: start, end_offset: end } = chunkPlan;
    const content = text.substring(start, end).trim();
    const previous = position > 0 ? plan.chunks[position - 1] : null;

    let overlap = null;
    if (previous && previous.end_offset) {
      // ~5 chars per word
      const overlapStart = Math.max(0, start - (overlapWords * 5));
      const overlapText = text.substring(overlapStart, start).trim();

      if (overlapText.length > 0) {
        overlap = {
          text: overlapText,
          word_count: countWords(overlapText),
          source_chunk: previous.chunk_id
        };
      }
    }

    return {
      ...chunkPlan,
      content,
      has_overlap: overlap !== null,
      overlap
    };
  });

  return { ...plan, chunks };
}
|
|
812
|
+
|
|
813
|
+
/**
 * Reduce a topic title to a comparison key for deduplication (E5-S4).
 *
 * The title is lower-cased, every character that is neither a word
 * character nor whitespace is removed, whitespace runs collapse to a single
 * space, and the ends are trimmed.
 *
 * @param {string} title - Topic title.
 * @returns {string} Normalized key.
 */
function normalizeTopicTitle(title) {
  const lowered = title.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\w\s]/g, '');
  const singleSpaced = withoutPunctuation.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
|
|
822
|
+
|
|
823
|
+
/**
 * Reduce a statement to a signature used for deduplication (E5-S4).
 *
 * Applies lower-casing, removal of characters that are neither word
 * characters nor whitespace, whitespace collapsing and trimming, then keeps
 * only the first 100 characters of the result.
 *
 * @param {string} text - Statement text.
 * @returns {string} Signature of at most 100 characters.
 */
function normalizeStatement(text) {
  const cleaned = text
    .toLowerCase()
    .replace(/[^\w\s]/g, '')
    .replace(/\s+/g, ' ')
    .trim();

  // Use first 100 chars as signature
  return cleaned.slice(0, 100);
}
|
|
833
|
+
|
|
834
|
+
/**
 * Merge the topics extracted from several chunks (E5-S4).
 *
 * Topics whose titles normalize to the same key (see normalizeTopicTitle)
 * are combined: the first occurrence supplies the fields, keywords are
 * unioned, and every contributing chunk id is recorded in `source_chunks`.
 * Merged topics keep first-seen order and receive fresh ids `topic-1`,
 * `topic-2`, ...
 *
 * @param {Object[]} chunkResults - Per-chunk results; each may carry
 *   `topics.topics` (array of topics) and a `chunk_id`. Results without
 *   topics are skipped.
 * @returns {{topics: Object[], metadata: {merged_from_chunks: number,
 *   original_topic_count: number, merged_topic_count: number}}}
 */
function mergeChunkTopics(chunkResults) {
  // A Map rather than a plain object: keys come from transcript content, so
  // a title such as "Constructor" or "toString" must not collide with
  // members inherited from Object.prototype, and integer-like keys must not
  // be reordered ahead of the others.
  const merged = new Map();

  for (const result of chunkResults) {
    const chunkTopics = result.topics?.topics;
    if (!chunkTopics) continue;

    for (const topic of chunkTopics) {
      const key = normalizeTopicTitle(topic.title);
      const existing = merged.get(key);

      if (existing) {
        // Union of keywords, keeping first-seen order
        existing.keywords = [
          ...new Set([...(existing.keywords || []), ...(topic.keywords || [])])
        ];
        // Track source chunks
        existing.source_chunks.push(result.chunk_id);
      } else {
        merged.set(key, {
          ...topic,
          source_chunks: [result.chunk_id]
        });
      }
    }
  }

  // Regenerate IDs for merged topics
  const topics = Array.from(merged.values(), (topic, index) => ({
    ...topic,
    id: `topic-${index + 1}`
  }));

  return {
    topics,
    metadata: {
      merged_from_chunks: chunkResults.length,
      original_topic_count: chunkResults.reduce(
        (sum, r) => sum + (r.topics?.topics?.length || 0),
        0
      ),
      merged_topic_count: topics.length
    }
  };
}
|
|
881
|
+
|
|
882
|
+
/**
 * Merge the statements extracted from several chunks (E5-S4).
 *
 * Statements are visited in chunk order. A statement is kept only when its
 * signature (see normalizeStatement) has not been seen before, so the first
 * occurrence wins. Each kept statement is tagged with the `chunk_id` it came
 * from and renumbered `stmt-1`, `stmt-2`, ...
 *
 * @param {Object[]} chunkResults - Per-chunk results; each may carry a
 *   `statements` array and a `chunk_id`. Results without statements are
 *   skipped.
 * @returns {Object[]} De-duplicated statements with `source_chunk` and `id`.
 */
function mergeChunkStatements(chunkResults) {
  const knownSignatures = new Set();
  const kept = [];

  for (const { statements, chunk_id: chunkId } of chunkResults) {
    for (const stmt of statements || []) {
      const signature = normalizeStatement(stmt.text);
      if (knownSignatures.has(signature)) continue;

      knownSignatures.add(signature);
      kept.push({
        ...stmt,
        source_chunk: chunkId,
        // Ids are regenerated from the position in the merged list.
        id: `stmt-${kept.length + 1}`
      });
    }
  }

  return kept;
}
|
|
911
|
+
|
|
912
|
+
/**
 * Create and persist the chunking state for a session (E5-S4).
 *
 * Builds a state record with one 'pending' entry per planned chunk and
 * writes it as `chunking.json` inside the active digest's directory.
 *
 * @param {string} sessionId - Session identifier stored in the state.
 * @param {Object} plan - Chunk plan; `total_chunks`, `avg_chunk_words` and
 *   `chunks` (each with `chunk_id` and `index`) are read from it.
 * @returns {Object} The chunking state that was written.
 * @throws {Error} 'No active digest session' when the active digest has no
 *   session or no digest path.
 */
function initializeChunkingState(sessionId, plan) {
  const activeDigest = loadActiveDigest();
  // Optional chaining keeps a missing session on the intended error path
  // (the same check loadChunkingState and saveChunkingState use) instead of
  // failing with a TypeError.
  if (!activeDigest.session?.digest_path) {
    throw new Error('No active digest session');
  }

  const chunkingState = {
    enabled: true,
    session_id: sessionId,
    total_chunks: plan.total_chunks,
    processed_chunks: 0,
    chunk_size: {
      target_words: CHUNKING_DEFAULTS.targetChunkWords,
      actual_avg_words: plan.avg_chunk_words
    },
    chunks: plan.chunks.map(c => ({
      id: c.chunk_id,
      index: c.index,
      status: 'pending',
      topics_found: null,
      statements_found: null
    })),
    merge_status: 'pending',
    created_at: now()
  };

  // Save chunking state
  const chunkingPath = path.join(activeDigest.session.digest_path, 'chunking.json');
  fs.writeFileSync(chunkingPath, JSON.stringify(chunkingState, null, 2));

  return chunkingState;
}
|
|
947
|
+
|
|
948
|
+
/**
 * Read the persisted chunking state of the active digest (E5-S4).
 *
 * @returns {Object|null} Parsed contents of `chunking.json` from the active
 *   digest's directory, or null when there is no active digest path or the
 *   file does not exist.
 */
function loadChunkingState() {
  const digestPath = loadActiveDigest().session?.digest_path;
  if (!digestPath) return null;

  const chunkingPath = path.join(digestPath, 'chunking.json');
  return fs.existsSync(chunkingPath)
    ? JSON.parse(fs.readFileSync(chunkingPath, 'utf8'))
    : null;
}
|
|
964
|
+
|
|
965
|
+
/**
 * Persist the chunking state of the active digest (E5-S4).
 *
 * Stamps `updated_at` on the given object (the argument itself is modified)
 * and writes it as `chunking.json` in the active digest's directory.
 *
 * @param {Object} state - Chunking state to store.
 * @returns {Object} The same `state` object.
 * @throws {Error} 'No active digest session' when there is no digest path.
 */
function saveChunkingState(state) {
  const digestPath = loadActiveDigest().session?.digest_path;
  if (!digestPath) {
    throw new Error('No active digest session');
  }

  state.updated_at = now();
  fs.writeFileSync(
    path.join(digestPath, 'chunking.json'),
    JSON.stringify(state, null, 2)
  );
  return state;
}
|
|
979
|
+
|
|
980
|
+
/**
 * Record the processing status of one chunk (E5-S4).
 *
 * Loads the chunking state, sets the chunk's status, copies over any
 * provided result counts, recomputes `processed_chunks` as the number of
 * chunks marked 'completed', and saves the state.
 *
 * @param {string} chunkId - Id of the chunk to update.
 * @param {string} status - New status value.
 * @param {Object} [results] - Optional `topics_found` / `statements_found`;
 *   a field left undefined keeps the stored value.
 * @returns {Object} The updated chunking state.
 * @throws {Error} 'No chunking state found' when no state is available, or
 *   'Chunk not found: <id>' when the id is unknown.
 */
function updateChunkStatus(chunkId, status, results = {}) {
  const state = loadChunkingState();
  if (!state) {
    throw new Error('No chunking state found');
  }

  const target = state.chunks.find((entry) => entry.id === chunkId);
  if (!target) {
    throw new Error(`Chunk not found: ${chunkId}`);
  }

  target.status = status;
  for (const field of ['topics_found', 'statements_found']) {
    if (results[field] !== undefined) {
      target[field] = results[field];
    }
  }

  // Update processed count
  state.processed_chunks = state.chunks.reduce(
    (done, entry) => (entry.status === 'completed' ? done + 1 : done),
    0
  );

  saveChunkingState(state);
  return state;
}
|
|
1008
|
+
|
|
1009
|
+
/**
 * Look up a stored chunk by its id (E5-S4).
 *
 * Reads `chunks.json` from the active digest's directory and returns the
 * entry whose `chunk_id` matches.
 *
 * @param {string} chunkId - Chunk id to look for.
 * @returns {Object|null} The matching chunk entry, or null when there is no
 *   active digest path, no `chunks.json`, or no entry with that id.
 */
function getChunkContent(chunkId) {
  const digestPath = loadActiveDigest().session?.digest_path;
  if (!digestPath) return null;

  const chunksPath = path.join(digestPath, 'chunks.json');
  if (!fs.existsSync(chunksPath)) return null;

  const stored = JSON.parse(fs.readFileSync(chunksPath, 'utf8'));
  const found = stored.chunks.find((entry) => entry.chunk_id === chunkId);
  return found || null;
}
|
|
1028
|
+
|
|
1029
|
+
/**
 * Summarize chunk processing progress for the active digest (E5-S4).
 *
 * @returns {Object} `{ enabled: false }` when no chunking state exists.
 *   Otherwise a summary with the counts of completed, pending and failed
 *   chunks, `progress` as a whole-number percentage of `total_chunks`
 *   completed (0 when `total_chunks` is not positive), the merge status and
 *   a compact per-chunk list.
 */
function getChunkingStatus() {
  const state = loadChunkingState();
  if (!state) {
    return { enabled: false };
  }

  const countWithStatus = (status) =>
    state.chunks.filter((c) => c.status === status).length;

  const completedChunks = countWithStatus('completed');
  const pendingChunks = countWithStatus('pending');
  const failedChunks = countWithStatus('failed');

  // A plan for empty input has zero chunks; report 0% rather than NaN
  // (which JSON would serialize as null).
  const progress = state.total_chunks > 0
    ? Math.round((completedChunks / state.total_chunks) * 100)
    : 0;

  return {
    enabled: state.enabled,
    total_chunks: state.total_chunks,
    completed: completedChunks,
    pending: pendingChunks,
    failed: failedChunks,
    progress,
    merge_status: state.merge_status,
    chunks: state.chunks.map((c) => ({
      id: c.id,
      status: c.status,
      topics: c.topics_found,
      statements: c.statements_found
    }))
  };
}
|
|
1058
|
+
|
|
1059
|
+
// ============================================================================
|
|
1060
|
+
// Module Exports
|
|
1061
|
+
// ============================================================================
|
|
1062
|
+
|
|
1063
|
+
module.exports = {
|
|
1064
|
+
// Initialization
|
|
1065
|
+
init,
|
|
1066
|
+
|
|
1067
|
+
// Durable Session Persistence (E5-S3)
|
|
1068
|
+
DURABLE_DIGEST_PATH,
|
|
1069
|
+
DURABLE_DIGEST_VERSION,
|
|
1070
|
+
loadDurableSessions,
|
|
1071
|
+
saveDurableSessions,
|
|
1072
|
+
upsertDurableSession,
|
|
1073
|
+
getSessionProgress,
|
|
1074
|
+
registerDurableSession,
|
|
1075
|
+
updateDurableProgress,
|
|
1076
|
+
createDurableCheckpoint,
|
|
1077
|
+
listDurableSessions,
|
|
1078
|
+
getDurableSession,
|
|
1079
|
+
switchDurableSession,
|
|
1080
|
+
updateRecoveryContext,
|
|
1081
|
+
generateRecoverySummaryForSession,
|
|
1082
|
+
getTimeSince,
|
|
1083
|
+
determineNextAction,
|
|
1084
|
+
archiveDurableSession,
|
|
1085
|
+
deleteDurableSession,
|
|
1086
|
+
completeDurableSession,
|
|
1087
|
+
|
|
1088
|
+
// Large Transcript Chunking (E5-S4)
|
|
1089
|
+
CHUNKING_DEFAULTS,
|
|
1090
|
+
SPEAKER_BOUNDARY_PATTERNS,
|
|
1091
|
+
needsChunking,
|
|
1092
|
+
splitIntoSentences,
|
|
1093
|
+
findNaturalBoundary,
|
|
1094
|
+
planChunks,
|
|
1095
|
+
createChunks,
|
|
1096
|
+
normalizeTopicTitle,
|
|
1097
|
+
normalizeStatement,
|
|
1098
|
+
mergeChunkTopics,
|
|
1099
|
+
mergeChunkStatements,
|
|
1100
|
+
initializeChunkingState,
|
|
1101
|
+
loadChunkingState,
|
|
1102
|
+
saveChunkingState,
|
|
1103
|
+
updateChunkStatus,
|
|
1104
|
+
getChunkContent,
|
|
1105
|
+
getChunkingStatus
|
|
1106
|
+
};
|