agileflow 2.92.0 → 2.93.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.md +6 -6
- package/lib/codebase-indexer.js +2 -1
- package/package.json +1 -1
- package/scripts/agileflow-statusline.sh +106 -0
- package/scripts/agileflow-welcome.js +135 -22
- package/scripts/document-repl.js +793 -0
- package/scripts/lib/configure-features.js +8 -1
- package/scripts/lib/context-loader.js +16 -16
- package/scripts/query-codebase.js +8 -3
- package/scripts/session-manager.js +374 -16
- package/scripts/spawn-parallel.js +72 -30
- package/src/core/agents/accessibility.md +19 -125
- package/src/core/agents/adr-writer.md +18 -1
- package/src/core/agents/analytics.md +19 -125
- package/src/core/agents/api.md +5 -130
- package/src/core/agents/ci.md +26 -131
- package/src/core/agents/compliance.md +21 -125
- package/src/core/agents/database.md +20 -125
- package/src/core/agents/datamigration.md +20 -125
- package/src/core/agents/design.md +19 -125
- package/src/core/agents/devops.md +12 -129
- package/src/core/agents/documentation.md +18 -1
- package/src/core/agents/epic-planner.md +31 -10
- package/src/core/agents/integrations.md +19 -125
- package/src/core/agents/mobile.md +19 -125
- package/src/core/agents/monitoring.md +19 -125
- package/src/core/agents/performance.md +19 -125
- package/src/core/agents/product.md +18 -1
- package/src/core/agents/qa.md +21 -125
- package/src/core/agents/readme-updater.md +18 -1
- package/src/core/agents/refactor.md +19 -125
- package/src/core/agents/research.md +3 -1
- package/src/core/agents/rlm-subcore.md +202 -0
- package/src/core/agents/security.md +7 -125
- package/src/core/agents/testing.md +20 -125
- package/src/core/agents/ui.md +14 -135
- package/src/core/commands/adr/list.md +20 -0
- package/src/core/commands/adr/update.md +24 -1
- package/src/core/commands/adr/view.md +23 -1
- package/src/core/commands/adr.md +2 -2
- package/src/core/commands/agent.md +11 -1
- package/src/core/commands/assign.md +15 -6
- package/src/core/commands/auto.md +11 -1
- package/src/core/commands/babysit.md +15 -4
- package/src/core/commands/baseline.md +11 -1
- package/src/core/commands/batch.md +11 -1
- package/src/core/commands/blockers.md +11 -1
- package/src/core/commands/board.md +11 -1
- package/src/core/commands/changelog.md +11 -0
- package/src/core/commands/choose.md +16 -1
- package/src/core/commands/ci.md +11 -1
- package/src/core/commands/configure.md +73 -2
- package/src/core/commands/context/export.md +8 -0
- package/src/core/commands/context/full.md +8 -0
- package/src/core/commands/context/note.md +8 -0
- package/src/core/commands/debt.md +11 -0
- package/src/core/commands/deploy.md +10 -0
- package/src/core/commands/deps.md +11 -1
- package/src/core/commands/diagnose.md +10 -0
- package/src/core/commands/docs.md +12 -2
- package/src/core/commands/epic/list.md +20 -0
- package/src/core/commands/epic/view.md +25 -0
- package/src/core/commands/epic.md +5 -6
- package/src/core/commands/feedback.md +11 -0
- package/src/core/commands/handoff.md +12 -2
- package/src/core/commands/help.md +10 -0
- package/src/core/commands/ideate.md +10 -0
- package/src/core/commands/impact.md +11 -1
- package/src/core/commands/metrics.md +11 -1
- package/src/core/commands/multi-expert.md +11 -1
- package/src/core/commands/packages.md +11 -0
- package/src/core/commands/pr.md +10 -0
- package/src/core/commands/readme-sync.md +10 -5
- package/src/core/commands/research/analyze.md +60 -3
- package/src/core/commands/research/ask.md +9 -1
- package/src/core/commands/research/import.md +8 -0
- package/src/core/commands/research/list.md +8 -0
- package/src/core/commands/research/synthesize.md +9 -1
- package/src/core/commands/research/view.md +8 -0
- package/src/core/commands/retro.md +12 -2
- package/src/core/commands/review.md +11 -1
- package/src/core/commands/rlm.md +363 -0
- package/src/core/commands/roadmap/analyze.md +1 -1
- package/src/core/commands/rpi.md +9 -1
- package/src/core/commands/session/cleanup.md +250 -0
- package/src/core/commands/session/end.md +10 -0
- package/src/core/commands/session/history.md +11 -1
- package/src/core/commands/session/init.md +10 -0
- package/src/core/commands/session/new.md +132 -13
- package/src/core/commands/session/resume.md +10 -0
- package/src/core/commands/session/spawn.md +8 -0
- package/src/core/commands/session/status.md +10 -0
- package/src/core/commands/skill/create.md +1 -1
- package/src/core/commands/skill/delete.md +11 -1
- package/src/core/commands/skill/edit.md +11 -1
- package/src/core/commands/skill/test.md +11 -1
- package/src/core/commands/skill/upgrade.md +11 -1
- package/src/core/commands/sprint.md +14 -3
- package/src/core/commands/status.md +15 -6
- package/src/core/commands/story/list.md +23 -0
- package/src/core/commands/story/view.md +24 -0
- package/src/core/commands/story.md +4 -5
- package/src/core/commands/template.md +10 -0
- package/src/core/commands/tests.md +10 -0
- package/src/core/commands/update.md +10 -0
- package/src/core/commands/validate-expertise.md +10 -1
- package/src/core/commands/velocity.md +11 -1
- package/src/core/commands/verify.md +13 -1
- package/src/core/commands/whats-new.md +8 -0
- package/src/core/commands/workflow.md +16 -1
- package/src/core/templates/agent-coordination-pattern.md +38 -0
- package/src/core/templates/agileflow-metadata.json +25 -0
- package/src/core/templates/preserve-rules-common.md +107 -0
- package/src/core/templates/preserve-rules.json +42 -0
- package/src/core/templates/proactive-action-spec.md +29 -0
- package/src/core/templates/quality-gate-priorities.md +34 -0
- package/src/core/templates/session-harness-protocol.md +128 -0
- package/tools/cli/commands/setup.js +12 -3
- package/tools/cli/installers/ide/windsurf.js +1 -1
- package/tools/cli/lib/content-injector.js +336 -0
- package/tools/cli/lib/ide-registry.js +2 -4
- package/tools/cli/lib/ui.js +2 -1
|
@@ -0,0 +1,793 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* document-repl.js
|
|
4
|
+
*
|
|
5
|
+
* REPL (Read-Evaluate-Print-Loop) engine for document virtualization.
|
|
6
|
+
* Part of the RLM Document Analysis System (EP-0027).
|
|
7
|
+
*
|
|
8
|
+
* Virtualizes documents as searchable objects WITHOUT loading full content into LLM context.
|
|
9
|
+
* Supports programmatic operations: read, slice, regex, pattern match, keyword search.
|
|
10
|
+
*
|
|
11
|
+
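* Usage (--load is required on every invocation; the document is re-read on each run, combine --load with one operation below):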
|
|
12
|
+
* node document-repl.js --load="path/to/document.pdf" # Load document
|
|
13
|
+
* node document-repl.js --info # Show document info
|
|
14
|
+
* node document-repl.js --search="keyword" # Keyword search
|
|
15
|
+
* node document-repl.js --regex="pattern" # Regex search
|
|
16
|
+
* node document-repl.js --slice="100-200" # Get lines 100-200
|
|
17
|
+
* node document-repl.js --section="Article 7" # Find section by heading
|
|
18
|
+
* node document-repl.js --toc # Extract table of contents
|
|
19
|
+
*
|
|
20
|
+
* Options:
|
|
21
|
+
* --context=<lines> Context lines around matches (default: 2)
|
|
22
|
+
* --budget=<chars> Character budget for output (default: 15000)
|
|
23
|
+
* --json Output as JSON
|
|
24
|
+
* --verbose Show debug info
|
|
25
|
+
*
|
|
26
|
+
* Supported formats:
|
|
27
|
+
* .txt, .md - Direct text processing
|
|
28
|
+
* .pdf - Via pdf-parse library
|
|
29
|
+
* .docx - Via mammoth library
|
|
30
|
+
*
|
|
31
|
+
* Exit codes:
|
|
32
|
+
* 0 = Success
|
|
33
|
+
* 1 = Error
|
|
34
|
+
* 2 = No results
|
|
35
|
+
*
|
|
36
|
+
* RLM Principles Applied:
|
|
37
|
+
* - Document virtualized outside LLM context
|
|
38
|
+
* - Programmatic search instead of semantic similarity (RAG)
|
|
39
|
+
* - Only relevant chunks returned to caller
|
|
40
|
+
* - Supports recursive handoff to sub-agents
|
|
41
|
+
*/
|
|
42
|
+
|
|
43
|
+
const fs = require('fs');
|
|
44
|
+
const path = require('path');
|
|
45
|
+
|
|
46
|
+
// Default configuration
// Character budget applied to output when --budget is absent or unparsable.
const DEFAULT_BUDGET = 15000;
// Context lines shown on each side of a search match when --context is absent or unparsable.
const DEFAULT_CONTEXT_LINES = 2;

// State - virtualized document
// Module-level record of the currently loaded document. loadDocument() replaces
// it wholesale; the query functions read it and return an error object while
// `loaded` is false.
let documentState = {
  loaded: false, // true once loadDocument() has populated the fields below
  path: null, // path passed to loadDocument()
  format: null, // value returned by detectFormat()
  text: null, // full extracted text
  lines: [], // `text` split on '\n'
  charCount: 0, // text.length
  lineCount: 0, // lines.length
  headings: [], // { level, text, line } entries from extractHeadings()
  sections: {}, // normalized heading text -> section record from buildSectionMap()
};
|
|
62
|
+
|
|
63
|
+
// Parse command line arguments
|
|
64
|
+
/**
 * Parse command-line arguments into an options object.
 *
 * Boolean flags (--info, --toc, --json, --verbose, --help/-h) must match
 * exactly; value options use the `--name=value` form. Unrecognized arguments
 * are ignored, and when an option is repeated the last occurrence wins.
 *
 * @param {string[]} argv - Full argument vector (e.g. process.argv); the first
 *   two entries are skipped.
 * @returns {object} Options with keys load, info, search, regex, slice,
 *   section, toc, context, budget, json, verbose and help.
 */
function parseArgs(argv) {
  const args = {
    load: null,
    info: false,
    search: null,
    regex: null,
    slice: null,
    section: null,
    toc: false,
    context: DEFAULT_CONTEXT_LINES,
    budget: DEFAULT_BUDGET,
    json: false,
    verbose: false,
    help: false,
  };

  // Parse an integer option, falling back when the value is not a number or is
  // below the smallest meaningful value. A plain `parseInt(...) || fallback`
  // would wrongly discard an explicit 0.
  const toInteger = (raw, minimum, fallback) => {
    const value = parseInt(raw, 10);
    return Number.isNaN(value) || value < minimum ? fallback : value;
  };

  for (const arg of argv.slice(2)) {
    if (arg === '--info') {
      args.info = true;
    } else if (arg === '--toc') {
      args.toc = true;
    } else if (arg === '--json') {
      args.json = true;
    } else if (arg === '--verbose') {
      args.verbose = true;
    } else if (arg === '--help' || arg === '-h') {
      args.help = true;
    } else if (arg.startsWith('--load=')) {
      args.load = arg.slice(7);
    } else if (arg.startsWith('--search=')) {
      args.search = arg.slice(9);
    } else if (arg.startsWith('--regex=')) {
      args.regex = arg.slice(8);
    } else if (arg.startsWith('--slice=')) {
      args.slice = arg.slice(8);
    } else if (arg.startsWith('--section=')) {
      args.section = arg.slice(10);
    } else if (arg.startsWith('--context=')) {
      // 0 is valid: it means "matching line only, no surrounding context".
      args.context = toInteger(arg.slice(10), 0, DEFAULT_CONTEXT_LINES);
    } else if (arg.startsWith('--budget=')) {
      // A budget must be positive to allow any output at all.
      args.budget = toInteger(arg.slice(9), 1, DEFAULT_BUDGET);
    }
  }

  return args;
}
|
|
110
|
+
|
|
111
|
+
// Detect document format from extension
|
|
112
|
+
// Lower-cased file extension -> internal format name.
const FORMAT_BY_EXTENSION = new Map([
  ['.txt', 'text'],
  ['.md', 'markdown'],
  ['.markdown', 'markdown'],
  ['.pdf', 'pdf'],
  ['.docx', 'docx'],
  ['.doc', 'doc-legacy'],
]);

/**
 * Map a file path to a format name based on its extension (case-insensitive).
 *
 * @param {string} filePath - Path whose extension is inspected.
 * @returns {string} 'text', 'markdown', 'pdf', 'docx', 'doc-legacy', or
 *   'unknown' for any other (or missing) extension.
 */
function detectFormat(filePath) {
  const extension = path.extname(filePath).toLowerCase();
  const known = FORMAT_BY_EXTENSION.get(extension);
  return known === undefined ? 'unknown' : known;
}
|
|
130
|
+
|
|
131
|
+
// Load text-based documents (txt, md)
|
|
132
|
+
/**
 * Read a plain-text document (txt, md) as UTF-8.
 *
 * A leading byte-order mark is removed: Node keeps it in the decoded string,
 * and it would otherwise sit in front of the first character and stop
 * start-of-line patterns (such as the heading detection) from matching line 1.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} File contents without a leading BOM.
 * @throws {Error} Whatever fs.readFileSync throws (missing file, directory, ...).
 */
function loadTextDocument(filePath) {
  const text = fs.readFileSync(filePath, 'utf8');
  return text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
}
|
|
136
|
+
|
|
137
|
+
// Load PDF document (requires pdf-parse)
|
|
138
|
+
/**
 * Extract the text of a PDF using the optional `pdf-parse` dependency.
 *
 * The dependency is required lazily so that text-only usage works without it.
 *
 * @param {string} filePath - Path of the PDF file.
 * @returns {Promise<string>} The `text` field of the pdf-parse result.
 * @throws {Error} An installation hint when a module cannot be resolved;
 *   any other error (read failure, parse failure) is rethrown unchanged.
 */
async function loadPdfDocument(filePath) {
  try {
    const pdfParse = require('pdf-parse');
    const dataBuffer = fs.readFileSync(filePath);
    const data = await pdfParse(dataBuffer);
    return data.text;
  } catch (err) {
    if (err.code === 'MODULE_NOT_FOUND') {
      // The hint must only mention things the CLI supports: there is no
      // --format option, so point at the dependency-free formats instead.
      throw new Error(
        'pdf-parse not installed. Run: npm install pdf-parse\n' +
          'Or convert the document to .txt or .md, which need no extra dependencies.'
      );
    }
    throw err;
  }
}
|
|
154
|
+
|
|
155
|
+
// Load DOCX document (requires mammoth)
|
|
156
|
+
/**
 * Extract the raw text of a DOCX file using the optional `mammoth` dependency.
 *
 * The dependency is required lazily so that text-only usage works without it.
 *
 * @param {string} filePath - Path of the DOCX file.
 * @returns {Promise<string>} The `value` field of mammoth.extractRawText().
 * @throws {Error} An installation hint when a module cannot be resolved;
 *   any other error is rethrown unchanged.
 */
async function loadDocxDocument(filePath) {
  try {
    const mammoth = require('mammoth');
    const result = await mammoth.extractRawText({ path: filePath });
    return result.value;
  } catch (err) {
    if (err.code === 'MODULE_NOT_FOUND') {
      // The hint must only mention things the CLI supports: there is no
      // --format option, so point at the dependency-free formats instead.
      throw new Error(
        'mammoth not installed. Run: npm install mammoth\n' +
          'Or convert the document to .txt or .md, which need no extra dependencies.'
      );
    }
    throw err;
  }
}
|
|
171
|
+
|
|
172
|
+
// Extract headings from text (markdown style and document patterns)
|
|
173
|
+
/**
 * Find headings in a document's text.
 *
 * Three patterns are recognized, tried in this order for every line:
 *   1. Markdown headings (`#` .. `######` followed by text); level = number of `#`.
 *   2. Legal headings (`Article|Section|Part|Chapter <number or roman numeral>`
 *      followed by `.` or `:`), case-insensitive; Article is level 1, the rest level 2.
 *   3. All-caps lines of 6..99 characters made of capitals, digits, whitespace
 *      and `.,;:()-`; level 2.
 *
 * Lines are split on LF or CRLF. Splitting on '\n' alone would leave a trailing
 * '\r' on every line of a Windows file, and because `.` never matches '\r' the
 * first two patterns would then find nothing. The line count is the same as a
 * plain '\n' split, so reported line numbers stay aligned with the caller's lines.
 *
 * @param {string} text - Full document text.
 * @param {string} format - Document format; currently unused, kept for callers.
 * @returns {Array<{level: number, text: string, line: number}>} Headings in
 *   document order with 1-based line numbers.
 */
function extractHeadings(text, format) {
  const headings = [];
  const lines = text.split(/\r?\n/);

  lines.forEach((line, index) => {
    // Markdown headings: # Heading
    const mdMatch = line.match(/^(#{1,6})\s+(.+)$/);
    if (mdMatch) {
      headings.push({
        level: mdMatch[1].length,
        text: mdMatch[2].trim(),
        line: index + 1,
      });
      return;
    }

    // Legal document patterns: Article X, Section X, PART X
    const legalMatch = line.match(
      /^(Article|Section|Part|Chapter)\s+(\d+|[IVXLCDM]+)[.:]\s*(.*)$/i
    );
    if (legalMatch) {
      headings.push({
        level: legalMatch[1].toLowerCase() === 'article' ? 1 : 2,
        text: line.trim(),
        line: index + 1,
      });
      return;
    }

    // All-caps lines (often section headers in legal docs). At least one
    // letter is required: rules such as "----------", bare numbers and
    // whitespace-only lines satisfy the character class but are not headings.
    if (
      line.length > 5 &&
      line.length < 100 &&
      /[A-Z]/.test(line) &&
      /^[A-Z\s\d.,;:()-]+$/.test(line)
    ) {
      headings.push({
        level: 2,
        text: line.trim(),
        line: index + 1,
      });
    }
  });

  return headings;
}
|
|
219
|
+
|
|
220
|
+
// Build section map from headings
|
|
221
|
+
/**
 * Build a lookup of sections keyed by normalized heading text.
 *
 * A section starts at its heading line and runs until the line before the next
 * heading of the same or a higher rank (numerically lower-or-equal level), or
 * to the end of the document. Keys are the heading text lower-cased with every
 * character other than a-z, 0-9 and whitespace removed, then trimmed; a later
 * heading with the same key replaces the earlier entry.
 *
 * @param {string[]} lines - Document lines.
 * @param {Array<{level: number, text: string, line: number}>} headings -
 *   Headings in document order with 1-based line numbers.
 * @returns {Object<string, {heading: string, level: number, startLine: number,
 *   endLine: number, text: string, charCount: number}>} Section map.
 */
function buildSectionMap(lines, headings) {
  const sections = {};

  headings.forEach((current, position) => {
    // The first later heading that is not nested below this one closes the section.
    const closing = headings
      .slice(position + 1)
      .find((candidate) => candidate.level <= current.level);
    const endLine = closing ? closing.line - 1 : lines.length;

    const body = lines.slice(current.line - 1, endLine).join('\n');
    const lowered = current.text.toLowerCase();
    const key = lowered.replace(/[^a-z0-9\s]/g, '').trim();

    sections[key] = {
      heading: current.text,
      level: current.level,
      startLine: current.line,
      endLine,
      text: body,
      charCount: body.length,
    };
  });

  return sections;
}
|
|
256
|
+
|
|
257
|
+
// Load and virtualize document
|
|
258
|
+
/**
 * Load a document from disk and replace the module-level documentState with
 * its virtualized form (text, lines, headings, section map and counts).
 *
 * PDF and DOCX files go through their dedicated loaders; every other format,
 * including unknown extensions, is read as plain text.
 *
 * @param {string} filePath - Path of the document to load.
 * @returns {Promise<object>} The new documentState.
 * @throws {Error} When the file does not exist, is a legacy .doc file, or the
 *   underlying loader fails.
 */
async function loadDocument(filePath) {
  if (!fs.existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }

  const format = detectFormat(filePath);
  if (format === 'doc-legacy') {
    throw new Error('Legacy .doc format not supported. Please convert to .docx or .pdf');
  }

  let text;
  if (format === 'pdf') {
    text = await loadPdfDocument(filePath);
  } else if (format === 'docx') {
    text = await loadDocxDocument(filePath);
  } else {
    // 'text', 'markdown' and unknown formats are all read as plain text.
    text = loadTextDocument(filePath);
  }

  const lines = text.split('\n');
  const headings = extractHeadings(text, format);

  documentState = {
    loaded: true,
    path: filePath,
    format,
    text,
    lines,
    charCount: text.length,
    lineCount: lines.length,
    headings,
    sections: buildSectionMap(lines, headings),
  };

  return documentState;
}
|
|
302
|
+
|
|
303
|
+
// Get document info
|
|
304
|
+
/**
 * Summarize the loaded document.
 *
 * @returns {object} `{ error }` when nothing is loaded; otherwise path, format,
 *   charCount, lineCount, headingCount, sectionCount, estimatedTokens
 *   (charCount / 4, rounded up) and complexity (from assessComplexity()).
 */
function getDocumentInfo() {
  const { loaded, path: docPath, format, charCount, lineCount, headings, sections } =
    documentState;

  if (!loaded) {
    return { error: 'No document loaded' };
  }

  const headingCount = headings.length;
  const sectionCount = Object.keys(sections).length;
  // Rough heuristic: about four characters per token.
  const estimatedTokens = Math.ceil(charCount / 4);

  return {
    path: docPath,
    format,
    charCount,
    lineCount,
    headingCount,
    sectionCount,
    estimatedTokens,
    complexity: assessComplexity(),
  };
}
|
|
320
|
+
|
|
321
|
+
// Assess document complexity (RLM concept: complexity affects context rot)
|
|
322
|
+
/**
 * Rate the loaded document's complexity from its size, heading density and
 * density of internal cross-references ("see section ...", "pursuant to
 * article ...", etc.). Densities are expressed per 10,000 characters.
 *
 * @returns {string} 'unknown' when no document is loaded, otherwise 'low',
 *   'medium' or 'high'.
 */
function assessComplexity() {
  if (!documentState.loaded) return 'unknown';

  const { charCount, headings } = documentState;

  // An empty document has no density to measure. Without this guard both
  // densities are 0/0 = NaN, every comparison below is false, and the empty
  // document would be reported as 'high'.
  if (charCount === 0) return 'low';

  // Cross-reference density: headings per 10k chars
  const crossRefDensity = (headings.length / charCount) * 10000;

  // Internal references: count "see section", "as defined in", etc.
  const refPatterns =
    /(?:see|refer to|as defined in|pursuant to|in accordance with)\s+(?:section|article|clause|paragraph)/gi;
  const refMatches = documentState.text.match(refPatterns) || [];
  const refDensity = (refMatches.length / charCount) * 10000;

  // Determine complexity level
  if (charCount < 10000 && crossRefDensity < 1) {
    return 'low'; // Simple document
  } else if (charCount < 50000 && crossRefDensity < 3 && refDensity < 1) {
    return 'medium'; // Moderate complexity
  } else {
    return 'high'; // High complexity - needs RLM approach
  }
}
|
|
345
|
+
|
|
346
|
+
// Keyword search with context
|
|
347
|
+
/**
 * Case-insensitive literal keyword search over the loaded document.
 *
 * Every matching line is returned with `contextLines` lines of context on each
 * side. Results are collected until adding the next context would exceed
 * `budget` characters; a `{ truncated, message }` marker entry is then
 * appended and the search stops.
 *
 * @param {string} keyword - Literal text to find (regex characters are escaped).
 * @param {number} contextLines - Lines of context before and after each match.
 * @param {number} budget - Maximum total characters of context to return.
 * @returns {object} `{ error }` when nothing is loaded; otherwise
 *   `{ query, matchCount, results }`, where matchCount excludes the marker entry.
 */
function searchKeyword(keyword, contextLines, budget) {
  if (!documentState.loaded) {
    return { error: 'No document loaded' };
  }

  const results = [];
  // No 'g' flag: a global regex keeps `lastIndex` between .test() calls, so
  // after a hit on one line the next line would be searched from that offset
  // and matches on consecutive lines would be skipped.
  const regex = new RegExp(escapeRegex(keyword), 'i');
  const { lines } = documentState;
  let charCount = 0;

  for (let i = 0; i < lines.length; i++) {
    if (!regex.test(lines[i])) {
      continue;
    }

    const startLine = Math.max(0, i - contextLines);
    const endLine = Math.min(lines.length - 1, i + contextLines);
    const contextText = lines.slice(startLine, endLine + 1).join('\n');

    // Check budget
    if (charCount + contextText.length > budget) {
      results.push({
        truncated: true,
        message: `Budget exceeded. Showing ${results.length} of potential matches.`,
      });
      break;
    }

    results.push({
      line: i + 1,
      match: lines[i],
      context: contextText,
      contextRange: { start: startLine + 1, end: endLine + 1 },
    });

    charCount += contextText.length;
  }

  return {
    query: keyword,
    matchCount: results.filter(r => !r.truncated).length,
    results,
  };
}
|
|
389
|
+
|
|
390
|
+
// Regex search with context
|
|
391
|
+
/**
 * Regular-expression search over the loaded document, line by line.
 *
 * The pattern is compiled with the `g` and `i` flags. Each matching line is
 * reported with all of its matches and `contextLines` lines of context on each
 * side. Collection stops once the next context would push the total past
 * `budget` characters, at which point a `{ truncated, message }` marker entry
 * is appended.
 *
 * @param {string} pattern - Regular expression source.
 * @param {number} contextLines - Lines of context before and after each match.
 * @param {number} budget - Maximum total characters of context to return.
 * @returns {object} `{ error }` when nothing is loaded or the pattern is
 *   invalid; otherwise `{ pattern, matchCount, results }`.
 */
function searchRegex(pattern, contextLines, budget) {
  if (!documentState.loaded) {
    return { error: 'No document loaded' };
  }

  let compiled;
  try {
    compiled = new RegExp(pattern, 'gi');
  } catch (err) {
    return { error: `Invalid regex: ${err.message}` };
  }

  const { lines } = documentState;
  const lastIndex = lines.length - 1;
  const results = [];
  let used = 0;

  for (const [index, lineText] of lines.entries()) {
    const matches = lineText.match(compiled);
    if (!matches) {
      continue;
    }

    const from = Math.max(0, index - contextLines);
    const to = Math.min(lastIndex, index + contextLines);
    const contextText = lines.slice(from, to + 1).join('\n');

    const projected = used + contextText.length;
    if (projected > budget) {
      results.push({
        truncated: true,
        message: `Budget exceeded. Showing ${results.length} of potential matches.`,
      });
      break;
    }

    results.push({
      line: index + 1,
      matches,
      context: contextText,
      contextRange: { start: from + 1, end: to + 1 },
    });
    used = projected;
  }

  const matchCount = results.filter((entry) => !entry.truncated).length;
  return { pattern, matchCount, results };
}
|
|
439
|
+
|
|
440
|
+
// Slice document by line range
|
|
441
|
+
/**
 * Return an inclusive, 1-based range of lines from the loaded document.
 *
 * An end beyond the last line is clamped to the document length. Text longer
 * than `budget` characters is cut to `budget` and suffixed with a truncation
 * marker; the reported charCount is that of the returned text.
 *
 * @param {string} rangeStr - Range in the form "start-end", e.g. "100-200".
 * @param {number} budget - Maximum characters of text before truncation.
 * @returns {object} `{ error }` when nothing is loaded or the range is
 *   malformed or out of bounds; otherwise
 *   `{ range, lineCount, charCount, truncated, text }`.
 */
function sliceDocument(rangeStr, budget) {
  if (!documentState.loaded) {
    return { error: 'No document loaded' };
  }

  const parsed = rangeStr.match(/^(\d+)-(\d+)$/);
  if (!parsed) {
    return { error: 'Invalid range format. Use: start-end (e.g., 100-200)' };
  }

  const [, rawStart, rawEnd] = parsed;
  const start = parseInt(rawStart, 10);
  const end = parseInt(rawEnd, 10);
  const total = documentState.lineCount;

  const rangeIsValid = start >= 1 && end >= start && start <= total;
  if (!rangeIsValid) {
    return {
      error: `Invalid range. Document has ${documentState.lineCount} lines.`,
    };
  }

  const lastLine = Math.min(end, total);
  const slicedLines = documentState.lines.slice(start - 1, lastLine);
  const joined = slicedLines.join('\n');

  // Truncate if over budget
  const truncated = joined.length > budget;
  const text = truncated ? joined.slice(0, budget) + '\n... [truncated]' : joined;

  return {
    range: { start, end: lastLine },
    lineCount: slicedLines.length,
    charCount: text.length,
    truncated,
    text,
  };
}
|
|
477
|
+
|
|
478
|
+
// Find section by heading
|
|
479
|
+
/**
 * Find the section whose heading best matches a query.
 *
 * The query is normalized the same way section keys are (lower-cased, only
 * a-z, 0-9 and whitespace kept, trimmed). An exact key match wins outright.
 * Otherwise any key that contains the query, or is contained in it, is scored
 * by length similarity (shorter length / longer length, so 1 means identical
 * length) and the highest score wins.
 *
 * The score is symmetric on purpose: dividing the query length by the key
 * length favours the shortest key whenever the query contains the key, so the
 * least specific heading would win, and an empty key would score Infinity and
 * match every query. Empty keys and empty queries therefore never match.
 *
 * @param {string} sectionQuery - Heading text to look for.
 * @param {number} budget - Maximum characters of section text before truncation.
 * @returns {object} `{ error }` when nothing is loaded;
 *   `{ error, hint, availableSections }` (up to 10 headings) when no section
 *   matches; otherwise `{ query, found, lineRange, charCount, truncated, text }`,
 *   where charCount is the full section length before truncation.
 */
function findSection(sectionQuery, budget) {
  if (!documentState.loaded) {
    return { error: 'No document loaded' };
  }

  // Normalize query
  const normalizedQuery = sectionQuery
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, '')
    .trim();

  // Find best matching section
  let bestMatch = null;
  let bestScore = 0;

  if (normalizedQuery.length > 0) {
    for (const [key, section] of Object.entries(documentState.sections)) {
      // A heading that normalizes to nothing cannot be addressed by name.
      if (key.length === 0) {
        continue;
      }

      // Exact match
      if (key === normalizedQuery) {
        bestMatch = section;
        bestScore = 1;
        break;
      }

      // Partial match (one contains the other)
      if (key.includes(normalizedQuery) || normalizedQuery.includes(key)) {
        const shorter = Math.min(normalizedQuery.length, key.length);
        const longer = Math.max(normalizedQuery.length, key.length);
        const score = shorter / longer;
        if (score > bestScore) {
          bestMatch = section;
          bestScore = score;
        }
      }
    }
  }

  if (!bestMatch) {
    // Return available sections as hint
    const availableSections = Object.values(documentState.sections)
      .slice(0, 10)
      .map(s => s.heading);
    return {
      error: `Section not found: "${sectionQuery}"`,
      hint: 'Available sections:',
      availableSections,
    };
  }

  // Truncate if over budget
  let text = bestMatch.text;
  const truncated = text.length > budget;
  if (truncated) {
    text = text.slice(0, budget) + '\n... [truncated]';
  }

  return {
    query: sectionQuery,
    found: bestMatch.heading,
    lineRange: { start: bestMatch.startLine, end: bestMatch.endLine },
    charCount: bestMatch.charCount,
    truncated,
    text,
  };
}
|
|
540
|
+
|
|
541
|
+
// Get table of contents
|
|
542
|
+
/**
 * List the headings of the loaded document.
 *
 * @returns {object} `{ error }` when nothing is loaded; otherwise
 *   `{ headingCount, toc }`, where toc holds a `{ level, text, line }` copy of
 *   every heading in document order.
 */
function getTableOfContents() {
  const { loaded, headings } = documentState;

  if (!loaded) {
    return { error: 'No document loaded' };
  }

  const toc = headings.map(({ level, text, line }) => ({ level, text, line }));
  return { headingCount: headings.length, toc };
}
|
|
556
|
+
|
|
557
|
+
// Escape special regex characters
|
|
558
|
+
/**
 * Backslash-escape every regular-expression metacharacter in a string so it
 * can be embedded in a RegExp and matched literally.
 *
 * @param {string} string - Text to escape.
 * @returns {string} The text with each of . * + ? ^ $ { } ( ) | [ ] \ escaped.
 */
function escapeRegex(string) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return string.replace(metacharacters, (character) => `\\${character}`);
}
|
|
561
|
+
|
|
562
|
+
// Format output
|
|
563
|
+
/**
 * Render an operation result for printing.
 *
 * With `asJson` the result is pretty-printed JSON. Otherwise the result's
 * shape selects a human-readable layout, checked in this order: error,
 * document info, search results, slice, section, table of contents. Anything
 * else falls back to pretty-printed JSON.
 *
 * @param {object} data - Result object returned by one of the operations.
 * @param {boolean} asJson - When true, always emit JSON.
 * @returns {string} Text to print.
 */
function formatOutput(data, asJson) {
  if (asJson) {
    return JSON.stringify(data, null, 2);
  }

  // Human-readable formatting
  if (data.error) {
    let output = `Error: ${data.error}`;
    if (data.hint) {
      output += `\n\n${data.hint}`;
    }
    if (data.availableSections) {
      output += '\n' + data.availableSections.map(s => ` - ${s}`).join('\n');
    }
    return output;
  }

  // Document info. charCount is tested by type rather than truthiness: an
  // empty document has charCount 0 and must still get the info layout.
  if (data.path && data.format && typeof data.charCount === 'number') {
    return [
      '📄 Document Info',
      ` Path: ${data.path}`,
      ` Format: ${data.format}`,
      ` Characters: ${data.charCount.toLocaleString()}`,
      ` Lines: ${data.lineCount.toLocaleString()}`,
      ` Headings: ${data.headingCount}`,
      ` Sections: ${data.sectionCount}`,
      ` Est. Tokens: ~${data.estimatedTokens.toLocaleString()}`,
      ` Complexity: ${data.complexity.toUpperCase()}`,
    ].join('\n');
  }

  // Search results
  if ((data.query || data.pattern) && data.results) {
    let output = `🔍 Search: "${data.query || data.pattern}"\n`;
    output += ` Matches: ${data.matchCount}\n\n`;

    for (const result of data.results) {
      // The budget marker entry carries only a message, no match data.
      if (result.truncated) {
        output += `\n⚠️ ${result.message}\n`;
        continue;
      }
      output += `--- Line ${result.line} (context: ${result.contextRange.start}-${result.contextRange.end}) ---\n`;
      output += result.context + '\n\n';
    }

    return output;
  }

  // Slice result
  if (data.range) {
    let output = `📑 Lines ${data.range.start}-${data.range.end} (${data.lineCount} lines, ${data.charCount} chars)\n`;
    if (data.truncated) {
      output += '⚠️ Output truncated due to budget\n';
    }
    output += '\n' + data.text;
    return output;
  }

  // Section result
  if (data.found) {
    let output = `📖 Section: "${data.found}"\n`;
    output += ` Lines: ${data.lineRange.start}-${data.lineRange.end}\n`;
    output += ` Characters: ${data.charCount}\n`;
    if (data.truncated) {
      output += '⚠️ Output truncated due to budget\n';
    }
    output += '\n' + data.text;
    return output;
  }

  // Table of contents
  if (data.toc) {
    let output = `📋 Table of Contents (${data.headingCount} headings)\n\n`;
    for (const heading of data.toc) {
      // Indent nested headings by their depth below level 1.
      const indent = ' '.repeat(heading.level - 1);
      output += `${indent}${heading.text} (line ${heading.line})\n`;
    }
    return output;
  }

  // Fallback to JSON
  return JSON.stringify(data, null, 2);
}
|
|
647
|
+
|
|
648
|
+
// Print help
|
|
649
|
+
/**
 * Print the command-line usage text (operations, options, supported formats,
 * examples and exit codes) to stdout.
 */
function printHelp() {
  console.log(`
document-repl.js - REPL engine for document virtualization (RLM pattern)

USAGE:
node document-repl.js --load="path/to/doc" [operation] [options]

OPERATIONS:
--info Show document info (size, format, complexity)
--search="keyword" Keyword search with context
--regex="pattern" Regex search with context
--slice="100-200" Get lines 100-200
--section="name" Find section by heading
--toc Extract table of contents

OPTIONS:
--context=<lines> Context lines around matches (default: 2)
--budget=<chars> Character budget for output (default: 15000)
--json Output as JSON
--verbose Show debug info

SUPPORTED FORMATS:
.txt, .md Direct text processing (no dependencies)
.pdf Requires: npm install pdf-parse
.docx Requires: npm install mammoth

EXAMPLES:
# Load and get info
node document-repl.js --load="contract.pdf" --info

# Search for keyword
node document-repl.js --load="spec.md" --search="authentication"

# Find specific section
node document-repl.js --load="agreement.docx" --section="Article 7"

# Get lines 500-600 with increased budget
node document-repl.js --load="research.txt" --slice="500-600" --budget=20000

EXIT CODES:
0 = Success
1 = Error
2 = No results
`);
}
|
|
694
|
+
|
|
695
|
+
// Main execution
|
|
696
|
+
/**
 * Command-line entry point: parse arguments, load the document, run the first
 * requested operation and print its result.
 *
 * Operations are considered in a fixed order (info, toc, search, regex, slice,
 * section); with none requested, document info is shown. The process exits
 * with 1 on any error (including a missing --load) and with 2 when a search
 * produced no results; --help prints usage and exits with 0.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv);

  if (args.help) {
    printHelp();
    process.exit(0);
  }

  // Must load document first
  if (!args.load) {
    console.error('Error: --load="path/to/document" is required');
    console.error('Run with --help for usage');
    process.exit(1);
  }

  // Diagnostics go to stderr so they never mix with the result on stdout.
  const debug = (message) => {
    if (args.verbose) {
      console.error(message);
    }
  };

  try {
    debug(`Loading document: ${args.load}`);
    await loadDocument(args.load);
    debug(`Loaded: ${documentState.charCount} chars, ${documentState.lineCount} lines`);

    // Candidate operations in priority order; the first requested one runs.
    const operations = [
      [args.info, () => getDocumentInfo()],
      [args.toc, () => getTableOfContents()],
      [args.search, () => searchKeyword(args.search, args.context, args.budget)],
      [args.regex, () => searchRegex(args.regex, args.context, args.budget)],
      [args.slice, () => sliceDocument(args.slice, args.budget)],
      [args.section, () => findSection(args.section, args.budget)],
    ];
    const requested = operations.find(([wanted]) => wanted);
    const result = requested ? requested[1]() : getDocumentInfo();

    // Output result
    console.log(formatOutput(result, args.json));

    // Set exit code
    const nothingFound =
      result.matchCount === 0 || (result.results && result.results.length === 0);
    if (result.error) {
      process.exit(1);
    } else if (nothingFound) {
      process.exit(2);
    }
  } catch (err) {
    console.error(`Error: ${err.message}`);
    debug(err.stack);
    process.exit(1);
  }
}
|
|
760
|
+
|
|
761
|
+
// Run
|
|
762
|
+
main();
|
|
763
|
+
|
|
764
|
+
// Export for testing
|
|
765
|
+
module.exports = {
|
|
766
|
+
parseArgs,
|
|
767
|
+
detectFormat,
|
|
768
|
+
loadDocument,
|
|
769
|
+
getDocumentInfo,
|
|
770
|
+
assessComplexity,
|
|
771
|
+
searchKeyword,
|
|
772
|
+
searchRegex,
|
|
773
|
+
sliceDocument,
|
|
774
|
+
findSection,
|
|
775
|
+
getTableOfContents,
|
|
776
|
+
extractHeadings,
|
|
777
|
+
buildSectionMap,
|
|
778
|
+
// State access for testing
|
|
779
|
+
getState: () => documentState,
|
|
780
|
+
resetState: () => {
|
|
781
|
+
documentState = {
|
|
782
|
+
loaded: false,
|
|
783
|
+
path: null,
|
|
784
|
+
format: null,
|
|
785
|
+
text: null,
|
|
786
|
+
lines: [],
|
|
787
|
+
charCount: 0,
|
|
788
|
+
lineCount: 0,
|
|
789
|
+
headings: [],
|
|
790
|
+
sections: {},
|
|
791
|
+
};
|
|
792
|
+
},
|
|
793
|
+
};
|