mdsel 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs ADDED
@@ -0,0 +1,1892 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/cli/index.ts
4
+ import { Command } from "commander";
5
+ import { createRequire } from "module";
6
+
7
+ // src/parser/parse.ts
8
+ import { readFile, access, stat } from "fs/promises";
9
+
10
+ // src/parser/processor.ts
11
+ import { unified } from "unified";
12
+ import remarkParse from "remark-parse";
13
+ import remarkGfm from "remark-gfm";
14
/**
 * Builds a small markdown parser facade on top of unified + remark-parse.
 *
 * @param {{ gfm?: boolean }} [options] - `gfm` (default true) adds the remark-gfm plugin.
 * @returns {{ parse: (markdown: string) => object }} Object exposing only `parse`.
 */
function createProcessor(options = {}) {
  // Only `undefined` falls back to the default, mirroring a destructuring default.
  const enableGfm = options.gfm === undefined ? true : options.gfm;
  const base = unified().use(remarkParse);
  const pipeline = enableGfm ? base.use(remarkGfm) : base;
  return {
    parse: (markdown) => pipeline.parse(markdown)
  };
}
24
+
25
+ // src/parser/types.ts
26
/**
 * Error raised while loading or parsing a markdown file.
 *
 * Carries a machine-readable `code` plus optional location details so callers
 * can report failures without parsing the message text.
 */
var ParserError = class _ParserError extends Error {
  code;
  filePath;
  line;
  column;
  /**
   * @param {string} code - Machine-readable error code (e.g. "FILE_NOT_FOUND").
   * @param {string} message - Human-readable description.
   * @param {string} [filePath] - File the error relates to.
   * @param {number} [line] - Line of the problem, when known.
   * @param {number} [column] - Column of the problem, when known.
   */
  constructor(code, message, filePath, line, column) {
    super(message);
    this.name = "ParserError";
    this.code = code;
    this.filePath = filePath;
    this.line = line;
    this.column = column;
    // captureStackTrace is a V8 extension; call it only when the engine provides it
    // (same guard SelectorParseError uses) so construction never throws elsewhere.
    Error.captureStackTrace?.(this, _ParserError);
  }
};
41
+
42
+ // src/utils/validation.ts
43
// Default limit on AST nesting depth.
var DEFAULT_MAX_DEPTH = 20;
// Default limit on input file size: 50 MiB, in bytes.
var DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024;
// Share of NUL characters in the sample above which content is treated as binary.
var BINARY_THRESHOLD_RATIO = 0.1;

/**
 * Heuristically decides whether text is binary by measuring how many NUL
 * characters occur in its leading sample.
 *
 * @param {string} content - Decoded file content.
 * @param {number} [sampleSize=1024] - Number of leading characters to inspect.
 * @returns {boolean} True when the NUL share exceeds BINARY_THRESHOLD_RATIO.
 */
function isLikelyBinary(content, sampleSize = 1024) {
  if (!content || content.length === 0) {
    return false;
  }
  const head = content.slice(0, sampleSize);
  // Splitting on NUL yields one more piece than there are NUL characters.
  const nulCount = head.split("\0").length - 1;
  return nulCount / head.length > BINARY_THRESHOLD_RATIO;
}
55
/**
 * Checks whether a byte buffer decodes as strict UTF-8.
 *
 * @param {ArrayBufferView | ArrayBuffer} buffer - Raw bytes to validate.
 * @returns {boolean} False if a fatal-mode decode throws, true otherwise.
 */
function isValidUtf8(buffer) {
  let decodedCleanly = true;
  try {
    // fatal: true makes decode() throw on malformed sequences instead of substituting U+FFFD.
    new TextDecoder("utf-8", { fatal: true }).decode(buffer);
  } catch {
    decodedCleanly = false;
  }
  return decodedCleanly;
}
64
/**
 * Measures the deepest level of `children` nesting in a tree.
 *
 * @param {object} ast - Root node; a node is descended into when it has an array `children`.
 * @returns {number} Depth of the deepest node, with the root counted as 0.
 */
function getNestingDepth(ast) {
  let deepest = 0;
  // Work list of [node, depth] pairs replaces the call stack.
  const pending = [[ast, 0]];
  while (pending.length > 0) {
    const [node, level] = pending.pop();
    deepest = Math.max(deepest, level);
    if ("children" in node && Array.isArray(node.children)) {
      for (const child of node.children) {
        pending.push([child, level + 1]);
      }
    }
  }
  return deepest;
}
77
/**
 * Normalizes raw text before parsing: CRLF and lone CR become LF, and
 * surrounding whitespace is trimmed.
 *
 * @param {string} content - Raw text; any falsy value yields "".
 * @returns {string} The normalized text.
 */
function sanitizeInput(content) {
  return content ? content.replace(/\r\n?/g, "\n").trim() : "";
}
83
/**
 * Renders a byte count with two decimals and a binary unit (B, KB, MB, GB).
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} For example "1.50 KB"; values past GB stay expressed in GB.
 */
function formatFileSize(bytes) {
  const units = ["B", "KB", "MB", "GB"];
  const lastUnit = units.length - 1;
  let scaled = bytes;
  let step = 0;
  for (; scaled >= 1024 && step < lastUnit; step++) {
    scaled /= 1024;
  }
  const label = units[step] ?? "B";
  return `${scaled.toFixed(2)} ${label}`;
}
93
+
94
+ // src/parser/parse.ts
95
/**
 * Parses markdown text into an AST and enforces a nesting-depth limit.
 *
 * @param {string} content - Markdown source.
 * @param {{ gfm?: boolean, maxDepth?: number }} [options] - Forwarded to createProcessor;
 *   `maxDepth` defaults to DEFAULT_MAX_DEPTH.
 * @returns {{ ast: object }} The parsed tree.
 * @throws {Error} When the tree is nested deeper than the limit.
 */
function parseMarkdown(content, options) {
  const ast = createProcessor(options).parse(content);
  const depthLimit = options?.maxDepth ?? DEFAULT_MAX_DEPTH;
  const observedDepth = getNestingDepth(ast);
  if (observedDepth > depthLimit) {
    const detail = `Maximum nesting depth of ${String(depthLimit)} exceeded (found depth of ${String(observedDepth)}). Consider increasing the maxDepth option if this is expected.`;
    throw new Error(detail);
  }
  return { ast };
}
107
/**
 * Reads a markdown file from disk, validates it, and parses it.
 *
 * Checks run in this order: accessibility, size limit, strict UTF-8 decoding,
 * binary heuristic. The text is then normalized and handed to parseMarkdown.
 *
 * @param {string} filePath - Path of the file to parse.
 * @param {{ maxFileSize?: number, maxDepth?: number, gfm?: boolean }} [options] -
 *   `maxFileSize` defaults to DEFAULT_MAX_FILE_SIZE; the whole object is forwarded to parseMarkdown.
 * @returns {Promise<{ ast: object, filePath: string }>} Parse result plus the source path.
 * @throws {ParserError} FILE_NOT_FOUND, FILE_TOO_LARGE, FILE_READ_ERROR, ENCODING_ERROR or BINARY_FILE.
 */
async function parseFile(filePath, options) {
  const sizeLimit = options?.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
  // Extracts message text from whatever value was thrown.
  const messageOf = (caught, fallback) => (caught instanceof Error ? caught.message : fallback);

  try {
    await access(filePath);
  } catch {
    throw new ParserError("FILE_NOT_FOUND", `File not found: ${filePath}`, filePath);
  }

  try {
    const { size } = await stat(filePath);
    if (size > sizeLimit) {
      throw new ParserError(
        "FILE_TOO_LARGE",
        `File size (${formatFileSize(size)}) exceeds maximum allowed size (${formatFileSize(sizeLimit)})`,
        filePath
      );
    }
  } catch (statFailure) {
    // Our own ParserError passes through untouched; anything else is wrapped.
    if (statFailure instanceof ParserError) {
      throw statFailure;
    }
    throw new ParserError(
      "FILE_READ_ERROR",
      `Failed to read file stats: ${messageOf(statFailure, "Unknown stat error")}`,
      filePath
    );
  }

  let text;
  try {
    const bytes = await readFile(filePath);
    if (!isValidUtf8(bytes)) {
      throw new ParserError(
        "ENCODING_ERROR",
        "File contains invalid UTF-8 byte sequences",
        filePath
      );
    }
    text = bytes.toString("utf-8");
  } catch (readFailure) {
    if (readFailure instanceof ParserError) {
      throw readFailure;
    }
    throw new ParserError(
      "FILE_READ_ERROR",
      `Failed to read file: ${messageOf(readFailure, "Unknown read error")}`,
      filePath
    );
  }

  if (isLikelyBinary(text)) {
    throw new ParserError(
      "BINARY_FILE",
      "File appears to be binary content rather than text markdown",
      filePath
    );
  }

  const parsed = parseMarkdown(sanitizeInput(text), options);
  return { ...parsed, filePath };
}
165
+
166
+ // src/output/utils.ts
167
/**
 * Returns the current time as an ISO 8601 string.
 *
 * @returns {string} Result of Date.prototype.toISOString() for "now".
 */
function createTimestamp() {
  const now = new Date();
  return now.toISOString();
}
170
/**
 * Shallow-copies an object, leaving out properties whose value is exactly null.
 * Properties set to undefined are kept.
 *
 * @param {object} obj - Source object; it is not modified.
 * @returns {object} New object without the null-valued properties.
 */
function omitNullFields(obj) {
  return Object.keys(obj).reduce((kept, field) => {
    const fieldValue = obj[field];
    if (fieldValue !== null) {
      kept[field] = fieldValue;
    }
    return kept;
  }, {});
}
179
+
180
+ // src/output/formatters.ts
181
/**
 * Builds the success envelope for the "index" command.
 *
 * @param {Array<object>} documents - Per-document index entries.
 * @param {object} summary - Aggregate counts across the documents.
 * @returns {object} Envelope with success, command, timestamp and data.
 */
function formatIndexResponse(documents, summary) {
  const envelope = {
    success: true,
    command: "index",
    timestamp: createTimestamp(),
    data: { documents, summary }
  };
  return envelope;
}
192
/**
 * Builds the envelope for the "select" command.
 *
 * `success` is false as soon as any selector is unresolved; in that case the
 * unresolved selector strings are also listed under `unresolved_selectors`.
 *
 * @param {Array<object>} matches - Resolved selections.
 * @param {Array<{ selector: string }>} [unresolved=[]] - Selectors that could not be resolved.
 * @returns {object} Response envelope.
 */
function formatSelectResponse(matches, unresolved = []) {
  const anyUnresolved = unresolved.length > 0;
  const envelope = {
    success: !anyUnresolved,
    command: "select",
    timestamp: createTimestamp(),
    data: { matches, unresolved }
  };
  if (anyUnresolved) {
    envelope.unresolved_selectors = unresolved.map((entry) => entry.selector);
  }
  return envelope;
}
208
/**
 * Builds a failure envelope for any command.
 *
 * @param {string} command - Name of the command that failed.
 * @param {Array<object>} errors - Error entries to report.
 * @param {Array<object>} [partialResults] - Results obtained before failing; attached
 *   as `partial_results` only when non-empty.
 * @returns {object} Envelope with success: false and data: null.
 */
function formatErrorResponse(command, errors, partialResults) {
  const envelope = {
    success: false,
    command,
    timestamp: createTimestamp(),
    data: null,
    errors
  };
  const hasPartial = Boolean(partialResults) && partialResults.length > 0;
  if (hasPartial) {
    envelope.partial_results = partialResults;
  }
  return envelope;
}
221
/**
 * Assembles one error entry for a response, passing it through omitNullFields
 * so that null-valued fields are left out.
 *
 * @param {string} type - Error category.
 * @param {string} code - Machine-readable code.
 * @param {string} message - Human-readable description.
 * @param {string} [file] - Related file.
 * @param {string} [selector] - Related selector.
 * @param {string[]} [suggestions] - Alternative selectors to offer.
 * @returns {object} The error entry.
 */
function createErrorEntry(type, code, message, file, selector, suggestions) {
  const entry = { type, code, message, file, selector, suggestions };
  return omitNullFields(entry);
}
231
+
232
+ // src/output/text-formatters.ts
233
/**
 * Renders document indexes as terse text: an optional "namespace path" header
 * (only when several documents are shown), the heading tree, then a "---"
 * separator and the non-zero block counts.
 *
 * @param {Array<object>} documents - Entries with namespace, file_path, headings and blocks.
 * @returns {string} Newline-joined text.
 */
function formatIndexText(documents) {
  const showDocHeader = documents.length > 1;
  // Output label paired with the counter field it reports, in display order.
  const blockLabels = [
    ["code", "code_blocks"],
    ["para", "paragraphs"],
    ["list", "lists"],
    ["table", "tables"],
    ["quote", "blockquotes"]
  ];
  const lines = [];
  for (const doc of documents) {
    if (showDocHeader) {
      lines.push(`${doc.namespace} ${doc.file_path}`);
    }
    lines.push(...formatHeadingTree(doc.headings));
    const counters = doc.blocks;
    const summary = blockLabels
      .filter(([, field]) => counters[field] > 0)
      .map(([label, field]) => `${label}:${counters[field]}`)
      .join(" ");
    if (summary !== "") {
      lines.push("---", summary);
    }
  }
  return lines.join("\n");
}
258
/**
 * Renders headings as "hN.I text" lines, where I is the zero-based count of
 * earlier headings at the same depth and deeper headings are indented further.
 *
 * @param {Iterable<{ depth: number, text: string }>} headings - Headings in document order.
 * @returns {string[]} One line per heading.
 */
function formatHeadingTree(headings) {
  const seenAtDepth = {};
  const rendered = [];
  for (const { depth, text } of headings) {
    const ordinal = seenAtDepth[depth] ?? 0;
    seenAtDepth[depth] = ordinal + 1;
    const pad = depth > 1 ? " ".repeat(depth - 1) : "";
    rendered.push(`${pad}h${depth}.${ordinal} ${text}`);
  }
  return rendered;
}
269
/**
 * Renders select results as terse text.
 *
 * Matches are printed as raw content, each preceded by a "selector:" line only
 * when there is more than one match. Each unresolved selector is printed as
 * "!selector", its reason, and up to five "~suggestion" hints on one line.
 *
 * @param {Array<{ selector: string, content: string }>} matches - Resolved selections.
 * @param {Array<{ selector: string, reason: string, suggestions: string[] }>} unresolved - Failures.
 * @returns {string} Newline-joined text.
 */
function formatSelectText(matches, unresolved) {
  const lines = [];
  for (const match of matches) {
    if (matches.length > 1) {
      lines.push(`${match.selector}:`);
    }
    lines.push(match.content);
  }
  for (const miss of unresolved) {
    lines.push(`!${miss.selector}`, miss.reason);
    if (miss.suggestions.length > 0) {
      const hints = miss.suggestions.slice(0, 5).map((hint) => `~${hint}`);
      lines.push(hints.join(" "));
    }
  }
  return lines.join("\n");
}
285
/**
 * Renders error entries as terse text: "!type: message", followed by optional
 * "file:" and "selector:" lines and up to five "~suggestion" hints.
 *
 * @param {Array<{ type: string, message: string, file?: string, selector?: string, suggestions?: string[] }>} errors
 * @returns {string} Newline-joined text.
 */
function formatErrorText(errors) {
  const lines = [];
  for (const { type, message, file, selector, suggestions } of errors) {
    lines.push(`!${type}: ${message}`);
    if (file) {
      lines.push(`file: ${file}`);
    }
    if (selector) {
      lines.push(`selector: ${selector}`);
    }
    if (suggestions && suggestions.length > 0) {
      lines.push(
        suggestions
          .slice(0, 5)
          .map((hint) => `~${hint}`)
          .join(" ")
      );
    }
  }
  return lines.join("\n");
}
302
// Reference descriptions of the terse text output, keyed by command name.
// Each entry holds a one-line `terse` summary and a multi-line `example`.
// The table is not referenced in the visible part of this file.
// NOTE(review): leading whitespace inside the example literals may have been
// stripped by the diff rendering — confirm against the published package.
var FORMAT_SPECS = {
  index: {
    terse: "hN.I title (indented)\\n---\\ncode:N para:N list:N table:N",
    example: `h1.0 mdsel
h2.0 Installation
h2.1 Quick Start
h2.2 Commands
h3.0 index
h3.1 select
---
code:19 para:23 list:5 table:3`
  },
  select: {
    terse: "content only. multiple: selector: prefix. no index = all",
    example: `h2.0:
## Installation
content...
h2.1:
## Quick Start
content...

# single result: no prefix
## Installation
content...

# error format
!selector
reason
~suggestion1 ~suggestion2`
  }
};
333
+
334
+ // src/cli/utils/namespace.ts
335
+ import { basename, extname } from "path";
336
/**
 * Derives a document namespace from a path: the file name without its
 * extension, lower-cased.
 *
 * @param {string} filePath - Path to the document.
 * @returns {string} For example "docs/README.md" gives "readme".
 */
function deriveNamespace(filePath) {
  const extension = extname(filePath);
  const stem = basename(filePath, extension);
  return stem.toLowerCase();
}
340
+
341
+ // src/cli/utils/content-extractor.ts
342
+ import { toString } from "mdast-util-to-string";
343
+ import { toMarkdown } from "mdast-util-to-markdown";
344
+ import { gfmToMarkdown } from "mdast-util-gfm";
345
// Default maximum number of characters getContentPreview keeps before appending "...".
var PREVIEW_LENGTH = 80;
// Marker line truncateContent inserts where lines were cut from the content.
var TRUNCATION_MARKER = "[truncated]";
347
/**
 * Serializes a node back to markdown (with GFM extensions), trimming trailing
 * whitespace. A "section" node is serialized through a temporary root node
 * that holds the section's children.
 *
 * @param {object} node - mdast node or section wrapper.
 * @returns {string} Markdown source for the node.
 */
function extractMarkdown(node) {
  const target = isSectionNode(node) ? { type: "root", children: node.children } : node;
  const rendered = toMarkdown(target, { extensions: [gfmToMarkdown()] });
  return rendered.trimEnd();
}
354
/**
 * Tells whether a value is a section node: a non-null object whose `type` is
 * "section" and which has a `children` property.
 *
 * @param {unknown} node - Value to inspect.
 * @returns {boolean} True for section nodes.
 */
function isSectionNode(node) {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  if (!("type" in node) || node.type !== "section") {
    return false;
  }
  return "children" in node;
}
357
/**
 * Returns the text of a node as produced by mdast-util-to-string.
 *
 * @param {object} node - mdast node.
 * @returns {string} The node's text.
 */
function extractText(node) {
  const plainText = toString(node);
  return plainText;
}
360
/**
 * Counts whitespace-separated words.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Number of runs of non-whitespace characters; 0 for blank text.
 */
function countWords(text) {
  const body = text.trim();
  if (body === "") {
    return 0;
  }
  const words = body.match(/\S+/g) ?? [];
  return words.length;
}
365
/**
 * Produces a single-line preview: whitespace runs collapse to one space, the
 * result is trimmed, and anything longer than `maxLen` is cut and given "...".
 *
 * @param {string} text - Source text.
 * @param {number} [maxLen=PREVIEW_LENGTH] - Maximum characters kept before the ellipsis.
 * @returns {string} The preview text.
 */
function getContentPreview(text, maxLen = PREVIEW_LENGTH) {
  const singleLine = text.replace(/\s+/g, " ").trim();
  return singleLine.length <= maxLen ? singleLine : `${singleLine.slice(0, maxLen)}...`;
}
370
/**
 * Limits content to its first `head` or last `tail` lines.
 *
 * `head` takes precedence when both are positive. When lines are dropped, a
 * TRUNCATION_MARKER line is placed where the cut happened (after the kept
 * lines for `head`, before them for `tail`). The word count is taken from the
 * returned content, marker included.
 *
 * @param {string} content - Text to truncate.
 * @param {{ head?: number, tail?: number }} [options] - Line limits; non-positive values are ignored.
 * @returns {{ content: string, truncated: boolean, wordCount: number }}
 */
function truncateContent(content, options = {}) {
  const untouched = () => ({ content, truncated: false, wordCount: countWords(content) });
  const clipped = (text) => ({ content: text, truncated: true, wordCount: countWords(text) });

  const lines = content.split("\n");
  const { head, tail } = options;

  if (head !== undefined && head > 0) {
    if (head >= lines.length) {
      return untouched();
    }
    return clipped(`${lines.slice(0, head).join("\n")}\n${TRUNCATION_MARKER}`);
  }
  if (tail !== undefined && tail > 0) {
    if (tail >= lines.length) {
      return untouched();
    }
    return clipped(`${TRUNCATION_MARKER}\n${lines.slice(-tail).join("\n")}`);
  }
  return untouched();
}
399
+
400
+ // src/cli/utils/selector-builder.ts
401
/**
 * Lists the selectors addressable among a tree's top-level children.
 *
 * The list starts with "<namespace>::root". Each heading then adds
 * "<namespace>::heading:hN[i]" and each recognised block adds
 * "<namespace>::block:<type>[i]", where i counts earlier siblings of the same
 * depth or block type.
 *
 * @param {{ children: Array<object> }} tree - Parsed document root.
 * @param {string} namespace - Namespace prefix for every selector.
 * @returns {string[]} Selectors in document order.
 */
function buildAvailableSelectors(tree, namespace) {
  const headingSeen = { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0 };
  const blockSeen = { paragraph: 0, code: 0, list: 0, table: 0, blockquote: 0 };
  // Returns the current count for `key` and increments it.
  const takeIndex = (counter, key) => {
    const current = counter[key] ?? 0;
    counter[key] = current + 1;
    return current;
  };

  const selectors = [`${namespace}::root`];
  for (const node of tree.children) {
    if (node.type === "heading") {
      const level = node.depth;
      const position = takeIndex(headingSeen, level);
      selectors.push(`${namespace}::heading:h${String(level)}[${String(position)}]`);
      continue;
    }
    const blockType = mapNodeTypeToBlockType(node.type);
    if (blockType) {
      const position = takeIndex(blockSeen, blockType);
      selectors.push(`${namespace}::block:${blockType}[${String(position)}]`);
    }
  }
  return selectors;
}
436
/**
 * Builds the index entry for one document from the top-level children of its AST.
 *
 * - Every heading becomes a descriptor with its selector, depth, text and the
 *   number of following siblings that belong to it (up to the next heading of
 *   the same or a shallower depth).
 * - Every non-heading child is tallied into the block counters by countBlock.
 * - Non-heading children that appear before the first heading are summarised
 *   as the `root` entry; `root` stays null when there are none.
 *
 * @param {{ children: Array<object> }} tree - Parsed document root.
 * @param {string} namespace - Namespace used as the selector prefix.
 * @param {string} filePath - Path reported as `file_path`.
 * @returns {{ namespace: string, file_path: string, root: object | null, headings: Array<object>, blocks: object }}
 */
function buildDocumentIndex(tree, namespace, filePath) {
  const headings = [];
  const blockCounts = {
    paragraphs: 0,
    code_blocks: 0,
    lists: 0,
    tables: 0,
    blockquotes: 0
  };
  const headingIndices = {
    1: 0,
    2: 0,
    3: 0,
    4: 0,
    5: 0,
    6: 0
  };
  let root = null;
  const preHeadingContent = [];
  let foundHeading = false;
  // entries() supplies each child's position directly, so the array does not
  // have to be rescanned with indexOf for every heading.
  for (const [position, child] of tree.children.entries()) {
    if (child.type === "heading") {
      foundHeading = true;
      const depth = child.depth;
      const currentIndex = headingIndices[depth] ?? 0;
      headingIndices[depth] = currentIndex + 1;
      const text = extractText(child);
      // Walk forward until a heading of the same or a shallower depth ends this section.
      let sectionEnd = position + 1;
      while (sectionEnd < tree.children.length) {
        const nextChild = tree.children[sectionEnd];
        if (nextChild?.type === "heading" && nextChild.depth <= depth) {
          break;
        }
        sectionEnd++;
      }
      const childrenCount = sectionEnd - position - 1;
      headings.push({
        selector: `${namespace}::heading:h${String(depth)}[${String(currentIndex)}]`,
        type: `heading:h${String(depth)}`,
        depth,
        text,
        content_preview: getContentPreview(text),
        truncated: false,
        children_count: childrenCount,
        word_count: countWords(text),
        // Simplified: counts only the heading text, not the section body.
        section_word_count: countWords(text),
        section_truncated: false
      });
    } else {
      if (!foundHeading) {
        preHeadingContent.push(child);
      }
      countBlock(child, blockCounts);
    }
  }
  if (preHeadingContent.length > 0) {
    const rootText = preHeadingContent.map((c) => extractText(c)).join("\n");
    root = {
      selector: `${namespace}::root`,
      type: "root",
      content_preview: getContentPreview(rootText),
      truncated: false,
      children_count: preHeadingContent.length,
      word_count: countWords(rootText)
    };
  }
  return {
    namespace,
    file_path: filePath,
    root,
    headings,
    blocks: blockCounts
  };
}
513
/**
 * Maps an AST node type to the block type used in selectors.
 *
 * @param {string} type - Node type.
 * @returns {string | null} The same name for paragraph, code, list, table and
 *   blockquote; null for every other type.
 */
function mapNodeTypeToBlockType(type) {
  const selectable = ["paragraph", "code", "list", "table", "blockquote"];
  return selectable.includes(type) ? type : null;
}
529
/**
 * Increments, in place, the counter that matches a node's type. Nodes of any
 * other type leave the counters untouched.
 *
 * @param {{ type: string }} node - Node to tally.
 * @param {{ paragraphs: number, code_blocks: number, lists: number, tables: number, blockquotes: number }} counts
 *   Counter object that is mutated.
 * @returns {void}
 */
function countBlock(node, counts) {
  // Node type paired with the counter field it feeds.
  const counterFor = new Map([
    ["paragraph", "paragraphs"],
    ["code", "code_blocks"],
    ["list", "lists"],
    ["table", "tables"],
    ["blockquote", "blockquotes"]
  ]);
  const field = counterFor.get(node.type);
  if (field !== undefined) {
    counts[field]++;
  }
}
548
+
549
+ // src/cli/utils/file-reader.ts
550
/**
 * Reports whether standard input is something other than a terminal.
 *
 * @returns {boolean} True when `process.stdin.isTTY` is falsy.
 */
function isStdinPiped() {
  const { isTTY } = process.stdin;
  return isTTY ? false : true;
}
553
/**
 * Reads standard input to the end and returns it as UTF-8 text.
 *
 * @returns {Promise<string>} Resolves with the full input on "end"; rejects
 *   with the error emitted by the stream on "error".
 */
function readStdin() {
  return new Promise((resolve, reject) => {
    const { stdin } = process;
    const pieces = [];
    stdin.setEncoding("utf8");
    stdin.on("data", (chunk) => {
      // Chunks are stored as Buffers whether they arrive as strings or Buffers.
      pieces.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk);
    });
    stdin.on("end", () => {
      resolve(Buffer.concat(pieces).toString("utf8"));
    });
    stdin.on("error", (failure) => {
      reject(failure);
    });
  });
}
573
+
574
+ // src/cli/utils/exit-codes.ts
575
// Process exit statuses used by the CLI commands (passed to exitWithCode).
var ExitCode = {
  /** All operations completed successfully */
  SUCCESS: 0,
  /** Complete failure (file not found, parse error, etc.) */
  ERROR: 1,
  /** Invalid arguments or usage */
  USAGE_ERROR: 2
};
583
/**
 * Terminates the process with the given status by calling process.exit.
 *
 * @param {number} code - Exit status, normally one of the ExitCode values.
 */
function exitWithCode(code) {
  process.exit(code);
}
586
+
587
+ // src/cli/commands/index-command.ts
588
/**
 * Implements the "index" command: parses each file, builds its document
 * index, prints the result, and exits the process.
 *
 * Behaviour:
 * - No files and stdin is piped: delegates to indexStdin.
 * - No files and no piped stdin: reports NO_FILES and exits with ExitCode.ERROR.
 * - Every file failed: prints the errors and exits with ExitCode.ERROR.
 * - Some files failed: prints the successful indexes together with the errors
 *   and exits with ExitCode.ERROR.
 * - All files succeeded: prints the indexes and exits with ExitCode.SUCCESS.
 *
 * @param {string[]} files - Paths of the markdown files to index.
 * @param {{ json?: boolean }} [options] - `json: true` selects JSON output; text otherwise.
 * @returns {Promise<void>}
 */
async function indexCommand(files, options = {}) {
  const documents = [];
  const errors = [];
  const useJson = options.json === true;
  if (files.length === 0 && isStdinPiped()) {
    await indexStdin(useJson);
    return;
  }
  if (files.length === 0) {
    const error = createErrorEntry(
      "PARSE_ERROR",
      "NO_FILES",
      "No files provided. Specify files to index or pipe content via stdin."
    );
    outputError([error], useJson);
    exitWithCode(ExitCode.ERROR);
    return;
  }
  for (const file of files) {
    try {
      const result = await parseFile(file);
      const namespace = deriveNamespace(file);
      const index = buildDocumentIndex(result.ast, namespace, file);
      documents.push(index);
    } catch (error) {
      if (error instanceof ParserError) {
        errors.push(
          createErrorEntry(
            error.code,
            error.code,
            error.message,
            error.filePath
          )
        );
      } else {
        // Record every failure, including thrown values that are not Error
        // instances, so a failed file is never silently left out of the report.
        const message = error instanceof Error ? error.message : String(error);
        errors.push(createErrorEntry("PROCESSING_ERROR", "UNKNOWN", message, file));
      }
    }
  }
  if (documents.length === 0 && errors.length > 0) {
    outputError(errors, useJson);
    exitWithCode(ExitCode.ERROR);
    return;
  }
  if (errors.length > 0) {
    const summary = calculateSummary(documents);
    if (useJson) {
      // documents is non-empty in this branch, so formatErrorResponse already
      // attaches it as partial_results.
      const response = formatErrorResponse(
        "index",
        errors,
        documents
      );
      response.data = { documents, summary };
      response.warnings = [
        `${String(errors.length)} of ${String(files.length)} file(s) could not be processed`
      ];
      console.log(JSON.stringify(response));
    } else {
      // Successful indexes go to stdout, the failures to stderr.
      console.log(formatIndexText(documents));
      console.log("");
      console.error(formatErrorText(errors));
    }
    exitWithCode(ExitCode.ERROR);
    return;
  }
  if (useJson) {
    const summary = calculateSummary(documents);
    const response = formatIndexResponse(documents, summary);
    console.log(JSON.stringify(response));
  } else {
    console.log(formatIndexText(documents));
  }
  exitWithCode(ExitCode.SUCCESS);
}
663
/**
 * Prints index-command errors: a JSON error envelope on stdout when `useJson`
 * is set, otherwise terse text on stderr.
 *
 * @param {Array<object>} errors - Error entries to report.
 * @param {boolean} useJson - Selects JSON output.
 * @returns {void}
 */
function outputError(errors, useJson) {
  if (!useJson) {
    console.error(formatErrorText(errors));
    return;
  }
  const envelope = formatErrorResponse("index", errors);
  console.log(JSON.stringify(envelope));
}
670
/**
 * Indexes markdown read from standard input, using the namespace "stdin" and
 * the pseudo path "<stdin>", prints the result, and exits the process.
 *
 * Exits with ExitCode.SUCCESS after printing the index, or with ExitCode.ERROR
 * after printing a PARSE_ERROR entry when reading or parsing fails.
 *
 * @param {boolean} useJson - Selects JSON output; text otherwise.
 * @returns {Promise<void>}
 */
async function indexStdin(useJson) {
  const documents = [];
  const errors = [];
  try {
    const content = await readStdin();
    const result = parseMarkdown(content);
    const namespace = "stdin";
    const index = buildDocumentIndex(result.ast, namespace, "<stdin>");
    documents.push(index);
    if (useJson) {
      const summary = calculateSummary(documents);
      const response = formatIndexResponse(documents, summary);
      console.log(JSON.stringify(response));
    } else {
      console.log(formatIndexText(documents));
    }
    exitWithCode(ExitCode.SUCCESS);
  } catch (error) {
    // Always report something: a thrown value that is not an Error instance
    // would otherwise yield an empty error list.
    const message = error instanceof Error ? error.message : String(error);
    errors.push(createErrorEntry("PARSE_ERROR", "PARSE_ERROR", message, "<stdin>"));
    outputError(errors, useJson);
    exitWithCode(ExitCode.ERROR);
  }
}
695
/**
 * Aggregates counts across document indexes. Each document contributes one
 * entry for its root (when present), one per heading, and one per counted
 * block; nodes and selectors are counted identically.
 *
 * @param {Array<{ root: object | null, headings: Array<object>, blocks: object }>} documents
 * @returns {{ total_documents: number, total_nodes: number, total_selectors: number }}
 */
function calculateSummary(documents) {
  let entryCount = 0;
  for (const { root, headings, blocks } of documents) {
    const blockTotal = blocks.paragraphs + blocks.code_blocks + blocks.lists + blocks.tables + blocks.blockquotes;
    entryCount += (root ? 1 : 0) + headings.length + blockTotal;
  }
  return {
    total_documents: documents.length,
    total_nodes: entryCount,
    total_selectors: entryCount
  };
}
715
+
716
+ // src/selector/types.ts
717
/**
 * Error raised for an invalid selector. Keeps the error code, the position of
 * the problem and the full selector text so the failure can be shown in context.
 */
var SelectorParseError = class _SelectorParseError extends Error {
  code;
  position;
  input;
  /**
   * @param {string} code - Machine-readable error code.
   * @param {string} message - Human-readable description.
   * @param {{ line: number, column: number, offset: number }} position - Where the problem is.
   * @param {string} input - The selector text being parsed.
   */
  constructor(code, message, position, input) {
    super(message);
    Object.assign(this, { name: "SelectorParseError", code, position, input });
    Error.captureStackTrace?.(this, _SelectorParseError);
  }
  /**
   * Describes the error with its location, the offending input line, and a
   * caret under the reported column.
   */
  toString() {
    const { line, column, offset } = this.position;
    const source = this.input;
    const lineStart = source.lastIndexOf("\n", offset - 1) + 1;
    const nextBreak = source.indexOf("\n", offset);
    const lineStop = nextBreak === -1 ? source.length : nextBreak;
    const lineContent = source.slice(lineStart, lineStop);
    const pointer = `${" ".repeat(column - 1)}^`;
    return `${this.message}
at line ${String(line)}, column ${String(column)}
${lineContent}
${pointer}`;
  }
};
744
+
745
+ // src/selector/tokenizer.ts
746
/**
 * Splits a selector string into tokens.
 *
 * Each token is `{ type, value, position }`, where position is the
 * `{ line, column, offset }` of the token's first character. The returned
 * array always ends with an EOF token; blank input yields only that token.
 * Whitespace between tokens is skipped.
 *
 * @param {string} input - Selector text.
 * @returns {Array<{ type: string, value: string, position: object }>} Token list.
 * @throws {SelectorParseError} INVALID_SYNTAX for a character that starts no token.
 */
function tokenize(input) {
  const tokens = [];
  let index = 0;
  let line = 1;
  let column = 1;
  // Snapshot of the current location (line/column are 1-based, offset is 0-based).
  const position = () => ({ line, column, offset: index });
  const isAtEnd = () => index >= input.length;
  // Consumes one character, keeping line/column in step; does nothing at end of input.
  const advance = () => {
    if (!isAtEnd()) {
      if (input[index] === "\n") {
        line++;
        column = 1;
      } else {
        column++;
      }
      index++;
    }
  };
  // Looks at the character `offset` places ahead without consuming; "" past the end.
  const peek = (offset = 0) => {
    const pos = index + offset;
    return pos < input.length ? input[pos] ?? "" : "";
  };
  const skipWhitespace = () => {
    while (!isAtEnd() && /\s/.test(peek())) {
      advance();
    }
  };
  // Consumes letters, digits, "_" and "-"; a hyphen inside a word is therefore
  // part of the identifier rather than a separate HYPHEN token.
  const scanIdentifier = () => {
    const start = index;
    while (!isAtEnd() && /[a-zA-Z0-9_-]/.test(peek())) {
      advance();
    }
    return input.slice(start, index);
  };
  // Consumes a run of decimal digits.
  const scanNumber = () => {
    const start = index;
    while (!isAtEnd() && /[0-9]/.test(peek())) {
      advance();
    }
    return input.slice(start, index);
  };
  // Consumes a quoted string and returns the text between the quotes. There is
  // no escape handling.
  // NOTE(review): when the closing quote is missing, the loop stops at end of
  // input and the final advance() is a no-op, so an unterminated string is
  // accepted silently — confirm this is intended.
  const scanString = () => {
    const quote = peek();
    advance();
    const start = index;
    while (!isAtEnd() && peek() !== quote) {
      advance();
    }
    const value = input.slice(start, index);
    advance();
    return value;
  };
  // Appends a token; uses the current location when no position is supplied.
  const addToken = (type, value, pos) => {
    tokens.push({ type, value, position: pos ?? position() });
  };
  if (input.trim() === "") {
    addToken("EOF" /* EOF */, "");
    return tokens;
  }
  while (!isAtEnd()) {
    skipWhitespace();
    if (isAtEnd()) break;
    const char = peek();
    const pos = position();
    // "::" has to be recognised before the single-character ":" case below.
    if (char === ":" && peek(1) === ":") {
      advance();
      advance();
      addToken("NAMESPACE_SEP" /* NAMESPACE_SEP */, "::", pos);
      continue;
    }
    // Single-character punctuation and quoted strings.
    switch (char) {
      case "/":
        advance();
        addToken("SLASH" /* SLASH */, "/", pos);
        continue;
      case ":":
        advance();
        addToken("COLON" /* COLON */, ":", pos);
        continue;
      case ".":
        advance();
        addToken("DOT" /* DOT */, ".", pos);
        continue;
      case "-":
        advance();
        addToken("HYPHEN" /* HYPHEN */, "-", pos);
        continue;
      case ",":
        advance();
        addToken("COMMA" /* COMMA */, ",", pos);
        continue;
      case "[":
        advance();
        addToken("OPEN_BRACKET" /* OPEN_BRACKET */, "[", pos);
        continue;
      case "]":
        advance();
        addToken("CLOSE_BRACKET" /* CLOSE_BRACKET */, "]", pos);
        continue;
      case "?":
        advance();
        addToken("QUESTION" /* QUESTION */, "?", pos);
        continue;
      case "&":
        advance();
        addToken("AMPERSAND" /* AMPERSAND */, "&", pos);
        continue;
      case "=":
        advance();
        addToken("EQUALS" /* EQUALS */, "=", pos);
        continue;
      case '"':
      case "'": {
        const strPos = position();
        const str = scanString();
        addToken("STRING" /* STRING */, str, strPos);
        continue;
      }
    }
    if (/[0-9]/.test(char)) {
      const numPos = position();
      const num = scanNumber();
      addToken("NUMBER" /* NUMBER */, num, numPos);
      continue;
    }
    if (/[a-zA-Z_]/.test(char)) {
      const identPos = position();
      const identifier = scanIdentifier();
      // Reserved words get their own token type; everything else is an IDENTIFIER.
      switch (identifier) {
        case "root":
          addToken("ROOT" /* ROOT */, identifier, identPos);
          break;
        case "heading":
          addToken("HEADING" /* HEADING */, identifier, identPos);
          break;
        case "section":
          addToken("SECTION" /* SECTION */, identifier, identPos);
          break;
        case "block":
          addToken("BLOCK" /* BLOCK */, identifier, identPos);
          break;
        case "page":
          addToken("PAGE" /* PAGE */, identifier, identPos);
          break;
        default:
          addToken("IDENTIFIER" /* IDENTIFIER */, identifier, identPos);
      }
      continue;
    }
    throw new SelectorParseError(
      "INVALID_SYNTAX",
      `Invalid character '${char}' in selector`,
      pos,
      input
    );
  }
  addToken("EOF" /* EOF */, "");
  return tokens;
}
905
+
906
+ // src/selector/parser.ts
907
// Heading level names accepted after "heading:" and as shorthand path segments.
var validHeadingLevels = ["h1", "h2", "h3", "h4", "h5", "h6"];
// Block type names accepted after "block:" (aliases are not accepted there).
var validBlockTypes = ["paragraph", "list", "code", "table", "blockquote"];
// Shorthand segment names mapped to their canonical block type.
var blockAliases = {
  para: "paragraph",
  paragraph: "paragraph",
  list: "list",
  code: "code",
  table: "table",
  quote: "blockquote",
  blockquote: "blockquote"
};
918
/**
 * Parses selector text into a selector AST by tokenizing it and running the
 * Parser over the tokens.
 *
 * @param {string} input - Selector text.
 * @returns {object} Result of Parser#parse.
 * @throws {SelectorParseError} EMPTY_SELECTOR for blank input; tokenizer and
 *   parser errors propagate unchanged.
 */
function parseSelector(input) {
  const isBlank = input.trim() === "";
  if (isBlank) {
    const origin = { line: 1, column: 1, offset: 0 };
    throw new SelectorParseError("EMPTY_SELECTOR", "Selector cannot be empty", origin, input);
  }
  return new Parser(tokenize(input), input).parse();
}
932
+ var Parser = class {
933
+ tokens;
934
+ current = 0;
935
+ input;
936
/**
 * @param tokens - Token list to parse (parseSelector passes the output of tokenize).
 * @param input - Original selector text, handed to SelectorParseError when errors are built.
 */
constructor(tokens, input) {
  this.tokens = tokens;
  this.input = input;
}
940
+ parse() {
941
+ const segments = [];
942
+ let namespace;
943
+ let queryParams;
944
+ const startPos = this.peek().position;
945
+ if (this.check("IDENTIFIER" /* IDENTIFIER */) && this.peekType(1) === "NAMESPACE_SEP" /* NAMESPACE_SEP */) {
946
+ const nsToken = this.advance();
947
+ namespace = nsToken.value;
948
+ this.consume("NAMESPACE_SEP" /* NAMESPACE_SEP */, "Expected '::' after namespace");
949
+ }
950
+ if (this.check("EOF" /* EOF */)) {
951
+ throw this.error("INVALID_SYNTAX", "Expected at least one path segment");
952
+ }
953
+ do {
954
+ segments.push(this.parsePathSegment());
955
+ } while (this.match("SLASH" /* SLASH */));
956
+ if (segments.length > 0) {
957
+ void segments[segments.length - 1];
958
+ }
959
+ if (this.match("QUESTION" /* QUESTION */)) {
960
+ queryParams = this.parseQueryParams();
961
+ }
962
+ if (!this.check("EOF" /* EOF */)) {
963
+ throw this.error("INVALID_SYNTAX", `Unexpected token '${this.peek().value}' after selector`);
964
+ }
965
+ return {
966
+ type: "selector" /* SELECTOR */,
967
+ namespace,
968
+ segments,
969
+ queryParams,
970
+ position: startPos
971
+ };
972
+ }
973
+ parsePathSegment() {
974
+ const startPos = this.peek().position;
975
+ let nodeType;
976
+ let subtype;
977
+ let index;
978
+ if (this.match("ROOT" /* ROOT */)) {
979
+ nodeType = "root";
980
+ } else if (this.match("HEADING" /* HEADING */)) {
981
+ nodeType = "heading";
982
+ if (!this.match("COLON" /* COLON */)) {
983
+ throw this.error("INVALID_SYNTAX", `Expected ':' after 'heading'`);
984
+ }
985
+ if (!this.check("IDENTIFIER" /* IDENTIFIER */)) {
986
+ throw this.error(
987
+ "INVALID_HEADING_LEVEL",
988
+ `Expected heading level (h1-h6) after 'heading:'`
989
+ );
990
+ }
991
+ const levelToken = this.advance();
992
+ const level = levelToken.value;
993
+ if (!validHeadingLevels.includes(level)) {
994
+ throw new SelectorParseError(
995
+ "INVALID_HEADING_LEVEL",
996
+ `Invalid heading level '${level}' - must be h1-h6`,
997
+ levelToken.position,
998
+ this.input
999
+ );
1000
+ }
1001
+ subtype = level;
1002
+ } else if (this.match("BLOCK" /* BLOCK */)) {
1003
+ nodeType = "block";
1004
+ if (!this.match("COLON" /* COLON */)) {
1005
+ throw this.error("INVALID_SYNTAX", `Expected ':' after 'block'`);
1006
+ }
1007
+ if (!this.check("IDENTIFIER" /* IDENTIFIER */)) {
1008
+ throw this.error("INVALID_BLOCK_TYPE", `Expected block type after 'block:'`);
1009
+ }
1010
+ const typeToken = this.advance();
1011
+ const type = typeToken.value;
1012
+ if (!validBlockTypes.includes(type)) {
1013
+ throw new SelectorParseError(
1014
+ "INVALID_BLOCK_TYPE",
1015
+ `Invalid block type '${type}' - must be one of: ${validBlockTypes.join(", ")}`,
1016
+ typeToken.position,
1017
+ this.input
1018
+ );
1019
+ }
1020
+ subtype = type;
1021
+ } else if (this.match("SECTION" /* SECTION */)) {
1022
+ nodeType = "section";
1023
+ } else if (this.match("PAGE" /* PAGE */)) {
1024
+ nodeType = "page";
1025
+ } else if (this.check("IDENTIFIER" /* IDENTIFIER */)) {
1026
+ const identValue = this.peek().value;
1027
+ if (validHeadingLevels.includes(identValue)) {
1028
+ this.advance();
1029
+ nodeType = "heading";
1030
+ subtype = identValue;
1031
+ } else if (identValue in blockAliases) {
1032
+ this.advance();
1033
+ nodeType = "block";
1034
+ subtype = blockAliases[identValue];
1035
+ } else {
1036
+ throw new SelectorParseError(
1037
+ "INVALID_SYNTAX",
1038
+ `Unknown selector '${identValue}' - use h1-h6, code, para, list, table, quote, root, or section`,
1039
+ this.peek().position,
1040
+ this.input
1041
+ );
1042
+ }
1043
+ } else if (this.check("SLASH" /* SLASH */) || this.check("EOF" /* EOF */)) {
1044
+ throw this.error(
1045
+ "INVALID_SYNTAX",
1046
+ "Expected selector type"
1047
+ );
1048
+ } else {
1049
+ throw this.error(
1050
+ "INVALID_SYNTAX",
1051
+ `Expected selector type`
1052
+ );
1053
+ }
1054
+ if (this.match("DOT" /* DOT */)) {
1055
+ if (!this.check("NUMBER" /* NUMBER */)) {
1056
+ throw this.error("INVALID_INDEX", `Expected number after '.'`);
1057
+ }
1058
+ const indexToken = this.advance();
1059
+ const numValue = parseInt(indexToken.value, 10);
1060
+ if (numValue < 0) {
1061
+ throw this.error("INVALID_INDEX", `Index must be non-negative`);
1062
+ }
1063
+ index = numValue;
1064
+ index = this.parseIndexList(numValue);
1065
+ } else if (this.match("OPEN_BRACKET" /* OPEN_BRACKET */)) {
1066
+ if (!this.check("NUMBER" /* NUMBER */)) {
1067
+ throw this.error("INVALID_INDEX", `Expected number inside brackets`);
1068
+ }
1069
+ const indexToken = this.advance();
1070
+ const numValue = parseInt(indexToken.value, 10);
1071
+ if (numValue < 0) {
1072
+ throw this.error("INVALID_INDEX", `Index must be non-negative`);
1073
+ }
1074
+ index = numValue;
1075
+ index = this.parseIndexList(numValue);
1076
+ if (!this.match("CLOSE_BRACKET" /* CLOSE_BRACKET */)) {
1077
+ throw this.error("UNCLOSED_BRACKET", `Unclosed bracket, expected ']'`);
1078
+ }
1079
+ }
1080
+ return {
1081
+ type: "path_segment" /* PATH_SEGMENT */,
1082
+ nodeType,
1083
+ subtype,
1084
+ index,
1085
+ position: startPos
1086
+ };
1087
+ }
1088
+ /**
1089
+ * Parse index list after initial number.
1090
+ * Supports:
1091
+ * - Single: 1 → returns 1
1092
+ * - Range: 1-3 → returns [1, 2, 3]
1093
+ * - Comma list: 1,3,5 → returns [1, 3, 5]
1094
+ */
1095
+ parseIndexList(firstIndex) {
1096
+ const indices = [firstIndex];
1097
+ while (true) {
1098
+ if (this.match("HYPHEN" /* HYPHEN */)) {
1099
+ if (!this.check("NUMBER" /* NUMBER */)) {
1100
+ throw this.error("INVALID_INDEX", `Expected number after '-' for range`);
1101
+ }
1102
+ const endToken = this.advance();
1103
+ const endIndex = parseInt(endToken.value, 10);
1104
+ if (endIndex < firstIndex) {
1105
+ throw this.error("INVALID_INDEX", `Range end must be >= start`);
1106
+ }
1107
+ for (let i = firstIndex + 1; i <= endIndex; i++) {
1108
+ indices.push(i);
1109
+ }
1110
+ break;
1111
+ } else if (this.match("COMMA" /* COMMA */)) {
1112
+ if (!this.check("NUMBER" /* NUMBER */)) {
1113
+ throw this.error("INVALID_INDEX", `Expected number after ','`);
1114
+ }
1115
+ const nextToken = this.advance();
1116
+ const nextIndex = parseInt(nextToken.value, 10);
1117
+ indices.push(nextIndex);
1118
+ } else {
1119
+ break;
1120
+ }
1121
+ }
1122
+ return indices.length === 1 ? indices[0] : indices;
1123
+ }
1124
+ parseQueryParams() {
1125
+ const params = [];
1126
+ const startPos = this.peek().position;
1127
+ if (!this.check("IDENTIFIER" /* IDENTIFIER */)) {
1128
+ throw this.error("MALFORMED_QUERY", `Expected parameter name`);
1129
+ }
1130
+ const key = this.advance().value;
1131
+ if (!this.match("EQUALS" /* EQUALS */)) {
1132
+ throw this.error("MALFORMED_QUERY", `Expected '=' after parameter name '${key}'`);
1133
+ }
1134
+ let value;
1135
+ if (this.check("STRING" /* STRING */)) {
1136
+ value = this.advance().value;
1137
+ } else if (this.check("IDENTIFIER" /* IDENTIFIER */)) {
1138
+ value = this.advance().value;
1139
+ } else if (this.check("NUMBER" /* NUMBER */)) {
1140
+ value = this.advance().value;
1141
+ } else {
1142
+ value = "";
1143
+ }
1144
+ params.push({ key, value, position: startPos });
1145
+ while (this.match("AMPERSAND" /* AMPERSAND */)) {
1146
+ const paramStart = this.peek().position;
1147
+ if (!this.check("IDENTIFIER" /* IDENTIFIER */)) {
1148
+ throw this.error("MALFORMED_QUERY", `Expected parameter name`);
1149
+ }
1150
+ const paramKey = this.advance().value;
1151
+ if (!this.match("EQUALS" /* EQUALS */)) {
1152
+ throw this.error("MALFORMED_QUERY", `Expected '=' after parameter name '${paramKey}'`);
1153
+ }
1154
+ let paramValue;
1155
+ if (this.check("STRING" /* STRING */)) {
1156
+ paramValue = this.advance().value;
1157
+ } else if (this.check("IDENTIFIER" /* IDENTIFIER */)) {
1158
+ paramValue = this.advance().value;
1159
+ } else if (this.check("NUMBER" /* NUMBER */)) {
1160
+ paramValue = this.advance().value;
1161
+ } else {
1162
+ paramValue = "";
1163
+ }
1164
+ params.push({ key: paramKey, value: paramValue, position: paramStart });
1165
+ }
1166
+ return params;
1167
+ }
1168
+ check(type) {
1169
+ return this.peek().type === type;
1170
+ }
1171
+ peekType(offset = 0) {
1172
+ const pos = this.current + offset;
1173
+ return pos < this.tokens.length ? this.tokens[pos]?.type ?? "EOF" /* EOF */ : "EOF" /* EOF */;
1174
+ }
1175
+ match(...types) {
1176
+ for (const type of types) {
1177
+ if (this.check(type)) {
1178
+ this.advance();
1179
+ return true;
1180
+ }
1181
+ }
1182
+ return false;
1183
+ }
1184
+ advance() {
1185
+ if (!this.check("EOF" /* EOF */)) {
1186
+ this.current++;
1187
+ }
1188
+ return this.previous();
1189
+ }
1190
+ peek() {
1191
+ return this.tokens[this.current] ?? this.tokens[this.tokens.length - 1];
1192
+ }
1193
+ previous() {
1194
+ return this.tokens[this.current - 1] ?? this.tokens[0];
1195
+ }
1196
+ consume(type, message) {
1197
+ if (this.check(type)) {
1198
+ return this.advance();
1199
+ }
1200
+ throw this.error("INVALID_SYNTAX", message);
1201
+ }
1202
+ error(code, message) {
1203
+ return new SelectorParseError(code, message, this.peek().position, this.input);
1204
+ }
1205
+ };
1206
+
1207
+ // src/resolver/levenshtein.ts
1208
/**
 * Compute the Levenshtein edit distance between two strings using two
 * rolling rows sized by the shorter string.
 *
 * @param str1 - First string.
 * @param str2 - Second string.
 * @returns Number of single-character insertions, deletions and substitutions
 *          needed to turn one string into the other.
 */
function levenshteinDistance(str1, str2) {
  // The distance is symmetric, so orient the inputs to keep the rows short.
  const [longer, shorter] = str1.length < str2.length ? [str2, str1] : [str1, str2];
  const cols = shorter.length;
  if (cols === 0) {
    return longer.length;
  }
  let above = Array.from({ length: cols + 1 }, (_, k) => k);
  let row = new Array(cols + 1).fill(0);
  for (let r = 1; r <= longer.length; r += 1) {
    row[0] = r;
    for (let c = 1; c <= cols; c += 1) {
      const insertion = row[c - 1] + 1;
      const deletion = above[c] + 1;
      const substitution = above[c - 1] + (longer[r - 1] === shorter[c - 1] ? 0 : 1);
      row[c] = Math.min(insertion, deletion, substitution);
    }
    // Reuse the two buffers: the finished row becomes the reference row.
    [above, row] = [row, above];
  }
  return above[cols];
}
1238
+
1239
+ // src/resolver/suggestions.ts
1240
/**
 * Ranks candidate selector strings by similarity to a query string.
 *
 * Matching is case-insensitive and ignores leading/trailing whitespace on
 * both the query and the candidates.
 */
var SuggestionEngine = class {
  candidates;
  /**
   * @param candidates - Array of selector strings to suggest from.
   */
  constructor(candidates) {
    this.candidates = candidates;
  }
  /**
   * Get suggestions for a query selector.
   *
   * @param query - The query selector
   * @param options - Configuration options: `maxResults` (default 5),
   *   `minRatio` (default 0.4) and `includeExact` (default true)
   * @returns Ranked array of `{ selector, distance, ratio, reason }` suggestions
   *
   * @example
   * ```typescript
   * const engine = new SuggestionEngine(['heading:h1[0]', 'heading:h2[0]', 'block:code[0]']);
   * const suggestions = engine.getSuggestions('headng:h1[0]'); // typo in 'heading'
   * // The closest candidate, 'heading:h1[0]', is ranked first.
   * ```
   */
  getSuggestions(query, options = {}) {
    const { maxResults = 5, minRatio = 0.4, includeExact = true } = options;
    const normalizedQuery = query.toLowerCase().trim();
    const results = [];
    if (includeExact) {
      for (const candidate of this.candidates) {
        // Normalize exactly as findFuzzyMatches does; otherwise a candidate that
        // only differs by surrounding whitespace is dropped by both passes.
        const normalizedSelector = candidate.toLowerCase().trim();
        if (normalizedSelector === normalizedQuery) {
          results.push({
            selector: candidate,
            distance: 0,
            ratio: 1,
            reason: "exact_match",
            normalizedSelector
          });
        }
      }
    }
    const hasExactMatch = results.length > 0;
    results.push(...this.findFuzzyMatches(normalizedQuery, hasExactMatch, minRatio));
    return this.sortAndLimitResults(results, maxResults);
  }
  /**
   * Find fuzzy matches using Levenshtein distance.
   *
   * Candidates equal to the query after normalization are always skipped here;
   * they are reported (or deliberately omitted) by getSuggestions.
   *
   * @param normalizedQuery - Lower-cased, trimmed query
   * @param skipExact - Accepted for call compatibility; not consulted
   * @param minRatio - Minimum similarity ratio (0..1) a candidate must reach
   * @returns Unsorted array of fuzzy suggestions
   */
  findFuzzyMatches(normalizedQuery, skipExact, minRatio) {
    const results = [];
    for (const candidate of this.candidates) {
      const normalizedCandidate = candidate.toLowerCase().trim();
      if (normalizedCandidate === normalizedQuery) {
        continue;
      }
      const distance = levenshteinDistance(normalizedQuery, normalizedCandidate);
      const maxLen = Math.max(normalizedQuery.length, normalizedCandidate.length);
      const ratio = maxLen === 0 ? 1 : (maxLen - distance) / maxLen;
      if (ratio >= minRatio) {
        results.push({
          selector: candidate,
          distance,
          ratio,
          reason: distance <= 2 ? "typo_correction" : "fuzzy_match",
          normalizedSelector: normalizedCandidate
        });
      }
    }
    return results;
  }
  /**
   * Sort results by ratio (desc), distance (asc), length (asc) and limit to maxResults.
   *
   * The input array is left untouched, and the internal `normalizedSelector`
   * field is removed from the returned entries.
   */
  sortAndLimitResults(results, maxResults) {
    const ranked = [...results].sort((a, b) => {
      // Ratios closer than 1e-3 are treated as a tie.
      if (Math.abs(b.ratio - a.ratio) > 1e-3) {
        return b.ratio - a.ratio;
      }
      if (a.distance !== b.distance) {
        return a.distance - b.distance;
      }
      return a.selector.length - b.selector.length;
    });
    return ranked.slice(0, maxResults).map(({ normalizedSelector: _, ...suggestion }) => suggestion);
  }
};
1323
+
1324
+ // src/resolver/single-resolver.ts
1325
/**
 * Resolve a parsed selector against one document tree.
 *
 * @param tree - Root node of the document.
 * @param namespace - Namespace the document is registered under.
 * @param selector - Parsed selector (`namespace`, `segments`, optional `queryParams`).
 * @param availableSelectors - Selector strings used to build suggestions on failure.
 * @returns `{ success: true, results }` or `{ success: false, error, partialResults? }`.
 *          Exceptions raised while resolving are converted into an INVALID_PATH error.
 */
function resolveSingle(tree, namespace, selector, availableSelectors) {
  try {
    const namespaceMismatch = selector.namespace && selector.namespace !== namespace;
    if (namespaceMismatch) {
      return createNamespaceError(selector, [namespace], availableSelectors);
    }
    const { segments } = selector;
    const resolution = resolvePathSegments(
      {
        namespace,
        path: [],
        currentNode: tree,
        segmentIndex: 0,
        totalSegments: segments.length
      },
      segments
    );
    if (!resolution.success) {
      const engine = new SuggestionEngine(availableSelectors);
      const selectorText = selectorToString(selector);
      const failure = {
        type: resolution.errorType,
        message: resolution.errorMessage,
        selector: selectorText,
        suggestions: engine.getSuggestions(selectorText)
      };
      const failedSegment = segments[resolution.failedAtSegment];
      if (failedSegment) {
        failure.failedSegment = failedSegment;
      }
      return {
        success: false,
        error: failure,
        partialResults: resolution.partialResults
      };
    }
    // When the last segment lists several indices, each result is labelled
    // with its own index rather than the whole list.
    const finalSegment = segments[segments.length - 1];
    const explicitIndices = Array.isArray(finalSegment?.index) ? finalSegment.index : void 0;
    const results = resolution.nodes.map((entry, position) => ({
      namespace,
      node: entry.node,
      selector: selectorToString(selector, explicitIndices?.[position]),
      path: entry.path,
      wordCount: estimateWordCount(entry.node),
      childrenAvailable: hasChildren(entry.node)
    }));
    return { success: true, results };
  } catch (error) {
    const message = error instanceof Error ? error.message : "Unknown error";
    return {
      success: false,
      error: {
        type: "INVALID_PATH",
        message,
        selector: selectorToString(selector),
        suggestions: []
      }
    };
  }
}
1385
/**
 * Walk the selector segments from `context.currentNode`, narrowing the set
 * of selected nodes one segment at a time.
 *
 * Only the first currently-selected node is used as the parent for the next
 * segment. `context.segmentIndex` is updated as segments are processed.
 *
 * @param context - Resolution context (`namespace`, `path`, `currentNode`, ...).
 * @param segments - Parsed path segments.
 * @returns `{ success: true, nodes }` or a failure object carrying
 *          `errorType`, `errorMessage`, `failedAtSegment` and, for most
 *          failures, the `partialResults` gathered so far.
 */
function resolvePathSegments(context, segments) {
  const depthLimit = DEFAULT_MAX_DEPTH;
  const lastPosition = segments.length - 1;
  const partialResults = [];
  let frontier = [{ node: context.currentNode, path: [...context.path] }];
  for (let position = 0; position < segments.length; position += 1) {
    const segment = segments[position];
    if (!segment) {
      return {
        success: false,
        errorType: "SELECTOR_NOT_FOUND",
        errorMessage: `Invalid segment at index ${position}`,
        failedAtSegment: position
      };
    }
    context.segmentIndex = position;
    if (position >= depthLimit) {
      return {
        success: false,
        errorType: "SELECTOR_NOT_FOUND",
        errorMessage: `Selector depth ${position + 1} exceeds maximum of ${depthLimit}`,
        failedAtSegment: position,
        partialResults
      };
    }
    if (segment.nodeType === "root") {
      if (frontier[0]?.node.type !== "root") {
        return {
          success: false,
          errorType: "SELECTOR_NOT_FOUND",
          errorMessage: "Root segment can only be used at the beginning",
          failedAtSegment: position
        };
      }
      // A root segment selects nothing new; it only records the node on the path.
      frontier = frontier.map((entry) => ({ node: entry.node, path: [...entry.path, entry.node] }));
      continue;
    }
    const anchor = frontier[0];
    const anchorPath = anchor?.path ?? [];
    const candidates = findMatchingChildren(anchor?.node, segment);
    if (candidates.length === 0) {
      return {
        success: false,
        errorType: "SELECTOR_NOT_FOUND",
        errorMessage: `No matches found for segment at index ${position}`,
        failedAtSegment: position,
        partialResults
      };
    }
    const toEntry = (node) => ({ node, path: [...anchorPath, node] });
    if (segment.index === void 0) {
      // Without an index, the final segment yields every match while an
      // intermediate segment descends into the first match only.
      frontier = position === lastPosition
        ? candidates.map((node) => toEntry(node))
        : [toEntry(candidates[0])];
    } else {
      const wanted = Array.isArray(segment.index) ? segment.index : [segment.index];
      const highest = Math.max(...wanted);
      if (highest >= candidates.length) {
        const subtype = segment.subtype ? ":" + segment.subtype : "";
        return {
          success: false,
          errorType: "INDEX_OUT_OF_RANGE",
          errorMessage: `Index ${highest} out of range (only ${candidates.length} ${segment.nodeType}${subtype}(s) found)`,
          failedAtSegment: position,
          partialResults
        };
      }
      frontier = wanted.map((idx) => toEntry(candidates[idx]));
    }
    const [lead] = frontier;
    partialResults.push({
      namespace: context.namespace,
      node: lead?.node,
      selector: segmentToString(segment),
      path: lead?.path ?? [],
      wordCount: estimateWordCount(lead?.node),
      childrenAvailable: hasChildren(lead?.node)
    });
  }
  return {
    success: true,
    nodes: frontier
  };
}
1470
/**
 * Collect the direct children of `parent` that a path segment selects.
 *
 * - "heading" segments (subtype "h<N>") produce synthetic `section` nodes: the
 *   heading of depth N followed by every sibling up to the next heading of
 *   the same or a shallower depth.
 * - "block" segments return children whose `type` equals the segment subtype.
 * - "section" and "page" segments (and any other node type) match nothing.
 *
 * @param parent - Node whose `children` array is searched.
 * @param segment - Parsed path segment (`nodeType`, optional `subtype`).
 * @returns Array of matching (or synthesized) nodes; empty when `parent` has
 *          no `children` array.
 */
function findMatchingChildren(parent, segment) {
  const siblings = parent?.children;
  if (!siblings || !Array.isArray(siblings)) {
    return [];
  }
  const found = [];
  const { nodeType, subtype } = segment;
  if (nodeType === "heading") {
    if (subtype?.startsWith("h")) {
      const level = parseInt(subtype.slice(1), 10);
      for (let start = 0; start < siblings.length; start += 1) {
        const opener = siblings[start];
        if (opener.type !== "heading" || opener.depth !== level) {
          continue;
        }
        // Extend the section until a heading of equal or shallower depth closes it.
        let end = start + 1;
        while (end < siblings.length) {
          const next = siblings[end];
          if (next.type === "heading" && next.depth <= level) {
            break;
          }
          end += 1;
        }
        found.push({
          type: "section",
          depth: level,
          children: siblings.slice(start, end),
          position: opener.position
        });
      }
    }
  } else if (nodeType === "block") {
    if (subtype) {
      for (let at = 0; at < siblings.length; at += 1) {
        const candidate = siblings[at];
        if (candidate.type === subtype) {
          found.push(candidate);
        }
      }
    }
  }
  return found;
}
1516
/**
 * Estimate the number of whitespace-separated words under a node.
 *
 * A node with a non-empty string `value` is counted directly; otherwise the
 * counts of its `children` are summed recursively.
 *
 * @param node - Tree node (may be null/undefined).
 * @returns Word count, 0 when the node carries no text.
 */
function estimateWordCount(node) {
  if (!node) {
    return 0;
  }
  const { value, children } = node;
  if (value && typeof value === "string") {
    const words = value.trim().split(/\s+/);
    return words.filter((word) => word.length > 0).length;
  }
  if (children && Array.isArray(children)) {
    return children.reduce((total, child) => total + estimateWordCount(child), 0);
  }
  return 0;
}
1530
/**
 * Report whether a node has at least one child.
 *
 * Always returns a strict boolean, including for null/undefined nodes and
 * nodes without a `children` array.
 *
 * @param node - Tree node (may be null/undefined).
 * @returns true when `node.children` is a non-empty array, false otherwise.
 */
function hasChildren(node) {
  return Array.isArray(node?.children) && node.children.length > 0;
}
1533
/**
 * Render a parsed selector back to text: `ns::seg/seg?key=value&...`.
 *
 * @param selector - Parsed selector (`namespace`, `segments`, optional `queryParams`).
 * @param resultIndex - When given, the last segment is rendered with this
 *        single index instead of its own index.
 * @returns The selector string.
 */
function selectorToString(selector, resultIndex) {
  const prefix = selector.namespace ? `${selector.namespace}::` : "";
  const lastPosition = selector.segments.length - 1;
  const pathText = selector.segments
    .map((segment, position) =>
      position === lastPosition && resultIndex !== void 0
        ? segmentToStringWithIndex(segment, resultIndex)
        : segmentToString(segment)
    )
    .join("/");
  const params = selector.queryParams;
  if (!params || params.length === 0) {
    return prefix + pathText;
  }
  const query = params.map((param) => `${param.key}=${param.value}`).join("&");
  return `${prefix}${pathText}?${query}`;
}
1551
/**
 * Render a segment as `nodeType[:subtype].index`, using the supplied index
 * and ignoring any index stored on the segment.
 */
function segmentToStringWithIndex(segment, index) {
  const { nodeType, subtype } = segment;
  const head = subtype ? `${nodeType}:${subtype}` : nodeType;
  return `${head}.${index}`;
}
1559
/**
 * Render a segment as `nodeType[:subtype][.index]`.
 *
 * An array index is written as a comma-separated list (e.g. `.1,3,5`).
 */
function segmentToString(segment) {
  const { nodeType, subtype, index } = segment;
  const head = subtype ? `${nodeType}:${subtype}` : nodeType;
  if (index === void 0) {
    return head;
  }
  const indexText = Array.isArray(index) ? index.join(",") : `${index}`;
  return `${head}.${indexText}`;
}
1574
/**
 * Build a NAMESPACE_NOT_FOUND failure for a selector whose namespace does not
 * match, suggesting the same selector under similar known namespaces.
 *
 * @param selector - Parsed selector carrying the unknown namespace.
 * @param availableNamespaces - Namespaces that do exist.
 * @param availableSelectors - Not used by this function.
 * @returns `{ success: false, error }`.
 */
function createNamespaceError(selector, availableNamespaces, availableSelectors) {
  const requested = selector.namespace;
  const ranked = new SuggestionEngine(availableNamespaces).getSuggestions(requested || "");
  const suggestions = ranked.map(({ selector: candidateNamespace, distance, ratio, reason }) => {
    // Swap the namespace prefix of the original selector for the suggested one.
    const pathOnly = selectorToString(selector).replace(/^[^:]+::/, "");
    return {
      selector: `${candidateNamespace}::${pathOnly}`,
      distance,
      ratio,
      reason
    };
  });
  const error = {
    type: "NAMESPACE_NOT_FOUND",
    message: `Namespace '${requested}' not found. Available namespaces: ${availableNamespaces.join(", ")}`,
    selector: selectorToString(selector),
    suggestions
  };
  return { success: false, error };
}
1593
+
1594
+ // src/resolver/multi-resolver.ts
1595
/**
 * Resolve a selector against a set of documents: within one document when
 * the selector names a namespace, otherwise across all of them.
 */
function resolveMulti(documents, selector) {
  const strategy = selector.namespace ? resolveInNamespace : resolveAcrossAll;
  return strategy(documents, selector);
}
1601
/**
 * Resolve a namespaced selector against the document registered under that
 * namespace, or return a NAMESPACE_NOT_FOUND failure with suggestions when
 * no such document exists.
 */
function resolveInNamespace(documents, selector) {
  const requested = selector.namespace;
  const targetDoc = documents.find((doc) => doc.namespace === requested);
  if (targetDoc) {
    return resolveSingle(targetDoc.tree, targetDoc.namespace, selector, targetDoc.availableSelectors);
  }
  const availableNamespaces = documents.map((doc) => doc.namespace);
  const ranked = new SuggestionEngine(availableNamespaces).getSuggestions(requested || "");
  const suggestions = ranked.map(({ selector: candidateNamespace, distance, ratio, reason }) => {
    // Swap the namespace prefix of the original selector for the suggested one.
    const pathOnly = selectorToString2(selector).replace(/^[^:]+::/, "");
    return {
      selector: `${candidateNamespace}::${pathOnly}`,
      distance,
      ratio,
      reason
    };
  });
  const error = {
    type: "NAMESPACE_NOT_FOUND",
    message: `Namespace '${requested}' not found. Available namespaces: ${availableNamespaces.join(", ")}`,
    selector: selectorToString2(selector),
    suggestions
  };
  return { success: false, error };
}
1625
/** Resolve the selector in every document and merge the per-document outcomes. */
function resolveAcrossAll(documents, selector) {
  const outcomes = documents.map((doc) =>
    resolveSingle(doc.tree, doc.namespace, selector, doc.availableSelectors)
  );
  return mergeOutcomes(outcomes, selector, documents);
}
1633
/**
 * Combine per-document outcomes into one.
 *
 * If any document succeeded, the result is a success containing the
 * concatenated results of all successful documents. Otherwise a single
 * SELECTOR_NOT_FOUND error is returned with suggestions drawn from the
 * selectors of every document; the individual errors are not reported.
 */
function mergeOutcomes(outcomes, selector, documents) {
  const succeeded = outcomes.filter((outcome) => outcome.success);
  if (succeeded.length > 0) {
    return {
      success: true,
      results: succeeded.flatMap((outcome) => outcome.results)
    };
  }
  const pool = documents.flatMap((doc) => doc.availableSelectors);
  const selectorText = selectorToString2(selector);
  return {
    success: false,
    error: {
      type: "SELECTOR_NOT_FOUND",
      message: "No matches found in any document",
      selector: selectorText,
      suggestions: new SuggestionEngine(pool).getSuggestions(selectorText)
    }
  };
}
1664
/**
 * Render a parsed selector to text using bracket index notation:
 * `ns::nodeType:subtype[index]/...?key=value&...`.
 */
function selectorToString2(selector) {
  const prefix = selector.namespace ? `${selector.namespace}::` : "";
  const pathText = selector.segments
    .map(({ nodeType, subtype, index }) => {
      const head = subtype ? `${nodeType}:${subtype}` : nodeType;
      return index === void 0 ? head : `${head}[${index}]`;
    })
    .join("/");
  const params = selector.queryParams;
  if (!params || params.length === 0) {
    return prefix + pathText;
  }
  const query = params.map((param) => `${param.key}=${param.value}`).join("&");
  return `${prefix}${pathText}?${query}`;
}
1685
+
1686
+ // src/cli/commands/select-command.ts
1687
/**
 * Implementation of the `select` CLI command: parse the selector, parse each
 * file, resolve the selector across the parsed documents and print the
 * outcome as text or JSON.
 *
 * Files that fail to parse are collected as errors; those errors are only
 * printed when no file could be parsed at all.
 *
 * @param selector - Raw selector string from the command line.
 * @param files - Paths of the Markdown files to search.
 * @param options - `{ json?: boolean }`; JSON output only when `json === true`.
 */
async function selectCommand(selector, files, options = {}) {
  const useJson = options.json === true;
  if (files.length === 0) {
    const noFiles = createErrorEntry(
      "PARSE_ERROR",
      "NO_FILES",
      "No files provided. Specify files to search."
    );
    outputError2([noFiles], useJson);
    exitWithCode(ExitCode.ERROR);
    return;
  }
  let selectorAst;
  try {
    selectorAst = parseSelector(selector);
  } catch (error) {
    if (!(error instanceof SelectorParseError)) {
      throw error;
    }
    const invalid = createErrorEntry("INVALID_SELECTOR", error.code, error.message, void 0, selector);
    outputError2([invalid], useJson);
    exitWithCode(ExitCode.ERROR);
    return;
  }
  // Only positive integer `head` / `tail` query parameters affect truncation.
  const truncateOptions = {};
  for (const param of selectorAst.queryParams || []) {
    if (param.key !== "head" && param.key !== "tail") {
      continue;
    }
    const limit = parseInt(param.value, 10);
    if (!Number.isNaN(limit) && limit > 0) {
      truncateOptions[param.key] = limit;
    }
  }
  const documents = [];
  const parseErrors = [];
  for (const file of files) {
    try {
      const parsed = await parseFile(file);
      const namespace = deriveNamespace(file);
      const availableSelectors = buildAvailableSelectors(parsed.ast, namespace);
      documents.push({ namespace, tree: parsed.ast, availableSelectors });
    } catch (error) {
      if (error instanceof ParserError) {
        parseErrors.push(createErrorEntry(error.code, error.code, error.message, error.filePath));
      } else if (error instanceof Error) {
        parseErrors.push(createErrorEntry("PROCESSING_ERROR", "UNKNOWN", error.message, file));
      }
    }
  }
  if (documents.length === 0) {
    outputError2(parseErrors, useJson);
    exitWithCode(ExitCode.ERROR);
    return;
  }
  // Both the success and the failure report go to stdout in the chosen format.
  const report = (matches, unresolved) => {
    if (useJson) {
      console.log(JSON.stringify(formatSelectResponse(matches, unresolved)));
    } else {
      console.log(formatSelectText(matches, unresolved));
    }
  };
  const outcome = resolveMulti(documents, selectorAst);
  if (outcome.success) {
    report(formatMatches(outcome.results, truncateOptions), []);
    exitWithCode(ExitCode.SUCCESS);
    return;
  }
  const { selector: failedSelector, message, suggestions } = outcome.error;
  report([], [
    {
      selector: failedSelector,
      reason: message,
      suggestions: suggestions.map((suggestion) => suggestion.selector)
    }
  ]);
  exitWithCode(ExitCode.ERROR);
}
1793
/**
 * Print `select` command errors: as a JSON error response on stdout when
 * `useJson` is set, otherwise as formatted text on stderr.
 */
function outputError2(errors, useJson) {
  if (!useJson) {
    console.error(formatErrorText(errors));
    return;
  }
  const payload = formatErrorResponse("select", errors);
  console.log(JSON.stringify(payload));
}
1800
// Maps a block node type to the shorthand used in child selectors.
var SELECTABLE_BLOCKS = {
  paragraph: "para",
  code: "code",
  list: "list",
  table: "table",
  blockquote: "quote"
};
/**
 * Convert resolver results into output match records.
 *
 * Each record carries the (possibly truncated) Markdown content of the node
 * and a list of its selectable direct children: headings and the block
 * types in SELECTABLE_BLOCKS. Every child gets a per-kind running index and
 * a preview of at most 80 characters.
 *
 * @param results - Resolver results (`node`, `selector`, `childrenAvailable`, ...).
 * @param truncateOpts - Options forwarded to `truncateContent`.
 * @returns Array of `{ selector, type, content, truncated, children_available }`.
 */
function formatMatches(results, truncateOpts) {
  return results.map((result) => {
    const { content, truncated } = truncateContent(extractMarkdown(result.node), truncateOpts);
    const childrenAvailable = [];
    if (result.childrenAvailable && result.node.children) {
      const seen = {};
      // Hand out 0, 1, 2, ... separately for every selector label.
      const nextIndex = (label) => {
        const idx = seen[label] ?? 0;
        seen[label] = idx + 1;
        return idx;
      };
      for (const child of result.node.children) {
        const childType = String(child.type);
        const isHeading = childType === "heading" && "depth" in child;
        if (isHeading) {
          const level = `h${child.depth}`;
          const idx = nextIndex(level);
          // Strip the leading '#' markers so the preview shows only the title.
          const preview = extractMarkdown(child).slice(0, 80).replace(/^#+\s*/, "");
          childrenAvailable.push({ selector: `${level}[${idx}]`, type: "heading", preview });
          continue;
        }
        if (childType in SELECTABLE_BLOCKS) {
          const shorthand = SELECTABLE_BLOCKS[childType];
          const idx = nextIndex(shorthand);
          const preview = extractMarkdown(child).slice(0, 80);
          childrenAvailable.push({ selector: `${shorthand}[${idx}]`, type: childType, preview });
        }
      }
    }
    return {
      selector: String(result.selector),
      type: String(result.node.type),
      content,
      truncated,
      children_available: childrenAvailable
    };
  });
}
1849
+
1850
+ // src/cli/commands/format-command.ts
1851
/**
 * Implementation of the `format` CLI command: print the output format
 * specification for one command, or for both under `# index` / `# select`
 * headings when `command` is neither "index" nor "select".
 *
 * @param command - "index", "select", or anything else for both.
 * @param options - `{ example?: boolean }`; selects the "example" variant
 *        instead of the "terse" one.
 */
function formatCommand(command, options = {}) {
  const style = options.example ? "example" : "terse";
  if (command === "index") {
    console.log(FORMAT_SPECS.index[style]);
    return;
  }
  if (command === "select") {
    console.log(FORMAT_SPECS.select[style]);
    return;
  }
  const sections = [
    ["# index", "index"],
    ["# select", "select"]
  ];
  sections.forEach(([heading, key], position) => {
    if (position > 0) {
      console.log("");
    }
    console.log(heading);
    console.log(FORMAT_SPECS[key][style]);
  });
}
1865
+
1866
+ // src/cli/index.ts
1867
// package.json is loaded through a CommonJS-style require created for this module.
var require2 = createRequire(import.meta.url);
var pkg = require2("../package.json");
var program = new Command();

// Global metadata and the --json switch shared by all subcommands.
program
  .name("mdsel")
  .description(pkg.description)
  .version(pkg.version)
  .option("--json", "Output JSON instead of minimal text");

// `index` is the default subcommand.
program
  .command("index", { isDefault: true })
  .description("Parse documents and emit selector inventory")
  .argument("<files...>", "Markdown files to index")
  .action(async (files) => {
    try {
      const { json } = program.opts();
      await indexCommand(files, { json });
    } catch (error) {
      console.error("Unexpected error:", error);
      process.exit(ExitCode.ERROR);
    }
  });

program
  .command("select")
  .description("Retrieve content via selectors")
  .argument("<selector>", "Selector string")
  .argument("[files...]", "Markdown files to search")
  .action(async (selector, files) => {
    try {
      const { json } = program.opts();
      await selectCommand(selector, files, { json });
    } catch (error) {
      console.error("Unexpected error:", error);
      process.exit(ExitCode.ERROR);
    }
  });

program
  .command("format")
  .description("Output format specification for tool descriptions")
  .argument("[command]", "Command to describe (index, select, or omit for all)")
  .option("--example", "Show example output instead of terse spec")
  .action((command, options) => {
    formatCommand(command, options);
  });

program.parse();