mdsel 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +384 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.mjs +1892 -0
- package/package.json +77 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 dabstractor
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
# mdsel
|
|
2
|
+
|
|
3
|
+
Declarative Markdown semantic selection CLI for LLM agents.
|
|
4
|
+
|
|
5
|
+
mdsel parses Markdown documents into semantic trees and exposes machine-addressable selectors for every meaningful chunk. It enables LLMs to request exactly the content they want—no more, no less—without loading entire files into context.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install -g mdsel
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
**Requirements**: Node.js >=18.0.0
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Index a document to discover available selectors
|
|
19
|
+
mdsel index README.md
|
|
20
|
+
|
|
21
|
+
# Select a specific heading (shorthand)
|
|
22
|
+
mdsel select h1.0 README.md
|
|
23
|
+
|
|
24
|
+
# Select the first code block under a heading
|
|
25
|
+
mdsel select "h2.0/code.0" README.md
|
|
26
|
+
|
|
27
|
+
# Select multiple headings with comma syntax
|
|
28
|
+
mdsel select h2.0,2 README.md
|
|
29
|
+
|
|
30
|
+
# Limit output to first 10 lines
|
|
31
|
+
mdsel select "h2.0?head=10" README.md
|
|
32
|
+
|
|
33
|
+
# Limit output to last 5 lines
|
|
34
|
+
mdsel select "h2.0?tail=5" README.md
|
|
35
|
+
|
|
36
|
+
# Use JSON output for programmatic consumption
|
|
37
|
+
mdsel index README.md --json
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Commands
|
|
41
|
+
|
|
42
|
+
### index
|
|
43
|
+
|
|
44
|
+
Parse documents and emit selector inventory.
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
mdsel index <files...>
|
|
48
|
+
mdsel index <files...> --json
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
**Example**:
|
|
52
|
+
```bash
|
|
53
|
+
mdsel index README.md docs/API.md
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Text Output** (default):
|
|
57
|
+
```
|
|
58
|
+
h1.0 mdsel
|
|
59
|
+
h2.0 Installation
|
|
60
|
+
h2.1 Quick Start
|
|
61
|
+
h2.2 Commands
|
|
62
|
+
h3.0 index
|
|
63
|
+
h3.1 select
|
|
64
|
+
---
|
|
65
|
+
code:19 para:23 list:5 table:3
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**JSON Output** (`--json` flag):
|
|
69
|
+
```json
|
|
70
|
+
{
|
|
71
|
+
"success": true,
|
|
72
|
+
"command": "index",
|
|
73
|
+
"timestamp": "2025-01-15T10:30:00.000Z",
|
|
74
|
+
"data": {
|
|
75
|
+
"documents": [
|
|
76
|
+
{
|
|
77
|
+
"namespace": "readme",
|
|
78
|
+
"file_path": "README.md",
|
|
79
|
+
"headings": [...],
|
|
80
|
+
"blocks": {
|
|
81
|
+
"paragraphs": 5,
|
|
82
|
+
"code_blocks": 2,
|
|
83
|
+
"lists": 1,
|
|
84
|
+
"tables": 0,
|
|
85
|
+
"blockquotes": 0
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
],
|
|
89
|
+
"summary": {
|
|
90
|
+
"total_documents": 1,
|
|
91
|
+
"total_nodes": 8,
|
|
92
|
+
"total_selectors": 8
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### select
|
|
99
|
+
|
|
100
|
+
Retrieve content via selectors.
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
mdsel select <selector> [files...]
|
|
104
|
+
mdsel select <selector> [files...] --json
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**Arguments**:
|
|
108
|
+
- `<selector>` - Selector string (see [Selectors](#selectors))
|
|
109
|
+
- `[files...]` - Markdown files to search (optional, uses stdin if omitted)
|
|
110
|
+
|
|
111
|
+
**Options**:
|
|
112
|
+
- `--json` - Output JSON instead of text
|
|
113
|
+
|
|
114
|
+
**Examples**:
|
|
115
|
+
```bash
|
|
116
|
+
# Select first h2 (shorthand)
|
|
117
|
+
mdsel select h2.0 README.md
|
|
118
|
+
|
|
119
|
+
# Select first code block
|
|
120
|
+
mdsel select code.0 README.md
|
|
121
|
+
|
|
122
|
+
# Cross-document selection (all documents)
|
|
123
|
+
mdsel select h1.0 README.md GUIDE.md
|
|
124
|
+
|
|
125
|
+
# Limit output to first 10 lines
|
|
126
|
+
mdsel select "h2.0?head=10" README.md
|
|
127
|
+
|
|
128
|
+
# Limit output to last 5 lines
|
|
129
|
+
mdsel select "h2.0?tail=5" README.md
|
|
130
|
+
|
|
131
|
+
# Range selection
|
|
132
|
+
mdsel select h2.1-3 README.md
|
|
133
|
+
|
|
134
|
+
# Multiple specific indices
|
|
135
|
+
mdsel select h2.0,2,4 README.md
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
**Text Output** (default):
|
|
139
|
+
```
|
|
140
|
+
## Quick Start
|
|
141
|
+
|
|
142
|
+
To get started...
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
**Multiple Results** (each result is prefixed with its selector):
|
|
146
|
+
```
|
|
147
|
+
heading:h2.0:
|
|
148
|
+
## Installation
|
|
149
|
+
content...
|
|
150
|
+
heading:h2.1:
|
|
151
|
+
## Quick Start
|
|
152
|
+
content...
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**Error Output**:
|
|
156
|
+
```
|
|
157
|
+
!h2.99
|
|
158
|
+
Index out of range: document has 3 h2 headings
|
|
159
|
+
~h2.0 ~h2.1 ~h2.2
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Selectors
|
|
163
|
+
|
|
164
|
+
Selectors are path-based, ordinal, stateless, and deterministic. They resemble CSS/XPath conceptually but are purpose-built for Markdown.
|
|
165
|
+
|
|
166
|
+
### Syntax
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
[namespace::]type[index][/path][?query]
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
- **namespace** (optional) - Document identifier, defaults to all documents
|
|
173
|
+
- **type** - Node type (root, heading, section, block) or shorthand
|
|
174
|
+
- **index** (optional) - 0-based ordinal: `.N` or `[N]` (single), `.N-M` or `[N-M]` (inclusive range), `.N,M,O` or `[N,M,O]` (list)
|
|
175
|
+
- **path** (optional) - Additional path segments for nested selection
|
|
176
|
+
- **query** (optional) - Query parameters (e.g., `?head=10`, `?tail=5`)
|
|
177
|
+
|
|
178
|
+
### Node Types
|
|
179
|
+
|
|
180
|
+
| Category | Full Form | Shorthand |
|
|
181
|
+
|----------|-----------|-----------|
|
|
182
|
+
| Root | `root` | - |
|
|
183
|
+
| Headings | `heading:h1` ... `heading:h6` | `h1` ... `h6` |
|
|
184
|
+
| Sections | `section` | - |
|
|
185
|
+
| Blocks | `block:paragraph` | `para`, `paragraph` |
|
|
186
|
+
| | `block:code` | `code` |
|
|
187
|
+
| | `block:list` | `list` |
|
|
188
|
+
| | `block:table` | `table` |
|
|
189
|
+
| | `block:blockquote` | `quote`, `blockquote` |
|
|
190
|
+
|
|
191
|
+
### Index Syntax
|
|
192
|
+
|
|
193
|
+
Dot and bracket notations are equivalent, and both accept ranges and comma-separated lists:
|
|
194
|
+
|
|
195
|
+
| Notation | Example | Meaning |
|
|
196
|
+
|----------|---------|---------|
|
|
197
|
+
| Dot | `h2.0` | First h2 |
|
|
198
|
+
| Bracket | `h2[0]` | First h2 |
|
|
199
|
+
| Range | `h2.1-3` or `h2[1-3]` | h2.1, h2.2, h2.3 |
|
|
200
|
+
| Comma list | `h2.0,2,4` or `h2[0,2,4]` | h2.0, h2.2, h2.4 |
|
|
201
|
+
| No index | `h2` | All h2 headings |
|
|
202
|
+
|
|
203
|
+
### Examples
|
|
204
|
+
|
|
205
|
+
**Basic selection**:
|
|
206
|
+
```bash
|
|
207
|
+
root # Document root
|
|
208
|
+
h1.0 # First h1 heading
|
|
209
|
+
h2.1 # Second h2 heading
|
|
210
|
+
code.0 # First code block
|
|
211
|
+
para.2 # Third paragraph
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
**Full form (equivalent)**:
|
|
215
|
+
```bash
|
|
216
|
+
heading:h1[0] # First h1 heading
|
|
217
|
+
block:code[0] # First code block
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
**Namespace selection**:
|
|
221
|
+
```bash
|
|
222
|
+
readme::root # Root in specific document
|
|
223
|
+
docs::h2.0 # First h2 in docs
|
|
224
|
+
api::table.1 # Second table in api
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
**Path composition**:
|
|
228
|
+
```bash
|
|
229
|
+
h2.1/code.0 # First code block under second h2
|
|
230
|
+
section.0/list.1 # Second list in first section
|
|
231
|
+
docs::h2.0/section.0/code.0 # Nested path with namespace
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
**Range and list selection**:
|
|
235
|
+
```bash
|
|
236
|
+
h2.0-2 # First three h2 headings
|
|
237
|
+
h2.1,3,5 # 2nd, 4th, and 6th h2 headings
|
|
238
|
+
code.0,2 # 1st and 3rd code blocks
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
**Query parameters**:
|
|
242
|
+
```bash
|
|
243
|
+
h2.0?head=10 # First 10 lines of content
|
|
244
|
+
h2.0?tail=5 # Last 5 lines of content
|
|
245
|
+
section.2?head=20 # First 20 lines of section
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
**Cross-document selection**:
|
|
249
|
+
```bash
|
|
250
|
+
h1.0 # First h1 from ALL documents
|
|
251
|
+
code.0 # First code block from ALL documents
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
### Index Semantics
|
|
255
|
+
|
|
256
|
+
- Index is **0-based** (first item is index 0)
|
|
257
|
+
- Index counts among siblings of the same type
|
|
258
|
+
- Index is relative to parent context, not global
|
|
259
|
+
- No index means select **all** matches of that type
|
|
260
|
+
|
|
261
|
+
## Output Format
|
|
262
|
+
|
|
263
|
+
Default output is compact text optimized for LLM token efficiency. Use `--json` for structured JSON output.
|
|
264
|
+
|
|
265
|
+
### Index Response Schema (JSON)
|
|
266
|
+
|
|
267
|
+
```typescript
|
|
268
|
+
interface IndexResponse {
|
|
269
|
+
documents: DocumentIndex[];
|
|
270
|
+
summary: {
|
|
271
|
+
total_documents: number;
|
|
272
|
+
total_nodes: number;
|
|
273
|
+
total_selectors: number;
|
|
274
|
+
};
|
|
275
|
+
}
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
### Select Response Schema (JSON)
|
|
279
|
+
|
|
280
|
+
```typescript
|
|
281
|
+
interface SelectResponse {
|
|
282
|
+
matches: {
|
|
283
|
+
selector: string;
|
|
284
|
+
type: string;
|
|
285
|
+
content: string;
|
|
286
|
+
truncated: boolean;
|
|
287
|
+
children_available: {
|
|
288
|
+
selector: string;
|
|
289
|
+
type: string;
|
|
290
|
+
preview: string;
|
|
291
|
+
}[];
|
|
292
|
+
}[];
|
|
293
|
+
unresolved: {
|
|
294
|
+
selector: string;
|
|
295
|
+
reason: string;
|
|
296
|
+
suggestions: string[];
|
|
297
|
+
}[];
|
|
298
|
+
}
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### Truncation
|
|
302
|
+
|
|
303
|
+
By default, full content is returned. Use `?head=N` or `?tail=N` query parameters to limit output to the first or last N lines. Truncated content includes a `[truncated]` marker.
|
|
304
|
+
|
|
305
|
+
## Error Handling
|
|
306
|
+
|
|
307
|
+
### Exit Codes
|
|
308
|
+
|
|
309
|
+
| Code | Meaning |
|
|
310
|
+
|------|---------|
|
|
311
|
+
| 0 | Success |
|
|
312
|
+
| 1 | Error |
|
|
313
|
+
| 2 | Usage error |
|
|
314
|
+
|
|
315
|
+
### Error Types
|
|
316
|
+
|
|
317
|
+
| Type | Description |
|
|
318
|
+
|------|-------------|
|
|
319
|
+
| `FILE_NOT_FOUND` | Specified file does not exist |
|
|
320
|
+
| `PARSE_ERROR` | Markdown parsing failed |
|
|
321
|
+
| `INVALID_SELECTOR` | Selector syntax is invalid |
|
|
322
|
+
| `SELECTOR_NOT_FOUND` | Selector does not match any nodes |
|
|
323
|
+
| `NAMESPACE_NOT_FOUND` | Specified namespace does not exist |
|
|
324
|
+
| `PROCESSING_ERROR` | General processing error |
|
|
325
|
+
|
|
326
|
+
### Error Response Example
|
|
327
|
+
|
|
328
|
+
**Text format** (`!` marks the unresolved selector, followed by the reason; `~` marks each suggested selector):
|
|
329
|
+
```
|
|
330
|
+
!h2.99
|
|
331
|
+
Index out of range: document has 3 h2 headings
|
|
332
|
+
~h2.0 ~h2.1 ~h2.2
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
**JSON format**:
|
|
336
|
+
```json
|
|
337
|
+
{
|
|
338
|
+
"success": false,
|
|
339
|
+
"command": "select",
|
|
340
|
+
"timestamp": "2025-01-15T10:38:00.000Z",
|
|
341
|
+
"data": {
|
|
342
|
+
"matches": [],
|
|
343
|
+
"unresolved": [
|
|
344
|
+
{
|
|
345
|
+
"selector": "h2.99",
|
|
346
|
+
"reason": "Index out of range: document has 3 h2 headings",
|
|
347
|
+
"suggestions": ["h2.0", "h2.1", "h2.2"]
|
|
348
|
+
}
|
|
349
|
+
]
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
### Suggestions
|
|
355
|
+
|
|
356
|
+
When a selector fails to resolve, the tool provides fuzzy-matched suggestions based on:
|
|
357
|
+
- Known selector grammar
|
|
358
|
+
- Existing selectors in the document
|
|
359
|
+
- Levenshtein distance and prefix similarity
|
|
360
|
+
|
|
361
|
+
## Development
|
|
362
|
+
|
|
363
|
+
```bash
|
|
364
|
+
# Run tests
|
|
365
|
+
npm test
|
|
366
|
+
|
|
367
|
+
# Build project
|
|
368
|
+
npm run build
|
|
369
|
+
|
|
370
|
+
# Lint code
|
|
371
|
+
npm run lint
|
|
372
|
+
|
|
373
|
+
# Format code
|
|
374
|
+
npm run format
|
|
375
|
+
|
|
376
|
+
# Type check
|
|
377
|
+
npm run type-check
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
**Requirements**: Node.js >=18.0.0, npm
|
|
381
|
+
|
|
382
|
+
## License
|
|
383
|
+
|
|
384
|
+
MIT
|
package/dist/cli.d.ts
ADDED