@origints/markdown 0.1.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +145 -136
- package/dist/index.cjs +65 -65
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.es.js +1003 -836
- package/dist/index.es.js.map +1 -1
- package/dist/markdown-node.d.ts +2 -0
- package/dist/markdown-spec-builder.d.ts +36 -0
- package/dist/markdown-spec-executor.d.ts +6 -0
- package/dist/markdown-spec.d.ts +32 -0
- package/dist/parse.d.ts +9 -5
- package/package.json +16 -2
package/README.md
CHANGED
|
@@ -4,14 +4,6 @@
|
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
## Why
|
|
8
|
-
|
|
9
|
-
Parsing Markdown is easy. Knowing exactly where each heading, link, or code block came from in the source is harder. When you're extracting structured data from Markdown documents, you need that connection.
|
|
10
|
-
|
|
11
|
-
This package parses Markdown into a navigable tree while maintaining source positions for every node. Extract frontmatter, query by node type, and convert to HTML - all with full provenance.
|
|
12
|
-
|
|
13
|
-
---
|
|
14
|
-
|
|
15
7
|
## Features
|
|
16
8
|
|
|
17
9
|
- Parse Markdown with GFM (GitHub Flavored Markdown) support
|
|
@@ -24,188 +16,205 @@ This package parses Markdown into a navigable tree while maintaining source posi
|
|
|
24
16
|
|
|
25
17
|
---
|
|
26
18
|
|
|
27
|
-
##
|
|
19
|
+
## Installation
|
|
28
20
|
|
|
29
21
|
```bash
|
|
30
22
|
npm install @origints/markdown @origints/core
|
|
31
23
|
```
|
|
32
24
|
|
|
33
|
-
|
|
34
|
-
import { parseMarkdown } from "@origints/markdown";
|
|
35
|
-
|
|
36
|
-
const md = `
|
|
37
|
-
# Hello World
|
|
25
|
+
---
|
|
38
26
|
|
|
39
|
-
|
|
40
|
-
`;
|
|
27
|
+
## Usage with Planner
|
|
41
28
|
|
|
42
|
-
|
|
29
|
+
### Extract content from a Markdown file
|
|
43
30
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
48
|
-
```
|
|
31
|
+
```ts
|
|
32
|
+
import { Planner, loadFile, run } from '@origints/core'
|
|
33
|
+
import { parseMarkdown } from '@origints/markdown'
|
|
49
34
|
|
|
50
|
-
|
|
35
|
+
const plan = new Planner()
|
|
36
|
+
.in(loadFile('README.md'))
|
|
37
|
+
.mapIn(parseMarkdown())
|
|
38
|
+
.emit((out, $) => out.add('title', $.select('heading').text()))
|
|
39
|
+
.compile()
|
|
51
40
|
|
|
41
|
+
const result = await run(plan, { readFile, registry })
|
|
42
|
+
// result.value: { title: 'My Project' }
|
|
52
43
|
```
|
|
53
|
-
Hello World
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
---
|
|
57
44
|
|
|
58
|
-
|
|
45
|
+
### Extract collections with selectAll
|
|
59
46
|
|
|
60
|
-
|
|
61
|
-
- macOS / Linux / Windows
|
|
62
|
-
- Runtime requirements:
|
|
63
|
-
- Node.js >= 18
|
|
64
|
-
- Package managers:
|
|
65
|
-
- npm, pnpm, yarn
|
|
66
|
-
- Peer dependencies:
|
|
67
|
-
- @origints/core ^0.1.0
|
|
47
|
+
Use `selectAll()` to extract data from all matching nodes as an array:
|
|
68
48
|
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
49
|
+
```ts
|
|
50
|
+
// Extract all heading texts from a document
|
|
51
|
+
const plan = new Planner()
|
|
52
|
+
.in(loadFile('README.md'))
|
|
53
|
+
.mapIn(parseMarkdown())
|
|
54
|
+
.emit((out, $) =>
|
|
55
|
+
out.add(
|
|
56
|
+
'headings',
|
|
57
|
+
$.selectAll('heading', node => node.text())
|
|
58
|
+
)
|
|
59
|
+
)
|
|
60
|
+
.compile()
|
|
61
|
+
|
|
62
|
+
const result = await run(plan, { readFile, registry })
|
|
63
|
+
// result.value: { headings: ['Introduction', 'Getting Started', 'API'] }
|
|
73
64
|
```
|
|
74
65
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
## Usage
|
|
78
|
-
|
|
79
|
-
### Basic parsing
|
|
66
|
+
### Extract structured data from repeated nodes
|
|
80
67
|
|
|
81
68
|
```ts
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
const doc = result.value;
|
|
69
|
+
// Extract the text content of every link
|
|
70
|
+
const plan = new Planner()
|
|
71
|
+
.in(loadFile('README.md'))
|
|
72
|
+
.mapIn(parseMarkdown())
|
|
73
|
+
.emit((out, $) =>
|
|
74
|
+
out.add(
|
|
75
|
+
'links',
|
|
76
|
+
$.selectAll('link', node => node.text())
|
|
77
|
+
)
|
|
78
|
+
)
|
|
79
|
+
.compile()
|
|
80
|
+
```
|
|
95
81
|
|
|
96
|
-
|
|
97
|
-
const headings = doc.findAll("heading");
|
|
82
|
+
### Extract top-level children
|
|
98
83
|
|
|
99
|
-
|
|
100
|
-
const links = doc.findAll("link");
|
|
84
|
+
Use `children()` to extract each direct child of a node:
|
|
101
85
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
86
|
+
```ts
|
|
87
|
+
const plan = new Planner()
|
|
88
|
+
.in(loadFile('README.md'))
|
|
89
|
+
.mapIn(parseMarkdown())
|
|
90
|
+
.emit((out, $) =>
|
|
91
|
+
out.add(
|
|
92
|
+
'blocks',
|
|
93
|
+
$.children(node => node.text())
|
|
94
|
+
)
|
|
95
|
+
)
|
|
96
|
+
.compile()
|
|
105
97
|
```
|
|
106
98
|
|
|
107
|
-
###
|
|
99
|
+
### Extract frontmatter fields
|
|
108
100
|
|
|
109
101
|
```ts
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
102
|
+
// doc.md:
|
|
103
|
+
// ---
|
|
104
|
+
// title: My Post
|
|
105
|
+
// date: 2024-01-15
|
|
106
|
+
// tags:
|
|
107
|
+
// - typescript
|
|
108
|
+
// - origins
|
|
109
|
+
// ---
|
|
110
|
+
// # Content here
|
|
111
|
+
|
|
112
|
+
const plan = new Planner()
|
|
113
|
+
.in(loadFile('doc.md'))
|
|
114
|
+
.mapIn(parseMarkdown())
|
|
115
|
+
.emit((out, $) => out.add('title', $.select('yaml').text()))
|
|
116
|
+
.compile()
|
|
117
|
+
```
|
|
120
118
|
|
|
121
|
-
|
|
122
|
-
`);
|
|
119
|
+
### Combine Markdown with other sources
|
|
123
120
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
121
|
+
```ts
|
|
122
|
+
const plan = new Planner()
|
|
123
|
+
.in(loadFile('README.md'))
|
|
124
|
+
.mapIn(parseMarkdown())
|
|
125
|
+
.emit((out, $) => out.add('title', $.select('heading').text()))
|
|
126
|
+
.in(loadFile('package.json'))
|
|
127
|
+
.mapIn(parseJson())
|
|
128
|
+
.emit((out, $) =>
|
|
129
|
+
out
|
|
130
|
+
.add('version', $.get('version').string())
|
|
131
|
+
.add('name', $.get('name').string())
|
|
132
|
+
)
|
|
133
|
+
.compile()
|
|
128
134
|
```
|
|
129
135
|
|
|
130
|
-
###
|
|
136
|
+
### Standalone usage (without Planner)
|
|
137
|
+
|
|
138
|
+
For direct Markdown navigation:
|
|
131
139
|
|
|
132
140
|
```ts
|
|
133
|
-
import {
|
|
141
|
+
import { parseMarkdownImpl, MarkdownNode } from '@origints/markdown'
|
|
134
142
|
|
|
135
|
-
const
|
|
143
|
+
const node = parseMarkdownImpl.execute(markdownString) as MarkdownNode
|
|
136
144
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
145
|
+
// Select nodes using CSS-like selectors
|
|
146
|
+
const headingResult = node.select('heading')
|
|
147
|
+
if (headingResult.ok) {
|
|
148
|
+
console.log(headingResult.value.text())
|
|
140
149
|
}
|
|
141
|
-
```
|
|
142
150
|
|
|
143
|
-
|
|
151
|
+
// Select by attribute
|
|
152
|
+
const h1Result = node.select('heading[depth=1]')
|
|
153
|
+
const codeResult = node.select('code[lang="typescript"]')
|
|
144
154
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
import { parseMarkdown, registerMarkdownTransforms } from "@origints/markdown";
|
|
148
|
-
|
|
149
|
-
registerMarkdownTransforms(globalRegistry);
|
|
155
|
+
// Nested selectors
|
|
156
|
+
const listItems = node.selectAll('list > listItem')
|
|
150
157
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
.emit((out, $) => {
|
|
154
|
-
const title = $.find("heading")?.text() ?? "Untitled";
|
|
155
|
-
out.add("title", title);
|
|
156
|
-
})
|
|
157
|
-
.compile();
|
|
158
|
+
// Get all text content
|
|
159
|
+
console.log(node.text())
|
|
158
160
|
```
|
|
159
161
|
|
|
160
162
|
### Typed node extraction
|
|
161
163
|
|
|
162
164
|
```ts
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
const heading = result.value.find("heading");
|
|
171
|
-
const data: HeadingData | undefined = heading?.data();
|
|
172
|
-
console.log(data?.depth); // 1, 2, 3, etc.
|
|
165
|
+
const headingResult = node.select('heading')
|
|
166
|
+
if (headingResult.ok) {
|
|
167
|
+
const data = headingResult.value.asHeading()
|
|
168
|
+
if (data.ok) {
|
|
169
|
+
console.log(data.value.depth) // 1, 2, 3, etc.
|
|
170
|
+
}
|
|
171
|
+
}
|
|
173
172
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
const
|
|
177
|
-
|
|
173
|
+
const linkResult = node.select('link')
|
|
174
|
+
if (linkResult.ok) {
|
|
175
|
+
const data = linkResult.value.asLink()
|
|
176
|
+
if (data.ok) {
|
|
177
|
+
console.log(data.value.url)
|
|
178
|
+
}
|
|
178
179
|
}
|
|
179
180
|
```
|
|
180
181
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
## Project Status
|
|
184
|
-
|
|
185
|
-
- **Experimental** - APIs may change
|
|
186
|
-
|
|
187
|
-
---
|
|
182
|
+
### Converting to HTML
|
|
188
183
|
|
|
189
|
-
|
|
184
|
+
```ts
|
|
185
|
+
import { parseMarkdownImpl, toHtml } from '@origints/markdown'
|
|
190
186
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
187
|
+
const node = parseMarkdownImpl.execute('# Hello\n\nWorld') as MarkdownNode
|
|
188
|
+
const html = toHtml(node)
|
|
189
|
+
// <h1>Hello</h1>\n<p>World</p>
|
|
190
|
+
```
|
|
194
191
|
|
|
195
|
-
|
|
192
|
+
### Frontmatter extraction (standalone)
|
|
196
193
|
|
|
197
|
-
|
|
194
|
+
```ts
|
|
195
|
+
import { parseMarkdownImpl, extractFrontmatter } from '@origints/markdown'
|
|
198
196
|
|
|
199
|
-
|
|
200
|
-
|
|
197
|
+
const node = parseMarkdownImpl.execute(markdownWithFrontmatter) as MarkdownNode
|
|
198
|
+
const frontmatter = extractFrontmatter(node)
|
|
199
|
+
if (frontmatter) {
|
|
200
|
+
console.log(frontmatter.title)
|
|
201
|
+
}
|
|
202
|
+
```
|
|
201
203
|
|
|
202
204
|
---
|
|
203
205
|
|
|
204
|
-
##
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
206
|
+
## API
|
|
207
|
+
|
|
208
|
+
| Export | Description |
|
|
209
|
+
| -------------------------------------- | ----------------------------------------------------- |
|
|
210
|
+
| `parseMarkdown(options?)` | Create a transform AST for use with `Planner.mapIn()` |
|
|
211
|
+
| `parseMarkdownImpl` | Sync transform implementation (string input) |
|
|
212
|
+
| `parseMarkdownAsyncImpl` | Async transform implementation (string or stream) |
|
|
213
|
+
| `registerMarkdownTransforms(registry)` | Register all Markdown transforms with a registry |
|
|
214
|
+
| `MarkdownNode` | Navigable wrapper with selector support |
|
|
215
|
+
| `toHtml(node)` | Convert Markdown to HTML |
|
|
216
|
+
| `toJson(node, options?)` | Convert MarkdownNode to JSON |
|
|
217
|
+
| `extractFrontmatter(node)` | Extract YAML frontmatter |
|
|
209
218
|
|
|
210
219
|
---
|
|
211
220
|
|