@origints/markdown 0.1.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,14 +4,6 @@
4
4
 
5
5
  ---
6
6
 
7
- ## Why
8
-
9
- Parsing Markdown is easy. Knowing exactly where each heading, link, or code block came from in the source is harder. When you're extracting structured data from Markdown documents, you need that connection.
10
-
11
- This package parses Markdown into a navigable tree while maintaining source positions for every node. Extract frontmatter, query by node type, and convert to HTML - all with full provenance.
12
-
13
- ---
14
-
15
7
  ## Features
16
8
 
17
9
  - Parse Markdown with GFM (GitHub Flavored Markdown) support
@@ -24,188 +16,205 @@ This package parses Markdown into a navigable tree while maintaining source posi
24
16
 
25
17
  ---
26
18
 
27
- ## Quick Start
19
+ ## Installation
28
20
 
29
21
  ```bash
30
22
  npm install @origints/markdown @origints/core
31
23
  ```
32
24
 
33
- ```ts
34
- import { parseMarkdown } from "@origints/markdown";
35
-
36
- const md = `
37
- # Hello World
25
+ ---
38
26
 
39
- This is a paragraph.
40
- `;
27
+ ## Usage with Planner
41
28
 
42
- const result = parseMarkdown(md);
29
+ ### Extract content from a Markdown file
43
30
 
44
- if (result.ok) {
45
- const heading = result.value.find("heading");
46
- console.log(heading?.text());
47
- }
48
- ```
31
+ ```ts
32
+ import { Planner, loadFile, run } from '@origints/core'
33
+ import { parseMarkdown } from '@origints/markdown'
49
34
 
50
- Expected output:
35
+ const plan = new Planner()
36
+ .in(loadFile('README.md'))
37
+ .mapIn(parseMarkdown())
38
+ .emit((out, $) => out.add('title', $.select('heading').text()))
39
+ .compile()
51
40
 
41
+ const result = await run(plan, { readFile, registry })
42
+ // result.value: { title: 'My Project' }
52
43
  ```
53
- Hello World
54
- ```
55
-
56
- ---
57
44
 
58
- ## Installation
45
+ ### Extract collections with selectAll
59
46
 
60
- - Supported platforms:
61
- - macOS / Linux / Windows
62
- - Runtime requirements:
63
- - Node.js >= 18
64
- - Package managers:
65
- - npm, pnpm, yarn
66
- - Peer dependencies:
67
- - @origints/core ^0.1.0
47
+ Use `selectAll()` to extract data from all matching nodes as an array:
68
48
 
69
- ```bash
70
- npm install @origints/markdown @origints/core
71
- # or
72
- pnpm add @origints/markdown @origints/core
49
+ ```ts
50
+ // Extract all heading texts from a document
51
+ const plan = new Planner()
52
+ .in(loadFile('README.md'))
53
+ .mapIn(parseMarkdown())
54
+ .emit((out, $) =>
55
+ out.add(
56
+ 'headings',
57
+ $.selectAll('heading', node => node.text())
58
+ )
59
+ )
60
+ .compile()
61
+
62
+ const result = await run(plan, { readFile, registry })
63
+ // result.value: { headings: ['Introduction', 'Getting Started', 'API'] }
73
64
  ```
74
65
 
75
- ---
76
-
77
- ## Usage
78
-
79
- ### Basic parsing
66
+ ### Extract structured data from repeated nodes
80
67
 
81
68
  ```ts
82
- import { parseMarkdown } from "@origints/markdown";
83
-
84
- const result = parseMarkdown(`
85
- # Title
86
-
87
- A paragraph with **bold** and *italic*.
88
-
89
- - Item 1
90
- - Item 2
91
- `);
92
-
93
- if (result.ok) {
94
- const doc = result.value;
69
+ // Extract the text (label) of every link
70
+ const plan = new Planner()
71
+ .in(loadFile('README.md'))
72
+ .mapIn(parseMarkdown())
73
+ .emit((out, $) =>
74
+ out.add(
75
+ 'links',
76
+ $.selectAll('link', node => node.text())
77
+ )
78
+ )
79
+ .compile()
80
+ ```
95
81
 
96
- // Find all headings
97
- const headings = doc.findAll("heading");
82
+ ### Extract top-level children
98
83
 
99
- // Find all links
100
- const links = doc.findAll("link");
84
+ Use `children()` to extract each direct child of a node:
101
85
 
102
- // Get text content
103
- const text = doc.text();
104
- }
86
+ ```ts
87
+ const plan = new Planner()
88
+ .in(loadFile('README.md'))
89
+ .mapIn(parseMarkdown())
90
+ .emit((out, $) =>
91
+ out.add(
92
+ 'blocks',
93
+ $.children(node => node.text())
94
+ )
95
+ )
96
+ .compile()
105
97
  ```
106
98
 
107
- ### Frontmatter extraction
99
+ ### Extract frontmatter fields
108
100
 
109
101
  ```ts
110
- import { parseMarkdown, extractFrontmatter } from "@origints/markdown";
111
-
112
- const result = parseMarkdown(`
113
- ---
114
- title: My Post
115
- date: 2024-01-15
116
- tags:
117
- - typescript
118
- - origins
119
- ---
102
+ // doc.md:
103
+ // ---
104
+ // title: My Post
105
+ // date: 2024-01-15
106
+ // tags:
107
+ // - typescript
108
+ // - origins
109
+ // ---
110
+ // # Content here
111
+
112
+ const plan = new Planner()
113
+ .in(loadFile('doc.md'))
114
+ .mapIn(parseMarkdown())
115
+ .emit((out, $) => out.add('title', $.select('yaml').text()))
116
+ .compile()
117
+ ```
120
118
 
121
- # Content here
122
- `);
119
+ ### Combine Markdown with other sources
123
120
 
124
- if (result.ok) {
125
- const frontmatter = extractFrontmatter(result.value);
126
- console.log(frontmatter?.title);
127
- }
121
+ ```ts
122
+ const plan = new Planner()
123
+ .in(loadFile('README.md'))
124
+ .mapIn(parseMarkdown())
125
+ .emit((out, $) => out.add('title', $.select('heading').text()))
126
+ .in(loadFile('package.json'))
127
+ .mapIn(parseJson())
128
+ .emit((out, $) =>
129
+ out
130
+ .add('version', $.get('version').string())
131
+ .add('name', $.get('name').string())
132
+ )
133
+ .compile()
128
134
  ```
129
135
 
130
- ### Converting to HTML
136
+ ### Standalone usage (without Planner)
137
+
138
+ For direct Markdown navigation:
131
139
 
132
140
  ```ts
133
- import { parseMarkdown, toHtml } from "@origints/markdown";
141
+ import { parseMarkdownImpl, MarkdownNode } from '@origints/markdown'
134
142
 
135
- const result = parseMarkdown("# Hello\n\nWorld");
143
+ const node = parseMarkdownImpl.execute(markdownString) as MarkdownNode
136
144
 
137
- if (result.ok) {
138
- const html = toHtml(result.value);
139
- // <h1>Hello</h1>\n<p>World</p>
145
+ // Select nodes using CSS-like selectors
146
+ const headingResult = node.select('heading')
147
+ if (headingResult.ok) {
148
+ console.log(headingResult.value.text())
140
149
  }
141
- ```
142
150
 
143
- ### Using with Origins plans
151
+ // Select by attribute
152
+ const h1Result = node.select('heading[depth=1]')
153
+ const codeResult = node.select('code[lang="typescript"]')
144
154
 
145
- ```ts
146
- import { Planner, loadFile, globalRegistry } from "@origints/core";
147
- import { parseMarkdown, registerMarkdownTransforms } from "@origints/markdown";
148
-
149
- registerMarkdownTransforms(globalRegistry);
155
+ // Nested selectors
156
+ const listItems = node.selectAll('list > listItem')
150
157
 
151
- const plan = Planner.in(loadFile("README.md"))
152
- .mapIn(parseMarkdown())
153
- .emit((out, $) => {
154
- const title = $.find("heading")?.text() ?? "Untitled";
155
- out.add("title", title);
156
- })
157
- .compile();
158
+ // Get all text content
159
+ console.log(node.text())
158
160
  ```
159
161
 
160
162
  ### Typed node extraction
161
163
 
162
164
  ```ts
163
- import { parseMarkdown } from "@origints/markdown";
164
- import type { HeadingData, LinkData } from "@origints/markdown";
165
-
166
- const result = parseMarkdown(content);
167
-
168
- if (result.ok) {
169
- // Get heading with typed data
170
- const heading = result.value.find("heading");
171
- const data: HeadingData | undefined = heading?.data();
172
- console.log(data?.depth); // 1, 2, 3, etc.
165
+ const headingResult = node.select('heading')
166
+ if (headingResult.ok) {
167
+ const data = headingResult.value.asHeading()
168
+ if (data.ok) {
169
+ console.log(data.value.depth) // 1, 2, 3, etc.
170
+ }
171
+ }
173
172
 
174
- // Get link with typed data
175
- const link = result.value.find("link");
176
- const linkData: LinkData | undefined = link?.data();
177
- console.log(linkData?.url);
173
+ const linkResult = node.select('link')
174
+ if (linkResult.ok) {
175
+ const data = linkResult.value.asLink()
176
+ if (data.ok) {
177
+ console.log(data.value.url)
178
+ }
178
179
  }
179
180
  ```
180
181
 
181
- ---
182
-
183
- ## Project Status
184
-
185
- - **Experimental** - APIs may change
186
-
187
- ---
182
+ ### Converting to HTML
188
183
 
189
- ## Non-Goals
184
+ ```ts
185
+ import { parseMarkdownImpl, toHtml, MarkdownNode } from '@origints/markdown'
190
186
 
191
- - Not a Markdown renderer/serializer
192
- - Not a Markdown editor
193
- - Not a full MDX parser
187
+ const node = parseMarkdownImpl.execute('# Hello\n\nWorld') as MarkdownNode
188
+ const html = toHtml(node)
189
+ // <h1>Hello</h1>\n<p>World</p>
190
+ ```
194
191
 
195
- ---
192
+ ### Frontmatter extraction (standalone)
196
193
 
197
- ## Documentation
194
+ ```ts
195
+ import { parseMarkdownImpl, extractFrontmatter, MarkdownNode } from '@origints/markdown'
198
196
 
199
- - See `@origints/core` for Origins concepts
200
- - See [remark](https://github.com/remarkjs/remark) for underlying parser
197
+ const node = parseMarkdownImpl.execute(markdownWithFrontmatter) as MarkdownNode
198
+ const frontmatter = extractFrontmatter(node)
199
+ if (frontmatter) {
200
+ console.log(frontmatter.title)
201
+ }
202
+ ```
201
203
 
202
204
  ---
203
205
 
204
- ## Contributing
205
-
206
- - Open an issue before large changes
207
- - Keep PRs focused
208
- - Tests required for new features
206
+ ## API
207
+
208
+ | Export | Description |
209
+ | -------------------------------------- | ----------------------------------------------------- |
210
+ | `parseMarkdown(options?)` | Create a transform AST for use with `Planner.mapIn()` |
211
+ | `parseMarkdownImpl` | Sync transform implementation (string input) |
212
+ | `parseMarkdownAsyncImpl` | Async transform implementation (string or stream) |
213
+ | `registerMarkdownTransforms(registry)` | Register all Markdown transforms with a registry |
214
+ | `MarkdownNode` | Navigable wrapper with selector support |
215
+ | `toHtml(node)` | Convert Markdown to HTML |
216
+ | `toJson(node, options?)` | Convert MarkdownNode to JSON |
217
+ | `extractFrontmatter(node)` | Extract YAML frontmatter |
209
218
 
210
219
  ---
211
220