@origints/markdown 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,177 @@
1
+ import { Root, Content } from 'mdast';
2
+ import { MarkdownResult, MarkdownPath, SourcePosition } from './markdown-result';
3
+ import { HeadingData, CodeBlockData, InlineCodeData, LinkData, ImageData, ListData, TableData, BlockquoteData, ParagraphData, DefinitionData } from './typed-extractors';
4
+ type MdastNode = Root | Content;
5
+ /**
6
+ * The type of Markdown node.
7
+ */
8
+ export type MarkdownNodeType = 'root' | 'heading' | 'paragraph' | 'text' | 'emphasis' | 'strong' | 'inlineCode' | 'code' | 'link' | 'image' | 'list' | 'listItem' | 'table' | 'tableRow' | 'tableCell' | 'blockquote' | 'thematicBreak' | 'html' | 'definition' | 'footnoteDefinition' | 'footnoteReference' | 'break' | 'yaml' | 'toml' | 'unknown';
9
+ /**
10
+ * A wrapper around mdast nodes with full metadata preservation.
11
+ *
12
+ * MarkdownNode enables typed navigation through Markdown structures while
13
+ * maintaining provenance information. Each traversal operation returns a new
14
+ * MarkdownNode with an extended path, allowing you to trace exactly how you
15
+ * arrived at any node.
16
+ */
17
+ export declare class MarkdownNode {
18
+ private readonly node;
19
+ private readonly _path;
20
+ private readonly root;
21
+ private constructor();
22
+ /**
23
+ * Creates a MarkdownNode from a parsed mdast tree.
24
+ */
25
+ static fromRoot(root: Root): MarkdownNode;
26
+ /**
27
+ * Creates a MarkdownNode from an mdast node.
28
+ * @internal
29
+ */
30
+ static fromNode(node: MdastNode, path: MarkdownPath, root: Root): MarkdownNode;
31
+ /**
32
+ * Returns the current path through the Markdown structure.
33
+ */
34
+ get path(): MarkdownPath;
35
+ /**
36
+ * Returns the source position of this node, if available.
37
+ */
38
+ get position(): SourcePosition | undefined;
39
+ /**
40
+ * Returns the type of this Markdown node.
41
+ */
42
+ get nodeType(): MarkdownNodeType;
43
+ /**
44
+ * Returns the underlying mdast node.
45
+ */
46
+ unwrap(): MdastNode;
47
+ /**
48
+ * Select a single node matching the CSS-like selector.
49
+ *
50
+ * Supported selectors:
51
+ * - Type: `heading`, `paragraph`, `code`, etc.
52
+ * - Attribute: `[depth=2]`, `[lang="typescript"]`
53
+ * - Pseudo-classes: `:first-child`, `:last-child`, `:nth-child(n)`
54
+ * - Combinators: `heading text`, `list > listItem`
55
+ *
56
+ * @example
57
+ * ```typescript
58
+ * node.select('heading[depth=1]')
59
+ * node.select('code[lang="typescript"]')
60
+ * node.select('list > listItem:first-child')
61
+ * ```
62
+ */
63
+ select(selector: string): MarkdownResult<MarkdownNode>;
64
+ /**
65
+ * Select all nodes matching the CSS-like selector.
66
+ */
67
+ selectAll(selector: string): MarkdownResult<readonly MarkdownNode[]>;
68
+ /**
69
+ * Get all direct children of this node.
70
+ */
71
+ children(): MarkdownResult<readonly MarkdownNode[]>;
72
+ /**
73
+ * Get the child at a specific index.
74
+ */
75
+ child(index: number): MarkdownResult<MarkdownNode>;
76
+ /**
77
+ * Get the first child.
78
+ */
79
+ first(): MarkdownResult<MarkdownNode>;
80
+ /**
81
+ * Get the last child.
82
+ */
83
+ last(): MarkdownResult<MarkdownNode>;
84
+ /**
85
+ * Get all headings in the document.
86
+ */
87
+ headings(): MarkdownResult<readonly MarkdownNode[]>;
88
+ /**
89
+ * Get all code blocks in the document.
90
+ */
91
+ codeBlocks(): MarkdownResult<readonly MarkdownNode[]>;
92
+ /**
93
+ * Get all links in the document.
94
+ */
95
+ links(): MarkdownResult<readonly MarkdownNode[]>;
96
+ /**
97
+ * Get all images in the document.
98
+ */
99
+ images(): MarkdownResult<readonly MarkdownNode[]>;
100
+ /**
101
+ * Get all lists in the document.
102
+ */
103
+ lists(): MarkdownResult<readonly MarkdownNode[]>;
104
+ /**
105
+ * Get all tables in the document.
106
+ */
107
+ tables(): MarkdownResult<readonly MarkdownNode[]>;
108
+ /**
109
+ * Get all paragraphs in the document.
110
+ */
111
+ paragraphs(): MarkdownResult<readonly MarkdownNode[]>;
112
+ /**
113
+ * Get all blockquotes in the document.
114
+ */
115
+ blockquotes(): MarkdownResult<readonly MarkdownNode[]>;
116
+ /**
117
+ * Get the section under a specific heading (content up to the next heading of the same or a higher level).
118
+ */
119
+ section(headingText: string): MarkdownResult<readonly MarkdownNode[]>;
120
+ /**
121
+ * Extract heading data if this is a heading node.
122
+ */
123
+ asHeading(): MarkdownResult<HeadingData>;
124
+ /**
125
+ * Extract code block data if this is a code node.
126
+ */
127
+ asCodeBlock(): MarkdownResult<CodeBlockData>;
128
+ /**
129
+ * Extract inline code data if this is an inlineCode node.
130
+ */
131
+ asInlineCode(): MarkdownResult<InlineCodeData>;
132
+ /**
133
+ * Extract link data if this is a link node.
134
+ */
135
+ asLink(): MarkdownResult<LinkData>;
136
+ /**
137
+ * Extract image data if this is an image node.
138
+ */
139
+ asImage(): MarkdownResult<ImageData>;
140
+ /**
141
+ * Extract list data if this is a list node.
142
+ */
143
+ asList(): MarkdownResult<ListData>;
144
+ /**
145
+ * Extract table data if this is a table node.
146
+ */
147
+ asTable(): MarkdownResult<TableData>;
148
+ /**
149
+ * Extract blockquote data if this is a blockquote node.
150
+ */
151
+ asBlockquote(): MarkdownResult<BlockquoteData>;
152
+ /**
153
+ * Extract paragraph data if this is a paragraph node.
154
+ */
155
+ asParagraph(): MarkdownResult<ParagraphData>;
156
+ /**
157
+ * Extract definition data if this is a definition node.
158
+ */
159
+ asDefinition(): MarkdownResult<DefinitionData>;
160
+ /**
161
+ * Get the text content of this node (recursively extracts all text).
162
+ */
163
+ text(): string;
164
+ isHeading(): boolean;
165
+ isParagraph(): boolean;
166
+ isCode(): boolean;
167
+ isInlineCode(): boolean;
168
+ isLink(): boolean;
169
+ isImage(): boolean;
170
+ isList(): boolean;
171
+ isTable(): boolean;
172
+ isBlockquote(): boolean;
173
+ isText(): boolean;
174
+ private extractText;
175
+ private extractListItem;
176
+ }
177
+ export {};
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Result types for Markdown operations.
3
+ *
4
+ * @module markdown/markdown-result
5
+ */
6
+ /**
7
+ * Path through a Markdown structure.
8
+ * Uses indices for positional navigation and node types for semantic navigation.
9
+ */
10
+ export type MarkdownPath = readonly (string | number)[];
11
+ /**
12
+ * Source position information.
13
+ */
14
+ export interface SourcePosition {
15
+ start: {
16
+ line: number;
17
+ column: number;
18
+ offset?: number;
19
+ };
20
+ end: {
21
+ line: number;
22
+ column: number;
23
+ offset?: number;
24
+ };
25
+ }
26
+ /**
27
+ * Types of Markdown failures.
28
+ */
29
+ export type MarkdownFailureKind = 'parse' | 'type' | 'missing' | 'selector' | 'frontmatter';
30
+ /**
31
+ * Failure information for Markdown operations.
32
+ */
33
+ export interface MarkdownFailure {
34
+ readonly kind: MarkdownFailureKind;
35
+ readonly message: string;
36
+ readonly path: MarkdownPath;
37
+ readonly position?: SourcePosition;
38
+ }
39
+ /**
40
+ * Result type for Markdown operations.
41
+ * Discriminated on `ok`: success carries `value` and `path`; failure carries `failure` with the error details. Check `ok` before reading `value`.
42
+ */
43
+ export type MarkdownResult<T> = {
44
+ readonly ok: true;
45
+ readonly value: T;
46
+ readonly path: MarkdownPath;
47
+ } | {
48
+ readonly ok: false;
49
+ readonly failure: MarkdownFailure;
50
+ };
51
+ /**
52
+ * Create a successful result.
53
+ */
54
+ export declare function ok<T>(value: T, path: MarkdownPath): MarkdownResult<T>;
55
+ /**
56
+ * Create a failure result.
57
+ */
58
+ export declare function fail<T>(kind: MarkdownFailureKind, message: string, path: MarkdownPath, position?: SourcePosition): MarkdownResult<T>;
59
+ /**
60
+ * Format a Markdown path for display.
61
+ */
62
+ export declare function formatMarkdownPath(path: MarkdownPath): string;
@@ -0,0 +1,41 @@
1
+ import { TransformAst, TransformImpl } from '@origints/core';
2
+ /**
3
+ * Options for parsing Markdown.
4
+ */
5
+ export interface MarkdownParseOptions {
6
+ /**
7
+ * Enable GitHub Flavored Markdown (tables, strikethrough, autolinks, task lists).
8
+ * Default: true
9
+ */
10
+ gfm?: boolean;
11
+ /**
12
+ * Parse frontmatter (YAML or TOML at the start of the document).
13
+ * Default: true
14
+ */
15
+ frontmatter?: boolean;
16
+ /**
17
+ * Frontmatter formats to recognize.
18
+ * Default: ['yaml', 'toml']
19
+ */
20
+ frontmatterFormats?: ('yaml' | 'toml')[];
21
+ }
22
+ /**
23
+ * Create a parseMarkdown transform AST.
24
+ *
25
+ * @example
26
+ * ```typescript
27
+ * const plan = Planner.in(loadFile('README.md'))
28
+ * .mapIn(parseMarkdown())
29
+ * .inject((node) => { const h = node.select('heading[depth=1]'); const d = h.ok ? h.value.asHeading() : h; return d.ok ? d.value.text : undefined })
30
+ * .compile()
31
+ * ```
32
+ */
33
+ export declare function parseMarkdown(options?: MarkdownParseOptions): TransformAst;
34
+ /**
35
+ * Transform implementation for parseMarkdown.
36
+ */
37
+ export declare const parseMarkdownImpl: TransformImpl;
38
+ /**
39
+ * Async transform implementation for parseMarkdown (handles streams).
40
+ */
41
+ export declare const parseMarkdownAsyncImpl: TransformImpl;
@@ -0,0 +1,120 @@
1
+ import { SourcePosition } from './markdown-result';
2
+ /**
3
+ * Extracted heading data.
4
+ */
5
+ export interface HeadingData {
6
+ readonly depth: 1 | 2 | 3 | 4 | 5 | 6;
7
+ readonly text: string;
8
+ readonly id?: string;
9
+ readonly position?: SourcePosition;
10
+ }
11
+ /**
12
+ * Extracted code block data.
13
+ */
14
+ export interface CodeBlockData {
15
+ readonly lang?: string;
16
+ readonly meta?: string;
17
+ readonly value: string;
18
+ readonly position?: SourcePosition;
19
+ }
20
+ /**
21
+ * Extracted inline code data.
22
+ */
23
+ export interface InlineCodeData {
24
+ readonly value: string;
25
+ readonly position?: SourcePosition;
26
+ }
27
+ /**
28
+ * Extracted link data.
29
+ */
30
+ export interface LinkData {
31
+ readonly url: string;
32
+ readonly title?: string;
33
+ readonly text: string;
34
+ readonly position?: SourcePosition;
35
+ }
36
+ /**
37
+ * Extracted image data.
38
+ */
39
+ export interface ImageData {
40
+ readonly url: string;
41
+ readonly alt?: string;
42
+ readonly title?: string;
43
+ readonly position?: SourcePosition;
44
+ }
45
+ /**
46
+ * List item data.
47
+ */
48
+ export interface ListItemData {
49
+ readonly checked?: boolean;
50
+ readonly text: string;
51
+ readonly children: readonly ListItemData[];
52
+ readonly position?: SourcePosition;
53
+ }
54
+ /**
55
+ * Extracted list data.
56
+ */
57
+ export interface ListData {
58
+ readonly ordered: boolean;
59
+ readonly start?: number;
60
+ readonly items: readonly ListItemData[];
61
+ readonly position?: SourcePosition;
62
+ }
63
+ /**
64
+ * Table cell data.
65
+ */
66
+ export interface TableCellData {
67
+ readonly text: string;
68
+ readonly align?: 'left' | 'center' | 'right';
69
+ }
70
+ /**
71
+ * Table row data.
72
+ */
73
+ export interface TableRowData {
74
+ readonly cells: readonly TableCellData[];
75
+ }
76
+ /**
77
+ * Extracted table data.
78
+ */
79
+ export interface TableData {
80
+ readonly headers: readonly TableCellData[];
81
+ readonly rows: readonly TableRowData[];
82
+ readonly position?: SourcePosition;
83
+ }
84
+ /**
85
+ * Extracted blockquote data.
86
+ */
87
+ export interface BlockquoteData {
88
+ readonly text: string;
89
+ readonly position?: SourcePosition;
90
+ }
91
+ /**
92
+ * Extracted paragraph data.
93
+ */
94
+ export interface ParagraphData {
95
+ readonly text: string;
96
+ readonly position?: SourcePosition;
97
+ }
98
+ /**
99
+ * Extracted thematic break (horizontal rule) data.
100
+ */
101
+ export interface ThematicBreakData {
102
+ readonly position?: SourcePosition;
103
+ }
104
+ /**
105
+ * Extracted definition data (for reference-style links).
106
+ */
107
+ export interface DefinitionData {
108
+ readonly identifier: string;
109
+ readonly url: string;
110
+ readonly title?: string;
111
+ readonly position?: SourcePosition;
112
+ }
113
+ /**
114
+ * Extracted footnote definition data.
115
+ */
116
+ export interface FootnoteDefinitionData {
117
+ readonly identifier: string;
118
+ readonly text: string;
119
+ readonly position?: SourcePosition;
120
+ }
package/package.json ADDED
@@ -0,0 +1,62 @@
1
+ {
2
+ "name": "@origints/markdown",
3
+ "version": "0.1.0",
4
+ "description": "Markdown parsing and manipulation for Origins with full lineage tracking",
5
+ "type": "module",
6
+ "main": "./dist/index.cjs",
7
+ "module": "./dist/index.es.js",
8
+ "types": "./dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "types": "./dist/index.d.ts",
12
+ "import": "./dist/index.es.js",
13
+ "require": "./dist/index.cjs"
14
+ }
15
+ },
16
+ "dependencies": {
17
+ "unified": "^11.0.0",
18
+ "remark-parse": "^11.0.0",
19
+ "remark-gfm": "^4.0.0",
20
+ "remark-frontmatter": "^5.0.0",
21
+ "remark-rehype": "^11.0.0",
22
+ "rehype-stringify": "^10.0.0",
23
+ "unist-util-select": "^5.0.0",
24
+ "unist-util-visit": "^5.0.0",
25
+ "vfile-matter": "^5.0.0",
26
+ "yaml": "^2.7.0"
27
+ },
28
+ "peerDependencies": {
29
+ "@origints/core": "^0.1.0"
30
+ },
31
+ "devDependencies": {
32
+ "@types/mdast": "^4.0.0",
33
+ "@types/node": "^25.0.0",
34
+ "@vitest/coverage-v8": "^4.0.0",
35
+ "eslint": "^9.0.0",
36
+ "typescript": "^5.9.0",
37
+ "vite": "^7.0.0",
38
+ "vite-plugin-dts": "^4.0.0",
39
+ "vitest": "^4.0.0",
40
+ "@origints/core": "0.1.0",
41
+ "@origints/yaml": "0.1.0"
42
+ },
43
+ "files": [
44
+ "dist"
45
+ ],
46
+ "publishConfig": {
47
+ "access": "public"
48
+ },
49
+ "repository": {
50
+ "type": "git",
51
+ "url": "https://github.com/anthropics/origins.git",
52
+ "directory": "packages/markdown"
53
+ },
54
+ "license": "MIT",
55
+ "scripts": {
56
+ "build": "vite build",
57
+ "test": "vitest run",
58
+ "test:watch": "vitest",
59
+ "typecheck": "tsc -p tsconfig.json --noEmit",
60
+ "lint": "eslint src"
61
+ }
62
+ }