@tishlang/tishdoc-parse 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,55 @@
1
+ # @tishlang/tishdoc-parse
2
+
3
+ Tish-native Markdown / TishDoc parser. It parses frontmatter (YAML / JSON), `meta.imports`, `:::directive` blocks, includes, and a CommonMark/GFM-ish body grammar (headings, paragraphs, fenced code, ordered and unordered lists, blockquotes, thematic breaks, tables, links, images, italics, bold, strikethrough, code spans, autolinks, hard line breaks) into an AST plus diagnostics.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install @tishlang/tishdoc-parse
9
+ ```
10
+
11
+ ## Usage (Tish)
12
+
13
+ ```tish
14
+ import { parseDocument, stringifyAst } from "@tishlang/tishdoc-parse"
15
+
16
+ let result = parseDocument(source, null)
17
+ let json = stringifyAst(result.ast)
18
+ ```
19
+
20
+ ## Exports
21
+
22
+ - `parseDocument(source, options) -> { ast, diagnostics, bodyStartLine1 }`
23
+ - `stringifyDiagnostics(diagnostics) -> string`
24
+ - `stringifyAst(ast) -> string`
25
+
26
+ `options` is optional and may include:
27
+
28
+ | Key | Type | Notes |
29
+ |---|---|---|
30
+ | `readPartial` | `(path) => string` | Resolver for `:::include{path=…}` blocks. |
31
+ | `maxIncludeDepth` | `number` | Default 16. |
32
+ | `strictMeta` | `boolean` | Promote meta validation warnings to errors. |
33
+
34
+ ## AST node types (subset for README rendering)
35
+
36
+ `Document`, `Heading`, `Paragraph`, `Text`, `Emphasis` (`<strong>`), `Italic` (`<em>`), `Strikethrough`, `CodeSpan`, `CodeBlock`, `Link`, `Image`, `LineBreak`, `BlockQuote`, `ThematicBreak`, `List` (unordered), `OrderedList`, `ListItem`, `Table`, `TableCell`, `DirectiveBlock`, `DirectiveLeaf`, `Include`.
37
+
38
+ See `src/ast.tish` for builder signatures.
39
+
40
+ ## Companions
41
+
42
+ - [`@tishlang/tishdoc-render-html`](https://github.com/tishlang/tishdoc-render-html) — AST → safe semantic HTML (default consumer for READMEs and docs).
43
+ - [`@tishlang/tish-syntax-highlight`](https://github.com/tishlang/tish-syntax-highlight) — fenced-code syntax highlighter used by the HTML renderer.
44
+ - [`@tishlang/tishdoc-render-typst`](https://github.com/tishlang/tishdoc-render-typst) — AST → Typst source (for PDF / print pipelines).
45
+
46
+ ## Build
47
+
48
+ ```bash
49
+ tish build --target js --no-optimize src/main.tish -o dist/tishdoc-parse.js
50
+ TISH_EXPORT_OUT=dist/tishdoc-parse.js \
51
+ TISH_EXPORT_NAMES=parseDocument,stringifyDiagnostics,stringifyAst \
52
+ node scripts/append-exports.mjs
53
+ ```
54
+
55
+ The `scripts/append-exports.mjs` script replaces the older `tish-creator/scripts/js_append_exports.tish`, so this package is now standalone.
package/package.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "name": "@tishlang/tishdoc-parse",
3
+ "version": "0.1.0",
4
+ "description": "TishDoc parse pipeline: frontmatter, includes, Markdown+directives → AST",
5
+ "type": "module",
6
+ "tish": {
7
+ "module": "./src/main.tish"
8
+ },
9
+ "main": "./src/main.tish",
10
+ "exports": {
11
+ ".": {
12
+ "tish": "./src/main.tish",
13
+ "import": "./dist/tishdoc-parse.js",
14
+ "default": "./dist/tishdoc-parse.js"
15
+ }
16
+ },
17
+ "devDependencies": {
18
+ "@tishlang/tish": "1.9.2"
19
+ },
20
+ "repository": {
21
+ "type": "git",
22
+ "url": "git+https://github.com/tishlang/tishdoc-parse.git"
23
+ },
24
+ "scripts": {
25
+ "build": "tish build --target js --no-optimize src/main.tish -o dist/tishdoc-parse.js && TISH_EXPORT_OUT=dist/tishdoc-parse.js TISH_EXPORT_NAMES=parseDocument,stringifyDiagnostics,stringifyAst node scripts/append-exports.mjs",
26
+ "test": "tish run test/run_tests.tish && tish run test/golden_note.tish && tish run test/markdown_blocks.tish && tish run test/markdown_document.tish && tish run test/include_resolve.tish && tish run test/meta_imports.tish"
27
+ },
28
+ "dependencies": {},
29
+ "publishConfig": {
30
+ "access": "public"
31
+ },
32
+ "files": [
33
+ "src",
34
+ "README.md"
35
+ ]
36
+ }
package/src/ast.tish ADDED
@@ -0,0 +1,181 @@
1
+ // AST node builders — schema matches docs/schema/tishdoc-ast.schema.json
2
+
3
// Returns the AST schema version number that `documentNode` stamps onto
// every Document node.
export fn schemaVersion() {
  let version = 1
  return version
}
6
+
7
// Builds the root `Document` node.
//   meta     - stored as-is under "meta"
//   children - stored as-is under "children"
// The node also records the current `schemaVersion()` under "schemaVersion".
// Keys are assigned in the order type, schemaVersion, meta, children.
export fn documentNode(meta, children) {
  let node = {}
  node["type"] = "Document"
  node["schemaVersion"] = schemaVersion()
  node["meta"] = meta
  node["children"] = children
  return node
}
15
+
16
// Builds a `Heading` node.
//   level    - stored as-is under "level" (no validation is done here)
//   children - stored as-is under "children"
export fn headingNode(level, children) {
  let node = {}
  node["type"] = "Heading"
  node["level"] = level
  node["children"] = children
  return node
}
23
+
24
// Builds a `Paragraph` node whose "children" key holds the given value as-is.
export fn paragraphNode(children) {
  let node = {}
  node["type"] = "Paragraph"
  node["children"] = children
  return node
}
30
+
31
// Builds a `Text` leaf node; `value` is stored unchanged under "value".
export fn textNode(value) {
  let node = {}
  node["type"] = "Text"
  node["value"] = value
  return node
}
37
+
38
// Builds an `Emphasis` node (the README maps this type to `<strong>`);
// `children` is stored unchanged under "children".
export fn emphasisNode(children) {
  let node = {}
  node["type"] = "Emphasis"
  node["children"] = children
  return node
}
44
+
45
// Builds a `CodeSpan` leaf node; `value` is stored unchanged under "value".
export fn codeSpanNode(value) {
  let node = {}
  node["type"] = "CodeSpan"
  node["value"] = value
  return node
}
51
+
52
// Builds a `List` node.
//   ordered - stored as-is under "ordered"
//   items   - stored as-is under "items"
export fn listNode(ordered, items) {
  let node = {}
  node["type"] = "List"
  node["ordered"] = ordered
  node["items"] = items
  return node
}
59
+
60
// Builds a `ListItem` node; `children` is stored unchanged under "children".
export fn listItemNode(children) {
  let node = {}
  node["type"] = "ListItem"
  node["children"] = children
  return node
}
66
+
67
// Builds a `CodeBlock` node.
//   lang  - stored as-is under "lang"
//   value - stored as-is under "value"
export fn codeBlockNode(lang, value) {
  let node = {}
  node["type"] = "CodeBlock"
  node["lang"] = lang
  node["value"] = value
  return node
}
74
+
75
// Builds a `DirectiveBlock` node.
//   name     - stored as-is under "name"
//   attrs    - stored as-is under "attrs"
//   children - stored as-is under "children"
export fn directiveBlockNode(name, attrs, children) {
  let node = {}
  node["type"] = "DirectiveBlock"
  node["name"] = name
  node["attrs"] = attrs
  node["children"] = children
  return node
}
83
+
84
// Builds a `DirectiveLeaf` node (a directive without children).
//   name  - stored as-is under "name"
//   attrs - stored as-is under "attrs"
export fn directiveLeafNode(name, attrs) {
  let node = {}
  node["type"] = "DirectiveLeaf"
  node["name"] = name
  node["attrs"] = attrs
  return node
}
91
+
92
// Builds an `Include` node. Each argument is stored unchanged under the key
// of the same name, in the order path, resolved, error, children.
export fn includeNode(path, resolved, error, children) {
  let node = {}
  node["type"] = "Include"
  node["path"] = path
  node["resolved"] = resolved
  node["error"] = error
  node["children"] = children
  return node
}
101
+
102
+ // --- Extended GFM/CommonMark-ish nodes for README rendering ----------------
103
+ // Added so the same TishDoc pipeline can render arbitrary `README.md` files
104
+ // (e.g. VS Code extension marketplace pages). These coexist with the
105
+ // existing TishDoc nodes; older renderers should fall through to a no-op
106
+ // for unknown `type` values.
107
+
108
// Builds an `Italic` node (the README maps this type to `<em>`);
// `children` is stored unchanged under "children".
export fn italicNode(children) {
  let node = {}
  node["type"] = "Italic"
  node["children"] = children
  return node
}
114
+
115
// Builds a `Strikethrough` node; `children` is stored unchanged under "children".
export fn strikethroughNode(children) {
  let node = {}
  node["type"] = "Strikethrough"
  node["children"] = children
  return node
}
121
+
122
// Builds a `Link` node.
//   href     - stored as-is under "href"
//   title    - stored as-is under "title"
//   children - stored as-is under "children"
export fn linkNode(href, title, children) {
  let node = {}
  node["type"] = "Link"
  node["href"] = href
  node["title"] = title
  node["children"] = children
  return node
}
130
+
131
// Builds an `Image` leaf node.
//   src   - stored as-is under "src"
//   alt   - stored as-is under "alt"
//   title - stored as-is under "title"
export fn imageNode(src, alt, title) {
  let node = {}
  node["type"] = "Image"
  node["src"] = src
  node["alt"] = alt
  node["title"] = title
  return node
}
139
+
140
// Builds a `LineBreak` node; it carries no data besides its type.
export fn lineBreakNode() {
  let node = {}
  node["type"] = "LineBreak"
  return node
}
145
+
146
// Builds a `BlockQuote` node; `children` is stored unchanged under "children".
export fn blockQuoteNode(children) {
  let node = {}
  node["type"] = "BlockQuote"
  node["children"] = children
  return node
}
152
+
153
// Builds a `ThematicBreak` node; it carries no data besides its type.
export fn thematicBreakNode() {
  let node = {}
  node["type"] = "ThematicBreak"
  return node
}
158
+
159
// Builds an `OrderedList` node.
//   start - stored as-is under "start"
//   items - stored as-is under "items"
export fn orderedListNode(start, items) {
  let node = {}
  node["type"] = "OrderedList"
  node["start"] = start
  node["items"] = items
  return node
}
166
+
167
// Builds a `Table` node.
//   aligns - stored as-is under "aligns"
//   header - stored as-is under "header"
//   rows   - stored as-is under "rows"
export fn tableNode(aligns, header, rows) {
  let node = {}
  node["type"] = "Table"
  node["aligns"] = aligns
  node["header"] = header
  node["rows"] = rows
  return node
}
175
+
176
// Builds a `TableCell` node; `children` is stored unchanged under "children".
export fn tableCellNode(children) {
  let node = {}
  node["type"] = "TableCell"
  node["children"] = children
  return node
}
package/src/attrs.tish ADDED
@@ -0,0 +1,92 @@
1
+ // Parse `{ key=value key2="v" }` into a plain object (minimal).
2
+
3
+ import { trim } from "./trim.tish"
4
+
5
// Parses a directive attribute string such as `{ key=value key2="v" }` into a
// plain object of string values.
//   s - attribute text; null yields an empty object, non-strings are
//       converted with String().
// Returns an empty object when the trimmed text is empty, is not wrapped in
// `{` ... `}`, or has nothing between the braces. Tokens come from
// `splitTopLevelSpaces`; a token with no `=` or with `=` as its first
// character is skipped. One matching pair of surrounding double or single
// quotes is stripped from each value. A later duplicate key overwrites an
// earlier one.
export fn parseDirectiveAttrs(s) {
  let attrs = {}
  if (s === null) {
    return attrs
  }
  let text = s
  if (typeof text !== "string") {
    text = String(text)
  }
  text = trim(text)
  if (text === "" || !text.startsWith("{") || !text.endsWith("}")) {
    return attrs
  }
  let body = trim(text.slice(1, text.length - 1))
  if (body === "") {
    return attrs
  }
  let tokens = splitTopLevelSpaces(body)
  let idx = 0
  while (idx < tokens.length) {
    let token = tokens[idx]
    idx = idx + 1
    let eqAt = token.indexOf("=")
    if (eqAt <= 0) {
      // No key/value separator, or an empty key: ignore this token.
      continue
    }
    let key = trim(token.slice(0, eqAt))
    let value = trim(token.slice(eqAt + 1))
    let first = value.charAt(0)
    let last = value.charAt(value.length - 1)
    if ((first === "\"" || first === "'") && last === first) {
      value = value.slice(1, value.length - 1)
    }
    attrs[key] = value
  }
  return attrs
}
40
+
41
// Splits `inner` on space characters that are outside quotes and outside
// any (), [] or {} nesting. Each piece is trimmed and empty pieces are dropped.
//   inner - the text between a directive's outer braces
// Returns an array of trimmed, non-empty tokens.
//
// Inside a quoted string a backslash escapes the next character, so an
// escaped quote does not close the string and an escaped backslash followed
// by the closing quote (`"a\\"`) does close it. Escape characters are kept
// in the token; nothing is unescaped here.
//
// Nesting depth never goes below zero: a stray closing bracket is copied to
// the token but does not stop later spaces from splitting.
fn splitTopLevelSpaces(inner) {
  let out = []
  let buf = ""
  let i = 0
  let depth = 0
  let inStr = false
  let sq = ""
  let esc = false
  while (i < inner.length) {
    let c = inner.charAt(i)
    if (inStr) {
      buf = buf + c
      if (esc) {
        // This character was escaped by the preceding backslash.
        esc = false
      } else if (c === "\\") {
        esc = true
      } else if (c === sq) {
        inStr = false
      }
      i = i + 1
      continue
    }
    if (c === "\"" || c === "'") {
      inStr = true
      sq = c
      esc = false
      buf = buf + c
      i = i + 1
      continue
    }
    if (c === "{" || c === "(" || c === "[") {
      depth = depth + 1
      buf = buf + c
      i = i + 1
      continue
    }
    if (c === "}" || c === ")" || c === "]") {
      // Clamp at zero so an unbalanced closer cannot disable splitting.
      if (depth > 0) {
        depth = depth - 1
      }
      buf = buf + c
      i = i + 1
      continue
    }
    if (c === " " && depth === 0) {
      if (trim(buf) !== "") {
        out.push(trim(buf))
      }
      buf = ""
      i = i + 1
      continue
    }
    buf = buf + c
    i = i + 1
  }
  if (trim(buf) !== "") {
    out.push(trim(buf))
  }
  return out
}
@@ -0,0 +1,160 @@
1
+ import { parseSimpleYamlBlock } from "./yaml_simple.tish"
2
+
3
// Bundles the pieces of a frontmatter split into one result object. Each
// argument is stored unchanged under the key of the same name, in the order
// meta, body, metaRaw, diagnostics, language.
fn makeFm(meta, body, metaRaw, diagnostics, language) {
  let fm = {}
  fm["meta"] = meta
  fm["body"] = body
  fm["metaRaw"] = metaRaw
  fm["diagnostics"] = diagnostics
  fm["language"] = language
  return fm
}
12
+
13
// Finds a balanced `{ ... }` object at the very start of `raw` and splits it
// from the text that follows.
//   raw - document text; must begin with `{` (no leading whitespace allowed)
// Returns null when `raw` does not start with `{` or the braces never
// balance. Otherwise returns an object with:
//   jsonStr - the text from the first `{` through its matching `}`
//   rest    - the remaining text with leading spaces, tabs, CRs and LFs
//             removed, then prefixed with a single "\n" when non-empty
// Braces inside single- or double-quoted strings are ignored; a backslash
// inside a string escapes the next character. The text is not validated as
// JSON here.
fn splitJsonFrontmatterFromStart(raw) {
  if (!raw.startsWith("{")) {
    return null
  }
  let nesting = 0
  let quote = ""
  let escaped = false
  let pos = 0
  while (pos < raw.length) {
    let ch = raw.charAt(pos)
    pos = pos + 1
    if (quote !== "") {
      // Inside a string: only look for the escape character and the closer.
      if (escaped) {
        escaped = false
      } else if (ch === "\\") {
        escaped = true
      } else if (ch === quote) {
        quote = ""
      }
      continue
    }
    if (ch === "\"" || ch === "'") {
      quote = ch
      continue
    }
    if (ch === "{") {
      nesting = nesting + 1
      continue
    }
    if (ch !== "}") {
      continue
    }
    nesting = nesting - 1
    if (nesting !== 0) {
      continue
    }
    // `pos` now points one past the brace that closed the outermost object.
    let tail = raw.slice(pos)
    let skip = 0
    while (skip < tail.length) {
      let w = tail.charAt(skip)
      if (w === " " || w === "\t" || w === "\r" || w === "\n") {
        skip = skip + 1
      } else {
        break
      }
    }
    tail = tail.slice(skip)
    if (tail.length > 0 && tail.charAt(0) !== "\n") {
      tail = "\n" + tail
    }
    let parts = {}
    parts.jsonStr = raw.slice(0, pos)
    parts.rest = tail
    return parts
  }
  return null
}
70
+
71
// Returns the constant 99.
// NOTE(review): presumably a smoke-test hook to confirm the module loads — confirm against callers.
export fn matterPing() {
  let pong = 99
  return pong
}
74
+
75
// Splits a document into frontmatter and body.
//   raw - document source; null yields an empty result, non-strings are
//         converted with String().
// Returns the object built by `makeFm`: { meta, body, metaRaw, diagnostics,
// language }.
//
// Two frontmatter forms are recognised:
//   * A `---` fence on the first line, closed by a line that trims to `---`.
//     Text after the opening `---` names the language (default "yaml").
//     Only "yaml" and "yml" are parsed (via `parseSimpleYamlBlock`); any
//     other language gives a warning and empty meta. An unclosed fence gives
//     a warning and the whole input is returned as the body.
//   * A JSON object at the start of the file. Invalid JSON gives an error
//     diagnostic and the whole input is returned as the body.
// Anything else is returned as body with empty meta and language "".
export fn splitMatter(raw) {
  let diagnostics = []
  if (raw === null) {
    return makeFm({}, "", "", diagnostics, "")
  }
  let s = typeof raw === "string" ? raw : String(raw)
  let lines = s.split("\n")
  if (lines.length === 0) {
    return makeFm({}, s, "", diagnostics, "")
  }

  if (!lines[0].startsWith("---")) {
    // No fence: try a leading JSON object. The helper only accepts input
    // whose very first character is `{`, so whitespace before the brace
    // means no JSON frontmatter is detected.
    if (s.trim().startsWith("{")) {
      let split = splitJsonFrontmatterFromStart(s)
      if (split !== null) {
        let parsed = {}
        try {
          parsed = JSON.parse(split.jsonStr)
        } catch (e) {
          let bad = {}
          bad.level = "error"
          bad.message = "Invalid JSON frontmatter"
          diagnostics.push(bad)
          return makeFm({}, s, split.jsonStr, diagnostics, "json")
        }
        return makeFm(parsed, split.rest, split.jsonStr, diagnostics, "json")
      }
    }
    return makeFm({}, s, "", diagnostics, "")
  }

  // Whatever follows the opening `---` on the first line names the language.
  let fenceTail = lines[0].trim().slice(3).trim()
  let lang = fenceTail === "" ? "yaml" : fenceTail

  // Locate the closing fence.
  let closeAt = -1
  let scan = 1
  while (scan < lines.length && closeAt === -1) {
    if (lines[scan].trim() === "---") {
      closeAt = scan
    } else {
      scan = scan + 1
    }
  }
  if (closeAt === -1) {
    let unclosed = {}
    unclosed.level = "warn"
    unclosed.message = "Unclosed frontmatter fence; treating whole file as body"
    diagnostics.push(unclosed)
    return makeFm({}, s, "", diagnostics, "")
  }

  let matterLines = []
  let m = 1
  while (m < closeAt) {
    matterLines.push(lines[m])
    m = m + 1
  }
  let matterStr = matterLines.join("\n")

  let bodyLines = []
  let b = closeAt + 1
  while (b < lines.length) {
    bodyLines.push(lines[b])
    b = b + 1
  }
  let body = bodyLines.join("\n")

  if (lang !== "yaml" && lang !== "yml") {
    let unsupported = {}
    unsupported.level = "warn"
    unsupported.message = "Frontmatter language not supported for fence: " + lang + "; use yaml, yml, or JSON object at file start."
    diagnostics.push(unsupported)
    return makeFm({}, body, matterStr, diagnostics, lang)
  }

  let meta = parseSimpleYamlBlock(matterStr)
  return makeFm(meta, body, matterStr, diagnostics, lang)
}
@@ -0,0 +1,180 @@
1
+ // Resolve `::include{path="relative.md"}` or `::include{path=relative.md}` in source (no remote URLs).
2
+ // Helpers must appear before `resolveOnce` / export so `tish run` on this module alone sees them.
3
+
4
// Strips leading and trailing spaces, tabs, carriage returns and line feeds
// from `s` and returns the remaining slice. No other whitespace characters
// are removed.
fn trimIncludePath(s) {
  let blanks = " \t\r\n"
  let start = 0
  while (start < s.length && blanks.indexOf(s.charAt(start)) !== -1) {
    start = start + 1
  }
  let end = s.length
  while (end > start && blanks.indexOf(s.charAt(end - 1)) !== -1) {
    end = end - 1
  }
  return s.slice(start, end)
}
23
+
24
// Returns true when some element of `arr` is strictly equal (===) to `v`,
// false otherwise. Scanning stops at the first match.
fn arrayContains(arr, v) {
  let found = false
  let k = 0
  while (!found && k < arr.length) {
    found = arr[k] === v
    k = k + 1
  }
  return found
}
34
+
35
// Appends an error-level diagnostic carrying `message` to `diagnostics`.
fn pushIncludeError(diagnostics, message) {
  let diag = {}
  diag["level"] = "error"
  diag["message"] = message
  diagnostics.push(diag)
}

// Expands every `::include{path=...}` directive found in `text`, recursing
// into the included content.
//   text        - source text to scan
//   readPartial - called with the include path; a null result means the
//                 include was not found
//   maxDepth    - recursion is refused once `depth` exceeds this value
//   visited     - paths already on the current include chain (cycle check)
//   depth       - current nesting level
//   diagnostics - array that errors are appended to (shared across levels)
// Returns { body, diagnostics } where `body` is the expanded text.
//
// The path may be double-quoted (`path="a.md"`, which must be followed
// immediately by `}`) or bare (`path=a.md}`, trimmed, must be non-empty).
// A malformed directive records an error, copies the rest of the text
// through unchanged and stops scanning. A cyclic or missing include records
// an error, is replaced by an HTML comment, and scanning continues.
fn resolveOnce(text, readPartial, maxDepth, visited, depth, diagnostics) {
  let result = {}
  if (depth > maxDepth) {
    pushIncludeError(diagnostics, "Include max depth exceeded (" + String(maxDepth) + ")")
    result["body"] = text
    result["diagnostics"] = diagnostics
    return result
  }

  let marker = "::include{path="
  let resolved = ""
  let cursor = 0
  while (cursor < text.length) {
    let at = text.indexOf(marker, cursor)
    if (at === -1) {
      resolved = resolved + text.slice(cursor)
      break
    }
    resolved = resolved + text.slice(cursor, at)

    // Parse the directive; `syntaxError` stays empty when it is well-formed.
    let pathStart = at + marker.length
    let syntaxError = ""
    let path = ""
    let resumeAt = -1
    if (pathStart >= text.length) {
      syntaxError = "Incomplete ::include after path="
    } else if (text.charAt(pathStart) === "\"") {
      let closeQuote = text.indexOf("\"", pathStart + 1)
      if (closeQuote === -1) {
        syntaxError = "Unclosed path in ::include"
      } else if (closeQuote + 1 >= text.length || text.charAt(closeQuote + 1) !== "}") {
        syntaxError = "Expected } after path in ::include"
      } else {
        path = text.slice(pathStart + 1, closeQuote)
        resumeAt = closeQuote + 2
      }
    } else {
      let closeBrace = text.indexOf("}", pathStart)
      if (closeBrace === -1) {
        syntaxError = "Unclosed ::include (missing })"
      } else {
        path = trimIncludePath(text.slice(pathStart, closeBrace))
        if (path.length === 0) {
          syntaxError = "Empty path in ::include"
        } else {
          resumeAt = closeBrace + 1
        }
      }
    }
    if (syntaxError !== "") {
      // Malformed directive: keep the remaining text verbatim and stop.
      pushIncludeError(diagnostics, syntaxError)
      resolved = resolved + text.slice(at)
      break
    }

    if (arrayContains(visited, path)) {
      pushIncludeError(diagnostics, "Include cycle detected: " + path)
      resolved = resolved + "<!-- include cycle: " + path + " -->"
      cursor = resumeAt
      continue
    }

    let content = readPartial(path)
    if (content === null) {
      pushIncludeError(diagnostics, "Include not found: " + path)
      resolved = resolved + "<!-- missing include: " + path + " -->"
      cursor = resumeAt
      continue
    }

    // Copy the chain so sibling includes do not see each other's paths.
    let chain = []
    let c = 0
    while (c < visited.length) {
      chain.push(visited[c])
      c = c + 1
    }
    chain.push(path)

    let nested = resolveOnce(content, readPartial, maxDepth, chain, depth + 1, diagnostics)
    resolved = resolved + nested["body"]
    cursor = resumeAt
  }

  result["body"] = resolved
  result["diagnostics"] = diagnostics
  return result
}
148
+
149
// Entry point for include resolution: expands `::include{path=...}`
// directives in `srcText` using the options in `incOpts`.
//   srcText - source text; null yields an empty body, non-strings are
//             converted with String()
//   incOpts - options object, or null. Keys read:
//               readPartial - resolver called with each include path
//               maxDepth    - nesting limit (default 16)
//               visited     - paths already on the include chain (default [])
//               depth       - starting nesting level (default 0)
// Returns { body, diagnostics }.
//
// When `incOpts` is null or has no `readPartial`, nothing can be resolved,
// so the text is returned unchanged with no diagnostics.
export fn resolveIncludesInBody(srcText, incOpts) {
  let diagnostics = []
  if (srcText === null) {
    let r0 = {}
    r0["body"] = ""
    r0["diagnostics"] = diagnostics
    return r0
  }
  let text = typeof srcText === "string" ? srcText : String(srcText)

  // Guard against a null options object before indexing into it.
  let readPartial = null
  if (incOpts !== null) {
    readPartial = incOpts["readPartial"]
  }
  if (readPartial === null) {
    let r1 = {}
    r1["body"] = text
    r1["diagnostics"] = diagnostics
    return r1
  }

  let maxDepth = incOpts["maxDepth"]
  if (maxDepth === null) {
    maxDepth = 16
  }
  let visited = incOpts["visited"]
  if (visited === null) {
    visited = []
  }
  let depth = incOpts["depth"]
  if (depth === null) {
    depth = 0
  }

  return resolveOnce(text, readPartial, maxDepth, visited, depth, diagnostics)
}