mdld-parse 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -1
- package/src/generate.js +2 -1
- package/src/index.js +1 -1
- package/src/merge.js +11 -3
- package/src/parse.js +246 -72
- package/src/render.js +345 -345
- package/src/shared.js +212 -0
- package/src/utils.js +2 -9
package/src/shared.js
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared utilities for MD-LD Parser and Renderer
|
|
3
|
+
* Ensures DRY code and consistent CommonMark processing
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Default prefix-to-namespace map.
 *
 * `@vocab` and `rdfs` both point at the RDF Schema namespace; the remaining
 * entries cover RDF, XSD, SHACL and PROV. The object is intentionally left
 * unfrozen, exactly as before.
 */
export const DEFAULT_CONTEXT = {
  '@vocab': 'http://www.w3.org/2000/01/rdf-schema#',
  rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
  rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
  xsd: 'http://www.w3.org/2001/XMLSchema#',
  sh: 'http://www.w3.org/ns/shacl#',
  prov: 'http://www.w3.org/ns/prov#',
};
|
|
14
|
+
|
|
15
|
+
// CommonMark patterns - shared between parser and renderer
|
|
16
|
+
/**
 * Matches a string that starts with one of the recognised IRI schemes followed
 * by `:`. Group 1 is the scheme.
 *
 * `urn:uuid:` inputs are matched through the `urn` alternative; the former
 * trailing `urn:uuid` alternative could never be reached and was removed.
 */
export const URL_REGEX = /^(https?|ftp|mailto|tag|nih|urn|uuid|did|web|ipfs|ipns|data|file):/;

/** Fence line: group 1 is a run of 3+ backticks or tildes, group 2 the rest of the line. */
export const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;

/** Prefix declaration `[name] <iri>`: group 1 is the name, group 2 the IRI. */
export const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;

/** Heading: group 1 is the `#` run (1-6), group 2 the text, group 3 an optional trailing `{...}` block. */
export const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;

/**
 * List item: group 1 is the leading whitespace, group 2 the marker, group 3 the
 * text, group 4 an optional trailing `{...}` block.
 * Despite the name, the marker also accepts ordered `N.` markers.
 */
export const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;

/** Blockquote: `>` followed by whitespace; group 1 is the text, group 2 an optional trailing `{...}` block. */
export const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;

/** A line holding only `{=...}` (surrounding whitespace allowed); group 1 is the text after `{=`. */
export const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
|
|
23
|
+
|
|
24
|
+
// Inline carrier patterns - shared extraction logic
|
|
25
|
+
/**
 * Inline carrier patterns, keyed by carrier kind.
 *
 * Both regexes are sticky (`y`): a match is only attempted at the regex's
 * current `lastIndex`, and `exec`/`test` update `lastIndex`, so callers must
 * set it before each use.
 * Group 1 is the delimited text, group 2 the text between `{` and `}`.
 * Note that the EMPHASIS delimiter class also contains the backtick, so a
 * double-backtick span can match EMPHASIS as well as CODE_SPAN.
 */
export const INLINE_CARRIER_PATTERNS = {
  EMPHASIS: /[*_`]+(.+?)[*_`]+\s*\{([^}]+)\}/y,
  CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y,
};

/**
 * The same patterns as ordered `[name, regex]` pairs.
 *
 * Derived from INLINE_CARRIER_PATTERNS so the two can never drift apart. Each
 * entry is a fresh RegExp instance, which keeps its `lastIndex` independent of
 * the corresponding INLINE_CARRIER_PATTERNS entry.
 */
export const CARRIER_PATTERN_ARRAY = Object.entries(INLINE_CARRIER_PATTERNS).map(
  ([name, pattern]) => [name, new RegExp(pattern.source, pattern.flags)],
);
|
|
35
|
+
|
|
36
|
+
// Cache for fence regex patterns
|
|
37
|
+
/** Cache of closing-fence regexes, keyed by the value passed to getFenceClosePattern. */
export const FENCE_CLOSE_PATTERNS = new Map();

/**
 * Returns a cached regex matching a line that starts with three or more
 * repetitions of `fenceChar`.
 *
 * The fence text is escaped before being embedded in the pattern, so regex
 * metacharacters (for example `*`) are matched literally instead of throwing a
 * SyntaxError. Backtick and tilde need no escaping, so their patterns are
 * unchanged.
 *
 * @param {string} fenceChar - Fence character to match.
 * @returns {RegExp} Regex anchored at the start of the line; group 1 is the fence run.
 */
export function getFenceClosePattern(fenceChar) {
  let pattern = FENCE_CLOSE_PATTERNS.get(fenceChar);
  if (pattern === undefined) {
    const literal = String(fenceChar).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    pattern = new RegExp(`^(${literal}{3,})`);
    FENCE_CLOSE_PATTERNS.set(fenceChar, pattern);
  }
  return pattern;
}
|
|
45
|
+
|
|
46
|
+
// Range calculation utilities - shared between parser and renderer
|
|
47
|
+
/**
 * Computes absolute value and attribute ranges for a line.
 *
 * The value is taken to start after the prefix plus a separator gap: exactly
 * one character when the character right after the prefix is a space,
 * otherwise the full run of leading whitespace found there (possibly none).
 *
 * @param {string} line - The line text.
 * @param {string|null} attrs - Attribute text, forwarded to calcAttrsRange.
 * @param {number} lineStart - Offset of the line within the document.
 * @param {number} prefixLength - Length of the prefix before the separator.
 * @param {number} valueLength - Length of the value text.
 * @returns {{valueRange: number[], attrsRange: (number[]|null)}}
 */
export function calcRangeInfo(line, attrs, lineStart, prefixLength, valueLength) {
  let gap;
  if (prefixLength < line.length && line[prefixLength] === ' ') {
    gap = 1;
  } else {
    const leadingWhitespace = line.slice(prefixLength).match(/^\s+/);
    gap = leadingWhitespace ? leadingWhitespace[0].length : 0;
  }

  const valueBegin = lineStart + (prefixLength + gap);
  const attrsRange = calcAttrsRange(line, attrs, lineStart);
  return { valueRange: [valueBegin, valueBegin + valueLength], attrsRange };
}
|
|
56
|
+
|
|
57
|
+
/**
 * Locates the last occurrence of `attrs` in `line` and returns its absolute range.
 *
 * @param {string} line - The line text to search.
 * @param {string|null} attrs - Attribute text to locate; falsy yields null.
 * @param {number} lineStart - Offset of the line within the document.
 * @returns {number[]|null} `[start, end]` offsets, or null when `attrs` is
 *   falsy or does not occur in `line`.
 */
export function calcAttrsRange(line, attrs, lineStart) {
  if (!attrs) {
    return null;
  }

  const offsetInLine = line.lastIndexOf(attrs);
  if (offsetInLine < 0) {
    return null;
  }

  const begin = lineStart + offsetInLine;
  return [begin, begin + attrs.length];
}
|
|
62
|
+
|
|
63
|
+
// Token creation utilities - shared structure
|
|
64
|
+
/**
 * Builds a token object.
 *
 * The six standard fields are laid down first and `extra` is spread over them,
 * so keys in `extra` override the standard ones. A non-enumerable, writable
 * `_carriers` slot initialised to null is then defined on the token.
 *
 * @param {string} type
 * @param {*} range
 * @param {string} text
 * @param {*} [attrs=null]
 * @param {*} [attrsRange=null]
 * @param {*} [valueRange=null]
 * @param {object} [extra={}] - Additional properties merged into the token.
 * @returns {object} The new token.
 */
export function createToken(type, range, text, attrs = null, attrsRange = null, valueRange = null, extra = {}) {
  const standardFields = { type, range, text, attrs, attrsRange, valueRange };
  const token = { ...standardFields, ...extra };

  // Hidden from Object.keys / JSON.stringify, but still assignable.
  const carriersSlot = { value: null, writable: true, enumerable: false };
  Object.defineProperty(token, '_carriers', carriersSlot);

  return token;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Builds a carrier object from its standard fields, with `extra` spread on top
 * (keys in `extra` override the standard ones).
 *
 * @returns {object} `{ type, text, attrs, attrsRange, valueRange, range, pos, ...extra }`
 */
export function createCarrier(type, text, attrs, attrsRange, valueRange, range, pos, extra = {}) {
  const standardFields = { type, text, attrs, attrsRange, valueRange, range, pos };
  return { ...standardFields, ...extra };
}
|
|
75
|
+
|
|
76
|
+
// List token creation - shared logic
|
|
77
|
+
/**
 * Builds a list token from a list-item regex match.
 *
 * Reads the match groups as: 1 = indentation, 2 = marker, 3 = item text,
 * 4 = optional attribute block. The token range is `[lineStart, pos - 1]`,
 * the token text is the trimmed item text, and the indentation length is
 * stored as `indent`.
 *
 * @param {string} type - Token type to assign.
 * @param {string} line - The full line text.
 * @param {number} lineStart - Offset of the line within the document.
 * @param {number} pos - Position used to compute the end of the token range.
 * @param {Array} match - Regex match with the groups described above.
 * @returns {object} Token produced by createToken.
 */
export function createListToken(type, line, lineStart, pos, match) {
  const attrs = match[4] || null;
  const indentText = match[1];
  const marker = match[2];
  const content = match[3];

  const markerLength = marker ? marker.length : 0;
  const prefixLength = indentText.length + markerLength;
  const { attrsRange, valueRange } = calcRangeInfo(line, attrs, lineStart, prefixLength, content.length);

  const tokenRange = [lineStart, pos - 1];
  return createToken(type, tokenRange, content.trim(), attrs, attrsRange, valueRange, {
    indent: indentText.length,
  });
}
|
|
84
|
+
|
|
85
|
+
// Semantic block parsing - shared between parser and renderer
|
|
86
|
+
/** Cache of frozen parse results, keyed by the raw attribute string. */
export const semCache = {};

/** Shared result returned for empty attribute strings (frozen at the top level only). */
export const EMPTY_SEM = Object.freeze({ predicates: [], types: [], subject: null });

/**
 * Parses an attribute string, reusing a cached result when one exists.
 *
 * Only the cache's OWN properties count as hits. A plain `semCache[attrs]`
 * lookup would also find inherited members, so an attribute string such as
 * `constructor` or `toString` would return an `Object.prototype` member
 * instead of a parse result. Entries are stored with `Object.defineProperty`
 * so that a `__proto__` key becomes an ordinary entry rather than replacing
 * the cache object's prototype.
 *
 * NOTE(review): `parseSemanticBlock` is not defined or imported in the visible
 * part of this file — confirm it is in scope at runtime.
 *
 * @param {string} attrs - Raw attribute string; falsy yields EMPTY_SEM.
 * @returns {object} Frozen parse result (frozen at the top level).
 */
export function parseSemCached(attrs) {
  if (!attrs) return EMPTY_SEM;

  if (Object.prototype.hasOwnProperty.call(semCache, attrs)) {
    return semCache[attrs];
  }

  const sem = Object.freeze(parseSemanticBlock(attrs));
  Object.defineProperty(semCache, attrs, {
    value: sem,
    writable: true,
    enumerable: true,
    configurable: true,
  });
  return sem;
}
|
|
98
|
+
|
|
99
|
+
// Indentation utilities - shared for list processing
|
|
100
|
+
/**
 * Computes the nesting level of a block from its leading whitespace.
 *
 * The block text is `sourceText` sliced by `block.range.start` /
 * `block.range.end`. Every leading whitespace character counts as one column
 * (a tab is one character, and a leading newline is counted too), and the
 * level is the column count divided by `indentSize`, rounded down.
 *
 * @param {{range?: {start: number, end: number}}} block - Block with a range.
 * @param {string} sourceText - Full source text the range refers to.
 * @param {number} [indentSize=2] - Whitespace characters per nesting level.
 *   Values that are not positive integers fall back to 2.
 * @returns {number} Nesting level; 0 when the range or source text is missing.
 */
export function getIndentLevel(block, sourceText, indentSize = 2) {
  if (!block.range || !sourceText) return 0;

  const columnsPerLevel = Number.isInteger(indentSize) && indentSize > 0 ? indentSize : 2;
  const text = sourceText.substring(block.range.start, block.range.end);

  // `^\s*` always matches (possibly the empty string), so no null check is needed.
  const leadingWhitespace = text.match(/^\s*/)[0];

  return Math.floor(leadingWhitespace.length / columnsPerLevel);
}
|
|
111
|
+
|
|
112
|
+
// Content extraction utilities - shared between parser and renderer
|
|
113
|
+
/**
 * Returns the text covered by `range`, with the `attrsRange` span cut out when
 * one is given, trimmed of surrounding whitespace.
 *
 * @param {string} sourceText - Full source text.
 * @param {number[]} range - `[start, end]` offsets into `sourceText`.
 * @param {number[]|null} [attrsRange=null] - `[start, end]` offsets (in the
 *   same coordinates as `range`) of the span to remove.
 * @returns {string} The trimmed content; '' when `range` or `sourceText` is missing.
 */
export function extractContentFromRange(sourceText, range, attrsRange = null) {
  if (!range || !sourceText) {
    return '';
  }

  const slice = sourceText.substring(range[0], range[1]);
  if (!attrsRange) {
    return slice.trim();
  }

  // Translate the absolute annotation offsets into offsets within the slice.
  const cutFrom = attrsRange[0] - range[0];
  const cutTo = attrsRange[1] - range[0];
  const withoutAttrs = slice.substring(0, cutFrom) + slice.substring(cutTo);
  return withoutAttrs.trim();
}
|
|
127
|
+
|
|
128
|
+
// List marker utilities - shared for advanced list processing
|
|
129
|
+
/**
 * Detects the list marker at the start of a block.
 *
 * The block text is `sourceText` sliced by `block.range.start` /
 * `block.range.end`. Recognised markers are the bullets `-`, `*`, `+` and
 * ordered markers made of digits followed by `.` or `)`, each followed by
 * whitespace.
 *
 * The previous pattern `\d+\[\.|\]` matched a literal `[.` after the digits
 * (or a bare `]`), so ordered items such as `1. item` were never recognised.
 *
 * @param {{range?: {start: number, end: number}}} block - Block with a range.
 * @param {string} sourceText - Full source text the range refers to.
 * @returns {{type: string, marker: string, indent: number}|null} Marker info
 *   (`type` is 'dash', 'asterisk', 'plus' or 'ordered'), or null when the
 *   range or source text is missing or the block does not start with a marker.
 */
export function getListMarker(block, sourceText) {
  if (!block.range || !sourceText) return null;

  const text = sourceText.substring(block.range.start, block.range.end);
  const markerMatch = text.match(/^(\s*)([-*+]|\d+[.)])\s+/);
  if (!markerMatch) return null;

  const indentText = markerMatch[1];
  const marker = markerMatch[2];

  let type = 'ordered';
  if (marker === '-') {
    type = 'dash';
  } else if (marker === '*') {
    type = 'asterisk';
  } else if (marker === '+') {
    type = 'plus';
  }

  return { type, marker, indent: indentText.length };
}
|
|
145
|
+
|
|
146
|
+
// CommonMark line processors - shared between parser and renderer
|
|
147
|
+
/**
 * Ordered line classifiers. Each entry has a `test(line)` predicate and a
 * `process` slot; every `process` is null here. The final entry accepts any
 * line.
 */
export const PROCESSORS = [
  (line) => line.startsWith('```'),                 // code blocks
  (line) => line.startsWith('`'),                   // code spans
  (line) => PREFIX_REGEX.test(line),                // prefixes
  (line) => HEADING_REGEX.test(line),               // headings
  (line) => UNORDERED_LIST_REGEX.test(line),        // lists
  (line) => BLOCKQUOTE_REGEX.test(line),            // blockquotes
  (line) => STANDALONE_SUBJECT_REGEX.test(line),    // standalone subjects
  (line) => line.trim() === '',                     // empty lines
  (_line) => true,                                  // default: paragraph
].map((test) => ({ test, process: null }));
|
|
158
|
+
|
|
159
|
+
// HTML escaping - shared utility
|
|
160
|
+
/**
 * Escapes the five HTML-significant characters in `text`.
 *
 * `&` is replaced first so the ampersands introduced by the later
 * replacements are not escaped a second time.
 *
 * @param {*} text - Value to escape; non-strings are converted with String().
 * @returns {string} The escaped text, or '' when `text` is falsy.
 */
export function escapeHtml(text) {
  if (!text) return '';
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
|
|
169
|
+
|
|
170
|
+
// Quad key generation - shared between parser and renderer
|
|
171
|
+
/**
 * Builds a string key for a quad from the subject, predicate and object
 * values plus the object's datatype IRI and language tag (each '' when absent),
 * joined with `|`.
 *
 * NOTE(review): the parts are not escaped, so values that themselves contain
 * `|` could produce the same key for different quads — confirm this is
 * acceptable for the callers.
 *
 * @param {{value: *}} subject
 * @param {{value: *}} predicate
 * @param {{value: *, datatype?: {value: *}, language?: string}} object
 * @returns {string} `subject|predicate|object|datatype|language`
 */
export function quadIndexKey(subject, predicate, object) {
  const datatypeIri = object.datatype?.value || '';
  const languageTag = object.language || '';
  const head = `${subject.value}|${predicate.value}|${object.value}`;
  return `${head}|${datatypeIri}|${languageTag}`;
}
|
|
176
|
+
|
|
177
|
+
// IRI expansion and shortening - shared utilities
|
|
178
|
+
/**
 * Expands `iri` with `expandIRI` and passes the result through `shortenIRI`,
 * both using the same context.
 *
 * NOTE(review): `expandIRI` and `shortenIRI` are not defined or imported in
 * the visible part of this file — confirm they are in scope at runtime.
 *
 * @param {string} iri - IRI or term to normalise.
 * @param {object} ctx - Context handed to both helpers.
 * @returns {*} Whatever `shortenIRI` returns.
 */
export function expandAndShortenIRI(iri, ctx) {
  return shortenIRI(expandIRI(iri, ctx), ctx);
}
|
|
182
|
+
|
|
183
|
+
// Subject resolution utilities - shared between parser and renderer
|
|
184
|
+
/**
 * Classifies a subject declaration string.
 *
 * Checks are applied in order: falsy → 'none'; starts with `=#` → 'fragment';
 * starts with `+` → 'soft-object'; exactly `RESET` → 'reset'; anything else →
 * 'full-iri'.
 *
 * @param {string} subjectDecl - The subject declaration.
 * @returns {'none'|'fragment'|'soft-object'|'reset'|'full-iri'}
 */
export function resolveSubjectType(subjectDecl) {
  if (!subjectDecl) return 'none';
  if (subjectDecl.startsWith('=#')) return 'fragment';
  if (subjectDecl.startsWith('+')) return 'soft-object';
  return subjectDecl === 'RESET' ? 'reset' : 'full-iri';
}
|
|
201
|
+
|
|
202
|
+
// Fragment resolution - shared logic
|
|
203
|
+
/**
 * Resolves a fragment declaration against the current subject.
 *
 * The first two characters of `fragment` are dropped (the `=#` marker), any
 * existing `#...` part of the current subject's IRI is removed, and the two
 * are joined with `#`.
 *
 * @param {string} fragment - Fragment declaration beginning with `=#`.
 * @param {{value: string}} currentSubject - Current subject term.
 * @returns {string} The resolved IRI.
 * @throws {Error} When `currentSubject` is falsy.
 */
export function resolveFragment(fragment, currentSubject) {
  if (!currentSubject) {
    throw new Error('Fragment requires current subject');
  }

  const localName = fragment.substring(2); // drop the leading "=#"
  const subjectIRI = currentSubject.value;
  const hashAt = subjectIRI.indexOf('#');
  const documentIRI = hashAt === -1 ? subjectIRI : subjectIRI.slice(0, hashAt);

  return `${documentIRI}#${localName}`;
}
|
package/src/utils.js
CHANGED
|
@@ -1,11 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
'@vocab': "http://www.w3.org/2000/01/rdf-schema#",
|
|
3
|
-
rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
|
|
4
|
-
rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
|
|
5
|
-
xsd: 'http://www.w3.org/2001/XMLSchema#',
|
|
6
|
-
sh: "http://www.w3.org/ns/shacl#",
|
|
7
|
-
prov: 'http://www.w3.org/ns/prov#'
|
|
8
|
-
};
|
|
1
|
+
import { URL_REGEX, DEFAULT_CONTEXT } from './shared.js';
|
|
9
2
|
|
|
10
3
|
// Base Term class for RDF/JS compatibility
|
|
11
4
|
export class Term {
|
|
@@ -258,7 +251,7 @@ export function expandIRI(term, ctx) {
|
|
|
258
251
|
const t = raw.trim();
|
|
259
252
|
let result;
|
|
260
253
|
|
|
261
|
-
if (t.match(
|
|
254
|
+
if (t.match(URL_REGEX)) {
|
|
262
255
|
result = t;
|
|
263
256
|
} else if (t.includes(':')) {
|
|
264
257
|
const [prefix, ref] = t.split(':', 2);
|