mdld-parse 0.1.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -172
- package/index.js +463 -793
- package/package.json +7 -8
- package/tests.js +0 -409
package/index.js
CHANGED
|
@@ -1,882 +1,552 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
// ============================================================================
|
|
9
|
-
// RDF/JS Data Factory (Minimal Implementation)
|
|
10
|
-
// ============================================================================
|
|
11
|
-
|
|
12
|
-
const DefaultDataFactory = {
|
|
13
|
-
namedNode: (value) => ({ termType: 'NamedNode', value }),
|
|
14
|
-
blankNode: (value = `b${Math.random().toString(36).slice(2, 11)}`) => ({
|
|
15
|
-
termType: 'BlankNode',
|
|
16
|
-
value
|
|
17
|
-
}),
|
|
18
|
-
literal: (value, languageOrDatatype) => {
|
|
19
|
-
if (typeof languageOrDatatype === 'string') {
|
|
20
|
-
return {
|
|
21
|
-
termType: 'Literal',
|
|
22
|
-
value,
|
|
23
|
-
language: languageOrDatatype,
|
|
24
|
-
datatype: { termType: 'NamedNode', value: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#langString' }
|
|
25
|
-
};
|
|
26
|
-
}
|
|
27
|
-
return {
|
|
28
|
-
termType: 'Literal',
|
|
29
|
-
value,
|
|
30
|
-
language: '',
|
|
31
|
-
datatype: languageOrDatatype || { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' }
|
|
32
|
-
};
|
|
33
|
-
},
|
|
34
|
-
quad: (subject, predicate, object, graph) => ({
|
|
35
|
-
subject,
|
|
36
|
-
predicate,
|
|
37
|
-
object,
|
|
38
|
-
graph: graph || DefaultDataFactory.defaultGraph()
|
|
39
|
-
}),
|
|
40
|
-
defaultGraph: () => ({ termType: 'DefaultGraph', value: '' })
|
|
1
|
+
/**
 * Built-in prefix table used when expanding compact terms.
 *
 * `@vocab` is the base prepended to bare terms (no colon); every other key is
 * a CURIE prefix mapped to its namespace IRI.
 */
const DEFAULT_CONTEXT = {
    // Vocabulary base for un-prefixed terms.
    "@vocab": "http://schema.org/",

    // Namespace prefixes.
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "schema": "http://schema.org/",
};
|
|
42
8
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
const lines = yamlText.trim().split('\n');
|
|
50
|
-
const obj = {};
|
|
51
|
-
let currentKey = null;
|
|
52
|
-
let indent = 0;
|
|
53
|
-
let inArray = false;
|
|
54
|
-
let currentArray = null;
|
|
55
|
-
|
|
56
|
-
for (let line of lines) {
|
|
57
|
-
const trimmed = line.trim();
|
|
58
|
-
if (!trimmed || trimmed.startsWith('#')) continue;
|
|
59
|
-
|
|
60
|
-
const leadingSpaces = line.match(/^\s*/)[0].length;
|
|
61
|
-
|
|
62
|
-
// Array item
|
|
63
|
-
if (trimmed.startsWith('- ')) {
|
|
64
|
-
if (!inArray) {
|
|
65
|
-
currentArray = [];
|
|
66
|
-
inArray = true;
|
|
67
|
-
}
|
|
68
|
-
const value = trimmed.substring(2).trim();
|
|
69
|
-
currentArray.push(parseYAMLValue(value));
|
|
70
|
-
continue;
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
// Key-value pair
|
|
74
|
-
const colonIndex = trimmed.indexOf(':');
|
|
75
|
-
if (colonIndex > 0) {
|
|
76
|
-
const key = trimmed.substring(0, colonIndex).trim().replace(/^['"]|['"]$/g, '');
|
|
77
|
-
let value = trimmed.substring(colonIndex + 1).trim();
|
|
78
|
-
|
|
79
|
-
// Save previous array
|
|
80
|
-
if (inArray && currentKey && currentArray) {
|
|
81
|
-
obj[currentKey] = currentArray;
|
|
82
|
-
inArray = false;
|
|
83
|
-
currentArray = null;
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
currentKey = key;
|
|
87
|
-
|
|
88
|
-
if (!value) {
|
|
89
|
-
// Empty value or nested object/array coming
|
|
90
|
-
indent = leadingSpaces;
|
|
91
|
-
continue;
|
|
9
|
+
/**
 * Minimal RDF/JS-style data factory producing plain-object terms.
 *
 * Each term is `{ termType, value }`; literals additionally carry `language`
 * and `datatype`. Quads are `{ subject, predicate, object, graph }`.
 */
const DataFactory = {
    /** @param {string} v IRI string. */
    namedNode: (v) => ({ termType: 'NamedNode', value: v }),

    /** @param {string} [v] Blank node label; a random `b…` label is generated when omitted. */
    blankNode: (v = `b${Math.random().toString(36).slice(2, 11)}`) => ({ termType: 'BlankNode', value: v }),

    /**
     * @param {string} v Lexical value.
     * @param {string|object} [lang] A language tag (non-empty string) or a datatype NamedNode.
     */
    literal: (v, lang) => {
        // Only a non-empty string is a language tag; an empty string must not
        // yield an rdf:langString with an empty tag.
        if (typeof lang === 'string' && lang !== '') {
            return { termType: 'Literal', value: v, language: lang, datatype: DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#langString') };
        }
        // Any falsy `lang` (undefined, null, '') falls back to xsd:string.
        return { termType: 'Literal', value: v, language: '', datatype: lang || DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#string') };
    },

    /** The default graph term (empty value, `DefaultGraph` term type). */
    defaultGraph: () => ({ termType: 'DefaultGraph', value: '' }),

    /**
     * @param {object} s Subject term.
     * @param {object} p Predicate term.
     * @param {object} o Object term.
     * @param {object} [g] Graph term; the default graph is used when omitted.
     */
    quad: (s, p, o, g) => ({ subject: s, predicate: p, object: o, graph: g || DataFactory.defaultGraph() })
};
|
|
93
20
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
21
|
+
/**
 * Derives a short deterministic hex identifier from a string.
 *
 * Starts from 5381 and folds each UTF-16 code unit in as
 * `acc = (acc << 5) + acc + code`, then renders the absolute value in base 16
 * and keeps at most the first 12 hex digits.
 *
 * @param {string} str Input text.
 * @returns {string} Lower-case hex string, 1 to 12 characters long.
 */
function hash(str) {
    let acc = 5381;
    let idx = 0;
    while (idx < str.length) {
        const unit = str.charCodeAt(idx);
        acc = (acc << 5) + acc + unit;
        idx += 1;
    }
    const hex = Math.abs(acc).toString(16);
    return hex.slice(0, 12);
}
|
|
97
26
|
|
|
98
|
-
|
|
99
|
-
if (
|
|
100
|
-
|
|
27
|
+
/**
 * Expands a compact term into an IRI string using a prefix context.
 *
 * Resolution order:
 *   1. falsy `term`                     -> null
 *   2. starts with `http:` / `https:`   -> returned as-is (trimmed)
 *   3. contains a colon                 -> split at the FIRST colon into
 *      prefix and reference; if `ctx[prefix]` is a non-empty string the
 *      result is `ctx[prefix] + reference`, otherwise the trimmed term is
 *      returned unchanged (e.g. `urn:isbn:123`)
 *   4. otherwise                        -> `ctx['@vocab']` (or '') + term
 *
 * @param {string} term Compact term, CURIE, or absolute IRI.
 * @param {Object<string, string>} ctx Prefix map, optionally with `@vocab`.
 * @returns {string|null} Expanded IRI, or null when `term` is falsy.
 */
function expandIRI(term, ctx) {
    if (!term) return null;
    const t = term.trim();
    if (/^https?:/.test(t)) return t;

    const colonAt = t.indexOf(':');
    if (colonAt !== -1) {
        // Split on the first colon only. `split(':', 2)` would discard
        // everything after a second colon ("ex:a:b" -> "ex", "a").
        const prefix = t.slice(0, colonAt);
        const ref = t.slice(colonAt + 1);
        const base = ctx[prefix];
        // Require a string so inherited members such as `constructor` or
        // `toString` are never mistaken for a declared prefix.
        return typeof base === 'string' && base !== '' ? base + ref : t;
    }
    return (ctx['@vocab'] || '') + t;
}
|
|
109
37
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
if (value === 'null') return null;
|
|
116
|
-
if (/^-?\d+$/.test(value)) return parseInt(value, 10);
|
|
117
|
-
if (/^-?\d+\.\d+$/.test(value)) return parseFloat(value);
|
|
38
|
+
// Annotation parsing - explicit string operations
|
|
39
|
+
/**
 * Parses a `{...}` annotation string into its components.
 *
 * The surrounding braces are stripped and the remainder is split on
 * whitespace. Each part is classified by its leading characters:
 *   `=`        -> subject reset (subject is set to the sentinel 'RESET')
 *   `=IRI`     -> subject declaration
 *   `@lang`    -> language tag
 *   `^^type`   -> datatype
 *   `^pred`    -> reverse property entry
 *   `.Class`   -> type entry
 *   anything else -> forward property entry
 *
 * An empty result (all nulls, no entries) is returned when the input is not
 * a string, is blank, has an odd number of double quotes, or yields neither
 * entries nor a subject. This function never throws.
 *
 * NOTE: the 'RESET' sentinel means `{=RESET}` is indistinguishable from `{=}`
 * to callers; the sentinel is kept because callers compare against it.
 *
 * @param {string} raw Annotation text, with or without enclosing braces.
 * @returns {{subject: string|null, entries: Array<object>, datatype: string|null, language: string|null}}
 */
function parseAnnotation(raw) {
    const emptyAnnotation = () => ({ subject: null, entries: [], datatype: null, language: null });

    // Tokens without an annotation carry `attrs` as undefined/null. Treat that
    // as "no annotation" instead of throwing and logging an error.
    if (typeof raw !== 'string') return emptyAnnotation();

    try {
        const cleaned = raw.replace(/^\{|\}$/g, '').trim();
        if (!cleaned) return emptyAnnotation();

        // Validate quotes: an odd count means an unterminated quoted section.
        const quoteCount = (cleaned.match(/"/g) || []).length;
        if (quoteCount % 2 !== 0) {
            console.warn(`Unbalanced quotes in annotation: ${raw}`);
            return emptyAnnotation();
        }

        const result = emptyAnnotation();
        const parts = cleaned.split(/\s+/).filter(p => p);

        for (const part of parts) {
            if (part === '=') {
                result.subject = 'RESET';
            } else if (part.startsWith('=')) {
                result.subject = part.substring(1);
            } else if (part.startsWith('@')) {
                result.language = part.substring(1);
            } else if (part.startsWith('^^')) {
                // Must be tested before the single-caret reverse marker.
                result.datatype = part.substring(2);
            } else if (part.startsWith('^')) {
                result.entries.push({ kind: 'property', predicate: part.substring(1), direction: 'reverse' });
            } else if (part.startsWith('.')) {
                result.entries.push({ kind: 'type', classIRI: part.substring(1) });
            } else {
                result.entries.push({ kind: 'property', predicate: part, direction: 'forward' });
            }
        }

        // A language or datatype on its own describes nothing.
        if (result.entries.length === 0 && !result.subject) {
            console.warn(`No valid entries found in annotation: ${raw}`);
            return emptyAnnotation();
        }

        return result;
    } catch (error) {
        console.error(`Error parsing annotation ${raw}:`, error);
        return emptyAnnotation();
    }
}
|
|
192
86
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
87
|
+
// Token scanning - consolidated helpers
|
|
88
|
+
/**
 * Splits Markdown text into line-level tokens.
 *
 * The text is processed one `\n`-separated line at a time. Each line yields
 * at most one token, tried in this order: fenced code block, prefix
 * declaration, heading, list item, blockquote, paragraph. Blank lines outside
 * code blocks produce nothing.
 *
 * Token shapes:
 *   code       { type, range, text, lang, attrs }   (one per fenced block)
 *   prefix     { type, prefix, iri }
 *   heading    { type, depth, range, text, attrs }
 *   list       { type, indent, range, text, attrs }
 *   blockquote { type, range, text, attrs }
 *   para       { type, range, text, attrs }
 *
 * `range` is `[start, end]` in character offsets into `text`. For line tokens
 * `end` is the offset just past the line's newline; for code tokens it is the
 * offset where the closing fence line starts. `attrs` is the raw trailing
 * `{...}` string; when absent it is `undefined`, except for paragraphs where
 * it is `null`.
 *
 * A code block that is never closed produces no token.
 *
 * @param {string} text Markdown source.
 * @returns {Array<object>} Tokens in document order.
 */
function scanTokens(text) {
    const tokens = [];
    const lines = text.split('\n');
    let pos = 0;
    let codeBlock = null;

    for (const line of lines) {
        const lineStart = pos;
        pos += line.length + 1; // +1 for the '\n' removed by split

        // Inside a fenced block only a line starting with the opening fence
        // closes it. Every other line is content, including shorter backtick
        // runs such as ``` inside a ```` block.
        if (codeBlock) {
            if (line.startsWith(codeBlock.fence)) {
                tokens.push({
                    type: 'code',
                    range: [codeBlock.start, lineStart],
                    text: codeBlock.content.join('\n'),
                    lang: codeBlock.lang,
                    attrs: codeBlock.attrs
                });
                codeBlock = null;
            } else {
                codeBlock.content.push(line);
            }
            continue;
        }

        // Opening fence: ```lang {attrs}
        if (line.startsWith('```')) {
            const fence = line.match(/^(`{3,})(.*)/);
            codeBlock = {
                fence: fence[1],
                start: lineStart,
                content: [],
                lang: fence[2].trim().split('{')[0].trim(),
                attrs: fence[2].match(/\{[^}]+\}/)?.[0]
            };
            continue;
        }

        // Prefix declarations: [prefix]{: iri}
        const prefixMatch = line.match(/^\[([^\]]+)\]\s*\{:\s*([^}]+)\}/);
        if (prefixMatch) {
            tokens.push({ type: 'prefix', prefix: prefixMatch[1], iri: prefixMatch[2].trim() });
            continue;
        }

        // Headings: 1-6 '#' characters, text, optional trailing {attrs}
        const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
        if (headingMatch) {
            tokens.push({
                type: 'heading',
                depth: headingMatch[1].length,
                range: [lineStart, pos],
                text: headingMatch[2].trim(),
                attrs: headingMatch[3]
            });
            continue;
        }

        // Lists: bullet (-, *, +) or "N." marker; indent is the leading whitespace length
        const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
        if (listMatch) {
            tokens.push({
                type: 'list',
                indent: listMatch[1].length,
                range: [lineStart, pos],
                text: listMatch[3].trim(),
                attrs: listMatch[4]
            });
            continue;
        }

        // Blockquotes
        const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
        if (blockquoteMatch) {
            tokens.push({
                type: 'blockquote',
                range: [lineStart, pos],
                text: blockquoteMatch[1].trim(),
                attrs: blockquoteMatch[2]
            });
            continue;
        }

        // Paragraphs: any remaining non-blank line
        if (line.trim()) {
            const paraMatch = line.match(/^(.+?)(?:\s*(\{[^}]+\}))?$/);
            if (paraMatch) {
                tokens.push({
                    type: 'para',
                    range: [lineStart, pos],
                    text: paraMatch[1].trim(),
                    attrs: paraMatch[2] || null
                });
            }
        }
    }

    return tokens;
}
|
|
279
189
|
|
|
280
|
-
//
|
|
281
|
-
|
|
282
|
-
|
|
190
|
+
// Inline value extraction - simplified
|
|
191
|
+
/**
 * Breaks a line of text into plain-text runs and bracketed inline spans.
 *
 * A span is `[label]`, optionally followed immediately by `(url)` and then by
 * an optional `{attrs}` group (leading whitespace before `{` is allowed).
 * Spans with a non-empty url are reported as type 'link', all others as
 * 'span'. Text between spans is reported as type 'text'. An opening bracket
 * with no closing bracket turns the rest of the input into a text run.
 *
 * @param {string} text Inline source text.
 * @param {number} [baseOffset=0] Added to span ranges so they can be
 *     expressed relative to an enclosing document.
 * @returns {Array<object>} `{type:'text', text}` entries and
 *     `{type, text, url, attrs, range}` entries, in order. Never empty: an
 *     input producing no pieces yields a single text entry holding `text`.
 */
function extractInlineValue(text, baseOffset = 0) {
    const pieces = [];
    const addText = (chunk) => {
        pieces.push({ type: 'text', text: chunk });
    };

    let cursor = 0;
    while (cursor < text.length) {
        const open = text.indexOf('[', cursor);
        if (open < 0) {
            // No more spans: the remainder is plain text.
            addText(text.substring(cursor));
            break;
        }
        if (open > cursor) {
            addText(text.substring(cursor, open));
        }

        const close = text.indexOf(']', open);
        if (close < 0) {
            // Unterminated bracket: keep it verbatim as text.
            addText(text.substring(open));
            break;
        }

        let end = close + 1;

        // Optional link destination directly after the closing bracket.
        let url = null;
        if (text[end] === '(') {
            const parenClose = text.indexOf(')', end);
            if (parenClose !== -1) {
                url = text.substring(end + 1, parenClose);
                end = parenClose + 1;
            }
        }

        // Optional attribute group.
        let attrs = null;
        const braces = text.substring(end).match(/^\s*\{([^}]+)\}/);
        if (braces) {
            attrs = `{${braces[1]}}`;
            end += braces[0].length;
        }

        pieces.push({
            type: url ? 'link' : 'span',
            text: text.substring(open + 1, close),
            url: url,
            attrs: attrs,
            range: [baseOffset + open, baseOffset + end]
        });

        cursor = end;
    }

    if (pieces.length === 0) {
        return [{ type: 'text', text: text }];
    }
    return pieces;
}
|
|
351
244
|
|
|
352
|
-
//
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
baseIRI: options.baseIRI || '',
|
|
360
|
-
defaultVocab: options.defaultVocab || 'http://schema.org/',
|
|
361
|
-
dataFactory: options.dataFactory || DefaultDataFactory,
|
|
362
|
-
...options
|
|
363
|
-
};
|
|
245
|
+
// Core processing functions - consolidated
|
|
246
|
+
/**
 * Builds the origin-block record for one annotation.
 *
 * Every entry is copied with its `predicate` and `classIRI` expanded against
 * `ctx` (or set to null when absent). The block id is the `hash` of the
 * subject joined by '|' with the JSON form of each expanded entry, so it
 * depends on the subject and entries only, not on `range`.
 *
 * @param {string} subject Subject IRI string the annotation applies to.
 * @param {Array<object>} entries Annotation entries (type / property).
 * @param {Array<number>} range `[start, end]` character offsets.
 * @param {object} ctx Prefix context; a shallow copy is stored on the block.
 * @returns {{id: string, range: {start: number, end: number}, subject: string, entries: Array<object>, context: object}}
 */
function createBlock(subject, entries, range, ctx) {
    const resolve = (value) => (value ? expandIRI(value, ctx) : null);

    const expanded = entries.map((entry) => {
        const copy = { ...entry };
        copy.predicate = resolve(entry.predicate);
        copy.classIRI = resolve(entry.classIRI);
        return copy;
    });

    const serialized = expanded.map((entry) => JSON.stringify(entry));
    const fingerprint = [subject].concat(serialized).join('|');

    const block = {
        id: hash(fingerprint),
        range: { start: range[0], end: range[1] },
        subject,
        entries: expanded,
        context: { ...ctx }
    };
    return block;
}
|
|
387
262
|
|
|
388
|
-
|
|
389
|
-
|
|
263
|
+
/**
 * Appends a quad to the output and records which block produced it.
 *
 * Does nothing when any of subject, predicate or object is falsy. Otherwise
 * the quad is built with `dataFactory.quad`, pushed onto `quads`, and indexed
 * in `quadIndex` under the JSON array of its subject, predicate and object
 * `value` strings.
 *
 * @param {Array<object>} quads Output list, mutated in place.
 * @param {Map<string, string>} quadIndex Quad-key to block-id map, mutated in place.
 * @param {string} blockId Identifier of the originating block.
 * @param {object} subject Subject term.
 * @param {object} predicate Predicate term.
 * @param {object} object Object term.
 * @param {object} dataFactory Factory exposing `quad(s, p, o)`.
 * @returns {void}
 */
function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory) {
    const hasAllTerms = Boolean(subject && predicate && object);
    if (!hasAllTerms) {
        return;
    }

    const created = dataFactory.quad(subject, predicate, object);
    quads.push(created);

    const keyParts = [created.subject.value, created.predicate.value, created.object.value];
    quadIndex.set(JSON.stringify(keyParts), blockId);
}
|
|
390
269
|
|
|
391
|
-
|
|
392
|
-
|
|
270
|
+
/**
 * Creates a literal term, choosing between typed, language-tagged and plain.
 *
 * A truthy `datatype` wins: it is expanded against `context` and wrapped in a
 * named node. Otherwise a truthy `language` is passed as the language tag.
 * With neither, `dataFactory.literal` is called with the value alone.
 *
 * @param {string} value Lexical value.
 * @param {string|null} datatype Compact or absolute datatype IRI.
 * @param {string|null} language Language tag.
 * @param {object} context Prefix context used to expand `datatype`.
 * @param {object} dataFactory Factory exposing `literal` and `namedNode`.
 * @returns {object} The literal term produced by `dataFactory.literal`.
 */
function createLiteralValue(value, datatype, language, context, dataFactory) {
    if (!datatype && !language) {
        return dataFactory.literal(value);
    }

    let qualifier;
    if (datatype) {
        const datatypeIRI = expandIRI(datatype, context);
        qualifier = dataFactory.namedNode(datatypeIRI);
    } else {
        qualifier = language;
    }
    return dataFactory.literal(value, qualifier);
}
|
|
393
275
|
|
|
394
|
-
|
|
395
|
-
if (
|
|
396
|
-
try {
|
|
397
|
-
this.context = parseYAMLLD(frontmatter);
|
|
276
|
+
/**
 * Applies a token's `{...}` annotation: updates the current subject, records
 * an origin block, and emits the quads the annotation describes.
 *
 * Reads and writes on `state`:
 *   - `currentSubject` is cleared by `{=}` and replaced by `{=IRI}`
 *   - `origin.blocks` receives the block created for this annotation
 *   - `quads` / `origin.quadIndex` receive emitted quads (via emitQuad)
 *   - `df`, `ctx` and `listContext` are read only
 *
 * Nothing is emitted when the token has no annotation, when the annotation is
 * a subject reset, or when there is neither a previous subject nor a newly
 * declared one.
 *
 * Object selection for a property entry:
 *   - token has a `url`                   -> named node for that URL
 *   - annotation declares a subject       -> the declared subject, except a
 *     forward property on a code token, which uses a literal of the text
 *   - otherwise                           -> literal of the text
 * The text is `textContent`, falling back to `token.text`, then ''.
 *
 * For reverse entries the computed object becomes the quad's subject and the
 * subject in effect before this annotation becomes the quad's object.
 *
 * @param {object} token Token or inline span with `attrs`, `type`, `range`
 *     and optionally `url` / `text`.
 * @param {object} state Mutable parser state (see above).
 * @param {string|null} [textContent=null] Text to use for literal objects.
 * @returns {void}
 */
function processAnnotation(token, state, textContent = null) {
    if (!token.attrs) return;

    const ann = parseAnnotation(token.attrs);
    // Captured before any update so relations can point back to the
    // subject that was in effect when this annotation was reached.
    const originalSubject = state.currentSubject;

    // Handle subject declaration
    if (ann.subject === 'RESET') {
        state.currentSubject = null;
        return;
    }
    if (ann.subject) {
        state.currentSubject = state.df.namedNode(expandIRI(ann.subject, state.ctx));
    }

    if (!originalSubject && !ann.subject) return;

    const targetSubject = ann.subject ?
        state.df.namedNode(expandIRI(ann.subject, state.ctx)) :
        originalSubject;

    const block = createBlock(targetSubject.value, ann.entries, token.range, state.ctx);
    state.origin.blocks.set(block.id, block);

    // Handle list context types: list tokens inherit the types declared by
    // the annotation that introduced the list.
    if (token.type === 'list' && state.listContext?.types.length > 0) {
        state.listContext.types.forEach(typeIRI => {
            emitQuad(state.quads, state.origin.quadIndex, block.id,
                targetSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
                state.df.namedNode(typeIRI), state.df);
        });
    }

    // Emit triples
    ann.entries.forEach(e => {
        if (e.kind === 'type') {
            // On a link the type describes the link target, not the subject.
            const typeSubject = token.url ?
                state.df.namedNode(expandIRI(token.url, state.ctx)) : targetSubject;
            emitQuad(state.quads, state.origin.quadIndex, block.id,
                typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)),
                state.df.namedNode(expandIRI(e.classIRI, state.ctx)), state.df);
        } else if (e.kind === 'property' && e.predicate) {
            const predicate = state.df.namedNode(expandIRI(e.predicate, state.ctx));
            let object;

            if (token.url) {
                object = state.df.namedNode(expandIRI(token.url, state.ctx));
            } else if (ann.subject) {
                if (e.direction === 'reverse') {
                    object = targetSubject;
                } else {
                    object = token.type === 'code' ?
                        createLiteralValue(textContent || token.text || '', ann.datatype, ann.language, state.ctx, state.df) :
                        targetSubject;
                }
            } else {
                object = createLiteralValue(textContent || token.text || '', ann.datatype, ann.language, state.ctx, state.df);
            }

            // A reverse property swaps roles, so `object` would become the
            // quad's subject. A literal cannot be a subject in RDF; skip the
            // entry instead of emitting an invalid quad.
            if (e.direction === 'reverse' && object && object.termType === 'Literal') {
                console.warn(`Reverse property "${e.predicate}" has no resource to attach to in annotation: ${token.attrs}`);
                return;
            }

            const subject = e.direction === 'reverse' ? object :
                (ann.subject && !token.url && token.type !== 'code') ? originalSubject : targetSubject;
            const objectRef = e.direction === 'reverse' ? originalSubject : object;

            emitQuad(state.quads, state.origin.quadIndex, block.id, subject, predicate, objectRef, state.df);
        }
    });
}
|
|
545
343
|
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
if (token.type === 'code') {
|
|
550
|
-
// Code blocks become SoftwareSourceCode-like resources
|
|
551
|
-
let snippetSubject;
|
|
552
|
-
|
|
553
|
-
if (token.attrs && token.attrs.id) {
|
|
554
|
-
const rootFragment = this.getRootFragment();
|
|
555
|
-
if (token.attrs.id === rootFragment) {
|
|
556
|
-
snippetSubject = this.rootSubject;
|
|
557
|
-
} else {
|
|
558
|
-
const baseForFragment = this.rootSubject.value.split('#')[0];
|
|
559
|
-
snippetSubject = this.df.namedNode(baseForFragment + '#' + token.attrs.id);
|
|
560
|
-
}
|
|
561
|
-
} else {
|
|
562
|
-
snippetSubject = this.df.blankNode(
|
|
563
|
-
this.hashBlankNode(`code:${token.lang || ''}:${token.text}`)
|
|
564
|
-
);
|
|
565
|
-
}
|
|
344
|
+
// List processing - simplified
|
|
345
|
+
/**
 * Prepares `state.listContext` when an annotated token is directly followed
 * by a list.
 *
 * The token's annotation is parsed and a fresh context object replaces
 * `state.listContext`. Property entries set the context's `predicate`
 * (expanded against `state.ctx`) and `reverse` flag, so when several are
 * present the last one wins. Type entries append their expanded class IRI to
 * `types`.
 *
 * @param {object} token Token whose `attrs` may introduce a list.
 * @param {object} state Parser state; `listContext` is overwritten.
 * @param {object} [nextToken] Token following `token`, if any.
 * @returns {boolean} false when `token` has no annotation or the next token
 *     is not a list (state untouched); true otherwise.
 */
function setupListContext(token, state, nextToken) {
    if (!token.attrs) return false;
    if (nextToken?.type !== 'list') return false;

    const ann = parseAnnotation(token.attrs);
    const listContext = { predicate: null, types: [], reverse: false };
    state.listContext = listContext;

    for (const entry of ann.entries) {
        switch (entry.kind) {
            case 'property':
                listContext.predicate = expandIRI(entry.predicate, state.ctx);
                listContext.reverse = entry.direction === 'reverse';
                break;
            case 'type':
                listContext.types.push(expandIRI(entry.classIRI, state.ctx));
                break;
            default:
                break;
        }
    }

    return true;
}
|
|
612
362
|
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
this.emitQuad(
|
|
617
|
-
this.currentSubject,
|
|
618
|
-
hasPartPred,
|
|
619
|
-
snippetSubject
|
|
620
|
-
);
|
|
621
|
-
}
|
|
363
|
+
/**
 * Emit the quads described by a single list-item token.
 *
 * The item's annotation (parsed from `token.attrs`) may declare its own
 * subject; while the item is processed that subject temporarily replaces
 * `state.currentSubject`, and the previous subject is always restored before
 * returning.
 *
 * Quads emitted:
 *  - one `rdf:type` quad per `type` entry of the annotation,
 *  - one literal quad per `property` entry (value taken from `token.text`),
 *  - the list-context relation between the enclosing subject and the item
 *    (direction flipped when `state.listContext.reverse` is set),
 *  - one `rdf:type` quad per list-context type.
 *
 * The list-context relation and list-context types are only applied when the
 * item declares its own subject. Without one the "item" would be the
 * enclosing subject itself and the relation would degenerate into a
 * self-referential quad (`S p S`).
 *
 * @param {object} token - List-item token; `attrs` and `text` are read.
 * @param {object} state - Parser state (`df`, `ctx`, `quads`, `origin`,
 *   `currentSubject`, `listContext`). `currentSubject` is mutated temporarily.
 * @returns {void}
 */
function processListItem(token, state) {
    const ann = parseAnnotation(token.attrs);
    const originalSubject = state.currentSubject;
    const { df, ctx } = state;

    // Small local wrappers so every emit shares the same bookkeeping arguments.
    const emit = (tag, s, p, o) =>
        emitQuad(state.quads, state.origin.quadIndex, tag, s, p, o, df);
    const rdfType = () => df.namedNode(expandIRI('rdf:type', ctx));

    try {
        if (ann.subject) {
            state.currentSubject = df.namedNode(expandIRI(ann.subject, ctx));
        }

        // Process item properties
        ann.entries.forEach(e => {
            if (e.kind === 'type') {
                emit('list-item', state.currentSubject, rdfType(),
                    df.namedNode(expandIRI(e.classIRI, ctx)));
            } else if (e.kind === 'property' && e.predicate) {
                const predicate = df.namedNode(expandIRI(e.predicate, ctx));
                const object = createLiteralValue(token.text, ann.datatype, ann.language, ctx, df);
                emit('list-item', state.currentSubject, predicate, object);
            }
        });

        // Process list context relationship.
        // Requires an item subject: otherwise currentSubject === originalSubject
        // and the quad would link the enclosing subject to itself.
        if (state.listContext?.predicate && originalSubject && ann.subject) {
            const predicate = df.namedNode(expandIRI(state.listContext.predicate, ctx));
            if (state.listContext.reverse) {
                emit('list-context', state.currentSubject, predicate, originalSubject);
            } else {
                emit('list-context', originalSubject, predicate, state.currentSubject);
            }
        }

        // Apply list context types
        if (state.listContext?.types?.length > 0 && ann.subject) {
            state.listContext.types.forEach(type => {
                emit('list-item', state.currentSubject, rdfType(),
                    df.namedNode(expandIRI(type, ctx)));
            });
        }
    } finally {
        // Never leak the item's subject into the tokens that follow,
        // even if one of the helpers above throws.
        state.currentSubject = originalSubject;
    }
}
|
|
762
408
|
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
}
|
|
409
|
+
/**
 * Main parsing function.
 *
 * Scans `text` into tokens, registers every `prefix` token in the working
 * context, then walks the tokens in order and dispatches on `token.type`.
 *
 * @param {string} text - Source document.
 * @param {object} [options]
 * @param {object} [options.context] - Entries merged over DEFAULT_CONTEXT.
 * @param {object} [options.dataFactory] - Term factory; falls back to DataFactory.
 * @returns {{quads: Array, origin: {blocks: Map, quadIndex: Map}, context: object}}
 */
export function parse(text, options = {}) {
    const state = {
        ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
        df: options.dataFactory || DataFactory,
        quads: [],
        origin: { blocks: new Map(), quadIndex: new Map() },
        currentSubject: null,
        listContext: null
    };

    const tokens = scanTokens(text);

    // Prefix declarations are collected before any block is processed.
    const prefixTokens = tokens.filter(candidate => candidate.type === 'prefix');
    for (const declaration of prefixTokens) {
        state.ctx[declaration.prefix] = declaration.iri;
    }

    for (const [index, token] of tokens.entries()) {
        const kind = token.type;

        // Headings and code blocks are handled identically.
        if (kind === 'heading' || kind === 'code') {
            processAnnotation(token, state, token.text);
            continue;
        }

        if (kind === 'list') {
            if (state.listContext) processListItem(token, state);
            continue;
        }

        if (kind === 'blockquote') {
            if (state.currentSubject) processAnnotation(token, state, token.text);
            continue;
        }

        if (kind !== 'para') continue;

        // A paragraph that sets up a list context is fully consumed by that.
        if (setupListContext(token, state, tokens[index + 1])) continue;

        // Regular paragraphs are NOT value carriers per spec;
        // only the spans and links inside them are processed.
        if (!state.currentSubject) continue;

        const inlineParts = extractInlineValue(token.text, token.range[0]);

        // Annotated spans (value carriers)
        const annotatedSpans = inlineParts.filter(part => part.type === 'span' && part.attrs);
        for (const part of annotatedSpans) {
            processAnnotation(part, state, part.text);
        }

        // When the paragraph itself is annotated, every span inherits it.
        if (token.attrs) {
            const everySpan = inlineParts.filter(part => part.type === 'span');
            for (const part of everySpan) {
                const inherited = { ...part, attrs: token.attrs };
                processAnnotation(inherited, state, part.text);
            }
        }

        // Links (value carriers)
        const linkParts = inlineParts.filter(part => part.type === 'link');
        for (const part of linkParts) {
            processAnnotation(part, state, part.text);
        }
    }

    return { quads: state.quads, origin: state.origin, context: state.ctx };
}
|
|
471
|
+
|
|
472
|
+
/**
 * Compact an absolute IRI using the given context.
 *
 * Resolution order:
 *  1. If the IRI lies under `ctx['@vocab']`, return the bare term after it.
 *  2. Otherwise pick the prefix whose namespace is the LONGEST match and
 *     return `prefix:local`. On equal length the first entry wins.
 *  3. Otherwise return the IRI unchanged.
 *
 * Values that are falsy or do not start with `http` are returned untouched.
 * Context keys starting with `@` are keywords, not prefixes, and entries whose
 * value is not a non-empty string are ignored so they can never "match".
 *
 * @param {string} iri - IRI to shorten.
 * @param {object} ctx - Prefix map, optionally carrying `@vocab`.
 * @returns {string} Shortened form, or `iri` itself when nothing applies.
 */
function shortenIRI(iri, ctx) {
    if (!iri || !iri.startsWith('http')) return iri;

    const context = ctx ?? {};

    // Check @vocab first. The IRI must extend past the vocabulary namespace,
    // otherwise the result would be an empty term.
    const vocab = context['@vocab'];
    if (typeof vocab === 'string' && vocab !== '' &&
        iri.length > vocab.length && iri.startsWith(vocab)) {
        return iri.substring(vocab.length);
    }

    // Check prefixes, preferring the most specific (longest) namespace.
    let bestPrefix = null;
    let bestNamespace = '';
    for (const [prefix, namespace] of Object.entries(context)) {
        if (prefix.startsWith('@')) continue;
        if (typeof namespace !== 'string') continue;
        // Also rejects the empty namespace, which would match every IRI.
        if (namespace.length <= bestNamespace.length) continue;
        if (iri.startsWith(namespace)) {
            bestPrefix = prefix;
            bestNamespace = namespace;
        }
    }

    if (bestPrefix !== null) {
        return `${bestPrefix}:${iri.substring(bestNamespace.length)}`;
    }

    // No prefix found, return full IRI
    return iri;
}
|
|
490
|
+
|
|
491
|
+
/**
 * Apply an RDF diff to source text using the origin index produced by `parse`.
 *
 * Deletions: each deleted quad is looked up in `origin.quadIndex`
 * (key = JSON array of subject/predicate/object values) and the whole block
 * it came from is removed from the text. A block is removed at most once,
 * even when several deleted quads map to it.
 *
 * Additions: each added quad becomes a new line `[object] {predicate}`,
 * inserted at the end of the first block whose `subject` equals the quad's
 * subject, or appended at the end of the text when no such block exists.
 *
 * All edit positions refer to the ORIGINAL text; edits are applied from the
 * highest start offset downwards so earlier offsets stay valid.
 *
 * @param {object} args
 * @param {string} args.text - Current source text.
 * @param {{add?: Array, delete?: Array}} args.diff - Quads to add / delete.
 * @param {{blocks: Map, quadIndex: Map}} [args.origin] - Origin index.
 * @param {object} [args.options] - `options.context` is used to shorten IRIs.
 * @returns {{text: string, origin: object}} Updated text and the (unchanged) origin.
 */
export function serialize({ text, diff, origin, options = {} }) {
    if (!diff || (!diff.add?.length && !diff.delete?.length)) return { text, origin };

    let result = text;
    const edits = [];
    const ctx = options?.context || {};

    if (diff.delete) {
        // Several quads can originate from one block; queueing the same range
        // twice would make the second pass delete unrelated text.
        const removedBlocks = new Set();

        diff.delete.forEach(quad => {
            const key = JSON.stringify([quad.subject.value, quad.predicate.value, quad.object.value]);
            const blockId = origin?.quadIndex?.get(key);
            if (!blockId || removedBlocks.has(blockId)) return;

            const block = origin.blocks?.get(blockId);
            if (!block) return;
            removedBlocks.add(blockId);

            const { start, end } = block.range;
            const leadingNewline = start > 0 && text[start - 1] === '\n';
            const trailingNewline = text[end] === '\n';

            // If the block sits directly between two non-empty lines, dropping
            // both newlines would glue those lines together ("a\nB\nb" -> "ab").
            // In that case keep the leading newline as the separator.
            const contentBefore = start >= 2 && text[start - 2] !== '\n';
            const contentAfter = end + 1 < text.length && text[end + 1] !== '\n';
            const wouldMergeNeighbours =
                leadingNewline && trailingNewline && contentBefore && contentAfter;

            const deleteStart = leadingNewline && !wouldMergeNeighbours ? start - 1 : start;
            const deleteEnd = trailingNewline ? end + 1 : end;

            edits.push({ start: deleteStart, end: deleteEnd, text: '' });
        });
    }

    if (diff.add) {
        diff.add.forEach(quad => {
            // Default: append at the end of the document.
            let insertPos = result.length;

            for (const [, block] of origin?.blocks || []) {
                if (block.subject === quad.subject.value) {
                    insertPos = block.range.end;
                    break;
                }
            }

            const pred = shortenIRI(quad.predicate.value, ctx);
            const objText = quad.object.termType === 'Literal'
                ? quad.object.value
                : shortenIRI(quad.object.value, ctx);

            const newLine = `\n[${objText}] {${pred}}`;

            edits.push({ start: insertPos, end: insertPos, text: newLine });
        });
    }

    // Apply back-to-front so offsets of not-yet-applied edits remain valid.
    edits.sort((a, b) => b.start - a.start);
    edits.forEach(edit => {
        result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
    });

    return { text: result, origin };
}
|
|
881
551
|
|
|
882
|
-
export default {
|
|
552
|
+
// Default export: a single object bundling parse, serialize and parseAnnotation.
export default { parse, serialize, parseAnnotation };
|