mdld-parse 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -19
- package/index.js +621 -199
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -149,12 +149,12 @@ ex:apollo11 schema:organizer ex:nasa .
|
|
|
149
149
|
|
|
150
150
|
### Resource Declaration
|
|
151
151
|
|
|
152
|
-
Declare resources inline with `
|
|
152
|
+
Declare resources inline with `{=iri}`:
|
|
153
153
|
|
|
154
154
|
```markdown
|
|
155
155
|
# Mission {=ex:apollo11}
|
|
156
156
|
|
|
157
|
-
[Neil Armstrong]
|
|
157
|
+
[Neil Armstrong] {=ex:armstrong ?commander .Person}
|
|
158
158
|
```
|
|
159
159
|
|
|
160
160
|
```turtle
|
|
@@ -171,8 +171,8 @@ Lists require explicit subjects per item:
|
|
|
171
171
|
|
|
172
172
|
Ingredients: {?ingredient .Ingredient}
|
|
173
173
|
|
|
174
|
-
-
|
|
175
|
-
-
|
|
174
|
+
- Flour {=ex:flour name}
|
|
175
|
+
- Water {=ex:water name}
|
|
176
176
|
```
|
|
177
177
|
|
|
178
178
|
```turtle
|
|
@@ -219,7 +219,7 @@ Reverse the relationship direction:
|
|
|
219
219
|
|
|
220
220
|
Part of: {^?hasPart}
|
|
221
221
|
|
|
222
|
-
-
|
|
222
|
+
- Book {=ex:book}
|
|
223
223
|
```
|
|
224
224
|
|
|
225
225
|
```turtle
|
|
@@ -331,21 +331,17 @@ console.log(updated.text);
|
|
|
331
331
|
Only specific markdown elements can carry semantic values:
|
|
332
332
|
|
|
333
333
|
**Inline:**
|
|
334
|
-
- `[text]` — span with annotation
|
|
335
|
-
- `[text](url)` — link to external resource
|
|
336
|
-
- `[text]
|
|
334
|
+
- `[text] {...}` — span with annotation
|
|
335
|
+
- `[text](url) {...}` — link to external resource
|
|
336
|
+
- `[text] {...}` — inline resource declaration
|
|
337
|
+
- ` {...}` — embedding with annotation
|
|
337
338
|
|
|
338
339
|
**Block:**
|
|
339
340
|
- Headings (`# Title`)
|
|
340
|
-
- List items (`- item`)
|
|
341
|
+
- List items (`- item`, `1. item`) (single-level)
|
|
341
342
|
- Blockquotes (`> quote`)
|
|
342
343
|
- Code blocks (` ```lang `)
|
|
343
344
|
|
|
344
|
-
**Non-carriers:**
|
|
345
|
-
- Plain paragraphs without `[...]`
|
|
346
|
-
- Images (future)
|
|
347
|
-
- Tables (future)
|
|
348
|
-
|
|
349
345
|
## Architecture
|
|
350
346
|
|
|
351
347
|
### Design Principles
|
|
@@ -383,12 +379,12 @@ MD-LD explicitly forbids to ensure deterministic parsing:
|
|
|
383
379
|
|
|
384
380
|
Attendees: {?attendee}
|
|
385
381
|
|
|
386
|
-
-
|
|
387
|
-
-
|
|
382
|
+
- Alice {=urn:person:alice name}
|
|
383
|
+
- Bob {=urn:person:bob name}
|
|
388
384
|
|
|
389
385
|
Action items: {?actionItem}
|
|
390
386
|
|
|
391
|
-
-
|
|
387
|
+
- Review proposal {=urn:task:1 name}
|
|
392
388
|
```
|
|
393
389
|
|
|
394
390
|
### Developer Documentation
|
|
@@ -401,7 +397,7 @@ Action items: {?actionItem}
|
|
|
401
397
|
|
|
402
398
|
Example:
|
|
403
399
|
|
|
404
|
-
```bash {=api:/users/:id
|
|
400
|
+
```bash {=api:/users/:id#example .CodeExample text}
|
|
405
401
|
curl https://api.example.com/users/123
|
|
406
402
|
```
|
|
407
403
|
````
|
|
@@ -412,7 +408,7 @@ curl https://api.example.com/users/123
|
|
|
412
408
|
# Paper {=doi:10.1234/example .ScholarlyArticle}
|
|
413
409
|
|
|
414
410
|
[Semantic Web] {about}
|
|
415
|
-
[Alice Johnson]
|
|
411
|
+
[Alice Johnson] {=orcid:0000-0001-2345-6789 author}
|
|
416
412
|
[2024-01] {datePublished ^^xsd:gYearMonth}
|
|
417
413
|
|
|
418
414
|
> This paper explores semantic markup in Markdown. {abstract @en}
|
package/index.js
CHANGED
|
@@ -25,8 +25,9 @@ function hash(str) {
|
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
function expandIRI(term, ctx) {
|
|
28
|
-
if (
|
|
29
|
-
const
|
|
28
|
+
if (term == null) return null;
|
|
29
|
+
const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
|
|
30
|
+
const t = raw.trim();
|
|
30
31
|
if (t.match(/^https?:/)) return t;
|
|
31
32
|
if (t.includes(':')) {
|
|
32
33
|
const [prefix, ref] = t.split(':', 2);
|
|
@@ -37,38 +38,83 @@ function expandIRI(term, ctx) {
|
|
|
37
38
|
|
|
38
39
|
function parseSemanticBlock(raw) {
|
|
39
40
|
try {
|
|
40
|
-
const
|
|
41
|
-
|
|
41
|
+
const src = String(raw || '').trim();
|
|
42
|
+
const cleaned = src.replace(/^\{|\}$/g, '').trim();
|
|
43
|
+
if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
44
|
+
|
|
45
|
+
const result = { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
46
|
+
const re = /\S+/g;
|
|
47
|
+
let m;
|
|
48
|
+
while ((m = re.exec(cleaned)) !== null) {
|
|
49
|
+
const token = m[0];
|
|
50
|
+
const relStart = 1 + m.index;
|
|
51
|
+
const relEnd = relStart + token.length;
|
|
52
|
+
const entryIndex = result.entries.length;
|
|
42
53
|
|
|
43
|
-
const result = { subject: null, types: [], predicates: [], datatype: null, language: null };
|
|
44
|
-
const tokens = cleaned.split(/\s+/).filter(t => t);
|
|
45
|
-
|
|
46
|
-
for (const token of tokens) {
|
|
47
54
|
if (token === '=') {
|
|
48
55
|
result.subject = 'RESET';
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
result.
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
result.
|
|
63
|
-
|
|
64
|
-
|
|
56
|
+
result.entries.push({ kind: 'subjectReset', relRange: { start: relStart, end: relEnd }, raw: token });
|
|
57
|
+
continue;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
if (token.startsWith('=')) {
|
|
61
|
+
const iri = token.substring(1);
|
|
62
|
+
result.subject = iri;
|
|
63
|
+
result.entries.push({ kind: 'subject', iri, relRange: { start: relStart, end: relEnd }, raw: token });
|
|
64
|
+
continue;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
if (token.startsWith('^^')) {
|
|
68
|
+
const datatype = token.substring(2);
|
|
69
|
+
if (!result.language) result.datatype = datatype;
|
|
70
|
+
result.entries.push({ kind: 'datatype', datatype, relRange: { start: relStart, end: relEnd }, raw: token });
|
|
71
|
+
continue;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
if (token.startsWith('@')) {
|
|
75
|
+
const language = token.substring(1);
|
|
76
|
+
result.language = language;
|
|
77
|
+
result.datatype = null;
|
|
78
|
+
result.entries.push({ kind: 'language', language, relRange: { start: relStart, end: relEnd }, raw: token });
|
|
79
|
+
continue;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
if (token.startsWith('.')) {
|
|
83
|
+
const classIRI = token.substring(1);
|
|
84
|
+
result.types.push({ iri: classIRI, entryIndex });
|
|
85
|
+
result.entries.push({ kind: 'type', iri: classIRI, relRange: { start: relStart, end: relEnd }, raw: token });
|
|
86
|
+
continue;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
if (token.startsWith('^?')) {
|
|
90
|
+
const iri = token.substring(2);
|
|
91
|
+
result.predicates.push({ iri, form: '^?', entryIndex });
|
|
92
|
+
result.entries.push({ kind: 'property', iri, form: '^?', relRange: { start: relStart, end: relEnd }, raw: token });
|
|
93
|
+
continue;
|
|
65
94
|
}
|
|
95
|
+
|
|
96
|
+
if (token.startsWith('^')) {
|
|
97
|
+
const iri = token.substring(1);
|
|
98
|
+
result.predicates.push({ iri, form: '^', entryIndex });
|
|
99
|
+
result.entries.push({ kind: 'property', iri, form: '^', relRange: { start: relStart, end: relEnd }, raw: token });
|
|
100
|
+
continue;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
if (token.startsWith('?')) {
|
|
104
|
+
const iri = token.substring(1);
|
|
105
|
+
result.predicates.push({ iri, form: '?', entryIndex });
|
|
106
|
+
result.entries.push({ kind: 'property', iri, form: '?', relRange: { start: relStart, end: relEnd }, raw: token });
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
result.predicates.push({ iri: token, form: '', entryIndex });
|
|
111
|
+
result.entries.push({ kind: 'property', iri: token, form: '', relRange: { start: relStart, end: relEnd }, raw: token });
|
|
66
112
|
}
|
|
67
113
|
|
|
68
114
|
return result;
|
|
69
115
|
} catch (error) {
|
|
70
116
|
console.error(`Error parsing semantic block ${raw}:`, error);
|
|
71
|
-
return { subject: null, types: [], predicates: [], datatype: null, language: null };
|
|
117
|
+
return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
72
118
|
}
|
|
73
119
|
}
|
|
74
120
|
|
|
@@ -86,20 +132,29 @@ function scanTokens(text) {
|
|
|
86
132
|
if (line.startsWith('```')) {
|
|
87
133
|
if (!codeBlock) {
|
|
88
134
|
const fence = line.match(/^(`{3,})(.*)/);
|
|
135
|
+
const attrsText = fence[2].match(/\{[^}]+\}/)?.[0] || null;
|
|
136
|
+
const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
|
|
137
|
+
const contentStart = lineStart + line.length + 1;
|
|
89
138
|
codeBlock = {
|
|
90
139
|
fence: fence[1],
|
|
91
140
|
start: lineStart,
|
|
92
141
|
content: [],
|
|
93
142
|
lang: fence[2].trim().split(/[\s{]/)[0],
|
|
94
|
-
attrs:
|
|
143
|
+
attrs: attrsText,
|
|
144
|
+
attrsRange: attrsText && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length] : null,
|
|
145
|
+
valueRangeStart: contentStart
|
|
95
146
|
};
|
|
96
147
|
} else if (line.startsWith(codeBlock.fence)) {
|
|
148
|
+
const valueStart = codeBlock.valueRangeStart;
|
|
149
|
+
const valueEnd = Math.max(valueStart, lineStart - 1);
|
|
97
150
|
tokens.push({
|
|
98
151
|
type: 'code',
|
|
99
152
|
range: [codeBlock.start, lineStart],
|
|
100
153
|
text: codeBlock.content.join('\n'),
|
|
101
154
|
lang: codeBlock.lang,
|
|
102
|
-
attrs: codeBlock.attrs
|
|
155
|
+
attrs: codeBlock.attrs,
|
|
156
|
+
attrsRange: codeBlock.attrsRange,
|
|
157
|
+
valueRange: [valueStart, valueEnd]
|
|
103
158
|
});
|
|
104
159
|
codeBlock = null;
|
|
105
160
|
}
|
|
@@ -119,35 +174,58 @@ function scanTokens(text) {
|
|
|
119
174
|
|
|
120
175
|
const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
121
176
|
if (headingMatch) {
|
|
177
|
+
const attrs = headingMatch[3] || null;
|
|
178
|
+
const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
|
|
179
|
+
const afterHashes = headingMatch[1].length;
|
|
180
|
+
const ws = line.substring(afterHashes).match(/^\s+/)?.[0]?.length || 0;
|
|
181
|
+
const valueStartInLine = afterHashes + ws;
|
|
182
|
+
const valueEndInLine = valueStartInLine + headingMatch[2].length;
|
|
122
183
|
tokens.push({
|
|
123
184
|
type: 'heading',
|
|
124
185
|
depth: headingMatch[1].length,
|
|
125
186
|
range: [lineStart, pos - 1],
|
|
126
187
|
text: headingMatch[2].trim(),
|
|
127
|
-
attrs
|
|
188
|
+
attrs,
|
|
189
|
+
attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
|
|
190
|
+
valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
|
|
128
191
|
});
|
|
129
192
|
continue;
|
|
130
193
|
}
|
|
131
194
|
|
|
132
195
|
const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
133
196
|
if (listMatch) {
|
|
197
|
+
const attrs = listMatch[4] || null;
|
|
198
|
+
const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
|
|
199
|
+
const prefix = listMatch[1].length + listMatch[2].length;
|
|
200
|
+
const ws = line.substring(prefix).match(/^\s+/)?.[0]?.length || 0;
|
|
201
|
+
const valueStartInLine = prefix + ws;
|
|
202
|
+
const valueEndInLine = valueStartInLine + listMatch[3].length;
|
|
134
203
|
tokens.push({
|
|
135
204
|
type: 'list',
|
|
136
205
|
indent: listMatch[1].length,
|
|
137
206
|
range: [lineStart, pos - 1],
|
|
138
207
|
text: listMatch[3].trim(),
|
|
139
|
-
attrs
|
|
208
|
+
attrs,
|
|
209
|
+
attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
|
|
210
|
+
valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
|
|
140
211
|
});
|
|
141
212
|
continue;
|
|
142
213
|
}
|
|
143
214
|
|
|
144
215
|
const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
145
216
|
if (blockquoteMatch) {
|
|
217
|
+
const attrs = blockquoteMatch[2] || null;
|
|
218
|
+
const attrsStartInLine = attrs ? line.lastIndexOf(attrs) : -1;
|
|
219
|
+
const prefixMatch = line.match(/^>\s+/);
|
|
220
|
+
const valueStartInLine = prefixMatch ? prefixMatch[0].length : 2;
|
|
221
|
+
const valueEndInLine = valueStartInLine + blockquoteMatch[1].length;
|
|
146
222
|
tokens.push({
|
|
147
223
|
type: 'blockquote',
|
|
148
224
|
range: [lineStart, pos - 1],
|
|
149
225
|
text: blockquoteMatch[1].trim(),
|
|
150
|
-
attrs
|
|
226
|
+
attrs,
|
|
227
|
+
attrsRange: attrs && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null,
|
|
228
|
+
valueRange: [lineStart + valueStartInLine, lineStart + valueEndInLine]
|
|
151
229
|
});
|
|
152
230
|
continue;
|
|
153
231
|
}
|
|
@@ -165,7 +243,6 @@ function scanTokens(text) {
|
|
|
165
243
|
return tokens;
|
|
166
244
|
}
|
|
167
245
|
|
|
168
|
-
// Extract inline carriers: [text] {attrs}, [text](url) {attrs}, [text](=iri) {attrs}
|
|
169
246
|
function extractInlineCarriers(text, baseOffset = 0) {
|
|
170
247
|
const carriers = [];
|
|
171
248
|
let pos = 0;
|
|
@@ -174,14 +251,25 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
174
251
|
const bracketStart = text.indexOf('[', pos);
|
|
175
252
|
if (bracketStart === -1) break;
|
|
176
253
|
|
|
177
|
-
|
|
178
|
-
|
|
254
|
+
let bracketDepth = 1;
|
|
255
|
+
let bracketEnd = bracketStart + 1;
|
|
256
|
+
|
|
257
|
+
while (bracketEnd < text.length && bracketDepth > 0) {
|
|
258
|
+
if (text[bracketEnd] === '[') {
|
|
259
|
+
bracketDepth++;
|
|
260
|
+
} else if (text[bracketEnd] === ']') {
|
|
261
|
+
bracketDepth--;
|
|
262
|
+
}
|
|
263
|
+
bracketEnd++;
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
if (bracketDepth > 0) break;
|
|
179
267
|
|
|
180
|
-
const carrierText = text.substring(bracketStart + 1, bracketEnd);
|
|
181
|
-
|
|
268
|
+
const carrierText = text.substring(bracketStart + 1, bracketEnd - 1);
|
|
269
|
+
const valueRange = [baseOffset + bracketStart + 1, baseOffset + bracketEnd - 1];
|
|
270
|
+
let spanEnd = bracketEnd;
|
|
182
271
|
let url = null;
|
|
183
272
|
|
|
184
|
-
// Check for (url) or (=iri)
|
|
185
273
|
if (text[spanEnd] === '(') {
|
|
186
274
|
const parenEnd = text.indexOf(')', spanEnd);
|
|
187
275
|
if (parenEnd !== -1) {
|
|
@@ -190,22 +278,24 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
190
278
|
}
|
|
191
279
|
}
|
|
192
280
|
|
|
193
|
-
// Check for {attrs}
|
|
194
281
|
let attrs = null;
|
|
282
|
+
let attrsRange = null;
|
|
195
283
|
const attrsMatch = text.substring(spanEnd).match(/^\s*\{([^}]+)\}/);
|
|
196
284
|
if (attrsMatch) {
|
|
197
285
|
attrs = `{${attrsMatch[1]}}`;
|
|
286
|
+
const braceIndex = attrsMatch[0].indexOf('{');
|
|
287
|
+
const absStart = baseOffset + spanEnd + (braceIndex >= 0 ? braceIndex : 0);
|
|
288
|
+
attrsRange = [absStart, absStart + attrs.length];
|
|
198
289
|
spanEnd += attrsMatch[0].length;
|
|
199
290
|
}
|
|
200
291
|
|
|
201
|
-
// Determine type and resource
|
|
202
292
|
let carrierType = 'span';
|
|
203
293
|
let resourceIRI = null;
|
|
204
294
|
|
|
205
295
|
if (url) {
|
|
206
296
|
if (url.startsWith('=')) {
|
|
207
|
-
|
|
208
|
-
|
|
297
|
+
pos = spanEnd;
|
|
298
|
+
continue;
|
|
209
299
|
} else {
|
|
210
300
|
carrierType = 'link';
|
|
211
301
|
resourceIRI = url;
|
|
@@ -217,6 +307,8 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
217
307
|
text: carrierText,
|
|
218
308
|
url: resourceIRI,
|
|
219
309
|
attrs: attrs,
|
|
310
|
+
attrsRange,
|
|
311
|
+
valueRange,
|
|
220
312
|
range: [baseOffset + bracketStart, baseOffset + spanEnd]
|
|
221
313
|
});
|
|
222
314
|
|
|
@@ -226,191 +318,150 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
226
318
|
return carriers;
|
|
227
319
|
}
|
|
228
320
|
|
|
229
|
-
function createBlock(subject, types, predicates, range, ctx) {
|
|
321
|
+
function createBlock(subject, types, predicates, entries, range, attrsRange, valueRange, carrierType, ctx) {
|
|
230
322
|
const expanded = {
|
|
231
|
-
subject
|
|
232
|
-
types: types.map(t => expandIRI(t, ctx)),
|
|
233
|
-
predicates: predicates.map(p => ({
|
|
234
|
-
iri: expandIRI(p.iri, ctx),
|
|
235
|
-
form: p.form
|
|
236
|
-
}))
|
|
323
|
+
subject,
|
|
324
|
+
types: types.map(t => expandIRI(typeof t === 'string' ? t : t.iri, ctx)),
|
|
325
|
+
predicates: predicates.map(p => ({ iri: expandIRI(p.iri, ctx), form: p.form }))
|
|
237
326
|
};
|
|
238
|
-
|
|
239
327
|
const blockId = hash([subject, JSON.stringify(expanded)].join('|'));
|
|
240
328
|
return {
|
|
241
329
|
id: blockId,
|
|
242
330
|
range: { start: range[0], end: range[1] },
|
|
331
|
+
attrsRange: attrsRange ? { start: attrsRange[0], end: attrsRange[1] } : null,
|
|
332
|
+
valueRange: valueRange ? { start: valueRange[0], end: valueRange[1] } : null,
|
|
333
|
+
carrierType: carrierType || null,
|
|
243
334
|
subject,
|
|
244
335
|
types: expanded.types,
|
|
245
336
|
predicates: expanded.predicates,
|
|
337
|
+
entries: entries || [],
|
|
246
338
|
context: { ...ctx }
|
|
247
339
|
};
|
|
248
340
|
}
|
|
249
341
|
|
|
250
|
-
function
|
|
342
|
+
function quadIndexKey(subject, predicate, object) {
|
|
343
|
+
const objKey = object.termType === 'Literal'
|
|
344
|
+
? JSON.stringify({ t: 'Literal', v: object.value, lang: object.language || '', dt: object.datatype?.value || '' })
|
|
345
|
+
: JSON.stringify({ t: object.termType, v: object.value });
|
|
346
|
+
return JSON.stringify([subject.value, predicate.value, objKey]);
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
|
|
251
350
|
if (!subject || !predicate || !object) return;
|
|
252
351
|
const quad = dataFactory.quad(subject, predicate, object);
|
|
253
352
|
quads.push(quad);
|
|
254
|
-
|
|
255
|
-
quad.subject.value,
|
|
256
|
-
quad.predicate.value,
|
|
257
|
-
quad.object.termType === 'Literal' ? quad.object.value : quad.object.value
|
|
258
|
-
]);
|
|
259
|
-
quadIndex.set(key, blockId);
|
|
353
|
+
quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), meta ? { blockId, ...meta } : { blockId });
|
|
260
354
|
}
|
|
261
355
|
|
|
262
356
|
function createLiteral(value, datatype, language, context, dataFactory) {
|
|
263
|
-
if (datatype)
|
|
264
|
-
|
|
265
|
-
}
|
|
266
|
-
if (language) {
|
|
267
|
-
return dataFactory.literal(value, language);
|
|
268
|
-
}
|
|
357
|
+
if (datatype) return dataFactory.literal(value, dataFactory.namedNode(expandIRI(datatype, context)));
|
|
358
|
+
if (language) return dataFactory.literal(value, language);
|
|
269
359
|
return dataFactory.literal(value);
|
|
270
360
|
}
|
|
271
361
|
|
|
272
|
-
// Core processing: handle subject/type declarations and property emissions
|
|
273
362
|
function processAnnotation(carrier, sem, state) {
|
|
274
|
-
// §6.1 Subject declaration
|
|
275
363
|
if (sem.subject === 'RESET') {
|
|
276
364
|
state.currentSubject = null;
|
|
277
365
|
return;
|
|
278
366
|
}
|
|
279
367
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
368
|
+
const previousSubject = state.currentSubject;
|
|
369
|
+
let newSubject = sem.subject ? state.df.namedNode(expandIRI(sem.subject, state.ctx)) : null;
|
|
370
|
+
if (newSubject) state.currentSubject = newSubject;
|
|
283
371
|
|
|
284
|
-
// Determine the subject for emissions
|
|
285
372
|
const S = state.currentSubject;
|
|
286
|
-
if (!S) return;
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
const block = createBlock(
|
|
290
|
-
S.value,
|
|
291
|
-
sem.types,
|
|
292
|
-
sem.predicates,
|
|
293
|
-
carrier.range,
|
|
294
|
-
state.ctx
|
|
295
|
-
);
|
|
373
|
+
if (!S) return;
|
|
374
|
+
|
|
375
|
+
const block = createBlock(S.value, sem.types, sem.predicates, sem.entries, carrier.range, carrier.attrsRange || null, carrier.valueRange || null, carrier.type || null, state.ctx);
|
|
296
376
|
state.origin.blocks.set(block.id, block);
|
|
297
377
|
|
|
298
|
-
// Extract L (literal) and O (object IRI)
|
|
299
378
|
const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
|
|
300
379
|
const O = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
|
|
301
380
|
|
|
302
|
-
|
|
303
|
-
|
|
381
|
+
sem.types.forEach(t => {
|
|
382
|
+
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
383
|
+
const entryIndex = typeof t === 'string' ? null : t.entryIndex;
|
|
304
384
|
const typeSubject = O || S;
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
state.origin.quadIndex,
|
|
308
|
-
block.id,
|
|
309
|
-
typeSubject,
|
|
310
|
-
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
311
|
-
state.df.namedNode(expandIRI(typeIRI, state.ctx)),
|
|
312
|
-
state.df
|
|
313
|
-
);
|
|
385
|
+
const expandedType = expandIRI(typeIRI, state.ctx);
|
|
386
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandedType), state.df, { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex });
|
|
314
387
|
});
|
|
315
388
|
|
|
316
|
-
// §8 Emit predicate triples (routing table)
|
|
317
389
|
sem.predicates.forEach(pred => {
|
|
318
390
|
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
391
|
+
const token = `${pred.form}${pred.iri}`;
|
|
319
392
|
|
|
320
393
|
if (pred.form === '') {
|
|
321
|
-
|
|
322
|
-
emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df);
|
|
394
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
323
395
|
} else if (pred.form === '?') {
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
396
|
+
if (newSubject) {
|
|
397
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, previousSubject, P, newSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
398
|
+
} else if (O) {
|
|
399
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, O, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
327
400
|
}
|
|
328
|
-
} else if (pred.form === '^') {
|
|
329
|
-
// ^p: reverse literal (L → S impossible, emit nothing per spec)
|
|
330
|
-
// Note: Some interpretations might emit S → S or skip
|
|
331
401
|
} else if (pred.form === '^?') {
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
402
|
+
if (newSubject) {
|
|
403
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, newSubject, P, previousSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
404
|
+
} else if (O) {
|
|
405
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, O, P, S, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
335
406
|
}
|
|
336
407
|
}
|
|
337
408
|
});
|
|
338
409
|
}
|
|
339
410
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
const contextSubject = state.currentSubject;
|
|
343
|
-
if (!contextSubject) return;
|
|
411
|
+
function processListContext(contextSem, listTokens, state, contextSubject = null) {
|
|
412
|
+
if (!contextSubject) contextSubject = state.currentSubject;
|
|
344
413
|
|
|
345
414
|
listTokens.forEach(listToken => {
|
|
346
|
-
// Extract carriers from list item text
|
|
347
415
|
const carriers = extractInlineCarriers(listToken.text, listToken.range[0]);
|
|
348
|
-
|
|
349
|
-
// Find subject from carriers or list item annotation
|
|
350
416
|
let itemSubject = null;
|
|
351
417
|
let itemSubjectCarrier = null;
|
|
352
418
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
if (
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
itemSubjectCarrier = carrier;
|
|
359
|
-
break;
|
|
419
|
+
if (listToken.attrs) {
|
|
420
|
+
const itemSem = parseSemanticBlock(listToken.attrs);
|
|
421
|
+
if (itemSem.subject && itemSem.subject !== 'RESET') {
|
|
422
|
+
itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
|
|
423
|
+
itemSubjectCarrier = { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range };
|
|
360
424
|
}
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
if (!itemSubject) {
|
|
428
|
+
for (const carrier of carriers) {
|
|
429
|
+
if (carrier.attrs) {
|
|
430
|
+
const itemSem = parseSemanticBlock(carrier.attrs);
|
|
431
|
+
if (itemSem.subject && itemSem.subject !== 'RESET') {
|
|
432
|
+
itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
|
|
433
|
+
itemSubjectCarrier = carrier;
|
|
434
|
+
break;
|
|
435
|
+
}
|
|
367
436
|
}
|
|
368
437
|
}
|
|
369
438
|
}
|
|
370
439
|
|
|
371
|
-
if (!itemSubject) return;
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
emitQuad(
|
|
376
|
-
state.quads,
|
|
377
|
-
state.origin.quadIndex,
|
|
378
|
-
'list-context',
|
|
379
|
-
itemSubject,
|
|
380
|
-
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
381
|
-
state.df.namedNode(expandIRI(typeIRI, state.ctx)),
|
|
382
|
-
state.df
|
|
383
|
-
);
|
|
440
|
+
if (!itemSubject) return;
|
|
441
|
+
|
|
442
|
+
contextSem.types.forEach(t => {
|
|
443
|
+
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
444
|
+
emitQuad(state.quads, state.origin.quadIndex, 'list-context', itemSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandIRI(typeIRI, state.ctx)), state.df);
|
|
384
445
|
});
|
|
385
446
|
|
|
386
|
-
// Emit context relationships
|
|
387
447
|
contextSem.predicates.forEach(pred => {
|
|
388
448
|
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
389
|
-
|
|
390
449
|
if (pred.form === '^' || pred.form === '^?') {
|
|
391
|
-
|
|
392
|
-
emitQuad(state.quads, state.origin.quadIndex, 'list-context',
|
|
393
|
-
itemSubject, P, contextSubject, state.df);
|
|
450
|
+
emitQuad(state.quads, state.origin.quadIndex, 'list-context', itemSubject, P, contextSubject, state.df);
|
|
394
451
|
} else {
|
|
395
|
-
|
|
396
|
-
emitQuad(state.quads, state.origin.quadIndex, 'list-context',
|
|
397
|
-
contextSubject, P, itemSubject, state.df);
|
|
452
|
+
emitQuad(state.quads, state.origin.quadIndex, 'list-context', contextSubject, P, itemSubject, state.df);
|
|
398
453
|
}
|
|
399
454
|
});
|
|
400
455
|
|
|
401
|
-
// Process item's own annotations
|
|
402
456
|
const prevSubject = state.currentSubject;
|
|
403
457
|
state.currentSubject = itemSubject;
|
|
404
458
|
|
|
405
|
-
// Process the list token's own attributes
|
|
406
459
|
if (listToken.attrs) {
|
|
407
460
|
const itemSem = parseSemanticBlock(listToken.attrs);
|
|
408
|
-
|
|
409
|
-
const carrier = { type: 'list', text: listToken.text.replace(/\[([^\]]+)\]\([^)]+\)/, '$1'), range: listToken.range };
|
|
461
|
+
const carrier = { type: 'list', text: listToken.text, range: listToken.range, attrsRange: listToken.attrsRange || null, valueRange: listToken.valueRange || null };
|
|
410
462
|
processAnnotation(carrier, itemSem, state);
|
|
411
463
|
}
|
|
412
464
|
|
|
413
|
-
// Process inline carriers' attributes
|
|
414
465
|
carriers.forEach(carrier => {
|
|
415
466
|
if (carrier.attrs) {
|
|
416
467
|
const itemSem = parseSemanticBlock(carrier.attrs);
|
|
@@ -432,8 +483,6 @@ export function parse(text, options = {}) {
|
|
|
432
483
|
};
|
|
433
484
|
|
|
434
485
|
const tokens = scanTokens(text);
|
|
435
|
-
|
|
436
|
-
// Apply prefix declarations
|
|
437
486
|
tokens.filter(t => t.type === 'prefix').forEach(t => state.ctx[t.prefix] = t.iri);
|
|
438
487
|
|
|
439
488
|
for (let i = 0; i < tokens.length; i++) {
|
|
@@ -441,18 +490,26 @@ export function parse(text, options = {}) {
|
|
|
441
490
|
|
|
442
491
|
if (token.type === 'heading' && token.attrs) {
|
|
443
492
|
const sem = parseSemanticBlock(token.attrs);
|
|
444
|
-
const carrier = { type: 'heading', text: token.text, range: token.range };
|
|
493
|
+
const carrier = { type: 'heading', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
|
|
445
494
|
processAnnotation(carrier, sem, state);
|
|
446
495
|
} else if (token.type === 'code' && token.attrs) {
|
|
447
496
|
const sem = parseSemanticBlock(token.attrs);
|
|
448
|
-
const carrier = { type: 'code', text: token.text, range: token.range };
|
|
497
|
+
const carrier = { type: 'code', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
|
|
449
498
|
processAnnotation(carrier, sem, state);
|
|
450
499
|
} else if (token.type === 'blockquote' && token.attrs) {
|
|
451
500
|
const sem = parseSemanticBlock(token.attrs);
|
|
452
|
-
const carrier = { type: 'blockquote', text: token.text, range: token.range };
|
|
501
|
+
const carrier = { type: 'blockquote', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null };
|
|
453
502
|
processAnnotation(carrier, sem, state);
|
|
454
503
|
} else if (token.type === 'para') {
|
|
455
|
-
// Check for
|
|
504
|
+
// Check for standalone subject declarations: {=iri} on its own line
|
|
505
|
+
const standaloneSubjectMatch = token.text.match(/^\s*\{=(.*?)\}\s*$/);
|
|
506
|
+
if (standaloneSubjectMatch) {
|
|
507
|
+
const sem = parseSemanticBlock(`{=${standaloneSubjectMatch[1]}}`);
|
|
508
|
+
const attrsStart = token.range[0] + token.text.indexOf('{=');
|
|
509
|
+
const attrsEnd = attrsStart + (standaloneSubjectMatch[1] ? standaloneSubjectMatch[1].length : 0);
|
|
510
|
+
processAnnotation({ type: 'standalone', text: '', range: token.range, attrsRange: [attrsStart, attrsEnd], valueRange: null }, sem, state);
|
|
511
|
+
}
|
|
512
|
+
|
|
456
513
|
const followingLists = [];
|
|
457
514
|
let j = i + 1;
|
|
458
515
|
while (j < tokens.length && tokens[j].type === 'list') {
|
|
@@ -460,17 +517,28 @@ export function parse(text, options = {}) {
|
|
|
460
517
|
j++;
|
|
461
518
|
}
|
|
462
519
|
|
|
463
|
-
// Check if this paragraph ends with {attrs} and is followed by lists
|
|
464
520
|
const contextMatch = token.text.match(/^(.+?)\s*\{([^}]+)\}$/);
|
|
465
521
|
if (contextMatch && followingLists.length > 0) {
|
|
466
|
-
// This is a list context annotation
|
|
467
522
|
const contextSem = parseSemanticBlock(`{${contextMatch[2]}}`);
|
|
468
|
-
|
|
523
|
+
let contextSubject = state.currentSubject;
|
|
524
|
+
|
|
525
|
+
// Always look for the most recent heading subject for context
|
|
526
|
+
for (let k = i - 1; k >= 0; k--) {
|
|
527
|
+
const prevToken = tokens[k];
|
|
528
|
+
if (prevToken.type === 'heading' && prevToken.attrs) {
|
|
529
|
+
const headingSem = parseSemanticBlock(prevToken.attrs);
|
|
530
|
+
if (headingSem.subject) {
|
|
531
|
+
contextSubject = state.df.namedNode(expandIRI(headingSem.subject, state.ctx));
|
|
532
|
+
break;
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
processListContext(contextSem, followingLists, state, contextSubject);
|
|
469
538
|
i = j - 1;
|
|
470
539
|
continue;
|
|
471
540
|
}
|
|
472
541
|
|
|
473
|
-
// Process inline carriers
|
|
474
542
|
const carriers = extractInlineCarriers(token.text, token.range[0]);
|
|
475
543
|
carriers.forEach(carrier => {
|
|
476
544
|
if (carrier.attrs) {
|
|
@@ -484,76 +552,429 @@ export function parse(text, options = {}) {
|
|
|
484
552
|
return { quads: state.quads, origin: state.origin, context: state.ctx };
|
|
485
553
|
}
|
|
486
554
|
|
|
487
|
-
function shortenIRI(iri, ctx) {
|
|
555
|
+
/**
 * Compact an absolute IRI using the prefixes declared in a context object.
 *
 * Resolution order:
 *   1. An IRI under `ctx['@vocab']` is returned without the vocabulary prefix.
 *   2. Otherwise the first context entry whose namespace prefixes the IRI
 *      yields a `prefix:local` form.
 *   3. Anything else is returned unchanged.
 *
 * @param {string} iri - IRI to shorten. Only strings starting with "http" are
 *   considered; every other value is returned as-is.
 * @param {Object} [ctx] - Map of prefix -> namespace, optionally with `@vocab`.
 * @returns {*} The shortened string, or `iri` itself when nothing applies.
 */
export function shortenIRI(iri, ctx) {
    if (typeof iri !== 'string' || !iri.startsWith('http')) return iri;
    const context = ctx ?? {};

    const vocab = context['@vocab'];
    if (typeof vocab === 'string' && vocab !== '' && iri.startsWith(vocab)) {
        return iri.substring(vocab.length);
    }

    for (const [prefix, namespace] of Object.entries(context)) {
        // '@'-prefixed keys are context keywords, never usable as a CURIE prefix.
        if (prefix.startsWith('@')) continue;
        // An empty namespace would "match" every IRI; a non-string one cannot match at all.
        if (typeof namespace !== 'string' || namespace === '') continue;
        if (iri.startsWith(namespace)) {
            return `${prefix}:${iri.substring(namespace.length)}`;
        }
    }

    return iri;
}
|
|
502
565
|
|
|
566
|
+
/**
 * Stateless helpers used by serialize(): reading the text spans recorded on a
 * block, editing `{...}` attribute tokens, and normalising quads before they
 * are matched against origin data.
 */
const serializeHelpers = {
    /**
     * Read the `{...}` attribute span of a block.
     * @param {Object} block - Block carrying `attrsRange: { start, end }`.
     * @param {string} text - Document text the range indexes into.
     * @returns {{start: number, end: number, text: string}|null} The span, or
     *   null when the range is missing, non-finite, negative or empty.
     */
    readAttrsSpan: (block, text) => {
        if (!block?.attrsRange) return null;
        const { start, end } = block.attrsRange;
        const usable = Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end > start;
        return usable ? { start, end, text: text.substring(start, end) } : null;
    },

    /**
     * Read the carrier value span of a block. Unlike readAttrsSpan, a
     * zero-length range (`end === start`) is accepted.
     * @param {Object} block - Block carrying `valueRange: { start, end }`.
     * @param {string} text - Document text the range indexes into.
     * @returns {{start: number, end: number, text: string}|null}
     */
    readValueSpan: (block, text) => {
        if (!block?.valueRange) return null;
        const { start, end } = block.valueRange;
        const usable = Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start;
        return usable ? { start, end, text: text.substring(start, end) } : null;
    },

    /**
     * Split an attribute block such as `{name .Person @en}` into its tokens.
     * @param {string} attrsText - Attribute text, with or without the braces.
     * @returns {string[]} Whitespace-separated tokens; empty array for blank input.
     */
    normalizeAttrsTokens: (attrsText) => {
        const inner = String(attrsText || '').replace(/^\s*\{|\}\s*$/g, '').trim();
        return inner ? inner.split(/\s+/).filter(Boolean) : [];
    },

    /**
     * Collect the raw token text of every entry recorded on a block.
     * @returns {string[]|null} Tokens, or null when the block has no entries.
     */
    blockTokensFromEntries: (block) => (
        block?.entries?.length ? block.entries.map(e => e.raw).filter(Boolean) : null
    ),

    /**
     * Return a copy of the block's entries without the entry at `entryIndex`.
     * @returns {Object[]|null} New array, or null when the index is out of range.
     */
    removeEntryAt: (block, entryIndex) => {
        const entries = block?.entries;
        if (!entries || entryIndex == null || entryIndex < 0 || entryIndex >= entries.length) return null;
        return [...entries.slice(0, entryIndex), ...entries.slice(entryIndex + 1)];
    },

    /**
     * Replace any language/datatype entries of a block with the ones implied
     * by a literal. A language tag wins over a datatype; xsd:string is implicit
     * and never written.
     * @param {Object} block - Block with an `entries` array.
     * @param {Object} lit - Literal term (`language`, `datatype`).
     * @param {Object} ctx - Prefix context used to shorten the datatype IRI.
     * @returns {Object[]|null} New entry list, or null when the block has no entries.
     */
    replaceLangDatatypeEntries: (block, lit, ctx) => {
        if (!block?.entries) return null;
        const kept = block.entries.filter(e => e.kind !== 'language' && e.kind !== 'datatype');
        if (lit?.language) {
            return [...kept, { kind: 'language', language: lit.language, raw: `@${lit.language}`, relRange: { start: 0, end: 0 } }];
        }
        // Accept both a datatype term and a bare IRI string.
        const dt = typeof lit?.datatype === 'string' ? lit.datatype : lit?.datatype?.value;
        if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
            const short = shortenIRI(dt, ctx);
            return [...kept, { kind: 'datatype', datatype: short, raw: `^^${short}`, relRange: { start: 0, end: 0 } }];
        }
        return kept;
    },

    /** Render tokens back into a `{...}` block; an empty list yields `{}`. */
    writeAttrsTokens: (tokens) => `{${tokens.join(' ').trim()}}`,

    /**
     * Remove the first token accepted by `matchFn`.
     * @returns {{tokens: string[], removed: boolean}} The original array when
     *   nothing matched, otherwise a copy without the matched token.
     */
    removeOneToken: (tokens, matchFn) => {
        const at = tokens.findIndex(matchFn);
        if (at === -1) return { tokens, removed: false };
        return { tokens: [...tokens.slice(0, at), ...tokens.slice(at + 1)], removed: true };
    },

    /**
     * Return a shallow copy of a quad whose literal object always has a string
     * `language` and a datatype *term* (`{ termType, value }`).
     *
     * The datatype is kept as a term because every reader in this file
     * dereferences `datatype.value`; collapsing it to a bare string made typed
     * literals look untyped after normalisation.
     *
     * @param {Object} q - Quad-like object with `subject`, `predicate`, `object`.
     * @returns {Object|null} Normalised copy, or null for a missing quad.
     */
    normalizeQuad: (q) => {
        if (!q) return null;
        const { subject, predicate, object } = q;
        if (object?.termType !== 'Literal') return { ...q, subject, predicate, object };

        const language = typeof object.language === 'string' ? object.language : '';
        const rawDatatype = object.datatype;
        let datatype;
        if (typeof rawDatatype === 'string' && rawDatatype !== '') {
            datatype = { termType: 'NamedNode', value: rawDatatype };
        } else if (rawDatatype?.value) {
            datatype = rawDatatype;
        } else {
            datatype = { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' };
        }
        // termType/value are copied explicitly: object spread only copies own
        // properties, so terms exposing them through getters would lose them.
        const literal = { ...object, termType: object.termType, value: object.value, language, datatype };
        return { ...q, subject, predicate, object: literal };
    },

    /**
     * Build the origin index key for a quad via quadIndexKey (defined outside
     * this block) after normalising it.
     * @returns {*} The key, or null for a missing quad.
     */
    quadToKeyForOrigin: (q) => {
        const nq = serializeHelpers.normalizeQuad(q);
        return nq ? quadIndexKey(nq.subject, nq.predicate, nq.object) : null;
    },

    /**
     * Decode an index key of the form `JSON([subject, predicate, JSON(object)])`.
     * @returns {{s: *, p: *, o: *}|null} Decoded parts, or null when the key
     *   is not in that shape.
     */
    parseQuadIndexKey: (key) => {
        try {
            const [s, p, objKey] = JSON.parse(key);
            return { s, p, o: JSON.parse(objKey) };
        } catch {
            // Malformed keys are treated as "no match" rather than aborting serialisation.
            return null;
        }
    },

    /**
     * Make a literal value safe to write into a block's carrier text.
     * Code carriers keep their lines (newlines normalised to `\n`); every
     * other carrier is collapsed to one line, and span/link carriers also
     * have square brackets replaced by spaces.
     * @param {Object} block - Block whose `carrierType` selects the rule.
     * @param {*} raw - Value to sanitise; null/undefined become ''.
     * @returns {string}
     */
    sanitizeCarrierValueForBlock: (block, raw) => {
        const value = String(raw ?? '');
        const carrierType = block?.carrierType;
        if (carrierType === 'code') return value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
        const oneLine = value.replace(/[\n\r]+/g, ' ').trim();
        if (carrierType === 'span' || carrierType === 'link') return oneLine.replace(/[\[\]]/g, ' ');
        return oneLine;
    }
};
|
|
647
|
+
|
|
503
648
|
export function serialize({ text, diff, origin, options = {} }) {
|
|
504
|
-
if (!diff || (!diff.add?.length && !diff.delete?.length))
|
|
649
|
+
if (!diff || (!diff.add?.length && !diff.delete?.length)) {
|
|
650
|
+
const reparsed = parse(text, { context: options.context || {} });
|
|
651
|
+
return { text, origin: reparsed.origin };
|
|
652
|
+
}
|
|
505
653
|
|
|
654
|
+
const base = origin || parse(text, { context: options.context || {} }).origin;
|
|
506
655
|
let result = text;
|
|
507
656
|
const edits = [];
|
|
508
657
|
const ctx = options.context || {};
|
|
509
658
|
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
659
|
+
const findOriginEntryForLiteralByValue = (subjectIri, predicateIri, literalValue) => {
|
|
660
|
+
for (const [k, entry] of base?.quadIndex || []) {
|
|
661
|
+
const parsed = serializeHelpers.parseQuadIndexKey(k);
|
|
662
|
+
if (!parsed) continue;
|
|
663
|
+
if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
|
|
664
|
+
if (parsed.o?.t !== 'Literal') continue;
|
|
665
|
+
if (parsed.o?.v !== literalValue) continue;
|
|
666
|
+
return entry;
|
|
667
|
+
}
|
|
668
|
+
return null;
|
|
669
|
+
};
|
|
670
|
+
|
|
671
|
+
const findLiteralCarrierBlocksBySP = (subjectIri, predicateIri) => {
|
|
672
|
+
const out = [];
|
|
673
|
+
for (const [k, entry] of base?.quadIndex || []) {
|
|
674
|
+
const parsed = serializeHelpers.parseQuadIndexKey(k);
|
|
675
|
+
if (!parsed) continue;
|
|
676
|
+
if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
|
|
677
|
+
if (parsed.o?.t !== 'Literal') continue;
|
|
678
|
+
const blockId = entry?.blockId || entry;
|
|
679
|
+
const block = blockId ? base?.blocks?.get(blockId) : null;
|
|
680
|
+
if (block) out.push({ block, entry, obj: parsed.o });
|
|
681
|
+
}
|
|
682
|
+
return out;
|
|
683
|
+
};
|
|
684
|
+
|
|
685
|
+
const objectSignature = (o) => {
|
|
686
|
+
if (!o) return '';
|
|
687
|
+
if (o.termType === 'Literal') {
|
|
688
|
+
return JSON.stringify({ t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' });
|
|
689
|
+
}
|
|
690
|
+
return JSON.stringify({ t: o.termType, v: o.value });
|
|
691
|
+
};
|
|
692
|
+
|
|
693
|
+
const anchors = new Map();
|
|
694
|
+
for (const q0 of diff.delete || []) {
|
|
695
|
+
const q = serializeHelpers.normalizeQuad(q0);
|
|
696
|
+
if (!q) continue;
|
|
697
|
+
if (!q?.subject || !q?.object || !q?.predicate) continue;
|
|
698
|
+
const key = JSON.stringify([q.subject.value, objectSignature(q.object)]);
|
|
699
|
+
const qk = serializeHelpers.quadToKeyForOrigin(q);
|
|
700
|
+
const entry = qk ? base?.quadIndex?.get(qk) : null;
|
|
701
|
+
const blockId = entry?.blockId || entry;
|
|
702
|
+
const block = blockId ? base?.blocks?.get(blockId) : null;
|
|
703
|
+
if (!block?.attrsRange) continue;
|
|
704
|
+
anchors.set(key, { block, entry });
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
const addBySP = new Map();
|
|
708
|
+
for (const q0 of diff.add || []) {
|
|
709
|
+
const q = serializeHelpers.normalizeQuad(q0);
|
|
710
|
+
if (!q) continue;
|
|
711
|
+
if (!q?.subject || !q?.predicate || !q?.object) continue;
|
|
712
|
+
const k = JSON.stringify([q.subject.value, q.predicate.value]);
|
|
713
|
+
const list = addBySP.get(k) || [];
|
|
714
|
+
list.push(q);
|
|
715
|
+
addBySP.set(k, list);
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
const consumedAdds = new Set();
|
|
719
|
+
const literalUpdates = [];
|
|
720
|
+
for (const dq0 of diff.delete || []) {
|
|
721
|
+
const dq = serializeHelpers.normalizeQuad(dq0);
|
|
722
|
+
if (!dq) continue;
|
|
723
|
+
if (!dq?.subject || !dq?.predicate || !dq?.object) continue;
|
|
724
|
+
if (dq.object.termType !== 'Literal') continue;
|
|
725
|
+
const k = JSON.stringify([dq.subject.value, dq.predicate.value]);
|
|
726
|
+
const candidates = addBySP.get(k) || [];
|
|
727
|
+
const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(serializeHelpers.quadToKeyForOrigin(x)));
|
|
728
|
+
if (!aq) continue;
|
|
729
|
+
|
|
730
|
+
const dqk = serializeHelpers.quadToKeyForOrigin(dq);
|
|
731
|
+
let entry = dqk ? base?.quadIndex?.get(dqk) : null;
|
|
732
|
+
if (!entry && dq.object?.termType === 'Literal') {
|
|
733
|
+
entry = findOriginEntryForLiteralByValue(dq.subject.value, dq.predicate.value, dq.object.value);
|
|
734
|
+
}
|
|
735
|
+
const blockId = entry?.blockId || entry;
|
|
736
|
+
const block = blockId ? base?.blocks?.get(blockId) : null;
|
|
737
|
+
if (!block) continue;
|
|
738
|
+
|
|
739
|
+
literalUpdates.push({ deleteQuad: dq, addQuad: aq, entry, block });
|
|
740
|
+
consumedAdds.add(serializeHelpers.quadToKeyForOrigin(aq));
|
|
741
|
+
}
|
|
520
742
|
|
|
521
|
-
|
|
522
|
-
|
|
743
|
+
// Literal adds that have no paired delete: when exactly one existing literal
// carrier for the same subject/predicate carries the same language, rewrite
// that carrier in place instead of appending a new one later.
for (const q0 of diff.add || []) {
    const quad = serializeHelpers.normalizeQuad(q0);
    if (!quad || quad.object?.termType !== 'Literal') continue;
    // Malformed quads are skipped here just as the other passes skip them.
    if (!quad.subject || !quad.predicate) continue;

    const quadKey = serializeHelpers.quadToKeyForOrigin(quad);
    if (consumedAdds.has(quadKey)) continue;

    const matches = findLiteralCarrierBlocksBySP(quad.subject.value, quad.predicate.value);
    if (matches.length === 0) continue;

    const desiredLang = quad.object.language || '';
    const sameLang = matches.filter(m => {
        const langEntry = (m.block?.entries || []).find(e => e.kind === 'language');
        return (langEntry?.language || '') === desiredLang;
    });

    // Zero or several candidates: the target would be ambiguous, so leave it to the append pass.
    if (sameLang.length !== 1) continue;
    const target = sameLang[0].block;
    const vSpan = serializeHelpers.readValueSpan(target, text);
    if (!vSpan) continue;

    const newValue = serializeHelpers.sanitizeCarrierValueForBlock(target, quad.object.value);
    edits.push({ start: vSpan.start, end: vSpan.end, text: newValue });

    const aSpan = serializeHelpers.readAttrsSpan(target, text);
    if (aSpan && target?.entries?.length) {
        const nextEntries = serializeHelpers.replaceLangDatatypeEntries(target, quad.object, ctx);
        if (nextEntries) {
            const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
            edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
        }
    }

    // consumedAdds is queried by key everywhere, so the key (not the quad
    // object) must be recorded; otherwise the later add pass handles this
    // quad a second time and appends a duplicate carrier.
    consumedAdds.add(quadKey);
}
|
|
534
778
|
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
779
|
+
// Rebuild an attribute token list so its language/datatype markers match a
// literal: existing `@lang` / `^^datatype` tokens are dropped, then the
// literal's language (preferred) or non-xsd:string datatype is appended.
//
// Declared before the loop that calls it: a `const` binding cannot be used
// until its declaration has executed, and the loop runs immediately.
const updateAttrsDatatypeLang = (tokens, newLit) => {
    const predicatesAndTypes = tokens.filter(t => !t.startsWith('@') && !t.startsWith('^^'));
    if (newLit?.language) return [...predicatesAndTypes, `@${newLit.language}`];
    // Accept both a datatype term and a bare IRI string.
    const dt = typeof newLit?.datatype === 'string' ? newLit.datatype : newLit?.datatype?.value;
    if (dt && dt !== 'http://www.w3.org/2001/XMLSchema#string') {
        return [...predicatesAndTypes, `^^${shortenIRI(dt, ctx)}`];
    }
    return predicatesAndTypes;
};

// Apply paired delete/add literal updates: replace the carrier value, then
// bring the block's language/datatype annotation in line with the new literal.
for (const u of literalUpdates) {
    const span = serializeHelpers.readValueSpan(u.block, text);
    if (span) {
        const newValue = serializeHelpers.sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
        edits.push({ start: span.start, end: span.end, text: newValue });
    }

    const aSpan = serializeHelpers.readAttrsSpan(u.block, text);
    if (!aSpan) continue;

    if (u.block?.entries?.length) {
        // Structured path: the block recorded its entries, so rewrite from them.
        const nextEntries = serializeHelpers.replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
        if (nextEntries) {
            const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
            // writeAttrsTokens([]) already yields '{}'.
            edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
        }
        continue;
    }

    // Fallback path: re-tokenise the raw attribute text and only emit an edit when something changed.
    const tokens = serializeHelpers.normalizeAttrsTokens(aSpan.text);
    const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object);
    if (updated.join(' ') !== tokens.join(' ')) {
        edits.push({ start: aSpan.start, end: aSpan.end, text: serializeHelpers.writeAttrsTokens(updated) });
    }
}
|
|
545
821
|
|
|
546
|
-
|
|
547
|
-
|
|
822
|
+
if (diff.delete) {
|
|
823
|
+
diff.delete.forEach(q0 => {
|
|
824
|
+
const quad = serializeHelpers.normalizeQuad(q0);
|
|
825
|
+
if (!quad) return;
|
|
826
|
+
if (!quad?.subject || !quad?.predicate || !quad?.object) return;
|
|
548
827
|
|
|
549
828
|
if (quad.object.termType === 'Literal') {
|
|
550
|
-
|
|
829
|
+
const isUpdated = literalUpdates.some(u =>
|
|
830
|
+
u.deleteQuad.subject.value === quad.subject.value &&
|
|
831
|
+
u.deleteQuad.predicate.value === quad.predicate.value &&
|
|
832
|
+
u.deleteQuad.object.value === quad.object.value
|
|
833
|
+
);
|
|
834
|
+
if (isUpdated) return;
|
|
835
|
+
}
|
|
836
|
+
|
|
837
|
+
const key = serializeHelpers.quadToKeyForOrigin(quad);
|
|
838
|
+
let entry = key ? base?.quadIndex?.get(key) : null;
|
|
839
|
+
if (!entry && quad.object?.termType === 'Literal') {
|
|
840
|
+
entry = findOriginEntryForLiteralByValue(quad.subject.value, quad.predicate.value, quad.object.value);
|
|
841
|
+
}
|
|
842
|
+
const blockId = entry?.blockId || entry;
|
|
843
|
+
if (!blockId) return;
|
|
844
|
+
const block = base?.blocks?.get(blockId);
|
|
845
|
+
const span = serializeHelpers.readAttrsSpan(block, text);
|
|
846
|
+
if (!span) return;
|
|
847
|
+
|
|
848
|
+
if (entry?.entryIndex != null && block?.entries?.length) {
|
|
849
|
+
const nextEntries = serializeHelpers.removeEntryAt(block, entry.entryIndex);
|
|
850
|
+
if (!nextEntries) return;
|
|
851
|
+
const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
|
|
852
|
+
if (nextTokens.length === 0) {
|
|
853
|
+
edits.push({ start: span.start, end: span.end, text: '{}' });
|
|
854
|
+
} else {
|
|
855
|
+
edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(nextTokens) });
|
|
856
|
+
}
|
|
857
|
+
return;
|
|
858
|
+
}
|
|
859
|
+
|
|
860
|
+
const tokens = serializeHelpers.normalizeAttrsTokens(span.text);
|
|
861
|
+
let updated = tokens;
|
|
862
|
+
let removed = false;
|
|
863
|
+
|
|
864
|
+
if (entry?.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
|
|
865
|
+
const expectedType = entry.expandedType || quad.object.value;
|
|
866
|
+
({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
|
|
867
|
+
if (!t.startsWith('.')) return false;
|
|
868
|
+
const raw = t.slice(1);
|
|
869
|
+
return expandIRI(raw, ctx) === expectedType;
|
|
870
|
+
}));
|
|
551
871
|
} else {
|
|
552
|
-
|
|
872
|
+
const expectedPred = entry?.expandedPredicate || quad.predicate.value;
|
|
873
|
+
const expectedForm = entry?.form;
|
|
874
|
+
({ tokens: updated, removed } = serializeHelpers.removeOneToken(tokens, t => {
|
|
875
|
+
const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
|
|
876
|
+
if (!m) return false;
|
|
877
|
+
const form = m[1] || '';
|
|
878
|
+
const raw = m[2];
|
|
879
|
+
if (expectedForm != null && form !== expectedForm) return false;
|
|
880
|
+
return expandIRI(raw, ctx) === expectedPred;
|
|
881
|
+
}));
|
|
882
|
+
}
|
|
883
|
+
|
|
884
|
+
if (!removed) return;
|
|
885
|
+
|
|
886
|
+
if (updated.length === 0) {
|
|
887
|
+
edits.push({ start: span.start, end: span.end, text: '{}' });
|
|
888
|
+
return;
|
|
889
|
+
}
|
|
890
|
+
|
|
891
|
+
const newAttrs = serializeHelpers.writeAttrsTokens(updated);
|
|
892
|
+
edits.push({ start: span.start, end: span.end, text: newAttrs });
|
|
893
|
+
});
|
|
894
|
+
}
|
|
895
|
+
|
|
896
|
+
if (diff.add) {
|
|
897
|
+
diff.add.forEach(q0 => {
|
|
898
|
+
const quad = serializeHelpers.normalizeQuad(q0);
|
|
899
|
+
if (!quad) return;
|
|
900
|
+
if (!quad?.subject || !quad?.predicate || !quad?.object) return;
|
|
901
|
+
|
|
902
|
+
if (consumedAdds.has(serializeHelpers.quadToKeyForOrigin(quad))) return;
|
|
903
|
+
|
|
904
|
+
const anchorKey = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
|
|
905
|
+
const anchored = anchors.get(anchorKey) || null;
|
|
906
|
+
let targetBlock = anchored?.block || null;
|
|
907
|
+
|
|
908
|
+
if (!targetBlock) {
|
|
909
|
+
for (const [, block] of base?.blocks || []) {
|
|
910
|
+
if (block.subject === quad.subject.value && block.attrsRange) {
|
|
911
|
+
targetBlock = block;
|
|
912
|
+
break;
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
if (quad.object.termType === 'Literal' || quad.object.termType === 'NamedNode') {
|
|
918
|
+
if (!targetBlock) {
|
|
919
|
+
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
920
|
+
if (quad.object.termType === 'Literal') {
|
|
921
|
+
const value = String(quad.object.value ?? '');
|
|
922
|
+
let ann = predShort;
|
|
923
|
+
if (quad.object.language) ann += ` @${quad.object.language}`;
|
|
924
|
+
else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
|
|
925
|
+
ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
|
|
926
|
+
}
|
|
927
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
|
|
928
|
+
} else {
|
|
929
|
+
const full = quad.object.value;
|
|
930
|
+
const label = shortenIRI(full, ctx);
|
|
931
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
|
|
932
|
+
}
|
|
933
|
+
return;
|
|
934
|
+
}
|
|
935
|
+
|
|
936
|
+
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
937
|
+
if (quad.object.termType === 'Literal') {
|
|
938
|
+
const value = String(quad.object.value ?? '');
|
|
939
|
+
let ann = predShort;
|
|
940
|
+
if (quad.object.language) ann += ` @${quad.object.language}`;
|
|
941
|
+
else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
|
|
942
|
+
ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
|
|
943
|
+
}
|
|
944
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
|
|
945
|
+
return;
|
|
946
|
+
}
|
|
947
|
+
|
|
948
|
+
if (quad.object.termType === 'NamedNode') {
|
|
949
|
+
const full = quad.object.value;
|
|
950
|
+
const label = shortenIRI(full, ctx);
|
|
951
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${label}](${full}) {?${predShort}}` });
|
|
952
|
+
return;
|
|
953
|
+
}
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
const span = serializeHelpers.readAttrsSpan(targetBlock, text);
|
|
957
|
+
if (!span) return;
|
|
958
|
+
const tokens = serializeHelpers.blockTokensFromEntries(targetBlock) || serializeHelpers.normalizeAttrsTokens(span.text);
|
|
959
|
+
|
|
960
|
+
if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
|
|
961
|
+
const typeShort = shortenIRI(quad.object.value, ctx);
|
|
962
|
+
const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
|
|
963
|
+
if (!typeToken) return;
|
|
964
|
+
if (tokens.includes(typeToken)) return;
|
|
965
|
+
const updated = [...tokens, typeToken];
|
|
966
|
+
edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
|
|
967
|
+
return;
|
|
553
968
|
}
|
|
554
969
|
|
|
555
|
-
const
|
|
556
|
-
|
|
970
|
+
const form = anchored?.entry?.form;
|
|
971
|
+
if (form == null) return;
|
|
972
|
+
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
973
|
+
const predToken = `${form}${predShort}`;
|
|
974
|
+
if (!predToken) return;
|
|
975
|
+
if (tokens.includes(predToken)) return;
|
|
976
|
+
const updated = [...tokens, predToken];
|
|
977
|
+
edits.push({ start: span.start, end: span.end, text: serializeHelpers.writeAttrsTokens(updated) });
|
|
557
978
|
});
|
|
558
979
|
}
|
|
559
980
|
|
|
@@ -562,7 +983,8 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
562
983
|
result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
|
|
563
984
|
});
|
|
564
985
|
|
|
565
|
-
|
|
986
|
+
const reparsed = parse(result, { context: options.context || {} });
|
|
987
|
+
return { text: result, origin: reparsed.origin };
|
|
566
988
|
}
|
|
567
989
|
|
|
568
|
-
export default { parse, serialize, parseSemanticBlock };
|
|
990
|
+
export default { parse, serialize, parseSemanticBlock, shortenIRI };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.4",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|