mdld-parse 0.4.3 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -4
- package/package.json +1 -1
- package/src/parse.js +208 -341
- package/src/serialize.js +6 -0
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# MD-LD
|
|
1
|
+
# MD-LD
|
|
2
2
|
|
|
3
3
|
**Markdown-Linked Data (MD-LD)** — a deterministic, streaming-friendly RDF authoring format that extends Markdown with explicit `{...}` annotations.
|
|
4
4
|
|
|
@@ -40,7 +40,7 @@ ex:armstrong schema:name "Neil Armstrong" .
|
|
|
40
40
|
- **Three predicate forms**: `p` (S→L), `?p` (S→O), `!p` (O→S)
|
|
41
41
|
- **Type declarations**: `.Class` for rdf:type triples
|
|
42
42
|
- **Datatypes & language**: `^^xsd:date` and `@en` support
|
|
43
|
-
- **Lists**: Explicit subject declarations
|
|
43
|
+
- **Lists**: Explicit subject declarations and numbered ordered lists with `rdf:List` support
|
|
44
44
|
- **Fragments**: Built-in document structuring with `{=#fragment}`
|
|
45
45
|
- **Round-trip serialization**: Markdown ↔ RDF ↔ Markdown preserves structure
|
|
46
46
|
|
|
@@ -184,13 +184,12 @@ ex:armstrong a schema:Person .
|
|
|
184
184
|
|
|
185
185
|
### Lists
|
|
186
186
|
|
|
187
|
-
Lists require explicit subjects per item
|
|
187
|
+
Lists require explicit subjects per item.
|
|
188
188
|
|
|
189
189
|
```markdown
|
|
190
190
|
# Recipe {=ex:recipe}
|
|
191
191
|
|
|
192
192
|
Ingredients: {?ingredient .Ingredient}
|
|
193
|
-
|
|
194
193
|
- Flour {=ex:flour name}
|
|
195
194
|
- Water {=ex:water name}
|
|
196
195
|
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.4.3",
|
|
3
|
+
"version": "0.5.1",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
package/src/parse.js
CHANGED
|
@@ -9,23 +9,19 @@ import {
|
|
|
9
9
|
hash
|
|
10
10
|
} from './utils.js';
|
|
11
11
|
|
|
12
|
-
// Constants and patterns
|
|
13
12
|
const URL_REGEX = /^[a-zA-Z][a-zA-Z0-9+.-]*:/;
|
|
14
13
|
const FENCE_REGEX = /^(`{3,})(.*)/;
|
|
15
14
|
const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;
|
|
16
15
|
const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
17
|
-
const
|
|
16
|
+
const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
|
|
18
17
|
const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
19
18
|
const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
|
|
20
19
|
const LIST_CONTEXT_REGEX = /^(.+?)\s*\{([^}]+)\}$/;
|
|
21
|
-
|
|
22
|
-
// Inline carrier pattern constants
|
|
23
20
|
const INLINE_CARRIER_PATTERNS = {
|
|
24
21
|
EMPHASIS: /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y,
|
|
25
22
|
CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
|
|
26
23
|
};
|
|
27
24
|
|
|
28
|
-
// Semantic block cache to avoid repeated parsing
|
|
29
25
|
const semCache = {};
|
|
30
26
|
const EMPTY_SEM = Object.freeze({ predicates: [], types: [], subject: null });
|
|
31
27
|
|
|
@@ -39,46 +35,50 @@ function parseSemCached(attrs) {
|
|
|
39
35
|
return sem;
|
|
40
36
|
}
|
|
41
37
|
|
|
38
|
+
function calcRangeInfo(line, attrs, lineStart, prefixLength, valueLength) {
|
|
39
|
+
const wsLength = prefixLength < line.length && line[prefixLength] === ' ' ? 1 :
|
|
40
|
+
line.slice(prefixLength).match(/^\s+/)?.[0]?.length || 0;
|
|
41
|
+
const valueStartInLine = prefixLength + wsLength;
|
|
42
|
+
return {
|
|
43
|
+
valueRange: [lineStart + valueStartInLine, lineStart + valueStartInLine + valueLength],
|
|
44
|
+
attrsRange: calcAttrsRange(line, attrs, lineStart)
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
|
|
42
48
|
function calcAttrsRange(line, attrs, lineStart) {
|
|
43
49
|
if (!attrs) return null;
|
|
44
50
|
const attrsStartInLine = line.lastIndexOf(attrs);
|
|
45
51
|
return attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null;
|
|
46
52
|
}
|
|
47
53
|
|
|
48
|
-
function calcValueRange(lineStart, valueStartInLine, valueEndInLine) {
|
|
49
|
-
return [lineStart + valueStartInLine, lineStart + valueEndInLine];
|
|
50
|
-
}
|
|
51
|
-
|
|
52
54
|
function createToken(type, range, text, attrs = null, attrsRange = null, valueRange = null, extra = {}) {
|
|
53
55
|
const token = { type, range, text, attrs, attrsRange, valueRange, ...extra };
|
|
54
|
-
// Add lazy carrier caching
|
|
55
56
|
Object.defineProperty(token, '_carriers', {
|
|
56
|
-
enumerable: false,
|
|
57
|
-
writable: true,
|
|
58
|
-
value: null
|
|
57
|
+
enumerable: false, writable: true, value: null
|
|
59
58
|
});
|
|
60
59
|
return token;
|
|
61
60
|
}
|
|
62
61
|
|
|
63
62
|
function getCarriers(token) {
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
return [];
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
if (!token._carriers) {
|
|
70
|
-
token._carriers = extractInlineCarriers(token.text, token.range[0]);
|
|
71
|
-
}
|
|
72
|
-
return token._carriers;
|
|
63
|
+
if (token.type === 'code') return [];
|
|
64
|
+
return token._carriers || (token._carriers = extractInlineCarriers(token.text, token.range[0]));
|
|
73
65
|
}
|
|
74
66
|
|
|
67
|
+
const createListToken = (type, line, lineStart, pos, match, indent = null) => {
|
|
68
|
+
const attrs = match[4] || null;
|
|
69
|
+
const prefix = match[1].length + (match[2] ? match[2].length : 0);
|
|
70
|
+
const rangeInfo = calcRangeInfo(line, attrs, lineStart, prefix, match[3].length);
|
|
71
|
+
const extra = indent !== null ? { indent } : { indent: match[1].length };
|
|
72
|
+
return createToken(type, [lineStart, pos - 1], match[3].trim(), attrs,
|
|
73
|
+
rangeInfo.attrsRange, rangeInfo.valueRange, extra);
|
|
74
|
+
};
|
|
75
|
+
|
|
75
76
|
function scanTokens(text) {
|
|
76
77
|
const tokens = [];
|
|
77
78
|
const lines = text.split('\n');
|
|
78
79
|
let pos = 0;
|
|
79
80
|
let codeBlock = null;
|
|
80
81
|
|
|
81
|
-
// Token processors in order of priority
|
|
82
82
|
const processors = [
|
|
83
83
|
{
|
|
84
84
|
test: line => line.startsWith('```'),
|
|
@@ -114,14 +114,14 @@ function scanTokens(text) {
|
|
|
114
114
|
});
|
|
115
115
|
codeBlock = null;
|
|
116
116
|
}
|
|
117
|
-
return true;
|
|
117
|
+
return true;
|
|
118
118
|
}
|
|
119
119
|
},
|
|
120
120
|
{
|
|
121
121
|
test: () => codeBlock,
|
|
122
122
|
process: line => {
|
|
123
123
|
codeBlock.content.push(line);
|
|
124
|
-
return true;
|
|
124
|
+
return true;
|
|
125
125
|
}
|
|
126
126
|
},
|
|
127
127
|
{
|
|
@@ -129,7 +129,7 @@ function scanTokens(text) {
|
|
|
129
129
|
process: (line, lineStart, pos) => {
|
|
130
130
|
const match = PREFIX_REGEX.exec(line);
|
|
131
131
|
tokens.push({ type: 'prefix', prefix: match[1], iri: match[2].trim() });
|
|
132
|
-
return true;
|
|
132
|
+
return true;
|
|
133
133
|
}
|
|
134
134
|
},
|
|
135
135
|
{
|
|
@@ -138,32 +138,18 @@ function scanTokens(text) {
|
|
|
138
138
|
const match = HEADING_REGEX.exec(line);
|
|
139
139
|
const attrs = match[3] || null;
|
|
140
140
|
const afterHashes = match[1].length;
|
|
141
|
-
const wsLength = afterHashes < line.length && line[afterHashes] === ' ' ? 1 :
|
|
142
|
-
line.slice(afterHashes).match(/^\s+/)?.[0]?.length || 0;
|
|
143
|
-
const valueStartInLine = afterHashes + wsLength;
|
|
144
|
-
const valueEndInLine = valueStartInLine + match[2].length;
|
|
141
|
+
const rangeInfo = calcRangeInfo(line, attrs, lineStart, afterHashes, match[2].length);
|
|
145
142
|
tokens.push(createToken('heading', [lineStart, pos - 1], match[2].trim(), attrs,
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
{ depth: match[1].length }));
|
|
149
|
-
return true; // handled
|
|
143
|
+
rangeInfo.attrsRange, rangeInfo.valueRange, { depth: match[1].length }));
|
|
144
|
+
return true;
|
|
150
145
|
}
|
|
151
146
|
},
|
|
152
147
|
{
|
|
153
|
-
test: line =>
|
|
148
|
+
test: line => UNORDERED_LIST_REGEX.test(line),
|
|
154
149
|
process: (line, lineStart, pos) => {
|
|
155
|
-
const match =
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
const wsLength = prefix < line.length && line[prefix] === ' ' ? 1 :
|
|
159
|
-
line.slice(prefix).match(/^\s+/)?.[0]?.length || 0;
|
|
160
|
-
const valueStartInLine = prefix + wsLength;
|
|
161
|
-
const valueEndInLine = valueStartInLine + match[3].length;
|
|
162
|
-
tokens.push(createToken('list', [lineStart, pos - 1], match[3].trim(), attrs,
|
|
163
|
-
calcAttrsRange(line, attrs, lineStart),
|
|
164
|
-
calcValueRange(lineStart, valueStartInLine, valueEndInLine),
|
|
165
|
-
{ indent: match[1].length }));
|
|
166
|
-
return true; // handled
|
|
150
|
+
const match = UNORDERED_LIST_REGEX.exec(line);
|
|
151
|
+
tokens.push(createListToken('list', line, lineStart, pos, match, match[1].length));
|
|
152
|
+
return true;
|
|
167
153
|
}
|
|
168
154
|
},
|
|
169
155
|
{
|
|
@@ -175,15 +161,15 @@ function scanTokens(text) {
|
|
|
175
161
|
const valueEndInLine = valueStartInLine + match[1].length;
|
|
176
162
|
tokens.push(createToken('blockquote', [lineStart, pos - 1], match[1].trim(), attrs,
|
|
177
163
|
calcAttrsRange(line, attrs, lineStart),
|
|
178
|
-
|
|
179
|
-
return true;
|
|
164
|
+
[lineStart + valueStartInLine, lineStart + valueEndInLine]));
|
|
165
|
+
return true;
|
|
180
166
|
}
|
|
181
167
|
},
|
|
182
168
|
{
|
|
183
169
|
test: line => line.trim(),
|
|
184
170
|
process: (line, lineStart, pos) => {
|
|
185
171
|
tokens.push(createToken('para', [lineStart, pos - 1], line.trim()));
|
|
186
|
-
return true;
|
|
172
|
+
return true;
|
|
187
173
|
}
|
|
188
174
|
}
|
|
189
175
|
];
|
|
@@ -196,7 +182,7 @@ function scanTokens(text) {
|
|
|
196
182
|
// Try each processor until one handles the line
|
|
197
183
|
for (const processor of processors) {
|
|
198
184
|
if (processor.test(line) && processor.process(line, lineStart, pos)) {
|
|
199
|
-
break;
|
|
185
|
+
break;
|
|
200
186
|
}
|
|
201
187
|
}
|
|
202
188
|
}
|
|
@@ -212,44 +198,35 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
212
198
|
const carriers = [];
|
|
213
199
|
let pos = 0;
|
|
214
200
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
// Angle-bracket URLs: <URL>{...}
|
|
218
|
-
if (text[pos] === '<') {
|
|
201
|
+
const CARRIER_EXTRACTORS = {
|
|
202
|
+
'<': (text, pos, baseOffset) => {
|
|
219
203
|
const angleEnd = text.indexOf('>', pos);
|
|
220
|
-
if (angleEnd
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
}
|
|
230
|
-
return null;
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
// Bracketed links: [text](URL){...} and [text]{...}
|
|
234
|
-
if (text[pos] === '[') {
|
|
204
|
+
if (angleEnd === -1) return null;
|
|
205
|
+
const url = text.slice(pos + 1, angleEnd);
|
|
206
|
+
if (!URL_REGEX.test(url)) return null;
|
|
207
|
+
const { attrs, attrsRange, finalSpanEnd } = extractAttributesFromText(text, angleEnd + 1, baseOffset);
|
|
208
|
+
return createCarrier('link', url, attrs, attrsRange,
|
|
209
|
+
[baseOffset + pos + 1, baseOffset + angleEnd],
|
|
210
|
+
[baseOffset + pos, baseOffset + finalSpanEnd], finalSpanEnd, { url });
|
|
211
|
+
},
|
|
212
|
+
'[': (text, pos, baseOffset) => {
|
|
235
213
|
const bracketEnd = findMatchingBracket(text, pos);
|
|
236
|
-
if (bracketEnd)
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
[baseOffset + pos + 1, baseOffset + bracketEnd - 1],
|
|
246
|
-
[baseOffset + pos, baseOffset + finalSpanEnd],
|
|
247
|
-
finalSpanEnd, { url: resourceIRI });
|
|
248
|
-
}
|
|
249
|
-
return null;
|
|
214
|
+
if (!bracketEnd) return null;
|
|
215
|
+
const carrierText = text.slice(pos + 1, bracketEnd - 1);
|
|
216
|
+
const { url, spanEnd } = extractUrlFromBrackets(text, bracketEnd);
|
|
217
|
+
const { attrs, attrsRange, finalSpanEnd } = extractAttributesFromText(text, spanEnd, baseOffset);
|
|
218
|
+
const { carrierType, resourceIRI } = determineCarrierType(url);
|
|
219
|
+
if (url?.startsWith('=')) return { skip: true, pos: finalSpanEnd };
|
|
220
|
+
return createCarrier(carrierType, carrierText, attrs, attrsRange,
|
|
221
|
+
[baseOffset + pos + 1, baseOffset + bracketEnd - 1],
|
|
222
|
+
[baseOffset + pos, baseOffset + finalSpanEnd], finalSpanEnd, { url: resourceIRI });
|
|
250
223
|
}
|
|
224
|
+
};
|
|
225
|
+
|
|
226
|
+
const extractCarrier = (text, pos, baseOffset) => {
|
|
227
|
+
const extractor = CARRIER_EXTRACTORS[text[pos]];
|
|
228
|
+
if (extractor) return extractor(text, pos, baseOffset);
|
|
251
229
|
|
|
252
|
-
// Regex-based carriers: emphasis and code spans
|
|
253
230
|
for (const [type, pattern] of Object.entries(INLINE_CARRIER_PATTERNS)) {
|
|
254
231
|
pattern.lastIndex = pos;
|
|
255
232
|
const match = pattern.exec(text);
|
|
@@ -260,7 +237,6 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
260
237
|
ranges.attrsRange, ranges.valueRange, ranges.range, ranges.pos);
|
|
261
238
|
}
|
|
262
239
|
}
|
|
263
|
-
|
|
264
240
|
return null;
|
|
265
241
|
};
|
|
266
242
|
|
|
@@ -357,14 +333,9 @@ function createBlock(subject, types, predicates, entries, range, attrsRange, val
|
|
|
357
333
|
predicates: predicates.map(p => ({ iri: expandIRI(p.iri, ctx), form: p.form }))
|
|
358
334
|
};
|
|
359
335
|
|
|
360
|
-
const signature = [
|
|
361
|
-
subject,
|
|
362
|
-
carrierType || 'unknown',
|
|
363
|
-
expanded.types.join(','),
|
|
364
|
-
expanded.predicates.map(p => `${p.form}${p.iri}`).join(',')
|
|
365
|
-
].join('|');
|
|
366
|
-
|
|
336
|
+
const signature = [subject, carrierType || 'unknown', expanded.types.join(','), expanded.predicates.map(p => `${p.form}${p.iri}`).join(',')].join('|');
|
|
367
337
|
const blockId = hash(signature);
|
|
338
|
+
|
|
368
339
|
return {
|
|
369
340
|
id: blockId,
|
|
370
341
|
range: { start: range[0], end: range[1] },
|
|
@@ -391,93 +362,75 @@ function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFac
|
|
|
391
362
|
|
|
392
363
|
quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), slotInfo);
|
|
393
364
|
}
|
|
365
|
+
const resolveFragment = (fragment, state) => {
|
|
366
|
+
if (!state.currentSubject) return null;
|
|
367
|
+
const baseIRI = state.currentSubject.value.split('#')[0];
|
|
368
|
+
return state.df.namedNode(`${baseIRI}#${fragment}`);
|
|
369
|
+
};
|
|
370
|
+
|
|
394
371
|
function resolveSubject(sem, state) {
|
|
395
372
|
if (!sem.subject) return null;
|
|
396
373
|
if (sem.subject === 'RESET') {
|
|
397
374
|
state.currentSubject = null;
|
|
398
375
|
return null;
|
|
399
376
|
}
|
|
400
|
-
if (sem.subject.startsWith('=#')) {
|
|
401
|
-
const fragment = sem.subject.substring(2);
|
|
402
|
-
if (state.currentSubject) {
|
|
403
|
-
const baseIRI = state.currentSubject.value.split('#')[0];
|
|
404
|
-
return state.df.namedNode(`${baseIRI}#${fragment}`);
|
|
405
|
-
}
|
|
406
|
-
return null;
|
|
407
|
-
}
|
|
377
|
+
if (sem.subject.startsWith('=#')) return resolveFragment(sem.subject.substring(2), state);
|
|
408
378
|
return state.df.namedNode(expandIRI(sem.subject, state.ctx));
|
|
409
379
|
}
|
|
410
380
|
|
|
411
381
|
function resolveObject(sem, state) {
|
|
412
382
|
if (!sem.object) return null;
|
|
413
|
-
if (sem.object.startsWith('#')) {
|
|
414
|
-
const fragment = sem.object.substring(1);
|
|
415
|
-
if (state.currentSubject) {
|
|
416
|
-
const baseIRI = state.currentSubject.value.split('#')[0];
|
|
417
|
-
return state.df.namedNode(`${baseIRI}#${fragment}`);
|
|
418
|
-
}
|
|
419
|
-
return null;
|
|
420
|
-
}
|
|
383
|
+
if (sem.object.startsWith('#')) return resolveFragment(sem.object.substring(1), state);
|
|
421
384
|
return state.df.namedNode(expandIRI(sem.object, state.ctx));
|
|
422
385
|
}
|
|
423
386
|
|
|
387
|
+
const createTypeQuad = (typeIRI, subject, state, blockId, entryIndex = null) => {
|
|
388
|
+
const expandedType = expandIRI(typeIRI, state.ctx);
|
|
389
|
+
emitQuad(
|
|
390
|
+
state.quads, state.origin.quadIndex, blockId,
|
|
391
|
+
subject,
|
|
392
|
+
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
393
|
+
state.df.namedNode(expandedType),
|
|
394
|
+
state.df,
|
|
395
|
+
{ kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex }
|
|
396
|
+
);
|
|
397
|
+
};
|
|
398
|
+
|
|
424
399
|
function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state, carrier) {
|
|
425
400
|
sem.types.forEach(t => {
|
|
426
401
|
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
427
402
|
const entryIndex = typeof t === 'string' ? null : t.entryIndex;
|
|
428
|
-
|
|
429
|
-
// For angle-bracket URLs and bracketed links [text](URL), use the URL as the subject
|
|
430
|
-
// for type declarations when there's no explicit subject declaration.
|
|
431
|
-
// This implements {+URL} soft subject behavior.
|
|
432
403
|
let typeSubject = newSubject ? newSubject : (localObject || carrierO || S);
|
|
433
404
|
if (carrier?.type === 'link' && carrier?.url && !newSubject) {
|
|
434
|
-
typeSubject = carrierO;
|
|
405
|
+
typeSubject = carrierO;
|
|
435
406
|
}
|
|
436
|
-
|
|
437
|
-
const expandedType = expandIRI(typeIRI, state.ctx);
|
|
438
|
-
|
|
439
|
-
emitQuad(
|
|
440
|
-
state.quads, state.origin.quadIndex, block.id,
|
|
441
|
-
typeSubject,
|
|
442
|
-
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
443
|
-
state.df.namedNode(expandedType),
|
|
444
|
-
state.df,
|
|
445
|
-
{ kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex }
|
|
446
|
-
);
|
|
407
|
+
createTypeQuad(typeIRI, typeSubject, state, block.id, entryIndex);
|
|
447
408
|
});
|
|
448
409
|
}
|
|
449
410
|
|
|
411
|
+
const determinePredicateRole = (pred, carrier, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L) => {
|
|
412
|
+
if (pred.form === '' && carrier?.type === 'link' && carrier?.url && carrier.text === carrier.url) {
|
|
413
|
+
return null;
|
|
414
|
+
}
|
|
415
|
+
switch (pred.form) {
|
|
416
|
+
case '':
|
|
417
|
+
return carrier?.type === 'link' && carrier?.url && carrier.text !== carrier.url && !newSubject
|
|
418
|
+
? { subject: newSubjectOrCarrierO, object: L }
|
|
419
|
+
: { subject: localObject || S, object: L };
|
|
420
|
+
case '?':
|
|
421
|
+
return { subject: newSubject ? previousSubject : S, object: localObject || newSubjectOrCarrierO };
|
|
422
|
+
case '!':
|
|
423
|
+
return { subject: localObject || newSubjectOrCarrierO, object: newSubject ? previousSubject : S };
|
|
424
|
+
default:
|
|
425
|
+
return null;
|
|
426
|
+
}
|
|
427
|
+
};
|
|
428
|
+
|
|
450
429
|
function processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state, carrier) {
|
|
451
430
|
sem.predicates.forEach(pred => {
|
|
452
|
-
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
453
|
-
|
|
454
|
-
// Skip literal predicates for angle-bracket URLs only
|
|
455
|
-
if (pred.form === '' && carrier?.type === 'link' && carrier?.url && carrier.text === carrier.url) {
|
|
456
|
-
return;
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
// Determine subject/object roles based on predicate form
|
|
460
|
-
let role;
|
|
461
|
-
switch (pred.form) {
|
|
462
|
-
case '':
|
|
463
|
-
// For bracketed links with literal predicates and no explicit subject, use URL as subject
|
|
464
|
-
if (carrier?.type === 'link' && carrier?.url && carrier.text !== carrier.url && !newSubject) {
|
|
465
|
-
role = { subject: newSubjectOrCarrierO, object: L };
|
|
466
|
-
} else {
|
|
467
|
-
role = { subject: localObject || S, object: L };
|
|
468
|
-
}
|
|
469
|
-
break;
|
|
470
|
-
case '?':
|
|
471
|
-
role = { subject: newSubject ? previousSubject : S, object: localObject || newSubjectOrCarrierO };
|
|
472
|
-
break;
|
|
473
|
-
case '!':
|
|
474
|
-
role = { subject: localObject || newSubjectOrCarrierO, object: newSubject ? previousSubject : S };
|
|
475
|
-
break;
|
|
476
|
-
default:
|
|
477
|
-
role = null;
|
|
478
|
-
}
|
|
479
|
-
|
|
431
|
+
const role = determinePredicateRole(pred, carrier, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L);
|
|
480
432
|
if (role) {
|
|
433
|
+
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
481
434
|
emitQuad(state.quads, state.origin.quadIndex, block.id,
|
|
482
435
|
role.subject, P, role.object, state.df,
|
|
483
436
|
{ kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex }
|
|
@@ -498,7 +451,6 @@ function processAnnotation(carrier, sem, state, options = {}) {
|
|
|
498
451
|
const newSubject = resolveSubject(sem, state);
|
|
499
452
|
const localObject = resolveObject(sem, state);
|
|
500
453
|
|
|
501
|
-
// Use implicit subject if provided (for list items)
|
|
502
454
|
const effectiveSubject = implicitSubject || (newSubject && !preserveGlobalSubject ? newSubject : previousSubject);
|
|
503
455
|
if (newSubject && !preserveGlobalSubject && !implicitSubject) {
|
|
504
456
|
state.currentSubject = newSubject;
|
|
@@ -546,23 +498,7 @@ export function findItemSubject(listToken, carriers, state) {
|
|
|
546
498
|
return null;
|
|
547
499
|
}
|
|
548
500
|
|
|
549
|
-
|
|
550
|
-
// Check for explicit predicates (excluding subject declarations)
|
|
551
|
-
if (listToken.attrs) {
|
|
552
|
-
const attrs = parseSemCached(listToken.attrs);
|
|
553
|
-
if (attrs.predicates.some(p => !p.subject && p.iri !== 'RESET')) {
|
|
554
|
-
return true;
|
|
555
|
-
}
|
|
556
|
-
}
|
|
557
|
-
return carriers.some(carrier => {
|
|
558
|
-
const carrierAttrs = parseSemCached(carrier.attrs);
|
|
559
|
-
return carrierAttrs.predicates.some(p => !p.subject && p.iri !== 'RESET');
|
|
560
|
-
});
|
|
561
|
-
}
|
|
562
|
-
|
|
563
|
-
// Unified list context processing
|
|
564
|
-
function processContextSem({ sem, itemSubject, contextSubject, inheritLiterals = false, state, blockId = 'list-context' }) {
|
|
565
|
-
// Emit types
|
|
501
|
+
const processContextSem = ({ sem, itemSubject, contextSubject, inheritLiterals = false, state, blockId = 'list-context' }) => {
|
|
566
502
|
sem.types.forEach(t => {
|
|
567
503
|
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
568
504
|
emitQuad(
|
|
@@ -574,7 +510,6 @@ function processContextSem({ sem, itemSubject, contextSubject, inheritLiterals =
|
|
|
574
510
|
);
|
|
575
511
|
});
|
|
576
512
|
|
|
577
|
-
// Emit directional predicates
|
|
578
513
|
sem.predicates.forEach(pred => {
|
|
579
514
|
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
580
515
|
if (pred.form === '!') {
|
|
@@ -584,46 +519,35 @@ function processContextSem({ sem, itemSubject, contextSubject, inheritLiterals =
|
|
|
584
519
|
}
|
|
585
520
|
});
|
|
586
521
|
|
|
587
|
-
// Optionally inherit literal predicates
|
|
588
522
|
if (inheritLiterals) {
|
|
589
523
|
const literalPredicates = sem.predicates.filter(p => p.form === '');
|
|
590
524
|
if (literalPredicates.length > 0) {
|
|
591
525
|
return {
|
|
592
|
-
subject: null,
|
|
593
|
-
object: null,
|
|
594
|
-
types: [],
|
|
526
|
+
subject: null, object: null, types: [],
|
|
595
527
|
predicates: literalPredicates.map(p => ({ iri: p.iri, form: p.form, entryIndex: p.entryIndex })),
|
|
596
|
-
datatype: null,
|
|
597
|
-
language: null,
|
|
598
|
-
entries: []
|
|
528
|
+
datatype: null, language: null, entries: []
|
|
599
529
|
};
|
|
600
530
|
}
|
|
601
531
|
}
|
|
602
532
|
return null;
|
|
603
|
-
}
|
|
533
|
+
};
|
|
604
534
|
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
// Pop stack frames for lists that have ended (strictly less indent)
|
|
608
|
-
while (
|
|
609
|
-
state.listStack.length &&
|
|
610
|
-
token.indent < state.listStack[state.listStack.length - 1].indent
|
|
611
|
-
) {
|
|
535
|
+
const manageListStack = (token, state) => {
|
|
536
|
+
while (state.listStack.length && token.indent < state.listStack[state.listStack.length - 1].indent) {
|
|
612
537
|
state.listStack.pop();
|
|
613
538
|
}
|
|
614
539
|
|
|
615
|
-
// If we have pending context, always create a new frame for it
|
|
616
540
|
if (state.pendingListContext) {
|
|
617
541
|
state.listStack.push({
|
|
618
542
|
indent: token.indent,
|
|
619
543
|
anchorSubject: state.pendingListContext.subject,
|
|
620
544
|
contextSubject: state.pendingListContext.subject,
|
|
621
|
-
contextSem: state.pendingListContext.sem
|
|
545
|
+
contextSem: state.pendingListContext.sem,
|
|
546
|
+
contextText: state.pendingListContext.contextText,
|
|
547
|
+
contextToken: state.pendingListContext.contextToken // Store context token for origins
|
|
622
548
|
});
|
|
623
549
|
state.pendingListContext = null;
|
|
624
550
|
} else if (state.listStack.length === 0 || token.indent > state.listStack[state.listStack.length - 1].indent) {
|
|
625
|
-
// Push empty frame for nested lists without explicit context
|
|
626
|
-
// Inherit anchorSubject from parent frame if available
|
|
627
551
|
const parentFrame = state.listStack.length > 0 ? state.listStack[state.listStack.length - 1] : null;
|
|
628
552
|
state.listStack.push({
|
|
629
553
|
indent: token.indent,
|
|
@@ -632,138 +556,94 @@ function manageListStack(token, state) {
|
|
|
632
556
|
contextSem: null
|
|
633
557
|
});
|
|
634
558
|
}
|
|
635
|
-
|
|
636
|
-
}
|
|
637
|
-
|
|
638
|
-
function processListItem(token, state) {
|
|
639
|
-
const carriers = getCarriers(token);
|
|
640
|
-
|
|
641
|
-
// Find item subject from list token or inline carriers
|
|
642
|
-
const itemInfo = findItemSubject(token, carriers, state);
|
|
643
|
-
if (!itemInfo) return;
|
|
559
|
+
};
|
|
644
560
|
|
|
645
|
-
|
|
561
|
+
const combineSemanticInfo = (token, carriers, listFrame, state, itemSubject) => {
|
|
562
|
+
const combinedSem = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
563
|
+
const addSem = (sem) => { combinedSem.types.push(...sem.types); combinedSem.predicates.push(...sem.predicates); combinedSem.entries.push(...sem.entries); };
|
|
646
564
|
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
currentFrame.anchorSubject = itemSubject;
|
|
565
|
+
if (listFrame?.contextSem) {
|
|
566
|
+
const inheritedSem = processContextSem({ sem: listFrame.contextSem, itemSubject, contextSubject: listFrame.contextSubject, inheritLiterals: true, state });
|
|
567
|
+
if (inheritedSem) addSem(inheritedSem);
|
|
651
568
|
}
|
|
652
569
|
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
// Collect all semantic information for this list item
|
|
656
|
-
let combinedSem = {
|
|
657
|
-
subject: null,
|
|
658
|
-
object: null,
|
|
659
|
-
types: [],
|
|
660
|
-
predicates: [],
|
|
661
|
-
datatype: null,
|
|
662
|
-
language: null,
|
|
663
|
-
entries: []
|
|
664
|
-
};
|
|
570
|
+
if (token.attrs) addSem(parseSemCached(token.attrs));
|
|
571
|
+
carriers.forEach(carrier => { if (carrier.attrs) addSem(parseSemCached(carrier.attrs)); });
|
|
665
572
|
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
const inheritedSem = processContextSem({
|
|
669
|
-
sem: listFrame.contextSem,
|
|
670
|
-
itemSubject,
|
|
671
|
-
contextSubject: listFrame.contextSubject,
|
|
672
|
-
inheritLiterals: true,
|
|
673
|
-
state
|
|
674
|
-
});
|
|
573
|
+
return combinedSem;
|
|
574
|
+
};
|
|
675
575
|
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
}
|
|
681
|
-
}
|
|
576
|
+
const processListItem = (token, state) => {
|
|
577
|
+
const carriers = getCarriers(token);
|
|
578
|
+
const itemInfo = findItemSubject(token, carriers, state);
|
|
579
|
+
if (!itemInfo) return;
|
|
682
580
|
|
|
683
|
-
|
|
684
|
-
if (token.attrs) {
|
|
685
|
-
const sem = parseSemCached(token.attrs);
|
|
686
|
-
combinedSem.types.push(...sem.types);
|
|
687
|
-
combinedSem.predicates.push(...sem.predicates);
|
|
688
|
-
combinedSem.entries.push(...sem.entries);
|
|
689
|
-
}
|
|
581
|
+
const { subject: itemSubject } = itemInfo;
|
|
582
|
+
if (state.listStack.length > 0) state.listStack[state.listStack.length - 1].anchorSubject = itemSubject;
|
|
690
583
|
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
if (carrier.attrs) {
|
|
694
|
-
const sem = parseSemCached(carrier.attrs);
|
|
695
|
-
combinedSem.types.push(...sem.types);
|
|
696
|
-
combinedSem.predicates.push(...sem.predicates);
|
|
697
|
-
combinedSem.entries.push(...sem.entries);
|
|
698
|
-
}
|
|
699
|
-
});
|
|
584
|
+
const listFrame = state.listStack[state.listStack.length - 1];
|
|
585
|
+
const combinedSem = combineSemanticInfo(token, carriers, listFrame, state, itemSubject);
|
|
700
586
|
|
|
701
|
-
// Only create a block if we have semantic information
|
|
702
587
|
if (combinedSem.entries.length > 0) {
|
|
703
588
|
const prevSubject = state.currentSubject;
|
|
704
589
|
state.currentSubject = itemSubject;
|
|
705
590
|
|
|
706
|
-
processAnnotation({
|
|
707
|
-
type: 'list',
|
|
708
|
-
text: token.text,
|
|
709
|
-
range: token.range,
|
|
710
|
-
attrsRange: token.attrsRange || null,
|
|
711
|
-
valueRange: token.valueRange || null
|
|
712
|
-
}, combinedSem, state, {
|
|
713
|
-
preserveGlobalSubject: !state.listStack.length,
|
|
714
|
-
implicitSubject: itemSubject
|
|
715
|
-
});
|
|
591
|
+
processAnnotation({ type: 'list', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null }, combinedSem, state, { preserveGlobalSubject: !state.listStack.length, implicitSubject: itemSubject });
|
|
716
592
|
|
|
717
593
|
state.currentSubject = prevSubject;
|
|
718
594
|
}
|
|
719
|
-
}
|
|
595
|
+
};
|
|
596
|
+
|
|
720
597
|
|
|
721
598
|
function processListContextFromParagraph(token, state) {
|
|
722
599
|
const contextMatch = LIST_CONTEXT_REGEX.exec(token.text);
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
600
|
+
if (!contextMatch) return;
|
|
601
|
+
|
|
602
|
+
const contextSem = parseSemCached(`{${contextMatch[2]}}`);
|
|
603
|
+
let contextSubject = state.currentSubject || state.documentSubject;
|
|
604
|
+
|
|
605
|
+
if (!contextSubject && state.tokens) {
|
|
606
|
+
for (let i = state.currentTokenIndex - 1; i >= 0; i--) {
|
|
607
|
+
const prevToken = state.tokens[i];
|
|
608
|
+
if (prevToken.type === 'heading' && prevToken.attrs) {
|
|
609
|
+
const prevSem = parseSemCached(prevToken.attrs);
|
|
610
|
+
if (prevSem.subject) {
|
|
611
|
+
const resolvedSubject = resolveSubject(prevSem, state);
|
|
612
|
+
if (resolvedSubject) {
|
|
613
|
+
contextSubject = resolvedSubject.value;
|
|
614
|
+
break;
|
|
615
|
+
}
|
|
616
|
+
}
|
|
740
617
|
}
|
|
741
618
|
}
|
|
619
|
+
}
|
|
742
620
|
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
621
|
+
const nextToken = state.tokens?.[state.currentTokenIndex + 1];
|
|
622
|
+
if (state.listStack.length > 0 && nextToken && nextToken.type === 'list') {
|
|
623
|
+
const currentFrame = state.listStack[state.listStack.length - 1];
|
|
624
|
+
if (currentFrame.anchorSubject && nextToken.indent > currentFrame.indent) {
|
|
625
|
+
contextSubject = currentFrame.anchorSubject;
|
|
626
|
+
}
|
|
747
627
|
}
|
|
628
|
+
|
|
629
|
+
state.pendingListContext = {
|
|
630
|
+
sem: contextSem,
|
|
631
|
+
subject: contextSubject,
|
|
632
|
+
contextText: contextMatch[1].replace(':', '').trim(),
|
|
633
|
+
contextToken: token // Store the context token for origin ranges
|
|
634
|
+
};
|
|
748
635
|
}
|
|
749
636
|
|
|
750
|
-
// Helper functions for token processing
|
|
751
637
|
function processTokenAnnotations(token, state, tokenType) {
|
|
752
|
-
// Process token's own attributes
|
|
753
638
|
if (token.attrs) {
|
|
754
639
|
const sem = parseSemCached(token.attrs);
|
|
755
640
|
processAnnotation({
|
|
756
|
-
type: tokenType,
|
|
757
|
-
|
|
758
|
-
range: token.range,
|
|
759
|
-
attrsRange: token.attrsRange || null,
|
|
760
|
-
valueRange: token.valueRange || null
|
|
641
|
+
type: tokenType, text: token.text, range: token.range,
|
|
642
|
+
attrsRange: token.attrsRange || null, valueRange: token.valueRange || null
|
|
761
643
|
}, sem, state);
|
|
762
644
|
}
|
|
763
645
|
|
|
764
|
-
|
|
765
|
-
const carriers = getCarriers(token);
|
|
766
|
-
carriers.forEach(carrier => {
|
|
646
|
+
getCarriers(token).forEach(carrier => {
|
|
767
647
|
if (carrier.attrs) {
|
|
768
648
|
const sem = parseSemCached(carrier.attrs);
|
|
769
649
|
processAnnotation(carrier, sem, state);
|
|
@@ -777,17 +657,41 @@ function processStandaloneSubject(token, state) {
|
|
|
777
657
|
|
|
778
658
|
const sem = parseSemCached(`{=${match[1]}}`);
|
|
779
659
|
const attrsStart = token.range[0] + token.text.indexOf('{=');
|
|
780
|
-
const attrsEnd = attrsStart + (match[1] ? match[1].length : 0);
|
|
781
|
-
|
|
782
660
|
processAnnotation({
|
|
783
|
-
type: 'standalone',
|
|
784
|
-
|
|
785
|
-
range: token.range,
|
|
786
|
-
attrsRange: [attrsStart, attrsEnd],
|
|
661
|
+
type: 'standalone', text: '', range: token.range,
|
|
662
|
+
attrsRange: [attrsStart, attrsStart + (match[1] ? match[1].length : 0)],
|
|
787
663
|
valueRange: null
|
|
788
664
|
}, sem, state);
|
|
789
665
|
}
|
|
790
666
|
|
|
667
|
+
/**
 * Dispatch table mapping a token's `type` to the routine that handles it.
 *
 * Every handler is invoked as `handler(token, state)`. Token types without
 * an entry here are skipped by the caller, which looks the handler up with
 * optional call syntax.
 */
const TOKEN_PROCESSORS = {
    /**
     * Headings: when the heading's own attributes declare a subject that
     * resolves, record it as the document subject, then process the
     * heading's annotations.
     */
    heading(token, state) {
        let resolved = null;
        if (token.attrs) {
            const sem = parseSemCached(token.attrs);
            resolved = sem.subject ? resolveSubject(sem, state) : null;
        }
        if (resolved) {
            state.documentSubject = resolved;
        }
        processTokenAnnotations(token, state, token.type);
    },

    /** Code tokens: only the token's annotations are processed. */
    code(token, state) {
        processTokenAnnotations(token, state, token.type);
    },

    /** Blockquotes: only the token's annotations are processed. */
    blockquote(token, state) {
        processTokenAnnotations(token, state, token.type);
    },

    /**
     * Paragraphs: run the standalone-subject step, then the list-context
     * step, then annotation processing, in exactly that order.
     */
    para(token, state) {
        processStandaloneSubject(token, state);
        processListContextFromParagraph(token, state);
        processTokenAnnotations(token, state, token.type);
    },

    /** List items: update the list stack first, then process the item. */
    list(token, state) {
        manageListStack(token, state);
        processListItem(token, state);
    },
};
|
|
694
|
+
|
|
791
695
|
export function parse(text, options = {}) {
|
|
792
696
|
const state = {
|
|
793
697
|
ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
|
|
@@ -795,69 +699,32 @@ export function parse(text, options = {}) {
|
|
|
795
699
|
quads: [],
|
|
796
700
|
origin: { blocks: new Map(), quadIndex: new Map() },
|
|
797
701
|
currentSubject: null,
|
|
798
|
-
documentSubject: null,
|
|
702
|
+
documentSubject: null,
|
|
799
703
|
listStack: [],
|
|
800
704
|
pendingListContext: null,
|
|
801
|
-
tokens: null,
|
|
802
|
-
currentTokenIndex: -1
|
|
705
|
+
tokens: null,
|
|
706
|
+
currentTokenIndex: -1
|
|
803
707
|
};
|
|
804
708
|
|
|
805
709
|
state.tokens = scanTokens(text);
|
|
806
710
|
|
|
807
|
-
// Process prefix declarations first with prefix folding support
|
|
808
711
|
state.tokens.filter(t => t.type === 'prefix').forEach(t => {
|
|
809
|
-
// Check if the IRI value contains a CURIE that references a previously defined prefix
|
|
810
712
|
let resolvedIri = t.iri;
|
|
811
713
|
if (t.iri.includes(':')) {
|
|
812
|
-
const
|
|
813
|
-
const
|
|
714
|
+
const colonIndex = t.iri.indexOf(':');
|
|
715
|
+
const potentialPrefix = t.iri.substring(0, colonIndex);
|
|
716
|
+
const reference = t.iri.substring(colonIndex + 1);
|
|
814
717
|
if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
|
|
815
|
-
// This is a CURIE referencing an existing prefix - resolve it
|
|
816
718
|
resolvedIri = state.ctx[potentialPrefix] + reference;
|
|
817
719
|
}
|
|
818
720
|
}
|
|
819
721
|
state.ctx[t.prefix] = resolvedIri;
|
|
820
722
|
});
|
|
821
723
|
|
|
822
|
-
// Process all other tokens
|
|
823
724
|
for (let i = 0; i < state.tokens.length; i++) {
|
|
824
725
|
const token = state.tokens[i];
|
|
825
726
|
state.currentTokenIndex = i;
|
|
826
|
-
|
|
827
|
-
switch (token.type) {
|
|
828
|
-
case 'heading':
|
|
829
|
-
// Update document subject when processing headings
|
|
830
|
-
if (token.attrs) {
|
|
831
|
-
const headingSem = parseSemCached(token.attrs);
|
|
832
|
-
if (headingSem.subject) {
|
|
833
|
-
const subject = resolveSubject(headingSem, state);
|
|
834
|
-
if (subject) {
|
|
835
|
-
state.documentSubject = subject;
|
|
836
|
-
}
|
|
837
|
-
}
|
|
838
|
-
}
|
|
839
|
-
processTokenAnnotations(token, state, token.type);
|
|
840
|
-
break;
|
|
841
|
-
case 'code':
|
|
842
|
-
// Process annotations on the opening fence, but skip content processing
|
|
843
|
-
// This allows safe self-explaining of the format in documentation
|
|
844
|
-
processTokenAnnotations(token, state, token.type);
|
|
845
|
-
break;
|
|
846
|
-
case 'blockquote':
|
|
847
|
-
processTokenAnnotations(token, state, token.type);
|
|
848
|
-
break;
|
|
849
|
-
|
|
850
|
-
case 'para':
|
|
851
|
-
processStandaloneSubject(token, state);
|
|
852
|
-
processListContextFromParagraph(token, state);
|
|
853
|
-
processTokenAnnotations(token, state, token.type);
|
|
854
|
-
break;
|
|
855
|
-
|
|
856
|
-
case 'list':
|
|
857
|
-
manageListStack(token, state);
|
|
858
|
-
processListItem(token, state);
|
|
859
|
-
break;
|
|
860
|
-
}
|
|
727
|
+
TOKEN_PROCESSORS[token.type]?.(token, state);
|
|
861
728
|
}
|
|
862
729
|
|
|
863
730
|
return { quads: state.quads, origin: state.origin, context: state.ctx };
|
package/src/serialize.js
CHANGED
|
@@ -177,6 +177,7 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
177
177
|
return applyEdits(text, edits, ctx, base);
|
|
178
178
|
}
|
|
179
179
|
|
|
180
|
+
|
|
180
181
|
function planOperations(diff, base, ctx) {
|
|
181
182
|
// Normalize quads once
|
|
182
183
|
const normAdds = (diff.add || []).map(normalizeQuad).filter(isValidQuad);
|
|
@@ -376,6 +377,11 @@ function materializeEdits(plan, text, ctx, base) {
|
|
|
376
377
|
|
|
377
378
|
// Materialize adds
|
|
378
379
|
for (const { quad, targetBlock } of plan.adds) {
|
|
380
|
+
const quadKey = quadToKeyForOrigin(quad);
|
|
381
|
+
if (plan.consumedAdds.has(quadKey)) {
|
|
382
|
+
continue;
|
|
383
|
+
}
|
|
384
|
+
|
|
379
385
|
if (quad.object.termType === 'Literal' || quad.object.termType === 'NamedNode') {
|
|
380
386
|
if (!targetBlock) {
|
|
381
387
|
const predShort = shortenIRI(quad.predicate.value, ctx);
|