mdld-parse 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/package.json +8 -5
- package/src/index.js +10 -0
- package/src/parse.js +788 -0
- package/src/serialize.js +531 -0
- package/src/utils.js +305 -0
- package/index.js +0 -1364
package/index.js
DELETED
|
@@ -1,1364 +0,0 @@
|
|
|
1
|
-
/**
 * Built-in prefix → namespace map.
 *
 * `@vocab` is the namespace prepended to terms written without a prefix;
 * every other key is a prefix usable as `prefix:localName`.
 */
export const DEFAULT_CONTEXT = {
    // Namespace for un-prefixed terms.
    '@vocab': 'http://schema.org/',
    // Named prefixes.
    rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
    xsd: 'http://www.w3.org/2001/XMLSchema#',
    schema: 'http://schema.org/'
};
|
|
8
|
-
|
|
9
|
-
/**
 * Minimal term factory producing plain objects with `termType` / `value`
 * fields. Used when the caller does not supply its own data factory.
 */
export const DataFactory = {
    /** Wrap an IRI string as a NamedNode term. */
    namedNode(v) {
        return { termType: 'NamedNode', value: v };
    },

    /** Create a BlankNode; a random `b…` label is generated when none is given. */
    blankNode(v = `b${Math.random().toString(36).slice(2, 11)}`) {
        return { termType: 'BlankNode', value: v };
    },

    /**
     * Create a Literal.
     * A string second argument is a language tag (datatype rdf:langString);
     * anything else is used as the datatype term, defaulting to xsd:string.
     */
    literal(v, lang) {
        const isLanguageTag = typeof lang === 'string';
        if (isLanguageTag) {
            const langString = DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#langString');
            return { termType: 'Literal', value: v, language: lang, datatype: langString };
        }
        const datatype = lang || DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#string');
        return { termType: 'Literal', value: v, language: '', datatype };
    },

    /** Assemble a quad; the graph defaults to a NamedNode with an empty value. */
    quad(s, p, o, g) {
        const graph = g || DataFactory.namedNode('');
        return { subject: s, predicate: p, object: o, graph };
    }
};
|
|
20
|
-
|
|
21
|
-
/**
 * Short, deterministic, non-cryptographic string hash.
 *
 * Starts from 5381 and folds in each UTF-16 code unit with a
 * shift-and-add step, then returns at most the first 12 hex digits of the
 * absolute value.
 *
 * @param {string} str - Text to hash.
 * @returns {string} Lowercase hexadecimal digest (up to 12 characters).
 */
export function hash(str) {
    let acc = 5381;
    let idx = 0;
    while (idx < str.length) {
        // The shifted term is truncated to 32 bits by `<<`; the added terms are not.
        acc = (acc << 5) + acc + str.charCodeAt(idx);
        idx += 1;
    }
    const hex = Math.abs(acc).toString(16);
    return hex.slice(0, 12);
}
|
|
26
|
-
|
|
27
|
-
/**
 * Expand a term (absolute IRI, `prefix:local` name, or bare term) to an IRI.
 *
 * - `null` / `undefined` yields `null`.
 * - Strings starting with `http:` or `https:` are returned unchanged.
 * - `prefix:local` is expanded when `ctx[prefix]` is a non-empty string;
 *   otherwise the term is returned as written (e.g. `urn:isbn:1`).
 * - Anything else is prefixed with `ctx['@vocab']` (or nothing if unset).
 *
 * @param {string|{value: string}|*} term - Term text, a term-like object with
 *   a string `value`, or any value that is stringified.
 * @param {Object<string, string>} ctx - Prefix → namespace map.
 * @returns {string|null} The expanded IRI.
 */
export function expandIRI(term, ctx) {
    if (term == null) return null;

    let raw;
    if (typeof term === 'string') {
        raw = term;
    } else if (typeof term === 'object' && typeof term.value === 'string') {
        raw = term.value;
    } else {
        raw = String(term);
    }

    const t = raw.trim();
    if (/^https?:/.test(t)) return t;

    // Split on the FIRST colon only: `split(':', 2)` would silently drop
    // everything after a second colon in the local part (`ex:a:b` -> `a`).
    const colonAt = t.indexOf(':');
    if (colonAt !== -1) {
        const prefix = t.slice(0, colonAt);
        const ref = t.slice(colonAt + 1);
        const namespace = ctx[prefix];
        // Only string namespaces count; this also ignores inherited
        // non-string properties such as `constructor`.
        return typeof namespace === 'string' && namespace ? namespace + ref : t;
    }

    return (ctx['@vocab'] || '') + t;
}
|
|
38
|
-
|
|
39
|
-
/**
 * Compact an absolute IRI using the given context.
 *
 * Values that are empty or do not start with `http` are returned as-is.
 * An IRI inside `ctx['@vocab']` loses that namespace entirely; otherwise the
 * first context entry (in key order) whose namespace prefixes the IRI yields
 * `prefix:local`. With no match the IRI is returned unchanged.
 *
 * @param {string} iri - IRI to compact.
 * @param {Object<string, string>} ctx - Prefix → namespace map.
 * @returns {string} Compacted form, or the input when nothing applies.
 */
export function shortenIRI(iri, ctx) {
    const isHttpIri = Boolean(iri) && iri.startsWith('http');
    if (!isHttpIri) return iri;

    const vocab = ctx['@vocab'];
    if (vocab && iri.startsWith(vocab)) {
        return iri.slice(vocab.length);
    }

    const hit = Object.entries(ctx).find(
        ([prefix, namespace]) => prefix !== '@vocab' && iri.startsWith(namespace)
    );
    if (!hit) return iri;

    const [prefix, namespace] = hit;
    return `${prefix}:${iri.slice(namespace.length)}`;
}
|
|
49
|
-
|
|
50
|
-
/**
 * Parse the text of a `{...}` annotation into its semantic parts.
 *
 * Tokens are whitespace-separated and classified by their leading sigil:
 *   `=`        subject reset (subject becomes the sentinel 'RESET')
 *   `=#frag`   subject fragment        `=iri`   subject
 *   `+#frag`   soft object fragment    `+iri`   soft object
 *   `^^type`   datatype (ignored for the result if a language is already set)
 *   `@lang`    language (clears any datatype)
 *   `.Class`   type
 *   `!p` `^p` `?p` `p`  predicate with form '!', '^', '?' or ''
 *
 * Every token also produces an entry in `entries` carrying `relRange`: the
 * token's `{ start, end }` offsets within the trimmed annotation text
 * (offset 0 is the opening brace when one is present), so that
 * `text.slice(start, end) === entry.raw`.
 *
 * Errors are logged and produce an empty result rather than propagating.
 *
 * @param {string} raw - Annotation text, normally including the braces.
 * @returns {{subject: ?string, object: ?string, types: Array, predicates: Array,
 *            datatype: ?string, language: ?string, entries: Array}}
 */
export function parseSemanticBlock(raw) {
    const emptyResult = () => ({ subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] });

    try {
        const src = String(raw || '').trim();

        // Locate the content between the optional braces WITHOUT re-trimming,
        // so token offsets stay aligned with `src` even for `{ name }`.
        const innerStart = src.startsWith('{') ? 1 : 0;
        const innerEnd = src.length > innerStart && src.endsWith('}') ? src.length - 1 : src.length;
        const inner = src.slice(innerStart, innerEnd);

        const result = emptyResult();

        for (const m of inner.matchAll(/\S+/g)) {
            const token = m[0];
            const start = innerStart + m.index;
            const relRange = { start, end: start + token.length };
            const entryIndex = result.entries.length;
            const addEntry = (fields) => result.entries.push({ ...fields, relRange, raw: token });

            if (token === '=') {
                result.subject = 'RESET';
                addEntry({ kind: 'subjectReset' });
            } else if (token.startsWith('=#')) {
                const fragment = token.substring(2);
                result.subject = `=#${fragment}`;
                addEntry({ kind: 'fragment', fragment });
            } else if (token.startsWith('+#')) {
                const fragment = token.substring(2);
                result.object = `#${fragment}`;
                addEntry({ kind: 'softFragment', fragment });
            } else if (token.startsWith('+')) {
                const iri = token.substring(1);
                result.object = iri;
                addEntry({ kind: 'object', iri });
            } else if (token.startsWith('=')) {
                const iri = token.substring(1);
                result.subject = iri;
                addEntry({ kind: 'subject', iri });
            } else if (token.startsWith('^^')) {
                // Must be tested before the single-caret predicate form.
                const datatype = token.substring(2);
                if (!result.language) result.datatype = datatype;
                addEntry({ kind: 'datatype', datatype });
            } else if (token.startsWith('@')) {
                const language = token.substring(1);
                result.language = language;
                result.datatype = null; // a language tag wins over a datatype
                addEntry({ kind: 'language', language });
            } else if (token.startsWith('.')) {
                const iri = token.substring(1);
                result.types.push({ iri, entryIndex });
                addEntry({ kind: 'type', iri });
            } else {
                // Predicate: optional form sigil followed by the IRI.
                const form = '!^?'.includes(token[0]) ? token[0] : '';
                const iri = token.substring(form.length);
                result.predicates.push({ iri, form, entryIndex });
                addEntry({ kind: 'property', iri, form });
            }
        }

        return result;
    } catch (error) {
        console.error(`Error parsing semantic block ${raw}:`, error);
        return emptyResult();
    }
}
|
|
152
|
-
|
|
153
|
-
/**
 * Split markdown text into line-level tokens.
 *
 * Recognised token types: `code` (fenced block), `prefix`
 * (`[name] <iri>` declaration), `heading`, `list`, `blockquote` and `para`.
 * Blank lines produce nothing. Offsets are absolute character positions in
 * `text`, assuming `\n` line separators.
 *
 * For heading / list / blockquote tokens:
 *   - `range`      spans the whole line,
 *   - `valueRange` spans the captured body text,
 *   - `attrs` / `attrsRange` describe a trailing `{...}` annotation, or null.
 *
 * A fenced block that is never closed produces no token.
 *
 * @param {string} text - Markdown source.
 * @returns {Array<Object>} Tokens in document order.
 */
function scanTokens(text) {
    const tokens = [];
    let pos = 0;
    let codeBlock = null;

    // Absolute [start, end] of an annotation found at `startInLine`, or null.
    const attrsRangeAt = (lineStart, attrs, startInLine) =>
        attrs && startInLine >= 0
            ? [lineStart + startInLine, lineStart + startInLine + attrs.length]
            : null;

    const leadingWhitespace = (s) => s.match(/^\s+/)?.[0]?.length || 0;

    // Shared shape for heading / list / blockquote tokens.
    const blockToken = (type, extra, line, lineStart, body, attrs, valueStartInLine) => ({
        type,
        ...extra,
        range: [lineStart, lineStart + line.length],
        text: body.trim(),
        attrs,
        attrsRange: attrsRangeAt(lineStart, attrs, attrs ? line.lastIndexOf(attrs) : -1),
        valueRange: [lineStart + valueStartInLine, lineStart + valueStartInLine + body.length]
    });

    for (const line of text.split('\n')) {
        const lineStart = pos;
        pos += line.length + 1;

        if (codeBlock) {
            if (line.startsWith(codeBlock.fence)) {
                const valueStart = codeBlock.valueRangeStart;
                tokens.push({
                    type: 'code',
                    range: [codeBlock.start, lineStart],
                    text: codeBlock.content.join('\n'),
                    lang: codeBlock.lang,
                    attrs: codeBlock.attrs,
                    attrsRange: codeBlock.attrsRange,
                    // Content ends just before the newline preceding the closing fence.
                    valueRange: [valueStart, Math.max(valueStart, lineStart - 1)]
                });
                codeBlock = null;
            } else {
                // Everything else is content -- including shorter backtick runs
                // (e.g. a ``` line inside a ```` fence), which used to be dropped.
                codeBlock.content.push(line);
            }
            continue;
        }

        if (line.startsWith('```')) {
            const [, fence, info] = line.match(/^(`{3,})(.*)/);
            const attrs = info.match(/\{[^}]+\}/)?.[0] || null;
            codeBlock = {
                fence,
                start: lineStart,
                content: [],
                lang: info.trim().split(/[\s{]/)[0],
                attrs,
                attrsRange: attrsRangeAt(lineStart, attrs, attrs ? line.indexOf(attrs) : -1),
                valueRangeStart: lineStart + line.length + 1
            };
            continue;
        }

        const prefixMatch = line.match(/^\[([^\]]+)\]\s*<([^>]+)>/);
        if (prefixMatch) {
            tokens.push({ type: 'prefix', prefix: prefixMatch[1], iri: prefixMatch[2].trim() });
            continue;
        }

        const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
        if (headingMatch) {
            const [, hashes, body, attrs = null] = headingMatch;
            const valueStartInLine = hashes.length + leadingWhitespace(line.substring(hashes.length));
            tokens.push(blockToken('heading', { depth: hashes.length }, line, lineStart, body, attrs, valueStartInLine));
            continue;
        }

        const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/);
        if (listMatch) {
            const [, indent, marker, body, attrs = null] = listMatch;
            const markerEnd = indent.length + marker.length;
            const valueStartInLine = markerEnd + leadingWhitespace(line.substring(markerEnd));
            tokens.push(blockToken('list', { indent: indent.length }, line, lineStart, body, attrs, valueStartInLine));
            continue;
        }

        const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
        if (blockquoteMatch) {
            const [, body, attrs = null] = blockquoteMatch;
            const valueStartInLine = 1 + leadingWhitespace(line.substring(1));
            tokens.push(blockToken('blockquote', {}, line, lineStart, body, attrs, valueStartInLine));
            continue;
        }

        if (line.trim()) {
            tokens.push({
                type: 'para',
                range: [lineStart, lineStart + line.length],
                text: line.trim(),
                attrs: null
            });
        }
    }

    return tokens;
}
|
|
277
|
-
|
|
278
|
-
/**
 * Find inline "carriers" -- spans of text that may hold a `{...}` annotation.
 *
 * At each scan position the function tries, in order:
 *   1. a double-backtick code span followed by `{...}`      -> type 'code'
 *   2. a `*` / `_` / backtick delimited run followed by `{...}` -> type 'emphasis'
 *   3. the next `[text]`, optionally followed by `(url)` and/or `{...}`
 *      -> type 'link' when a url is present, otherwise 'span'.
 *      Links whose url starts with `=` are skipped.
 * Patterns 1 and 2 are only recognised exactly at the scan position (the
 * start of the text or directly after a previous carrier).
 *
 * All ranges are `[start, end]` absolute offsets (`baseOffset` + index in
 * `text`): `range` covers the whole carrier, `valueRange` the carrier text,
 * `attrsRange` the annotation including its braces.
 *
 * Note: `String.prototype.match` accepts no start index, so the delimiter
 * patterns are applied with sticky regexes positioned via `lastIndex`.
 *
 * @param {string} text - Inline text to scan.
 * @param {number} [baseOffset=0] - Offset of `text` within the document.
 * @returns {Array<Object>} Carriers in order of appearance.
 */
function extractInlineCarriers(text, baseOffset = 0) {
    const carriers = [];
    const codeSpanRe = /(``)(.+?)(``)(\s*)\{([^}]+)\}/y;
    const emphasisRe = /([*_`]+)(.+?)([*_`]+)(\s*)\{([^}]+)\}/y;

    // Run a sticky regex exactly at `index`.
    const matchAt = (re, index) => {
        re.lastIndex = index;
        return re.exec(text);
    };

    // Record a delimiter-based carrier and return the index just past it.
    const pushDelimited = (type, start, [whole, opener, body, closer, gap, attrBody]) => {
        const valueStart = start + opener.length;
        const valueEnd = valueStart + body.length;
        const attrsStart = valueEnd + closer.length + gap.length;
        const end = start + whole.length;
        carriers.push({
            type,
            text: body,
            attrs: `{${attrBody}}`,
            attrsRange: [baseOffset + attrsStart, baseOffset + end],
            valueRange: [baseOffset + valueStart, baseOffset + valueEnd],
            range: [baseOffset + start, baseOffset + end]
        });
        return end;
    };

    let pos = 0;
    while (pos < text.length) {
        // Code spans first: the emphasis pattern also accepts backticks and
        // would otherwise shadow this branch completely.
        const codeMatch = matchAt(codeSpanRe, pos);
        if (codeMatch) {
            pos = pushDelimited('code', pos, codeMatch);
            continue;
        }

        const emphasisMatch = matchAt(emphasisRe, pos);
        if (emphasisMatch) {
            pos = pushDelimited('emphasis', pos, emphasisMatch);
            continue;
        }

        // Bracket carriers: locate the next `[` and its balanced `]`.
        const bracketStart = text.indexOf('[', pos);
        if (bracketStart === -1) break;

        let bracketDepth = 1;
        let bracketEnd = bracketStart + 1;
        while (bracketEnd < text.length && bracketDepth > 0) {
            if (text[bracketEnd] === '[') {
                bracketDepth++;
            } else if (text[bracketEnd] === ']') {
                bracketDepth--;
            }
            bracketEnd++;
        }
        if (bracketDepth > 0) break; // unbalanced: nothing more to extract

        const carrierText = text.substring(bracketStart + 1, bracketEnd - 1);
        const valueRange = [baseOffset + bracketStart + 1, baseOffset + bracketEnd - 1];
        let spanEnd = bracketEnd;
        let url = null;

        if (text[spanEnd] === '(') {
            const parenEnd = text.indexOf(')', spanEnd);
            if (parenEnd !== -1) {
                url = text.substring(spanEnd + 1, parenEnd);
                spanEnd = parenEnd + 1;
            }
        }

        let attrs = null;
        let attrsRange = null;
        const attrsMatch = text.substring(spanEnd).match(/^\s*\{([^}]+)\}/);
        if (attrsMatch) {
            attrs = `{${attrsMatch[1]}}`;
            const braceIndex = attrsMatch[0].indexOf('{');
            const absStart = baseOffset + spanEnd + (braceIndex >= 0 ? braceIndex : 0);
            attrsRange = [absStart, absStart + attrs.length];
            spanEnd += attrsMatch[0].length;
        }

        // `[text](=iri)` forms are not carriers; skip past them.
        if (url && url.startsWith('=')) {
            pos = spanEnd;
            continue;
        }

        carriers.push({
            type: url ? 'link' : 'span',
            text: carrierText,
            url: url || null,
            attrs,
            attrsRange,
            valueRange,
            range: [baseOffset + bracketStart, baseOffset + spanEnd]
        });

        pos = spanEnd;
    }

    return carriers;
}
|
|
387
|
-
|
|
388
|
-
/**
 * Build the origin record for one annotated carrier.
 *
 * Type and predicate IRIs are expanded against `ctx`; the block id is a hash
 * of the subject together with those expanded values, so identical
 * subject / types / predicates produce the same id.
 *
 * @param {string} subject - Subject IRI.
 * @param {Array<string|{iri: string}>} types - Type tokens.
 * @param {Array<{iri: string, form: string}>} predicates - Predicate tokens.
 * @param {Array} entries - Parsed annotation entries (stored as given).
 * @param {number[]} range - `[start, end]` of the carrier.
 * @param {?number[]} attrsRange - `[start, end]` of the annotation, if any.
 * @param {?number[]} valueRange - `[start, end]` of the carrier text, if any.
 * @param {?string} carrierType - Kind of carrier.
 * @param {Object} ctx - Prefix context; a shallow copy is stored on the block.
 * @returns {Object} Block record.
 */
function createBlock(subject, types, predicates, entries, range, attrsRange, valueRange, carrierType, ctx) {
    const toSpan = (pair) => (pair ? { start: pair[0], end: pair[1] } : null);

    const expandedTypes = types.map((entry) => {
        const iri = typeof entry === 'string' ? entry : entry.iri;
        return expandIRI(iri, ctx);
    });
    const expandedPredicates = predicates.map(({ iri, form }) => ({ iri: expandIRI(iri, ctx), form }));

    const signature = JSON.stringify({ subject, types: expandedTypes, predicates: expandedPredicates });
    const [start, end] = [range[0], range[1]];

    return {
        id: hash(`${subject}|${signature}`),
        range: { start, end },
        attrsRange: toSpan(attrsRange),
        valueRange: toSpan(valueRange),
        carrierType: carrierType || null,
        subject,
        types: expandedTypes,
        predicates: expandedPredicates,
        entries: entries || [],
        context: { ...ctx }
    };
}
|
|
408
|
-
|
|
409
|
-
/**
 * Build the string key under which a quad is stored in the quad index.
 *
 * The key is a JSON array of subject value, predicate value and a JSON
 * description of the object (literals additionally carry language and
 * datatype IRI).
 *
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @param {Object} object - Term with `termType` and `value`.
 * @returns {string} Index key.
 */
function quadIndexKey(subject, predicate, object) {
    const descriptor = { t: object.termType, v: object.value };
    if (object.termType === 'Literal') {
        descriptor.lang = object.language || '';
        descriptor.dt = object.datatype?.value || '';
    }
    const objKey = JSON.stringify(descriptor);
    return JSON.stringify([subject.value, predicate.value, objKey]);
}
|
|
415
|
-
|
|
416
|
-
/**
 * Return a shallow copy of a quad with literal objects normalised.
 *
 * For literal objects, `language` becomes a string ('' when absent) and
 * `datatype` is replaced by its IRI *string* (xsd:string when absent).
 * Other objects are copied through unchanged.
 *
 * @param {?Object} q - Quad-like object.
 * @returns {?Object} Normalised copy, or null for a falsy input.
 */
function normalizeQuad(q) {
    if (!q) return null;

    const { subject, predicate, object } = q;
    if (object?.termType !== 'Literal') {
        return { ...q, subject, predicate, object };
    }

    const normalizedObject = {
        ...object,
        language: typeof object.language === 'string' ? object.language : '',
        datatype: object.datatype?.value || 'http://www.w3.org/2001/XMLSchema#string'
    };
    return { ...q, subject, predicate, object: normalizedObject };
}
|
|
426
|
-
|
|
427
|
-
/**
 * Serialise an object term to a comparable JSON string.
 *
 * Literals include language and datatype IRI; other terms only their type
 * and value. A falsy term yields the empty string.
 *
 * @param {?Object} o - Term with `termType` and `value`.
 * @returns {string} Signature string.
 */
function objectSignature(o) {
    if (!o) return '';

    const signature = { t: o.termType, v: o.value };
    if (o.termType === 'Literal') {
        signature.lang = o.language || '';
        signature.dt = o.datatype?.value || '';
    }
    return JSON.stringify(signature);
}
|
|
434
|
-
|
|
435
|
-
/**
 * Compute the quad-index key for a quad after normalising it.
 *
 * @param {?Object} q - Quad-like object.
 * @returns {?string} Index key, or null when the quad is falsy.
 */
function quadToKeyForOrigin(q) {
    const normalized = normalizeQuad(q);
    if (!normalized) return null;

    const { subject, predicate, object } = normalized;
    return quadIndexKey(subject, predicate, object);
}
|
|
439
|
-
|
|
440
|
-
/**
 * Decode a quad-index key back into its parts.
 *
 * @param {string} key - JSON array of subject value, predicate value and a
 *   JSON-encoded object description.
 * @returns {?{s: *, p: *, o: *}} Decoded parts, or null when the key cannot
 *   be decoded.
 */
function parseQuadIndexKey(key) {
    try {
        const parts = JSON.parse(key);
        const [subjectValue, predicateValue, objectJson] = parts;
        const objectInfo = JSON.parse(objectJson);
        return { s: subjectValue, p: predicateValue, o: objectInfo };
    } catch {
        // Malformed keys are reported as "no result" rather than thrown.
        return null;
    }
}
|
|
448
|
-
|
|
449
|
-
/**
 * Derive the id of the semantic slot identified by a subject/predicate pair.
 *
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @returns {string} Hash of `subject|predicate`.
 */
function createSemanticSlotId(subject, predicate) {
    const slotKey = [subject.value, predicate.value].join('|');
    return hash(slotKey);
}
|
|
452
|
-
|
|
453
|
-
/**
 * Create the bookkeeping record stored in the quad index for one quad.
 *
 * `slotId` is derived from `meta.subject` / `meta.predicate` when both are
 * present, otherwise null. Fields in `meta` are copied last and therefore
 * override the defaults of the same name.
 *
 * @param {string} blockId - Id of the originating block.
 * @param {*} entryIndex - Index of the originating annotation entry.
 * @param {Object} [meta={}] - Extra fields to attach.
 * @returns {Object} Slot record (initially not vacant).
 */
function createSlotInfo(blockId, entryIndex, meta = {}) {
    const { subject, predicate } = meta;
    const slotId = subject && predicate ? createSemanticSlotId(subject, predicate) : null;

    const defaults = {
        blockId,
        entryIndex,
        slotId,
        isVacant: false,
        lastValue: null,
        vacantSince: null
    };
    return Object.assign(defaults, meta);
}
|
|
465
|
-
|
|
466
|
-
/**
 * Return a copy of a slot record flagged as vacant.
 *
 * The copy remembers the removed value and the current time; the input
 * record is not modified.
 *
 * @param {?Object} slotInfo - Slot record.
 * @param {*} deletedValue - Value that was removed from the slot.
 * @returns {?Object} Vacant copy, or null when no record is given.
 */
function markSlotAsVacant(slotInfo, deletedValue) {
    if (!slotInfo) return null;

    const vacated = { ...slotInfo };
    vacated.isVacant = true;
    vacated.lastValue = deletedValue;
    vacated.vacantSince = Date.now();
    return vacated;
}
|
|
475
|
-
|
|
476
|
-
/**
 * Find the first vacant slot in the quad index for a subject/predicate pair.
 *
 * @param {Map<string, Object>} quadIndex - Quad index (values are slot records).
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @returns {Object|undefined} The matching slot record, or undefined.
 */
function findVacantSlot(quadIndex, subject, predicate) {
    const wantedSlotId = createSemanticSlotId(subject, predicate);

    // Values are snapshotted first, then scanned in insertion order.
    const slots = Array.from(quadIndex.values());
    for (const candidate of slots) {
        if (candidate.slotId === wantedSlotId && candidate.isVacant) {
            return candidate;
        }
    }
    return undefined;
}
|
|
481
|
-
|
|
482
|
-
/**
 * Return a copy of a vacant slot record marked as occupied by `newValue`.
 *
 * @param {?Object} slotInfo - Slot record; must currently be vacant.
 * @param {*} newValue - Value now held by the slot.
 * @returns {?Object} Occupied copy, or null when the record is missing or
 *   not vacant.
 */
function occupySlot(slotInfo, newValue) {
    const canOccupy = Boolean(slotInfo) && Boolean(slotInfo.isVacant);
    if (!canOccupy) return null;

    const occupied = { ...slotInfo };
    occupied.isVacant = false;
    occupied.lastValue = newValue;
    occupied.vacantSince = null;
    return occupied;
}
|
|
491
|
-
|
|
492
|
-
/**
 * Append a quad to the output and record its origin in the quad index.
 *
 * Nothing happens when subject, predicate or object is missing. The index
 * entry is keyed by the quad's subject/predicate/object and stores a slot
 * record combining `meta` with the three terms.
 *
 * @param {Array} quads - Output array (mutated).
 * @param {Map<string, Object>} quadIndex - Origin index (mutated).
 * @param {string} blockId - Id of the originating block.
 * @param {Object} subject
 * @param {Object} predicate
 * @param {Object} object
 * @param {{quad: Function}} dataFactory - Factory used to build the quad.
 * @param {?Object} [meta=null] - Extra origin details (e.g. `entryIndex`).
 * @returns {void}
 */
function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
    const complete = Boolean(subject) && Boolean(predicate) && Boolean(object);
    if (!complete) return;

    const quad = dataFactory.quad(subject, predicate, object);
    quads.push(quad);

    // Slot record with semantic slot tracking.
    const slotMeta = { ...meta, subject, predicate, object };
    const slotInfo = createSlotInfo(blockId, meta?.entryIndex, slotMeta);

    const indexKey = quadIndexKey(quad.subject, quad.predicate, quad.object);
    quadIndex.set(indexKey, slotInfo);
}
|
|
507
|
-
|
|
508
|
-
/**
 * Create a literal term, preferring datatype over language.
 *
 * @param {string} value - Lexical value.
 * @param {?string} datatype - Datatype term (expanded against `context`).
 * @param {?string} language - Language tag, used only without a datatype.
 * @param {Object} context - Prefix context for datatype expansion.
 * @param {{literal: Function, namedNode: Function}} dataFactory
 * @returns {Object} Literal produced by `dataFactory`.
 */
function createLiteral(value, datatype, language, context, dataFactory) {
    if (datatype) {
        return dataFactory.literal(value, dataFactory.namedNode(expandIRI(datatype, context)));
    }
    return language
        ? dataFactory.literal(value, language)
        : dataFactory.literal(value);
}
|
|
513
|
-
|
|
514
|
-
/**
 * Apply one parsed annotation to the parser state and emit its quads.
 *
 * Steps:
 *   1. A subject reset clears the current subject/object and stops.
 *   2. A declared subject (`=iri` / `=#frag`) and soft object (`+iri` /
 *      `+#frag`) are resolved; fragments resolve against the current
 *      subject with any existing fragment removed.
 *   3. A new subject becomes the current subject. With no current subject
 *      nothing is emitted.
 *   4. A block record is stored, then one rdf:type quad per type and one
 *      quad per predicate are emitted according to the predicate form:
 *        ''  : (softObject || S) -p-> literal
 *        '?' : anchor -p-> target
 *        '^' : literal -p-> (softObject || S)
 *        '!' : target -p-> anchor
 *      where anchor is the previous subject when a new subject was declared
 *      (otherwise S) and target is softObject || newSubject || carrier URL.
 *
 * @param {Object} carrier - Carrier (`text`, `range`, optional `url`,
 *   `attrsRange`, `valueRange`, `type`).
 * @param {Object} sem - Result of `parseSemanticBlock`.
 * @param {Object} state - Parser state (mutated).
 * @returns {void}
 */
function processAnnotation(carrier, sem, state) {
    if (sem.subject === 'RESET') {
        state.currentSubject = null;
        state.currentObject = null;
        return;
    }

    const previousSubject = state.currentSubject;

    // Resolve `#fragment` against the subject in force before this annotation.
    const resolveFragment = (fragment) => {
        if (!previousSubject) return null;
        const baseIRI = previousSubject.value.split('#')[0];
        return state.df.namedNode(`${baseIRI}#${fragment}`);
    };
    const toNode = (term) => state.df.namedNode(expandIRI(term, state.ctx));

    let newSubject = null;
    if (sem.subject) {
        newSubject = sem.subject.startsWith('=#')
            ? resolveFragment(sem.subject.substring(2))
            : toNode(sem.subject);
    }

    // Soft object: local to this annotation only.
    let localObject = null;
    if (sem.object) {
        localObject = sem.object.startsWith('#')
            ? resolveFragment(sem.object.substring(1))
            : toNode(sem.object);
    }

    if (newSubject) state.currentSubject = newSubject;

    const S = state.currentSubject;
    if (!S) return;

    const block = createBlock(
        S.value,
        sem.types,
        sem.predicates,
        sem.entries,
        carrier.range,
        carrier.attrsRange || null,
        carrier.valueRange || null,
        carrier.type || null,
        state.ctx
    );
    state.origin.blocks.set(block.id, block);

    const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
    const carrierO = carrier.url ? toNode(carrier.url) : null;

    const emit = (subject, predicate, object, meta) =>
        emitQuad(state.quads, state.origin.quadIndex, block.id, subject, predicate, object, state.df, meta);

    for (const t of sem.types) {
        const isPlain = typeof t === 'string';
        const typeIRI = isPlain ? t : t.iri;
        const entryIndex = isPlain ? null : t.entryIndex;
        // Type target: new subject first, then soft object, carrier URL, current subject.
        const typeSubject = newSubject ? newSubject : (localObject || carrierO || S);
        const expandedType = expandIRI(typeIRI, state.ctx);
        emit(
            typeSubject,
            toNode('rdf:type'),
            state.df.namedNode(expandedType),
            { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex }
        );
    }

    for (const pred of sem.predicates) {
        const P = toNode(pred.iri);
        const meta = {
            kind: 'pred',
            token: `${pred.form}${pred.iri}`,
            form: pred.form,
            expandedPredicate: P.value,
            entryIndex: pred.entryIndex
        };

        switch (pred.form) {
            case '':
                emit(localObject || S, P, L, meta);
                break;
            case '^':
                emit(L, P, localObject || S, meta);
                break;
            case '?':
            case '!': {
                const anchor = newSubject ? previousSubject : S;
                const target = localObject || newSubject || carrierO;
                if (target && anchor) {
                    if (pred.form === '?') {
                        emit(anchor, P, target, meta);
                    } else {
                        emit(target, P, anchor, meta);
                    }
                }
                break;
            }
            default:
                break;
        }
    }
}
|
|
606
|
-
|
|
607
|
-
/**
 * Apply a list-level annotation (`contextSem`) to each item of a list.
 *
 * For every list token:
 *   1. The item subject is taken from the item's own annotation, or else
 *      from the first inline carrier that declares a subject. Items without
 *      a resolvable subject are skipped.
 *   2. Context types are emitted as rdf:type on the item subject; context
 *      predicates of form '?' link context subject -> item, form '!' link
 *      item -> context subject. Literal forms ('' and '^') emit nothing here.
 *   3. With the item as current subject: if neither the item nor its
 *      carriers declare predicates, the context's plain ('') predicates are
 *      applied to the item text; then the item's own annotation and every
 *      annotated carrier are processed.
 *   4. The previous current subject is restored.
 *
 * @param {Object} contextSem - Parsed list-level annotation.
 * @param {Array<Object>} listTokens - List tokens from the scanner.
 * @param {Object} state - Parser state (mutated).
 * @param {?Object} [contextSubject=null] - Subject the list belongs to;
 *   defaults to the current subject.
 * @returns {void}
 */
function processListContext(contextSem, listTokens, state, contextSubject = null) {
    const listSubject = contextSubject || state.currentSubject;

    const declaresSubject = (sem) => Boolean(sem.subject) && sem.subject !== 'RESET';

    // Resolve a declared subject; fragments need a current subject to resolve against.
    const resolveSubject = (sem) => {
        if (!sem.subject.startsWith('=#')) {
            return state.df.namedNode(expandIRI(sem.subject, state.ctx));
        }
        if (!state.currentSubject) return null;
        const baseIRI = state.currentSubject.value.split('#')[0];
        return state.df.namedNode(`${baseIRI}#${sem.subject.substring(2)}`);
    };

    const emitContextQuad = (subject, predicate, object) =>
        emitQuad(state.quads, state.origin.quadIndex, 'list-context', subject, predicate, object, state.df);

    for (const listToken of listTokens) {
        const carriers = extractInlineCarriers(listToken.text, listToken.range[0]);
        const tokenSem = listToken.attrs ? parseSemanticBlock(listToken.attrs) : null;
        const annotatedCarriers = carriers
            .filter((carrier) => carrier.attrs)
            .map((carrier) => ({ carrier, sem: parseSemanticBlock(carrier.attrs) }));

        // Fresh carrier object describing the list item as a whole.
        const makeItemCarrier = () => ({
            type: 'list',
            text: listToken.text,
            range: listToken.range,
            attrsRange: listToken.attrsRange || null,
            valueRange: listToken.valueRange || null
        });

        // 1. Item subject: own annotation first, then first declaring carrier.
        let itemSubject = null;
        if (tokenSem && declaresSubject(tokenSem)) {
            itemSubject = resolveSubject(tokenSem);
        }
        if (!itemSubject) {
            const declaring = annotatedCarriers.find(({ sem }) => declaresSubject(sem));
            if (declaring) itemSubject = resolveSubject(declaring.sem);
        }
        if (!itemSubject) continue;

        // 2. Context types and object-form predicates.
        for (const t of contextSem.types) {
            const typeIRI = typeof t === 'string' ? t : t.iri;
            emitContextQuad(
                itemSubject,
                state.df.namedNode(expandIRI('rdf:type', state.ctx)),
                state.df.namedNode(expandIRI(typeIRI, state.ctx))
            );
        }

        for (const pred of contextSem.predicates) {
            const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
            // Per the MD-LD spec note in the original code: only object predicate
            // forms (?p / !p) connect list items; literal forms are ignored here.
            if (pred.form === '?') {
                emitContextQuad(listSubject, P, itemSubject);
            } else if (pred.form === '!') {
                emitContextQuad(itemSubject, P, listSubject);
            }
        }

        // 3. Process the item with itself as the current subject.
        const prevSubject = state.currentSubject;
        state.currentSubject = itemSubject;

        const hasOwnPredicates =
            Boolean(tokenSem && tokenSem.predicates.length > 0) ||
            annotatedCarriers.some(({ sem }) => sem.predicates.length > 0);

        if (!hasOwnPredicates) {
            const inheritedPredicates = contextSem.predicates.filter((p) => p.form === '');
            if (inheritedPredicates.length > 0 && listToken.text) {
                const inheritedTokens = inheritedPredicates.map((p) => p.iri).join(' ');
                const inheritedSem = parseSemanticBlock(`{${inheritedTokens}}`);
                processAnnotation(makeItemCarrier(), inheritedSem, state);
            }
        }

        if (tokenSem) {
            processAnnotation(makeItemCarrier(), tokenSem, state);
        }

        for (const { carrier, sem } of annotatedCarriers) {
            processAnnotation(carrier, sem, state);
        }

        // 4. Restore the subject that was current before this item.
        state.currentSubject = prevSubject;
    }
}
|
|
731
|
-
|
|
732
|
-
/**
 * Parse an MD-LD document into RDF quads plus origin bookkeeping.
 *
 * @param {string} text - Source document.
 * @param {object} [options]
 * @param {object} [options.context] - Prefix/vocab mappings merged over DEFAULT_CONTEXT.
 * @param {object} [options.dataFactory] - RDF term factory; defaults to the built-in DataFactory.
 * @returns {{quads: Array, origin: {blocks: Map, quadIndex: Map}, context: object}}
 *   The emitted quads, the origin maps filled in while processing annotations,
 *   and the effective context (defaults + options + in-document prefixes).
 */
export function parse(text, options = {}) {
    const state = {
        ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
        df: options.dataFactory || DataFactory,
        quads: [],
        origin: { blocks: new Map(), quadIndex: new Map() },
        currentSubject: null,
        currentObject: null
    };

    const tokens = scanTokens(text);

    // Register every prefix declaration up front so that an annotation may use
    // a prefix regardless of where in the document it is declared.
    for (const t of tokens) {
        if (t.type === 'prefix') state.ctx[t.prefix] = t.iri;
    }

    // Block-level token types whose own `attrs` are processed directly.
    const annotatedBlockTypes = ['heading', 'code', 'blockquote'];

    for (let i = 0; i < tokens.length; i++) {
        const token = tokens[i];

        if (annotatedBlockTypes.includes(token.type)) {
            if (token.attrs) {
                const sem = parseSemanticBlock(token.attrs);
                const carrier = {
                    type: token.type,
                    text: token.text,
                    range: token.range,
                    attrsRange: token.attrsRange || null,
                    valueRange: token.valueRange || null
                };
                processAnnotation(carrier, sem, state);
            }
            continue;
        }

        if (token.type !== 'para') continue;

        // Standalone subject declaration: `{=iri}` alone on its line.
        const standaloneSubjectMatch = token.text.match(/^\s*\{=(.*?)\}\s*$/);
        if (standaloneSubjectMatch) {
            const iriText = standaloneSubjectMatch[1];
            const sem = parseSemanticBlock(`{=${iriText}}`);
            const attrsStart = token.range[0] + token.text.indexOf('{=');
            // The attrs span must cover the whole `{=iri}` block, delimiters
            // included: serialize() replaces the span with a complete `{...}`
            // string, so a span that stops short would leave stray characters.
            const attrsEnd = attrsStart + '{='.length + iriText.length + '}'.length;
            processAnnotation(
                { type: 'standalone', text: '', range: token.range, attrsRange: [attrsStart, attrsEnd], valueRange: null },
                sem,
                state
            );
        }

        // Collect the run of list tokens that immediately follows this paragraph.
        const followingLists = [];
        let j = i + 1;
        while (j < tokens.length && tokens[j].type === 'list') {
            followingLists.push(tokens[j]);
            j++;
        }

        // A paragraph of the form `text {annotation}` followed by list items
        // acts as the context for those items.
        const contextMatch = token.text.match(/^(.+?)\s*\{([^}]+)\}$/);
        if (contextMatch && followingLists.length > 0) {
            const contextSem = parseSemanticBlock(`{${contextMatch[2]}}`);
            let contextSubject = state.currentSubject;

            // Prefer the subject declared by the nearest preceding heading, if any.
            for (let k = i - 1; k >= 0; k--) {
                const prevToken = tokens[k];
                if (prevToken.type !== 'heading' || !prevToken.attrs) continue;
                const headingSem = parseSemanticBlock(prevToken.attrs);
                if (headingSem.subject) {
                    contextSubject = state.df.namedNode(expandIRI(headingSem.subject, state.ctx));
                    break;
                }
            }

            processListContext(contextSem, followingLists, state, contextSubject);
            // The list tokens were consumed above; resume after the last one.
            i = j - 1;
            continue;
        }

        const carriers = extractInlineCarriers(token.text, token.range[0]);
        for (const carrier of carriers) {
            if (carrier.attrs) {
                processAnnotation(carrier, parseSemanticBlock(carrier.attrs), state);
            }
        }
    }

    return { quads: state.quads, origin: state.origin, context: state.ctx };
}
|
|
811
|
-
|
|
812
|
-
/**
 * Resolve one of a block's recorded ranges to the text it covers.
 *
 * @param {object|null|undefined} block - Origin block carrying `attrsRange` / `valueRange`.
 * @param {string} text - Document the ranges index into.
 * @param {string} [spanType='attrs'] - 'attrs' selects `attrsRange`; anything else selects `valueRange`.
 * @returns {{start: number, end: number, text: string}|null} The span, or null
 *   when the range is missing or its bounds are not finite, non-negative and ordered.
 */
function readSpan(block, text, spanType = 'attrs') {
    const rangeKey = spanType === 'attrs' ? 'attrsRange' : 'valueRange';
    const range = block?.[rangeKey];
    if (!range) return null;

    const { start, end } = range;
    const boundsAreValid =
        Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start;
    if (!boundsAreValid) return null;

    return { start, end, text: text.substring(start, end) };
}
|
|
820
|
-
|
|
821
|
-
/**
 * Split the text of an annotation block into its whitespace-separated tokens.
 *
 * A leading `{` and a trailing `}` (each with adjacent whitespace) are dropped first.
 *
 * @param {*} attrsText - Raw annotation text; falsy values are treated as empty.
 * @returns {string[]} The tokens, or an empty array when nothing remains.
 */
function normalizeAttrsTokens(attrsText) {
    const source = String(attrsText || '');
    const inner = source.replace(/^\s*\{|\}\s*$/g, '').trim();
    if (inner === '') return [];
    return inner.split(/\s+/).filter(Boolean);
}
|
|
825
|
-
|
|
826
|
-
/**
 * Render a token list as an annotation block: tokens joined by single spaces,
 * trimmed, and wrapped in braces.
 *
 * @param {string[]} tokens
 * @returns {string} e.g. `{a b}`; an empty list yields `{}`.
 */
function writeAttrsTokens(tokens) {
    const inner = tokens.join(' ').trim();
    return `{${inner}}`;
}
|
|
829
|
-
|
|
830
|
-
/**
 * Remove the first token accepted by `matchFn`.
 *
 * @param {string[]} tokens - Not mutated.
 * @param {function(string, number, string[]): boolean} matchFn - Predicate passed to `findIndex`.
 * @returns {{tokens: string[], removed: boolean}} A copy without the matched
 *   token and `removed: true`, or the original array and `removed: false`.
 */
function removeOneToken(tokens, matchFn) {
    const hit = tokens.findIndex(matchFn);
    if (hit === -1) {
        return { tokens, removed: false };
    }
    const remaining = [...tokens];
    remaining.splice(hit, 1);
    return { tokens: remaining, removed: true };
}
|
|
834
|
-
|
|
835
|
-
/**
 * Ensure the object token `+<iri>` is present in a token list.
 *
 * @param {string[]} tokens - Not mutated.
 * @param {string} iri - IRI (or shortened IRI) to reference.
 * @returns {string[]} The original array when the token already exists,
 *   otherwise a copy with the token appended.
 */
function addObjectToken(tokens, iri) {
    const wanted = `+${iri}`;
    if (tokens.includes(wanted)) {
        return tokens;
    }
    return [...tokens, wanted];
}
|
|
839
|
-
|
|
840
|
-
/**
 * Remove the first object token `+<iri>` from a token list.
 *
 * @param {string[]} tokens - Not mutated.
 * @param {string} iri - IRI (or shortened IRI) whose token should go.
 * @returns {{tokens: string[], removed: boolean}} Result of `removeOneToken`.
 */
function removeObjectToken(tokens, iri) {
    const wanted = `+${iri}`;
    const isWanted = (candidate) => candidate === wanted;
    return removeOneToken(tokens, isWanted);
}
|
|
844
|
-
|
|
845
|
-
/**
 * Ensure the soft-fragment token `+#<fragment>` is present in a token list.
 *
 * @param {string[]} tokens - Not mutated.
 * @param {string} fragment - Fragment identifier without the leading `#`.
 * @returns {string[]} The original array when the token already exists,
 *   otherwise a copy with the token appended.
 */
function addSoftFragmentToken(tokens, fragment) {
    const wanted = `+#${fragment}`;
    if (tokens.includes(wanted)) {
        return tokens;
    }
    return [...tokens, wanted];
}
|
|
849
|
-
|
|
850
|
-
/**
 * Remove the first soft-fragment token `+#<fragment>` from a token list.
 *
 * @param {string[]} tokens - Not mutated.
 * @param {string} fragment - Fragment identifier without the leading `#`.
 * @returns {{tokens: string[], removed: boolean}} Result of `removeOneToken`.
 */
function removeSoftFragmentToken(tokens, fragment) {
    const wanted = `+#${fragment}`;
    const isWanted = (candidate) => candidate === wanted;
    return removeOneToken(tokens, isWanted);
}
|
|
854
|
-
|
|
855
|
-
/**
 * Make a literal value safe to write into the carrier of `block`.
 *
 * - `code` carriers keep their line structure; only line endings are normalised to `\n`.
 * - Every other carrier is collapsed to one trimmed line.
 * - `span` and `link` carriers additionally have `[` and `]` replaced by spaces.
 *
 * @param {object|null|undefined} block - Block whose `carrierType` selects the rule.
 * @param {*} raw - New value; null/undefined become the empty string.
 * @returns {string}
 */
function sanitizeCarrierValueForBlock(block, raw) {
    const value = String(raw ?? '');
    const carrierType = block?.carrierType;

    if (carrierType === 'code') {
        return value.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
    }

    const singleLine = value.replace(/[\n\r]+/g, ' ').trim();
    if (carrierType === 'span' || carrierType === 'link') {
        return singleLine.replace(/[\[\]]/g, ' ');
    }
    return singleLine;
}
|
|
862
|
-
|
|
863
|
-
/**
 * Collect the raw token text of each entry recorded on a block.
 *
 * @param {object|null|undefined} block - Block with an optional `entries` array.
 * @returns {string[]|null} The truthy `raw` values in entry order, or null
 *   when the block has no entries.
 */
function blockTokensFromEntries(block) {
    const entries = block?.entries;
    if (!entries?.length) return null;

    const rawTokens = [];
    for (const entry of entries) {
        if (entry.raw) rawTokens.push(entry.raw);
    }
    return rawTokens;
}
|
|
866
|
-
|
|
867
|
-
/**
 * Return a block's entries with the one at `entryIndex` left out.
 *
 * @param {object|null|undefined} block - Block with an `entries` array; not mutated.
 * @param {number|null|undefined} entryIndex - Position of the entry to drop.
 * @returns {Array|null} A new entries array, or null when the block has no
 *   entries or the index is missing or out of bounds.
 */
function removeEntryAt(block, entryIndex) {
    const entries = block?.entries;
    if (!entries) return null;
    if (entryIndex == null) return null;
    if (entryIndex < 0 || entryIndex >= entries.length) return null;

    const before = entries.slice(0, entryIndex);
    const after = entries.slice(entryIndex + 1);
    return [...before, ...after];
}
|
|
871
|
-
|
|
872
|
-
/**
 * Rebuild a block's entries so their language/datatype marker matches `lit`.
 *
 * Existing `language` and `datatype` entries are dropped. A language entry
 * (`@lang`) is appended when the literal has a language; otherwise a datatype
 * entry (`^^type`) is appended when the datatype is anything but xsd:string.
 *
 * @param {object|null|undefined} block - Block with an `entries` array; not mutated.
 * @param {object|null|undefined} lit - Literal term (`language`, `datatype.value`).
 * @param {object} ctx - Context handed to `shortenIRI` for the datatype IRI.
 * @returns {Array|null} The new entries, or null when the block has no `entries`.
 */
function replaceLangDatatypeEntries(block, lit, ctx) {
    if (!block?.entries) return null;

    const nextEntries = block.entries.filter(
        (entry) => !(entry.kind === 'language' || entry.kind === 'datatype')
    );

    const language = lit?.language;
    if (language) {
        nextEntries.push({
            kind: 'language',
            language,
            raw: `@${language}`,
            relRange: { start: 0, end: 0 }
        });
        return nextEntries;
    }

    const datatypeIri = lit?.datatype?.value;
    if (datatypeIri && datatypeIri !== 'http://www.w3.org/2001/XMLSchema#string') {
        const shortDatatype = shortenIRI(datatypeIri, ctx);
        nextEntries.push({
            kind: 'datatype',
            datatype: shortDatatype,
            raw: `^^${shortDatatype}`,
            relRange: { start: 0, end: 0 }
        });
    }
    return nextEntries;
}
|
|
883
|
-
|
|
884
|
-
/**
 * Rewrite the language/datatype marker in a raw token list to match `newLit`.
 *
 * Tokens starting with `@` or `^^` are dropped. `@lang` is appended when the
 * literal has a language; otherwise `^^type` is appended when the datatype is
 * anything but xsd:string.
 *
 * @param {string[]} tokens - Not mutated.
 * @param {object|null|undefined} newLit - Literal term (`language`, `datatype.value`).
 * @param {object} ctx - Context handed to `shortenIRI` for the datatype IRI.
 * @returns {string[]} A new token array.
 */
function updateAttrsDatatypeLang(tokens, newLit, ctx) {
    const kept = tokens.filter((tok) => !(tok.startsWith('@') || tok.startsWith('^^')));

    const language = newLit?.language;
    if (language) {
        kept.push(`@${language}`);
        return kept;
    }

    const datatypeIri = newLit?.datatype?.value;
    if (datatypeIri && datatypeIri !== 'http://www.w3.org/2001/XMLSchema#string') {
        kept.push(`^^${shortenIRI(datatypeIri, ctx)}`);
    }
    return kept;
}
|
|
893
|
-
|
|
894
|
-
/**
 * Apply an RDF diff to an MD-LD document and return the edited text.
 *
 * Every change is translated into a text edit against the ORIGINAL `text`
 * (either an in-place replacement of a block's value/attrs span, or an append
 * at the end of the document). Edits are applied from the highest offset down
 * so earlier offsets stay valid, then the result is re-parsed to rebuild the
 * origin maps.
 *
 * @param {object} args
 * @param {string} args.text - Current document.
 * @param {{add?: Array, delete?: Array}} [args.diff] - Quads to add / delete.
 * @param {{blocks: Map, quadIndex: Map}} [args.origin] - Origin from a previous
 *   parse of `text`; parsed on demand when omitted. NOTE: its `quadIndex` is
 *   mutated when deleted quads leave vacant slots behind.
 * @param {object} [args.options]
 * @param {object} [args.options.context] - Prefix context used for parsing and
 *   for shortening IRIs.
 * @returns {{text: string, origin: {blocks: Map, quadIndex: Map}}}
 */
export function serialize({ text, diff, origin, options = {} }) {
    if (!diff || (!diff.add?.length && !diff.delete?.length)) {
        const reparsed = parse(text, { context: options.context || {} });
        return { text, origin: reparsed.origin };
    }

    const base = origin || parse(text, { context: options.context || {} }).origin;
    let result = text;
    const edits = [];
    const ctx = options.context || {};

    // Fallback lookup for literals whose exact origin key does not match:
    // scan for the same subject/predicate with an equal literal value.
    const findOriginEntryForLiteralByValue = (subjectIri, predicateIri, literalValue) => {
        for (const [k, entry] of base?.quadIndex || []) {
            const parsed = parseQuadIndexKey(k);
            if (!parsed) continue;
            if (parsed.s !== subjectIri || parsed.p !== predicateIri) continue;
            if (parsed.o?.t !== 'Literal') continue;
            if (parsed.o?.v === literalValue) return entry;
        }
        return null;
    };

    // Anchors: for every deleted quad, remember the block it came from, keyed
    // by (subject, object). An added quad with the same subject and object can
    // then be written into that same block.
    const anchors = new Map();
    for (const q0 of diff.delete || []) {
        const q = normalizeQuad(q0);
        if (!q) continue;
        if (!q?.subject || !q?.object || !q?.predicate) continue;
        const key = JSON.stringify([q.subject.value, objectSignature(q.object)]);
        const qk = quadToKeyForOrigin(q);
        const entry = qk ? base?.quadIndex?.get(qk) : null;
        const blockId = entry?.blockId || entry;
        const block = blockId ? base?.blocks?.get(blockId) : null;
        if (!block?.attrsRange) continue;
        anchors.set(key, { block, entry });
    }

    // Index added quads by (subject, predicate) to pair them with deletions.
    const addBySP = new Map();
    for (const q0 of diff.add || []) {
        const q = normalizeQuad(q0);
        if (!q) continue;
        if (!q?.subject || !q?.predicate || !q?.object) continue;
        const k = JSON.stringify([q.subject.value, q.predicate.value]);
        const list = addBySP.get(k) || [];
        list.push(q);
        addBySP.set(k, list);
    }

    // A deleted literal paired with an added literal on the same
    // subject/predicate is treated as an in-place value update.
    const consumedAdds = new Set();
    const literalUpdates = [];
    for (const dq0 of diff.delete || []) {
        const dq = normalizeQuad(dq0);
        if (!dq) continue;
        if (!dq?.subject || !dq?.predicate || !dq?.object) continue;
        if (dq.object.termType !== 'Literal') continue;
        const k = JSON.stringify([dq.subject.value, dq.predicate.value]);
        const candidates = addBySP.get(k) || [];
        const aq = candidates.find(x => x?.object?.termType === 'Literal' && !consumedAdds.has(quadToKeyForOrigin(x)));
        if (!aq) continue;

        const dqk = quadToKeyForOrigin(dq);
        let entry = dqk ? base?.quadIndex?.get(dqk) : null;
        if (!entry) {
            entry = findOriginEntryForLiteralByValue(dq.subject.value, dq.predicate.value, dq.object.value);
        }
        const blockId = entry?.blockId || entry;
        const block = blockId ? base?.blocks?.get(blockId) : null;
        if (!block) continue;

        literalUpdates.push({ deleteQuad: dq, addQuad: aq, entry, block });
        consumedAdds.add(quadToKeyForOrigin(aq));
    }

    // Remaining added literals: reuse a vacant slot left by an earlier delete,
    // if one exists for the same subject/predicate.
    // (The original carried a further "match by subject/predicate" fallback
    // after an unconditional `continue`; it could never execute and was removed.)
    for (const q0 of diff.add || []) {
        const quad = normalizeQuad(q0);
        if (!quad || quad.object?.termType !== 'Literal') continue;
        if (consumedAdds.has(quadToKeyForOrigin(quad))) continue;

        const vacantSlot = findVacantSlot(base?.quadIndex, quad.subject, quad.predicate);
        if (!vacantSlot) continue;

        const block = base?.blocks?.get(vacantSlot.blockId);
        if (!block) continue;

        const span = readSpan(block, text, 'attrs');
        if (!span) continue;

        const occupiedSlot = occupySlot(vacantSlot, quad.object);
        if (!occupiedSlot) continue;

        // Write the new value into the carrier, sanitised for the carrier type
        // exactly like the other value edits in this function.
        const valueSpan = readSpan(block, text, 'value');
        if (valueSpan) {
            const newValue = sanitizeCarrierValueForBlock(block, quad.object.value);
            edits.push({ start: valueSpan.start, end: valueSpan.end, text: newValue });
        }

        // Restore the predicate token in the annotation block.
        const tokens = normalizeAttrsTokens(span.text);
        const predToken = `${vacantSlot.form || ''}${shortenIRI(quad.predicate.value, ctx)}`;
        if (tokens.length === 0) {
            edits.push({ start: span.start, end: span.end, text: `{${predToken}}` });
        } else if (!tokens.includes(predToken)) {
            edits.push({ start: span.start, end: span.end, text: writeAttrsTokens([...tokens, predToken]) });
        }

        consumedAdds.add(quadToKeyForOrigin(quad));
    }

    // Paired literal updates: rewrite the value and the language/datatype marker.
    for (const u of literalUpdates) {
        const span = readSpan(u.block, text, 'value');
        if (span) {
            const newValue = sanitizeCarrierValueForBlock(u.block, u.addQuad.object.value);
            edits.push({ start: span.start, end: span.end, text: newValue });
        }

        const aSpan = readSpan(u.block, text, 'attrs');
        if (!aSpan) continue;

        if (u.block?.entries?.length) {
            const nextEntries = replaceLangDatatypeEntries(u.block, u.addQuad.object, ctx);
            if (nextEntries) {
                const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
                // writeAttrsTokens([]) yields '{}', so no special case is needed.
                edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(nextTokens) });
            }
        } else {
            const tokens = normalizeAttrsTokens(aSpan.text);
            const updated = updateAttrsDatatypeLang(tokens, u.addQuad.object, ctx);
            if (updated.join(' ') !== tokens.join(' ')) {
                edits.push({ start: aSpan.start, end: aSpan.end, text: writeAttrsTokens(updated) });
            }
        }
    }

    if (diff.delete) {
        diff.delete.forEach(q0 => {
            const quad = normalizeQuad(q0);
            if (!quad) return;
            if (!quad?.subject || !quad?.predicate || !quad?.object) return;

            // Literals already handled as an in-place update are not deletions.
            if (quad.object.termType === 'Literal') {
                const isUpdated = literalUpdates.some(u =>
                    u.deleteQuad.subject.value === quad.subject.value &&
                    u.deleteQuad.predicate.value === quad.predicate.value &&
                    u.deleteQuad.object.value === quad.object.value
                );
                if (isUpdated) return;
            }

            const key = quadToKeyForOrigin(quad);
            let entry = key ? base?.quadIndex?.get(key) : null;
            if (!entry && quad.object?.termType === 'Literal') {
                entry = findOriginEntryForLiteralByValue(quad.subject.value, quad.predicate.value, quad.object.value);
            }

            // Mark the semantic slot as vacant so a later add can reuse it.
            if (entry && entry.slotId) {
                // Capture block details now; they are needed to recreate the
                // block if it no longer exists after re-parsing.
                const slotBlock = base?.blocks?.get(entry.blockId);
                const blockInfo = slotBlock ? {
                    id: entry.blockId,
                    range: slotBlock.range,
                    attrsRange: slotBlock.attrsRange,
                    valueRange: slotBlock.valueRange,
                    carrierType: slotBlock.carrierType,
                    subject: slotBlock.subject,
                    context: slotBlock.context
                } : null;

                const vacantSlot = markSlotAsVacant(entry, quad.object);
                if (vacantSlot && blockInfo) {
                    vacantSlot.blockInfo = blockInfo;
                    base.quadIndex.set(key, vacantSlot);
                }
            }

            const blockId = entry?.blockId || entry;
            if (!blockId) return;

            const block = base?.blocks?.get(blockId);
            if (!block) return;

            const span = readSpan(block, text, 'attrs');
            if (!span) return;

            // Preferred path: drop the recorded entry by its index.
            if (entry?.entryIndex != null && block?.entries?.length) {
                const nextEntries = removeEntryAt(block, entry.entryIndex);
                if (!nextEntries) return;

                const nextTokens = nextEntries.map(e => e.raw).filter(Boolean);
                edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(nextTokens) });
                return;
            }

            // Token-based fallbacks all work on the raw annotation tokens.
            // This must be declared before the branches below use it; the
            // original declared it after them, so the 'object' and
            // 'softFragment' branches threw a ReferenceError (temporal dead zone).
            const tokens = normalizeAttrsTokens(span.text);
            let updated = tokens;
            let removed = false;

            if (entry?.kind === 'object') {
                const objectIRI = shortenIRI(quad.object.value, ctx);
                ({ tokens: updated, removed } = removeObjectToken(tokens, objectIRI));
            } else if (entry?.kind === 'softFragment') {
                ({ tokens: updated, removed } = removeSoftFragmentToken(tokens, entry.fragment));
            } else if (entry?.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
                const expectedType = entry.expandedType || quad.object.value;
                ({ tokens: updated, removed } = removeOneToken(tokens, t => {
                    if (!t.startsWith('.')) return false;
                    const raw = t.slice(1);
                    return expandIRI(raw, ctx) === expectedType;
                }));
            } else {
                const expectedPred = entry?.expandedPredicate || quad.predicate.value;
                const expectedForm = entry?.form;
                ({ tokens: updated, removed } = removeOneToken(tokens, t => {
                    const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
                    if (!m) return false;
                    const form = m[1] || '';
                    const raw = m[2];
                    if (expectedForm != null && form !== expectedForm) return false;
                    return expandIRI(raw, ctx) === expectedPred;
                }));
            }

            if (!removed) return;

            edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
        });
    }

    if (diff.add) {
        diff.add.forEach(q0 => {
            const quad = normalizeQuad(q0);
            if (!quad) return;
            if (!quad?.subject || !quad?.predicate || !quad?.object) return;

            if (consumedAdds.has(quadToKeyForOrigin(quad))) return;

            const anchorKey = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
            const anchored = anchors.get(anchorKey) || null;
            let targetBlock = anchored?.block || null;

            // No anchor: fall back to the first block declared for this subject.
            if (!targetBlock) {
                for (const [, block] of base?.blocks || []) {
                    if (block.subject === quad.subject.value && block.attrsRange) {
                        targetBlock = block;
                        break;
                    }
                }
            }

            const { termType } = quad.object;

            // Literals not consumed above are always appended as a new carrier.
            if (termType === 'Literal') {
                const predShort = shortenIRI(quad.predicate.value, ctx);
                const value = String(quad.object.value ?? '');
                let ann = predShort;
                if (quad.object.language) ann += ` @${quad.object.language}`;
                else if (quad.object.datatype?.value && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
                    ann += ` ^^${shortenIRI(quad.object.datatype.value, ctx)}`;
                }
                edits.push({ start: result.length, end: result.length, text: `\n[${value}] {${ann}}` });
                return;
            }

            if (termType === 'NamedNode') {
                const full = quad.object.value;
                const objectShort = shortenIRI(full, ctx);
                const predShort = shortenIRI(quad.predicate.value, ctx);
                const anchorEntry = anchored?.entry;
                const isSoftFragment = full.includes('#') && anchorEntry?.kind === 'softFragment';

                if (isSoftFragment || anchorEntry?.form === '?') {
                    // Re-attach the object inside the anchored block. An anchor
                    // always carries a block, so targetBlock is set here. The
                    // original read `span`/`tokens` before their declarations
                    // and threw a ReferenceError on this path.
                    const span = readSpan(targetBlock, text, 'attrs');
                    if (!span) return;
                    const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);
                    const updated = isSoftFragment
                        ? addSoftFragmentToken(tokens, full.split('#')[1])
                        : addObjectToken(tokens, objectShort);
                    if (updated.length !== tokens.length) {
                        edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
                    }
                    return;
                }

                // NOTE(review): rdf:type additions also land here and are written
                // as a `?rdf:type` carrier. The original contained a `.Type`
                // token branch further down that NamedNode objects could never
                // reach; confirm which serialisation is intended.
                edits.push({ start: result.length, end: result.length, text: `\n[${objectShort}] {+${objectShort} ?${predShort}}` });
                return;
            }

            // Any other object term (e.g. blank nodes): add the predicate token
            // to the anchored block, reusing the form of the deleted entry.
            const span = readSpan(targetBlock, text, 'attrs');
            if (!span) return;
            const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);

            const form = anchored?.entry?.form;
            if (form == null) return;
            const predShort = shortenIRI(quad.predicate.value, ctx);
            const predToken = `${form}${predShort}`;
            if (!predToken) return;
            if (tokens.includes(predToken)) return;
            edits.push({ start: span.start, end: span.end, text: writeAttrsTokens([...tokens, predToken]) });
        });
    }

    // Apply from the end of the document backwards so pending offsets stay valid.
    edits.sort((a, b) => b.start - a.start);
    edits.forEach(edit => {
        result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
    });

    // Vacant slots live only in the old origin; collect them before re-parsing.
    const vacantSlots = new Map();
    base?.quadIndex?.forEach((slot, key) => {
        if (slot.isVacant) {
            vacantSlots.set(key, slot);
        }
    });

    const reparsed = parse(result, { context: options.context || {} });

    // Merge vacant slots into the fresh origin.
    vacantSlots.forEach((vacantSlot, key) => {
        const blockInfo = vacantSlot.blockInfo;
        if (!reparsed.origin.blocks.has(vacantSlot.blockId) && blockInfo) {
            // The block vanished from the re-parse; recreate an empty
            // placeholder from the details captured at deletion time.
            reparsed.origin.blocks.set(vacantSlot.blockId, {
                id: blockInfo.id,
                range: blockInfo.range || { start: 0, end: 0 },
                attrsRange: blockInfo.attrsRange,
                valueRange: blockInfo.valueRange,
                carrierType: blockInfo.carrierType || 'span',
                subject: blockInfo.subject || '',
                types: [],
                predicates: [],
                entries: [],
                context: blockInfo.context || { ...ctx }
            });
        }

        reparsed.origin.quadIndex.set(key, vacantSlot);
    });

    return { text: result, origin: reparsed.origin };
}
|
|
1363
|
-
|
|
1364
|
-
export default { parse, serialize, parseSemanticBlock, shortenIRI };
|