mdld-parse 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/package.json +5 -5
- package/src/index.js +10 -0
- package/src/parse.js +788 -0
- package/src/serialize.js +531 -0
- package/src/utils.js +305 -0
- package/index.js +0 -1364
package/src/serialize.js
ADDED
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
import { parse } from './parse.js';
|
|
2
|
+
import {
|
|
3
|
+
shortenIRI,
|
|
4
|
+
normalizeQuad,
|
|
5
|
+
quadToKeyForOrigin,
|
|
6
|
+
parseQuadIndexKey,
|
|
7
|
+
findVacantSlot,
|
|
8
|
+
occupySlot,
|
|
9
|
+
markSlotAsVacant,
|
|
10
|
+
normalizeAttrsTokens,
|
|
11
|
+
writeAttrsTokens,
|
|
12
|
+
removeOneToken,
|
|
13
|
+
addObjectToken,
|
|
14
|
+
removeObjectToken,
|
|
15
|
+
addSoftFragmentToken,
|
|
16
|
+
removeSoftFragmentToken,
|
|
17
|
+
objectSignature,
|
|
18
|
+
expandIRI
|
|
19
|
+
} from './utils.js';
|
|
20
|
+
|
|
21
|
+
/**
 * Fetch a block from an origin's block map.
 *
 * @param {{ blocks?: Map<string, object> } | null | undefined} base - Origin holding a `blocks` map.
 * @param {string} blockId - Identifier of the wanted block.
 * @returns {object | null | undefined} `null` when `blockId` is falsy; otherwise
 *   whatever `base.blocks.get(blockId)` yields (`undefined` when absent).
 */
function getBlockById(base, blockId) {
  if (!blockId) {
    return null;
  }
  return base?.blocks?.get(blockId);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Fetch a quad-index entry from an origin by its serialized quad key.
 *
 * @param {{ quadIndex?: Map<string, any> } | null | undefined} base - Origin holding a `quadIndex` map.
 * @param {string} quadKey - Key of the wanted entry.
 * @returns {any} `null` when `quadKey` is falsy; otherwise the result of
 *   `base.quadIndex.get(quadKey)` (`undefined` when absent).
 */
function getEntryByQuadKey(base, quadKey) {
  if (!quadKey) {
    return null;
  }
  return base?.quadIndex?.get(quadKey);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Tell whether a value looks like a usable quad, i.e. it has truthy
 * `subject`, `predicate` and `object` members.
 *
 * Always returns a real boolean (the previous form leaked the last operand,
 * e.g. the object term or `null`), which keeps it safe to use outside of
 * `Array.prototype.filter` as well.
 *
 * @param {any} quad - Candidate quad.
 * @returns {boolean} `true` when all three terms are present.
 */
function isValidQuad(quad) {
  return Boolean(quad && quad.subject && quad.predicate && quad.object);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Build the annotation body for a literal: the predicate, optionally followed
 * by a language tag (` @lang`) or a datatype (` ^^type`).
 *
 * A language takes precedence over a datatype; `xsd:string` is never written.
 *
 * @param {string} value - Literal value (accepted for signature parity; not used here).
 * @param {string} predicate - Predicate token to start the annotation with.
 * @param {string} [language] - Language tag of the literal, if any.
 * @param {{ value?: string }} [datatype] - Datatype term of the literal, if any.
 * @param {object} [ctx] - Prefix context handed to `shortenIRI`.
 * @returns {string} The annotation text.
 */
function createLiteralAnnotation(value, predicate, language, datatype, ctx) {
  if (language) {
    return `${predicate} @${language}`;
  }

  const datatypeIRI = datatype?.value;
  const needsDatatype = Boolean(datatypeIRI) && datatypeIRI !== 'http://www.w3.org/2001/XMLSchema#string';
  if (needsDatatype) {
    return `${predicate} ^^${shortenIRI(datatypeIRI, ctx)}`;
  }

  return predicate;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Build a standalone carrier plus annotation for an object (IRI) statement.
 *
 * @param {string} objectShort - Shortened object IRI; used as the carrier text.
 * @param {string} predicateShort - Shortened predicate IRI.
 * @param {boolean} [isSoftFragment=false] - When true the object token is written as `+#fragment`.
 * @param {string | null} [fragment=null] - Fragment name used for soft fragments.
 * @returns {string} Text of the form `[obj] {+obj ?pred}` or `[obj] {+#frag ?pred}`.
 */
function createObjectAnnotation(objectShort, predicateShort, isSoftFragment = false, fragment = null) {
  const objectToken = isSoftFragment ? `#${fragment}` : objectShort;
  return `[${objectShort}] {+${objectToken} ?${predicateShort}}`;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Read one of a block's recorded ranges out of the source text.
 *
 * @param {{ attrsRange?: {start:number,end:number}, valueRange?: {start:number,end:number} }} block
 *   Block whose range should be read.
 * @param {string} text - Full source text the ranges refer to.
 * @param {string} [spanType='attrs'] - `'attrs'` selects `attrsRange`; anything else selects `valueRange`.
 * @returns {{ start: number, end: number, text: string } | null} The range plus its
 *   substring, or `null` when the range is missing or not a finite, ordered,
 *   non-negative pair.
 */
function readSpan(block, text, spanType = 'attrs') {
  const bounds = spanType === 'attrs' ? block?.attrsRange : block?.valueRange;
  if (!bounds) {
    return null;
  }

  const { start, end } = bounds;
  const isUsable = Number.isFinite(start) && Number.isFinite(end) && start >= 0 && end >= start;
  if (!isUsable) {
    return null;
  }

  return { start, end, text: text.substring(start, end) };
}
|
|
57
|
+
|
|
58
|
+
/**
 * Make a literal value safe to write into a block's value carrier.
 *
 * - `code` carriers keep their line structure; only line endings are normalized to `\n`.
 * - every other carrier is collapsed onto one trimmed line;
 * - `span` and `link` carriers additionally get square brackets replaced by
 *   spaces (after trimming), since brackets delimit the carrier itself.
 *
 * @param {{ carrierType?: string } | null | undefined} block - Block the value is written into.
 * @param {any} raw - Value to sanitize; `null`/`undefined` become the empty string.
 * @returns {string} The sanitized text.
 */
function sanitizeCarrierValueForBlock(block, raw) {
  const source = String(raw ?? '');
  const carrier = block?.carrierType;

  if (carrier === 'code') {
    return source.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
  }

  const singleLine = source.replace(/[\n\r]+/g, ' ').trim();
  if (carrier === 'span' || carrier === 'link') {
    return singleLine.replace(/[\[\]]/g, ' ');
  }
  return singleLine;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Collect the raw token text of every entry recorded on a block.
 *
 * @param {{ entries?: Array<{ raw?: string }> } | null | undefined} block - Block to read.
 * @returns {string[] | null} The truthy `raw` values in entry order, or `null`
 *   when the block has no entries at all.
 */
function blockTokensFromEntries(block) {
  const entries = block?.entries;
  if (!entries?.length) {
    return null;
  }

  const rawTokens = [];
  for (const entry of entries) {
    if (entry.raw) {
      rawTokens.push(entry.raw);
    }
  }
  return rawTokens;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Produce a copy of a block's entries with one position removed.
 * The block itself is left untouched.
 *
 * @param {{ entries?: any[] } | null | undefined} block - Block owning the entries.
 * @param {number | null | undefined} entryIndex - Position to drop.
 * @returns {any[] | null} The shortened copy, or `null` when the block has no
 *   entries or the index is missing / out of range.
 */
function removeEntryAt(block, entryIndex) {
  const entries = block?.entries;
  if (!entries) return null;
  if (entryIndex == null) return null;
  if (entryIndex < 0 || entryIndex >= entries.length) return null;

  const before = entries.slice(0, entryIndex);
  const after = entries.slice(entryIndex + 1);
  return [...before, ...after];
}
|
|
74
|
+
|
|
75
|
+
/**
 * Rebuild a block's entry list so that its language / datatype entries match
 * a new literal.
 *
 * Existing `language` and `datatype` entries are dropped; then at most one new
 * entry is appended: a language entry when the literal has a language,
 * otherwise a datatype entry when the datatype is set and is not `xsd:string`.
 * Appended entries carry a placeholder `relRange` of `{ start: 0, end: 0 }`.
 *
 * @param {{ entries?: Array<{ kind?: string }> } | null | undefined} block - Block to derive from (not mutated).
 * @param {{ language?: string, datatype?: { value?: string } } | null | undefined} lit - New literal term.
 * @param {object} ctx - Prefix context handed to `shortenIRI`.
 * @returns {object[] | null} The new entry list, or `null` when the block has no `entries`.
 */
function replaceLangDatatypeEntries(block, lit, ctx) {
  if (!block?.entries) {
    return null;
  }

  const nextEntries = block.entries.filter(
    (entry) => !(entry.kind === 'language' || entry.kind === 'datatype')
  );

  const language = lit?.language;
  const datatypeIRI = lit?.datatype?.value;

  if (language) {
    nextEntries.push({
      kind: 'language',
      language,
      raw: `@${language}`,
      relRange: { start: 0, end: 0 }
    });
  } else if (datatypeIRI && datatypeIRI !== 'http://www.w3.org/2001/XMLSchema#string') {
    const datatypeShort = shortenIRI(datatypeIRI, ctx);
    nextEntries.push({
      kind: 'datatype',
      datatype: datatypeShort,
      raw: `^^${datatypeShort}`,
      relRange: { start: 0, end: 0 }
    });
  }

  return nextEntries;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Token-level counterpart of the entry-based language/datatype replacement:
 * strip every `@lang` and `^^type` token, then append the one that matches the
 * new literal (language first, otherwise a non-`xsd:string` datatype).
 *
 * @param {string[]} tokens - Current annotation tokens (not mutated).
 * @param {{ language?: string, datatype?: { value?: string } } | null | undefined} newLit - New literal term.
 * @param {object} ctx - Prefix context handed to `shortenIRI`.
 * @returns {string[]} A new token list.
 */
function updateAttrsDatatypeLang(tokens, newLit, ctx) {
  const kept = tokens.filter((token) => !(token.startsWith('@') || token.startsWith('^^')));

  const language = newLit?.language;
  if (language) {
    kept.push(`@${language}`);
    return kept;
  }

  const datatypeIRI = newLit?.datatype?.value;
  if (datatypeIRI && datatypeIRI !== 'http://www.w3.org/2001/XMLSchema#string') {
    kept.push(`^^${shortenIRI(datatypeIRI, ctx)}`);
  }
  return kept;
}
|
|
96
|
+
|
|
97
|
+
// Slot abstraction for cleaner operations
|
|
98
|
+
/**
 * Pairs a block with one of its quad-index entries and knows how to add or
 * remove the annotation token that corresponds to a quad.
 */
class Slot {
  /**
   * @param {object} block - Block the slot belongs to.
   * @param {object | null} entry - Quad-index entry backing the slot (may be null for a fresh slot).
   * @param {string | null} [kind=null] - Explicit kind; falls back to `entry.kind`.
   */
  constructor(block, entry, kind = null) {
    Object.assign(this, {
      block,
      entry,
      kind: kind || entry?.kind,
      entryIndex: entry?.entryIndex,
      isVacant: entry?.isVacant || false,
      form: entry?.form
    });
  }

  /**
   * Remove the token that expresses `quad` from a token list.
   *
   * Dispatch is by slot kind: `object` and `softFragment` delegate to their
   * dedicated helpers; a `type` slot (for an rdf:type quad) removes the first
   * `.Type` token expanding to the expected type; everything else removes the
   * first predicate token whose expansion (and, when the entry records one,
   * whose form prefix) matches.
   *
   * @param {string[]} tokens - Current annotation tokens.
   * @param {object} ctx - Prefix context for IRI shortening/expansion.
   * @param {object} quad - Quad being removed.
   * @returns {{ tokens: string[], removed: boolean }} Result as produced by the
   *   removal helpers; `{ tokens, removed: false }` when the slot has no entry.
   */
  removeToken(tokens, ctx, quad) {
    const { entry, kind } = this;
    if (!entry) {
      return { tokens, removed: false };
    }

    if (kind === 'object') {
      return removeObjectToken(tokens, shortenIRI(quad.object.value, ctx));
    }

    if (kind === 'softFragment') {
      return removeSoftFragmentToken(tokens, entry.fragment);
    }

    if (kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
      const wantedType = entry.expandedType || quad.object.value;
      return removeOneToken(
        tokens,
        (token) => token.startsWith('.') && expandIRI(token.slice(1), ctx) === wantedType
      );
    }

    const wantedPredicate = entry.expandedPredicate || quad.predicate.value;
    const wantedForm = entry.form;
    return removeOneToken(tokens, (token) => {
      // Split an optional form prefix (`^?`, `^`, `?`) from the predicate name.
      const parts = String(token).match(/^(\^\?|\^|\?|)(.+)$/);
      if (!parts) return false;
      const tokenForm = parts[1] || '';
      const tokenName = parts[2];
      if (wantedForm != null && tokenForm !== wantedForm) return false;
      return expandIRI(tokenName, ctx) === wantedPredicate;
    });
  }

  /**
   * Add the token that expresses `quad` to a token list.
   *
   * - rdf:type with an IRI object appends a `.Type` token (once);
   * - any other IRI object delegates to the soft-fragment helper when the IRI
   *   contains `#`, otherwise to the object-token helper;
   * - a literal object appends the shortened predicate (once).
   *
   * @param {string[]} tokens - Current annotation tokens.
   * @param {object} ctx - Prefix context for IRI shortening.
   * @param {object} quad - Quad being added.
   * @returns {string[]} The same array when nothing had to change, otherwise a new one.
   */
  addToken(tokens, ctx, quad) {
    const isTypeStatement = quad.predicate.value.endsWith('rdf-syntax-ns#type');

    if (isTypeStatement && quad.object?.termType === 'NamedNode') {
      const typeShort = shortenIRI(quad.object.value, ctx);
      const canWrite = typeShort.includes(':') || !typeShort.startsWith('http');
      if (!canWrite) return tokens;
      const typeToken = `.${typeShort}`;
      return tokens.includes(typeToken) ? tokens : [...tokens, typeToken];
    }

    const objectKind = quad.object.termType;

    if (objectKind === 'NamedNode') {
      const objectIRI = quad.object.value;
      const objectShort = shortenIRI(objectIRI, ctx);
      if (objectIRI.includes('#')) {
        return addSoftFragmentToken(tokens, objectIRI.split('#')[1]);
      }
      return addObjectToken(tokens, objectShort);
    }

    if (objectKind === 'Literal') {
      // Literal statements are carried by the predicate token alone.
      const predShort = shortenIRI(quad.predicate.value, ctx);
      if (!tokens.includes(predShort)) {
        return [...tokens, predShort];
      }
    }

    return tokens;
  }

  /**
   * Turn the backing entry into a vacant slot for the quad's object.
   *
   * @param {object} quad - Quad whose object vacates the slot.
   * @returns {object | null} Result of `markSlotAsVacant`, or `null` when the
   *   slot has no entry or the entry has no `slotId`.
   */
  markVacant(quad) {
    return this.entry?.slotId ? markSlotAsVacant(this.entry, quad.object) : null;
  }
}
|
|
172
|
+
|
|
173
|
+
/**
 * Apply an RDF diff to annotated source text.
 *
 * With an empty or missing diff the text is returned unchanged together with a
 * freshly parsed origin. Otherwise the work runs in three phases: plan the
 * operations, materialize them as text edits, then apply the edits and reparse.
 *
 * @param {object} args
 * @param {string} args.text - Source text to update.
 * @param {{ add?: object[], delete?: object[] }} [args.diff] - Quads to add and delete.
 * @param {object} [args.origin] - Origin of a previous parse of `text`; parsed on demand when omitted.
 * @param {{ context?: object }} [args.options={}] - Options; `context` is the prefix context.
 * @returns {{ text: string, origin: object }} Updated text and its origin.
 */
export function serialize({ text, diff, origin, options = {} }) {
  // Resolved per use so each consumer gets its own fallback object.
  const resolveContext = () => options.context || {};

  const hasChanges = Boolean(diff?.add?.length || diff?.delete?.length);
  if (!hasChanges) {
    const { origin: freshOrigin } = parse(text, { context: resolveContext() });
    return { text, origin: freshOrigin };
  }

  const base = origin || parse(text, { context: resolveContext() }).origin;
  const ctx = resolveContext();

  // Phase 1: plan operations (no text edits yet).
  const plan = planOperations(diff, base, ctx);
  // Phase 2: turn the plan into range + replacement-text edits.
  const edits = materializeEdits(plan, text, ctx, base);
  // Phase 3: apply the edits and reparse the result.
  return applyEdits(text, edits, ctx, base);
}
|
|
191
|
+
|
|
192
|
+
/**
 * Phase 1 of serialization: classify every quad of the diff into the kind of
 * text operation that will realize it. No text is edited here.
 *
 * Classification order:
 *  1. literal updates - a deleted literal paired with an added literal on the
 *     same subject + predicate, whose origin block is known;
 *  2. vacant slot occupations - remaining added literals for which
 *     `findVacantSlot` reports a reusable slot;
 *  3. deletes - remaining deleted quads with a known origin block;
 *  4. adds - every added quad not consumed by 1 or 2, with its target block
 *     (or `null` when none is found).
 *
 * Each vacant slot is handed out at most once per plan. Nothing marks a slot
 * as occupied during planning, so without that bookkeeping two added literals
 * with the same subject + predicate would both be routed to the same slot and
 * produce conflicting edits over the same text range.
 *
 * @param {{ add?: object[], delete?: object[] }} diff - Quads to add and delete.
 * @param {object} base - Origin (`blocks`, `quadIndex`) of the current text.
 * @param {object} ctx - Prefix context (currently unused; kept for signature parity).
 * @returns {{ literalUpdates: object[], vacantSlotOccupations: object[],
 *   deletes: object[], adds: object[], consumedAdds: Set<string> }} The plan.
 */
function planOperations(diff, base, ctx) {
  // Normalize quads once.
  const normAdds = (diff.add || []).map(normalizeQuad).filter(isValidQuad);
  const normDeletes = (diff.delete || []).map(normalizeQuad).filter(isValidQuad);

  const plan = {
    literalUpdates: [],
    vacantSlotOccupations: [],
    deletes: [],
    adds: [],
    consumedAdds: new Set()
  };

  const subjectPredicateKey = (quad) => JSON.stringify([quad.subject.value, quad.predicate.value]);
  const statementValueKey = (quad) =>
    JSON.stringify([quad.subject.value, quad.predicate.value, quad.object.value]);

  // Index added quads by subject + predicate.
  const addBySP = new Map();
  for (const quad of normAdds) {
    const k = subjectPredicateKey(quad);
    const list = addBySP.get(k);
    if (list) {
      list.push(quad);
    } else {
      addBySP.set(k, [quad]);
    }
  }

  // Anchors: blocks of deleted quads, so a replacement add can land in the same block.
  const anchors = new Map();
  for (const quad of normDeletes) {
    const entry = getEntryByQuadKey(base, quadToKeyForOrigin(quad));
    const block = getBlockById(base, entry?.blockId || entry);
    if (block?.attrsRange) {
      const key = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
      anchors.set(key, { block, entry });
    }
  }

  // 1. Literal updates: delete + add of a literal on the same subject/predicate.
  const updatedDeleteKeys = new Set();
  for (const deleteQuad of normDeletes) {
    if (deleteQuad.object.termType !== 'Literal') continue;

    const candidates = addBySP.get(subjectPredicateKey(deleteQuad)) || [];
    const addQuad = candidates.find(
      (x) => x?.object?.termType === 'Literal' && !plan.consumedAdds.has(quadToKeyForOrigin(x))
    );
    if (!addQuad) continue;

    const entry = resolveOriginEntry(deleteQuad, base);
    const block = entry ? getBlockById(base, entry.blockId || entry) : null;
    if (!block) continue;

    plan.literalUpdates.push({ deleteQuad, addQuad, entry, block });
    plan.consumedAdds.add(quadToKeyForOrigin(addQuad));
    updatedDeleteKeys.add(statementValueKey(deleteQuad));
  }

  // 2. Vacant slot occupations, each slot claimed at most once.
  const claimedSlots = new Set();
  for (const quad of normAdds) {
    if (quad.object.termType !== 'Literal') continue;
    const addKey = quadToKeyForOrigin(quad);
    if (plan.consumedAdds.has(addKey)) continue;

    const vacantSlot = findVacantSlot(base?.quadIndex, quad.subject, quad.predicate);
    if (!vacantSlot) continue;

    // Prefer the slot id as identity; fall back to the object itself.
    const slotIdentity = vacantSlot.slotId ?? vacantSlot;
    if (claimedSlots.has(slotIdentity)) continue;

    const block = base?.blocks?.get(vacantSlot.blockId);
    if (!block) continue;

    plan.vacantSlotOccupations.push({ quad, vacantSlot, block });
    plan.consumedAdds.add(addKey);
    claimedSlots.add(slotIdentity);
  }

  // 3. Remaining deletes (literals already handled as updates are skipped).
  for (const quad of normDeletes) {
    if (quad.object.termType === 'Literal' && updatedDeleteKeys.has(statementValueKey(quad))) {
      continue;
    }

    const entry = resolveOriginEntry(quad, base);
    const block = entry ? getBlockById(base, entry.blockId || entry) : null;
    if (block) {
      plan.deletes.push({ quad, entry, block });
    }
  }

  // 4. Remaining adds.
  for (const quad of normAdds) {
    if (plan.consumedAdds.has(quadToKeyForOrigin(quad))) continue;
    plan.adds.push({ quad, targetBlock: findTargetBlock(quad, base, anchors) });
  }

  return plan;
}
|
|
291
|
+
|
|
292
|
+
/**
 * Build the text appended at the end of the document for a quad that has no
 * suitable block to extend.
 *
 * Both variants start on a new line so the new carrier never glues onto the
 * last line of the document, and literal values are sanitized like a `span`
 * carrier because they are written between square brackets.
 */
function buildAppendedAnnotationText(quad, ctx) {
  const predShort = shortenIRI(quad.predicate.value, ctx);
  if (quad.object.termType === 'Literal') {
    const value = sanitizeCarrierValueForBlock({ carrierType: 'span' }, quad.object.value);
    const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
    return `\n[${value}] {${ann}}`;
  }
  const objectShort = shortenIRI(quad.object.value, ctx);
  return `\n${createObjectAnnotation(objectShort, predShort)}`;
}

/**
 * Phase 2 of serialization: turn a plan into a list of text edits
 * (`{ start, end, text }`, offsets relative to the ORIGINAL `text`).
 *
 * Side effect: for deletes whose entry can be vacated, the vacated slot is
 * written back into `base.quadIndex` (with a snapshot of its block) so that
 * the later phase can carry it over to the reparsed origin.
 *
 * NOTE(review): every edit is computed against the original text. Several
 * operations on the same block therefore each produce a replacement for the
 * same range; they are not merged here.
 *
 * @param {object} plan - Result of `planOperations`.
 * @param {string} text - Original source text.
 * @param {object} ctx - Prefix context for IRI shortening/expansion.
 * @param {object} base - Origin (`blocks`, `quadIndex`) of the original text.
 * @returns {Array<{ start: number, end: number, text: string }>} Edits in plan order.
 */
function materializeEdits(plan, text, ctx, base) {
  const edits = [];
  const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';
  // An annotation with no tokens left is written as an empty pair of braces.
  const renderAttrs = (tokens) => (tokens.length === 0 ? '{}' : writeAttrsTokens(tokens));
  const replaceSpan = (span, replacement) => {
    edits.push({ start: span.start, end: span.end, text: replacement });
  };

  // --- Vacant slot occupations -------------------------------------------
  for (const { quad, vacantSlot, block } of plan.vacantSlotOccupations) {
    const span = readSpan(block, text, 'attrs');
    if (!span) continue;

    // Write the new value into the carrier, sanitized for the carrier type
    // exactly like literal updates are.
    const valueSpan = readSpan(block, text, 'value');
    if (valueSpan) {
      replaceSpan(valueSpan, sanitizeCarrierValueForBlock(block, quad.object.value));
    }

    // Make sure the annotation carries the predicate (with the slot's form prefix).
    const tokens = normalizeAttrsTokens(span.text);
    const predToken = `${vacantSlot.form || ''}${shortenIRI(quad.predicate.value, ctx)}`;
    if (tokens.length === 0) {
      replaceSpan(span, `{${predToken}}`);
    } else if (!tokens.includes(predToken)) {
      replaceSpan(span, writeAttrsTokens([...tokens, predToken]));
    }
  }

  // --- Literal updates -----------------------------------------------------
  for (const { addQuad, block } of plan.literalUpdates) {
    const valueSpan = readSpan(block, text, 'value');
    if (valueSpan) {
      replaceSpan(valueSpan, sanitizeCarrierValueForBlock(block, addQuad.object.value));
    }

    const attrsSpan = readSpan(block, text, 'attrs');
    if (!attrsSpan) continue;

    if (block?.entries?.length) {
      // Entry-based rewrite: language/datatype entries are replaced wholesale.
      const nextEntries = replaceLangDatatypeEntries(block, addQuad.object, ctx);
      if (nextEntries) {
        replaceSpan(attrsSpan, renderAttrs(nextEntries.map((e) => e.raw).filter(Boolean)));
      }
    } else {
      // Token-based rewrite: only emit an edit when the tokens actually change.
      const tokens = normalizeAttrsTokens(attrsSpan.text);
      const updated = updateAttrsDatatypeLang(tokens, addQuad.object, ctx);
      if (updated.join(' ') !== tokens.join(' ')) {
        replaceSpan(attrsSpan, renderAttrs(updated));
      }
    }
  }

  // --- Deletes -------------------------------------------------------------
  for (const { quad, entry, block } of plan.deletes) {
    const slot = new Slot(block, entry);

    // Record the vacated slot (plus a snapshot of its block) in the base index.
    const vacantSlot = slot.markVacant(quad);
    if (vacantSlot && block) {
      vacantSlot.blockInfo = {
        id: entry.blockId,
        range: block.range,
        attrsRange: block.attrsRange,
        valueRange: block.valueRange,
        carrierType: block.carrierType,
        subject: block.subject,
        context: block.context
      };
      const key = quadToKeyForOrigin(quad);
      if (key) base.quadIndex.set(key, vacantSlot);
    }

    const span = readSpan(block, text, 'attrs');
    if (!span) continue;

    // Preferred: drop the entry by its recorded index.
    if (entry?.entryIndex != null && block?.entries?.length) {
      const nextEntries = removeEntryAt(block, entry.entryIndex);
      if (nextEntries) {
        replaceSpan(span, renderAttrs(nextEntries.map((e) => e.raw).filter(Boolean)));
        continue;
      }
    }

    // Fallback: find and drop the matching token.
    const tokens = normalizeAttrsTokens(span.text);
    const { tokens: updated, removed } = slot.removeToken(tokens, ctx, quad);
    if (removed) {
      replaceSpan(span, renderAttrs(updated));
    }
  }

  // --- Adds ------------------------------------------------------------------
  for (const { quad, targetBlock } of plan.adds) {
    const objectKind = quad.object.termType;
    if (objectKind !== 'Literal' && objectKind !== 'NamedNode') continue;

    const appendAtEnd = () => {
      edits.push({ start: text.length, end: text.length, text: buildAppendedAnnotationText(quad, ctx) });
    };

    if (!targetBlock) {
      appendAtEnd();
      continue;
    }

    const span = readSpan(targetBlock, text, 'attrs');
    if (!span) continue;

    // A subject-only block (like {=ex:order-123}) is not extended; a new
    // annotation is appended instead.
    const tokens = normalizeAttrsTokens(span.text);
    if (tokens.length === 1 && tokens[0].startsWith('=')) {
      appendAtEnd();
      continue;
    }

    // Normal annotation block: extend its token list.
    const existingTokens = blockTokensFromEntries(targetBlock) || tokens;
    let updated = new Slot(targetBlock, null).addToken(existingTokens, ctx, quad);

    // Typed literals also need their datatype token.
    const datatypeIRI = objectKind === 'Literal' ? quad.object.datatype?.value : undefined;
    if (quad.object.datatype && datatypeIRI !== XSD_STRING && objectKind === 'Literal') {
      const datatypeToken = `^^${shortenIRI(datatypeIRI, ctx)}`;
      if (!updated.includes(datatypeToken)) {
        updated = [...updated, datatypeToken];
      }
    }

    // Token count is the change signal: tokens are only ever appended here.
    if (updated.length !== existingTokens.length) {
      replaceSpan(span, writeAttrsTokens(updated));
    }
  }

  return edits;
}
|
|
450
|
+
|
|
451
|
+
/**
 * Phase 3 of serialization: apply text edits, reparse the result, and carry
 * vacant slots from the previous origin over to the new one.
 *
 * Edits carry offsets into the ORIGINAL text, so they are applied from the end
 * of the text towards the start. An edit whose range reaches into a region
 * that has already been rewritten is skipped: its offsets no longer describe
 * the current text, and applying it would delete unrelated characters after
 * the span. The first edit produced for a range therefore wins.
 *
 * Vacant slots are merged back only where the reparsed index has no entry for
 * the same key, so a quad that is present in the new text is never hidden
 * behind a stale vacant slot.
 *
 * @param {string} text - Original source text.
 * @param {Array<{ start: number, end: number, text: string }>} edits - Edits to
 *   apply; the array is sorted in place.
 * @param {object} ctx - Prefix context used for the reparse.
 * @param {object} base - Origin of the original text (source of vacant slots).
 * @returns {{ text: string, origin: object }} Updated text and its origin.
 */
function applyEdits(text, edits, ctx, base) {
  let result = text;

  // Descending by start; for equal starts the wider range goes first so a
  // zero-width insert at the same offset is still applicable afterwards.
  edits.sort((a, b) => b.start - a.start || b.end - a.end);

  let rewrittenFrom = Infinity;
  for (const edit of edits) {
    if (edit.end > rewrittenFrom) continue; // overlaps an already applied edit
    result = result.substring(0, edit.start) + edit.text + result.substring(edit.end);
    rewrittenFrom = edit.start;
  }

  // Collect vacant slots before reparsing.
  const vacantSlots = new Map();
  base?.quadIndex?.forEach((slot, key) => {
    if (slot.isVacant) {
      vacantSlots.set(key, slot);
    }
  });

  const reparsed = parse(result, { context: ctx });
  const { blocks, quadIndex } = reparsed.origin;

  // Merge vacant slots back into the fresh origin.
  for (const [key, vacantSlot] of vacantSlots) {
    // A live entry for the same quad takes precedence over the stale slot.
    if (quadIndex.has(key)) continue;

    const blockInfo = vacantSlot.blockInfo;
    if (blockInfo && !blocks.has(vacantSlot.blockId)) {
      // The block no longer parses; keep a placeholder so the slot stays resolvable.
      blocks.set(vacantSlot.blockId, {
        id: blockInfo.id,
        range: blockInfo.range || { start: 0, end: 0 },
        attrsRange: blockInfo.attrsRange,
        valueRange: blockInfo.valueRange,
        carrierType: blockInfo.carrierType || 'span',
        subject: blockInfo.subject || '',
        types: [],
        predicates: [],
        entries: [],
        context: blockInfo.context || { ...ctx }
      });
    }

    quadIndex.set(key, vacantSlot);
  }

  return { text: result, origin: reparsed.origin };
}
|
|
495
|
+
|
|
496
|
+
// Helper functions for origin lookup
|
|
497
|
+
/**
 * Find the quad-index entry a quad originated from.
 *
 * The exact key is tried first. For literals that miss, the index is scanned
 * for the first entry whose parsed key has the same subject, predicate and
 * literal value (language and datatype are not compared in this fallback).
 *
 * @param {object} quad - Quad to resolve.
 * @param {{ quadIndex?: Map<string, any> } | null | undefined} base - Origin to search.
 * @returns {any} The matching entry, or the (falsy) result of the direct lookup when nothing matches.
 */
function resolveOriginEntry(quad, base) {
  const exactKey = quadToKeyForOrigin(quad);
  const direct = exactKey ? base?.quadIndex?.get(exactKey) : null;

  if (direct || quad.object?.termType !== 'Literal') {
    return direct;
  }

  // Fallback: search by value.
  for (const [indexKey, candidate] of base?.quadIndex || []) {
    const parsed = parseQuadIndexKey(indexKey);
    const isSameStatement =
      parsed &&
      parsed.s === quad.subject.value &&
      parsed.p === quad.predicate.value &&
      parsed.o?.t === 'Literal' &&
      parsed.o?.v === quad.object.value;
    if (isSameStatement) {
      return candidate;
    }
  }

  return direct;
}
|
|
517
|
+
|
|
518
|
+
/**
 * Choose the block a new quad should be written into.
 *
 * An anchor registered for the same subject + object signature wins;
 * otherwise the first block (in map order) that has the quad's subject and an
 * `attrsRange` is used.
 *
 * @param {object} quad - Quad being added.
 * @param {{ blocks?: Map<string, object> } | null | undefined} base - Origin to search.
 * @param {Map<string, { block?: object }>} anchors - Anchors keyed by
 *   `JSON.stringify([subject, objectSignature])`.
 * @returns {object | null} The chosen block, or `null` when none qualifies.
 */
function findTargetBlock(quad, base, anchors) {
  const subjectIRI = quad.subject.value;

  const anchoredBlock = anchors.get(JSON.stringify([subjectIRI, objectSignature(quad.object)]))?.block;
  if (anchoredBlock) {
    return anchoredBlock;
  }

  // Block affinity: fall back to the first annotated block of the same subject.
  for (const [, candidate] of base?.blocks || []) {
    const matches = candidate.subject === subjectIRI && candidate.attrsRange;
    if (matches) {
      return candidate;
    }
  }

  return null;
}
|