mdld-parse 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/parse.js +323 -303
- package/src/render.js +490 -0
- package/src/serialize.js +74 -93
- package/src/utils.js +60 -106
package/src/serialize.js
CHANGED
|
@@ -94,80 +94,68 @@ function updateAttrsDatatypeLang(tokens, newLit, ctx) {
|
|
|
94
94
|
return predicatesAndTypes;
|
|
95
95
|
}
|
|
96
96
|
|
|
97
|
-
//
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
97
|
+
// Direct slot operations - no class abstraction needed
|
|
98
|
+
/**
 * Remove the attribute token that corresponds to a deleted quad.
 *
 * The entry's `kind` selects the removal strategy:
 *  - 'object'       → the `+<iri>` token for the shortened object IRI
 *  - 'softFragment' → the `+#<fragment>` token for `entry.fragment`
 *  - 'type' (only when the quad predicate ends with `rdf-syntax-ns#type`)
 *                   → the first `.`-prefixed token expanding to the type IRI
 *  - anything else  → the first predicate token (optionally prefixed with
 *                     `^?`, `^` or `?`) expanding to the predicate IRI
 *
 * @param {object|null|undefined} entry - Parsed entry describing the slot.
 * @param {string[]} tokens - Current attribute tokens of the block.
 * @param {object} ctx - Prefix context handed to shortenIRI / expandIRI.
 * @param {object} quad - Quad being deleted (reads predicate.value / object.value).
 * @returns {{tokens: string[], removed: boolean}} Tokens after removal and
 *   whether a token was actually removed.
 */
function removeTokenFromSlot(entry, tokens, ctx, quad) {
  // Nothing to match against: report "not removed" with the tokens untouched.
  if (!entry) {
    return { tokens, removed: false };
  }

  if (entry.kind === 'object') {
    return removeObjectToken(tokens, shortenIRI(quad.object.value, ctx));
  }

  if (entry.kind === 'softFragment') {
    return removeSoftFragmentToken(tokens, entry.fragment);
  }

  if (entry.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
    const wantedType = entry.expandedType || quad.object.value;
    return removeOneToken(
      tokens,
      (candidate) => candidate.startsWith('.') && expandIRI(candidate.slice(1), ctx) === wantedType,
    );
  }

  // Fallback: treat the entry as a predicate token.
  const wantedPredicate = entry.expandedPredicate || quad.predicate.value;
  const wantedForm = entry.form;
  return removeOneToken(tokens, (candidate) => {
    const parts = String(candidate).match(/^(\^\?|\^|\?|)(.+)$/);
    if (!parts) {
      return false;
    }
    const prefix = parts[1] || '';
    const name = parts[2];
    // Only enforce the form when the entry recorded one.
    if (wantedForm != null && prefix !== wantedForm) {
      return false;
    }
    return expandIRI(name, ctx) === wantedPredicate;
  });
}
|
|
107
127
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
return
|
|
114
|
-
} else if (this.kind === 'softFragment') {
|
|
115
|
-
const fragment = this.entry.fragment;
|
|
116
|
-
return removeSoftFragmentToken(tokens, fragment);
|
|
117
|
-
} else if (this.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
|
|
118
|
-
const expectedType = this.entry.expandedType || quad.object.value;
|
|
119
|
-
return removeOneToken(tokens, t => {
|
|
120
|
-
if (!t.startsWith('.')) return false;
|
|
121
|
-
const raw = t.slice(1);
|
|
122
|
-
return expandIRI(raw, ctx) === expectedType;
|
|
123
|
-
});
|
|
124
|
-
} else {
|
|
125
|
-
const expectedPred = this.entry.expandedPredicate || quad.predicate.value;
|
|
126
|
-
const expectedForm = this.entry.form;
|
|
127
|
-
return removeOneToken(tokens, t => {
|
|
128
|
-
const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
|
|
129
|
-
if (!m) return false;
|
|
130
|
-
const form = m[1] || '';
|
|
131
|
-
const raw = m[2];
|
|
132
|
-
if (expectedForm != null && form !== expectedForm) return false;
|
|
133
|
-
return expandIRI(raw, ctx) === expectedPred;
|
|
134
|
-
});
|
|
128
|
+
/**
 * Return the token list extended with the token that represents `quad`.
 *
 *  - rdf:type quad with a NamedNode object → `.<shortened type>` token
 *  - other quad with a NamedNode object    → `+#<fragment>` when the object
 *    IRI contains '#', otherwise `+<shortened object>`
 *  - quad with a Literal object            → the shortened predicate token
 *
 * The input array is never mutated; when nothing has to be added the very
 * same `tokens` array is returned.
 *
 * A quad without an object (or with an unsupported term type) yields the
 * tokens unchanged instead of throwing: the object's term type is read
 * through optional chaining once and used by every branch.
 *
 * @param {string[]} tokens - Existing attribute tokens.
 * @param {object} ctx - Prefix context handed to shortenIRI.
 * @param {object} quad - Quad to represent (reads predicate.value and object).
 * @returns {string[]} Tokens including the one for `quad`.
 */
function addTokenToSlot(tokens, ctx, quad) {
  const isRdfType = quad.predicate.value.endsWith('rdf-syntax-ns#type');
  const objectKind = quad.object?.termType;

  if (isRdfType && objectKind === 'NamedNode') {
    const typeShort = shortenIRI(quad.object.value, ctx);
    // NOTE(review): a string starting with 'http:' also contains ':', so
    // un-shortened absolute IRIs pass this check too — confirm that is intended.
    const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
    if (typeToken && !tokens.includes(typeToken)) {
      return [...tokens, typeToken];
    }
    return tokens;
  }

  if (objectKind === 'NamedNode') {
    const objectValue = quad.object.value;
    if (objectValue.includes('#')) {
      // Soft fragment: keep only the part after the first '#'.
      return addSoftFragmentToken(tokens, objectValue.split('#')[1]);
    }
    return addObjectToken(tokens, shortenIRI(objectValue, ctx));
  }

  if (objectKind === 'Literal') {
    const predShort = shortenIRI(quad.predicate.value, ctx);
    if (!tokens.includes(predShort)) {
      return [...tokens, predShort];
    }
  }

  return tokens;
}
|
|
164
153
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
}
|
|
169
|
-
return null;
|
|
154
|
+
/**
 * Produce the vacant-slot record for a deleted quad.
 *
 * @param {object|null|undefined} entry - Slot entry; must carry a truthy `slotId`.
 * @param {object} quad - Deleted quad; its object is passed on as the deleted value.
 * @returns {object|null} Result of markSlotAsVacant, or null when the entry
 *   is missing or has no slotId.
 */
function markEntryAsVacant(entry, quad) {
  if (!entry?.slotId) {
    return null;
  }
  return markSlotAsVacant(entry, quad.object);
}
|
|
172
160
|
|
|
173
161
|
export function serialize({ text, diff, origin, options = {} }) {
|
|
@@ -345,10 +333,8 @@ function materializeEdits(plan, text, ctx, base) {
|
|
|
345
333
|
|
|
346
334
|
// Materialize deletes
|
|
347
335
|
for (const { quad, entry, block } of plan.deletes) {
|
|
348
|
-
const slot = new Slot(block, entry);
|
|
349
|
-
|
|
350
336
|
// Mark slot as vacant
|
|
351
|
-
const vacantSlot =
|
|
337
|
+
const vacantSlot = markEntryAsVacant(entry, quad);
|
|
352
338
|
if (vacantSlot && block) {
|
|
353
339
|
const blockInfo = {
|
|
354
340
|
id: entry.blockId,
|
|
@@ -378,9 +364,9 @@ function materializeEdits(plan, text, ctx, base) {
|
|
|
378
364
|
}
|
|
379
365
|
}
|
|
380
366
|
|
|
381
|
-
// Handle token-based removals using
|
|
367
|
+
// Handle token-based removals using direct functions
|
|
382
368
|
const tokens = normalizeAttrsTokens(span.text);
|
|
383
|
-
const { tokens: updated, removed } =
|
|
369
|
+
const { tokens: updated, removed } = removeTokenFromSlot(entry, tokens, ctx, quad);
|
|
384
370
|
|
|
385
371
|
if (removed) {
|
|
386
372
|
const newText = updated.length === 0 ? '{}' : writeAttrsTokens(updated);
|
|
@@ -428,8 +414,7 @@ function materializeEdits(plan, text, ctx, base) {
|
|
|
428
414
|
|
|
429
415
|
// Normal annotation block, add tokens
|
|
430
416
|
const existingTokens = blockTokensFromEntries(targetBlock) || tokens;
|
|
431
|
-
|
|
432
|
-
let updated = slot.addToken(existingTokens, ctx, quad);
|
|
417
|
+
let updated = addTokenToSlot(existingTokens, ctx, quad);
|
|
433
418
|
|
|
434
419
|
// For literal predicates with datatypes, we need to add datatype token too
|
|
435
420
|
if (quad.object.termType === 'Literal' && quad.object.datatype && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
|
|
@@ -460,32 +445,28 @@ function applyEdits(text, edits, ctx, base) {
|
|
|
460
445
|
// Extract vacant slots before reparsing
|
|
461
446
|
const vacantSlots = new Map();
|
|
462
447
|
base?.quadIndex?.forEach((slot, key) => {
|
|
463
|
-
if (slot.isVacant)
|
|
464
|
-
vacantSlots.set(key, slot);
|
|
465
|
-
}
|
|
448
|
+
if (slot.isVacant) vacantSlots.set(key, slot);
|
|
466
449
|
});
|
|
467
450
|
|
|
468
451
|
const reparsed = parse(result, { context: ctx });
|
|
469
452
|
|
|
470
453
|
// Merge vacant slots back
|
|
471
454
|
vacantSlots.forEach((vacantSlot, key) => {
|
|
472
|
-
if (!reparsed.origin.blocks.has(vacantSlot.blockId)) {
|
|
473
|
-
const blockInfo = vacantSlot
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
|
|
488
|
-
}
|
|
455
|
+
if (!reparsed.origin.blocks.has(vacantSlot.blockId) && vacantSlot.blockInfo) {
|
|
456
|
+
const { blockInfo } = vacantSlot;
|
|
457
|
+
const emptyBlock = {
|
|
458
|
+
id: blockInfo.id,
|
|
459
|
+
range: blockInfo.range || { start: 0, end: 0 },
|
|
460
|
+
attrsRange: blockInfo.attrsRange,
|
|
461
|
+
valueRange: blockInfo.valueRange,
|
|
462
|
+
carrierType: blockInfo.carrierType || 'span',
|
|
463
|
+
subject: blockInfo.subject || '',
|
|
464
|
+
types: [],
|
|
465
|
+
predicates: [],
|
|
466
|
+
entries: [],
|
|
467
|
+
context: blockInfo.context || { ...ctx }
|
|
468
|
+
};
|
|
469
|
+
reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
|
|
489
470
|
}
|
|
490
471
|
reparsed.origin.quadIndex.set(key, vacantSlot);
|
|
491
472
|
});
|
package/src/utils.js
CHANGED
|
@@ -76,14 +76,13 @@ export function parseSemanticBlock(raw) {
|
|
|
76
76
|
const relEnd = relStart + token.length;
|
|
77
77
|
const entryIndex = result.entries.length;
|
|
78
78
|
|
|
79
|
-
// Handle special tokens
|
|
79
|
+
// Handle special tokens
|
|
80
80
|
if (token === '=') {
|
|
81
81
|
result.subject = 'RESET';
|
|
82
82
|
result.entries.push({ kind: 'subjectReset', relRange: { start: relStart, end: relEnd }, raw: token });
|
|
83
83
|
continue;
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
-
// Handle '=' pattern for subject declarations (not reset)
|
|
87
86
|
if (token.startsWith('=') && !token.startsWith('=#')) {
|
|
88
87
|
const iri = token.substring(1);
|
|
89
88
|
result.subject = iri;
|
|
@@ -95,41 +94,32 @@ export function parseSemanticBlock(raw) {
|
|
|
95
94
|
let processed = false;
|
|
96
95
|
for (const [pattern, config] of Object.entries(TOKEN_PATTERNS)) {
|
|
97
96
|
if (token.startsWith(pattern)) {
|
|
98
|
-
const entry = {
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
if (config.
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
entry.iri = extracted;
|
|
125
|
-
} else if (config.kind === 'property') {
|
|
126
|
-
result.predicates.push({ iri: extracted, form: config.form, entryIndex });
|
|
127
|
-
entry.iri = extracted;
|
|
128
|
-
entry.form = config.form;
|
|
129
|
-
}
|
|
130
|
-
} else {
|
|
131
|
-
// For '=' pattern (subjectReset handled above)
|
|
132
|
-
if (config.kind === 'subjectReset') continue;
|
|
97
|
+
const entry = { kind: config.kind, relRange: { start: relStart, end: relEnd }, raw: token };
|
|
98
|
+
const extracted = config.extract(token);
|
|
99
|
+
|
|
100
|
+
if (config.kind === 'fragment') {
|
|
101
|
+
result.subject = `=#${extracted}`;
|
|
102
|
+
entry.fragment = extracted;
|
|
103
|
+
} else if (config.kind === 'softFragment') {
|
|
104
|
+
result.object = `#${extracted}`;
|
|
105
|
+
entry.fragment = extracted;
|
|
106
|
+
} else if (config.kind === 'object') {
|
|
107
|
+
result.object = extracted;
|
|
108
|
+
entry.iri = extracted;
|
|
109
|
+
} else if (config.kind === 'datatype') {
|
|
110
|
+
if (!result.language) result.datatype = extracted;
|
|
111
|
+
entry.datatype = extracted;
|
|
112
|
+
} else if (config.kind === 'language') {
|
|
113
|
+
result.language = extracted;
|
|
114
|
+
result.datatype = null;
|
|
115
|
+
entry.language = extracted;
|
|
116
|
+
} else if (config.kind === 'type') {
|
|
117
|
+
result.types.push({ iri: extracted, entryIndex });
|
|
118
|
+
entry.iri = extracted;
|
|
119
|
+
} else if (config.kind === 'property') {
|
|
120
|
+
result.predicates.push({ iri: extracted, form: config.form, entryIndex });
|
|
121
|
+
entry.iri = extracted;
|
|
122
|
+
entry.form = config.form;
|
|
133
123
|
}
|
|
134
124
|
|
|
135
125
|
result.entries.push(entry);
|
|
@@ -138,7 +128,7 @@ export function parseSemanticBlock(raw) {
|
|
|
138
128
|
}
|
|
139
129
|
}
|
|
140
130
|
|
|
141
|
-
//
|
|
131
|
+
// Default case (no pattern match)
|
|
142
132
|
if (!processed) {
|
|
143
133
|
result.predicates.push({ iri: token, form: '', entryIndex });
|
|
144
134
|
result.entries.push({ kind: 'property', iri: token, form: '', relRange: { start: relStart, end: relEnd }, raw: token });
|
|
@@ -192,69 +182,42 @@ export function parseQuadIndexKey(key) {
|
|
|
192
182
|
}
|
|
193
183
|
}
|
|
194
184
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
// Consolidated quad management
|
|
200
|
-
export function createQuadManager() {
|
|
185
|
+
// Direct slot management functions - no factory needed
|
|
186
|
+
/**
 * Build a fresh (occupied) slot record for a block entry.
 *
 * `slotId` is the hash of "<subject.value>|<predicate.value>" when both
 * `meta.subject` and `meta.predicate` are present, otherwise null.
 * Every property of `meta` is copied onto the result last, so a key in
 * `meta` overrides the default of the same name.
 *
 * @param {*} blockId - Identifier of the owning block.
 * @param {number} entryIndex - Index of the entry inside the block.
 * @param {object} [meta={}] - Extra slot data (may include subject / predicate terms).
 * @returns {object} Slot record with blockId, entryIndex, slotId, isVacant,
 *   lastValue, vacantSince and the meta properties.
 */
export function createSlotInfo(blockId, entryIndex, meta = {}) {
  const { subject, predicate } = meta;

  let slotId = null;
  if (subject && predicate) {
    slotId = hash(`${subject.value}|${predicate.value}`);
  }

  const defaults = {
    blockId,
    entryIndex,
    slotId,
    isVacant: false,
    lastValue: null,
    vacantSince: null,
  };
  return { ...defaults, ...meta };
}
|
|
242
198
|
|
|
243
|
-
// Backward compatibility exports
|
|
244
|
-
export function createSlotInfo(blockId, entryIndex, meta = {}) {
|
|
245
|
-
return createQuadManager().createSlot(blockId, entryIndex, meta);
|
|
246
|
-
}
|
|
247
|
-
|
|
248
199
|
/**
 * Return a copy of a slot record flagged as vacant.
 *
 * @param {object|null|undefined} slotInfo - Slot record to copy.
 * @param {*} deletedValue - Value that occupied the slot; stored as `lastValue`.
 * @returns {object|null} New record with `isVacant: true`, `lastValue` set and
 *   `vacantSince` set to the current timestamp, or null when `slotInfo` is falsy.
 */
export function markSlotAsVacant(slotInfo, deletedValue) {
  if (!slotInfo) {
    return null;
  }

  const vacated = {
    ...slotInfo,
    isVacant: true,
    lastValue: deletedValue,
    vacantSince: Date.now(),
  };
  return vacated;
}
|
|
251
207
|
|
|
252
208
|
/**
 * Look up the first vacant slot for a subject/predicate pair.
 *
 * The target slot id is the hash of "<subject.value>|<predicate.value>".
 *
 * @param {Map} quadIndex - Index whose values are slot records.
 * @param {object} subject - Term providing `value`.
 * @param {object} predicate - Term providing `value`.
 * @returns {object|undefined} First slot whose `slotId` matches and whose
 *   `isVacant` is truthy, or undefined when there is none.
 */
export function findVacantSlot(quadIndex, subject, predicate) {
  const wantedSlotId = hash(`${subject.value}|${predicate.value}`);

  for (const candidate of quadIndex.values()) {
    if (candidate.slotId === wantedSlotId && candidate.isVacant) {
      return candidate;
    }
  }
  return undefined;
}
|
|
255
213
|
|
|
256
214
|
/**
 * Return a copy of a vacant slot record marked as occupied again.
 *
 * @param {object|null|undefined} slotInfo - Slot record; must currently be vacant.
 * @param {*} newValue - Value now occupying the slot; stored as `lastValue`.
 * @returns {object|null} New record with `isVacant: false`, `lastValue` set and
 *   `vacantSince` cleared, or null when `slotInfo` is missing or not vacant.
 */
export function occupySlot(slotInfo, newValue) {
  if (!slotInfo?.isVacant) {
    return null;
  }

  const occupied = {
    ...slotInfo,
    isVacant: false,
    lastValue: newValue,
    vacantSince: null,
  };
  return occupied;
}
|
|
259
222
|
|
|
260
223
|
export function normalizeAttrsTokens(attrsText) {
|
|
@@ -271,32 +234,23 @@ export function removeOneToken(tokens, matchFn) {
|
|
|
271
234
|
return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
|
|
272
235
|
}
|
|
273
236
|
|
|
274
|
-
//
|
|
275
|
-
function manageToken(tokens, action, tokenType, value) {
|
|
276
|
-
const token = tokenType === 'object' ? `+${value}` :
|
|
277
|
-
tokenType === 'softFragment' ? `+#${value}` : value;
|
|
278
|
-
|
|
279
|
-
switch (action) {
|
|
280
|
-
case 'add': return tokens.includes(token) ? tokens : [...tokens, token];
|
|
281
|
-
case 'remove': return removeOneToken(tokens, t => t === token);
|
|
282
|
-
default: return tokens;
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
|
|
237
|
+
// Direct token management - no wrapper function needed
|
|
286
238
|
/**
 * Ensure the object token `+<iri>` is present in a token list.
 *
 * @param {string[]} tokens - Existing attribute tokens (not mutated).
 * @param {string} iri - Object IRI (as it should appear after the '+').
 * @returns {string[]} The same array when the token already exists,
 *   otherwise a new array with the token appended.
 */
export function addObjectToken(tokens, iri) {
  const objectToken = `+${iri}`;
  if (tokens.includes(objectToken)) {
    return tokens;
  }
  return [...tokens, objectToken];
}
|
|
289
242
|
|
|
290
243
|
/**
 * Remove the object token `+<iri>` from a token list.
 *
 * @param {string[]} tokens - Existing attribute tokens.
 * @param {string} iri - Object IRI (as it appears after the '+').
 * @returns {{tokens: string[], removed: boolean}} Result of removeOneToken
 *   for the first token equal to `+<iri>`.
 */
export function removeObjectToken(tokens, iri) {
  const objectToken = `+${iri}`;
  return removeOneToken(tokens, (candidate) => candidate === objectToken);
}
|
|
293
246
|
|
|
294
247
|
/**
 * Ensure the soft-fragment token `+#<fragment>` is present in a token list.
 *
 * @param {string[]} tokens - Existing attribute tokens (not mutated).
 * @param {string} fragment - Fragment name without the leading '#'.
 * @returns {string[]} The same array when the token already exists,
 *   otherwise a new array with the token appended.
 */
export function addSoftFragmentToken(tokens, fragment) {
  const fragmentToken = `+#${fragment}`;
  if (tokens.includes(fragmentToken)) {
    return tokens;
  }
  return [...tokens, fragmentToken];
}
|
|
297
251
|
|
|
298
252
|
/**
 * Remove the soft-fragment token `+#<fragment>` from a token list.
 *
 * @param {string[]} tokens - Existing attribute tokens.
 * @param {string} fragment - Fragment name without the leading '#'.
 * @returns {{tokens: string[], removed: boolean}} Result of removeOneToken
 *   for the first token equal to `+#<fragment>`.
 */
export function removeSoftFragmentToken(tokens, fragment) {
  const fragmentToken = `+#${fragment}`;
  return removeOneToken(tokens, (candidate) => candidate === fragmentToken);
}
|
|
301
255
|
|
|
302
256
|
export function createLiteral(value, datatype, language, context, dataFactory) {
|