mdld-parse 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/serialize.js CHANGED
@@ -94,80 +94,68 @@ function updateAttrsDatatypeLang(tokens, newLit, ctx) {
94
94
  return predicatesAndTypes;
95
95
  }
96
96
 
97
- // Slot abstraction for cleaner operations
98
- class Slot {
99
- constructor(block, entry, kind = null) {
100
- this.block = block;
101
- this.entry = entry;
102
- this.kind = kind || entry?.kind;
103
- this.entryIndex = entry?.entryIndex;
104
- this.isVacant = entry?.isVacant || false;
105
- this.form = entry?.form;
97
+ // Direct slot operations - no class abstraction needed
98
+ function removeTokenFromSlot(entry, tokens, ctx, quad) {
99
+ if (!entry) return { tokens, removed: false };
100
+
101
+ if (entry.kind === 'object') {
102
+ const objectIRI = shortenIRI(quad.object.value, ctx);
103
+ return removeObjectToken(tokens, objectIRI);
104
+ } else if (entry.kind === 'softFragment') {
105
+ const fragment = entry.fragment;
106
+ return removeSoftFragmentToken(tokens, fragment);
107
+ } else if (entry.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
108
+ const expectedType = entry.expandedType || quad.object.value;
109
+ return removeOneToken(tokens, t => {
110
+ if (!t.startsWith('.')) return false;
111
+ const raw = t.slice(1);
112
+ return expandIRI(raw, ctx) === expectedType;
113
+ });
114
+ } else {
115
+ const expectedPred = entry.expandedPredicate || quad.predicate.value;
116
+ const expectedForm = entry.form;
117
+ return removeOneToken(tokens, t => {
118
+ const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
119
+ if (!m) return false;
120
+ const form = m[1] || '';
121
+ const raw = m[2];
122
+ if (expectedForm != null && form !== expectedForm) return false;
123
+ return expandIRI(raw, ctx) === expectedPred;
124
+ });
106
125
  }
126
+ }
107
127
 
108
- removeToken(tokens, ctx, quad) {
109
- if (!this.entry) return { tokens, removed: false };
110
-
111
- if (this.kind === 'object') {
112
- const objectIRI = shortenIRI(quad.object.value, ctx);
113
- return removeObjectToken(tokens, objectIRI);
114
- } else if (this.kind === 'softFragment') {
115
- const fragment = this.entry.fragment;
116
- return removeSoftFragmentToken(tokens, fragment);
117
- } else if (this.kind === 'type' && quad.predicate.value.endsWith('rdf-syntax-ns#type')) {
118
- const expectedType = this.entry.expandedType || quad.object.value;
119
- return removeOneToken(tokens, t => {
120
- if (!t.startsWith('.')) return false;
121
- const raw = t.slice(1);
122
- return expandIRI(raw, ctx) === expectedType;
123
- });
124
- } else {
125
- const expectedPred = this.entry.expandedPredicate || quad.predicate.value;
126
- const expectedForm = this.entry.form;
127
- return removeOneToken(tokens, t => {
128
- const m = String(t).match(/^(\^\?|\^|\?|)(.+)$/);
129
- if (!m) return false;
130
- const form = m[1] || '';
131
- const raw = m[2];
132
- if (expectedForm != null && form !== expectedForm) return false;
133
- return expandIRI(raw, ctx) === expectedPred;
134
- });
128
+ function addTokenToSlot(tokens, ctx, quad) {
129
+ if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
130
+ const typeShort = shortenIRI(quad.object.value, ctx);
131
+ const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
132
+ if (typeToken && !tokens.includes(typeToken)) {
133
+ return [...tokens, typeToken];
135
134
  }
136
- }
137
-
138
- addToken(tokens, ctx, quad) {
139
- if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
140
- const typeShort = shortenIRI(quad.object.value, ctx);
141
- const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
142
- if (typeToken && !tokens.includes(typeToken)) {
143
- return [...tokens, typeToken];
144
- }
145
- } else if (quad.object.termType === 'NamedNode') {
146
- const objectShort = shortenIRI(quad.object.value, ctx);
147
- const isSoftFragment = quad.object.value.includes('#');
148
- const fragment = isSoftFragment ? quad.object.value.split('#')[1] : null;
135
+ } else if (quad.object.termType === 'NamedNode') {
136
+ const objectShort = shortenIRI(quad.object.value, ctx);
137
+ const isSoftFragment = quad.object.value.includes('#');
138
+ const fragment = isSoftFragment ? quad.object.value.split('#')[1] : null;
149
139
 
150
- if (isSoftFragment) {
151
- return addSoftFragmentToken(tokens, fragment);
152
- } else {
153
- return addObjectToken(tokens, objectShort);
154
- }
155
- } else if (quad.object.termType === 'Literal') {
156
- // For literal predicates, we need to add the predicate token
157
- const predShort = shortenIRI(quad.predicate.value, ctx);
158
- if (!tokens.includes(predShort)) {
159
- return [...tokens, predShort];
160
- }
140
+ if (isSoftFragment) {
141
+ return addSoftFragmentToken(tokens, fragment);
142
+ } else {
143
+ return addObjectToken(tokens, objectShort);
144
+ }
145
+ } else if (quad.object.termType === 'Literal') {
146
+ const predShort = shortenIRI(quad.predicate.value, ctx);
147
+ if (!tokens.includes(predShort)) {
148
+ return [...tokens, predShort];
161
149
  }
162
- return tokens;
163
150
  }
151
+ return tokens;
152
+ }
164
153
 
165
- markVacant(quad) {
166
- if (this.entry && this.entry.slotId) {
167
- return markSlotAsVacant(this.entry, quad.object);
168
- }
169
- return null;
154
+ function markEntryAsVacant(entry, quad) {
155
+ if (entry && entry.slotId) {
156
+ return markSlotAsVacant(entry, quad.object);
170
157
  }
158
+ return null;
171
159
  }
172
160
 
173
161
  export function serialize({ text, diff, origin, options = {} }) {
@@ -345,10 +333,8 @@ function materializeEdits(plan, text, ctx, base) {
345
333
 
346
334
  // Materialize deletes
347
335
  for (const { quad, entry, block } of plan.deletes) {
348
- const slot = new Slot(block, entry);
349
-
350
336
  // Mark slot as vacant
351
- const vacantSlot = slot.markVacant(quad);
337
+ const vacantSlot = markEntryAsVacant(entry, quad);
352
338
  if (vacantSlot && block) {
353
339
  const blockInfo = {
354
340
  id: entry.blockId,
@@ -378,9 +364,9 @@ function materializeEdits(plan, text, ctx, base) {
378
364
  }
379
365
  }
380
366
 
381
- // Handle token-based removals using Slot abstraction
367
+ // Handle token-based removals using direct functions
382
368
  const tokens = normalizeAttrsTokens(span.text);
383
- const { tokens: updated, removed } = slot.removeToken(tokens, ctx, quad);
369
+ const { tokens: updated, removed } = removeTokenFromSlot(entry, tokens, ctx, quad);
384
370
 
385
371
  if (removed) {
386
372
  const newText = updated.length === 0 ? '{}' : writeAttrsTokens(updated);
@@ -428,8 +414,7 @@ function materializeEdits(plan, text, ctx, base) {
428
414
 
429
415
  // Normal annotation block, add tokens
430
416
  const existingTokens = blockTokensFromEntries(targetBlock) || tokens;
431
- const slot = new Slot(targetBlock, null);
432
- let updated = slot.addToken(existingTokens, ctx, quad);
417
+ let updated = addTokenToSlot(existingTokens, ctx, quad);
433
418
 
434
419
  // For literal predicates with datatypes, we need to add datatype token too
435
420
  if (quad.object.termType === 'Literal' && quad.object.datatype && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
@@ -460,32 +445,28 @@ function applyEdits(text, edits, ctx, base) {
460
445
  // Extract vacant slots before reparsing
461
446
  const vacantSlots = new Map();
462
447
  base?.quadIndex?.forEach((slot, key) => {
463
- if (slot.isVacant) {
464
- vacantSlots.set(key, slot);
465
- }
448
+ if (slot.isVacant) vacantSlots.set(key, slot);
466
449
  });
467
450
 
468
451
  const reparsed = parse(result, { context: ctx });
469
452
 
470
453
  // Merge vacant slots back
471
454
  vacantSlots.forEach((vacantSlot, key) => {
472
- if (!reparsed.origin.blocks.has(vacantSlot.blockId)) {
473
- const blockInfo = vacantSlot.blockInfo;
474
- if (blockInfo) {
475
- const emptyBlock = {
476
- id: blockInfo.id,
477
- range: blockInfo.range || { start: 0, end: 0 },
478
- attrsRange: blockInfo.attrsRange,
479
- valueRange: blockInfo.valueRange,
480
- carrierType: blockInfo.carrierType || 'span',
481
- subject: blockInfo.subject || '',
482
- types: [],
483
- predicates: [],
484
- entries: [],
485
- context: blockInfo.context || { ...ctx }
486
- };
487
- reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
488
- }
455
+ if (!reparsed.origin.blocks.has(vacantSlot.blockId) && vacantSlot.blockInfo) {
456
+ const { blockInfo } = vacantSlot;
457
+ const emptyBlock = {
458
+ id: blockInfo.id,
459
+ range: blockInfo.range || { start: 0, end: 0 },
460
+ attrsRange: blockInfo.attrsRange,
461
+ valueRange: blockInfo.valueRange,
462
+ carrierType: blockInfo.carrierType || 'span',
463
+ subject: blockInfo.subject || '',
464
+ types: [],
465
+ predicates: [],
466
+ entries: [],
467
+ context: blockInfo.context || { ...ctx }
468
+ };
469
+ reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
489
470
  }
490
471
  reparsed.origin.quadIndex.set(key, vacantSlot);
491
472
  });
package/src/utils.js CHANGED
@@ -76,14 +76,13 @@ export function parseSemanticBlock(raw) {
76
76
  const relEnd = relStart + token.length;
77
77
  const entryIndex = result.entries.length;
78
78
 
79
- // Handle special tokens first
79
+ // Handle special tokens
80
80
  if (token === '=') {
81
81
  result.subject = 'RESET';
82
82
  result.entries.push({ kind: 'subjectReset', relRange: { start: relStart, end: relEnd }, raw: token });
83
83
  continue;
84
84
  }
85
85
 
86
- // Handle '=' pattern for subject declarations (not reset)
87
86
  if (token.startsWith('=') && !token.startsWith('=#')) {
88
87
  const iri = token.substring(1);
89
88
  result.subject = iri;
@@ -95,41 +94,32 @@ export function parseSemanticBlock(raw) {
95
94
  let processed = false;
96
95
  for (const [pattern, config] of Object.entries(TOKEN_PATTERNS)) {
97
96
  if (token.startsWith(pattern)) {
98
- const entry = {
99
- kind: config.kind,
100
- relRange: { start: relStart, end: relEnd },
101
- raw: token
102
- };
103
-
104
- if (config.extract) {
105
- const extracted = config.extract(token);
106
- if (config.kind === 'fragment') {
107
- result.subject = `=#${extracted}`;
108
- entry.fragment = extracted;
109
- } else if (config.kind === 'softFragment') {
110
- result.object = `#${extracted}`;
111
- entry.fragment = extracted;
112
- } else if (config.kind === 'object') {
113
- result.object = extracted;
114
- entry.iri = extracted;
115
- } else if (config.kind === 'datatype') {
116
- if (!result.language) result.datatype = extracted;
117
- entry.datatype = extracted;
118
- } else if (config.kind === 'language') {
119
- result.language = extracted;
120
- result.datatype = null;
121
- entry.language = extracted;
122
- } else if (config.kind === 'type') {
123
- result.types.push({ iri: extracted, entryIndex });
124
- entry.iri = extracted;
125
- } else if (config.kind === 'property') {
126
- result.predicates.push({ iri: extracted, form: config.form, entryIndex });
127
- entry.iri = extracted;
128
- entry.form = config.form;
129
- }
130
- } else {
131
- // For '=' pattern (subjectReset handled above)
132
- if (config.kind === 'subjectReset') continue;
97
+ const entry = { kind: config.kind, relRange: { start: relStart, end: relEnd }, raw: token };
98
+ const extracted = config.extract(token);
99
+
100
+ if (config.kind === 'fragment') {
101
+ result.subject = `=#${extracted}`;
102
+ entry.fragment = extracted;
103
+ } else if (config.kind === 'softFragment') {
104
+ result.object = `#${extracted}`;
105
+ entry.fragment = extracted;
106
+ } else if (config.kind === 'object') {
107
+ result.object = extracted;
108
+ entry.iri = extracted;
109
+ } else if (config.kind === 'datatype') {
110
+ if (!result.language) result.datatype = extracted;
111
+ entry.datatype = extracted;
112
+ } else if (config.kind === 'language') {
113
+ result.language = extracted;
114
+ result.datatype = null;
115
+ entry.language = extracted;
116
+ } else if (config.kind === 'type') {
117
+ result.types.push({ iri: extracted, entryIndex });
118
+ entry.iri = extracted;
119
+ } else if (config.kind === 'property') {
120
+ result.predicates.push({ iri: extracted, form: config.form, entryIndex });
121
+ entry.iri = extracted;
122
+ entry.form = config.form;
133
123
  }
134
124
 
135
125
  result.entries.push(entry);
@@ -138,7 +128,7 @@ export function parseSemanticBlock(raw) {
138
128
  }
139
129
  }
140
130
 
141
- // Handle default case (no pattern match)
131
+ // Default case (no pattern match)
142
132
  if (!processed) {
143
133
  result.predicates.push({ iri: token, form: '', entryIndex });
144
134
  result.entries.push({ kind: 'property', iri: token, form: '', relRange: { start: relStart, end: relEnd }, raw: token });
@@ -192,69 +182,42 @@ export function parseQuadIndexKey(key) {
192
182
  }
193
183
  }
194
184
 
195
- export function createSemanticSlotId(subject, predicate) {
196
- return hash(`${subject.value}|${predicate.value}`);
197
- }
198
-
199
- // Consolidated quad management
200
- export function createQuadManager() {
185
+ // Direct slot management functions - no factory needed
186
+ export function createSlotInfo(blockId, entryIndex, meta = {}) {
187
+ const slotId = meta.subject && meta.predicate ? hash(`${meta.subject.value}|${meta.predicate.value}`) : null;
201
188
  return {
202
- createSlot: (blockId, entryIndex, meta = {}) => {
203
- const slotId = meta.subject && meta.predicate ? createSemanticSlotId(meta.subject, meta.predicate) : null;
204
- return {
205
- blockId,
206
- entryIndex,
207
- slotId,
208
- isVacant: false,
209
- lastValue: null,
210
- vacantSince: null,
211
- ...meta
212
- };
213
- },
214
-
215
- markVacant: (slotInfo, deletedValue) => {
216
- if (!slotInfo) return null;
217
- return {
218
- ...slotInfo,
219
- isVacant: true,
220
- lastValue: deletedValue,
221
- vacantSince: Date.now()
222
- };
223
- },
224
-
225
- findVacant: (quadIndex, subject, predicate) => {
226
- const targetSlotId = createSemanticSlotId(subject, predicate);
227
- return Array.from(quadIndex.values())
228
- .find(slot => slot.slotId === targetSlotId && slot.isVacant);
229
- },
230
-
231
- occupy: (slotInfo, newValue) => {
232
- if (!slotInfo || !slotInfo.isVacant) return null;
233
- return {
234
- ...slotInfo,
235
- isVacant: false,
236
- lastValue: newValue,
237
- vacantSince: null
238
- };
239
- }
189
+ blockId,
190
+ entryIndex,
191
+ slotId,
192
+ isVacant: false,
193
+ lastValue: null,
194
+ vacantSince: null,
195
+ ...meta
240
196
  };
241
197
  }
242
198
 
243
- // Backward compatibility exports
244
- export function createSlotInfo(blockId, entryIndex, meta = {}) {
245
- return createQuadManager().createSlot(blockId, entryIndex, meta);
246
- }
247
-
248
199
  export function markSlotAsVacant(slotInfo, deletedValue) {
249
- return createQuadManager().markVacant(slotInfo, deletedValue);
200
+ return slotInfo ? {
201
+ ...slotInfo,
202
+ isVacant: true,
203
+ lastValue: deletedValue,
204
+ vacantSince: Date.now()
205
+ } : null;
250
206
  }
251
207
 
252
208
  export function findVacantSlot(quadIndex, subject, predicate) {
253
- return createQuadManager().findVacant(quadIndex, subject, predicate);
209
+ const targetSlotId = hash(`${subject.value}|${predicate.value}`);
210
+ return Array.from(quadIndex.values())
211
+ .find(slot => slot.slotId === targetSlotId && slot.isVacant);
254
212
  }
255
213
 
256
214
  export function occupySlot(slotInfo, newValue) {
257
- return createQuadManager().occupy(slotInfo, newValue);
215
+ return slotInfo && slotInfo.isVacant ? {
216
+ ...slotInfo,
217
+ isVacant: false,
218
+ lastValue: newValue,
219
+ vacantSince: null
220
+ } : null;
258
221
  }
259
222
 
260
223
  export function normalizeAttrsTokens(attrsText) {
@@ -271,32 +234,23 @@ export function removeOneToken(tokens, matchFn) {
271
234
  return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
272
235
  }
273
236
 
274
- // Consolidated token management
275
- function manageToken(tokens, action, tokenType, value) {
276
- const token = tokenType === 'object' ? `+${value}` :
277
- tokenType === 'softFragment' ? `+#${value}` : value;
278
-
279
- switch (action) {
280
- case 'add': return tokens.includes(token) ? tokens : [...tokens, token];
281
- case 'remove': return removeOneToken(tokens, t => t === token);
282
- default: return tokens;
283
- }
284
- }
285
-
237
+ // Direct token management - no wrapper function needed
286
238
  export function addObjectToken(tokens, iri) {
287
- return manageToken(tokens, 'add', 'object', iri);
239
+ const token = `+${iri}`;
240
+ return tokens.includes(token) ? tokens : [...tokens, token];
288
241
  }
289
242
 
290
243
  export function removeObjectToken(tokens, iri) {
291
- return manageToken(tokens, 'remove', 'object', iri);
244
+ return removeOneToken(tokens, t => t === `+${iri}`);
292
245
  }
293
246
 
294
247
  export function addSoftFragmentToken(tokens, fragment) {
295
- return manageToken(tokens, 'add', 'softFragment', fragment);
248
+ const token = `+#${fragment}`;
249
+ return tokens.includes(token) ? tokens : [...tokens, token];
296
250
  }
297
251
 
298
252
  export function removeSoftFragmentToken(tokens, fragment) {
299
- return manageToken(tokens, 'remove', 'softFragment', fragment);
253
+ return removeOneToken(tokens, t => t === `+#${fragment}`);
300
254
  }
301
255
 
302
256
  export function createLiteral(value, datatype, language, context, dataFactory) {