mdld-parse 0.6.2 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.6.2",
3
+ "version": "0.7.1",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -38,8 +38,5 @@
38
38
  "homepage": "https://mdld.js.org",
39
39
  "bugs": {
40
40
  "url": "https://github.com/davay42/mdld-parse/issues"
41
- },
42
- "devDependencies": {
43
- "n3": "^2.0.1"
44
41
  }
45
42
  }
package/src/generate.js CHANGED
@@ -1,4 +1,4 @@
1
- import { shortenIRI, expandIRI, quadIndexKey, createUnifiedSlot, DEFAULT_CONTEXT, DataFactory } from './utils.js';
1
+ import { shortenIRI, expandIRI, DEFAULT_CONTEXT, DataFactory } from './utils.js';
2
2
 
3
3
  // Helper functions for cleaner term type checking
4
4
  function isLiteral(term) {
@@ -29,7 +29,7 @@ function extractLocalName(iri) {
29
29
  * Generate deterministic MDLD from RDF quads
30
30
  * Purpose: TTL→MDLD conversion with canonical structure
31
31
  * Input: RDF quads + context
32
- * Output: MDLD text + origin + context
32
+ * Output: MDLD text
33
33
  */
34
34
  export function generate(quads, context = {}) {
35
35
  const fullContext = { ...DEFAULT_CONTEXT, ...context };
@@ -38,13 +38,9 @@ export function generate(quads, context = {}) {
38
38
 
39
39
  const subjectGroups = groupQuadsBySubject(normalizedQuads);
40
40
 
41
- const { text, quadMap } = buildDeterministicMDLD(subjectGroups, fullContext);
41
+ const { text } = buildDeterministicMDLD(subjectGroups, fullContext);
42
42
 
43
- return {
44
- text,
45
- origin: { quadMap },
46
- context: fullContext
47
- };
43
+ return text;
48
44
  }
49
45
 
50
46
  function normalizeAndSortQuads(quads) {
@@ -86,8 +82,6 @@ function groupQuadsBySubject(quads) {
86
82
 
87
83
  function buildDeterministicMDLD(subjectGroups, context) {
88
84
  let text = '';
89
- let currentPos = 0;
90
- const quadMap = new Map();
91
85
 
92
86
  // Add prefixes first (deterministic order), but exclude default context prefixes
93
87
  const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
@@ -96,13 +90,11 @@ function buildDeterministicMDLD(subjectGroups, context) {
96
90
  if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix]) {
97
91
  const prefixDecl = `[${prefix}] <${namespace}>\n`;
98
92
  text += prefixDecl;
99
- currentPos += prefixDecl.length;
100
93
  }
101
94
  }
102
95
 
103
96
  if (sortedPrefixes.length > 0) {
104
97
  text += '\n';
105
- currentPos += 1;
106
98
  }
107
99
 
108
100
  // Process subjects in deterministic order
@@ -125,31 +117,7 @@ function buildDeterministicMDLD(subjectGroups, context) {
125
117
 
126
118
  const headingText = `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n\n`;
127
119
 
128
- const headingBlock = {
129
- id: generateBlockId(),
130
- range: { start: currentPos, end: currentPos + headingText.length },
131
- subject: subjectIRI,
132
- types: types.map(t => t.object.value),
133
- predicates: [],
134
- context: { ...context },
135
- carrierType: 'heading',
136
- attrsRange: { start: currentPos + headingText.indexOf('{'), end: currentPos + headingText.indexOf('}') + 1 },
137
- valueRange: { start: currentPos + 2, end: currentPos + 2 + localSubjectName.length }
138
- };
139
-
140
- // Add type quads to quadMap
141
- types.forEach((quad, i) => {
142
- const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
143
- quadMap.set(key, createUnifiedSlot(headingBlock, i, {
144
- kind: 'type',
145
- subject: quad.subject,
146
- predicate: quad.predicate,
147
- object: quad.object
148
- }));
149
- });
150
-
151
120
  text += headingText;
152
- currentPos += headingText.length;
153
121
 
154
122
  // Add literals (deterministic order)
155
123
  const sortedLiterals = literals.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
@@ -166,30 +134,7 @@ function buildDeterministicMDLD(subjectGroups, context) {
166
134
  }
167
135
 
168
136
  const literalText = `[${quad.object.value}] {${annotation}}\n`;
169
- const literalBlock = {
170
- id: generateBlockId(),
171
- range: { start: currentPos, end: currentPos + literalText.length },
172
- subject: subjectIRI,
173
- types: [],
174
- predicates: [{ iri: quad.predicate.value, form: '' }],
175
- context: { ...context },
176
- carrierType: 'span',
177
- valueRange: { start: currentPos + 1, end: currentPos + 1 + quad.object.value.length },
178
- attrsRange: { start: currentPos + literalText.indexOf('{'), end: currentPos + literalText.indexOf('}') + 1 }
179
- };
180
-
181
- // Add to quadMap
182
- const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
183
- quadMap.set(key, createUnifiedSlot(literalBlock, 0, {
184
- kind: 'pred',
185
- subject: quad.subject,
186
- predicate: quad.predicate,
187
- object: quad.object,
188
- form: ''
189
- }));
190
-
191
137
  text += literalText;
192
- currentPos += literalText.length;
193
138
  }
194
139
 
195
140
  // Add objects (deterministic order)
@@ -198,40 +143,11 @@ function buildDeterministicMDLD(subjectGroups, context) {
198
143
  const objShort = shortenIRI(quad.object.value, context);
199
144
  const predShort = shortenIRI(quad.predicate.value, context);
200
145
  const objectText = `[${objShort}] {+${objShort} ?${predShort}}\n`;
201
-
202
- const objectBlock = {
203
- id: generateBlockId(),
204
- range: { start: currentPos, end: currentPos + objectText.length },
205
- subject: subjectIRI,
206
- types: [],
207
- predicates: [{ iri: quad.predicate.value, form: '?' }],
208
- context: { ...context },
209
- carrierType: 'span',
210
- valueRange: { start: currentPos + 1, end: currentPos + 1 + objShort.length },
211
- attrsRange: { start: currentPos + objectText.indexOf('{'), end: currentPos + objectText.indexOf('}') + 1 }
212
- };
213
-
214
- // Add to quadMap
215
- const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
216
- quadMap.set(key, createUnifiedSlot(objectBlock, 0, {
217
- kind: 'pred',
218
- subject: quad.subject,
219
- predicate: quad.predicate,
220
- object: quad.object,
221
- form: '?'
222
- }));
223
-
224
146
  text += objectText;
225
- currentPos += objectText.length;
226
147
  }
227
148
 
228
149
  text += '\n';
229
- currentPos += 1;
230
150
  }
231
151
 
232
- return { text, quadMap };
233
- }
234
-
235
- function generateBlockId() {
236
- return Math.random().toString(36).substring(2, 10);
152
+ return { text };
237
153
  }
package/src/index.js CHANGED
@@ -1,5 +1,5 @@
1
1
  export { parse } from './parse.js';
2
- export { applyDiff } from './applyDiff.js';
2
+ export { merge } from './merge.js';
3
3
  export { generate } from './generate.js';
4
4
  export { locate } from './locate.js';
5
5
  export { render } from './render.js';
package/src/locate.js CHANGED
@@ -1,75 +1,38 @@
1
- import { parse } from './parse.js';
2
- import { normalizeQuad, quadIndexKey } from './utils.js';
1
+ import { quadToKeyForOrigin } from './utils.js';
3
2
 
4
3
  /**
5
- * Locate the precise text range of a quad in MDLD text using origin tracking
4
+ * Locate the origin entry for a quad using the lean origin system
6
5
  *
7
6
  * @param {Object} quad - The quad to locate (subject, predicate, object)
8
- * @param {Object} origin - Origin object containing blocks and quadIndex (optional)
9
- * @param {string} text - Original MDLD text (optional, parsed if origin not provided)
10
- * @param {Object} context - Context for parsing (optional, used if text needs parsing)
11
- * @returns {Object|null} Range information or null if not found
7
+ * @param {Object} origin - Origin object containing quadIndex
8
+ * @returns {Object|null} Origin entry or null if not found
12
9
  */
13
- export function locate(quad, origin, text = '', context = {}) {
14
- // If origin not provided, parse text to get origin
15
- if (!origin && text) {
16
- const parseResult = parse(text, { context });
17
- origin = parseResult.origin;
18
- }
19
-
20
- if (!quad || !origin || !origin.quadMap) {
10
+ export function locate(quad, origin) {
11
+ if (!quad || !origin || !origin.quadIndex) {
21
12
  return null;
22
13
  }
23
14
 
24
- // Normalize the quad for consistent key generation
25
- const normalizedQuad = normalizeQuad(quad);
26
- if (!normalizedQuad) {
15
+ // Generate the quad key to lookup in quadIndex
16
+ const quadKey = quadToKeyForOrigin(quad);
17
+ if (!quadKey) {
27
18
  return null;
28
19
  }
29
20
 
30
- // Generate the quad key to lookup in quadMap
31
- const quadKey = quadIndexKey(normalizedQuad.subject, normalizedQuad.predicate, normalizedQuad.object);
32
-
33
- // Find the slot information in quadMap
34
- const slotInfo = origin.quadMap.get(quadKey);
35
- if (!slotInfo) {
21
+ // Find the origin entry in quadIndex
22
+ const entry = origin.quadIndex.get(quadKey);
23
+ if (!entry) {
36
24
  return null;
37
25
  }
38
26
 
39
- // In unified structure, slotInfo contains all block information
40
- const block = slotInfo;
41
-
42
- // Extract the actual text content based on carrier type
43
- let contentRange = null;
44
- let content = '';
45
-
46
- if (block.carrierType === 'heading') {
47
- // For headings, use the value range for the heading text
48
- contentRange = block.valueRange;
49
- content = text.substring(block.valueRange.start, block.valueRange.end);
50
- } else if (block.carrierType === 'emphasis' || block.carrierType === 'blockquote' || block.carrierType === 'list' || block.carrierType === 'span') {
51
- // For emphasis, blockquotes, lists, and spans, use the value range
52
- if (block.valueRange) {
53
- contentRange = block.valueRange;
54
- content = text.substring(block.valueRange.start, block.valueRange.end);
55
- } else {
56
- // Fallback to block range
57
- contentRange = block.range;
58
- content = text.substring(block.range.start, block.range.end);
59
- }
60
- }
61
-
27
+ // Return the lean origin entry structure
62
28
  return {
63
- blockId: slotInfo.id,
64
- entryIndex: slotInfo.entryIndex,
65
- kind: slotInfo.kind,
66
- subject: normalizedQuad.subject,
67
- predicate: normalizedQuad.predicate,
68
- object: normalizedQuad.object,
69
- range: contentRange,
70
- content: content,
71
- blockRange: block.range,
72
- carrierType: block.carrierType,
73
- isVacant: slotInfo.isVacant || false
29
+ blockId: entry.blockId,
30
+ range: entry.range,
31
+ carrierType: entry.carrierType,
32
+ subject: entry.subject,
33
+ predicate: entry.predicate,
34
+ context: entry.context,
35
+ value: entry.value,
36
+ polarity: entry.polarity
74
37
  };
75
38
  }
package/src/merge.js ADDED
@@ -0,0 +1,131 @@
1
+ import { parse } from './parse.js';
2
+ import { DEFAULT_CONTEXT } from './utils.js';
3
+
4
+ /**
5
+ * Creates a unique key for quad identity matching
6
+ * @param {Quad} quad
7
+ * @returns {string}
8
+ */
9
+ function quadKey(quad) {
10
+ const datatype = quad.object.datatype?.value || '';
11
+ const language = quad.object.language || '';
12
+ return `${quad.subject.value}|${quad.predicate.value}|${quad.object.value}|${datatype}|${language}`;
13
+ }
14
+
15
+ /**
16
+ * Normalizes merge input to ParseResult format
17
+ * @param {string|ParseResult} input
18
+ * @param {Object} options
19
+ * @param {Object} docContext
20
+ * @returns {ParseResult}
21
+ */
22
+ function normalizeInput(input, options, docContext) {
23
+ if (typeof input === 'string') {
24
+ return parse(input, {
25
+ ...options,
26
+ context: { ...docContext, ...options.context }
27
+ });
28
+ }
29
+ // ParseResult passthrough - no re-parse
30
+ return input;
31
+ }
32
+
33
+ /**
34
+ * Merges multiple MDLD documents with diff polarity resolution
35
+ * @param {Array<string|ParseResult>} docs
36
+ * @param {Object} options
37
+ * @returns {Object}
38
+ */
39
+ export function merge(docs, options = {}) {
40
+ const sessionBuffer = new Map(); // Use Map instead of Set for proper quad storage
41
+ const sessionRemoveSet = new Set();
42
+ const allDocuments = [];
43
+ const quadIndex = new Map();
44
+
45
+ // Process each document in order
46
+ for (let i = 0; i < docs.length; i++) {
47
+ const input = docs[i];
48
+
49
+ // Each document gets the same context (no inheritance)
50
+ const docContext = { ...DEFAULT_CONTEXT, ...options.context };
51
+
52
+ // Normalize input to ParseResult
53
+ const doc = normalizeInput(input, options, docContext);
54
+
55
+ // Create document origin
56
+ const documentOrigin = {
57
+ index: i,
58
+ input: typeof input === 'string' ? 'string' : 'ParseResult',
59
+ origin: doc.origin,
60
+ context: doc.context
61
+ };
62
+ allDocuments.push(documentOrigin);
63
+
64
+ // Fold assertions into session buffer
65
+ for (const quad of doc.quads) {
66
+ const key = quadKey(quad);
67
+ sessionBuffer.set(key, quad);
68
+
69
+ // Create quad origin with document index and polarity
70
+ const existingOrigin = doc.origin.quadIndex.get(quadKey(quad));
71
+ if (existingOrigin) {
72
+ quadIndex.set(quadKey(quad), {
73
+ ...existingOrigin,
74
+ documentIndex: i,
75
+ polarity: '+'
76
+ });
77
+ }
78
+ }
79
+
80
+ // Fold retractions
81
+ for (const quad of doc.remove) {
82
+ const key = quadKey(quad);
83
+
84
+ if (sessionBuffer.has(key)) {
85
+ // Inter-document cancel - remove from buffer
86
+ sessionBuffer.delete(key);
87
+ } else {
88
+ // External retract - add to remove set
89
+ sessionRemoveSet.add(quad);
90
+ }
91
+
92
+ // Create quad origin for remove quads
93
+ const existingOrigin = doc.origin.quadIndex.get(quadKey(quad));
94
+ if (existingOrigin) {
95
+ quadIndex.set(quadKey(quad), {
96
+ ...existingOrigin,
97
+ documentIndex: i,
98
+ polarity: '-'
99
+ });
100
+ }
101
+ }
102
+ }
103
+
104
+ // Build final result
105
+ const finalQuads = Array.from(sessionBuffer.values());
106
+ const finalRemove = Array.from(sessionRemoveSet);
107
+
108
+ // Build merge origin
109
+ const mergeOrigin = {
110
+ documents: allDocuments,
111
+ quadIndex: quadIndex
112
+ };
113
+
114
+ // Build final context (union of all contexts)
115
+ const finalContext = { ...DEFAULT_CONTEXT, ...options.context };
116
+
117
+ // Enforce hard invariant
118
+ const quadKeys = new Set(finalQuads.map(quadKey));
119
+ const removeKeys = new Set(finalRemove.map(quadKey));
120
+
121
+ // Filter out any overlaps (shouldn't happen with correct implementation)
122
+ const filteredQuads = finalQuads.filter(quad => !removeKeys.has(quadKey(quad)));
123
+ const filteredRemove = finalRemove.filter(quad => !quadKeys.has(quadKey(quad)));
124
+
125
+ return {
126
+ quads: filteredQuads,
127
+ remove: filteredRemove,
128
+ origin: mergeOrigin,
129
+ context: finalContext
130
+ };
131
+ }
package/src/parse.js CHANGED
@@ -4,7 +4,6 @@ import {
4
4
  expandIRI,
5
5
  parseSemanticBlock,
6
6
  quadIndexKey,
7
- createUnifiedSlot,
8
7
  createLiteral,
9
8
  hash
10
9
  } from './utils.js';
@@ -343,7 +342,7 @@ function determineCarrierType(url) {
343
342
  return { carrierType: 'span', resourceIRI: null };
344
343
  }
345
344
 
346
- function createBlock(subject, types, predicates, range, attrsRange, valueRange, carrierType, ctx) {
345
+ function createBlock(subject, types, predicates, range, attrsRange, valueRange, carrierType, ctx, text) {
347
346
  const expanded = {
348
347
  subject,
349
348
  types: types.map(t => expandIRI(typeof t === 'string' ? t : t.iri, ctx)),
@@ -356,30 +355,121 @@ function createBlock(subject, types, predicates, range, attrsRange, valueRange,
356
355
  return {
357
356
  id: blockId,
358
357
  range: { start: range[0], end: range[1] },
359
- attrsRange: attrsRange ? { start: attrsRange[0], end: attrsRange[1] } : null,
360
- valueRange: valueRange ? { start: valueRange[0], end: valueRange[1] } : null,
361
358
  carrierType: carrierType || null,
362
359
  subject,
363
360
  types: expanded.types,
364
361
  predicates: expanded.predicates,
365
- context: ctx
362
+ context: ctx,
363
+ text: text || ''
366
364
  };
367
365
  }
368
366
 
369
- function emitQuad(quads, quadMap, block, subject, predicate, object, dataFactory, meta = null) {
367
+ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null, statements = null, statementCandidates = null) {
370
368
  if (!subject || !predicate || !object) return;
371
369
 
372
370
  const quad = dataFactory.quad(subject, predicate, object);
373
- quads.push(quad);
371
+ const remove = meta?.remove || false;
372
+
373
+ if (remove) {
374
+ // Check if quad exists in current buffer
375
+ const quadKey = quadIndexKey(quad.subject, quad.predicate, quad.object);
376
+ if (quadBuffer.has(quadKey)) {
377
+ // In current state → cancel, appears nowhere
378
+ quadBuffer.delete(quadKey);
379
+ // Also remove from quads array if present
380
+ const index = quads.findIndex(q =>
381
+ q.subject.value === quad.subject.value &&
382
+ q.predicate.value === quad.predicate.value &&
383
+ q.object.value === quad.object.value
384
+ );
385
+ if (index !== -1) {
386
+ quads.splice(index, 1);
387
+ }
388
+ // Remove from quadIndex
389
+ quadIndex.delete(quadKey);
390
+ } else {
391
+ // Not in current state → external retract
392
+ removeSet.add(quad);
393
+ }
394
+ } else {
395
+ // Add to buffer and quads
396
+ const quadKey = quadIndexKey(quad.subject, quad.predicate, quad.object);
397
+ quadBuffer.set(quadKey, quad);
398
+ quads.push(quad);
399
+
400
+ // Detect rdf:Statement pattern during single-pass parsing
401
+ detectStatementPatternSinglePass(quad, dataFactory, meta, statements, statementCandidates);
402
+
403
+ // Create lean origin entry
404
+ const originEntry = {
405
+ blockId: block.id,
406
+ range: block.range,
407
+ carrierType: block.carrierType,
408
+ subject: subject.value,
409
+ predicate: predicate.value,
410
+ context: { ...block.context },
411
+ polarity: meta?.remove ? '-' : '+',
412
+ value: block.text || ''
413
+ };
414
+
415
+ quadIndex.set(quadKey, originEntry);
416
+ }
417
+ }
374
418
 
375
- const unifiedSlot = createUnifiedSlot(block, meta?.entryIndex, {
376
- ...meta,
377
- subject,
378
- predicate,
379
- object
380
- });
419
+ // Extract RDF constants once at module level for efficiency
420
+ const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
421
+ const RDF_STATEMENT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement';
422
+ const RDF_SUBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject';
423
+ const RDF_PREDICATE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate';
424
+ const RDF_OBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#object';
425
+
426
+ function detectStatementPatternSinglePass(quad, dataFactory, meta, statements = null, statementCandidates = null) {
427
+ // Skip if not called from parse context (for testing compatibility)
428
+ if (!statements || !statementCandidates) return;
381
429
 
382
- quadMap.set(quadIndexKey(quad.subject, quad.predicate, quad.object), unifiedSlot);
430
+ const predicate = quad.predicate.value;
431
+
432
+ // Early filter: only process rdf:Statement related predicates
433
+ if (predicate !== RDF_TYPE &&
434
+ predicate !== RDF_SUBJECT &&
435
+ predicate !== RDF_PREDICATE &&
436
+ predicate !== RDF_OBJECT) {
437
+ return;
438
+ }
439
+
440
+ // Check if this quad starts a new rdf:Statement pattern
441
+ if (predicate === RDF_TYPE && quad.object.value === RDF_STATEMENT) {
442
+ statementCandidates.set(quad.subject.value, { spo: {} });
443
+ return;
444
+ }
445
+
446
+ // Check if this quad completes part of an existing rdf:Statement pattern
447
+ const candidate = statementCandidates.get(quad.subject.value);
448
+ if (!candidate) return;
449
+
450
+ // Direct property assignment instead of switch for better performance
451
+ if (predicate === RDF_SUBJECT) {
452
+ candidate.spo.subject = quad.object;
453
+ } else if (predicate === RDF_PREDICATE) {
454
+ candidate.spo.predicate = quad.object;
455
+ } else if (predicate === RDF_OBJECT) {
456
+ candidate.spo.object = quad.object;
457
+ // Store the original quad for potential literal extraction
458
+ candidate.objectQuad = quad;
459
+ }
460
+
461
+ // Check if pattern is complete and create elevated SPO quad
462
+ if (candidate.spo.subject && candidate.spo.predicate && candidate.spo.object) {
463
+ // Use the object directly - literal detection happens at parse time
464
+ const spoQuad = dataFactory.quad(
465
+ candidate.spo.subject,
466
+ candidate.spo.predicate,
467
+ candidate.spo.object
468
+ );
469
+ statements.push(spoQuad);
470
+ // Clean up candidate to avoid duplicate detection
471
+ statementCandidates.delete(quad.subject.value);
472
+ }
383
473
  }
384
474
 
385
475
  const resolveFragment = (fragment, state) => {
@@ -406,23 +496,25 @@ function resolveObject(sem, state) {
406
496
 
407
497
  const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
408
498
  const expandedType = expandIRI(typeIRI, state.ctx);
499
+ const typeInfo = typeof entryIndex === 'object' ? entryIndex : { entryIndex, remove: false };
409
500
  emitQuad(
410
- state.quads, state.origin.quadMap, block,
501
+ state.quads, state.quadBuffer, state.removeSet, state.origin.quadIndex, block,
411
502
  subject,
412
503
  state.df.namedNode(expandIRI('rdf:type', state.ctx)),
413
504
  state.df.namedNode(expandedType),
414
505
  state.df,
415
- { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex }
506
+ { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove },
507
+ state.statements, state.statementCandidates
416
508
  );
417
509
  };
418
510
 
419
511
  function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state, carrier) {
420
512
  sem.types.forEach(t => {
421
513
  const typeIRI = typeof t === 'string' ? t : t.iri;
422
- const entryIndex = typeof t === 'string' ? null : t.entryIndex;
514
+ const typeInfo = typeof t === 'string' ? { entryIndex: null, remove: false } : t;
423
515
  // Type subject priority: explicit subject > soft object > carrier URL > current subject
424
516
  let typeSubject = newSubject || localObject || carrierO || S;
425
- createTypeQuad(typeIRI, typeSubject, state, block, entryIndex);
517
+ createTypeQuad(typeIRI, typeSubject, state, block, typeInfo);
426
518
  });
427
519
  }
428
520
 
@@ -453,9 +545,10 @@ function processPredicateAnnotations(sem, newSubject, previousSubject, localObje
453
545
  const role = determinePredicateRole(pred, carrier, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L);
454
546
  if (role) {
455
547
  const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
456
- emitQuad(state.quads, state.origin.quadMap, block,
548
+ emitQuad(state.quads, state.quadBuffer, state.removeSet, state.origin.quadIndex, block,
457
549
  role.subject, P, role.object, state.df,
458
- { kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex }
550
+ { kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false },
551
+ state.statements, state.statementCandidates
459
552
  );
460
553
  }
461
554
  });
@@ -483,7 +576,7 @@ function processAnnotation(carrier, sem, state, options = {}) {
483
576
  const block = createBlock(
484
577
  S.value, sem.types, sem.predicates,
485
578
  carrier.range, carrier.attrsRange || null, carrier.valueRange || null,
486
- carrier.type || null, state.ctx
579
+ carrier.type || null, state.ctx, carrier.text
487
580
  );
488
581
 
489
582
  const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
@@ -555,10 +648,14 @@ export function parse(text, options = {}) {
555
648
  ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
556
649
  df: options.dataFactory || DataFactory,
557
650
  quads: [],
558
- origin: { quadMap: new Map() },
651
+ quadBuffer: new Map(),
652
+ removeSet: new Set(),
653
+ origin: { quadIndex: new Map() },
559
654
  currentSubject: null,
560
655
  tokens: null,
561
- currentTokenIndex: -1
656
+ currentTokenIndex: -1,
657
+ statements: [],
658
+ statementCandidates: new Map() // Track incomplete rdf:Statement patterns
562
659
  };
563
660
 
564
661
  state.tokens = scanTokens(text);
@@ -582,5 +679,18 @@ export function parse(text, options = {}) {
582
679
  TOKEN_PROCESSORS[token.type]?.(token, state);
583
680
  }
584
681
 
585
- return { quads: state.quads, origin: state.origin, context: state.ctx };
682
+ // Convert removeSet to array and ensure hard invariant: quads ∩ remove = ∅
683
+ const removeArray = Array.from(state.removeSet);
684
+ const quadKeys = new Set();
685
+ state.quads.forEach(q => {
686
+ quadKeys.add(quadIndexKey(q.subject, q.predicate, q.object));
687
+ });
688
+
689
+ // Filter removeArray to ensure no overlap with quads
690
+ const filteredRemove = removeArray.filter(quad => {
691
+ const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
692
+ return !quadKeys.has(key);
693
+ });
694
+
695
+ return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
586
696
  }