mdld-parse 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +100 -473
- package/package.json +1 -1
- package/src/generate.js +5 -89
- package/src/index.js +1 -1
- package/src/locate.js +21 -58
- package/src/merge.js +131 -0
- package/src/parse.js +72 -25
- package/src/utils.js +37 -120
- package/src/applyDiff.js +0 -583
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.6.2",
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
package/src/generate.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { shortenIRI, expandIRI,
|
|
1
|
+
import { shortenIRI, expandIRI, DEFAULT_CONTEXT, DataFactory } from './utils.js';
|
|
2
2
|
|
|
3
3
|
// Helper functions for cleaner term type checking
|
|
4
4
|
function isLiteral(term) {
|
|
@@ -29,7 +29,7 @@ function extractLocalName(iri) {
|
|
|
29
29
|
* Generate deterministic MDLD from RDF quads
|
|
30
30
|
* Purpose: TTL→MDLD conversion with canonical structure
|
|
31
31
|
* Input: RDF quads + context
|
|
32
|
-
* Output: MDLD text
|
|
32
|
+
* Output: MDLD text
|
|
33
33
|
*/
|
|
34
34
|
export function generate(quads, context = {}) {
|
|
35
35
|
const fullContext = { ...DEFAULT_CONTEXT, ...context };
|
|
@@ -38,13 +38,9 @@ export function generate(quads, context = {}) {
|
|
|
38
38
|
|
|
39
39
|
const subjectGroups = groupQuadsBySubject(normalizedQuads);
|
|
40
40
|
|
|
41
|
-
const { text, quadMap } = buildDeterministicMDLD(subjectGroups, fullContext);
|
|
41
|
+
const { text } = buildDeterministicMDLD(subjectGroups, fullContext);
|
|
42
42
|
|
|
43
|
-
return {
|
|
44
|
-
text,
|
|
45
|
-
origin: { quadMap },
|
|
46
|
-
context: fullContext
|
|
47
|
-
};
|
|
43
|
+
return text;
|
|
48
44
|
}
|
|
49
45
|
|
|
50
46
|
function normalizeAndSortQuads(quads) {
|
|
@@ -86,8 +82,6 @@ function groupQuadsBySubject(quads) {
|
|
|
86
82
|
|
|
87
83
|
function buildDeterministicMDLD(subjectGroups, context) {
|
|
88
84
|
let text = '';
|
|
89
|
-
let currentPos = 0;
|
|
90
|
-
const quadMap = new Map();
|
|
91
85
|
|
|
92
86
|
// Add prefixes first (deterministic order), but exclude default context prefixes
|
|
93
87
|
const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
|
|
@@ -96,13 +90,11 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
96
90
|
if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix]) {
|
|
97
91
|
const prefixDecl = `[${prefix}] <${namespace}>\n`;
|
|
98
92
|
text += prefixDecl;
|
|
99
|
-
currentPos += prefixDecl.length;
|
|
100
93
|
}
|
|
101
94
|
}
|
|
102
95
|
|
|
103
96
|
if (sortedPrefixes.length > 0) {
|
|
104
97
|
text += '\n';
|
|
105
|
-
currentPos += 1;
|
|
106
98
|
}
|
|
107
99
|
|
|
108
100
|
// Process subjects in deterministic order
|
|
@@ -125,31 +117,7 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
125
117
|
|
|
126
118
|
const headingText = `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n\n`;
|
|
127
119
|
|
|
128
|
-
const headingBlock = {
|
|
129
|
-
id: generateBlockId(),
|
|
130
|
-
range: { start: currentPos, end: currentPos + headingText.length },
|
|
131
|
-
subject: subjectIRI,
|
|
132
|
-
types: types.map(t => t.object.value),
|
|
133
|
-
predicates: [],
|
|
134
|
-
context: { ...context },
|
|
135
|
-
carrierType: 'heading',
|
|
136
|
-
attrsRange: { start: currentPos + headingText.indexOf('{'), end: currentPos + headingText.indexOf('}') + 1 },
|
|
137
|
-
valueRange: { start: currentPos + 2, end: currentPos + 2 + localSubjectName.length }
|
|
138
|
-
};
|
|
139
|
-
|
|
140
|
-
// Add type quads to quadMap
|
|
141
|
-
types.forEach((quad, i) => {
|
|
142
|
-
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
143
|
-
quadMap.set(key, createUnifiedSlot(headingBlock, i, {
|
|
144
|
-
kind: 'type',
|
|
145
|
-
subject: quad.subject,
|
|
146
|
-
predicate: quad.predicate,
|
|
147
|
-
object: quad.object
|
|
148
|
-
}));
|
|
149
|
-
});
|
|
150
|
-
|
|
151
120
|
text += headingText;
|
|
152
|
-
currentPos += headingText.length;
|
|
153
121
|
|
|
154
122
|
// Add literals (deterministic order)
|
|
155
123
|
const sortedLiterals = literals.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
|
|
@@ -166,30 +134,7 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
166
134
|
}
|
|
167
135
|
|
|
168
136
|
const literalText = `[${quad.object.value}] {${annotation}}\n`;
|
|
169
|
-
const literalBlock = {
|
|
170
|
-
id: generateBlockId(),
|
|
171
|
-
range: { start: currentPos, end: currentPos + literalText.length },
|
|
172
|
-
subject: subjectIRI,
|
|
173
|
-
types: [],
|
|
174
|
-
predicates: [{ iri: quad.predicate.value, form: '' }],
|
|
175
|
-
context: { ...context },
|
|
176
|
-
carrierType: 'span',
|
|
177
|
-
valueRange: { start: currentPos + 1, end: currentPos + 1 + quad.object.value.length },
|
|
178
|
-
attrsRange: { start: currentPos + literalText.indexOf('{'), end: currentPos + literalText.indexOf('}') + 1 }
|
|
179
|
-
};
|
|
180
|
-
|
|
181
|
-
// Add to quadMap
|
|
182
|
-
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
183
|
-
quadMap.set(key, createUnifiedSlot(literalBlock, 0, {
|
|
184
|
-
kind: 'pred',
|
|
185
|
-
subject: quad.subject,
|
|
186
|
-
predicate: quad.predicate,
|
|
187
|
-
object: quad.object,
|
|
188
|
-
form: ''
|
|
189
|
-
}));
|
|
190
|
-
|
|
191
137
|
text += literalText;
|
|
192
|
-
currentPos += literalText.length;
|
|
193
138
|
}
|
|
194
139
|
|
|
195
140
|
// Add objects (deterministic order)
|
|
@@ -198,40 +143,11 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
198
143
|
const objShort = shortenIRI(quad.object.value, context);
|
|
199
144
|
const predShort = shortenIRI(quad.predicate.value, context);
|
|
200
145
|
const objectText = `[${objShort}] {+${objShort} ?${predShort}}\n`;
|
|
201
|
-
|
|
202
|
-
const objectBlock = {
|
|
203
|
-
id: generateBlockId(),
|
|
204
|
-
range: { start: currentPos, end: currentPos + objectText.length },
|
|
205
|
-
subject: subjectIRI,
|
|
206
|
-
types: [],
|
|
207
|
-
predicates: [{ iri: quad.predicate.value, form: '?' }],
|
|
208
|
-
context: { ...context },
|
|
209
|
-
carrierType: 'span',
|
|
210
|
-
valueRange: { start: currentPos + 1, end: currentPos + 1 + objShort.length },
|
|
211
|
-
attrsRange: { start: currentPos + objectText.indexOf('{'), end: currentPos + objectText.indexOf('}') + 1 }
|
|
212
|
-
};
|
|
213
|
-
|
|
214
|
-
// Add to quadMap
|
|
215
|
-
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
216
|
-
quadMap.set(key, createUnifiedSlot(objectBlock, 0, {
|
|
217
|
-
kind: 'pred',
|
|
218
|
-
subject: quad.subject,
|
|
219
|
-
predicate: quad.predicate,
|
|
220
|
-
object: quad.object,
|
|
221
|
-
form: '?'
|
|
222
|
-
}));
|
|
223
|
-
|
|
224
146
|
text += objectText;
|
|
225
|
-
currentPos += objectText.length;
|
|
226
147
|
}
|
|
227
148
|
|
|
228
149
|
text += '\n';
|
|
229
|
-
currentPos += 1;
|
|
230
150
|
}
|
|
231
151
|
|
|
232
|
-
return { text, quadMap };
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
function generateBlockId() {
|
|
236
|
-
return Math.random().toString(36).substring(2, 10);
|
|
152
|
+
return { text };
|
|
237
153
|
}
|
package/src/index.js
CHANGED
package/src/locate.js
CHANGED
|
@@ -1,75 +1,38 @@
|
|
|
1
|
-
import { parse } from './parse.js';
|
|
2
|
-
import { normalizeQuad, quadIndexKey } from './utils.js';
|
|
1
|
+
import { quadToKeyForOrigin } from './utils.js';
|
|
3
2
|
|
|
4
3
|
/**
|
|
5
|
-
* Locate the
|
|
4
|
+
* Locate the origin entry for a quad using the lean origin system
|
|
6
5
|
*
|
|
7
6
|
* @param {Object} quad - The quad to locate (subject, predicate, object)
|
|
8
|
-
* @param {Object} origin - Origin object containing
|
|
9
|
-
* @
|
|
10
|
-
* @param {Object} context - Context for parsing (optional, used if text needs parsing)
|
|
11
|
-
* @returns {Object|null} Range information or null if not found
|
|
7
|
+
* @param {Object} origin - Origin object containing quadIndex
|
|
8
|
+
* @returns {Object|null} Origin entry or null if not found
|
|
12
9
|
*/
|
|
13
|
-
export function locate(quad, origin
|
|
14
|
-
|
|
15
|
-
if (!origin && text) {
|
|
16
|
-
const parseResult = parse(text, { context });
|
|
17
|
-
origin = parseResult.origin;
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
if (!quad || !origin || !origin.quadMap) {
|
|
10
|
+
export function locate(quad, origin) {
|
|
11
|
+
if (!quad || !origin || !origin.quadIndex) {
|
|
21
12
|
return null;
|
|
22
13
|
}
|
|
23
14
|
|
|
24
|
-
//
|
|
25
|
-
const
|
|
26
|
-
if (!
|
|
15
|
+
// Generate the quad key to lookup in quadIndex
|
|
16
|
+
const quadKey = quadToKeyForOrigin(quad);
|
|
17
|
+
if (!quadKey) {
|
|
27
18
|
return null;
|
|
28
19
|
}
|
|
29
20
|
|
|
30
|
-
//
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
-
// Find the slot information in quadMap
|
|
34
|
-
const slotInfo = origin.quadMap.get(quadKey);
|
|
35
|
-
if (!slotInfo) {
|
|
21
|
+
// Find the origin entry in quadIndex
|
|
22
|
+
const entry = origin.quadIndex.get(quadKey);
|
|
23
|
+
if (!entry) {
|
|
36
24
|
return null;
|
|
37
25
|
}
|
|
38
26
|
|
|
39
|
-
//
|
|
40
|
-
const block = slotInfo;
|
|
41
|
-
|
|
42
|
-
// Extract the actual text content based on carrier type
|
|
43
|
-
let contentRange = null;
|
|
44
|
-
let content = '';
|
|
45
|
-
|
|
46
|
-
if (block.carrierType === 'heading') {
|
|
47
|
-
// For headings, use the value range for the heading text
|
|
48
|
-
contentRange = block.valueRange;
|
|
49
|
-
content = text.substring(block.valueRange.start, block.valueRange.end);
|
|
50
|
-
} else if (block.carrierType === 'emphasis' || block.carrierType === 'blockquote' || block.carrierType === 'list' || block.carrierType === 'span') {
|
|
51
|
-
// For emphasis, blockquotes, lists, and spans, use the value range
|
|
52
|
-
if (block.valueRange) {
|
|
53
|
-
contentRange = block.valueRange;
|
|
54
|
-
content = text.substring(block.valueRange.start, block.valueRange.end);
|
|
55
|
-
} else {
|
|
56
|
-
// Fallback to block range
|
|
57
|
-
contentRange = block.range;
|
|
58
|
-
content = text.substring(block.range.start, block.range.end);
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
|
|
27
|
+
// Return the lean origin entry structure
|
|
62
28
|
return {
|
|
63
|
-
blockId:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
subject:
|
|
67
|
-
predicate:
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
blockRange: block.range,
|
|
72
|
-
carrierType: block.carrierType,
|
|
73
|
-
isVacant: slotInfo.isVacant || false
|
|
29
|
+
blockId: entry.blockId,
|
|
30
|
+
range: entry.range,
|
|
31
|
+
carrierType: entry.carrierType,
|
|
32
|
+
subject: entry.subject,
|
|
33
|
+
predicate: entry.predicate,
|
|
34
|
+
context: entry.context,
|
|
35
|
+
value: entry.value,
|
|
36
|
+
polarity: entry.polarity
|
|
74
37
|
};
|
|
75
38
|
}
|
package/src/merge.js
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import { parse } from './parse.js';
|
|
2
|
+
import { DEFAULT_CONTEXT } from './utils.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Creates a unique key for quad identity matching
|
|
6
|
+
* @param {Quad} quad
|
|
7
|
+
* @returns {string}
|
|
8
|
+
*/
|
|
9
|
+
function quadKey(quad) {
|
|
10
|
+
const datatype = quad.object.datatype?.value || '';
|
|
11
|
+
const language = quad.object.language || '';
|
|
12
|
+
return `${quad.subject.value}|${quad.predicate.value}|${quad.object.value}|${datatype}|${language}`;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Normalizes merge input to ParseResult format
|
|
17
|
+
* @param {string|ParseResult} input
|
|
18
|
+
* @param {Object} options
|
|
19
|
+
* @param {Object} docContext
|
|
20
|
+
* @returns {ParseResult}
|
|
21
|
+
*/
|
|
22
|
+
function normalizeInput(input, options, docContext) {
|
|
23
|
+
if (typeof input === 'string') {
|
|
24
|
+
return parse(input, {
|
|
25
|
+
...options,
|
|
26
|
+
context: { ...docContext, ...options.context }
|
|
27
|
+
});
|
|
28
|
+
}
|
|
29
|
+
// ParseResult passthrough - no re-parse
|
|
30
|
+
return input;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* Merges multiple MDLD documents with diff polarity resolution
|
|
35
|
+
* @param {Array<string|ParseResult>} docs
|
|
36
|
+
* @param {Object} options
|
|
37
|
+
* @returns {Object}
|
|
38
|
+
*/
|
|
39
|
+
export function merge(docs, options = {}) {
|
|
40
|
+
const sessionBuffer = new Map(); // Use Map instead of Set for proper quad storage
|
|
41
|
+
const sessionRemoveSet = new Set();
|
|
42
|
+
const allDocuments = [];
|
|
43
|
+
const quadIndex = new Map();
|
|
44
|
+
|
|
45
|
+
// Process each document in order
|
|
46
|
+
for (let i = 0; i < docs.length; i++) {
|
|
47
|
+
const input = docs[i];
|
|
48
|
+
|
|
49
|
+
// Each document gets the same context (no inheritance)
|
|
50
|
+
const docContext = { ...DEFAULT_CONTEXT, ...options.context };
|
|
51
|
+
|
|
52
|
+
// Normalize input to ParseResult
|
|
53
|
+
const doc = normalizeInput(input, options, docContext);
|
|
54
|
+
|
|
55
|
+
// Create document origin
|
|
56
|
+
const documentOrigin = {
|
|
57
|
+
index: i,
|
|
58
|
+
input: typeof input === 'string' ? 'string' : 'ParseResult',
|
|
59
|
+
origin: doc.origin,
|
|
60
|
+
context: doc.context
|
|
61
|
+
};
|
|
62
|
+
allDocuments.push(documentOrigin);
|
|
63
|
+
|
|
64
|
+
// Fold assertions into session buffer
|
|
65
|
+
for (const quad of doc.quads) {
|
|
66
|
+
const key = quadKey(quad);
|
|
67
|
+
sessionBuffer.set(key, quad);
|
|
68
|
+
|
|
69
|
+
// Create quad origin with document index and polarity
|
|
70
|
+
const existingOrigin = doc.origin.quadIndex.get(quadKey(quad));
|
|
71
|
+
if (existingOrigin) {
|
|
72
|
+
quadIndex.set(quadKey(quad), {
|
|
73
|
+
...existingOrigin,
|
|
74
|
+
documentIndex: i,
|
|
75
|
+
polarity: '+'
|
|
76
|
+
});
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Fold retractions
|
|
81
|
+
for (const quad of doc.remove) {
|
|
82
|
+
const key = quadKey(quad);
|
|
83
|
+
|
|
84
|
+
if (sessionBuffer.has(key)) {
|
|
85
|
+
// Inter-document cancel - remove from buffer
|
|
86
|
+
sessionBuffer.delete(key);
|
|
87
|
+
} else {
|
|
88
|
+
// External retract - add to remove set
|
|
89
|
+
sessionRemoveSet.add(quad);
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Create quad origin for remove quads
|
|
93
|
+
const existingOrigin = doc.origin.quadIndex.get(quadKey(quad));
|
|
94
|
+
if (existingOrigin) {
|
|
95
|
+
quadIndex.set(quadKey(quad), {
|
|
96
|
+
...existingOrigin,
|
|
97
|
+
documentIndex: i,
|
|
98
|
+
polarity: '-'
|
|
99
|
+
});
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// Build final result
|
|
105
|
+
const finalQuads = Array.from(sessionBuffer.values());
|
|
106
|
+
const finalRemove = Array.from(sessionRemoveSet);
|
|
107
|
+
|
|
108
|
+
// Build merge origin
|
|
109
|
+
const mergeOrigin = {
|
|
110
|
+
documents: allDocuments,
|
|
111
|
+
quadIndex: quadIndex
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
// Build final context (union of all contexts)
|
|
115
|
+
const finalContext = { ...DEFAULT_CONTEXT, ...options.context };
|
|
116
|
+
|
|
117
|
+
// Enforce hard invariant
|
|
118
|
+
const quadKeys = new Set(finalQuads.map(quadKey));
|
|
119
|
+
const removeKeys = new Set(finalRemove.map(quadKey));
|
|
120
|
+
|
|
121
|
+
// Filter out any overlaps (shouldn't happen with correct implementation)
|
|
122
|
+
const filteredQuads = finalQuads.filter(quad => !removeKeys.has(quadKey(quad)));
|
|
123
|
+
const filteredRemove = finalRemove.filter(quad => !quadKeys.has(quadKey(quad)));
|
|
124
|
+
|
|
125
|
+
return {
|
|
126
|
+
quads: filteredQuads,
|
|
127
|
+
remove: filteredRemove,
|
|
128
|
+
origin: mergeOrigin,
|
|
129
|
+
context: finalContext
|
|
130
|
+
};
|
|
131
|
+
}
|
package/src/parse.js
CHANGED
|
@@ -4,7 +4,6 @@ import {
|
|
|
4
4
|
expandIRI,
|
|
5
5
|
parseSemanticBlock,
|
|
6
6
|
quadIndexKey,
|
|
7
|
-
createUnifiedSlot,
|
|
8
7
|
createLiteral,
|
|
9
8
|
hash
|
|
10
9
|
} from './utils.js';
|
|
@@ -343,7 +342,7 @@ function determineCarrierType(url) {
|
|
|
343
342
|
return { carrierType: 'span', resourceIRI: null };
|
|
344
343
|
}
|
|
345
344
|
|
|
346
|
-
function createBlock(subject, types, predicates, range, attrsRange, valueRange, carrierType, ctx) {
|
|
345
|
+
function createBlock(subject, types, predicates, range, attrsRange, valueRange, carrierType, ctx, text) {
|
|
347
346
|
const expanded = {
|
|
348
347
|
subject,
|
|
349
348
|
types: types.map(t => expandIRI(typeof t === 'string' ? t : t.iri, ctx)),
|
|
@@ -356,30 +355,62 @@ function createBlock(subject, types, predicates, range, attrsRange, valueRange,
|
|
|
356
355
|
return {
|
|
357
356
|
id: blockId,
|
|
358
357
|
range: { start: range[0], end: range[1] },
|
|
359
|
-
attrsRange: attrsRange ? { start: attrsRange[0], end: attrsRange[1] } : null,
|
|
360
|
-
valueRange: valueRange ? { start: valueRange[0], end: valueRange[1] } : null,
|
|
361
358
|
carrierType: carrierType || null,
|
|
362
359
|
subject,
|
|
363
360
|
types: expanded.types,
|
|
364
361
|
predicates: expanded.predicates,
|
|
365
|
-
context: ctx
|
|
362
|
+
context: ctx,
|
|
363
|
+
text: text || ''
|
|
366
364
|
};
|
|
367
365
|
}
|
|
368
366
|
|
|
369
|
-
function emitQuad(quads,
|
|
367
|
+
function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null) {
|
|
370
368
|
if (!subject || !predicate || !object) return;
|
|
371
369
|
|
|
372
370
|
const quad = dataFactory.quad(subject, predicate, object);
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
subject,
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
371
|
+
const remove = meta?.remove || false;
|
|
372
|
+
|
|
373
|
+
if (remove) {
|
|
374
|
+
// Check if quad exists in current buffer
|
|
375
|
+
const quadKey = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
376
|
+
if (quadBuffer.has(quadKey)) {
|
|
377
|
+
// In current state → cancel, appears nowhere
|
|
378
|
+
quadBuffer.delete(quadKey);
|
|
379
|
+
// Also remove from quads array if present
|
|
380
|
+
const index = quads.findIndex(q =>
|
|
381
|
+
q.subject.value === quad.subject.value &&
|
|
382
|
+
q.predicate.value === quad.predicate.value &&
|
|
383
|
+
q.object.value === quad.object.value
|
|
384
|
+
);
|
|
385
|
+
if (index !== -1) {
|
|
386
|
+
quads.splice(index, 1);
|
|
387
|
+
}
|
|
388
|
+
// Remove from quadIndex
|
|
389
|
+
quadIndex.delete(quadKey);
|
|
390
|
+
} else {
|
|
391
|
+
// Not in current state → external retract
|
|
392
|
+
removeSet.add(quad);
|
|
393
|
+
}
|
|
394
|
+
} else {
|
|
395
|
+
// Add to buffer and quads
|
|
396
|
+
const quadKey = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
397
|
+
quadBuffer.set(quadKey, quad);
|
|
398
|
+
quads.push(quad);
|
|
399
|
+
|
|
400
|
+
// Create lean origin entry
|
|
401
|
+
const originEntry = {
|
|
402
|
+
blockId: block.id,
|
|
403
|
+
range: block.range,
|
|
404
|
+
carrierType: block.carrierType,
|
|
405
|
+
subject: subject.value,
|
|
406
|
+
predicate: predicate.value,
|
|
407
|
+
context: { ...block.context },
|
|
408
|
+
polarity: meta?.remove ? '-' : '+',
|
|
409
|
+
value: block.text || ''
|
|
410
|
+
};
|
|
411
|
+
|
|
412
|
+
quadIndex.set(quadKey, originEntry);
|
|
413
|
+
}
|
|
383
414
|
}
|
|
384
415
|
|
|
385
416
|
const resolveFragment = (fragment, state) => {
|
|
@@ -406,23 +437,24 @@ function resolveObject(sem, state) {
|
|
|
406
437
|
|
|
407
438
|
const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
|
|
408
439
|
const expandedType = expandIRI(typeIRI, state.ctx);
|
|
440
|
+
const typeInfo = typeof entryIndex === 'object' ? entryIndex : { entryIndex, remove: false };
|
|
409
441
|
emitQuad(
|
|
410
|
-
state.quads, state.origin.
|
|
442
|
+
state.quads, state.quadBuffer, state.removeSet, state.origin.quadIndex, block,
|
|
411
443
|
subject,
|
|
412
444
|
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
413
445
|
state.df.namedNode(expandedType),
|
|
414
446
|
state.df,
|
|
415
|
-
{ kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex }
|
|
447
|
+
{ kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove }
|
|
416
448
|
);
|
|
417
449
|
};
|
|
418
450
|
|
|
419
451
|
function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state, carrier) {
|
|
420
452
|
sem.types.forEach(t => {
|
|
421
453
|
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
422
|
-
const
|
|
454
|
+
const typeInfo = typeof t === 'string' ? { entryIndex: null, remove: false } : t;
|
|
423
455
|
// Type subject priority: explicit subject > soft object > carrier URL > current subject
|
|
424
456
|
let typeSubject = newSubject || localObject || carrierO || S;
|
|
425
|
-
createTypeQuad(typeIRI, typeSubject, state, block,
|
|
457
|
+
createTypeQuad(typeIRI, typeSubject, state, block, typeInfo);
|
|
426
458
|
});
|
|
427
459
|
}
|
|
428
460
|
|
|
@@ -453,9 +485,9 @@ function processPredicateAnnotations(sem, newSubject, previousSubject, localObje
|
|
|
453
485
|
const role = determinePredicateRole(pred, carrier, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L);
|
|
454
486
|
if (role) {
|
|
455
487
|
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
456
|
-
emitQuad(state.quads, state.origin.
|
|
488
|
+
emitQuad(state.quads, state.quadBuffer, state.removeSet, state.origin.quadIndex, block,
|
|
457
489
|
role.subject, P, role.object, state.df,
|
|
458
|
-
{ kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex }
|
|
490
|
+
{ kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false }
|
|
459
491
|
);
|
|
460
492
|
}
|
|
461
493
|
});
|
|
@@ -483,7 +515,7 @@ function processAnnotation(carrier, sem, state, options = {}) {
|
|
|
483
515
|
const block = createBlock(
|
|
484
516
|
S.value, sem.types, sem.predicates,
|
|
485
517
|
carrier.range, carrier.attrsRange || null, carrier.valueRange || null,
|
|
486
|
-
carrier.type || null, state.ctx
|
|
518
|
+
carrier.type || null, state.ctx, carrier.text
|
|
487
519
|
);
|
|
488
520
|
|
|
489
521
|
const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
|
|
@@ -555,7 +587,9 @@ export function parse(text, options = {}) {
|
|
|
555
587
|
ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
|
|
556
588
|
df: options.dataFactory || DataFactory,
|
|
557
589
|
quads: [],
|
|
558
|
-
|
|
590
|
+
quadBuffer: new Map(),
|
|
591
|
+
removeSet: new Set(),
|
|
592
|
+
origin: { quadIndex: new Map() },
|
|
559
593
|
currentSubject: null,
|
|
560
594
|
tokens: null,
|
|
561
595
|
currentTokenIndex: -1
|
|
@@ -582,5 +616,18 @@ export function parse(text, options = {}) {
|
|
|
582
616
|
TOKEN_PROCESSORS[token.type]?.(token, state);
|
|
583
617
|
}
|
|
584
618
|
|
|
585
|
-
|
|
619
|
+
// Convert removeSet to array and ensure hard invariant: quads ∩ remove = ∅
|
|
620
|
+
const removeArray = Array.from(state.removeSet);
|
|
621
|
+
const quadKeys = new Set();
|
|
622
|
+
state.quads.forEach(q => {
|
|
623
|
+
quadKeys.add(quadIndexKey(q.subject, q.predicate, q.object));
|
|
624
|
+
});
|
|
625
|
+
|
|
626
|
+
// Filter removeArray to ensure no overlap with quads
|
|
627
|
+
const filteredRemove = removeArray.filter(quad => {
|
|
628
|
+
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
629
|
+
return !quadKeys.has(key);
|
|
630
|
+
});
|
|
631
|
+
|
|
632
|
+
return { quads: state.quads, remove: filteredRemove, origin: state.origin, context: state.ctx };
|
|
586
633
|
}
|