mdld-parse 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -16
- package/package.json +1 -1
- package/src/applyDiff.js +141 -76
- package/src/generate.js +68 -79
- package/src/index.js +1 -0
- package/src/locate.js +20 -37
- package/src/parse.js +24 -195
- package/src/utils.js +254 -13
package/src/generate.js
CHANGED
|
@@ -1,4 +1,17 @@
|
|
|
1
|
-
import { shortenIRI, expandIRI, quadIndexKey,
|
|
1
|
+
import { shortenIRI, expandIRI, quadIndexKey, createUnifiedSlot, DEFAULT_CONTEXT, DataFactory } from './utils.js';
|
|
2
|
+
|
|
3
|
+
// Helper functions for cleaner term type checking
|
|
4
|
+
function isLiteral(term) {
|
|
5
|
+
return term?.termType === 'Literal';
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
function isNamedNode(term) {
|
|
9
|
+
return term?.termType === 'NamedNode';
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
function isRdfType(term) {
|
|
13
|
+
return term?.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
|
|
14
|
+
}
|
|
2
15
|
|
|
3
16
|
|
|
4
17
|
function extractLocalName(iri) {
|
|
@@ -25,37 +38,37 @@ export function generate(quads, context = {}) {
|
|
|
25
38
|
|
|
26
39
|
const subjectGroups = groupQuadsBySubject(normalizedQuads);
|
|
27
40
|
|
|
28
|
-
const { text,
|
|
41
|
+
const { text, quadMap } = buildDeterministicMDLD(subjectGroups, fullContext);
|
|
29
42
|
|
|
30
43
|
return {
|
|
31
44
|
text,
|
|
32
|
-
origin: {
|
|
45
|
+
origin: { quadMap },
|
|
33
46
|
context: fullContext
|
|
34
47
|
};
|
|
35
48
|
}
|
|
36
49
|
|
|
37
50
|
function normalizeAndSortQuads(quads) {
|
|
38
51
|
return quads
|
|
39
|
-
.map(quad =>
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
})
|
|
52
|
+
.map(quad => {
|
|
53
|
+
// Use DataFactory.fromTerm to ensure proper RDF/JS compatibility
|
|
54
|
+
const normSubject = DataFactory.fromTerm(quad.subject);
|
|
55
|
+
const normPredicate = DataFactory.fromTerm(quad.predicate);
|
|
56
|
+
const normObject = DataFactory.fromTerm(quad.object);
|
|
57
|
+
|
|
58
|
+
return {
|
|
59
|
+
subject: normSubject,
|
|
60
|
+
predicate: normPredicate,
|
|
61
|
+
object: normObject
|
|
62
|
+
};
|
|
63
|
+
})
|
|
51
64
|
.sort((a, b) => {
|
|
52
65
|
// Deterministic sorting: subject -> predicate -> object
|
|
53
66
|
const sComp = a.subject.value.localeCompare(b.subject.value);
|
|
54
67
|
if (sComp !== 0) return sComp;
|
|
55
68
|
const pComp = a.predicate.value.localeCompare(b.predicate.value);
|
|
56
69
|
if (pComp !== 0) return pComp;
|
|
57
|
-
const oA = a.object
|
|
58
|
-
const oB = b.object
|
|
70
|
+
const oA = isLiteral(a.object) ? a.object.value : a.object.value;
|
|
71
|
+
const oB = isLiteral(b.object) ? b.object.value : b.object.value;
|
|
59
72
|
return oA.localeCompare(oB);
|
|
60
73
|
});
|
|
61
74
|
}
|
|
@@ -74,8 +87,7 @@ function groupQuadsBySubject(quads) {
|
|
|
74
87
|
function buildDeterministicMDLD(subjectGroups, context) {
|
|
75
88
|
let text = '';
|
|
76
89
|
let currentPos = 0;
|
|
77
|
-
const
|
|
78
|
-
const quadIndex = new Map();
|
|
90
|
+
const quadMap = new Map();
|
|
79
91
|
|
|
80
92
|
// Add prefixes first (deterministic order), but exclude default context prefixes
|
|
81
93
|
const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
|
|
@@ -83,14 +95,6 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
83
95
|
// Skip default context prefixes - they're implicit in MDLD
|
|
84
96
|
if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix]) {
|
|
85
97
|
const prefixDecl = `[${prefix}] <${namespace}>\n`;
|
|
86
|
-
const blockId = generateBlockId();
|
|
87
|
-
blocks.set(blockId, {
|
|
88
|
-
id: blockId,
|
|
89
|
-
range: { start: currentPos, end: currentPos + prefixDecl.length },
|
|
90
|
-
subject: null,
|
|
91
|
-
entries: [{ kind: 'prefix', prefix, namespace, raw: prefixDecl.trim() }],
|
|
92
|
-
carrierType: 'prefix'
|
|
93
|
-
});
|
|
94
98
|
text += prefixDecl;
|
|
95
99
|
currentPos += prefixDecl.length;
|
|
96
100
|
}
|
|
@@ -108,10 +112,10 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
108
112
|
const subjectQuads = subjectGroups.get(subjectIRI);
|
|
109
113
|
const shortSubject = shortenIRI(subjectIRI, context);
|
|
110
114
|
|
|
111
|
-
// Separate types, literals, and objects
|
|
112
|
-
const types = subjectQuads.filter(q => q.predicate
|
|
113
|
-
const literals = subjectQuads.filter(q => q.object
|
|
114
|
-
const objects = subjectQuads.filter(q => q.object
|
|
115
|
+
// Separate types, literals, and objects using helper functions
|
|
116
|
+
const types = subjectQuads.filter(q => isRdfType(q.predicate));
|
|
117
|
+
const literals = subjectQuads.filter(q => isLiteral(q.object) && !isRdfType(q.predicate));
|
|
118
|
+
const objects = subjectQuads.filter(q => isNamedNode(q.object) && !isRdfType(q.predicate));
|
|
115
119
|
|
|
116
120
|
// Generate heading
|
|
117
121
|
const localSubjectName = extractLocalName(subjectIRI);
|
|
@@ -120,28 +124,23 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
120
124
|
: '';
|
|
121
125
|
|
|
122
126
|
const headingText = `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n\n`;
|
|
123
|
-
|
|
127
|
+
|
|
124
128
|
const headingBlock = {
|
|
125
|
-
id:
|
|
129
|
+
id: generateBlockId(),
|
|
126
130
|
range: { start: currentPos, end: currentPos + headingText.length },
|
|
127
131
|
subject: subjectIRI,
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
entryIndex: i
|
|
135
|
-
}))
|
|
136
|
-
],
|
|
137
|
-
carrierType: 'heading'
|
|
132
|
+
types: types.map(t => t.object.value),
|
|
133
|
+
predicates: [],
|
|
134
|
+
context: { ...context },
|
|
135
|
+
carrierType: 'heading',
|
|
136
|
+
attrsRange: { start: currentPos + headingText.indexOf('{'), end: currentPos + headingText.indexOf('}') + 1 },
|
|
137
|
+
valueRange: { start: currentPos + 2, end: currentPos + 2 + localSubjectName.length }
|
|
138
138
|
};
|
|
139
|
-
blocks.set(blockId, headingBlock);
|
|
140
139
|
|
|
141
|
-
// Add type quads to
|
|
140
|
+
// Add type quads to quadMap
|
|
142
141
|
types.forEach((quad, i) => {
|
|
143
142
|
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
144
|
-
|
|
143
|
+
quadMap.set(key, createUnifiedSlot(headingBlock, i, {
|
|
145
144
|
kind: 'type',
|
|
146
145
|
subject: quad.subject,
|
|
147
146
|
predicate: quad.predicate,
|
|
@@ -158,34 +157,30 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
158
157
|
const predShort = shortenIRI(quad.predicate.value, context);
|
|
159
158
|
let annotation = predShort;
|
|
160
159
|
|
|
160
|
+
// Use DataFactory XSD constants for datatype comparison
|
|
161
|
+
const xsdString = 'http://www.w3.org/2001/XMLSchema#string';
|
|
161
162
|
if (quad.object.language) {
|
|
162
163
|
annotation += ` @${quad.object.language}`;
|
|
163
|
-
} else if (quad.object.datatype.value !==
|
|
164
|
+
} else if (quad.object.datatype.value !== xsdString) {
|
|
164
165
|
annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
|
|
165
166
|
}
|
|
166
167
|
|
|
167
168
|
const literalText = `[${quad.object.value}] {${annotation}}\n`;
|
|
168
|
-
const literalBlockId = generateBlockId();
|
|
169
169
|
const literalBlock = {
|
|
170
|
-
id:
|
|
170
|
+
id: generateBlockId(),
|
|
171
171
|
range: { start: currentPos, end: currentPos + literalText.length },
|
|
172
172
|
subject: subjectIRI,
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
expandedPredicate: quad.predicate.value,
|
|
177
|
-
form: '',
|
|
178
|
-
entryIndex: 0
|
|
179
|
-
}],
|
|
173
|
+
types: [],
|
|
174
|
+
predicates: [{ iri: quad.predicate.value, form: '' }],
|
|
175
|
+
context: { ...context },
|
|
180
176
|
carrierType: 'span',
|
|
181
177
|
valueRange: { start: currentPos + 1, end: currentPos + 1 + quad.object.value.length },
|
|
182
178
|
attrsRange: { start: currentPos + literalText.indexOf('{'), end: currentPos + literalText.indexOf('}') + 1 }
|
|
183
179
|
};
|
|
184
|
-
blocks.set(literalBlockId, literalBlock);
|
|
185
180
|
|
|
186
|
-
// Add to
|
|
181
|
+
// Add to quadMap
|
|
187
182
|
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
188
|
-
|
|
183
|
+
quadMap.set(key, createUnifiedSlot(literalBlock, 0, {
|
|
189
184
|
kind: 'pred',
|
|
190
185
|
subject: quad.subject,
|
|
191
186
|
predicate: quad.predicate,
|
|
@@ -200,29 +195,25 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
200
195
|
// Add objects (deterministic order)
|
|
201
196
|
const sortedObjects = objects.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
|
|
202
197
|
for (const quad of sortedObjects) {
|
|
203
|
-
const predShort = shortenIRI(quad.predicate.value, context);
|
|
204
198
|
const objShort = shortenIRI(quad.object.value, context);
|
|
205
|
-
const
|
|
199
|
+
const predShort = shortenIRI(quad.predicate.value, context);
|
|
200
|
+
const objectText = `[${objShort}] {+${objShort} ?${predShort}}\n`;
|
|
206
201
|
|
|
207
|
-
const objectText = `[${localName}] {+${objShort} ?${predShort}}\n`;
|
|
208
|
-
const objectBlockId = generateBlockId();
|
|
209
202
|
const objectBlock = {
|
|
210
|
-
id:
|
|
203
|
+
id: generateBlockId(),
|
|
211
204
|
range: { start: currentPos, end: currentPos + objectText.length },
|
|
212
205
|
subject: subjectIRI,
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
}
|
|
219
|
-
carrierType: 'span'
|
|
206
|
+
types: [],
|
|
207
|
+
predicates: [{ iri: quad.predicate.value, form: '?' }],
|
|
208
|
+
context: { ...context },
|
|
209
|
+
carrierType: 'span',
|
|
210
|
+
valueRange: { start: currentPos + 1, end: currentPos + 1 + objShort.length },
|
|
211
|
+
attrsRange: { start: currentPos + objectText.indexOf('{'), end: currentPos + objectText.indexOf('}') + 1 }
|
|
220
212
|
};
|
|
221
|
-
blocks.set(objectBlockId, objectBlock);
|
|
222
213
|
|
|
223
|
-
// Add to
|
|
214
|
+
// Add to quadMap
|
|
224
215
|
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
225
|
-
|
|
216
|
+
quadMap.set(key, createUnifiedSlot(objectBlock, 0, {
|
|
226
217
|
kind: 'pred',
|
|
227
218
|
subject: quad.subject,
|
|
228
219
|
predicate: quad.predicate,
|
|
@@ -234,13 +225,11 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
234
225
|
currentPos += objectText.length;
|
|
235
226
|
}
|
|
236
227
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
currentPos += 1;
|
|
240
|
-
}
|
|
228
|
+
text += '\n';
|
|
229
|
+
currentPos += 1;
|
|
241
230
|
}
|
|
242
231
|
|
|
243
|
-
return { text
|
|
232
|
+
return { text, quadMap };
|
|
244
233
|
}
|
|
245
234
|
|
|
246
235
|
function generateBlockId() {
|
package/src/index.js
CHANGED
package/src/locate.js
CHANGED
|
@@ -17,7 +17,7 @@ export function locate(quad, origin, text = '', context = {}) {
|
|
|
17
17
|
origin = parseResult.origin;
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
-
if (!quad || !origin || !origin.
|
|
20
|
+
if (!quad || !origin || !origin.quadMap) {
|
|
21
21
|
return null;
|
|
22
22
|
}
|
|
23
23
|
|
|
@@ -27,57 +27,40 @@ export function locate(quad, origin, text = '', context = {}) {
|
|
|
27
27
|
return null;
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
// Generate the quad key to lookup in
|
|
30
|
+
// Generate the quad key to lookup in quadMap
|
|
31
31
|
const quadKey = quadIndexKey(normalizedQuad.subject, normalizedQuad.predicate, normalizedQuad.object);
|
|
32
32
|
|
|
33
|
-
// Find the slot information in
|
|
34
|
-
const slotInfo = origin.
|
|
33
|
+
// Find the slot information in quadMap
|
|
34
|
+
const slotInfo = origin.quadMap.get(quadKey);
|
|
35
35
|
if (!slotInfo) {
|
|
36
36
|
return null;
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
//
|
|
40
|
-
const block =
|
|
41
|
-
if (!block) {
|
|
42
|
-
return null;
|
|
43
|
-
}
|
|
39
|
+
// In unified structure, slotInfo contains all block information
|
|
40
|
+
const block = slotInfo;
|
|
44
41
|
|
|
45
|
-
// Extract the actual text content based on carrier type
|
|
42
|
+
// Extract the actual text content based on carrier type
|
|
46
43
|
let contentRange = null;
|
|
47
44
|
let content = '';
|
|
48
45
|
|
|
49
46
|
if (block.carrierType === 'heading') {
|
|
50
|
-
// For headings, use the
|
|
51
|
-
contentRange = block.
|
|
52
|
-
content = text.substring(block.
|
|
53
|
-
} else if (block.carrierType === 'blockquote' || block.carrierType === 'list' || block.carrierType === 'span') {
|
|
54
|
-
// For blockquotes, lists, and spans,
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
// For blockquotes, the entry.raw contains the full carrier text
|
|
63
|
-
// Extract just the content part before the annotation
|
|
64
|
-
const annotationStart = entry.raw.indexOf('{');
|
|
65
|
-
if (annotationStart !== -1) {
|
|
66
|
-
const carrierContent = entry.raw.substring(0, annotationStart).trim();
|
|
67
|
-
// Find this content in the block text
|
|
68
|
-
const contentStart = text.indexOf(carrierContent, block.range.start);
|
|
69
|
-
if (contentStart !== -1) {
|
|
70
|
-
const contentEnd = contentStart + carrierContent.length;
|
|
71
|
-
contentRange = { start: contentStart, end: contentEnd };
|
|
72
|
-
content = text.substring(contentStart, contentEnd);
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
}
|
|
47
|
+
// For headings, use the value range for the heading text
|
|
48
|
+
contentRange = block.valueRange;
|
|
49
|
+
content = text.substring(block.valueRange.start, block.valueRange.end);
|
|
50
|
+
} else if (block.carrierType === 'emphasis' || block.carrierType === 'blockquote' || block.carrierType === 'list' || block.carrierType === 'span') {
|
|
51
|
+
// For emphasis, blockquotes, lists, and spans, use the value range
|
|
52
|
+
if (block.valueRange) {
|
|
53
|
+
contentRange = block.valueRange;
|
|
54
|
+
content = text.substring(block.valueRange.start, block.valueRange.end);
|
|
55
|
+
} else {
|
|
56
|
+
// Fallback to block range
|
|
57
|
+
contentRange = block.range;
|
|
58
|
+
content = text.substring(block.range.start, block.range.end);
|
|
76
59
|
}
|
|
77
60
|
}
|
|
78
61
|
|
|
79
62
|
return {
|
|
80
|
-
blockId: slotInfo.
|
|
63
|
+
blockId: slotInfo.id,
|
|
81
64
|
entryIndex: slotInfo.entryIndex,
|
|
82
65
|
kind: slotInfo.kind,
|
|
83
66
|
subject: normalizedQuad.subject,
|