mdld-parse 0.5.6 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -16
- package/package.json +1 -1
- package/src/applyDiff.js +141 -76
- package/src/generate.js +68 -79
- package/src/index.js +1 -0
- package/src/locate.js +20 -37
- package/src/parse.js +24 -195
- package/src/utils.js +37 -6
package/README.md
CHANGED
|
@@ -22,10 +22,10 @@ Energy level: [8] {my:energyLevel ^^xsd:integer}
|
|
|
22
22
|
|
|
23
23
|
Met [Sam] {+my:sam .my:Person ?my:attendee} on my regular walk at [Central Park] {+my:central-park ?my:location .my:Place label @en} and talked about [Sunny] {my:weather} weather.
|
|
24
24
|
|
|
25
|
-
Activities:
|
|
25
|
+
Activities:
|
|
26
26
|
|
|
27
|
-
- Walking {
|
|
28
|
-
- Reading {
|
|
27
|
+
- **Walking** {+ex:walking ?my:hasActivity .my:Activity label}
|
|
28
|
+
- **Reading** {+ex:reading ?my:hasActivity .my:Activity label}
|
|
29
29
|
|
|
30
30
|
```
|
|
31
31
|
|
|
@@ -59,7 +59,7 @@ my:central-park a my:Place;
|
|
|
59
59
|
|
|
60
60
|
```
|
|
61
61
|
|
|
62
|
-
Read the [FULL SPEC](./
|
|
62
|
+
Read the [FULL SPEC](./spec/Spec.md).
|
|
63
63
|
|
|
64
64
|
## Core Features
|
|
65
65
|
|
|
@@ -69,7 +69,6 @@ Read the [FULL SPEC](./docs/Spec/Spec.md).
|
|
|
69
69
|
- **Four predicate forms**: `p` (S→L), `?p` (S→O), `!p` (O→S)
|
|
70
70
|
- **Type declarations**: `.Class` for rdf:type triples
|
|
71
71
|
- **Datatypes & language**: `^^xsd:date` and `@en` support
|
|
72
|
-
- **Lists**: Explicit subject declarations and numbered ordered lists with `rdf:List` support
|
|
73
72
|
- **Fragments**: Built-in document structuring with `{=#fragment}`
|
|
74
73
|
- **Round-trip serialization**: Markdown ↔ RDF ↔ Markdown preserves structure
|
|
75
74
|
|
|
@@ -213,14 +212,15 @@ ex:armstrong a prov:Person .
|
|
|
213
212
|
|
|
214
213
|
### Lists
|
|
215
214
|
|
|
216
|
-
Lists
|
|
215
|
+
Lists are pure Markdown structure. Each list item requires explicit annotations:
|
|
217
216
|
|
|
218
217
|
```markdown
|
|
219
218
|
# Recipe {=ex:recipe}
|
|
220
219
|
|
|
221
|
-
Ingredients:
|
|
222
|
-
|
|
223
|
-
-
|
|
220
|
+
Ingredients:
|
|
221
|
+
|
|
222
|
+
- **Flour** {+ex:flour ?ex:ingredient .ex:Ingredient label}
|
|
223
|
+
- **Water** {+ex:water ?ex:ingredient .ex:Ingredient label}
|
|
224
224
|
```
|
|
225
225
|
|
|
226
226
|
```turtle
|
|
@@ -229,6 +229,11 @@ ex:flour a ex:Ingredient ; rdfs:label "Flour" .
|
|
|
229
229
|
ex:water a ex:Ingredient ; rdfs:label "Water" .
|
|
230
230
|
```
|
|
231
231
|
|
|
232
|
+
**Key Rules:**
|
|
233
|
+
- No semantic propagation from list scope
|
|
234
|
+
- Each item must have explicit annotations
|
|
235
|
+
- Use `+IRI` to maintain subject chaining for repeated object properties
|
|
236
|
+
|
|
232
237
|
### Code Blocks
|
|
233
238
|
|
|
234
239
|
Code blocks are value carriers:
|
|
@@ -503,7 +508,7 @@ Only specific markdown elements can carry semantic values:
|
|
|
503
508
|
|
|
504
509
|
**Block:**
|
|
505
510
|
- Headings (`# Title`)
|
|
506
|
-
- List items (`- item`, `1. item`)
|
|
511
|
+
- List items (`- item`, `1. item`) — pure Markdown structure
|
|
507
512
|
- Blockquotes (`> quote`)
|
|
508
513
|
- Code blocks (` ```lang `)
|
|
509
514
|
|
|
@@ -579,14 +584,14 @@ Therefore, the algebra is **closed**.
|
|
|
579
584
|
|
|
580
585
|
# Meeting Notes {=alice:meeting-2024-01-15 .alice:Meeting}
|
|
581
586
|
|
|
582
|
-
Attendees:
|
|
587
|
+
Attendees:
|
|
583
588
|
|
|
584
|
-
- Alice {
|
|
585
|
-
- Bob {
|
|
589
|
+
- **Alice** {+alice:alice ?alice:attendee label}
|
|
590
|
+
- **Bob** {+alice:bob ?alice:attendee label}
|
|
586
591
|
|
|
587
|
-
Action items:
|
|
592
|
+
Action items:
|
|
588
593
|
|
|
589
|
-
- Review proposal {
|
|
594
|
+
- **Review proposal** {+alice:task-1 ?alice:actionItem label}
|
|
590
595
|
```
|
|
591
596
|
|
|
592
597
|
### Developer Documentation
|
|
@@ -630,7 +635,7 @@ Tests validate:
|
|
|
630
635
|
- Subject declaration and context
|
|
631
636
|
- All predicate forms (p, ?p, !p)
|
|
632
637
|
- Datatypes and language tags
|
|
633
|
-
-
|
|
638
|
+
- Explicit list item annotations
|
|
634
639
|
- Code blocks and blockquotes
|
|
635
640
|
- Round-trip serialization
|
|
636
641
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.2",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
package/src/applyDiff.js
CHANGED
|
@@ -15,25 +15,93 @@ import {
|
|
|
15
15
|
addSoftFragmentToken,
|
|
16
16
|
removeSoftFragmentToken,
|
|
17
17
|
objectSignature,
|
|
18
|
-
expandIRI
|
|
18
|
+
expandIRI,
|
|
19
|
+
DataFactory
|
|
19
20
|
} from './utils.js';
|
|
20
21
|
|
|
21
22
|
function getBlockById(base, blockId) {
|
|
22
|
-
return blockId ? base?.
|
|
23
|
+
return blockId ? base?.quadMap?.get(blockId) : null;
|
|
23
24
|
}
|
|
24
25
|
|
|
25
26
|
function getEntryByQuadKey(base, quadKey) {
|
|
26
|
-
return quadKey ? base?.
|
|
27
|
+
return quadKey ? base?.quadMap?.get(quadKey) : null;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// Helper functions for cleaner term type checking
|
|
31
|
+
function isLiteral(term) {
|
|
32
|
+
return term?.termType === 'Literal';
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
function isNamedNode(term) {
|
|
36
|
+
return term?.termType === 'NamedNode';
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
function isRdfType(term) {
|
|
40
|
+
return term?.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function createAnnotationForQuad(quad, ctx) {
|
|
44
|
+
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
45
|
+
if (isLiteral(quad.object)) {
|
|
46
|
+
const value = String(quad.object.value ?? '');
|
|
47
|
+
const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
|
|
48
|
+
return { text: `[${value}] {${ann}}`, isLiteral: true };
|
|
49
|
+
} else if (isNamedNode(quad.object)) {
|
|
50
|
+
const objectShort = shortenIRI(quad.object.value, ctx);
|
|
51
|
+
const objectAnn = createObjectAnnotation(objectShort, predShort);
|
|
52
|
+
return { text: objectAnn, isLiteral: false };
|
|
53
|
+
}
|
|
54
|
+
return null;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
function createSubjectBlockForQuad(quad, ctx) {
|
|
58
|
+
const subjectShort = shortenIRI(quad.subject.value, ctx);
|
|
59
|
+
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
60
|
+
const subjectName = extractLocalName(quad.subject.value);
|
|
61
|
+
|
|
62
|
+
if (isNamedNode(quad.object)) {
|
|
63
|
+
// IRI object: create object reference
|
|
64
|
+
const objectShort = shortenIRI(quad.object.value, ctx);
|
|
65
|
+
return { text: `\n\n# ${subjectName.charAt(0).toUpperCase() + subjectName.slice(1)} {=${subjectShort}}\n[${objectShort}] {${predShort}}\n`, isNewSubject: true };
|
|
66
|
+
} else {
|
|
67
|
+
// Literal object: create property on separate line
|
|
68
|
+
const value = String(quad.object.value ?? '');
|
|
69
|
+
const annotation = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
|
|
70
|
+
return { text: `\n\n# ${subjectName.charAt(0).toUpperCase() + subjectName.slice(1)} {=${subjectShort}}\n[${value}] {${annotation}}\n`, isNewSubject: true };
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
function extractLocalName(iri) {
|
|
75
|
+
return iri.split('/').pop() || iri.split('#').pop() || iri;
|
|
27
76
|
}
|
|
28
77
|
|
|
29
78
|
function isValidQuad(quad) {
|
|
30
79
|
return quad && quad.subject && quad.predicate && quad.object;
|
|
31
80
|
}
|
|
32
81
|
|
|
82
|
+
function normalizeDiffQuads(quads, ctx) {
|
|
83
|
+
// Use DataFactory.fromQuad for proper RDF/JS compatibility
|
|
84
|
+
// But first expand any CURIEs in the quads to ensure proper matching
|
|
85
|
+
return quads.map(quad => {
|
|
86
|
+
// Expand CURIEs to full IRIs before normalization
|
|
87
|
+
const expandedQuad = {
|
|
88
|
+
subject: quad.subject.termType === 'NamedNode'
|
|
89
|
+
? { ...quad.subject, value: expandIRI(quad.subject.value, ctx) }
|
|
90
|
+
: quad.subject,
|
|
91
|
+
predicate: quad.predicate.termType === 'NamedNode'
|
|
92
|
+
? { ...quad.predicate, value: expandIRI(quad.predicate.value, ctx) }
|
|
93
|
+
: quad.predicate,
|
|
94
|
+
object: quad.object,
|
|
95
|
+
graph: quad.graph
|
|
96
|
+
};
|
|
97
|
+
return DataFactory.fromQuad(expandedQuad);
|
|
98
|
+
}).filter(isValidQuad);
|
|
99
|
+
}
|
|
100
|
+
|
|
33
101
|
function createLiteralAnnotation(value, predicate, language, datatype, ctx) {
|
|
34
102
|
let ann = predicate;
|
|
35
103
|
if (language) ann += ` @${language}`;
|
|
36
|
-
else if (datatype?.value && datatype.value !== '
|
|
104
|
+
else if (datatype?.value && datatype.value !== DataFactory.literal('').datatype.value) {
|
|
37
105
|
ann += ` ^^${shortenIRI(datatype.value, ctx)}`;
|
|
38
106
|
}
|
|
39
107
|
return ann;
|
|
@@ -126,23 +194,24 @@ function removeTokenFromSlot(entry, tokens, ctx, quad) {
|
|
|
126
194
|
}
|
|
127
195
|
|
|
128
196
|
function addTokenToSlot(tokens, ctx, quad) {
|
|
129
|
-
|
|
197
|
+
// Use cleaner helper functions
|
|
198
|
+
if (isRdfType(quad.predicate) && isNamedNode(quad.object)) {
|
|
130
199
|
const typeShort = shortenIRI(quad.object.value, ctx);
|
|
131
200
|
const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
|
|
132
201
|
if (typeToken && !tokens.includes(typeToken)) {
|
|
133
202
|
return [...tokens, typeToken];
|
|
134
203
|
}
|
|
135
|
-
} else if (quad.object
|
|
204
|
+
} else if (isNamedNode(quad.object)) {
|
|
136
205
|
const objectShort = shortenIRI(quad.object.value, ctx);
|
|
137
206
|
const isSoftFragment = quad.object.value.includes('#');
|
|
138
207
|
const fragment = isSoftFragment ? quad.object.value.split('#')[1] : null;
|
|
139
208
|
|
|
140
|
-
if (
|
|
141
|
-
return addSoftFragmentToken(tokens, fragment);
|
|
209
|
+
if (fragment) {
|
|
210
|
+
return addSoftFragmentToken(tokens, objectShort, fragment);
|
|
142
211
|
} else {
|
|
143
212
|
return addObjectToken(tokens, objectShort);
|
|
144
213
|
}
|
|
145
|
-
} else if (quad.object
|
|
214
|
+
} else if (isLiteral(quad.object)) {
|
|
146
215
|
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
147
216
|
if (!tokens.includes(predShort)) {
|
|
148
217
|
return [...tokens, predShort];
|
|
@@ -179,9 +248,9 @@ export function applyDiff({ text, diff, origin, options = {} }) {
|
|
|
179
248
|
|
|
180
249
|
|
|
181
250
|
function planOperations(diff, base, ctx) {
|
|
182
|
-
// Normalize quads
|
|
183
|
-
const normAdds = (diff.add || [])
|
|
184
|
-
const normDeletes = (diff.delete || [])
|
|
251
|
+
// Normalize quads using DataFactory for proper RDF/JS compatibility
|
|
252
|
+
const normAdds = normalizeDiffQuads(diff.add || [], ctx);
|
|
253
|
+
const normDeletes = normalizeDiffQuads(diff.delete || [], ctx);
|
|
185
254
|
|
|
186
255
|
const plan = {
|
|
187
256
|
literalUpdates: [],
|
|
@@ -206,8 +275,7 @@ function planOperations(diff, base, ctx) {
|
|
|
206
275
|
const key = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
|
|
207
276
|
const quadKey = quadToKeyForOrigin(quad);
|
|
208
277
|
const entry = getEntryByQuadKey(base, quadKey);
|
|
209
|
-
const
|
|
210
|
-
const block = getBlockById(base, blockId);
|
|
278
|
+
const block = entry; // In unified structure, entry is the block
|
|
211
279
|
if (block?.attrsRange) {
|
|
212
280
|
anchors.set(key, { block, entry });
|
|
213
281
|
}
|
|
@@ -215,18 +283,18 @@ function planOperations(diff, base, ctx) {
|
|
|
215
283
|
|
|
216
284
|
// Detect literal updates early
|
|
217
285
|
for (const deleteQuad of normDeletes) {
|
|
218
|
-
if (deleteQuad.object
|
|
286
|
+
if (!isLiteral(deleteQuad.object)) continue;
|
|
219
287
|
|
|
220
288
|
const k = JSON.stringify([deleteQuad.subject.value, deleteQuad.predicate.value]);
|
|
221
289
|
const candidates = addBySP.get(k) || [];
|
|
222
290
|
const addQuad = candidates.find(x =>
|
|
223
|
-
x?.object
|
|
291
|
+
isLiteral(x?.object) && !plan.consumedAdds.has(quadToKeyForOrigin(x))
|
|
224
292
|
);
|
|
225
293
|
|
|
226
294
|
if (!addQuad) continue;
|
|
227
295
|
|
|
228
296
|
const entry = resolveOriginEntry(deleteQuad, base);
|
|
229
|
-
const block = entry
|
|
297
|
+
const block = entry; // In unified structure, the entry is the block
|
|
230
298
|
|
|
231
299
|
if (block) {
|
|
232
300
|
plan.literalUpdates.push({ deleteQuad, addQuad, entry, block });
|
|
@@ -236,13 +304,13 @@ function planOperations(diff, base, ctx) {
|
|
|
236
304
|
|
|
237
305
|
// Find vacant slot occupations
|
|
238
306
|
for (const quad of normAdds) {
|
|
239
|
-
if (quad.object
|
|
307
|
+
if (!isLiteral(quad.object)) continue;
|
|
240
308
|
if (plan.consumedAdds.has(quadToKeyForOrigin(quad))) continue;
|
|
241
309
|
|
|
242
|
-
const vacantSlot = findVacantSlot(base?.
|
|
310
|
+
const vacantSlot = findVacantSlot(base?.quadMap, quad.subject, quad.predicate);
|
|
243
311
|
if (!vacantSlot) continue;
|
|
244
312
|
|
|
245
|
-
const block =
|
|
313
|
+
const block = vacantSlot; // In unified structure, the slot is the block
|
|
246
314
|
if (block) {
|
|
247
315
|
plan.vacantSlotOccupations.push({ quad, vacantSlot, block });
|
|
248
316
|
plan.consumedAdds.add(quadToKeyForOrigin(quad));
|
|
@@ -251,7 +319,7 @@ function planOperations(diff, base, ctx) {
|
|
|
251
319
|
|
|
252
320
|
// Plan remaining deletes
|
|
253
321
|
for (const quad of normDeletes) {
|
|
254
|
-
if (quad.object
|
|
322
|
+
if (isLiteral(quad.object)) {
|
|
255
323
|
const isUpdated = plan.literalUpdates.some(u =>
|
|
256
324
|
u.deleteQuad.subject.value === quad.subject.value &&
|
|
257
325
|
u.deleteQuad.predicate.value === quad.predicate.value &&
|
|
@@ -261,7 +329,7 @@ function planOperations(diff, base, ctx) {
|
|
|
261
329
|
}
|
|
262
330
|
|
|
263
331
|
const entry = resolveOriginEntry(quad, base);
|
|
264
|
-
const block = entry
|
|
332
|
+
const block = entry; // In unified structure, entry is the block
|
|
265
333
|
if (block) {
|
|
266
334
|
plan.deletes.push({ quad, entry, block });
|
|
267
335
|
}
|
|
@@ -348,7 +416,7 @@ function materializeEdits(plan, text, ctx, base) {
|
|
|
348
416
|
};
|
|
349
417
|
vacantSlot.blockInfo = blockInfo;
|
|
350
418
|
const key = quadToKeyForOrigin(quad);
|
|
351
|
-
if (key) base.
|
|
419
|
+
if (key) base.quadMap.set(key, vacantSlot);
|
|
352
420
|
}
|
|
353
421
|
|
|
354
422
|
const span = readSpan(block, text, 'attrs');
|
|
@@ -382,56 +450,45 @@ function materializeEdits(plan, text, ctx, base) {
|
|
|
382
450
|
continue;
|
|
383
451
|
}
|
|
384
452
|
|
|
385
|
-
if (quad.object
|
|
453
|
+
if (isLiteral(quad.object) || isNamedNode(quad.object)) {
|
|
386
454
|
if (!targetBlock) {
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
455
|
+
// No target block - check if subject already exists in document
|
|
456
|
+
const subjectExists = Array.from(base?.quadMap?.values() || [])
|
|
457
|
+
.some(block => block.subject?.value === quad.subject.value);
|
|
458
|
+
|
|
459
|
+
let annotation;
|
|
460
|
+
if (!subjectExists && isNamedNode(quad.object)) {
|
|
461
|
+
// New subject with IRI object - create subject block
|
|
462
|
+
annotation = createSubjectBlockForQuad(quad, ctx);
|
|
463
|
+
} else if (subjectExists) {
|
|
464
|
+
// Existing subject - create simple annotation
|
|
465
|
+
annotation = createAnnotationForQuad(quad, ctx);
|
|
392
466
|
} else {
|
|
393
|
-
|
|
394
|
-
|
|
467
|
+
// New subject with literal - create subject block
|
|
468
|
+
annotation = createSubjectBlockForQuad(quad, ctx);
|
|
395
469
|
}
|
|
396
|
-
continue;
|
|
397
|
-
}
|
|
398
470
|
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
// Check if this is a subject-only block (like {=ex:order-123})
|
|
403
|
-
const tokens = normalizeAttrsTokens(span.text);
|
|
404
|
-
const hasSubjectToken = tokens.some(t => t.startsWith('='));
|
|
405
|
-
const hasPredicateTokens = tokens.some(t => !t.startsWith('=') && !t.startsWith('.'));
|
|
406
|
-
|
|
407
|
-
if (tokens.length === 1 && tokens[0].startsWith('=')) {
|
|
408
|
-
// This is a subject-only block, create new annotation
|
|
409
|
-
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
410
|
-
if (quad.object.termType === 'Literal') {
|
|
411
|
-
const value = String(quad.object.value ?? '');
|
|
412
|
-
const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
|
|
413
|
-
edits.push({ start: text.length, end: text.length, text: `\n[${value}] {${ann}}` });
|
|
414
|
-
} else {
|
|
415
|
-
const objectShort = shortenIRI(quad.object.value, ctx);
|
|
416
|
-
edits.push({ start: text.length, end: text.length, text: createObjectAnnotation(objectShort, predShort) });
|
|
471
|
+
if (annotation) {
|
|
472
|
+
edits.push({ start: text.length, end: text.length, text: annotation.text });
|
|
417
473
|
}
|
|
418
474
|
continue;
|
|
419
475
|
}
|
|
420
476
|
|
|
421
|
-
//
|
|
422
|
-
const
|
|
423
|
-
|
|
477
|
+
// Insert annotation after target block's range
|
|
478
|
+
const annotation = createAnnotationForQuad(quad, ctx);
|
|
479
|
+
if (annotation) {
|
|
480
|
+
// Find the end of the target block's content, not just its range
|
|
481
|
+
const targetBlockEnd = targetBlock.range.end;
|
|
482
|
+
let insertPos = targetBlockEnd;
|
|
424
483
|
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
if (!updated.includes(datatypeToken)) {
|
|
429
|
-
updated = [...updated, datatypeToken];
|
|
484
|
+
// Skip past the target block's content to find the right insertion point
|
|
485
|
+
while (insertPos < text.length && text[insertPos] !== '\n') {
|
|
486
|
+
insertPos++;
|
|
430
487
|
}
|
|
431
|
-
}
|
|
432
488
|
|
|
433
|
-
|
|
434
|
-
|
|
489
|
+
// Insert after the target block's content
|
|
490
|
+
const finalInsertPos = insertPos < text.length ? insertPos : text.length;
|
|
491
|
+
edits.push({ start: finalInsertPos, end: finalInsertPos, text: `\n${annotation.text}` });
|
|
435
492
|
}
|
|
436
493
|
}
|
|
437
494
|
}
|
|
@@ -450,7 +507,7 @@ function applyEdits(text, edits, ctx, base) {
|
|
|
450
507
|
|
|
451
508
|
// Extract vacant slots before reparsing
|
|
452
509
|
const vacantSlots = new Map();
|
|
453
|
-
base?.
|
|
510
|
+
base?.quadMap?.forEach((slot, key) => {
|
|
454
511
|
if (slot.isVacant) vacantSlots.set(key, slot);
|
|
455
512
|
});
|
|
456
513
|
|
|
@@ -458,7 +515,7 @@ function applyEdits(text, edits, ctx, base) {
|
|
|
458
515
|
|
|
459
516
|
// Merge vacant slots back
|
|
460
517
|
vacantSlots.forEach((vacantSlot, key) => {
|
|
461
|
-
if (!reparsed.origin.
|
|
518
|
+
if (!reparsed.origin.quadMap.has(vacantSlot.id) && vacantSlot.blockInfo) {
|
|
462
519
|
const { blockInfo } = vacantSlot;
|
|
463
520
|
const emptyBlock = {
|
|
464
521
|
id: blockInfo.id,
|
|
@@ -469,12 +526,11 @@ function applyEdits(text, edits, ctx, base) {
|
|
|
469
526
|
subject: blockInfo.subject || '',
|
|
470
527
|
types: [],
|
|
471
528
|
predicates: [],
|
|
472
|
-
entries: [],
|
|
473
529
|
context: blockInfo.context || { ...ctx }
|
|
474
530
|
};
|
|
475
|
-
reparsed.origin.
|
|
531
|
+
reparsed.origin.quadMap.set(vacantSlot.id, emptyBlock);
|
|
476
532
|
}
|
|
477
|
-
reparsed.origin.
|
|
533
|
+
reparsed.origin.quadMap.set(key, vacantSlot);
|
|
478
534
|
});
|
|
479
535
|
|
|
480
536
|
return { text: result, origin: reparsed.origin };
|
|
@@ -483,11 +539,11 @@ function applyEdits(text, edits, ctx, base) {
|
|
|
483
539
|
// Helper functions for origin lookup
|
|
484
540
|
function resolveOriginEntry(quad, base) {
|
|
485
541
|
const key = quadToKeyForOrigin(quad);
|
|
486
|
-
let entry = key ? base?.
|
|
542
|
+
let entry = key ? base?.quadMap?.get(key) : null;
|
|
487
543
|
|
|
488
|
-
if (!entry && quad.object
|
|
544
|
+
if (!entry && isLiteral(quad.object)) {
|
|
489
545
|
// Fallback: search by value
|
|
490
|
-
for (const [k, e] of base?.
|
|
546
|
+
for (const [k, e] of base?.quadMap || []) {
|
|
491
547
|
const parsed = parseQuadIndexKey(k);
|
|
492
548
|
if (parsed && parsed.s === quad.subject.value &&
|
|
493
549
|
parsed.p === quad.predicate.value &&
|
|
@@ -507,12 +563,21 @@ function findTargetBlock(quad, base, anchors) {
|
|
|
507
563
|
const anchored = anchors.get(anchorKey);
|
|
508
564
|
if (anchored?.block) return anchored.block;
|
|
509
565
|
|
|
510
|
-
//
|
|
511
|
-
for
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
566
|
+
// Find the best position within the subject's section
|
|
567
|
+
// Look for blocks with the same subject and sort by position
|
|
568
|
+
const subjectBlocks = Array.from(base?.quadMap?.values() || [])
|
|
569
|
+
.filter(block => block.subject?.value === quad.subject.value)
|
|
570
|
+
.sort((a, b) => a.range.start - b.range.start);
|
|
571
|
+
|
|
572
|
+
if (subjectBlocks.length === 0) return null;
|
|
573
|
+
|
|
574
|
+
// Strategy: Find the last block with attrsRange to maintain consistency
|
|
575
|
+
// For identical subject blocks, prefer the first one to avoid creating duplicates
|
|
576
|
+
const blocksWithAttrs = subjectBlocks.filter(block => block.attrsRange);
|
|
577
|
+
if (blocksWithAttrs.length > 0) {
|
|
578
|
+
return blocksWithAttrs[blocksWithAttrs.length - 1]; // Return last matching block
|
|
515
579
|
}
|
|
516
580
|
|
|
517
|
-
return
|
|
581
|
+
// Fallback: return the last block in the subject's section
|
|
582
|
+
return subjectBlocks[subjectBlocks.length - 1];
|
|
518
583
|
}
|
package/src/generate.js
CHANGED
|
@@ -1,4 +1,17 @@
|
|
|
1
|
-
import { shortenIRI, expandIRI, quadIndexKey,
|
|
1
|
+
import { shortenIRI, expandIRI, quadIndexKey, createUnifiedSlot, DEFAULT_CONTEXT, DataFactory } from './utils.js';
|
|
2
|
+
|
|
3
|
+
// Helper functions for cleaner term type checking
|
|
4
|
+
function isLiteral(term) {
|
|
5
|
+
return term?.termType === 'Literal';
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
function isNamedNode(term) {
|
|
9
|
+
return term?.termType === 'NamedNode';
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
function isRdfType(term) {
|
|
13
|
+
return term?.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
|
|
14
|
+
}
|
|
2
15
|
|
|
3
16
|
|
|
4
17
|
function extractLocalName(iri) {
|
|
@@ -25,37 +38,37 @@ export function generate(quads, context = {}) {
|
|
|
25
38
|
|
|
26
39
|
const subjectGroups = groupQuadsBySubject(normalizedQuads);
|
|
27
40
|
|
|
28
|
-
const { text,
|
|
41
|
+
const { text, quadMap } = buildDeterministicMDLD(subjectGroups, fullContext);
|
|
29
42
|
|
|
30
43
|
return {
|
|
31
44
|
text,
|
|
32
|
-
origin: {
|
|
45
|
+
origin: { quadMap },
|
|
33
46
|
context: fullContext
|
|
34
47
|
};
|
|
35
48
|
}
|
|
36
49
|
|
|
37
50
|
function normalizeAndSortQuads(quads) {
|
|
38
51
|
return quads
|
|
39
|
-
.map(quad =>
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
})
|
|
52
|
+
.map(quad => {
|
|
53
|
+
// Use DataFactory.fromTerm to ensure proper RDF/JS compatibility
|
|
54
|
+
const normSubject = DataFactory.fromTerm(quad.subject);
|
|
55
|
+
const normPredicate = DataFactory.fromTerm(quad.predicate);
|
|
56
|
+
const normObject = DataFactory.fromTerm(quad.object);
|
|
57
|
+
|
|
58
|
+
return {
|
|
59
|
+
subject: normSubject,
|
|
60
|
+
predicate: normPredicate,
|
|
61
|
+
object: normObject
|
|
62
|
+
};
|
|
63
|
+
})
|
|
51
64
|
.sort((a, b) => {
|
|
52
65
|
// Deterministic sorting: subject -> predicate -> object
|
|
53
66
|
const sComp = a.subject.value.localeCompare(b.subject.value);
|
|
54
67
|
if (sComp !== 0) return sComp;
|
|
55
68
|
const pComp = a.predicate.value.localeCompare(b.predicate.value);
|
|
56
69
|
if (pComp !== 0) return pComp;
|
|
57
|
-
const oA = a.object
|
|
58
|
-
const oB = b.object
|
|
70
|
+
const oA = isLiteral(a.object) ? a.object.value : a.object.value;
|
|
71
|
+
const oB = isLiteral(b.object) ? b.object.value : b.object.value;
|
|
59
72
|
return oA.localeCompare(oB);
|
|
60
73
|
});
|
|
61
74
|
}
|
|
@@ -74,8 +87,7 @@ function groupQuadsBySubject(quads) {
|
|
|
74
87
|
function buildDeterministicMDLD(subjectGroups, context) {
|
|
75
88
|
let text = '';
|
|
76
89
|
let currentPos = 0;
|
|
77
|
-
const
|
|
78
|
-
const quadIndex = new Map();
|
|
90
|
+
const quadMap = new Map();
|
|
79
91
|
|
|
80
92
|
// Add prefixes first (deterministic order), but exclude default context prefixes
|
|
81
93
|
const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
|
|
@@ -83,14 +95,6 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
83
95
|
// Skip default context prefixes - they're implicit in MDLD
|
|
84
96
|
if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix]) {
|
|
85
97
|
const prefixDecl = `[${prefix}] <${namespace}>\n`;
|
|
86
|
-
const blockId = generateBlockId();
|
|
87
|
-
blocks.set(blockId, {
|
|
88
|
-
id: blockId,
|
|
89
|
-
range: { start: currentPos, end: currentPos + prefixDecl.length },
|
|
90
|
-
subject: null,
|
|
91
|
-
entries: [{ kind: 'prefix', prefix, namespace, raw: prefixDecl.trim() }],
|
|
92
|
-
carrierType: 'prefix'
|
|
93
|
-
});
|
|
94
98
|
text += prefixDecl;
|
|
95
99
|
currentPos += prefixDecl.length;
|
|
96
100
|
}
|
|
@@ -108,10 +112,10 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
108
112
|
const subjectQuads = subjectGroups.get(subjectIRI);
|
|
109
113
|
const shortSubject = shortenIRI(subjectIRI, context);
|
|
110
114
|
|
|
111
|
-
// Separate types, literals, and objects
|
|
112
|
-
const types = subjectQuads.filter(q => q.predicate
|
|
113
|
-
const literals = subjectQuads.filter(q => q.object
|
|
114
|
-
const objects = subjectQuads.filter(q => q.object
|
|
115
|
+
// Separate types, literals, and objects using helper functions
|
|
116
|
+
const types = subjectQuads.filter(q => isRdfType(q.predicate));
|
|
117
|
+
const literals = subjectQuads.filter(q => isLiteral(q.object) && !isRdfType(q.predicate));
|
|
118
|
+
const objects = subjectQuads.filter(q => isNamedNode(q.object) && !isRdfType(q.predicate));
|
|
115
119
|
|
|
116
120
|
// Generate heading
|
|
117
121
|
const localSubjectName = extractLocalName(subjectIRI);
|
|
@@ -120,28 +124,23 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
120
124
|
: '';
|
|
121
125
|
|
|
122
126
|
const headingText = `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n\n`;
|
|
123
|
-
|
|
127
|
+
|
|
124
128
|
const headingBlock = {
|
|
125
|
-
id:
|
|
129
|
+
id: generateBlockId(),
|
|
126
130
|
range: { start: currentPos, end: currentPos + headingText.length },
|
|
127
131
|
subject: subjectIRI,
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
entryIndex: i
|
|
135
|
-
}))
|
|
136
|
-
],
|
|
137
|
-
carrierType: 'heading'
|
|
132
|
+
types: types.map(t => t.object.value),
|
|
133
|
+
predicates: [],
|
|
134
|
+
context: { ...context },
|
|
135
|
+
carrierType: 'heading',
|
|
136
|
+
attrsRange: { start: currentPos + headingText.indexOf('{'), end: currentPos + headingText.indexOf('}') + 1 },
|
|
137
|
+
valueRange: { start: currentPos + 2, end: currentPos + 2 + localSubjectName.length }
|
|
138
138
|
};
|
|
139
|
-
blocks.set(blockId, headingBlock);
|
|
140
139
|
|
|
141
|
-
// Add type quads to
|
|
140
|
+
// Add type quads to quadMap
|
|
142
141
|
types.forEach((quad, i) => {
|
|
143
142
|
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
144
|
-
|
|
143
|
+
quadMap.set(key, createUnifiedSlot(headingBlock, i, {
|
|
145
144
|
kind: 'type',
|
|
146
145
|
subject: quad.subject,
|
|
147
146
|
predicate: quad.predicate,
|
|
@@ -158,34 +157,30 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
158
157
|
const predShort = shortenIRI(quad.predicate.value, context);
|
|
159
158
|
let annotation = predShort;
|
|
160
159
|
|
|
160
|
+
// Use DataFactory XSD constants for datatype comparison
|
|
161
|
+
const xsdString = 'http://www.w3.org/2001/XMLSchema#string';
|
|
161
162
|
if (quad.object.language) {
|
|
162
163
|
annotation += ` @${quad.object.language}`;
|
|
163
|
-
} else if (quad.object.datatype.value !==
|
|
164
|
+
} else if (quad.object.datatype.value !== xsdString) {
|
|
164
165
|
annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
|
|
165
166
|
}
|
|
166
167
|
|
|
167
168
|
const literalText = `[${quad.object.value}] {${annotation}}\n`;
|
|
168
|
-
const literalBlockId = generateBlockId();
|
|
169
169
|
const literalBlock = {
|
|
170
|
-
id:
|
|
170
|
+
id: generateBlockId(),
|
|
171
171
|
range: { start: currentPos, end: currentPos + literalText.length },
|
|
172
172
|
subject: subjectIRI,
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
expandedPredicate: quad.predicate.value,
|
|
177
|
-
form: '',
|
|
178
|
-
entryIndex: 0
|
|
179
|
-
}],
|
|
173
|
+
types: [],
|
|
174
|
+
predicates: [{ iri: quad.predicate.value, form: '' }],
|
|
175
|
+
context: { ...context },
|
|
180
176
|
carrierType: 'span',
|
|
181
177
|
valueRange: { start: currentPos + 1, end: currentPos + 1 + quad.object.value.length },
|
|
182
178
|
attrsRange: { start: currentPos + literalText.indexOf('{'), end: currentPos + literalText.indexOf('}') + 1 }
|
|
183
179
|
};
|
|
184
|
-
blocks.set(literalBlockId, literalBlock);
|
|
185
180
|
|
|
186
|
-
// Add to
|
|
181
|
+
// Add to quadMap
|
|
187
182
|
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
188
|
-
|
|
183
|
+
quadMap.set(key, createUnifiedSlot(literalBlock, 0, {
|
|
189
184
|
kind: 'pred',
|
|
190
185
|
subject: quad.subject,
|
|
191
186
|
predicate: quad.predicate,
|
|
@@ -200,29 +195,25 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
200
195
|
// Add objects (deterministic order)
|
|
201
196
|
const sortedObjects = objects.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
|
|
202
197
|
for (const quad of sortedObjects) {
|
|
203
|
-
const predShort = shortenIRI(quad.predicate.value, context);
|
|
204
198
|
const objShort = shortenIRI(quad.object.value, context);
|
|
205
|
-
const
|
|
199
|
+
const predShort = shortenIRI(quad.predicate.value, context);
|
|
200
|
+
const objectText = `[${objShort}] {+${objShort} ?${predShort}}\n`;
|
|
206
201
|
|
|
207
|
-
const objectText = `[${localName}] {+${objShort} ?${predShort}}\n`;
|
|
208
|
-
const objectBlockId = generateBlockId();
|
|
209
202
|
const objectBlock = {
|
|
210
|
-
id:
|
|
203
|
+
id: generateBlockId(),
|
|
211
204
|
range: { start: currentPos, end: currentPos + objectText.length },
|
|
212
205
|
subject: subjectIRI,
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
}
|
|
219
|
-
carrierType: 'span'
|
|
206
|
+
types: [],
|
|
207
|
+
predicates: [{ iri: quad.predicate.value, form: '?' }],
|
|
208
|
+
context: { ...context },
|
|
209
|
+
carrierType: 'span',
|
|
210
|
+
valueRange: { start: currentPos + 1, end: currentPos + 1 + objShort.length },
|
|
211
|
+
attrsRange: { start: currentPos + objectText.indexOf('{'), end: currentPos + objectText.indexOf('}') + 1 }
|
|
220
212
|
};
|
|
221
|
-
blocks.set(objectBlockId, objectBlock);
|
|
222
213
|
|
|
223
|
-
// Add to
|
|
214
|
+
// Add to quadMap
|
|
224
215
|
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
225
|
-
|
|
216
|
+
quadMap.set(key, createUnifiedSlot(objectBlock, 0, {
|
|
226
217
|
kind: 'pred',
|
|
227
218
|
subject: quad.subject,
|
|
228
219
|
predicate: quad.predicate,
|
|
@@ -234,13 +225,11 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
234
225
|
currentPos += objectText.length;
|
|
235
226
|
}
|
|
236
227
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
currentPos += 1;
|
|
240
|
-
}
|
|
228
|
+
text += '\n';
|
|
229
|
+
currentPos += 1;
|
|
241
230
|
}
|
|
242
231
|
|
|
243
|
-
return { text
|
|
232
|
+
return { text, quadMap };
|
|
244
233
|
}
|
|
245
234
|
|
|
246
235
|
function generateBlockId() {
|
package/src/index.js
CHANGED
package/src/locate.js
CHANGED
|
@@ -17,7 +17,7 @@ export function locate(quad, origin, text = '', context = {}) {
|
|
|
17
17
|
origin = parseResult.origin;
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
-
if (!quad || !origin || !origin.
|
|
20
|
+
if (!quad || !origin || !origin.quadMap) {
|
|
21
21
|
return null;
|
|
22
22
|
}
|
|
23
23
|
|
|
@@ -27,57 +27,40 @@ export function locate(quad, origin, text = '', context = {}) {
|
|
|
27
27
|
return null;
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
// Generate the quad key to lookup in
|
|
30
|
+
// Generate the quad key to lookup in quadMap
|
|
31
31
|
const quadKey = quadIndexKey(normalizedQuad.subject, normalizedQuad.predicate, normalizedQuad.object);
|
|
32
32
|
|
|
33
|
-
// Find the slot information in
|
|
34
|
-
const slotInfo = origin.
|
|
33
|
+
// Find the slot information in quadMap
|
|
34
|
+
const slotInfo = origin.quadMap.get(quadKey);
|
|
35
35
|
if (!slotInfo) {
|
|
36
36
|
return null;
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
//
|
|
40
|
-
const block =
|
|
41
|
-
if (!block) {
|
|
42
|
-
return null;
|
|
43
|
-
}
|
|
39
|
+
// In unified structure, slotInfo contains all block information
|
|
40
|
+
const block = slotInfo;
|
|
44
41
|
|
|
45
|
-
// Extract the actual text content based on carrier type
|
|
42
|
+
// Extract the actual text content based on carrier type
|
|
46
43
|
let contentRange = null;
|
|
47
44
|
let content = '';
|
|
48
45
|
|
|
49
46
|
if (block.carrierType === 'heading') {
|
|
50
|
-
// For headings, use the
|
|
51
|
-
contentRange = block.
|
|
52
|
-
content = text.substring(block.
|
|
53
|
-
} else if (block.carrierType === 'blockquote' || block.carrierType === 'list' || block.carrierType === 'span') {
|
|
54
|
-
// For blockquotes, lists, and spans,
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
// For blockquotes, the entry.raw contains the full carrier text
|
|
63
|
-
// Extract just the content part before the annotation
|
|
64
|
-
const annotationStart = entry.raw.indexOf('{');
|
|
65
|
-
if (annotationStart !== -1) {
|
|
66
|
-
const carrierContent = entry.raw.substring(0, annotationStart).trim();
|
|
67
|
-
// Find this content in the block text
|
|
68
|
-
const contentStart = text.indexOf(carrierContent, block.range.start);
|
|
69
|
-
if (contentStart !== -1) {
|
|
70
|
-
const contentEnd = contentStart + carrierContent.length;
|
|
71
|
-
contentRange = { start: contentStart, end: contentEnd };
|
|
72
|
-
content = text.substring(contentStart, contentEnd);
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
}
|
|
47
|
+
// For headings, use the value range for the heading text
|
|
48
|
+
contentRange = block.valueRange;
|
|
49
|
+
content = text.substring(block.valueRange.start, block.valueRange.end);
|
|
50
|
+
} else if (block.carrierType === 'emphasis' || block.carrierType === 'blockquote' || block.carrierType === 'list' || block.carrierType === 'span') {
|
|
51
|
+
// For emphasis, blockquotes, lists, and spans, use the value range
|
|
52
|
+
if (block.valueRange) {
|
|
53
|
+
contentRange = block.valueRange;
|
|
54
|
+
content = text.substring(block.valueRange.start, block.valueRange.end);
|
|
55
|
+
} else {
|
|
56
|
+
// Fallback to block range
|
|
57
|
+
contentRange = block.range;
|
|
58
|
+
content = text.substring(block.range.start, block.range.end);
|
|
76
59
|
}
|
|
77
60
|
}
|
|
78
61
|
|
|
79
62
|
return {
|
|
80
|
-
blockId: slotInfo.
|
|
63
|
+
blockId: slotInfo.id,
|
|
81
64
|
entryIndex: slotInfo.entryIndex,
|
|
82
65
|
kind: slotInfo.kind,
|
|
83
66
|
subject: normalizedQuad.subject,
|
package/src/parse.js
CHANGED
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
expandIRI,
|
|
5
5
|
parseSemanticBlock,
|
|
6
6
|
quadIndexKey,
|
|
7
|
-
|
|
7
|
+
createUnifiedSlot,
|
|
8
8
|
createLiteral,
|
|
9
9
|
hash
|
|
10
10
|
} from './utils.js';
|
|
@@ -16,7 +16,6 @@ const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
|
16
16
|
const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
|
|
17
17
|
const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
18
18
|
const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
|
|
19
|
-
const LIST_CONTEXT_REGEX = /^(.+?)\s*\{([^}]+)\}$/;
|
|
20
19
|
const INLINE_CARRIER_PATTERNS = {
|
|
21
20
|
EMPHASIS: /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y,
|
|
22
21
|
CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
|
|
@@ -87,13 +86,12 @@ function getCarriers(token) {
|
|
|
87
86
|
return token._carriers || (token._carriers = extractInlineCarriers(token.text, token.range[0]));
|
|
88
87
|
}
|
|
89
88
|
|
|
90
|
-
const createListToken = (type, line, lineStart, pos, match
|
|
89
|
+
const createListToken = (type, line, lineStart, pos, match) => {
|
|
91
90
|
const attrs = match[4] || null;
|
|
92
91
|
const prefix = match[1].length + (match[2] ? match[2].length : 0);
|
|
93
92
|
const rangeInfo = calcRangeInfo(line, attrs, lineStart, prefix, match[3].length);
|
|
94
|
-
const extra = indent !== null ? { indent } : { indent: match[1].length };
|
|
95
93
|
return createToken(type, [lineStart, pos - 1], match[3].trim(), attrs,
|
|
96
|
-
rangeInfo.attrsRange, rangeInfo.valueRange,
|
|
94
|
+
rangeInfo.attrsRange, rangeInfo.valueRange, { indent: match[1].length });
|
|
97
95
|
};
|
|
98
96
|
|
|
99
97
|
function scanTokens(text) {
|
|
@@ -173,7 +171,7 @@ function scanTokens(text) {
|
|
|
173
171
|
|
|
174
172
|
function handleList(line, lineStart, pos) {
|
|
175
173
|
const match = UNORDERED_LIST_REGEX.exec(line);
|
|
176
|
-
tokens.push(createListToken('list', line, lineStart, pos, match
|
|
174
|
+
tokens.push(createListToken('list', line, lineStart, pos, match));
|
|
177
175
|
return true;
|
|
178
176
|
}
|
|
179
177
|
|
|
@@ -277,14 +275,14 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
277
275
|
}
|
|
278
276
|
|
|
279
277
|
function calcCarrierRanges(match, baseOffset, matchStart) {
|
|
280
|
-
const valueStart = baseOffset + matchStart;
|
|
278
|
+
const valueStart = baseOffset + matchStart + match[0].indexOf(match[1]);
|
|
281
279
|
const valueEnd = valueStart + match[1].length;
|
|
282
280
|
const attrsStart = baseOffset + matchStart + match[0].indexOf('{');
|
|
283
281
|
const attrsEnd = attrsStart + match[2].length + 2; // +2 for { and }
|
|
284
282
|
return {
|
|
285
283
|
valueRange: [valueStart, valueEnd],
|
|
286
284
|
attrsRange: [attrsStart + 1, attrsEnd - 1], // Exclude braces
|
|
287
|
-
range: [
|
|
285
|
+
range: [baseOffset + matchStart, attrsEnd],
|
|
288
286
|
pos: matchStart + match[0].length // pos should be relative to current text, not document
|
|
289
287
|
};
|
|
290
288
|
}
|
|
@@ -345,7 +343,7 @@ function determineCarrierType(url) {
|
|
|
345
343
|
return { carrierType: 'span', resourceIRI: null };
|
|
346
344
|
}
|
|
347
345
|
|
|
348
|
-
function createBlock(subject, types, predicates,
|
|
346
|
+
function createBlock(subject, types, predicates, range, attrsRange, valueRange, carrierType, ctx) {
|
|
349
347
|
const expanded = {
|
|
350
348
|
subject,
|
|
351
349
|
types: types.map(t => expandIRI(typeof t === 'string' ? t : t.iri, ctx)),
|
|
@@ -364,23 +362,26 @@ function createBlock(subject, types, predicates, entries, range, attrsRange, val
|
|
|
364
362
|
subject,
|
|
365
363
|
types: expanded.types,
|
|
366
364
|
predicates: expanded.predicates,
|
|
367
|
-
|
|
368
|
-
context: { ...ctx }
|
|
365
|
+
context: ctx
|
|
369
366
|
};
|
|
370
367
|
}
|
|
371
368
|
|
|
372
|
-
function emitQuad(quads,
|
|
369
|
+
function emitQuad(quads, quadMap, block, subject, predicate, object, dataFactory, meta = null) {
|
|
373
370
|
if (!subject || !predicate || !object) return;
|
|
371
|
+
|
|
374
372
|
const quad = dataFactory.quad(subject, predicate, object);
|
|
375
373
|
quads.push(quad);
|
|
376
374
|
|
|
377
|
-
const
|
|
375
|
+
const unifiedSlot = createUnifiedSlot(block, meta?.entryIndex, {
|
|
378
376
|
...meta,
|
|
379
|
-
subject,
|
|
377
|
+
subject,
|
|
378
|
+
predicate,
|
|
379
|
+
object
|
|
380
380
|
});
|
|
381
381
|
|
|
382
|
-
|
|
382
|
+
quadMap.set(quadIndexKey(quad.subject, quad.predicate, quad.object), unifiedSlot);
|
|
383
383
|
}
|
|
384
|
+
|
|
384
385
|
const resolveFragment = (fragment, state) => {
|
|
385
386
|
if (!state.currentSubject) return null;
|
|
386
387
|
const baseIRI = state.currentSubject.value.split('#')[0];
|
|
@@ -403,10 +404,10 @@ function resolveObject(sem, state) {
|
|
|
403
404
|
return state.df.namedNode(expandIRI(sem.object, state.ctx));
|
|
404
405
|
}
|
|
405
406
|
|
|
406
|
-
const createTypeQuad = (typeIRI, subject, state,
|
|
407
|
+
const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
|
|
407
408
|
const expandedType = expandIRI(typeIRI, state.ctx);
|
|
408
409
|
emitQuad(
|
|
409
|
-
state.quads, state.origin.
|
|
410
|
+
state.quads, state.origin.quadMap, block,
|
|
410
411
|
subject,
|
|
411
412
|
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
412
413
|
state.df.namedNode(expandedType),
|
|
@@ -419,9 +420,9 @@ function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block
|
|
|
419
420
|
sem.types.forEach(t => {
|
|
420
421
|
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
421
422
|
const entryIndex = typeof t === 'string' ? null : t.entryIndex;
|
|
422
|
-
// Type subject priority: explicit subject > soft object > URL > current subject
|
|
423
|
+
// Type subject priority: explicit subject > soft object > carrier URL > current subject
|
|
423
424
|
let typeSubject = newSubject || localObject || carrierO || S;
|
|
424
|
-
createTypeQuad(typeIRI, typeSubject, state, block
|
|
425
|
+
createTypeQuad(typeIRI, typeSubject, state, block, entryIndex);
|
|
425
426
|
});
|
|
426
427
|
}
|
|
427
428
|
|
|
@@ -452,7 +453,7 @@ function processPredicateAnnotations(sem, newSubject, previousSubject, localObje
|
|
|
452
453
|
const role = determinePredicateRole(pred, carrier, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L);
|
|
453
454
|
if (role) {
|
|
454
455
|
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
455
|
-
emitQuad(state.quads, state.origin.
|
|
456
|
+
emitQuad(state.quads, state.origin.quadMap, block,
|
|
456
457
|
role.subject, P, role.object, state.df,
|
|
457
458
|
{ kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex }
|
|
458
459
|
);
|
|
@@ -480,11 +481,10 @@ function processAnnotation(carrier, sem, state, options = {}) {
|
|
|
480
481
|
if (!S) return;
|
|
481
482
|
|
|
482
483
|
const block = createBlock(
|
|
483
|
-
S.value, sem.types, sem.predicates,
|
|
484
|
+
S.value, sem.types, sem.predicates,
|
|
484
485
|
carrier.range, carrier.attrsRange || null, carrier.valueRange || null,
|
|
485
486
|
carrier.type || null, state.ctx
|
|
486
487
|
);
|
|
487
|
-
state.origin.blocks.set(block.id, block);
|
|
488
488
|
|
|
489
489
|
const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
|
|
490
490
|
const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
|
|
@@ -494,171 +494,12 @@ function processAnnotation(carrier, sem, state, options = {}) {
|
|
|
494
494
|
processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state, carrier);
|
|
495
495
|
}
|
|
496
496
|
|
|
497
|
-
export function findItemSubject(listToken, carriers, state) {
|
|
498
|
-
const sem = parseSemCached(listToken.attrs);
|
|
499
|
-
if (sem.subject && sem.subject !== 'RESET') {
|
|
500
|
-
const subject = resolveSubject(sem, state);
|
|
501
|
-
if (subject) {
|
|
502
|
-
return {
|
|
503
|
-
subject,
|
|
504
|
-
carrier: { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range }
|
|
505
|
-
};
|
|
506
|
-
}
|
|
507
|
-
}
|
|
508
497
|
|
|
509
|
-
for (const carrier of carriers) {
|
|
510
|
-
const carrierSem = parseSemCached(carrier.attrs);
|
|
511
|
-
if (carrierSem.subject && carrierSem.subject !== 'RESET') {
|
|
512
|
-
const subject = resolveSubject(carrierSem, state);
|
|
513
|
-
if (subject) {
|
|
514
|
-
return { subject, carrier };
|
|
515
|
-
}
|
|
516
|
-
}
|
|
517
|
-
}
|
|
518
498
|
|
|
519
|
-
return null;
|
|
520
|
-
}
|
|
521
499
|
|
|
522
|
-
const processContextSem = ({ sem, itemSubject, contextSubject, inheritLiterals = false, state, blockId = 'list-context' }) => {
|
|
523
|
-
sem.types.forEach(t => {
|
|
524
|
-
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
525
|
-
emitQuad(
|
|
526
|
-
state.quads, state.origin.quadIndex, blockId,
|
|
527
|
-
itemSubject,
|
|
528
|
-
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
529
|
-
state.df.namedNode(expandIRI(typeIRI, state.ctx)),
|
|
530
|
-
state.df
|
|
531
|
-
);
|
|
532
|
-
});
|
|
533
500
|
|
|
534
|
-
sem.predicates.forEach(pred => {
|
|
535
|
-
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
536
|
-
if (pred.form === '!') {
|
|
537
|
-
emitQuad(state.quads, state.origin.quadIndex, blockId, itemSubject, P, contextSubject, state.df);
|
|
538
|
-
} else if (pred.form === '?') {
|
|
539
|
-
emitQuad(state.quads, state.origin.quadIndex, blockId, contextSubject, P, itemSubject, state.df);
|
|
540
|
-
}
|
|
541
|
-
});
|
|
542
501
|
|
|
543
|
-
if (inheritLiterals) {
|
|
544
|
-
const literalPredicates = sem.predicates.filter(p => p.form === '');
|
|
545
|
-
if (literalPredicates.length > 0) {
|
|
546
|
-
return {
|
|
547
|
-
subject: null, object: null, types: [],
|
|
548
|
-
predicates: literalPredicates.map(p => ({ iri: p.iri, form: p.form, entryIndex: p.entryIndex })),
|
|
549
|
-
datatype: null, language: null, entries: []
|
|
550
|
-
};
|
|
551
|
-
}
|
|
552
|
-
}
|
|
553
|
-
return null;
|
|
554
|
-
};
|
|
555
502
|
|
|
556
|
-
const manageListStack = (token, state) => {
|
|
557
|
-
while (state.listStack.length && token.indent < state.listStack[state.listStack.length - 1].indent) {
|
|
558
|
-
state.listStack.pop();
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
if (state.pendingListContext) {
|
|
562
|
-
state.listStack.push({
|
|
563
|
-
indent: token.indent,
|
|
564
|
-
anchorSubject: state.pendingListContext.subject,
|
|
565
|
-
contextSubject: state.pendingListContext.subject,
|
|
566
|
-
contextSem: state.pendingListContext.sem,
|
|
567
|
-
contextText: state.pendingListContext.contextText,
|
|
568
|
-
contextToken: state.pendingListContext.contextToken // Store context token for origins
|
|
569
|
-
});
|
|
570
|
-
state.pendingListContext = null;
|
|
571
|
-
} else if (state.listStack.length === 0 || token.indent > state.listStack[state.listStack.length - 1].indent) {
|
|
572
|
-
const parentFrame = state.listStack.length > 0 ? state.listStack[state.listStack.length - 1] : null;
|
|
573
|
-
state.listStack.push({
|
|
574
|
-
indent: token.indent,
|
|
575
|
-
anchorSubject: parentFrame?.anchorSubject || null,
|
|
576
|
-
contextSubject: parentFrame?.anchorSubject || null,
|
|
577
|
-
contextSem: null
|
|
578
|
-
});
|
|
579
|
-
}
|
|
580
|
-
};
|
|
581
|
-
|
|
582
|
-
const combineSemanticInfo = (token, carriers, listFrame, state, itemSubject) => {
|
|
583
|
-
const combinedSem = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
584
|
-
const addSem = (sem) => {
|
|
585
|
-
const entryIndex = combinedSem.entries.length;
|
|
586
|
-
combinedSem.types.push(...sem.types);
|
|
587
|
-
combinedSem.predicates.push(...sem.predicates);
|
|
588
|
-
combinedSem.entries.push(...sem.entries.map(entry => ({ ...entry, entryIndex })));
|
|
589
|
-
};
|
|
590
|
-
|
|
591
|
-
if (listFrame?.contextSem) {
|
|
592
|
-
const inheritedSem = processContextSem({ sem: listFrame.contextSem, itemSubject, contextSubject: listFrame.contextSubject, inheritLiterals: true, state });
|
|
593
|
-
if (inheritedSem) addSem(inheritedSem);
|
|
594
|
-
}
|
|
595
|
-
|
|
596
|
-
if (token.attrs) addSem(parseSemCached(token.attrs));
|
|
597
|
-
carriers.forEach(carrier => { if (carrier.attrs) addSem(parseSemCached(carrier.attrs)); });
|
|
598
|
-
|
|
599
|
-
return combinedSem;
|
|
600
|
-
};
|
|
601
|
-
|
|
602
|
-
const processListItem = (token, state) => {
|
|
603
|
-
const carriers = getCarriers(token);
|
|
604
|
-
const itemInfo = findItemSubject(token, carriers, state);
|
|
605
|
-
if (!itemInfo) return;
|
|
606
|
-
|
|
607
|
-
const { subject: itemSubject } = itemInfo;
|
|
608
|
-
if (state.listStack.length > 0) state.listStack[state.listStack.length - 1].anchorSubject = itemSubject;
|
|
609
|
-
|
|
610
|
-
const listFrame = state.listStack[state.listStack.length - 1];
|
|
611
|
-
const combinedSem = combineSemanticInfo(token, carriers, listFrame, state, itemSubject);
|
|
612
|
-
|
|
613
|
-
if (combinedSem.entries.length > 0) {
|
|
614
|
-
const prevSubject = state.currentSubject;
|
|
615
|
-
state.currentSubject = itemSubject;
|
|
616
|
-
|
|
617
|
-
processAnnotation({ type: 'list', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null }, combinedSem, state, { preserveGlobalSubject: !state.listStack.length, implicitSubject: itemSubject });
|
|
618
|
-
|
|
619
|
-
state.currentSubject = prevSubject;
|
|
620
|
-
}
|
|
621
|
-
};
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
function processListContextFromParagraph(token, state) {
|
|
625
|
-
const contextMatch = LIST_CONTEXT_REGEX.exec(token.text);
|
|
626
|
-
if (!contextMatch) return;
|
|
627
|
-
|
|
628
|
-
const contextSem = parseSemCached(`{${contextMatch[2]}}`);
|
|
629
|
-
let contextSubject = state.currentSubject || state.documentSubject;
|
|
630
|
-
|
|
631
|
-
if (!contextSubject && state.tokens) {
|
|
632
|
-
for (let i = state.currentTokenIndex - 1; i >= 0; i--) {
|
|
633
|
-
const prevToken = state.tokens[i];
|
|
634
|
-
if (prevToken.type === 'heading' && prevToken.attrs) {
|
|
635
|
-
const prevSem = parseSemCached(prevToken.attrs);
|
|
636
|
-
if (prevSem.subject) {
|
|
637
|
-
const resolvedSubject = resolveSubject(prevSem, state);
|
|
638
|
-
if (resolvedSubject) {
|
|
639
|
-
contextSubject = resolvedSubject.value;
|
|
640
|
-
break;
|
|
641
|
-
}
|
|
642
|
-
}
|
|
643
|
-
}
|
|
644
|
-
}
|
|
645
|
-
}
|
|
646
|
-
|
|
647
|
-
const nextToken = state.tokens?.[state.currentTokenIndex + 1];
|
|
648
|
-
if (state.listStack.length > 0 && nextToken && nextToken.type === 'list') {
|
|
649
|
-
const currentFrame = state.listStack[state.listStack.length - 1];
|
|
650
|
-
if (currentFrame.anchorSubject && nextToken.indent > currentFrame.indent) {
|
|
651
|
-
contextSubject = currentFrame.anchorSubject;
|
|
652
|
-
}
|
|
653
|
-
}
|
|
654
|
-
|
|
655
|
-
state.pendingListContext = {
|
|
656
|
-
sem: contextSem,
|
|
657
|
-
subject: contextSubject,
|
|
658
|
-
contextText: contextMatch[1].replace(':', '').trim(),
|
|
659
|
-
contextToken: token // Store the context token for origin ranges
|
|
660
|
-
};
|
|
661
|
-
}
|
|
662
503
|
|
|
663
504
|
function processTokenAnnotations(token, state, tokenType) {
|
|
664
505
|
if (token.attrs) {
|
|
@@ -692,13 +533,6 @@ function processStandaloneSubject(token, state) {
|
|
|
692
533
|
|
|
693
534
|
const TOKEN_PROCESSORS = {
|
|
694
535
|
heading: (token, state) => {
|
|
695
|
-
if (token.attrs) {
|
|
696
|
-
const headingSem = parseSemCached(token.attrs);
|
|
697
|
-
if (headingSem.subject) {
|
|
698
|
-
const subject = resolveSubject(headingSem, state);
|
|
699
|
-
if (subject) state.documentSubject = subject;
|
|
700
|
-
}
|
|
701
|
-
}
|
|
702
536
|
processTokenAnnotations(token, state, token.type);
|
|
703
537
|
},
|
|
704
538
|
code: (token, state) => {
|
|
@@ -709,12 +543,10 @@ const TOKEN_PROCESSORS = {
|
|
|
709
543
|
},
|
|
710
544
|
para: (token, state) => {
|
|
711
545
|
processStandaloneSubject(token, state);
|
|
712
|
-
processListContextFromParagraph(token, state);
|
|
713
546
|
processTokenAnnotations(token, state, token.type);
|
|
714
547
|
},
|
|
715
548
|
list: (token, state) => {
|
|
716
|
-
|
|
717
|
-
processListItem(token, state);
|
|
549
|
+
processTokenAnnotations(token, state, token.type);
|
|
718
550
|
},
|
|
719
551
|
};
|
|
720
552
|
|
|
@@ -723,11 +555,8 @@ export function parse(text, options = {}) {
|
|
|
723
555
|
ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
|
|
724
556
|
df: options.dataFactory || DataFactory,
|
|
725
557
|
quads: [],
|
|
726
|
-
origin: {
|
|
558
|
+
origin: { quadMap: new Map() },
|
|
727
559
|
currentSubject: null,
|
|
728
|
-
documentSubject: null,
|
|
729
|
-
listStack: [],
|
|
730
|
-
pendingListContext: null,
|
|
731
560
|
tokens: null,
|
|
732
561
|
currentTokenIndex: -1
|
|
733
562
|
};
|
package/src/utils.js
CHANGED
|
@@ -218,6 +218,15 @@ export const DataFactory = {
|
|
|
218
218
|
fromQuad: (inQuad) => {
|
|
219
219
|
if (inQuad instanceof Quad) return inQuad;
|
|
220
220
|
if (inQuad.termType !== 'Quad') {
|
|
221
|
+
// Handle plain object quads by treating them as quads
|
|
222
|
+
if (inQuad.subject && inQuad.predicate && inQuad.object) {
|
|
223
|
+
return new Quad(
|
|
224
|
+
DataFactory.fromTerm(inQuad.subject),
|
|
225
|
+
DataFactory.fromTerm(inQuad.predicate),
|
|
226
|
+
DataFactory.fromTerm(inQuad.object),
|
|
227
|
+
DataFactory.fromTerm(inQuad.graph || DataFactory.defaultGraph())
|
|
228
|
+
);
|
|
229
|
+
}
|
|
221
230
|
throw new Error(`Unexpected termType: ${inQuad.termType}`);
|
|
222
231
|
}
|
|
223
232
|
return new Quad(
|
|
@@ -265,9 +274,18 @@ export function expandIRI(term, ctx) {
|
|
|
265
274
|
export function shortenIRI(iri, ctx) {
|
|
266
275
|
if (!iri || !iri.startsWith('http')) return iri;
|
|
267
276
|
if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) return iri.substring(ctx['@vocab'].length);
|
|
277
|
+
|
|
278
|
+
// Find the best matching prefix - more precise matching
|
|
268
279
|
for (const [prefix, namespace] of Object.entries(ctx)) {
|
|
269
280
|
if (prefix !== '@vocab' && iri.startsWith(namespace)) {
|
|
270
|
-
|
|
281
|
+
// Check if this is the best match (longest namespace)
|
|
282
|
+
const isBestMatch = Object.entries(ctx)
|
|
283
|
+
.filter(([p, ns]) => p !== '@vocab' && iri.startsWith(ns))
|
|
284
|
+
.every(([p, ns]) => namespace.length >= ns.length || (p === prefix && ns.length === namespace.length));
|
|
285
|
+
|
|
286
|
+
if (isBestMatch) {
|
|
287
|
+
return prefix + ':' + iri.substring(namespace.length);
|
|
288
|
+
}
|
|
271
289
|
}
|
|
272
290
|
}
|
|
273
291
|
return iri;
|
|
@@ -407,16 +425,29 @@ export function parseQuadIndexKey(key) {
|
|
|
407
425
|
}
|
|
408
426
|
}
|
|
409
427
|
|
|
410
|
-
// Direct slot management functions -
|
|
411
|
-
export function
|
|
428
|
+
// Direct slot management functions - unified with block data
|
|
429
|
+
export function createUnifiedSlot(block, entryIndex, meta = {}) {
|
|
412
430
|
const slotId = meta.subject && meta.predicate ? hash(`${meta.subject.value}|${meta.predicate.value}`) : null;
|
|
413
431
|
return {
|
|
414
|
-
|
|
432
|
+
// Block metadata
|
|
433
|
+
id: block.id,
|
|
434
|
+
range: block.range,
|
|
435
|
+
attrsRange: block.attrsRange,
|
|
436
|
+
valueRange: block.valueRange,
|
|
437
|
+
carrierType: block.carrierType,
|
|
438
|
+
subject: block.subject,
|
|
439
|
+
types: block.types,
|
|
440
|
+
predicates: block.predicates,
|
|
441
|
+
context: block.context,
|
|
442
|
+
|
|
443
|
+
// Slot metadata
|
|
415
444
|
entryIndex,
|
|
416
445
|
slotId,
|
|
417
446
|
isVacant: false,
|
|
418
447
|
lastValue: null,
|
|
419
448
|
vacantSince: null,
|
|
449
|
+
|
|
450
|
+
// Quad metadata
|
|
420
451
|
...meta
|
|
421
452
|
};
|
|
422
453
|
}
|
|
@@ -430,9 +461,9 @@ export function markSlotAsVacant(slotInfo, deletedValue) {
|
|
|
430
461
|
} : null;
|
|
431
462
|
}
|
|
432
463
|
|
|
433
|
-
export function findVacantSlot(
|
|
464
|
+
export function findVacantSlot(quadMap, subject, predicate) {
|
|
434
465
|
const targetSlotId = hash(`${subject.value}|${predicate.value}`);
|
|
435
|
-
return Array.from(
|
|
466
|
+
return Array.from(quadMap.values())
|
|
436
467
|
.find(slot => slot.slotId === targetSlotId && slot.isVacant);
|
|
437
468
|
}
|
|
438
469
|
|