mdld-parse 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -16
- package/package.json +1 -1
- package/src/applyDiff.js +141 -76
- package/src/generate.js +68 -79
- package/src/index.js +1 -0
- package/src/locate.js +20 -37
- package/src/parse.js +24 -195
- package/src/utils.js +254 -13
package/src/parse.js
CHANGED
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
expandIRI,
|
|
5
5
|
parseSemanticBlock,
|
|
6
6
|
quadIndexKey,
|
|
7
|
-
|
|
7
|
+
createUnifiedSlot,
|
|
8
8
|
createLiteral,
|
|
9
9
|
hash
|
|
10
10
|
} from './utils.js';
|
|
@@ -16,7 +16,6 @@ const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
|
16
16
|
const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
|
|
17
17
|
const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
18
18
|
const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
|
|
19
|
-
const LIST_CONTEXT_REGEX = /^(.+?)\s*\{([^}]+)\}$/;
|
|
20
19
|
const INLINE_CARRIER_PATTERNS = {
|
|
21
20
|
EMPHASIS: /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y,
|
|
22
21
|
CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
|
|
@@ -87,13 +86,12 @@ function getCarriers(token) {
|
|
|
87
86
|
return token._carriers || (token._carriers = extractInlineCarriers(token.text, token.range[0]));
|
|
88
87
|
}
|
|
89
88
|
|
|
90
|
-
const createListToken = (type, line, lineStart, pos, match
|
|
89
|
+
const createListToken = (type, line, lineStart, pos, match) => {
|
|
91
90
|
const attrs = match[4] || null;
|
|
92
91
|
const prefix = match[1].length + (match[2] ? match[2].length : 0);
|
|
93
92
|
const rangeInfo = calcRangeInfo(line, attrs, lineStart, prefix, match[3].length);
|
|
94
|
-
const extra = indent !== null ? { indent } : { indent: match[1].length };
|
|
95
93
|
return createToken(type, [lineStart, pos - 1], match[3].trim(), attrs,
|
|
96
|
-
rangeInfo.attrsRange, rangeInfo.valueRange,
|
|
94
|
+
rangeInfo.attrsRange, rangeInfo.valueRange, { indent: match[1].length });
|
|
97
95
|
};
|
|
98
96
|
|
|
99
97
|
function scanTokens(text) {
|
|
@@ -173,7 +171,7 @@ function scanTokens(text) {
|
|
|
173
171
|
|
|
174
172
|
function handleList(line, lineStart, pos) {
|
|
175
173
|
const match = UNORDERED_LIST_REGEX.exec(line);
|
|
176
|
-
tokens.push(createListToken('list', line, lineStart, pos, match
|
|
174
|
+
tokens.push(createListToken('list', line, lineStart, pos, match));
|
|
177
175
|
return true;
|
|
178
176
|
}
|
|
179
177
|
|
|
@@ -277,14 +275,14 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
277
275
|
}
|
|
278
276
|
|
|
279
277
|
function calcCarrierRanges(match, baseOffset, matchStart) {
|
|
280
|
-
const valueStart = baseOffset + matchStart;
|
|
278
|
+
const valueStart = baseOffset + matchStart + match[0].indexOf(match[1]);
|
|
281
279
|
const valueEnd = valueStart + match[1].length;
|
|
282
280
|
const attrsStart = baseOffset + matchStart + match[0].indexOf('{');
|
|
283
281
|
const attrsEnd = attrsStart + match[2].length + 2; // +2 for { and }
|
|
284
282
|
return {
|
|
285
283
|
valueRange: [valueStart, valueEnd],
|
|
286
284
|
attrsRange: [attrsStart + 1, attrsEnd - 1], // Exclude braces
|
|
287
|
-
range: [
|
|
285
|
+
range: [baseOffset + matchStart, attrsEnd],
|
|
288
286
|
pos: matchStart + match[0].length // pos should be relative to current text, not document
|
|
289
287
|
};
|
|
290
288
|
}
|
|
@@ -345,7 +343,7 @@ function determineCarrierType(url) {
|
|
|
345
343
|
return { carrierType: 'span', resourceIRI: null };
|
|
346
344
|
}
|
|
347
345
|
|
|
348
|
-
function createBlock(subject, types, predicates,
|
|
346
|
+
function createBlock(subject, types, predicates, range, attrsRange, valueRange, carrierType, ctx) {
|
|
349
347
|
const expanded = {
|
|
350
348
|
subject,
|
|
351
349
|
types: types.map(t => expandIRI(typeof t === 'string' ? t : t.iri, ctx)),
|
|
@@ -364,23 +362,26 @@ function createBlock(subject, types, predicates, entries, range, attrsRange, val
|
|
|
364
362
|
subject,
|
|
365
363
|
types: expanded.types,
|
|
366
364
|
predicates: expanded.predicates,
|
|
367
|
-
|
|
368
|
-
context: { ...ctx }
|
|
365
|
+
context: ctx
|
|
369
366
|
};
|
|
370
367
|
}
|
|
371
368
|
|
|
372
|
-
function emitQuad(quads,
|
|
369
|
+
function emitQuad(quads, quadMap, block, subject, predicate, object, dataFactory, meta = null) {
|
|
373
370
|
if (!subject || !predicate || !object) return;
|
|
371
|
+
|
|
374
372
|
const quad = dataFactory.quad(subject, predicate, object);
|
|
375
373
|
quads.push(quad);
|
|
376
374
|
|
|
377
|
-
const
|
|
375
|
+
const unifiedSlot = createUnifiedSlot(block, meta?.entryIndex, {
|
|
378
376
|
...meta,
|
|
379
|
-
subject,
|
|
377
|
+
subject,
|
|
378
|
+
predicate,
|
|
379
|
+
object
|
|
380
380
|
});
|
|
381
381
|
|
|
382
|
-
|
|
382
|
+
quadMap.set(quadIndexKey(quad.subject, quad.predicate, quad.object), unifiedSlot);
|
|
383
383
|
}
|
|
384
|
+
|
|
384
385
|
const resolveFragment = (fragment, state) => {
|
|
385
386
|
if (!state.currentSubject) return null;
|
|
386
387
|
const baseIRI = state.currentSubject.value.split('#')[0];
|
|
@@ -403,10 +404,10 @@ function resolveObject(sem, state) {
|
|
|
403
404
|
return state.df.namedNode(expandIRI(sem.object, state.ctx));
|
|
404
405
|
}
|
|
405
406
|
|
|
406
|
-
const createTypeQuad = (typeIRI, subject, state,
|
|
407
|
+
const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
|
|
407
408
|
const expandedType = expandIRI(typeIRI, state.ctx);
|
|
408
409
|
emitQuad(
|
|
409
|
-
state.quads, state.origin.
|
|
410
|
+
state.quads, state.origin.quadMap, block,
|
|
410
411
|
subject,
|
|
411
412
|
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
412
413
|
state.df.namedNode(expandedType),
|
|
@@ -419,9 +420,9 @@ function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block
|
|
|
419
420
|
sem.types.forEach(t => {
|
|
420
421
|
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
421
422
|
const entryIndex = typeof t === 'string' ? null : t.entryIndex;
|
|
422
|
-
// Type subject priority: explicit subject > soft object > URL > current subject
|
|
423
|
+
// Type subject priority: explicit subject > soft object > carrier URL > current subject
|
|
423
424
|
let typeSubject = newSubject || localObject || carrierO || S;
|
|
424
|
-
createTypeQuad(typeIRI, typeSubject, state, block
|
|
425
|
+
createTypeQuad(typeIRI, typeSubject, state, block, entryIndex);
|
|
425
426
|
});
|
|
426
427
|
}
|
|
427
428
|
|
|
@@ -452,7 +453,7 @@ function processPredicateAnnotations(sem, newSubject, previousSubject, localObje
|
|
|
452
453
|
const role = determinePredicateRole(pred, carrier, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L);
|
|
453
454
|
if (role) {
|
|
454
455
|
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
455
|
-
emitQuad(state.quads, state.origin.
|
|
456
|
+
emitQuad(state.quads, state.origin.quadMap, block,
|
|
456
457
|
role.subject, P, role.object, state.df,
|
|
457
458
|
{ kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex }
|
|
458
459
|
);
|
|
@@ -480,11 +481,10 @@ function processAnnotation(carrier, sem, state, options = {}) {
|
|
|
480
481
|
if (!S) return;
|
|
481
482
|
|
|
482
483
|
const block = createBlock(
|
|
483
|
-
S.value, sem.types, sem.predicates,
|
|
484
|
+
S.value, sem.types, sem.predicates,
|
|
484
485
|
carrier.range, carrier.attrsRange || null, carrier.valueRange || null,
|
|
485
486
|
carrier.type || null, state.ctx
|
|
486
487
|
);
|
|
487
|
-
state.origin.blocks.set(block.id, block);
|
|
488
488
|
|
|
489
489
|
const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
|
|
490
490
|
const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
|
|
@@ -494,171 +494,12 @@ function processAnnotation(carrier, sem, state, options = {}) {
|
|
|
494
494
|
processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state, carrier);
|
|
495
495
|
}
|
|
496
496
|
|
|
497
|
-
export function findItemSubject(listToken, carriers, state) {
|
|
498
|
-
const sem = parseSemCached(listToken.attrs);
|
|
499
|
-
if (sem.subject && sem.subject !== 'RESET') {
|
|
500
|
-
const subject = resolveSubject(sem, state);
|
|
501
|
-
if (subject) {
|
|
502
|
-
return {
|
|
503
|
-
subject,
|
|
504
|
-
carrier: { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range }
|
|
505
|
-
};
|
|
506
|
-
}
|
|
507
|
-
}
|
|
508
497
|
|
|
509
|
-
for (const carrier of carriers) {
|
|
510
|
-
const carrierSem = parseSemCached(carrier.attrs);
|
|
511
|
-
if (carrierSem.subject && carrierSem.subject !== 'RESET') {
|
|
512
|
-
const subject = resolveSubject(carrierSem, state);
|
|
513
|
-
if (subject) {
|
|
514
|
-
return { subject, carrier };
|
|
515
|
-
}
|
|
516
|
-
}
|
|
517
|
-
}
|
|
518
498
|
|
|
519
|
-
return null;
|
|
520
|
-
}
|
|
521
499
|
|
|
522
|
-
const processContextSem = ({ sem, itemSubject, contextSubject, inheritLiterals = false, state, blockId = 'list-context' }) => {
|
|
523
|
-
sem.types.forEach(t => {
|
|
524
|
-
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
525
|
-
emitQuad(
|
|
526
|
-
state.quads, state.origin.quadIndex, blockId,
|
|
527
|
-
itemSubject,
|
|
528
|
-
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
529
|
-
state.df.namedNode(expandIRI(typeIRI, state.ctx)),
|
|
530
|
-
state.df
|
|
531
|
-
);
|
|
532
|
-
});
|
|
533
500
|
|
|
534
|
-
sem.predicates.forEach(pred => {
|
|
535
|
-
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
536
|
-
if (pred.form === '!') {
|
|
537
|
-
emitQuad(state.quads, state.origin.quadIndex, blockId, itemSubject, P, contextSubject, state.df);
|
|
538
|
-
} else if (pred.form === '?') {
|
|
539
|
-
emitQuad(state.quads, state.origin.quadIndex, blockId, contextSubject, P, itemSubject, state.df);
|
|
540
|
-
}
|
|
541
|
-
});
|
|
542
501
|
|
|
543
|
-
if (inheritLiterals) {
|
|
544
|
-
const literalPredicates = sem.predicates.filter(p => p.form === '');
|
|
545
|
-
if (literalPredicates.length > 0) {
|
|
546
|
-
return {
|
|
547
|
-
subject: null, object: null, types: [],
|
|
548
|
-
predicates: literalPredicates.map(p => ({ iri: p.iri, form: p.form, entryIndex: p.entryIndex })),
|
|
549
|
-
datatype: null, language: null, entries: []
|
|
550
|
-
};
|
|
551
|
-
}
|
|
552
|
-
}
|
|
553
|
-
return null;
|
|
554
|
-
};
|
|
555
502
|
|
|
556
|
-
const manageListStack = (token, state) => {
|
|
557
|
-
while (state.listStack.length && token.indent < state.listStack[state.listStack.length - 1].indent) {
|
|
558
|
-
state.listStack.pop();
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
if (state.pendingListContext) {
|
|
562
|
-
state.listStack.push({
|
|
563
|
-
indent: token.indent,
|
|
564
|
-
anchorSubject: state.pendingListContext.subject,
|
|
565
|
-
contextSubject: state.pendingListContext.subject,
|
|
566
|
-
contextSem: state.pendingListContext.sem,
|
|
567
|
-
contextText: state.pendingListContext.contextText,
|
|
568
|
-
contextToken: state.pendingListContext.contextToken // Store context token for origins
|
|
569
|
-
});
|
|
570
|
-
state.pendingListContext = null;
|
|
571
|
-
} else if (state.listStack.length === 0 || token.indent > state.listStack[state.listStack.length - 1].indent) {
|
|
572
|
-
const parentFrame = state.listStack.length > 0 ? state.listStack[state.listStack.length - 1] : null;
|
|
573
|
-
state.listStack.push({
|
|
574
|
-
indent: token.indent,
|
|
575
|
-
anchorSubject: parentFrame?.anchorSubject || null,
|
|
576
|
-
contextSubject: parentFrame?.anchorSubject || null,
|
|
577
|
-
contextSem: null
|
|
578
|
-
});
|
|
579
|
-
}
|
|
580
|
-
};
|
|
581
|
-
|
|
582
|
-
const combineSemanticInfo = (token, carriers, listFrame, state, itemSubject) => {
|
|
583
|
-
const combinedSem = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
584
|
-
const addSem = (sem) => {
|
|
585
|
-
const entryIndex = combinedSem.entries.length;
|
|
586
|
-
combinedSem.types.push(...sem.types);
|
|
587
|
-
combinedSem.predicates.push(...sem.predicates);
|
|
588
|
-
combinedSem.entries.push(...sem.entries.map(entry => ({ ...entry, entryIndex })));
|
|
589
|
-
};
|
|
590
|
-
|
|
591
|
-
if (listFrame?.contextSem) {
|
|
592
|
-
const inheritedSem = processContextSem({ sem: listFrame.contextSem, itemSubject, contextSubject: listFrame.contextSubject, inheritLiterals: true, state });
|
|
593
|
-
if (inheritedSem) addSem(inheritedSem);
|
|
594
|
-
}
|
|
595
|
-
|
|
596
|
-
if (token.attrs) addSem(parseSemCached(token.attrs));
|
|
597
|
-
carriers.forEach(carrier => { if (carrier.attrs) addSem(parseSemCached(carrier.attrs)); });
|
|
598
|
-
|
|
599
|
-
return combinedSem;
|
|
600
|
-
};
|
|
601
|
-
|
|
602
|
-
const processListItem = (token, state) => {
|
|
603
|
-
const carriers = getCarriers(token);
|
|
604
|
-
const itemInfo = findItemSubject(token, carriers, state);
|
|
605
|
-
if (!itemInfo) return;
|
|
606
|
-
|
|
607
|
-
const { subject: itemSubject } = itemInfo;
|
|
608
|
-
if (state.listStack.length > 0) state.listStack[state.listStack.length - 1].anchorSubject = itemSubject;
|
|
609
|
-
|
|
610
|
-
const listFrame = state.listStack[state.listStack.length - 1];
|
|
611
|
-
const combinedSem = combineSemanticInfo(token, carriers, listFrame, state, itemSubject);
|
|
612
|
-
|
|
613
|
-
if (combinedSem.entries.length > 0) {
|
|
614
|
-
const prevSubject = state.currentSubject;
|
|
615
|
-
state.currentSubject = itemSubject;
|
|
616
|
-
|
|
617
|
-
processAnnotation({ type: 'list', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null }, combinedSem, state, { preserveGlobalSubject: !state.listStack.length, implicitSubject: itemSubject });
|
|
618
|
-
|
|
619
|
-
state.currentSubject = prevSubject;
|
|
620
|
-
}
|
|
621
|
-
};
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
function processListContextFromParagraph(token, state) {
|
|
625
|
-
const contextMatch = LIST_CONTEXT_REGEX.exec(token.text);
|
|
626
|
-
if (!contextMatch) return;
|
|
627
|
-
|
|
628
|
-
const contextSem = parseSemCached(`{${contextMatch[2]}}`);
|
|
629
|
-
let contextSubject = state.currentSubject || state.documentSubject;
|
|
630
|
-
|
|
631
|
-
if (!contextSubject && state.tokens) {
|
|
632
|
-
for (let i = state.currentTokenIndex - 1; i >= 0; i--) {
|
|
633
|
-
const prevToken = state.tokens[i];
|
|
634
|
-
if (prevToken.type === 'heading' && prevToken.attrs) {
|
|
635
|
-
const prevSem = parseSemCached(prevToken.attrs);
|
|
636
|
-
if (prevSem.subject) {
|
|
637
|
-
const resolvedSubject = resolveSubject(prevSem, state);
|
|
638
|
-
if (resolvedSubject) {
|
|
639
|
-
contextSubject = resolvedSubject.value;
|
|
640
|
-
break;
|
|
641
|
-
}
|
|
642
|
-
}
|
|
643
|
-
}
|
|
644
|
-
}
|
|
645
|
-
}
|
|
646
|
-
|
|
647
|
-
const nextToken = state.tokens?.[state.currentTokenIndex + 1];
|
|
648
|
-
if (state.listStack.length > 0 && nextToken && nextToken.type === 'list') {
|
|
649
|
-
const currentFrame = state.listStack[state.listStack.length - 1];
|
|
650
|
-
if (currentFrame.anchorSubject && nextToken.indent > currentFrame.indent) {
|
|
651
|
-
contextSubject = currentFrame.anchorSubject;
|
|
652
|
-
}
|
|
653
|
-
}
|
|
654
|
-
|
|
655
|
-
state.pendingListContext = {
|
|
656
|
-
sem: contextSem,
|
|
657
|
-
subject: contextSubject,
|
|
658
|
-
contextText: contextMatch[1].replace(':', '').trim(),
|
|
659
|
-
contextToken: token // Store the context token for origin ranges
|
|
660
|
-
};
|
|
661
|
-
}
|
|
662
503
|
|
|
663
504
|
function processTokenAnnotations(token, state, tokenType) {
|
|
664
505
|
if (token.attrs) {
|
|
@@ -692,13 +533,6 @@ function processStandaloneSubject(token, state) {
|
|
|
692
533
|
|
|
693
534
|
const TOKEN_PROCESSORS = {
|
|
694
535
|
heading: (token, state) => {
|
|
695
|
-
if (token.attrs) {
|
|
696
|
-
const headingSem = parseSemCached(token.attrs);
|
|
697
|
-
if (headingSem.subject) {
|
|
698
|
-
const subject = resolveSubject(headingSem, state);
|
|
699
|
-
if (subject) state.documentSubject = subject;
|
|
700
|
-
}
|
|
701
|
-
}
|
|
702
536
|
processTokenAnnotations(token, state, token.type);
|
|
703
537
|
},
|
|
704
538
|
code: (token, state) => {
|
|
@@ -709,12 +543,10 @@ const TOKEN_PROCESSORS = {
|
|
|
709
543
|
},
|
|
710
544
|
para: (token, state) => {
|
|
711
545
|
processStandaloneSubject(token, state);
|
|
712
|
-
processListContextFromParagraph(token, state);
|
|
713
546
|
processTokenAnnotations(token, state, token.type);
|
|
714
547
|
},
|
|
715
548
|
list: (token, state) => {
|
|
716
|
-
|
|
717
|
-
processListItem(token, state);
|
|
549
|
+
processTokenAnnotations(token, state, token.type);
|
|
718
550
|
},
|
|
719
551
|
};
|
|
720
552
|
|
|
@@ -723,11 +555,8 @@ export function parse(text, options = {}) {
|
|
|
723
555
|
ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
|
|
724
556
|
df: options.dataFactory || DataFactory,
|
|
725
557
|
quads: [],
|
|
726
|
-
origin: {
|
|
558
|
+
origin: { quadMap: new Map() },
|
|
727
559
|
currentSubject: null,
|
|
728
|
-
documentSubject: null,
|
|
729
|
-
listStack: [],
|
|
730
|
-
pendingListContext: null,
|
|
731
560
|
tokens: null,
|
|
732
561
|
currentTokenIndex: -1
|
|
733
562
|
};
|
package/src/utils.js
CHANGED
|
@@ -7,16 +7,235 @@ export const DEFAULT_CONTEXT = {
|
|
|
7
7
|
prov: 'http://www.w3.org/ns/prov#'
|
|
8
8
|
};
|
|
9
9
|
|
|
10
|
+
// Base Term class for RDF/JS compatibility
|
|
11
|
+
export class Term {
|
|
12
|
+
constructor(id) {
|
|
13
|
+
this.id = id;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
equals(other) {
|
|
17
|
+
return !!other && this.termType === other.termType && this.value === other.value;
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
// NamedNode implementation
|
|
22
|
+
export class NamedNode extends Term {
|
|
23
|
+
constructor(iri) {
|
|
24
|
+
super(iri);
|
|
25
|
+
this.termType = 'NamedNode';
|
|
26
|
+
this.value = iri;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// Literal implementation with language/direction support
|
|
31
|
+
export class Literal extends Term {
|
|
32
|
+
constructor(id) {
|
|
33
|
+
super(id);
|
|
34
|
+
this.termType = 'Literal';
|
|
35
|
+
this.value = '';
|
|
36
|
+
this.language = '';
|
|
37
|
+
this.datatype = null;
|
|
38
|
+
|
|
39
|
+
// Parse the literal ID - handle escaped quotes properly
|
|
40
|
+
const dtMatch = id.match(/^"([^"\\]*(?:\\.[^"\\]*)*)"(\^\^([^"]+))?(@([^-]+)(--(.+))?)?$/);
|
|
41
|
+
if (dtMatch) {
|
|
42
|
+
// Unescape the value
|
|
43
|
+
this.value = dtMatch[1].replace(/\\"/g, '"').replace(/\\\\/g, '\\');
|
|
44
|
+
if (dtMatch[5]) {
|
|
45
|
+
this.language = dtMatch[5];
|
|
46
|
+
this.datatype = new NamedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#langString');
|
|
47
|
+
} else if (dtMatch[3]) {
|
|
48
|
+
this.datatype = new NamedNode(dtMatch[3]);
|
|
49
|
+
} else {
|
|
50
|
+
this.datatype = new NamedNode('http://www.w3.org/2001/XMLSchema#string');
|
|
51
|
+
}
|
|
52
|
+
} else {
|
|
53
|
+
// Fallback for simple literals without complex parsing
|
|
54
|
+
this.value = id.replace(/^"|"$/g, '');
|
|
55
|
+
this.datatype = new NamedNode('http://www.w3.org/2001/XMLSchema#string');
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
equals(other) {
|
|
60
|
+
return !!other &&
|
|
61
|
+
this.termType === other.termType &&
|
|
62
|
+
this.value === other.value &&
|
|
63
|
+
this.language === other.language &&
|
|
64
|
+
this.datatype?.value === other.datatype?.value;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// BlankNode implementation
|
|
69
|
+
export class BlankNode extends Term {
|
|
70
|
+
constructor(name) {
|
|
71
|
+
super(name || `b${Math.random().toString(36).slice(2, 11)}`);
|
|
72
|
+
this.termType = 'BlankNode';
|
|
73
|
+
this.value = this.id;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Variable implementation
|
|
78
|
+
export class Variable extends Term {
|
|
79
|
+
constructor(name) {
|
|
80
|
+
super(name);
|
|
81
|
+
this.termType = 'Variable';
|
|
82
|
+
this.value = name;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
// DefaultGraph implementation
|
|
87
|
+
export class DefaultGraph extends Term {
|
|
88
|
+
constructor() {
|
|
89
|
+
super('');
|
|
90
|
+
this.termType = 'DefaultGraph';
|
|
91
|
+
this.value = '';
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
equals(other) {
|
|
95
|
+
return !!other && this.termType === other.termType;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// Default graph singleton
|
|
100
|
+
const DEFAULTGRAPH = new DefaultGraph();
|
|
101
|
+
|
|
102
|
+
// Quad implementation
|
|
103
|
+
export class Quad extends Term {
|
|
104
|
+
constructor(subject, predicate, object, graph = DEFAULTGRAPH) {
|
|
105
|
+
super(`${subject.id}|${predicate.id}|${object.id}|${graph.id}`);
|
|
106
|
+
this.termType = 'Quad';
|
|
107
|
+
this.subject = subject;
|
|
108
|
+
this.predicate = predicate;
|
|
109
|
+
this.object = object;
|
|
110
|
+
this.graph = graph;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
equals(other) {
|
|
114
|
+
return !!other &&
|
|
115
|
+
this.termType === other.termType &&
|
|
116
|
+
this.subject.equals(other.subject) &&
|
|
117
|
+
this.predicate.equals(other.predicate) &&
|
|
118
|
+
this.object.equals(other.object) &&
|
|
119
|
+
this.graph.equals(other.graph);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
toJSON() {
|
|
123
|
+
return {
|
|
124
|
+
termType: this.termType,
|
|
125
|
+
subject: this.subject.toJSON ? this.subject.toJSON() : { termType: this.subject.termType, value: this.subject.value },
|
|
126
|
+
predicate: this.predicate.toJSON ? this.predicate.toJSON() : { termType: this.predicate.termType, value: this.predicate.value },
|
|
127
|
+
object: this.object.toJSON ? this.object.toJSON() : { termType: this.object.termType, value: this.object.value },
|
|
128
|
+
graph: this.graph.toJSON ? this.graph.toJSON() : { termType: this.graph.termType, value: this.graph.value }
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// XSD constants
|
|
134
|
+
const xsd = {
|
|
135
|
+
boolean: 'http://www.w3.org/2001/XMLSchema#boolean',
|
|
136
|
+
integer: 'http://www.w3.org/2001/XMLSchema#integer',
|
|
137
|
+
double: 'http://www.w3.org/2001/XMLSchema#double',
|
|
138
|
+
string: 'http://www.w3.org/2001/XMLSchema#string'
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
// DataFactory singleton matching N3.js interface
|
|
10
142
|
export const DataFactory = {
|
|
11
|
-
namedNode: (
|
|
12
|
-
blankNode: (
|
|
13
|
-
literal: (
|
|
14
|
-
|
|
15
|
-
|
|
143
|
+
namedNode: (iri) => new NamedNode(iri),
|
|
144
|
+
blankNode: (name) => new BlankNode(name),
|
|
145
|
+
literal: (value, languageOrDataType) => {
|
|
146
|
+
// Convert non-string values to string for proper serialization
|
|
147
|
+
const stringValue = String(value);
|
|
148
|
+
// Escape quotes in the value for proper serialization
|
|
149
|
+
const escapedValue = stringValue.replace(/"/g, '\\"');
|
|
150
|
+
|
|
151
|
+
// Create a language-tagged string
|
|
152
|
+
if (typeof languageOrDataType === 'string') {
|
|
153
|
+
return new Literal(`"${escapedValue}"@${languageOrDataType.toLowerCase()}`);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// Create a language-tagged string with base direction
|
|
157
|
+
if (languageOrDataType !== undefined && !('termType' in languageOrDataType)) {
|
|
158
|
+
const direction = languageOrDataType.direction ? `--${languageOrDataType.direction.toLowerCase()}` : '';
|
|
159
|
+
return new Literal(`"${escapedValue}"@${languageOrDataType.language.toLowerCase()}${direction}`);
|
|
16
160
|
}
|
|
17
|
-
|
|
161
|
+
|
|
162
|
+
// Automatically determine datatype for booleans and numbers
|
|
163
|
+
let datatype = languageOrDataType ? languageOrDataType.value : '';
|
|
164
|
+
if (datatype === '') {
|
|
165
|
+
// Convert a boolean
|
|
166
|
+
if (typeof value === 'boolean') {
|
|
167
|
+
datatype = xsd.boolean;
|
|
168
|
+
}
|
|
169
|
+
// Convert an integer or double
|
|
170
|
+
else if (typeof value === 'number') {
|
|
171
|
+
if (Number.isFinite(value)) {
|
|
172
|
+
datatype = Number.isInteger(value) ? xsd.integer : xsd.double;
|
|
173
|
+
} else {
|
|
174
|
+
datatype = xsd.double;
|
|
175
|
+
if (!Number.isNaN(value)) {
|
|
176
|
+
value = value > 0 ? 'INF' : '-INF';
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Create a datatyped literal
|
|
183
|
+
return (datatype === '' || datatype === xsd.string)
|
|
184
|
+
? new Literal(`"${escapedValue}"`)
|
|
185
|
+
: new Literal(`"${escapedValue}"^^${datatype}`);
|
|
18
186
|
},
|
|
19
|
-
|
|
187
|
+
variable: (name) => new Variable(name),
|
|
188
|
+
defaultGraph: () => DEFAULTGRAPH,
|
|
189
|
+
quad: (subject, predicate, object, graph) => new Quad(subject, predicate, object, graph),
|
|
190
|
+
triple: (subject, predicate, object, graph) => new Quad(subject, predicate, object, graph), // Alias for quad
|
|
191
|
+
fromTerm: (term) => {
|
|
192
|
+
if (term instanceof Term) return term;
|
|
193
|
+
|
|
194
|
+
// Term instantiated with another library
|
|
195
|
+
switch (term.termType) {
|
|
196
|
+
case 'NamedNode':
|
|
197
|
+
return new NamedNode(term.value);
|
|
198
|
+
case 'BlankNode':
|
|
199
|
+
return new BlankNode(term.value);
|
|
200
|
+
case 'Variable':
|
|
201
|
+
return new Variable(term.value);
|
|
202
|
+
case 'DefaultGraph':
|
|
203
|
+
return DEFAULTGRAPH;
|
|
204
|
+
case 'Literal':
|
|
205
|
+
if (term.language) {
|
|
206
|
+
return new Literal(`"${term.value}"@${term.language}`);
|
|
207
|
+
} else if (term.datatype) {
|
|
208
|
+
return new Literal(`"${term.value}"^^${term.datatype.value || term.datatype}`);
|
|
209
|
+
} else {
|
|
210
|
+
return new Literal(`"${term.value}"`);
|
|
211
|
+
}
|
|
212
|
+
case 'Quad':
|
|
213
|
+
return DataFactory.fromQuad(term);
|
|
214
|
+
default:
|
|
215
|
+
throw new Error(`Unexpected termType: ${term.termType}`);
|
|
216
|
+
}
|
|
217
|
+
},
|
|
218
|
+
fromQuad: (inQuad) => {
|
|
219
|
+
if (inQuad instanceof Quad) return inQuad;
|
|
220
|
+
if (inQuad.termType !== 'Quad') {
|
|
221
|
+
// Handle plain object quads by treating them as quads
|
|
222
|
+
if (inQuad.subject && inQuad.predicate && inQuad.object) {
|
|
223
|
+
return new Quad(
|
|
224
|
+
DataFactory.fromTerm(inQuad.subject),
|
|
225
|
+
DataFactory.fromTerm(inQuad.predicate),
|
|
226
|
+
DataFactory.fromTerm(inQuad.object),
|
|
227
|
+
DataFactory.fromTerm(inQuad.graph || DataFactory.defaultGraph())
|
|
228
|
+
);
|
|
229
|
+
}
|
|
230
|
+
throw new Error(`Unexpected termType: ${inQuad.termType}`);
|
|
231
|
+
}
|
|
232
|
+
return new Quad(
|
|
233
|
+
DataFactory.fromTerm(inQuad.subject),
|
|
234
|
+
DataFactory.fromTerm(inQuad.predicate),
|
|
235
|
+
DataFactory.fromTerm(inQuad.object),
|
|
236
|
+
DataFactory.fromTerm(inQuad.graph)
|
|
237
|
+
);
|
|
238
|
+
}
|
|
20
239
|
};
|
|
21
240
|
|
|
22
241
|
export function hash(str) {
|
|
@@ -55,9 +274,18 @@ export function expandIRI(term, ctx) {
|
|
|
55
274
|
export function shortenIRI(iri, ctx) {
|
|
56
275
|
if (!iri || !iri.startsWith('http')) return iri;
|
|
57
276
|
if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) return iri.substring(ctx['@vocab'].length);
|
|
277
|
+
|
|
278
|
+
// Find the best matching prefix - more precise matching
|
|
58
279
|
for (const [prefix, namespace] of Object.entries(ctx)) {
|
|
59
280
|
if (prefix !== '@vocab' && iri.startsWith(namespace)) {
|
|
60
|
-
|
|
281
|
+
// Check if this is the best match (longest namespace)
|
|
282
|
+
const isBestMatch = Object.entries(ctx)
|
|
283
|
+
.filter(([p, ns]) => p !== '@vocab' && iri.startsWith(ns))
|
|
284
|
+
.every(([p, ns]) => namespace.length >= ns.length || (p === prefix && ns.length === namespace.length));
|
|
285
|
+
|
|
286
|
+
if (isBestMatch) {
|
|
287
|
+
return prefix + ':' + iri.substring(namespace.length);
|
|
288
|
+
}
|
|
61
289
|
}
|
|
62
290
|
}
|
|
63
291
|
return iri;
|
|
@@ -197,16 +425,29 @@ export function parseQuadIndexKey(key) {
|
|
|
197
425
|
}
|
|
198
426
|
}
|
|
199
427
|
|
|
200
|
-
// Direct slot management functions -
|
|
201
|
-
export function
|
|
428
|
+
// Direct slot management functions - unified with block data
|
|
429
|
+
export function createUnifiedSlot(block, entryIndex, meta = {}) {
|
|
202
430
|
const slotId = meta.subject && meta.predicate ? hash(`${meta.subject.value}|${meta.predicate.value}`) : null;
|
|
203
431
|
return {
|
|
204
|
-
|
|
432
|
+
// Block metadata
|
|
433
|
+
id: block.id,
|
|
434
|
+
range: block.range,
|
|
435
|
+
attrsRange: block.attrsRange,
|
|
436
|
+
valueRange: block.valueRange,
|
|
437
|
+
carrierType: block.carrierType,
|
|
438
|
+
subject: block.subject,
|
|
439
|
+
types: block.types,
|
|
440
|
+
predicates: block.predicates,
|
|
441
|
+
context: block.context,
|
|
442
|
+
|
|
443
|
+
// Slot metadata
|
|
205
444
|
entryIndex,
|
|
206
445
|
slotId,
|
|
207
446
|
isVacant: false,
|
|
208
447
|
lastValue: null,
|
|
209
448
|
vacantSince: null,
|
|
449
|
+
|
|
450
|
+
// Quad metadata
|
|
210
451
|
...meta
|
|
211
452
|
};
|
|
212
453
|
}
|
|
@@ -220,9 +461,9 @@ export function markSlotAsVacant(slotInfo, deletedValue) {
|
|
|
220
461
|
} : null;
|
|
221
462
|
}
|
|
222
463
|
|
|
223
|
-
export function findVacantSlot(
|
|
464
|
+
export function findVacantSlot(quadMap, subject, predicate) {
|
|
224
465
|
const targetSlotId = hash(`${subject.value}|${predicate.value}`);
|
|
225
|
-
return Array.from(
|
|
466
|
+
return Array.from(quadMap.values())
|
|
226
467
|
.find(slot => slot.slotId === targetSlotId && slot.isVacant);
|
|
227
468
|
}
|
|
228
469
|
|