mdld-parse 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +88 -27
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const DEFAULT_CONTEXT = {
|
|
1
|
+
export const DEFAULT_CONTEXT = {
|
|
2
2
|
'@vocab': 'http://schema.org/',
|
|
3
3
|
rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
|
|
4
4
|
rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
|
|
@@ -6,7 +6,7 @@ const DEFAULT_CONTEXT = {
|
|
|
6
6
|
schema: 'http://schema.org/'
|
|
7
7
|
};
|
|
8
8
|
|
|
9
|
-
const DataFactory = {
|
|
9
|
+
export const DataFactory = {
|
|
10
10
|
namedNode: (v) => ({ termType: 'NamedNode', value: v }),
|
|
11
11
|
blankNode: (v = `b${Math.random().toString(36).slice(2, 11)}`) => ({ termType: 'BlankNode', value: v }),
|
|
12
12
|
literal: (v, lang) => {
|
|
@@ -18,14 +18,14 @@ const DataFactory = {
|
|
|
18
18
|
quad: (s, p, o, g) => ({ subject: s, predicate: p, object: o, graph: g || DataFactory.namedNode('') })
|
|
19
19
|
};
|
|
20
20
|
|
|
21
|
-
function hash(str) {
|
|
21
|
+
export function hash(str) {
|
|
22
22
|
let h = 5381;
|
|
23
23
|
for (let i = 0; i < str.length; i++) h = ((h << 5) + h) + str.charCodeAt(i);
|
|
24
24
|
return Math.abs(h).toString(16).slice(0, 12);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
// IRI Utilities
|
|
28
|
-
function expandIRI(term, ctx) {
|
|
28
|
+
export function expandIRI(term, ctx) {
|
|
29
29
|
if (term == null) return null;
|
|
30
30
|
const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
|
|
31
31
|
const t = raw.trim();
|
|
@@ -48,17 +48,13 @@ export function shortenIRI(iri, ctx) {
|
|
|
48
48
|
return iri;
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
-
function
|
|
52
|
-
return operation === 'expand' ? expandIRI(term, ctx) : shortenIRI(term, ctx);
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
function parseSemanticBlock(raw) {
|
|
51
|
+
export function parseSemanticBlock(raw) {
|
|
56
52
|
try {
|
|
57
53
|
const src = String(raw || '').trim();
|
|
58
54
|
const cleaned = src.replace(/^\{|\}$/g, '').trim();
|
|
59
|
-
if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
55
|
+
if (!cleaned) return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
60
56
|
|
|
61
|
-
const result = { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
57
|
+
const result = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
62
58
|
const re = /\S+/g;
|
|
63
59
|
let m;
|
|
64
60
|
while ((m = re.exec(cleaned)) !== null) {
|
|
@@ -80,6 +76,13 @@ function parseSemanticBlock(raw) {
|
|
|
80
76
|
continue;
|
|
81
77
|
}
|
|
82
78
|
|
|
79
|
+
if (token.startsWith('=?')) {
|
|
80
|
+
const iri = token.substring(2);
|
|
81
|
+
result.object = iri;
|
|
82
|
+
result.entries.push({ kind: 'object', iri, relRange: { start: relStart, end: relEnd }, raw: token });
|
|
83
|
+
continue;
|
|
84
|
+
}
|
|
85
|
+
|
|
83
86
|
if (token.startsWith('=')) {
|
|
84
87
|
const iri = token.substring(1);
|
|
85
88
|
result.subject = iri;
|
|
@@ -137,7 +140,7 @@ function parseSemanticBlock(raw) {
|
|
|
137
140
|
return result;
|
|
138
141
|
} catch (error) {
|
|
139
142
|
console.error(`Error parsing semantic block ${raw}:`, error);
|
|
140
|
-
return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
143
|
+
return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
141
144
|
}
|
|
142
145
|
}
|
|
143
146
|
|
|
@@ -472,11 +475,13 @@ function createLiteral(value, datatype, language, context, dataFactory) {
|
|
|
472
475
|
function processAnnotation(carrier, sem, state) {
|
|
473
476
|
if (sem.subject === 'RESET') {
|
|
474
477
|
state.currentSubject = null;
|
|
478
|
+
state.currentObject = null;
|
|
475
479
|
return;
|
|
476
480
|
}
|
|
477
481
|
|
|
478
482
|
const previousSubject = state.currentSubject;
|
|
479
483
|
let newSubject = null;
|
|
484
|
+
let localObject = null;
|
|
480
485
|
|
|
481
486
|
if (sem.subject) {
|
|
482
487
|
if (sem.subject.startsWith('=#')) {
|
|
@@ -492,6 +497,12 @@ function processAnnotation(carrier, sem, state) {
|
|
|
492
497
|
newSubject = state.df.namedNode(expandIRI(sem.subject, state.ctx));
|
|
493
498
|
}
|
|
494
499
|
}
|
|
500
|
+
|
|
501
|
+
if (sem.object) {
|
|
502
|
+
// Handle soft IRI object declaration - local to this annotation only
|
|
503
|
+
localObject = state.df.namedNode(expandIRI(sem.object, state.ctx));
|
|
504
|
+
}
|
|
505
|
+
|
|
495
506
|
if (newSubject) state.currentSubject = newSubject;
|
|
496
507
|
|
|
497
508
|
const S = state.currentSubject;
|
|
@@ -501,12 +512,15 @@ function processAnnotation(carrier, sem, state) {
|
|
|
501
512
|
state.origin.blocks.set(block.id, block);
|
|
502
513
|
|
|
503
514
|
const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
|
|
504
|
-
const
|
|
515
|
+
const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
|
|
505
516
|
|
|
506
517
|
sem.types.forEach(t => {
|
|
507
518
|
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
508
519
|
const entryIndex = typeof t === 'string' ? null : t.entryIndex;
|
|
509
|
-
|
|
520
|
+
// For types with subject declarations, the type applies to the new subject
|
|
521
|
+
// For types with soft IRI declarations, the type applies to the soft IRI object
|
|
522
|
+
// Otherwise, type applies to carrier object or current subject
|
|
523
|
+
const typeSubject = newSubject ? newSubject : (localObject || carrierO || S);
|
|
510
524
|
const expandedType = expandIRI(typeIRI, state.ctx);
|
|
511
525
|
emitQuad(state.quads, state.origin.quadIndex, block.id, typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandedType), state.df, { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex });
|
|
512
526
|
});
|
|
@@ -516,18 +530,26 @@ function processAnnotation(carrier, sem, state) {
|
|
|
516
530
|
const token = `${pred.form}${pred.iri}`;
|
|
517
531
|
|
|
518
532
|
if (pred.form === '') {
|
|
519
|
-
|
|
533
|
+
// S —p→ L (use soft IRI object as subject if available, otherwise current subject)
|
|
534
|
+
const subjectIRI = localObject || S;
|
|
535
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
520
536
|
} else if (pred.form === '?') {
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
537
|
+
// S —p→ O (use previous subject as subject, newSubject as object)
|
|
538
|
+
const subjectIRI = newSubject ? previousSubject : S;
|
|
539
|
+
const objectIRI = localObject || newSubject || carrierO;
|
|
540
|
+
if (objectIRI && subjectIRI) {
|
|
541
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, objectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
525
542
|
}
|
|
543
|
+
} else if (pred.form === '^') {
|
|
544
|
+
// L —p→ S (use soft IRI object as subject if available, otherwise current subject)
|
|
545
|
+
const subjectIRI = localObject || S;
|
|
546
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, L, P, subjectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
526
547
|
} else if (pred.form === '^?') {
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
548
|
+
// O —p→ S (use previous subject as object, newSubject as subject)
|
|
549
|
+
const objectIRI = newSubject ? previousSubject : S;
|
|
550
|
+
const subjectIRI = localObject || newSubject || carrierO;
|
|
551
|
+
if (objectIRI && subjectIRI) {
|
|
552
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, objectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
531
553
|
}
|
|
532
554
|
}
|
|
533
555
|
});
|
|
@@ -622,7 +644,8 @@ export function parse(text, options = {}) {
|
|
|
622
644
|
df: options.dataFactory || DataFactory,
|
|
623
645
|
quads: [],
|
|
624
646
|
origin: { blocks: new Map(), quadIndex: new Map() },
|
|
625
|
-
currentSubject: null
|
|
647
|
+
currentSubject: null,
|
|
648
|
+
currentObject: null
|
|
626
649
|
};
|
|
627
650
|
|
|
628
651
|
const tokens = scanTokens(text);
|
|
@@ -720,6 +743,16 @@ function removeOneToken(tokens, matchFn) {
|
|
|
720
743
|
return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
|
|
721
744
|
}
|
|
722
745
|
|
|
746
|
+
function addObjectToken(tokens, iri) {
|
|
747
|
+
const objectToken = `=?${iri}`;
|
|
748
|
+
return tokens.includes(objectToken) ? tokens : [...tokens, objectToken];
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
function removeObjectToken(tokens, iri) {
|
|
752
|
+
const objectToken = `=?${iri}`;
|
|
753
|
+
return removeOneToken(tokens, t => t === objectToken);
|
|
754
|
+
}
|
|
755
|
+
|
|
723
756
|
function sanitizeCarrierValueForBlock(block, raw) {
|
|
724
757
|
const s = String(raw ?? '');
|
|
725
758
|
const t = block?.carrierType;
|
|
@@ -1013,6 +1046,17 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
1013
1046
|
return;
|
|
1014
1047
|
}
|
|
1015
1048
|
|
|
1049
|
+
// Handle object token removal
|
|
1050
|
+
if (entry?.kind === 'object') {
|
|
1051
|
+
const objectIRI = shortenIRI(quad.object.value, ctx);
|
|
1052
|
+
const { tokens: updated, removed } = removeObjectToken(tokens, objectIRI);
|
|
1053
|
+
if (!removed) return;
|
|
1054
|
+
|
|
1055
|
+
const newAttrs = updated.length === 0 ? '{}' : writeAttrsTokens(updated);
|
|
1056
|
+
edits.push({ start: span.start, end: span.end, text: newAttrs });
|
|
1057
|
+
return;
|
|
1058
|
+
}
|
|
1059
|
+
|
|
1016
1060
|
const tokens = normalizeAttrsTokens(span.text);
|
|
1017
1061
|
let updated = tokens;
|
|
1018
1062
|
let removed = false;
|
|
@@ -1084,7 +1128,8 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
1084
1128
|
} else {
|
|
1085
1129
|
const full = quad.object.value;
|
|
1086
1130
|
const label = shortenIRI(full, ctx);
|
|
1087
|
-
|
|
1131
|
+
const objectShort = shortenIRI(full, ctx);
|
|
1132
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=?${objectShort} ?${predShort}}` });
|
|
1088
1133
|
}
|
|
1089
1134
|
return;
|
|
1090
1135
|
}
|
|
@@ -1103,8 +1148,24 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
1103
1148
|
|
|
1104
1149
|
if (quad.object.termType === 'NamedNode') {
|
|
1105
1150
|
const full = quad.object.value;
|
|
1106
|
-
const
|
|
1107
|
-
|
|
1151
|
+
const objectShort = shortenIRI(full, ctx);
|
|
1152
|
+
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
1153
|
+
|
|
1154
|
+
// Check if this is a ?predicate form (should use object IRI)
|
|
1155
|
+
const span = readSpan(targetBlock, text, 'attrs');
|
|
1156
|
+
const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);
|
|
1157
|
+
const hasObjectToken = tokens.some(t => t.startsWith('=?'));
|
|
1158
|
+
|
|
1159
|
+
if (hasObjectToken || anchored?.entry?.form === '?') {
|
|
1160
|
+
// Add object token if not present
|
|
1161
|
+
const updated = addObjectToken(tokens, objectShort);
|
|
1162
|
+
if (updated.length !== tokens.length) {
|
|
1163
|
+
edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
|
|
1164
|
+
}
|
|
1165
|
+
} else {
|
|
1166
|
+
// Create new annotation with object token
|
|
1167
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${objectShort}] {=?${objectShort} ?${predShort}}` });
|
|
1168
|
+
}
|
|
1108
1169
|
return;
|
|
1109
1170
|
}
|
|
1110
1171
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.2.6",
|
|
3
|
+
"version": "0.2.7",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|