mdld-parse 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -1
- package/index.js +131 -30
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -94,12 +94,28 @@ Each predicate form determines the graph edge:
|
|
|
94
94
|
|
|
95
95
|
### Subject Declaration
|
|
96
96
|
|
|
97
|
-
Set
|
|
97
|
+
Set current subject (emits no quads):
|
|
98
98
|
|
|
99
99
|
```markdown
|
|
100
100
|
## Apollo 11 {=ex:apollo11}
|
|
101
101
|
```
|
|
102
102
|
|
|
103
|
+
#### Fragment Syntax
|
|
104
|
+
|
|
105
|
+
Create fragment IRIs relative to current subject:
|
|
106
|
+
|
|
107
|
+
```markdown
|
|
108
|
+
# Document {=ex:document}
|
|
109
|
+
{=#summary}
|
|
110
|
+
[Content] {name}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
```turtle
|
|
114
|
+
ex:document#summary schema:name "Content" .
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Fragments replace any existing fragment and require a current subject.
|
|
118
|
+
|
|
103
119
|
Subject remains in scope until reset with `{=}` or new subject declared.
|
|
104
120
|
|
|
105
121
|
### Type Declaration
|
package/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const DEFAULT_CONTEXT = {
|
|
1
|
+
export const DEFAULT_CONTEXT = {
|
|
2
2
|
'@vocab': 'http://schema.org/',
|
|
3
3
|
rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
|
|
4
4
|
rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
|
|
@@ -6,7 +6,7 @@ const DEFAULT_CONTEXT = {
|
|
|
6
6
|
schema: 'http://schema.org/'
|
|
7
7
|
};
|
|
8
8
|
|
|
9
|
-
const DataFactory = {
|
|
9
|
+
export const DataFactory = {
|
|
10
10
|
namedNode: (v) => ({ termType: 'NamedNode', value: v }),
|
|
11
11
|
blankNode: (v = `b${Math.random().toString(36).slice(2, 11)}`) => ({ termType: 'BlankNode', value: v }),
|
|
12
12
|
literal: (v, lang) => {
|
|
@@ -18,14 +18,14 @@ const DataFactory = {
|
|
|
18
18
|
quad: (s, p, o, g) => ({ subject: s, predicate: p, object: o, graph: g || DataFactory.namedNode('') })
|
|
19
19
|
};
|
|
20
20
|
|
|
21
|
-
function hash(str) {
|
|
21
|
+
export function hash(str) {
|
|
22
22
|
let h = 5381;
|
|
23
23
|
for (let i = 0; i < str.length; i++) h = ((h << 5) + h) + str.charCodeAt(i);
|
|
24
24
|
return Math.abs(h).toString(16).slice(0, 12);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
// IRI Utilities
|
|
28
|
-
function expandIRI(term, ctx) {
|
|
28
|
+
export function expandIRI(term, ctx) {
|
|
29
29
|
if (term == null) return null;
|
|
30
30
|
const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
|
|
31
31
|
const t = raw.trim();
|
|
@@ -48,17 +48,13 @@ export function shortenIRI(iri, ctx) {
|
|
|
48
48
|
return iri;
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
-
function
|
|
52
|
-
return operation === 'expand' ? expandIRI(term, ctx) : shortenIRI(term, ctx);
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
function parseSemanticBlock(raw) {
|
|
51
|
+
export function parseSemanticBlock(raw) {
|
|
56
52
|
try {
|
|
57
53
|
const src = String(raw || '').trim();
|
|
58
54
|
const cleaned = src.replace(/^\{|\}$/g, '').trim();
|
|
59
|
-
if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
55
|
+
if (!cleaned) return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
60
56
|
|
|
61
|
-
const result = { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
57
|
+
const result = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
62
58
|
const re = /\S+/g;
|
|
63
59
|
let m;
|
|
64
60
|
while ((m = re.exec(cleaned)) !== null) {
|
|
@@ -73,6 +69,20 @@ function parseSemanticBlock(raw) {
|
|
|
73
69
|
continue;
|
|
74
70
|
}
|
|
75
71
|
|
|
72
|
+
if (token.startsWith('=#')) {
|
|
73
|
+
const fragment = token.substring(2);
|
|
74
|
+
result.subject = `=#${fragment}`;
|
|
75
|
+
result.entries.push({ kind: 'fragment', fragment, relRange: { start: relStart, end: relEnd }, raw: token });
|
|
76
|
+
continue;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
if (token.startsWith('=?')) {
|
|
80
|
+
const iri = token.substring(2);
|
|
81
|
+
result.object = iri;
|
|
82
|
+
result.entries.push({ kind: 'object', iri, relRange: { start: relStart, end: relEnd }, raw: token });
|
|
83
|
+
continue;
|
|
84
|
+
}
|
|
85
|
+
|
|
76
86
|
if (token.startsWith('=')) {
|
|
77
87
|
const iri = token.substring(1);
|
|
78
88
|
result.subject = iri;
|
|
@@ -130,7 +140,7 @@ function parseSemanticBlock(raw) {
|
|
|
130
140
|
return result;
|
|
131
141
|
} catch (error) {
|
|
132
142
|
console.error(`Error parsing semantic block ${raw}:`, error);
|
|
133
|
-
return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
143
|
+
return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
134
144
|
}
|
|
135
145
|
}
|
|
136
146
|
|
|
@@ -465,11 +475,34 @@ function createLiteral(value, datatype, language, context, dataFactory) {
|
|
|
465
475
|
function processAnnotation(carrier, sem, state) {
|
|
466
476
|
if (sem.subject === 'RESET') {
|
|
467
477
|
state.currentSubject = null;
|
|
478
|
+
state.currentObject = null;
|
|
468
479
|
return;
|
|
469
480
|
}
|
|
470
481
|
|
|
471
482
|
const previousSubject = state.currentSubject;
|
|
472
|
-
let newSubject =
|
|
483
|
+
let newSubject = null;
|
|
484
|
+
let localObject = null;
|
|
485
|
+
|
|
486
|
+
if (sem.subject) {
|
|
487
|
+
if (sem.subject.startsWith('=#')) {
|
|
488
|
+
// Handle fragment syntax
|
|
489
|
+
const fragment = sem.subject.substring(2);
|
|
490
|
+
if (state.currentSubject) {
|
|
491
|
+
// Replace any existing fragment in current subject
|
|
492
|
+
const baseIRI = state.currentSubject.value.split('#')[0];
|
|
493
|
+
newSubject = state.df.namedNode(`${baseIRI}#${fragment}`);
|
|
494
|
+
}
|
|
495
|
+
} else {
|
|
496
|
+
// Regular IRI
|
|
497
|
+
newSubject = state.df.namedNode(expandIRI(sem.subject, state.ctx));
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
if (sem.object) {
|
|
502
|
+
// Handle soft IRI object declaration - local to this annotation only
|
|
503
|
+
localObject = state.df.namedNode(expandIRI(sem.object, state.ctx));
|
|
504
|
+
}
|
|
505
|
+
|
|
473
506
|
if (newSubject) state.currentSubject = newSubject;
|
|
474
507
|
|
|
475
508
|
const S = state.currentSubject;
|
|
@@ -479,12 +512,15 @@ function processAnnotation(carrier, sem, state) {
|
|
|
479
512
|
state.origin.blocks.set(block.id, block);
|
|
480
513
|
|
|
481
514
|
const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
|
|
482
|
-
const
|
|
515
|
+
const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
|
|
483
516
|
|
|
484
517
|
sem.types.forEach(t => {
|
|
485
518
|
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
486
519
|
const entryIndex = typeof t === 'string' ? null : t.entryIndex;
|
|
487
|
-
|
|
520
|
+
// For types with subject declarations, the type applies to the new subject
|
|
521
|
+
// For types with soft IRI declarations, the type applies to the soft IRI object
|
|
522
|
+
// Otherwise, type applies to carrier object or current subject
|
|
523
|
+
const typeSubject = newSubject ? newSubject : (localObject || carrierO || S);
|
|
488
524
|
const expandedType = expandIRI(typeIRI, state.ctx);
|
|
489
525
|
emitQuad(state.quads, state.origin.quadIndex, block.id, typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandedType), state.df, { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex });
|
|
490
526
|
});
|
|
@@ -494,18 +530,26 @@ function processAnnotation(carrier, sem, state) {
|
|
|
494
530
|
const token = `${pred.form}${pred.iri}`;
|
|
495
531
|
|
|
496
532
|
if (pred.form === '') {
|
|
497
|
-
|
|
533
|
+
// S —p→ L (use soft IRI object as subject if available, otherwise current subject)
|
|
534
|
+
const subjectIRI = localObject || S;
|
|
535
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
498
536
|
} else if (pred.form === '?') {
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
537
|
+
// S —p→ O (use previous subject as subject, newSubject as object)
|
|
538
|
+
const subjectIRI = newSubject ? previousSubject : S;
|
|
539
|
+
const objectIRI = localObject || newSubject || carrierO;
|
|
540
|
+
if (objectIRI && subjectIRI) {
|
|
541
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, objectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
503
542
|
}
|
|
543
|
+
} else if (pred.form === '^') {
|
|
544
|
+
// L —p→ S (use soft IRI object as subject if available, otherwise current subject)
|
|
545
|
+
const subjectIRI = localObject || S;
|
|
546
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, L, P, subjectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
504
547
|
} else if (pred.form === '^?') {
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
548
|
+
// O —p→ S (use previous subject as object, newSubject as subject)
|
|
549
|
+
const objectIRI = newSubject ? previousSubject : S;
|
|
550
|
+
const subjectIRI = localObject || newSubject || carrierO;
|
|
551
|
+
if (objectIRI && subjectIRI) {
|
|
552
|
+
emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, objectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
|
|
509
553
|
}
|
|
510
554
|
}
|
|
511
555
|
});
|
|
@@ -522,7 +566,16 @@ function processListContext(contextSem, listTokens, state, contextSubject = null
|
|
|
522
566
|
if (listToken.attrs) {
|
|
523
567
|
const itemSem = parseSemanticBlock(listToken.attrs);
|
|
524
568
|
if (itemSem.subject && itemSem.subject !== 'RESET') {
|
|
525
|
-
|
|
569
|
+
if (itemSem.subject.startsWith('=#')) {
|
|
570
|
+
// Handle fragment syntax in list items
|
|
571
|
+
const fragment = itemSem.subject.substring(2);
|
|
572
|
+
if (state.currentSubject) {
|
|
573
|
+
const baseIRI = state.currentSubject.value.split('#')[0];
|
|
574
|
+
itemSubject = state.df.namedNode(`${baseIRI}#${fragment}`);
|
|
575
|
+
}
|
|
576
|
+
} else {
|
|
577
|
+
itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
|
|
578
|
+
}
|
|
526
579
|
itemSubjectCarrier = { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range };
|
|
527
580
|
}
|
|
528
581
|
}
|
|
@@ -532,7 +585,16 @@ function processListContext(contextSem, listTokens, state, contextSubject = null
|
|
|
532
585
|
if (carrier.attrs) {
|
|
533
586
|
const itemSem = parseSemanticBlock(carrier.attrs);
|
|
534
587
|
if (itemSem.subject && itemSem.subject !== 'RESET') {
|
|
535
|
-
|
|
588
|
+
if (itemSem.subject.startsWith('=#')) {
|
|
589
|
+
// Handle fragment syntax in inline carriers
|
|
590
|
+
const fragment = itemSem.subject.substring(2);
|
|
591
|
+
if (state.currentSubject) {
|
|
592
|
+
const baseIRI = state.currentSubject.value.split('#')[0];
|
|
593
|
+
itemSubject = state.df.namedNode(`${baseIRI}#${fragment}`);
|
|
594
|
+
}
|
|
595
|
+
} else {
|
|
596
|
+
itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
|
|
597
|
+
}
|
|
536
598
|
itemSubjectCarrier = carrier;
|
|
537
599
|
break;
|
|
538
600
|
}
|
|
@@ -582,7 +644,8 @@ export function parse(text, options = {}) {
|
|
|
582
644
|
df: options.dataFactory || DataFactory,
|
|
583
645
|
quads: [],
|
|
584
646
|
origin: { blocks: new Map(), quadIndex: new Map() },
|
|
585
|
-
currentSubject: null
|
|
647
|
+
currentSubject: null,
|
|
648
|
+
currentObject: null
|
|
586
649
|
};
|
|
587
650
|
|
|
588
651
|
const tokens = scanTokens(text);
|
|
@@ -680,6 +743,16 @@ function removeOneToken(tokens, matchFn) {
|
|
|
680
743
|
return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
|
|
681
744
|
}
|
|
682
745
|
|
|
746
|
+
function addObjectToken(tokens, iri) {
|
|
747
|
+
const objectToken = `=?${iri}`;
|
|
748
|
+
return tokens.includes(objectToken) ? tokens : [...tokens, objectToken];
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
function removeObjectToken(tokens, iri) {
|
|
752
|
+
const objectToken = `=?${iri}`;
|
|
753
|
+
return removeOneToken(tokens, t => t === objectToken);
|
|
754
|
+
}
|
|
755
|
+
|
|
683
756
|
function sanitizeCarrierValueForBlock(block, raw) {
|
|
684
757
|
const s = String(raw ?? '');
|
|
685
758
|
const t = block?.carrierType;
|
|
@@ -973,6 +1046,17 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
973
1046
|
return;
|
|
974
1047
|
}
|
|
975
1048
|
|
|
1049
|
+
// Handle object token removal
|
|
1050
|
+
if (entry?.kind === 'object') {
|
|
1051
|
+
const objectIRI = shortenIRI(quad.object.value, ctx);
|
|
1052
|
+
const { tokens: updated, removed } = removeObjectToken(tokens, objectIRI);
|
|
1053
|
+
if (!removed) return;
|
|
1054
|
+
|
|
1055
|
+
const newAttrs = updated.length === 0 ? '{}' : writeAttrsTokens(updated);
|
|
1056
|
+
edits.push({ start: span.start, end: span.end, text: newAttrs });
|
|
1057
|
+
return;
|
|
1058
|
+
}
|
|
1059
|
+
|
|
976
1060
|
const tokens = normalizeAttrsTokens(span.text);
|
|
977
1061
|
let updated = tokens;
|
|
978
1062
|
let removed = false;
|
|
@@ -1044,7 +1128,8 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
1044
1128
|
} else {
|
|
1045
1129
|
const full = quad.object.value;
|
|
1046
1130
|
const label = shortenIRI(full, ctx);
|
|
1047
|
-
|
|
1131
|
+
const objectShort = shortenIRI(full, ctx);
|
|
1132
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=?${objectShort} ?${predShort}}` });
|
|
1048
1133
|
}
|
|
1049
1134
|
return;
|
|
1050
1135
|
}
|
|
@@ -1063,8 +1148,24 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
1063
1148
|
|
|
1064
1149
|
if (quad.object.termType === 'NamedNode') {
|
|
1065
1150
|
const full = quad.object.value;
|
|
1066
|
-
const
|
|
1067
|
-
|
|
1151
|
+
const objectShort = shortenIRI(full, ctx);
|
|
1152
|
+
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
1153
|
+
|
|
1154
|
+
// Check if this is a ?predicate form (should use object IRI)
|
|
1155
|
+
const span = readSpan(targetBlock, text, 'attrs');
|
|
1156
|
+
const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);
|
|
1157
|
+
const hasObjectToken = tokens.some(t => t.startsWith('=?'));
|
|
1158
|
+
|
|
1159
|
+
if (hasObjectToken || anchored?.entry?.form === '?') {
|
|
1160
|
+
// Add object token if not present
|
|
1161
|
+
const updated = addObjectToken(tokens, objectShort);
|
|
1162
|
+
if (updated.length !== tokens.length) {
|
|
1163
|
+
edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
|
|
1164
|
+
}
|
|
1165
|
+
} else {
|
|
1166
|
+
// Create new annotation with object token
|
|
1167
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${objectShort}] {=?${objectShort} ?${predShort}}` });
|
|
1168
|
+
}
|
|
1068
1169
|
return;
|
|
1069
1170
|
}
|
|
1070
1171
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.2.5",
|
|
3
|
+
"version": "0.2.7",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|