mdld-parse 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -11
- package/index.js +95 -16
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -14,8 +14,11 @@ MD-LD allows you to author RDF graphs directly in Markdown using explicit `{...}
|
|
|
14
14
|
# Apollo 11 {=ex:apollo11 .SpaceMission}
|
|
15
15
|
|
|
16
16
|
Launch: [1969-07-16] {startDate ^^xsd:date}
|
|
17
|
-
Crew: [Neil Armstrong]
|
|
17
|
+
Crew: [Neil Armstrong] {=?ex:armstrong ?crewMember fullName}
|
|
18
18
|
Description: [First crewed Moon landing] {description}
|
|
19
|
+
|
|
20
|
+
[Section] {=?#overview ?hasPart}
|
|
21
|
+
Overview: [Mission summary] {description}
|
|
19
22
|
```
|
|
20
23
|
|
|
21
24
|
Generates valid RDF triples:
|
|
@@ -25,18 +28,20 @@ ex:apollo11 a schema:SpaceMission ;
|
|
|
25
28
|
schema:startDate "1969-07-16"^^xsd:date ;
|
|
26
29
|
schema:crewMember ex:armstrong ;
|
|
27
30
|
schema:description "First crewed Moon landing" .
|
|
28
|
-
```
|
|
29
31
|
|
|
30
|
-
|
|
32
|
+
ex:armstrong schema:fullName "Neil Armstrong" .
|
|
33
|
+
```
|
|
31
34
|
|
|
32
|
-
|
|
35
|
+
## Core Features
|
|
33
36
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
37
|
+
- **Subject declarations**: `{=IRI}` and `{=#fragment}` for context setting
|
|
38
|
+
- **Object IRIs**: `{=?IRI}` and `{=?#fragment}` for temporary object declarations
|
|
39
|
+
- **Four predicate forms**: `p` (S→L), `?p` (S→O), `^p` (L→S), `^?p` (O→S)
|
|
40
|
+
- **Type declarations**: `.Class` for rdf:type triples
|
|
41
|
+
- **Datatypes & language**: `^^xsd:date` and `@en` support
|
|
42
|
+
- **Lists**: Explicit subject declarations for structured data
|
|
43
|
+
- **Fragments**: Built-in document structuring with `{=#fragment}`
|
|
44
|
+
- **Round-trip serialization**: Markdown ↔ RDF ↔ Markdown preserves structure
|
|
40
45
|
|
|
41
46
|
## Installation
|
|
42
47
|
|
|
@@ -246,6 +251,7 @@ ex:book schema:hasPart ex:part .
|
|
|
246
251
|
```markdown
|
|
247
252
|
[ex] {: http://example.org/}
|
|
248
253
|
[foaf] {: http://xmlns.com/foaf/0.1/}
|
|
254
|
+
[@vocab] {: http://schema.org/}
|
|
249
255
|
|
|
250
256
|
# Person {=ex:alice .foaf:Person}
|
|
251
257
|
```
|
|
@@ -385,6 +391,41 @@ MD-LD explicitly forbids to ensure deterministic parsing:
|
|
|
385
391
|
- ❌ Predicate guessing from context
|
|
386
392
|
- ❌ Multi-pass or backtracking parsers
|
|
387
393
|
|
|
394
|
+
The **Algebra** section below states precisely which RDF triple each predicate form emits.
|
|
395
|
+
It keeps the math precise, the table of forms exhaustive, and the language compact.
|
|
396
|
+
|
|
397
|
+
---
|
|
398
|
+
|
|
399
|
+
## Algebra
|
|
400
|
+
|
|
401
|
+
> Every RDF triple `(s, p, o)` can be authored **explicitly, deterministically, and locally**, with no inference, guessing, or reordering.
|
|
402
|
+
|
|
403
|
+
MD-LD models RDF authoring as a **closed edge algebra** over a small, explicit state. To be algebraically complete for RDF triple construction, a syntax must support:
|
|
404
|
+
|
|
405
|
+
* Binding a **subject** `S`
|
|
406
|
+
* Binding an **object** `O`
|
|
407
|
+
* Emitting predicates in **both directions**
|
|
408
|
+
* Distinguishing **IRI nodes** from **literal nodes**
|
|
409
|
+
* Operating with **no implicit state or inference**
|
|
410
|
+
|
|
411
|
+
MD-LD satisfies these requirements with four explicit operators.
|
|
412
|
+
|
|
413
|
+
Each predicate is partitioned by **direction** and **node kind**:
|
|
414
|
+
|
|
415
|
+
| Predicate form | Emitted triple |
|
|
416
|
+
| -------------- | -------------- |
|
|
417
|
+
| `p` | `S ─p→ L` |
|
|
418
|
+
| `?p` | `S ─p→ O` |
|
|
419
|
+
| `^p` | `L ─p→ S` |
|
|
420
|
+
| `^?p` | `O ─p→ S` |
|
|
421
|
+
|
|
422
|
+
This spans all **2 × 2** combinations of:
|
|
423
|
+
|
|
424
|
+
* direction ∈ {forward (`S` is the source), reverse (`S` is the target)}
|
|
425
|
+
* node kind ∈ {literal `L`, IRI object `O`}
|
|
426
|
+
|
|
427
|
+
Therefore, the algebra is **closed**.
|
|
428
|
+
|
|
388
429
|
## Use Cases
|
|
389
430
|
|
|
390
431
|
### Personal Knowledge Management
|
|
@@ -456,8 +497,10 @@ Contributions welcome! Please:
|
|
|
456
497
|
|
|
457
498
|
## Acknowledgments
|
|
458
499
|
|
|
500
|
+
Developed by [Denis Starov](https://github.com/davay42).
|
|
501
|
+
|
|
459
502
|
Inspired by:
|
|
460
|
-
- Thomas Francart's [Semantic Markdown](https://blog.sparna.fr/2020/02/20/semantic-markdown/)
|
|
503
|
+
- Thomas Francart's [Semantic Markdown](https://blog.sparna.fr/2020/02/20/semantic-markdown/) article
|
|
461
504
|
- RDFa's decades of structured data experience
|
|
462
505
|
- CommonMark's rigorous parsing approach
|
|
463
506
|
|
package/index.js
CHANGED
|
@@ -24,7 +24,6 @@ export function hash(str) {
|
|
|
24
24
|
return Math.abs(h).toString(16).slice(0, 12);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
-
// IRI Utilities
|
|
28
27
|
export function expandIRI(term, ctx) {
|
|
29
28
|
if (term == null) return null;
|
|
30
29
|
const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
|
|
@@ -76,6 +75,13 @@ export function parseSemanticBlock(raw) {
|
|
|
76
75
|
continue;
|
|
77
76
|
}
|
|
78
77
|
|
|
78
|
+
if (token.startsWith('=?#')) {
|
|
79
|
+
const fragment = token.substring(3);
|
|
80
|
+
result.object = `#${fragment}`;
|
|
81
|
+
result.entries.push({ kind: 'softFragment', fragment, relRange: { start: relStart, end: relEnd }, raw: token });
|
|
82
|
+
continue;
|
|
83
|
+
}
|
|
84
|
+
|
|
79
85
|
if (token.startsWith('=?')) {
|
|
80
86
|
const iri = token.substring(2);
|
|
81
87
|
result.object = iri;
|
|
@@ -274,6 +280,41 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
274
280
|
let pos = 0;
|
|
275
281
|
|
|
276
282
|
while (pos < text.length) {
|
|
283
|
+
// Try emphasis patterns first (before brackets)
|
|
284
|
+
const emphasisMatch = text.match(/^[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/, pos);
|
|
285
|
+
if (emphasisMatch) {
|
|
286
|
+
const carrierText = emphasisMatch[1];
|
|
287
|
+
const valueRange = [baseOffset + emphasisMatch[0].length, baseOffset + emphasisMatch[0].length + emphasisMatch[1].length];
|
|
288
|
+
carriers.push({
|
|
289
|
+
type: 'emphasis',
|
|
290
|
+
text: carrierText,
|
|
291
|
+
attrs: `{${emphasisMatch[2]}}`,
|
|
292
|
+
attrsRange: [baseOffset + emphasisMatch[0].length + emphasisMatch[1].length + 2, baseOffset + emphasisMatch[0].length + emphasisMatch[1].length + emphasisMatch[2].length],
|
|
293
|
+
valueRange,
|
|
294
|
+
range: [baseOffset + emphasisMatch[0].length, baseOffset + emphasisMatch[0].length + emphasisMatch[1].length]
|
|
295
|
+
});
|
|
296
|
+
pos = baseOffset + emphasisMatch[0].length + emphasisMatch[1].length + emphasisMatch[2].length;
|
|
297
|
+
continue;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
// Try code spans
|
|
301
|
+
const codeMatch = text.match(/^``(.+?)``\s*\{([^}]+)\}/, pos);
|
|
302
|
+
if (codeMatch) {
|
|
303
|
+
const carrierText = codeMatch[1];
|
|
304
|
+
const valueRange = [baseOffset + 2, baseOffset + 2 + codeMatch[1].length];
|
|
305
|
+
carriers.push({
|
|
306
|
+
type: 'code',
|
|
307
|
+
text: carrierText,
|
|
308
|
+
attrs: `{${codeMatch[2]}}`,
|
|
309
|
+
attrsRange: [baseOffset + 2 + codeMatch[1].length + 2, baseOffset + 2 + codeMatch[1].length + 2],
|
|
310
|
+
valueRange,
|
|
311
|
+
range: [baseOffset + 2, baseOffset + 2 + codeMatch[1].length + 2]
|
|
312
|
+
});
|
|
313
|
+
pos = baseOffset + 2 + codeMatch[1].length + 2;
|
|
314
|
+
continue;
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
// Try bracket patterns (original logic)
|
|
277
318
|
const bracketStart = text.indexOf('[', pos);
|
|
278
319
|
if (bracketStart === -1) break;
|
|
279
320
|
|
|
@@ -365,7 +406,6 @@ function createBlock(subject, types, predicates, entries, range, attrsRange, val
|
|
|
365
406
|
};
|
|
366
407
|
}
|
|
367
408
|
|
|
368
|
-
// Quad Utilities
|
|
369
409
|
function quadIndexKey(subject, predicate, object) {
|
|
370
410
|
const objKey = object.termType === 'Literal'
|
|
371
411
|
? JSON.stringify({ t: 'Literal', v: object.value, lang: object.language || '', dt: object.datatype?.value || '' })
|
|
@@ -406,7 +446,6 @@ function parseQuadIndexKey(key) {
|
|
|
406
446
|
}
|
|
407
447
|
}
|
|
408
448
|
|
|
409
|
-
// Semantic Slot Utilities
|
|
410
449
|
function createSemanticSlotId(subject, predicate) {
|
|
411
450
|
return hash(`${subject.value}|${predicate.value}`);
|
|
412
451
|
}
|
|
@@ -500,7 +539,17 @@ function processAnnotation(carrier, sem, state) {
|
|
|
500
539
|
|
|
501
540
|
if (sem.object) {
|
|
502
541
|
// Handle soft IRI object declaration - local to this annotation only
|
|
503
|
-
|
|
542
|
+
if (sem.object.startsWith('#')) {
|
|
543
|
+
// Soft fragment - resolve against current subject base
|
|
544
|
+
const fragment = sem.object.substring(1);
|
|
545
|
+
if (state.currentSubject) {
|
|
546
|
+
const baseIRI = state.currentSubject.value.split('#')[0];
|
|
547
|
+
localObject = state.df.namedNode(`${baseIRI}#${fragment}`);
|
|
548
|
+
}
|
|
549
|
+
} else {
|
|
550
|
+
// Regular soft IRI
|
|
551
|
+
localObject = state.df.namedNode(expandIRI(sem.object, state.ctx));
|
|
552
|
+
}
|
|
504
553
|
}
|
|
505
554
|
|
|
506
555
|
if (newSubject) state.currentSubject = newSubject;
|
|
@@ -718,8 +767,6 @@ export function parse(text, options = {}) {
|
|
|
718
767
|
return { quads: state.quads, origin: state.origin, context: state.ctx };
|
|
719
768
|
}
|
|
720
769
|
|
|
721
|
-
|
|
722
|
-
// Text Processing Utilities
|
|
723
770
|
function readSpan(block, text, spanType = 'attrs') {
|
|
724
771
|
const range = spanType === 'attrs' ? block?.attrsRange : block?.valueRange;
|
|
725
772
|
if (!range) return null;
|
|
@@ -753,6 +800,16 @@ function removeObjectToken(tokens, iri) {
|
|
|
753
800
|
return removeOneToken(tokens, t => t === objectToken);
|
|
754
801
|
}
|
|
755
802
|
|
|
803
|
+
function addSoftFragmentToken(tokens, fragment) {
|
|
804
|
+
const fragmentToken = `=?#${fragment}`;
|
|
805
|
+
return tokens.includes(fragmentToken) ? tokens : [...tokens, fragmentToken];
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
function removeSoftFragmentToken(tokens, fragment) {
|
|
809
|
+
const fragmentToken = `=?#${fragment}`;
|
|
810
|
+
return removeOneToken(tokens, t => t === fragmentToken);
|
|
811
|
+
}
|
|
812
|
+
|
|
756
813
|
function sanitizeCarrierValueForBlock(block, raw) {
|
|
757
814
|
const s = String(raw ?? '');
|
|
758
815
|
const t = block?.carrierType;
|
|
@@ -1057,6 +1114,17 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
1057
1114
|
return;
|
|
1058
1115
|
}
|
|
1059
1116
|
|
|
1117
|
+
// Handle soft fragment token removal
|
|
1118
|
+
if (entry?.kind === 'softFragment') {
|
|
1119
|
+
const fragment = entry.fragment;
|
|
1120
|
+
const { tokens: updated, removed } = removeSoftFragmentToken(tokens, fragment);
|
|
1121
|
+
if (!removed) return;
|
|
1122
|
+
|
|
1123
|
+
const newAttrs = updated.length === 0 ? '{}' : writeAttrsTokens(updated);
|
|
1124
|
+
edits.push({ start: span.start, end: span.end, text: newAttrs });
|
|
1125
|
+
return;
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1060
1128
|
const tokens = normalizeAttrsTokens(span.text);
|
|
1061
1129
|
let updated = tokens;
|
|
1062
1130
|
let removed = false;
|
|
@@ -1151,20 +1219,31 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
1151
1219
|
const objectShort = shortenIRI(full, ctx);
|
|
1152
1220
|
const predShort = shortenIRI(quad.predicate.value, ctx);
|
|
1153
1221
|
|
|
1154
|
-
// Check if this is a
|
|
1155
|
-
const
|
|
1156
|
-
const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);
|
|
1157
|
-
const hasObjectToken = tokens.some(t => t.startsWith('=?'));
|
|
1222
|
+
// Check if this is a soft fragment
|
|
1223
|
+
const isSoftFragment = full.includes('#') && anchored?.entry?.kind === 'softFragment';
|
|
1158
1224
|
|
|
1159
|
-
if (
|
|
1160
|
-
// Add
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1225
|
+
if (isSoftFragment || anchored?.entry?.form === '?') {
|
|
1226
|
+
// Add soft fragment token if not present
|
|
1227
|
+
if (isSoftFragment) {
|
|
1228
|
+
const fragment = full.split('#')[1];
|
|
1229
|
+
const updated = addSoftFragmentToken(tokens, fragment);
|
|
1230
|
+
if (updated.length !== tokens.length) {
|
|
1231
|
+
edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
|
|
1232
|
+
}
|
|
1233
|
+
} else {
|
|
1234
|
+
const updated = addObjectToken(tokens, objectShort);
|
|
1235
|
+
if (updated.length !== tokens.length) {
|
|
1236
|
+
edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
|
|
1237
|
+
}
|
|
1164
1238
|
}
|
|
1165
1239
|
} else {
|
|
1166
1240
|
// Create new annotation with object token
|
|
1167
|
-
|
|
1241
|
+
if (isSoftFragment) {
|
|
1242
|
+
const fragment = full.split('#')[1];
|
|
1243
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${objectShort}] {=?#${fragment} ?${predShort}}` });
|
|
1244
|
+
} else {
|
|
1245
|
+
edits.push({ start: result.length, end: result.length, text: `\n[${objectShort}] {=?${objectShort} ?${predShort}}` });
|
|
1246
|
+
}
|
|
1168
1247
|
}
|
|
1169
1248
|
return;
|
|
1170
1249
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.2.7",
|
|
3
|
+
"version": "0.2.9",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|