mdld-parse 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +54 -11
  2. package/index.js +137 -27
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -14,8 +14,11 @@ MD-LD allows you to author RDF graphs directly in Markdown using explicit `{...}
14
14
  # Apollo 11 {=ex:apollo11 .SpaceMission}
15
15
 
16
16
  Launch: [1969-07-16] {startDate ^^xsd:date}
17
- Crew: [Neil Armstrong](ex:armstrong) {?crewMember}
17
+ Crew: [Neil Armstrong] {=?ex:armstrong ?crewMember fullName}
18
18
  Description: [First crewed Moon landing] {description}
19
+
20
+ [Section] {=?#overview ?hasPart}
21
+ Overview: [Mission summary] {description}
19
22
  ```
20
23
 
21
24
  Generates valid RDF triples:
@@ -25,18 +28,20 @@ ex:apollo11 a schema:SpaceMission ;
25
28
  schema:startDate "1969-07-16"^^xsd:date ;
26
29
  schema:crewMember ex:armstrong ;
27
30
  schema:description "First crewed Moon landing" .
28
- ```
29
31
 
30
- ## Core Guarantees
32
+ ex:armstrong schema:fullName "Neil Armstrong" .
33
+ ```
31
34
 
32
- MD-LD v0.2 provides strict semantic guarantees:
35
+ ## Core Features
33
36
 
34
- 1. **CommonMark-preserving** Removing `{...}` yields valid Markdown
35
- 2. **Explicit semantics** Every quad originates from explicit `{...}`
36
- 3. **Single-pass parsing** Streaming-friendly, deterministic
37
- 4. **No blank nodes** All subjects are stable IRIs
38
- 5. **Complete traceability** Every quad maps to source location
39
- 6. **Round-trip capable** Markdown RDF Markdown preserves structure
37
+ - **Subject declarations**: `{=IRI}` and `{=#fragment}` for context setting
38
+ - **Object IRIs**: `{=?IRI}` and `{=?#fragment}` for temporary object declarations
39
+ - **Four predicate forms**: `p` (S→L), `?p` (S→O), `^p` (L→S), `^?p` (O→S)
40
+ - **Type declarations**: `.Class` for rdf:type triples
41
+ - **Datatypes & language**: `^^xsd:date` and `@en` support
42
+ - **Lists**: Explicit subject declarations for structured data
43
+ - **Fragments**: Built-in document structuring with `{=#fragment}`
44
+ - **Round-trip serialization**: Markdown ↔ RDF ↔ Markdown preserves structure
40
45
 
41
46
  ## Installation
42
47
 
@@ -246,6 +251,7 @@ ex:book schema:hasPart ex:part .
246
251
  ```markdown
247
252
  [ex] {: http://example.org/}
248
253
  [foaf] {: http://xmlns.com/foaf/0.1/}
254
+ [@vocab] {: http://schema.org/}
249
255
 
250
256
  # Person {=ex:alice .foaf:Person}
251
257
  ```
@@ -385,6 +391,41 @@ MD-LD explicitly forbids to ensure deterministic parsing:
385
391
  - ❌ Predicate guessing from context
386
392
  - ❌ Multi-pass or backtracking parsers
387
393
 
394
+ The **Algebra** section below gives a compact, precise account of how MD-LD's predicate forms construct RDF triples.
395
+ It keeps the math precise, the examples exhaustive, and the language compact.
396
+
397
+ ---
398
+
399
+ ## Algebra
400
+
401
+ > Every RDF triple `(s, p, o)` can be authored **explicitly, deterministically, and locally**, with no inference, guessing, or reordering.
402
+
403
+ MD-LD models RDF authoring as a **closed edge algebra** over a small, explicit state. To be algebraically complete for RDF triple construction, a syntax must support:
404
+
405
+ * Binding a **subject** `S`
406
+ * Binding an **object** `O`
407
+ * Emitting predicates in **both directions**
408
+ * Distinguishing **IRI nodes** from **literal nodes**
409
+ * Operating with **no implicit state or inference**
410
+
411
+ MD-LD satisfies these requirements with four explicit operators.
412
+
413
+ Each predicate is partitioned by **direction** and **node kind**:
414
+
415
+ | Predicate form | Emitted triple |
416
+ | -------------- | -------------- |
417
+ | `p` | `S ─p→ L` |
418
+ | `?p` | `S ─p→ O` |
419
+ | `^p` | `L ─p→ S` |
420
+ | `^?p` | `O ─p→ S` |
421
+
422
+ This spans all **2 × 2** combinations of:
423
+
424
+ * direction ∈ {forward (from `S`), reverse (to `S`)}
425
+ * node kind ∈ {literal `L`, IRI object `O`}
426
+
427
+ Therefore, the algebra is **closed**.
428
+
388
429
  ## Use Cases
389
430
 
390
431
  ### Personal Knowledge Management
@@ -456,8 +497,10 @@ Contributions welcome! Please:
456
497
 
457
498
  ## Acknowledgments
458
499
 
500
+ Developed by [Denis Starov](https://github.com/davay42).
501
+
459
502
  Inspired by:
460
- - Thomas Francart's [Semantic Markdown](https://blog.sparna.fr/2020/02/20/semantic-markdown/)
503
+ - Thomas Francart's [Semantic Markdown](https://blog.sparna.fr/2020/02/20/semantic-markdown/) article
461
504
  - RDFa decades of structured data experience
462
505
  - CommonMark's rigorous parsing approach
463
506
 
package/index.js CHANGED
@@ -1,4 +1,4 @@
1
- const DEFAULT_CONTEXT = {
1
+ export const DEFAULT_CONTEXT = {
2
2
  '@vocab': 'http://schema.org/',
3
3
  rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
4
4
  rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
@@ -6,7 +6,7 @@ const DEFAULT_CONTEXT = {
6
6
  schema: 'http://schema.org/'
7
7
  };
8
8
 
9
- const DataFactory = {
9
+ export const DataFactory = {
10
10
  namedNode: (v) => ({ termType: 'NamedNode', value: v }),
11
11
  blankNode: (v = `b${Math.random().toString(36).slice(2, 11)}`) => ({ termType: 'BlankNode', value: v }),
12
12
  literal: (v, lang) => {
@@ -18,14 +18,14 @@ const DataFactory = {
18
18
  quad: (s, p, o, g) => ({ subject: s, predicate: p, object: o, graph: g || DataFactory.namedNode('') })
19
19
  };
20
20
 
21
- function hash(str) {
21
+ export function hash(str) {
22
22
  let h = 5381;
23
23
  for (let i = 0; i < str.length; i++) h = ((h << 5) + h) + str.charCodeAt(i);
24
24
  return Math.abs(h).toString(16).slice(0, 12);
25
25
  }
26
26
 
27
27
  // IRI Utilities
28
- function expandIRI(term, ctx) {
28
+ export function expandIRI(term, ctx) {
29
29
  if (term == null) return null;
30
30
  const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
31
31
  const t = raw.trim();
@@ -48,17 +48,13 @@ export function shortenIRI(iri, ctx) {
48
48
  return iri;
49
49
  }
50
50
 
51
- function processIRI(term, ctx, operation = 'expand') {
52
- return operation === 'expand' ? expandIRI(term, ctx) : shortenIRI(term, ctx);
53
- }
54
-
55
- function parseSemanticBlock(raw) {
51
+ export function parseSemanticBlock(raw) {
56
52
  try {
57
53
  const src = String(raw || '').trim();
58
54
  const cleaned = src.replace(/^\{|\}$/g, '').trim();
59
- if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
55
+ if (!cleaned) return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
60
56
 
61
- const result = { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
57
+ const result = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
62
58
  const re = /\S+/g;
63
59
  let m;
64
60
  while ((m = re.exec(cleaned)) !== null) {
@@ -80,6 +76,20 @@ function parseSemanticBlock(raw) {
80
76
  continue;
81
77
  }
82
78
 
79
+ if (token.startsWith('=?#')) {
80
+ const fragment = token.substring(3);
81
+ result.object = `#${fragment}`;
82
+ result.entries.push({ kind: 'softFragment', fragment, relRange: { start: relStart, end: relEnd }, raw: token });
83
+ continue;
84
+ }
85
+
86
+ if (token.startsWith('=?')) {
87
+ const iri = token.substring(2);
88
+ result.object = iri;
89
+ result.entries.push({ kind: 'object', iri, relRange: { start: relStart, end: relEnd }, raw: token });
90
+ continue;
91
+ }
92
+
83
93
  if (token.startsWith('=')) {
84
94
  const iri = token.substring(1);
85
95
  result.subject = iri;
@@ -137,7 +147,7 @@ function parseSemanticBlock(raw) {
137
147
  return result;
138
148
  } catch (error) {
139
149
  console.error(`Error parsing semantic block ${raw}:`, error);
140
- return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
150
+ return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
141
151
  }
142
152
  }
143
153
 
@@ -472,11 +482,13 @@ function createLiteral(value, datatype, language, context, dataFactory) {
472
482
  function processAnnotation(carrier, sem, state) {
473
483
  if (sem.subject === 'RESET') {
474
484
  state.currentSubject = null;
485
+ state.currentObject = null;
475
486
  return;
476
487
  }
477
488
 
478
489
  const previousSubject = state.currentSubject;
479
490
  let newSubject = null;
491
+ let localObject = null;
480
492
 
481
493
  if (sem.subject) {
482
494
  if (sem.subject.startsWith('=#')) {
@@ -492,6 +504,22 @@ function processAnnotation(carrier, sem, state) {
492
504
  newSubject = state.df.namedNode(expandIRI(sem.subject, state.ctx));
493
505
  }
494
506
  }
507
+
508
+ if (sem.object) {
509
+ // Handle soft IRI object declaration - local to this annotation only
510
+ if (sem.object.startsWith('#')) {
511
+ // Soft fragment - resolve against current subject base
512
+ const fragment = sem.object.substring(1);
513
+ if (state.currentSubject) {
514
+ const baseIRI = state.currentSubject.value.split('#')[0];
515
+ localObject = state.df.namedNode(`${baseIRI}#${fragment}`);
516
+ }
517
+ } else {
518
+ // Regular soft IRI
519
+ localObject = state.df.namedNode(expandIRI(sem.object, state.ctx));
520
+ }
521
+ }
522
+
495
523
  if (newSubject) state.currentSubject = newSubject;
496
524
 
497
525
  const S = state.currentSubject;
@@ -501,12 +529,15 @@ function processAnnotation(carrier, sem, state) {
501
529
  state.origin.blocks.set(block.id, block);
502
530
 
503
531
  const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
504
- const O = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
532
+ const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
505
533
 
506
534
  sem.types.forEach(t => {
507
535
  const typeIRI = typeof t === 'string' ? t : t.iri;
508
536
  const entryIndex = typeof t === 'string' ? null : t.entryIndex;
509
- const typeSubject = O || S;
537
+ // For types with subject declarations, the type applies to the new subject
538
+ // For types with soft IRI declarations, the type applies to the soft IRI object
539
+ // Otherwise, type applies to carrier object or current subject
540
+ const typeSubject = newSubject ? newSubject : (localObject || carrierO || S);
510
541
  const expandedType = expandIRI(typeIRI, state.ctx);
511
542
  emitQuad(state.quads, state.origin.quadIndex, block.id, typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandedType), state.df, { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex });
512
543
  });
@@ -516,18 +547,26 @@ function processAnnotation(carrier, sem, state) {
516
547
  const token = `${pred.form}${pred.iri}`;
517
548
 
518
549
  if (pred.form === '') {
519
- emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
550
+ // S —p→ L (use soft IRI object as subject if available, otherwise current subject)
551
+ const subjectIRI = localObject || S;
552
+ emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
520
553
  } else if (pred.form === '?') {
521
- if (newSubject) {
522
- emitQuad(state.quads, state.origin.quadIndex, block.id, previousSubject, P, newSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
523
- } else if (O) {
524
- emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, O, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
554
+ // S —p→ O (use previous subject as subject, newSubject as object)
555
+ const subjectIRI = newSubject ? previousSubject : S;
556
+ const objectIRI = localObject || newSubject || carrierO;
557
+ if (objectIRI && subjectIRI) {
558
+ emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, objectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
525
559
  }
560
+ } else if (pred.form === '^') {
561
+ // L —p→ S (use soft IRI object as subject if available, otherwise current subject)
562
+ const subjectIRI = localObject || S;
563
+ emitQuad(state.quads, state.origin.quadIndex, block.id, L, P, subjectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
526
564
  } else if (pred.form === '^?') {
527
- if (newSubject) {
528
- emitQuad(state.quads, state.origin.quadIndex, block.id, newSubject, P, previousSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
529
- } else if (O) {
530
- emitQuad(state.quads, state.origin.quadIndex, block.id, O, P, S, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
565
+ // O —p→ S (use previous subject as object, newSubject as subject)
566
+ const objectIRI = newSubject ? previousSubject : S;
567
+ const subjectIRI = localObject || newSubject || carrierO;
568
+ if (objectIRI && subjectIRI) {
569
+ emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, objectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
531
570
  }
532
571
  }
533
572
  });
@@ -622,7 +661,8 @@ export function parse(text, options = {}) {
622
661
  df: options.dataFactory || DataFactory,
623
662
  quads: [],
624
663
  origin: { blocks: new Map(), quadIndex: new Map() },
625
- currentSubject: null
664
+ currentSubject: null,
665
+ currentObject: null
626
666
  };
627
667
 
628
668
  const tokens = scanTokens(text);
@@ -720,6 +760,26 @@ function removeOneToken(tokens, matchFn) {
720
760
  return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
721
761
  }
722
762
 
763
+ function addObjectToken(tokens, iri) {
764
+ const objectToken = `=?${iri}`;
765
+ return tokens.includes(objectToken) ? tokens : [...tokens, objectToken];
766
+ }
767
+
768
+ function removeObjectToken(tokens, iri) {
769
+ const objectToken = `=?${iri}`;
770
+ return removeOneToken(tokens, t => t === objectToken);
771
+ }
772
+
773
+ function addSoftFragmentToken(tokens, fragment) {
774
+ const fragmentToken = `=?#${fragment}`;
775
+ return tokens.includes(fragmentToken) ? tokens : [...tokens, fragmentToken];
776
+ }
777
+
778
+ function removeSoftFragmentToken(tokens, fragment) {
779
+ const fragmentToken = `=?#${fragment}`;
780
+ return removeOneToken(tokens, t => t === fragmentToken);
781
+ }
782
+
723
783
  function sanitizeCarrierValueForBlock(block, raw) {
724
784
  const s = String(raw ?? '');
725
785
  const t = block?.carrierType;
@@ -1013,6 +1073,28 @@ export function serialize({ text, diff, origin, options = {} }) {
1013
1073
  return;
1014
1074
  }
1015
1075
 
1076
+ // Handle object token removal
1077
+ if (entry?.kind === 'object') {
1078
+ const objectIRI = shortenIRI(quad.object.value, ctx);
1079
+ const { tokens: updated, removed } = removeObjectToken(tokens, objectIRI);
1080
+ if (!removed) return;
1081
+
1082
+ const newAttrs = updated.length === 0 ? '{}' : writeAttrsTokens(updated);
1083
+ edits.push({ start: span.start, end: span.end, text: newAttrs });
1084
+ return;
1085
+ }
1086
+
1087
+ // Handle soft fragment token removal
1088
+ if (entry?.kind === 'softFragment') {
1089
+ const fragment = entry.fragment;
1090
+ const { tokens: updated, removed } = removeSoftFragmentToken(tokens, fragment);
1091
+ if (!removed) return;
1092
+
1093
+ const newAttrs = updated.length === 0 ? '{}' : writeAttrsTokens(updated);
1094
+ edits.push({ start: span.start, end: span.end, text: newAttrs });
1095
+ return;
1096
+ }
1097
+
1016
1098
  const tokens = normalizeAttrsTokens(span.text);
1017
1099
  let updated = tokens;
1018
1100
  let removed = false;
@@ -1084,7 +1166,8 @@ export function serialize({ text, diff, origin, options = {} }) {
1084
1166
  } else {
1085
1167
  const full = quad.object.value;
1086
1168
  const label = shortenIRI(full, ctx);
1087
- edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${label}) {?${predShort}}` });
1169
+ const objectShort = shortenIRI(full, ctx);
1170
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=?${objectShort} ?${predShort}}` });
1088
1171
  }
1089
1172
  return;
1090
1173
  }
@@ -1103,8 +1186,35 @@ export function serialize({ text, diff, origin, options = {} }) {
1103
1186
 
1104
1187
  if (quad.object.termType === 'NamedNode') {
1105
1188
  const full = quad.object.value;
1106
- const label = shortenIRI(full, ctx);
1107
- edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${shortenIRI(full, ctx)} ?${predShort}}` });
1189
+ const objectShort = shortenIRI(full, ctx);
1190
+ const predShort = shortenIRI(quad.predicate.value, ctx);
1191
+
1192
+ // Check if this is a soft fragment
1193
+ const isSoftFragment = full.includes('#') && anchored?.entry?.kind === 'softFragment';
1194
+
1195
+ if (isSoftFragment || anchored?.entry?.form === '?') {
1196
+ // Add soft fragment token if not present
1197
+ if (isSoftFragment) {
1198
+ const fragment = full.split('#')[1];
1199
+ const updated = addSoftFragmentToken(tokens, fragment);
1200
+ if (updated.length !== tokens.length) {
1201
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
1202
+ }
1203
+ } else {
1204
+ const updated = addObjectToken(tokens, objectShort);
1205
+ if (updated.length !== tokens.length) {
1206
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
1207
+ }
1208
+ }
1209
+ } else {
1210
+ // Create new annotation with object token
1211
+ if (isSoftFragment) {
1212
+ const fragment = full.split('#')[1];
1213
+ edits.push({ start: result.length, end: result.length, text: `\n[${objectShort}] {=?#${fragment} ?${predShort}}` });
1214
+ } else {
1215
+ edits.push({ start: result.length, end: result.length, text: `\n[${objectShort}] {=?${objectShort} ?${predShort}}` });
1216
+ }
1217
+ }
1108
1218
  return;
1109
1219
  }
1110
1220
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.2.6",
3
+ "version": "0.2.8",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",