mdld-parse 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +17 -1
  2. package/index.js +131 -30
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -94,12 +94,28 @@ Each predicate form determines the graph edge:
94
94
 
95
95
  ### Subject Declaration
96
96
 
97
- Set the current subject (emits no quads):
97
+ Set current subject (emits no quads):
98
98
 
99
99
  ```markdown
100
100
  ## Apollo 11 {=ex:apollo11}
101
101
  ```
102
102
 
103
+ #### Fragment Syntax
104
+
105
+ Create fragment IRIs relative to current subject:
106
+
107
+ ```markdown
108
+ # Document {=ex:document}
109
+ {=#summary}
110
+ [Content] {name}
111
+ ```
112
+
113
+ ```turtle
114
+ ex:document\#summary schema:name "Content" .
115
+ ```
116
+
117
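+ A fragment declaration replaces any fragment already present on the current subject's IRI. It requires a current subject; without one, the fragment declaration is ignored.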
118
+
103
119
  Subject remains in scope until reset with `{=}` or new subject declared.
104
120
 
105
121
  ### Type Declaration
package/index.js CHANGED
@@ -1,4 +1,4 @@
1
- const DEFAULT_CONTEXT = {
1
+ export const DEFAULT_CONTEXT = {
2
2
  '@vocab': 'http://schema.org/',
3
3
  rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
4
4
  rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
@@ -6,7 +6,7 @@ const DEFAULT_CONTEXT = {
6
6
  schema: 'http://schema.org/'
7
7
  };
8
8
 
9
- const DataFactory = {
9
+ export const DataFactory = {
10
10
  namedNode: (v) => ({ termType: 'NamedNode', value: v }),
11
11
  blankNode: (v = `b${Math.random().toString(36).slice(2, 11)}`) => ({ termType: 'BlankNode', value: v }),
12
12
  literal: (v, lang) => {
@@ -18,14 +18,14 @@ const DataFactory = {
18
18
  quad: (s, p, o, g) => ({ subject: s, predicate: p, object: o, graph: g || DataFactory.namedNode('') })
19
19
  };
20
20
 
21
- function hash(str) {
21
+ export function hash(str) {
22
22
  let h = 5381;
23
23
  for (let i = 0; i < str.length; i++) h = ((h << 5) + h) + str.charCodeAt(i);
24
24
  return Math.abs(h).toString(16).slice(0, 12);
25
25
  }
26
26
 
27
27
  // IRI Utilities
28
- function expandIRI(term, ctx) {
28
+ export function expandIRI(term, ctx) {
29
29
  if (term == null) return null;
30
30
  const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
31
31
  const t = raw.trim();
@@ -48,17 +48,13 @@ export function shortenIRI(iri, ctx) {
48
48
  return iri;
49
49
  }
50
50
 
51
- function processIRI(term, ctx, operation = 'expand') {
52
- return operation === 'expand' ? expandIRI(term, ctx) : shortenIRI(term, ctx);
53
- }
54
-
55
- function parseSemanticBlock(raw) {
51
+ export function parseSemanticBlock(raw) {
56
52
  try {
57
53
  const src = String(raw || '').trim();
58
54
  const cleaned = src.replace(/^\{|\}$/g, '').trim();
59
- if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
55
+ if (!cleaned) return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
60
56
 
61
- const result = { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
57
+ const result = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
62
58
  const re = /\S+/g;
63
59
  let m;
64
60
  while ((m = re.exec(cleaned)) !== null) {
@@ -73,6 +69,20 @@ function parseSemanticBlock(raw) {
73
69
  continue;
74
70
  }
75
71
 
72
+ if (token.startsWith('=#')) {
73
+ const fragment = token.substring(2);
74
+ result.subject = `=#${fragment}`;
75
+ result.entries.push({ kind: 'fragment', fragment, relRange: { start: relStart, end: relEnd }, raw: token });
76
+ continue;
77
+ }
78
+
79
+ if (token.startsWith('=?')) {
80
+ const iri = token.substring(2);
81
+ result.object = iri;
82
+ result.entries.push({ kind: 'object', iri, relRange: { start: relStart, end: relEnd }, raw: token });
83
+ continue;
84
+ }
85
+
76
86
  if (token.startsWith('=')) {
77
87
  const iri = token.substring(1);
78
88
  result.subject = iri;
@@ -130,7 +140,7 @@ function parseSemanticBlock(raw) {
130
140
  return result;
131
141
  } catch (error) {
132
142
  console.error(`Error parsing semantic block ${raw}:`, error);
133
- return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
143
+ return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
134
144
  }
135
145
  }
136
146
 
@@ -465,11 +475,34 @@ function createLiteral(value, datatype, language, context, dataFactory) {
465
475
  function processAnnotation(carrier, sem, state) {
466
476
  if (sem.subject === 'RESET') {
467
477
  state.currentSubject = null;
478
+ state.currentObject = null;
468
479
  return;
469
480
  }
470
481
 
471
482
  const previousSubject = state.currentSubject;
472
- let newSubject = sem.subject ? state.df.namedNode(expandIRI(sem.subject, state.ctx)) : null;
483
+ let newSubject = null;
484
+ let localObject = null;
485
+
486
+ if (sem.subject) {
487
+ if (sem.subject.startsWith('=#')) {
488
+ // Handle fragment syntax
489
+ const fragment = sem.subject.substring(2);
490
+ if (state.currentSubject) {
491
+ // Replace any existing fragment in current subject
492
+ const baseIRI = state.currentSubject.value.split('#')[0];
493
+ newSubject = state.df.namedNode(`${baseIRI}#${fragment}`);
494
+ }
495
+ } else {
496
+ // Regular IRI
497
+ newSubject = state.df.namedNode(expandIRI(sem.subject, state.ctx));
498
+ }
499
+ }
500
+
501
+ if (sem.object) {
502
+ // Handle soft IRI object declaration - local to this annotation only
503
+ localObject = state.df.namedNode(expandIRI(sem.object, state.ctx));
504
+ }
505
+
473
506
  if (newSubject) state.currentSubject = newSubject;
474
507
 
475
508
  const S = state.currentSubject;
@@ -479,12 +512,15 @@ function processAnnotation(carrier, sem, state) {
479
512
  state.origin.blocks.set(block.id, block);
480
513
 
481
514
  const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
482
- const O = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
515
+ const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
483
516
 
484
517
  sem.types.forEach(t => {
485
518
  const typeIRI = typeof t === 'string' ? t : t.iri;
486
519
  const entryIndex = typeof t === 'string' ? null : t.entryIndex;
487
- const typeSubject = O || S;
520
+ // For types with subject declarations, the type applies to the new subject
521
+ // For types with soft IRI declarations, the type applies to the soft IRI object
522
+ // Otherwise, type applies to carrier object or current subject
523
+ const typeSubject = newSubject ? newSubject : (localObject || carrierO || S);
488
524
  const expandedType = expandIRI(typeIRI, state.ctx);
489
525
  emitQuad(state.quads, state.origin.quadIndex, block.id, typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandedType), state.df, { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex });
490
526
  });
@@ -494,18 +530,26 @@ function processAnnotation(carrier, sem, state) {
494
530
  const token = `${pred.form}${pred.iri}`;
495
531
 
496
532
  if (pred.form === '') {
497
- emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
533
+ // S —p→ L (use soft IRI object as subject if available, otherwise current subject)
534
+ const subjectIRI = localObject || S;
535
+ emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
498
536
  } else if (pred.form === '?') {
499
- if (newSubject) {
500
- emitQuad(state.quads, state.origin.quadIndex, block.id, previousSubject, P, newSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
501
- } else if (O) {
502
- emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, O, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
537
+ // S —p→ O (use previous subject as subject, newSubject as object)
538
+ const subjectIRI = newSubject ? previousSubject : S;
539
+ const objectIRI = localObject || newSubject || carrierO;
540
+ if (objectIRI && subjectIRI) {
541
+ emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, objectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
503
542
  }
543
+ } else if (pred.form === '^') {
544
+ // L —p→ S (literal is the quad subject; the soft IRI object if available, otherwise the current subject, is the quad object)
545
+ const subjectIRI = localObject || S;
546
+ emitQuad(state.quads, state.origin.quadIndex, block.id, L, P, subjectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
504
547
  } else if (pred.form === '^?') {
505
- if (newSubject) {
506
- emitQuad(state.quads, state.origin.quadIndex, block.id, newSubject, P, previousSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
507
- } else if (O) {
508
- emitQuad(state.quads, state.origin.quadIndex, block.id, O, P, S, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
548
+ // O —p→ S (use previous subject as object, newSubject as subject)
549
+ const objectIRI = newSubject ? previousSubject : S;
550
+ const subjectIRI = localObject || newSubject || carrierO;
551
+ if (objectIRI && subjectIRI) {
552
+ emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, objectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
509
553
  }
510
554
  }
511
555
  });
@@ -522,7 +566,16 @@ function processListContext(contextSem, listTokens, state, contextSubject = null
522
566
  if (listToken.attrs) {
523
567
  const itemSem = parseSemanticBlock(listToken.attrs);
524
568
  if (itemSem.subject && itemSem.subject !== 'RESET') {
525
- itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
569
+ if (itemSem.subject.startsWith('=#')) {
570
+ // Handle fragment syntax in list items
571
+ const fragment = itemSem.subject.substring(2);
572
+ if (state.currentSubject) {
573
+ const baseIRI = state.currentSubject.value.split('#')[0];
574
+ itemSubject = state.df.namedNode(`${baseIRI}#${fragment}`);
575
+ }
576
+ } else {
577
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
578
+ }
526
579
  itemSubjectCarrier = { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range };
527
580
  }
528
581
  }
@@ -532,7 +585,16 @@ function processListContext(contextSem, listTokens, state, contextSubject = null
532
585
  if (carrier.attrs) {
533
586
  const itemSem = parseSemanticBlock(carrier.attrs);
534
587
  if (itemSem.subject && itemSem.subject !== 'RESET') {
535
- itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
588
+ if (itemSem.subject.startsWith('=#')) {
589
+ // Handle fragment syntax in inline carriers
590
+ const fragment = itemSem.subject.substring(2);
591
+ if (state.currentSubject) {
592
+ const baseIRI = state.currentSubject.value.split('#')[0];
593
+ itemSubject = state.df.namedNode(`${baseIRI}#${fragment}`);
594
+ }
595
+ } else {
596
+ itemSubject = state.df.namedNode(expandIRI(itemSem.subject, state.ctx));
597
+ }
536
598
  itemSubjectCarrier = carrier;
537
599
  break;
538
600
  }
@@ -582,7 +644,8 @@ export function parse(text, options = {}) {
582
644
  df: options.dataFactory || DataFactory,
583
645
  quads: [],
584
646
  origin: { blocks: new Map(), quadIndex: new Map() },
585
- currentSubject: null
647
+ currentSubject: null,
648
+ currentObject: null
586
649
  };
587
650
 
588
651
  const tokens = scanTokens(text);
@@ -680,6 +743,16 @@ function removeOneToken(tokens, matchFn) {
680
743
  return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
681
744
  }
682
745
 
746
+ function addObjectToken(tokens, iri) {
747
+ const objectToken = `=?${iri}`;
748
+ return tokens.includes(objectToken) ? tokens : [...tokens, objectToken];
749
+ }
750
+
751
+ function removeObjectToken(tokens, iri) {
752
+ const objectToken = `=?${iri}`;
753
+ return removeOneToken(tokens, t => t === objectToken);
754
+ }
755
+
683
756
  function sanitizeCarrierValueForBlock(block, raw) {
684
757
  const s = String(raw ?? '');
685
758
  const t = block?.carrierType;
@@ -973,6 +1046,17 @@ export function serialize({ text, diff, origin, options = {} }) {
973
1046
  return;
974
1047
  }
975
1048
 
1049
+ // Handle object token removal
1050
+ if (entry?.kind === 'object') {
1051
+ const objectIRI = shortenIRI(quad.object.value, ctx);
1052
+ const { tokens: updated, removed } = removeObjectToken(tokens, objectIRI);
1053
+ if (!removed) return;
1054
+
1055
+ const newAttrs = updated.length === 0 ? '{}' : writeAttrsTokens(updated);
1056
+ edits.push({ start: span.start, end: span.end, text: newAttrs });
1057
+ return;
1058
+ }
1059
+
976
1060
  const tokens = normalizeAttrsTokens(span.text);
977
1061
  let updated = tokens;
978
1062
  let removed = false;
@@ -1044,7 +1128,8 @@ export function serialize({ text, diff, origin, options = {} }) {
1044
1128
  } else {
1045
1129
  const full = quad.object.value;
1046
1130
  const label = shortenIRI(full, ctx);
1047
- edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${label}) {?${predShort}}` });
1131
+ const objectShort = shortenIRI(full, ctx);
1132
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=?${objectShort} ?${predShort}}` });
1048
1133
  }
1049
1134
  return;
1050
1135
  }
@@ -1063,8 +1148,24 @@ export function serialize({ text, diff, origin, options = {} }) {
1063
1148
 
1064
1149
  if (quad.object.termType === 'NamedNode') {
1065
1150
  const full = quad.object.value;
1066
- const label = shortenIRI(full, ctx);
1067
- edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${shortenIRI(full, ctx)} ?${predShort}}` });
1151
+ const objectShort = shortenIRI(full, ctx);
1152
+ const predShort = shortenIRI(quad.predicate.value, ctx);
1153
+
1154
+ // Check if this is a ?predicate form (should use object IRI)
1155
+ const span = readSpan(targetBlock, text, 'attrs');
1156
+ const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);
1157
+ const hasObjectToken = tokens.some(t => t.startsWith('=?'));
1158
+
1159
+ if (hasObjectToken || anchored?.entry?.form === '?') {
1160
+ // Add object token if not present
1161
+ const updated = addObjectToken(tokens, objectShort);
1162
+ if (updated.length !== tokens.length) {
1163
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
1164
+ }
1165
+ } else {
1166
+ // Create new annotation with object token
1167
+ edits.push({ start: result.length, end: result.length, text: `\n[${objectShort}] {=?${objectShort} ?${predShort}}` });
1168
+ }
1068
1169
  return;
1069
1170
  }
1070
1171
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.2.5",
3
+ "version": "0.2.7",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",