mdld-parse 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.js +88 -27
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -1,4 +1,4 @@
1
- const DEFAULT_CONTEXT = {
1
+ export const DEFAULT_CONTEXT = {
2
2
  '@vocab': 'http://schema.org/',
3
3
  rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
4
4
  rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
@@ -6,7 +6,7 @@ const DEFAULT_CONTEXT = {
6
6
  schema: 'http://schema.org/'
7
7
  };
8
8
 
9
- const DataFactory = {
9
+ export const DataFactory = {
10
10
  namedNode: (v) => ({ termType: 'NamedNode', value: v }),
11
11
  blankNode: (v = `b${Math.random().toString(36).slice(2, 11)}`) => ({ termType: 'BlankNode', value: v }),
12
12
  literal: (v, lang) => {
@@ -18,14 +18,14 @@ const DataFactory = {
18
18
  quad: (s, p, o, g) => ({ subject: s, predicate: p, object: o, graph: g || DataFactory.namedNode('') })
19
19
  };
20
20
 
21
- function hash(str) {
21
+ export function hash(str) {
22
22
  let h = 5381;
23
23
  for (let i = 0; i < str.length; i++) h = ((h << 5) + h) + str.charCodeAt(i);
24
24
  return Math.abs(h).toString(16).slice(0, 12);
25
25
  }
26
26
 
27
27
  // IRI Utilities
28
- function expandIRI(term, ctx) {
28
+ export function expandIRI(term, ctx) {
29
29
  if (term == null) return null;
30
30
  const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
31
31
  const t = raw.trim();
@@ -48,17 +48,13 @@ export function shortenIRI(iri, ctx) {
48
48
  return iri;
49
49
  }
50
50
 
51
- function processIRI(term, ctx, operation = 'expand') {
52
- return operation === 'expand' ? expandIRI(term, ctx) : shortenIRI(term, ctx);
53
- }
54
-
55
- function parseSemanticBlock(raw) {
51
+ export function parseSemanticBlock(raw) {
56
52
  try {
57
53
  const src = String(raw || '').trim();
58
54
  const cleaned = src.replace(/^\{|\}$/g, '').trim();
59
- if (!cleaned) return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
55
+ if (!cleaned) return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
60
56
 
61
- const result = { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
57
+ const result = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
62
58
  const re = /\S+/g;
63
59
  let m;
64
60
  while ((m = re.exec(cleaned)) !== null) {
@@ -80,6 +76,13 @@ function parseSemanticBlock(raw) {
80
76
  continue;
81
77
  }
82
78
 
79
+ if (token.startsWith('=?')) {
80
+ const iri = token.substring(2);
81
+ result.object = iri;
82
+ result.entries.push({ kind: 'object', iri, relRange: { start: relStart, end: relEnd }, raw: token });
83
+ continue;
84
+ }
85
+
83
86
  if (token.startsWith('=')) {
84
87
  const iri = token.substring(1);
85
88
  result.subject = iri;
@@ -137,7 +140,7 @@ function parseSemanticBlock(raw) {
137
140
  return result;
138
141
  } catch (error) {
139
142
  console.error(`Error parsing semantic block ${raw}:`, error);
140
- return { subject: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
143
+ return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
141
144
  }
142
145
  }
143
146
 
@@ -472,11 +475,13 @@ function createLiteral(value, datatype, language, context, dataFactory) {
472
475
  function processAnnotation(carrier, sem, state) {
473
476
  if (sem.subject === 'RESET') {
474
477
  state.currentSubject = null;
478
+ state.currentObject = null;
475
479
  return;
476
480
  }
477
481
 
478
482
  const previousSubject = state.currentSubject;
479
483
  let newSubject = null;
484
+ let localObject = null;
480
485
 
481
486
  if (sem.subject) {
482
487
  if (sem.subject.startsWith('=#')) {
@@ -492,6 +497,12 @@ function processAnnotation(carrier, sem, state) {
492
497
  newSubject = state.df.namedNode(expandIRI(sem.subject, state.ctx));
493
498
  }
494
499
  }
500
+
501
+ if (sem.object) {
502
+ // Handle soft IRI object declaration - local to this annotation only
503
+ localObject = state.df.namedNode(expandIRI(sem.object, state.ctx));
504
+ }
505
+
495
506
  if (newSubject) state.currentSubject = newSubject;
496
507
 
497
508
  const S = state.currentSubject;
@@ -501,12 +512,15 @@ function processAnnotation(carrier, sem, state) {
501
512
  state.origin.blocks.set(block.id, block);
502
513
 
503
514
  const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
504
- const O = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
515
+ const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
505
516
 
506
517
  sem.types.forEach(t => {
507
518
  const typeIRI = typeof t === 'string' ? t : t.iri;
508
519
  const entryIndex = typeof t === 'string' ? null : t.entryIndex;
509
- const typeSubject = O || S;
520
+ // For types with subject declarations, the type applies to the new subject
521
+ // For types with soft IRI declarations, the type applies to the soft IRI object
522
+ // Otherwise, type applies to carrier object or current subject
523
+ const typeSubject = newSubject ? newSubject : (localObject || carrierO || S);
510
524
  const expandedType = expandIRI(typeIRI, state.ctx);
511
525
  emitQuad(state.quads, state.origin.quadIndex, block.id, typeSubject, state.df.namedNode(expandIRI('rdf:type', state.ctx)), state.df.namedNode(expandedType), state.df, { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex });
512
526
  });
@@ -516,18 +530,26 @@ function processAnnotation(carrier, sem, state) {
516
530
  const token = `${pred.form}${pred.iri}`;
517
531
 
518
532
  if (pred.form === '') {
519
- emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
533
+ // S —p→ L (use soft IRI object as subject if available, otherwise current subject)
534
+ const subjectIRI = localObject || S;
535
+ emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, L, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
520
536
  } else if (pred.form === '?') {
521
- if (newSubject) {
522
- emitQuad(state.quads, state.origin.quadIndex, block.id, previousSubject, P, newSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
523
- } else if (O) {
524
- emitQuad(state.quads, state.origin.quadIndex, block.id, S, P, O, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
537
+ // S —p→ O (use previous subject as subject, newSubject as object)
538
+ const subjectIRI = newSubject ? previousSubject : S;
539
+ const objectIRI = localObject || newSubject || carrierO;
540
+ if (objectIRI && subjectIRI) {
541
+ emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, objectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
525
542
  }
543
+ } else if (pred.form === '^') {
544
+ // L —p→ S (use soft IRI object as subject if available, otherwise current subject)
545
+ const subjectIRI = localObject || S;
546
+ emitQuad(state.quads, state.origin.quadIndex, block.id, L, P, subjectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
526
547
  } else if (pred.form === '^?') {
527
- if (newSubject) {
528
- emitQuad(state.quads, state.origin.quadIndex, block.id, newSubject, P, previousSubject, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
529
- } else if (O) {
530
- emitQuad(state.quads, state.origin.quadIndex, block.id, O, P, S, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
548
+ // O —p→ S (use previous subject as object, newSubject as subject)
549
+ const objectIRI = newSubject ? previousSubject : S;
550
+ const subjectIRI = localObject || newSubject || carrierO;
551
+ if (objectIRI && subjectIRI) {
552
+ emitQuad(state.quads, state.origin.quadIndex, block.id, subjectIRI, P, objectIRI, state.df, { kind: 'pred', token, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex });
531
553
  }
532
554
  }
533
555
  });
@@ -622,7 +644,8 @@ export function parse(text, options = {}) {
622
644
  df: options.dataFactory || DataFactory,
623
645
  quads: [],
624
646
  origin: { blocks: new Map(), quadIndex: new Map() },
625
- currentSubject: null
647
+ currentSubject: null,
648
+ currentObject: null
626
649
  };
627
650
 
628
651
  const tokens = scanTokens(text);
@@ -720,6 +743,16 @@ function removeOneToken(tokens, matchFn) {
720
743
  return i === -1 ? { tokens, removed: false } : { tokens: [...tokens.slice(0, i), ...tokens.slice(i + 1)], removed: true };
721
744
  }
722
745
 
746
+ function addObjectToken(tokens, iri) {
747
+ const objectToken = `=?${iri}`;
748
+ return tokens.includes(objectToken) ? tokens : [...tokens, objectToken];
749
+ }
750
+
751
+ function removeObjectToken(tokens, iri) {
752
+ const objectToken = `=?${iri}`;
753
+ return removeOneToken(tokens, t => t === objectToken);
754
+ }
755
+
723
756
  function sanitizeCarrierValueForBlock(block, raw) {
724
757
  const s = String(raw ?? '');
725
758
  const t = block?.carrierType;
@@ -1013,6 +1046,17 @@ export function serialize({ text, diff, origin, options = {} }) {
1013
1046
  return;
1014
1047
  }
1015
1048
 
1049
+ // Handle object token removal
1050
+ if (entry?.kind === 'object') {
1051
+ const objectIRI = shortenIRI(quad.object.value, ctx);
1052
+ const { tokens: updated, removed } = removeObjectToken(tokens, objectIRI);
1053
+ if (!removed) return;
1054
+
1055
+ const newAttrs = updated.length === 0 ? '{}' : writeAttrsTokens(updated);
1056
+ edits.push({ start: span.start, end: span.end, text: newAttrs });
1057
+ return;
1058
+ }
1059
+
1016
1060
  const tokens = normalizeAttrsTokens(span.text);
1017
1061
  let updated = tokens;
1018
1062
  let removed = false;
@@ -1084,7 +1128,8 @@ export function serialize({ text, diff, origin, options = {} }) {
1084
1128
  } else {
1085
1129
  const full = quad.object.value;
1086
1130
  const label = shortenIRI(full, ctx);
1087
- edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${label}) {?${predShort}}` });
1131
+ const objectShort = shortenIRI(full, ctx);
1132
+ edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=?${objectShort} ?${predShort}}` });
1088
1133
  }
1089
1134
  return;
1090
1135
  }
@@ -1103,8 +1148,24 @@ export function serialize({ text, diff, origin, options = {} }) {
1103
1148
 
1104
1149
  if (quad.object.termType === 'NamedNode') {
1105
1150
  const full = quad.object.value;
1106
- const label = shortenIRI(full, ctx);
1107
- edits.push({ start: result.length, end: result.length, text: `\n[${label}] {=${shortenIRI(full, ctx)} ?${predShort}}` });
1151
+ const objectShort = shortenIRI(full, ctx);
1152
+ const predShort = shortenIRI(quad.predicate.value, ctx);
1153
+
1154
+ // Check if this is a ?predicate form (should use object IRI)
1155
+ const span = readSpan(targetBlock, text, 'attrs');
1156
+ const tokens = blockTokensFromEntries(targetBlock) || normalizeAttrsTokens(span.text);
1157
+ const hasObjectToken = tokens.some(t => t.startsWith('=?'));
1158
+
1159
+ if (hasObjectToken || anchored?.entry?.form === '?') {
1160
+ // Add object token if not present
1161
+ const updated = addObjectToken(tokens, objectShort);
1162
+ if (updated.length !== tokens.length) {
1163
+ edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
1164
+ }
1165
+ } else {
1166
+ // Create new annotation with object token
1167
+ edits.push({ start: result.length, end: result.length, text: `\n[${objectShort}] {=?${objectShort} ?${predShort}}` });
1168
+ }
1108
1169
  return;
1109
1170
  }
1110
1171
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.2.6",
3
+ "version": "0.2.7",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",