mdld-parse 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -56,6 +56,7 @@ console.log(result.quads);
56
56
  - **🧩 Fragments** - Document structuring with `{=#fragment}`
57
57
  - **⚡ Polarity system** - Sophisticated diff authoring with `+` and `-` prefixes
58
58
  - **📍 Origin tracking** - Complete provenance with lean quad-to-source mapping
59
+ - **🎯 Elevated statements** - Automatic rdf:Statement pattern detection for "golden" graph extraction
59
60
 
60
61
  ## 🌟 What is MD-LD?
61
62
 
@@ -122,7 +123,24 @@ Each predicate form determines the graph edge:
122
123
  | `?p` | S → O | `[NASA] {=ex:nasa ?org}` | object property |
123
124
  | `!p` | O → S | `[Parent] {=ex:p !hasPart}` | reverse object |
124
125
 
125
- ## 🎨 Syntax Quick Reference
126
+ ## Elevated Statements
127
+
128
+ MD-LD automatically detects `rdf:Statement` patterns during parsing and extracts elevated SPO quads for convenient consumption by applications.
129
+
130
+ ### Pattern Detection
131
+
132
+ When the parser encounters a complete `rdf:Statement` pattern with `rdf:subject`, `rdf:predicate`, and `rdf:object`, it automatically adds the corresponding SPO quad to the `statements` array:
133
+
134
+ ```markdown
135
+ [ex] <http://example.org/>
136
+
137
+ ## Elevated statement {=ex:stmt1 .rdf:Statement}
138
+ **Alice** {+ex:alice ?rdf:subject} *knows* {+ex:knows ?rdf:predicate} **Bob** {+ex:bob ?rdf:object}
139
+
140
+ Direct statement: **Alice** {=ex:alice} knows **Bob** {?ex:knows +ex:bob}
141
+ ```
142
+
143
+ ## 🎨 Syntax Quick Reference
126
144
 
127
145
  ### Subject Declaration
128
146
  Set current subject (emits no quads):
@@ -180,10 +198,11 @@ Parse MD-LD markdown and return RDF quads with lean origin tracking.
180
198
  - `context` (object) — Prefix mappings (default: `{ '@vocab': 'http://www.w3.org/2000/01/rdf-schema#', rdf, rdfs, xsd, sh, prov }`)
181
199
  - `dataFactory` (object) — Custom RDF/JS DataFactory
182
200
 
183
- **Returns:** `{ quads, remove, origin, context }`
201
+ **Returns:** `{ quads, remove, statements, origin, context }`
184
202
 
185
203
  - `quads` — Array of RDF/JS Quads (final resolved graph state)
186
204
  - `remove` — Array of RDF/JS Quads (external retractions targeting prior state)
205
+ - `statements` — Array of elevated RDF/JS Quads extracted from rdf:Statement patterns
187
206
  - `origin` — Lean origin tracking object with quadIndex for UI navigation
188
207
  - `context` — Final context used (includes prefixes)
189
208
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.7.0",
3
+ "version": "0.7.2",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -38,8 +38,5 @@
38
38
  "homepage": "https://mdld.js.org",
39
39
  "bugs": {
40
40
  "url": "https://github.com/davay42/mdld-parse/issues"
41
- },
42
- "devDependencies": {
43
- "n3": "^2.0.1"
44
41
  }
45
42
  }
package/src/merge.js CHANGED
@@ -34,13 +34,14 @@ function normalizeInput(input, options, docContext) {
34
34
  * Merges multiple MDLD documents with diff polarity resolution
35
35
  * @param {Array<string|ParseResult>} docs
36
36
  * @param {Object} options
37
- * @returns {Object}
37
+ * @returns {Object} Merge result with quads, remove, statements, origin, and context
38
38
  */
39
39
  export function merge(docs, options = {}) {
40
40
  const sessionBuffer = new Map(); // Use Map instead of Set for proper quad storage
41
41
  const sessionRemoveSet = new Set();
42
42
  const allDocuments = [];
43
43
  const quadIndex = new Map();
44
+ const allStatements = []; // Collect statements from all documents
44
45
 
45
46
  // Process each document in order
46
47
  for (let i = 0; i < docs.length; i++) {
@@ -57,10 +58,16 @@ export function merge(docs, options = {}) {
57
58
  index: i,
58
59
  input: typeof input === 'string' ? 'string' : 'ParseResult',
59
60
  origin: doc.origin,
60
- context: doc.context
61
+ context: doc.context,
62
+ statementsCount: doc.statements?.length || 0 // Track statements count
61
63
  };
62
64
  allDocuments.push(documentOrigin);
63
65
 
66
+ // Collect statements from this document
67
+ if (doc.statements && doc.statements.length > 0) {
68
+ allStatements.push(...doc.statements);
69
+ }
70
+
64
71
  // Fold assertions into session buffer
65
72
  for (const quad of doc.quads) {
66
73
  const key = quadKey(quad);
@@ -125,6 +132,7 @@ export function merge(docs, options = {}) {
125
132
  return {
126
133
  quads: filteredQuads,
127
134
  remove: filteredRemove,
135
+ statements: allStatements, // Include all collected statements
128
136
  origin: mergeOrigin,
129
137
  context: finalContext
130
138
  };
package/src/parse.js CHANGED
@@ -210,6 +210,12 @@ function createCarrier(type, text, attrs, attrsRange, valueRange, range, pos, ex
210
210
  return { type, text, attrs, attrsRange, valueRange, range, pos, ...extra };
211
211
  }
212
212
 
213
+ // Pre-compiled carrier patterns for better performance
214
+ const CARRIER_PATTERN_ARRAY = [
215
+ ['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
216
+ ['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
217
+ ];
218
+
213
219
  function extractInlineCarriers(text, baseOffset = 0) {
214
220
  const carriers = [];
215
221
  let pos = 0;
@@ -243,7 +249,8 @@ function extractInlineCarriers(text, baseOffset = 0) {
243
249
  const extractor = CARRIER_EXTRACTORS[text[pos]];
244
250
  if (extractor) return extractor(text, pos, baseOffset);
245
251
 
246
- for (const [type, pattern] of Object.entries(INLINE_CARRIER_PATTERNS)) {
252
+ // Use pre-compiled patterns instead of Object.entries()
253
+ for (const [type, pattern] of CARRIER_PATTERN_ARRAY) {
247
254
  pattern.lastIndex = pos;
248
255
  const match = pattern.exec(text);
249
256
  if (match) {
@@ -364,7 +371,7 @@ function createBlock(subject, types, predicates, range, attrsRange, valueRange,
364
371
  };
365
372
  }
366
373
 
367
- function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null) {
374
+ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null, statements = null, statementCandidates = null) {
368
375
  if (!subject || !predicate || !object) return;
369
376
 
370
377
  const quad = dataFactory.quad(subject, predicate, object);
@@ -397,14 +404,17 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
397
404
  quadBuffer.set(quadKey, quad);
398
405
  quads.push(quad);
399
406
 
400
- // Create lean origin entry
407
+ // Detect rdf:Statement pattern during single-pass parsing
408
+ detectStatementPatternSinglePass(quad, dataFactory, meta, statements, statementCandidates);
409
+
410
+ // Create lean origin entry - avoid spread operator for better performance
401
411
  const originEntry = {
402
412
  blockId: block.id,
403
413
  range: block.range,
404
414
  carrierType: block.carrierType,
405
415
  subject: subject.value,
406
416
  predicate: predicate.value,
407
- context: { ...block.context },
417
+ context: block.context, // Direct reference instead of spread
408
418
  polarity: meta?.remove ? '-' : '+',
409
419
  value: block.text || ''
410
420
  };
@@ -413,10 +423,68 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
413
423
  }
414
424
  }
415
425
 
426
+ // Extract RDF constants once at module level for efficiency
427
+ const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
428
+ const RDF_STATEMENT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement';
429
+ const RDF_SUBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject';
430
+ const RDF_PREDICATE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate';
431
+ const RDF_OBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#object';
432
+
433
+ function detectStatementPatternSinglePass(quad, dataFactory, meta, statements = null, statementCandidates = null) {
434
+ // Skip if not called from parse context (for testing compatibility)
435
+ if (!statements || !statementCandidates) return;
436
+
437
+ const predicate = quad.predicate.value;
438
+
439
+ // Early filter: only process rdf:Statement related predicates
440
+ if (predicate !== RDF_TYPE &&
441
+ predicate !== RDF_SUBJECT &&
442
+ predicate !== RDF_PREDICATE &&
443
+ predicate !== RDF_OBJECT) {
444
+ return;
445
+ }
446
+
447
+ // Check if this quad starts a new rdf:Statement pattern
448
+ if (predicate === RDF_TYPE && quad.object.value === RDF_STATEMENT) {
449
+ statementCandidates.set(quad.subject.value, { spo: {} });
450
+ return;
451
+ }
452
+
453
+ // Check if this quad completes part of an existing rdf:Statement pattern
454
+ const candidate = statementCandidates.get(quad.subject.value);
455
+ if (!candidate) return;
456
+
457
+ // Direct property assignment instead of switch for better performance
458
+ if (predicate === RDF_SUBJECT) {
459
+ candidate.spo.subject = quad.object;
460
+ } else if (predicate === RDF_PREDICATE) {
461
+ candidate.spo.predicate = quad.object;
462
+ } else if (predicate === RDF_OBJECT) {
463
+ candidate.spo.object = quad.object;
464
+ // Store the original quad for potential literal extraction
465
+ candidate.objectQuad = quad;
466
+ }
467
+
468
+ // Check if pattern is complete and create elevated SPO quad
469
+ if (candidate.spo.subject && candidate.spo.predicate && candidate.spo.object) {
470
+ // Use the object directly - literal detection happens at parse time
471
+ const spoQuad = dataFactory.quad(
472
+ candidate.spo.subject,
473
+ candidate.spo.predicate,
474
+ candidate.spo.object
475
+ );
476
+ statements.push(spoQuad);
477
+ // Clean up candidate to avoid duplicate detection
478
+ statementCandidates.delete(quad.subject.value);
479
+ }
480
+ }
481
+
416
482
  const resolveFragment = (fragment, state) => {
417
483
  if (!state.currentSubject) return null;
418
- const baseIRI = state.currentSubject.value.split('#')[0];
419
- return state.df.namedNode(`${baseIRI}#${fragment}`);
484
+ const subjectValue = state.currentSubject.value;
485
+ const hashIndex = subjectValue.indexOf('#');
486
+ const baseIRI = hashIndex > -1 ? subjectValue.slice(0, hashIndex) : subjectValue;
487
+ return state.df.namedNode(baseIRI + '#' + fragment);
420
488
  };
421
489
 
422
490
  function resolveSubject(sem, state) {
@@ -444,7 +512,8 @@ const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
444
512
  state.df.namedNode(expandIRI('rdf:type', state.ctx)),
445
513
  state.df.namedNode(expandedType),
446
514
  state.df,
447
- { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove }
515
+ { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove },
516
+ state.statements, state.statementCandidates
448
517
  );
449
518
  };
450
519
 
@@ -487,7 +556,8 @@ function processPredicateAnnotations(sem, newSubject, previousSubject, localObje
487
556
  const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
488
557
  emitQuad(state.quads, state.quadBuffer, state.removeSet, state.origin.quadIndex, block,
489
558
  role.subject, P, role.object, state.df,
490
- { kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false }
559
+ { kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false },
560
+ state.statements, state.statementCandidates
491
561
  );
492
562
  }
493
563
  });
@@ -592,23 +662,28 @@ export function parse(text, options = {}) {
592
662
  origin: { quadIndex: new Map() },
593
663
  currentSubject: null,
594
664
  tokens: null,
595
- currentTokenIndex: -1
665
+ currentTokenIndex: -1,
666
+ statements: [],
667
+ statementCandidates: new Map() // Track incomplete rdf:Statement patterns
596
668
  };
597
669
 
598
670
  state.tokens = scanTokens(text);
599
671
 
600
- state.tokens.filter(t => t.type === 'prefix').forEach(t => {
601
- let resolvedIri = t.iri;
602
- if (t.iri.includes(':')) {
603
- const colonIndex = t.iri.indexOf(':');
604
- const potentialPrefix = t.iri.substring(0, colonIndex);
605
- const reference = t.iri.substring(colonIndex + 1);
606
- if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
607
- resolvedIri = state.ctx[potentialPrefix] + reference;
672
+ // Single loop instead of filter+forEach for better performance
673
+ for (const token of state.tokens) {
674
+ if (token.type === 'prefix') {
675
+ let resolvedIri = token.iri;
676
+ if (token.iri.includes(':')) {
677
+ const colonIndex = token.iri.indexOf(':');
678
+ const potentialPrefix = token.iri.substring(0, colonIndex);
679
+ const reference = token.iri.substring(colonIndex + 1);
680
+ if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
681
+ resolvedIri = state.ctx[potentialPrefix] + reference;
682
+ }
608
683
  }
684
+ state.ctx[token.prefix] = resolvedIri;
609
685
  }
610
- state.ctx[t.prefix] = resolvedIri;
611
- });
686
+ }
612
687
 
613
688
  for (let i = 0; i < state.tokens.length; i++) {
614
689
  const token = state.tokens[i];
@@ -616,18 +691,20 @@ export function parse(text, options = {}) {
616
691
  TOKEN_PROCESSORS[token.type]?.(token, state);
617
692
  }
618
693
 
619
- // Convert removeSet to array and ensure hard invariant: quads ∩ remove = ∅
620
- const removeArray = Array.from(state.removeSet);
694
+ // Optimize array operations - avoid Array.from() and filter()
621
695
  const quadKeys = new Set();
622
- state.quads.forEach(q => {
623
- quadKeys.add(quadIndexKey(q.subject, q.predicate, q.object));
624
- });
696
+ for (const quad of state.quads) {
697
+ quadKeys.add(quadIndexKey(quad.subject, quad.predicate, quad.object));
698
+ }
625
699
 
626
- // Filter removeArray to ensure no overlap with quads
627
- const filteredRemove = removeArray.filter(quad => {
700
+ // Direct iteration instead of Array.from() + filter()
701
+ const filteredRemove = [];
702
+ for (const quad of state.removeSet) {
628
703
  const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
629
- return !quadKeys.has(key);
630
- });
704
+ if (!quadKeys.has(key)) {
705
+ filteredRemove.push(quad);
706
+ }
707
+ }
631
708
 
632
- return { quads: state.quads, remove: filteredRemove, origin: state.origin, context: state.ctx };
709
+ return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
633
710
  }