mdld-parse 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +21 -2
  2. package/package.json +1 -4
  3. package/src/parse.js +68 -5
package/README.md CHANGED
@@ -56,6 +56,7 @@ console.log(result.quads);
56
56
  - **🧩 Fragments** - Document structuring with `{=#fragment}`
57
57
  - **⚡ Polarity system** - Sophisticated diff authoring with `+` and `-` prefixes
58
58
  - **📍 Origin tracking** - Complete provenance with lean quad-to-source mapping
59
+ - **🎯 Elevated statements** - Automatic rdf:Statement pattern detection for "golden" graph extraction
59
60
 
60
61
  ## 🌟 What is MD-LD?
61
62
 
@@ -122,7 +123,24 @@ Each predicate form determines the graph edge:
122
123
  | `?p` | S → O | `[NASA] {=ex:nasa ?org}` | object property |
123
124
  | `!p` | O → S | `[Parent] {=ex:p !hasPart}` | reverse object |
124
125
 
125
- ## 🎨 Syntax Quick Reference
126
+ ## Elevated Statements
127
+
128
+ MD-LD automatically detects `rdf:Statement` patterns during parsing and extracts elevated SPO quads for convenient consumption by applications.
129
+
130
+ ### Pattern Detection
131
+
132
+ When the parser encounters a complete `rdf:Statement` pattern with `rdf:subject`, `rdf:predicate`, and `rdf:object`, it automatically adds the corresponding SPO quad to the `statements` array:
133
+
134
+ ```markdown
135
+ [ex] <http://example.org/>
136
+
137
+ ## Elevated statement {=ex:stmt1 .rdf:Statement}
138
+ **Alice** {+ex:alice ?rdf:subject} *knows* {+ex:knows ?rdf:predicate} **Bob** {+ex:bob ?rdf:object}
139
+
140
+ Direct statement: **Alice** {=ex:alice} knows **Bob** {?ex:knows +ex:bob}
141
+ ```
142
+
143
+ ## 🎨 Syntax Quick Reference
126
144
 
127
145
  ### Subject Declaration
128
146
  Set current subject (emits no quads):
@@ -180,10 +198,11 @@ Parse MD-LD markdown and return RDF quads with lean origin tracking.
180
198
  - `context` (object) — Prefix mappings (default: `{ '@vocab': 'http://www.w3.org/2000/01/rdf-schema#', rdf, rdfs, xsd, sh, prov }`)
181
199
  - `dataFactory` (object) — Custom RDF/JS DataFactory
182
200
 
183
- **Returns:** `{ quads, remove, origin, context }`
201
+ **Returns:** `{ quads, remove, statements, origin, context }`
184
202
 
185
203
  - `quads` — Array of RDF/JS Quads (final resolved graph state)
186
204
  - `remove` — Array of RDF/JS Quads (external retractions targeting prior state)
205
+ - `statements` — Array of elevated RDF/JS Quads extracted from rdf:Statement patterns
187
206
  - `origin` — Lean origin tracking object with quadIndex for UI navigation
188
207
  - `context` — Final context used (includes prefixes)
189
208
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.7.0",
3
+ "version": "0.7.1",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -38,8 +38,5 @@
38
38
  "homepage": "https://mdld.js.org",
39
39
  "bugs": {
40
40
  "url": "https://github.com/davay42/mdld-parse/issues"
41
- },
42
- "devDependencies": {
43
- "n3": "^2.0.1"
44
41
  }
45
42
  }
package/src/parse.js CHANGED
@@ -364,7 +364,7 @@ function createBlock(subject, types, predicates, range, attrsRange, valueRange,
364
364
  };
365
365
  }
366
366
 
367
- function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null) {
367
+ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null, statements = null, statementCandidates = null) {
368
368
  if (!subject || !predicate || !object) return;
369
369
 
370
370
  const quad = dataFactory.quad(subject, predicate, object);
@@ -397,6 +397,9 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
397
397
  quadBuffer.set(quadKey, quad);
398
398
  quads.push(quad);
399
399
 
400
+ // Detect rdf:Statement pattern during single-pass parsing
401
+ detectStatementPatternSinglePass(quad, dataFactory, meta, statements, statementCandidates);
402
+
400
403
  // Create lean origin entry
401
404
  const originEntry = {
402
405
  blockId: block.id,
@@ -413,6 +416,62 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
413
416
  }
414
417
  }
415
418
 
419
+ // Extract RDF constants once at module level for efficiency
420
+ const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
421
+ const RDF_STATEMENT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement';
422
+ const RDF_SUBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject';
423
+ const RDF_PREDICATE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate';
424
+ const RDF_OBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#object';
425
+
426
+ function detectStatementPatternSinglePass(quad, dataFactory, meta, statements = null, statementCandidates = null) {
427
+ // Skip if not called from parse context (for testing compatibility)
428
+ if (!statements || !statementCandidates) return;
429
+
430
+ const predicate = quad.predicate.value;
431
+
432
+ // Early filter: only process rdf:Statement related predicates
433
+ if (predicate !== RDF_TYPE &&
434
+ predicate !== RDF_SUBJECT &&
435
+ predicate !== RDF_PREDICATE &&
436
+ predicate !== RDF_OBJECT) {
437
+ return;
438
+ }
439
+
440
+ // Check if this quad starts a new rdf:Statement pattern
441
+ if (predicate === RDF_TYPE && quad.object.value === RDF_STATEMENT) {
442
+ statementCandidates.set(quad.subject.value, { spo: {} });
443
+ return;
444
+ }
445
+
446
+ // Check if this quad completes part of an existing rdf:Statement pattern
447
+ const candidate = statementCandidates.get(quad.subject.value);
448
+ if (!candidate) return;
449
+
450
+ // Direct property assignment instead of switch for better performance
451
+ if (predicate === RDF_SUBJECT) {
452
+ candidate.spo.subject = quad.object;
453
+ } else if (predicate === RDF_PREDICATE) {
454
+ candidate.spo.predicate = quad.object;
455
+ } else if (predicate === RDF_OBJECT) {
456
+ candidate.spo.object = quad.object;
457
+ // Store the original quad for potential literal extraction
458
+ candidate.objectQuad = quad;
459
+ }
460
+
461
+ // Check if pattern is complete and create elevated SPO quad
462
+ if (candidate.spo.subject && candidate.spo.predicate && candidate.spo.object) {
463
+ // Use the object directly - literal detection happens at parse time
464
+ const spoQuad = dataFactory.quad(
465
+ candidate.spo.subject,
466
+ candidate.spo.predicate,
467
+ candidate.spo.object
468
+ );
469
+ statements.push(spoQuad);
470
+ // Clean up candidate to avoid duplicate detection
471
+ statementCandidates.delete(quad.subject.value);
472
+ }
473
+ }
474
+
416
475
  const resolveFragment = (fragment, state) => {
417
476
  if (!state.currentSubject) return null;
418
477
  const baseIRI = state.currentSubject.value.split('#')[0];
@@ -444,7 +503,8 @@ const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
444
503
  state.df.namedNode(expandIRI('rdf:type', state.ctx)),
445
504
  state.df.namedNode(expandedType),
446
505
  state.df,
447
- { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove }
506
+ { kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove },
507
+ state.statements, state.statementCandidates
448
508
  );
449
509
  };
450
510
 
@@ -487,7 +547,8 @@ function processPredicateAnnotations(sem, newSubject, previousSubject, localObje
487
547
  const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
488
548
  emitQuad(state.quads, state.quadBuffer, state.removeSet, state.origin.quadIndex, block,
489
549
  role.subject, P, role.object, state.df,
490
- { kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false }
550
+ { kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false },
551
+ state.statements, state.statementCandidates
491
552
  );
492
553
  }
493
554
  });
@@ -592,7 +653,9 @@ export function parse(text, options = {}) {
592
653
  origin: { quadIndex: new Map() },
593
654
  currentSubject: null,
594
655
  tokens: null,
595
- currentTokenIndex: -1
656
+ currentTokenIndex: -1,
657
+ statements: [],
658
+ statementCandidates: new Map() // Track incomplete rdf:Statement patterns
596
659
  };
597
660
 
598
661
  state.tokens = scanTokens(text);
@@ -629,5 +692,5 @@ export function parse(text, options = {}) {
629
692
  return !quadKeys.has(key);
630
693
  });
631
694
 
632
- return { quads: state.quads, remove: filteredRemove, origin: state.origin, context: state.ctx };
695
+ return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
633
696
  }