mdld-parse 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -2
- package/package.json +1 -4
- package/src/parse.js +68 -5
package/README.md
CHANGED
|
@@ -56,6 +56,7 @@ console.log(result.quads);
|
|
|
56
56
|
- **🧩 Fragments** - Document structuring with `{=#fragment}`
|
|
57
57
|
- **⚡ Polarity system** - Sophisticated diff authoring with `+` and `-` prefixes
|
|
58
58
|
- **📍 Origin tracking** - Complete provenance with lean quad-to-source mapping
|
|
59
|
+
- **🎯 Elevated statements** - Automatic rdf:Statement pattern detection for "golden" graph extraction
|
|
59
60
|
|
|
60
61
|
## 🌟 What is MD-LD?
|
|
61
62
|
|
|
@@ -122,7 +123,24 @@ Each predicate form determines the graph edge:
|
|
|
122
123
|
| `?p` | S → O | `[NASA] {=ex:nasa ?org}` | object property |
|
|
123
124
|
| `!p` | O → S | `[Parent] {=ex:p !hasPart}` | reverse object |
|
|
124
125
|
|
|
125
|
-
## 🎨 Syntax Quick Reference
|
|
126
|
+
## 🎯 Elevated Statements
|
|
127
|
+
|
|
128
|
+
MD-LD automatically detects `rdf:Statement` patterns during parsing and extracts elevated SPO quads for convenient consumption by applications.
|
|
129
|
+
|
|
130
|
+
### Pattern Detection
|
|
131
|
+
|
|
132
|
+
When the parser encounters a complete `rdf:Statement` pattern with `rdf:subject`, `rdf:predicate`, and `rdf:object`, it automatically adds the corresponding SPO quad to the `statements` array:
|
|
133
|
+
|
|
134
|
+
```markdown
|
|
135
|
+
[ex] <http://example.org/>
|
|
136
|
+
|
|
137
|
+
## Elevated statement {=ex:stmt1 .rdf:Statement}
|
|
138
|
+
**Alice** {+ex:alice ?rdf:subject} *knows* {+ex:knows ?rdf:predicate} **Bob** {+ex:bob ?rdf:object}
|
|
139
|
+
|
|
140
|
+
Direct statement: **Alice** {=ex:alice} knows **Bob** {?ex:knows +ex:bob}
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## 🎨 Syntax Quick Reference
|
|
126
144
|
|
|
127
145
|
### Subject Declaration
|
|
128
146
|
Set current subject (emits no quads):
|
|
@@ -180,10 +198,11 @@ Parse MD-LD markdown and return RDF quads with lean origin tracking.
|
|
|
180
198
|
- `context` (object) — Prefix mappings (default: `{ '@vocab': 'http://www.w3.org/2000/01/rdf-schema#', rdf, rdfs, xsd, sh, prov }`)
|
|
181
199
|
- `dataFactory` (object) — Custom RDF/JS DataFactory
|
|
182
200
|
|
|
183
|
-
**Returns:** `{ quads, remove, origin, context }`
|
|
201
|
+
**Returns:** `{ quads, remove, statements, origin, context }`
|
|
184
202
|
|
|
185
203
|
- `quads` — Array of RDF/JS Quads (final resolved graph state)
|
|
186
204
|
- `remove` — Array of RDF/JS Quads (external retractions targeting prior state)
|
|
205
|
+
- `statements` — Array of elevated RDF/JS Quads extracted from rdf:Statement patterns
|
|
187
206
|
- `origin` — Lean origin tracking object with quadIndex for UI navigation
|
|
188
207
|
- `context` — Final context used (includes prefixes)
|
|
189
208
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.1",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -38,8 +38,5 @@
|
|
|
38
38
|
"homepage": "https://mdld.js.org",
|
|
39
39
|
"bugs": {
|
|
40
40
|
"url": "https://github.com/davay42/mdld-parse/issues"
|
|
41
|
-
},
|
|
42
|
-
"devDependencies": {
|
|
43
|
-
"n3": "^2.0.1"
|
|
44
41
|
}
|
|
45
42
|
}
|
package/src/parse.js
CHANGED
|
@@ -364,7 +364,7 @@ function createBlock(subject, types, predicates, range, attrsRange, valueRange,
|
|
|
364
364
|
};
|
|
365
365
|
}
|
|
366
366
|
|
|
367
|
-
function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null) {
|
|
367
|
+
function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null, statements = null, statementCandidates = null) {
|
|
368
368
|
if (!subject || !predicate || !object) return;
|
|
369
369
|
|
|
370
370
|
const quad = dataFactory.quad(subject, predicate, object);
|
|
@@ -397,6 +397,9 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
|
|
|
397
397
|
quadBuffer.set(quadKey, quad);
|
|
398
398
|
quads.push(quad);
|
|
399
399
|
|
|
400
|
+
// Detect rdf:Statement pattern during single-pass parsing
|
|
401
|
+
detectStatementPatternSinglePass(quad, dataFactory, meta, statements, statementCandidates);
|
|
402
|
+
|
|
400
403
|
// Create lean origin entry
|
|
401
404
|
const originEntry = {
|
|
402
405
|
blockId: block.id,
|
|
@@ -413,6 +416,62 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
|
|
|
413
416
|
}
|
|
414
417
|
}
|
|
415
418
|
|
|
419
|
+
// RDF reification vocabulary IRIs, defined once at module level so they are
// not rebuilt for every emitted quad.
const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
const RDF_STATEMENT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement';
const RDF_SUBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject';
const RDF_PREDICATE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate';
const RDF_OBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#object';

/**
 * Incrementally detects reified `rdf:Statement` patterns, one quad at a time.
 *
 * A node is tracked in `statementCandidates` (keyed by `quad.subject.value`)
 * as soon as it is either typed `rdf:Statement` or receives an `rdf:subject`,
 * `rdf:predicate` or `rdf:object` quad. Once the node is typed AND all three
 * parts are known, the elevated quad is built with `dataFactory.quad(...)`,
 * appended to `statements`, and the candidate is removed from the map.
 *
 * The four quads may arrive in any order: parts seen before the type quad are
 * kept, and a repeated type quad does not discard parts already collected.
 *
 * @param {object} quad - Quad with `subject`, `predicate` and `object` terms, each exposing `.value`.
 * @param {object} dataFactory - Factory exposing `quad(subject, predicate, object)`.
 * @param {object|null} meta - Accepted for signature compatibility; not read by this function.
 * @param {Array|null} [statements=null] - Output array that receives elevated quads.
 * @param {Map|null} [statementCandidates=null] - Partially collected patterns keyed by subject value.
 * @returns {void}
 */
function detectStatementPatternSinglePass(quad, dataFactory, meta, statements = null, statementCandidates = null) {
  // Skip if not called from parse context (for testing compatibility)
  if (!statements || !statementCandidates) return;

  const predicate = quad.predicate.value;
  const declaresStatement = predicate === RDF_TYPE && quad.object.value === RDF_STATEMENT;
  const isStatementPart =
    predicate === RDF_SUBJECT || predicate === RDF_PREDICATE || predicate === RDF_OBJECT;

  // Early filter: everything unrelated to reification is ignored and never
  // creates a candidate entry.
  if (!declaresStatement && !isStatementPart) return;

  const key = quad.subject.value;
  let candidate = statementCandidates.get(key);
  if (!candidate) {
    // `typed` stays false until the rdf:type rdf:Statement quad is seen, so
    // untyped nodes that merely use rdf:subject etc. are never elevated.
    candidate = { spo: {}, typed: false };
    statementCandidates.set(key, candidate);
  }

  if (declaresStatement) {
    // Mark instead of replacing, so parts collected earlier survive.
    candidate.typed = true;
  } else if (predicate === RDF_SUBJECT) {
    candidate.spo.subject = quad.object;
  } else if (predicate === RDF_PREDICATE) {
    candidate.spo.predicate = quad.object;
  } else {
    candidate.spo.object = quad.object;
    // Store the original quad for potential literal extraction (kept for
    // compatibility; nothing in this function reads it).
    candidate.objectQuad = quad;
  }

  if (!candidate.typed) return;

  const { subject, predicate: elevatedPredicate, object } = candidate.spo;
  if (!subject || !elevatedPredicate || !object) return;

  statements.push(dataFactory.quad(subject, elevatedPredicate, object));
  // Clean up candidate to avoid duplicate detection
  statementCandidates.delete(key);
}
|
|
474
|
+
|
|
416
475
|
const resolveFragment = (fragment, state) => {
|
|
417
476
|
if (!state.currentSubject) return null;
|
|
418
477
|
const baseIRI = state.currentSubject.value.split('#')[0];
|
|
@@ -444,7 +503,8 @@ const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
|
|
|
444
503
|
state.df.namedNode(expandIRI('rdf:type', state.ctx)),
|
|
445
504
|
state.df.namedNode(expandedType),
|
|
446
505
|
state.df,
|
|
447
|
-
{ kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove }
|
|
506
|
+
{ kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove },
|
|
507
|
+
state.statements, state.statementCandidates
|
|
448
508
|
);
|
|
449
509
|
};
|
|
450
510
|
|
|
@@ -487,7 +547,8 @@ function processPredicateAnnotations(sem, newSubject, previousSubject, localObje
|
|
|
487
547
|
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
488
548
|
emitQuad(state.quads, state.quadBuffer, state.removeSet, state.origin.quadIndex, block,
|
|
489
549
|
role.subject, P, role.object, state.df,
|
|
490
|
-
{ kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false }
|
|
550
|
+
{ kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false },
|
|
551
|
+
state.statements, state.statementCandidates
|
|
491
552
|
);
|
|
492
553
|
}
|
|
493
554
|
});
|
|
@@ -592,7 +653,9 @@ export function parse(text, options = {}) {
|
|
|
592
653
|
origin: { quadIndex: new Map() },
|
|
593
654
|
currentSubject: null,
|
|
594
655
|
tokens: null,
|
|
595
|
-
currentTokenIndex: -1
|
|
656
|
+
currentTokenIndex: -1,
|
|
657
|
+
statements: [],
|
|
658
|
+
statementCandidates: new Map() // Track incomplete rdf:Statement patterns
|
|
596
659
|
};
|
|
597
660
|
|
|
598
661
|
state.tokens = scanTokens(text);
|
|
@@ -629,5 +692,5 @@ export function parse(text, options = {}) {
|
|
|
629
692
|
return !quadKeys.has(key);
|
|
630
693
|
});
|
|
631
694
|
|
|
632
|
-
return { quads: state.quads, remove: filteredRemove, origin: state.origin, context: state.ctx };
|
|
695
|
+
return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
|
|
633
696
|
}
|