mdld-parse 0.3.5 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -7
- package/package.json +5 -2
- package/src/parse.js +63 -7
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# MD-LD Parse v0.
|
|
1
|
+
# MD-LD Parse v0.4.1
|
|
2
2
|
|
|
3
3
|
**Markdown-Linked Data (MD-LD)** — a deterministic, streaming-friendly RDF authoring format that extends Markdown with explicit `{...}` annotations.
|
|
4
4
|
|
|
@@ -34,6 +34,7 @@ ex:armstrong schema:name "Neil Armstrong" .
|
|
|
34
34
|
|
|
35
35
|
## Core Features
|
|
36
36
|
|
|
37
|
+
- **Prefix folding**: Build hierarchical namespaces with lightweight IRI authoring
|
|
37
38
|
- **Subject declarations**: `{=IRI}` and `{=#fragment}` for context setting
|
|
38
39
|
- **Object IRIs**: `{+IRI}` and `{+#fragment}` for temporary object declarations
|
|
39
40
|
- **Four predicate forms**: `p` (S→L), `?p` (S→O), `!p` (O→S)
|
|
@@ -255,6 +256,38 @@ ex:book schema:hasPart ex:part .
|
|
|
255
256
|
# Person {=ex:alice .foaf:Person}
|
|
256
257
|
```
|
|
257
258
|
|
|
259
|
+
### Prefix Folding: Lightweight IRI Authoring
|
|
260
|
+
|
|
261
|
+
Build hierarchical namespaces by referencing previously defined prefixes:
|
|
262
|
+
|
|
263
|
+
```markdown
|
|
264
|
+
# Create your domain authority
|
|
265
|
+
[my] <tag:mymail@domain.com,2026:>
|
|
266
|
+
|
|
267
|
+
# Build namespace hierarchy
|
|
268
|
+
[j] <my:journal:>
|
|
269
|
+
[p] <my:property:>
|
|
270
|
+
[c] <my:class:>
|
|
271
|
+
[person] <my:people:>
|
|
272
|
+
|
|
273
|
+
# Use in content
|
|
274
|
+
# 2026-01-27 {=j:2026-01-27 .c:Event p:date ^^xsd:date}
|
|
275
|
+
|
|
276
|
+
## Harry {=person:harry p:name}
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
**Resolves to absolute IRIs:**
|
|
280
|
+
- `j:2026-01-27` → `tag:mymail@domain.com,2026:journal:2026-01-27`
|
|
281
|
+
- `c:Event` → `tag:mymail@domain.com,2026:class:Event`
|
|
282
|
+
- `p:date` → `tag:mymail@domain.com,2026:property:date`
|
|
283
|
+
- `person:harry` → `tag:mymail@domain.com,2026:people:harry`
|
|
284
|
+
|
|
285
|
+
**Benefits:**
|
|
286
|
+
- **Lightweight**: No external ontology dependencies
|
|
287
|
+
- **Domain authority**: Use `tag:` URIs for personal namespaces
|
|
288
|
+
- **Hierarchical**: Build deep namespace structures
|
|
289
|
+
- **Streaming-safe**: Forward-reference only, single-pass parsing
|
|
290
|
+
|
|
258
291
|
## API Reference
|
|
259
292
|
|
|
260
293
|
### `parse(markdown, options)`
|
|
@@ -430,16 +463,18 @@ Therefore, the algebra is **closed**.
|
|
|
430
463
|
### Personal Knowledge Management
|
|
431
464
|
|
|
432
465
|
```markdown
|
|
433
|
-
|
|
466
|
+
[alice] <tag:alice@example.com,2026:>
|
|
467
|
+
|
|
468
|
+
# Meeting Notes {=alice:meeting-2024-01-15 .Meeting}
|
|
434
469
|
|
|
435
470
|
Attendees: {?attendee name}
|
|
436
471
|
|
|
437
|
-
- Alice {=
|
|
438
|
-
- Bob {=
|
|
472
|
+
- Alice {=alice:alice}
|
|
473
|
+
- Bob {=alice:bob}
|
|
439
474
|
|
|
440
475
|
Action items: {?actionItem name}
|
|
441
476
|
|
|
442
|
-
- Review proposal {=
|
|
477
|
+
- Review proposal {=alice:task-1}
|
|
443
478
|
```
|
|
444
479
|
|
|
445
480
|
### Developer Documentation
|
|
@@ -460,10 +495,12 @@ curl https://api.example.com/users/123
|
|
|
460
495
|
### Academic Research
|
|
461
496
|
|
|
462
497
|
```markdown
|
|
463
|
-
|
|
498
|
+
[alice] <tag:alice@example.com,2026:>
|
|
499
|
+
|
|
500
|
+
# Paper {=alice:paper-semantic-markdown .ScholarlyArticle}
|
|
464
501
|
|
|
465
502
|
[Semantic Web] {about}
|
|
466
|
-
[Alice Johnson] {=
|
|
503
|
+
[Alice Johnson] {=alice:alice-johnson ?author}
|
|
467
504
|
[2024-01] {datePublished ^^xsd:gYearMonth}
|
|
468
505
|
|
|
469
506
|
> This paper explores semantic markup in Markdown. {abstract @en}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.3.5",
|
|
3
|
+
"version": "0.4.1",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -24,7 +24,10 @@
|
|
|
24
24
|
"rdfjs",
|
|
25
25
|
"browser",
|
|
26
26
|
"web-worker",
|
|
27
|
-
"parser"
|
|
27
|
+
"parser",
|
|
28
|
+
"prefix-folding",
|
|
29
|
+
"curie",
|
|
30
|
+
"iri-authoring"
|
|
28
31
|
],
|
|
29
32
|
"author": "davay42",
|
|
30
33
|
"repository": {
|
package/src/parse.js
CHANGED
|
@@ -182,6 +182,13 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
182
182
|
continue;
|
|
183
183
|
}
|
|
184
184
|
|
|
185
|
+
const angleBracketCarrier = tryExtractAngleBracketCarrier(text, pos, baseOffset);
|
|
186
|
+
if (angleBracketCarrier) {
|
|
187
|
+
carriers.push(angleBracketCarrier);
|
|
188
|
+
pos = angleBracketCarrier.pos;
|
|
189
|
+
continue;
|
|
190
|
+
}
|
|
191
|
+
|
|
185
192
|
const bracketCarrier = tryExtractBracketCarrier(text, pos, baseOffset);
|
|
186
193
|
if (bracketCarrier) {
|
|
187
194
|
if (bracketCarrier.skip) {
|
|
@@ -232,6 +239,31 @@ function tryExtractCodeCarrier(text, pos, baseOffset) {
|
|
|
232
239
|
ranges.attrsRange, ranges.valueRange, ranges.range, ranges.pos);
|
|
233
240
|
}
|
|
234
241
|
|
|
242
|
+
/**
 * Try to read an angle-bracket IRI carrier (`<scheme:rest>`) that begins
 * exactly at `pos`, together with any attribute block that follows it.
 *
 * The bracketed value is handed to `createCarrier` both as the carrier's
 * second argument and as `{ url }`, so downstream code can recognise
 * angle-bracket carriers (presumably by comparing the carrier text with its
 * url — confirm against `createCarrier`).
 *
 * @param {string} text - Text being scanned for inline carriers.
 * @param {number} pos - Index at which the carrier must start.
 * @param {number} baseOffset - Added to local indices when building ranges.
 * @returns {object|null} The value returned by `createCarrier('link', ...)`,
 *   or `null` when no well-formed angle-bracket carrier starts at `pos`.
 */
function tryExtractAngleBracketCarrier(text, pos, baseOffset) {
    // Only the character at `pos` is relevant. Searching ahead for a later
    // '<' would rescan the remaining text on every call for no benefit.
    if (text[pos] !== '<') return null;
    const angleStart = pos;

    // Look for the closing angle bracket
    const angleEnd = text.indexOf('>', angleStart);
    if (angleEnd === -1) return null;

    const url = text.substring(angleStart + 1, angleEnd);

    // Must open with a scheme (or prefix) followed by a colon
    if (!/^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(url)) return null;

    // An IRI cannot contain whitespace or a nested '<'. Seeing either means
    // this is ordinary prose such as "<note: a > b", not a carrier.
    if (/[\s<]/.test(url)) return null;

    const { attrs, attrsRange, finalSpanEnd } = extractAttributesFromText(text, angleEnd + 1, baseOffset);

    // Value range covers the IRI between the brackets; the full range runs
    // from '<' to the end of the trailing attribute block (if any).
    return createCarrier('link', url, attrs, attrsRange,
        [baseOffset + angleStart + 1, baseOffset + angleEnd],
        [baseOffset + angleStart, baseOffset + finalSpanEnd],
        finalSpanEnd, { url });
}
|
|
266
|
+
|
|
235
267
|
function tryExtractBracketCarrier(text, pos, baseOffset) {
|
|
236
268
|
const bracketStart = text.indexOf('[', pos);
|
|
237
269
|
if (bracketStart === -1 || bracketStart !== pos) return null;
|
|
@@ -391,11 +423,18 @@ function resolveObject(sem, state) {
|
|
|
391
423
|
}
|
|
392
424
|
}
|
|
393
425
|
|
|
394
|
-
function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state) {
|
|
426
|
+
function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state, carrier) {
|
|
395
427
|
sem.types.forEach(t => {
|
|
396
428
|
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
397
429
|
const entryIndex = typeof t === 'string' ? null : t.entryIndex;
|
|
398
|
-
|
|
430
|
+
|
|
431
|
+
// For angle-bracket URLs, use the URL as the subject for type declarations ONLY when
|
|
432
|
+
// there's no explicit subject declaration. This implements {+URL} behavior.
|
|
433
|
+
let typeSubject = newSubject ? newSubject : (localObject || carrierO || S);
|
|
434
|
+
if (carrier?.type === 'link' && carrier?.url && carrier.text === carrier.url && !newSubject) {
|
|
435
|
+
typeSubject = carrierO; // Use URL as subject for type declarations only if no explicit subject
|
|
436
|
+
}
|
|
437
|
+
|
|
399
438
|
const expandedType = expandIRI(typeIRI, state.ctx);
|
|
400
439
|
|
|
401
440
|
emitQuad(
|
|
@@ -409,10 +448,15 @@ function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block
|
|
|
409
448
|
});
|
|
410
449
|
}
|
|
411
450
|
|
|
412
|
-
function processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state) {
|
|
451
|
+
function processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state, carrier) {
|
|
413
452
|
sem.predicates.forEach(pred => {
|
|
414
453
|
const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
|
|
415
454
|
|
|
455
|
+
// Skip literal predicates for angle-bracket URLs - they only support ? and ! predicates
|
|
456
|
+
if (pred.form === '' && carrier?.type === 'link' && carrier?.url && carrier.text === carrier.url) {
|
|
457
|
+
return; // Angle-bracket URLs don't support literal predicates
|
|
458
|
+
}
|
|
459
|
+
|
|
416
460
|
// Pre-bind subject/object roles for clarity
|
|
417
461
|
const roles = {
|
|
418
462
|
'': { subject: localObject || S, object: L },
|
|
@@ -462,8 +506,8 @@ function processAnnotation(carrier, sem, state, options = {}) {
|
|
|
462
506
|
const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
|
|
463
507
|
const newSubjectOrCarrierO = newSubject || carrierO;
|
|
464
508
|
|
|
465
|
-
processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state);
|
|
466
|
-
processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state);
|
|
509
|
+
processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state, carrier);
|
|
510
|
+
processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state, carrier);
|
|
467
511
|
}
|
|
468
512
|
|
|
469
513
|
// Helper functions for list item processing
|
|
@@ -744,8 +788,20 @@ export function parse(text, options = {}) {
|
|
|
744
788
|
|
|
745
789
|
state.tokens = scanTokens(text);
|
|
746
790
|
|
|
747
|
-
// Process prefix declarations first
|
|
748
|
-
state.tokens.filter(t => t.type === 'prefix').forEach(t =>
|
|
791
|
+
// Process prefix declarations first with prefix folding support
|
|
792
|
+
state.tokens.filter(t => t.type === 'prefix').forEach(t => {
|
|
793
|
+
// Check if the IRI value contains a CURIE that references a previously defined prefix
|
|
794
|
+
let resolvedIri = t.iri;
|
|
795
|
+
if (t.iri.includes(':')) {
|
|
796
|
+
const [potentialPrefix, ...referenceParts] = t.iri.split(':');
|
|
797
|
+
const reference = referenceParts.join(':'); // Preserve any additional colons in reference
|
|
798
|
+
if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
|
|
799
|
+
// This is a CURIE referencing an existing prefix - resolve it
|
|
800
|
+
resolvedIri = state.ctx[potentialPrefix] + reference;
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
state.ctx[t.prefix] = resolvedIri;
|
|
804
|
+
});
|
|
749
805
|
|
|
750
806
|
// Process all other tokens
|
|
751
807
|
for (let i = 0; i < state.tokens.length; i++) {
|