mdld-parse 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +34 -1
  2. package/package.json +5 -2
  3. package/src/parse.js +63 -7
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # MD-LD Parse v0.3
1
+ # MD-LD Parse v0.4.1
2
2
 
3
3
  **Markdown-Linked Data (MD-LD)** — a deterministic, streaming-friendly RDF authoring format that extends Markdown with explicit `{...}` annotations.
4
4
 
@@ -34,6 +34,7 @@ ex:armstrong schema:name "Neil Armstrong" .
34
34
 
35
35
  ## Core Features
36
36
 
37
+ - **Prefix folding**: Build hierarchical namespaces with lightweight IRI authoring
37
38
  - **Subject declarations**: `{=IRI}` and `{=#fragment}` for context setting
38
39
  - **Object IRIs**: `{+IRI}` and `{+#fragment}` for temporary object declarations
39
40
  - **Four predicate forms**: `p` (S→L), `?p` (S→O), `!p` (O→S)
@@ -255,6 +256,38 @@ ex:book schema:hasPart ex:part .
255
256
  # Person {=ex:alice .foaf:Person}
256
257
  ```
257
258
 
259
+ ### Prefix Folding: Lightweight IRI Authoring
260
+
261
+ Build hierarchical namespaces by referencing previously defined prefixes:
262
+
263
+ ```markdown
264
+ # Create your domain authority
265
+ [my] <tag:mymail@domain.com,2026:>
266
+
267
+ # Build namespace hierarchy
268
+ [j] <my:journal:>
269
+ [p] <my:property:>
270
+ [c] <my:class:>
271
+ [person] <my:people:>
272
+
273
+ # Use in content
274
+ # 2026-01-27 {=j:2026-01-27 .c:Event p:date ^^xsd:date}
275
+
276
+ ## Harry {=person:harry p:name}
277
+ ```
278
+
279
+ **Resolves to absolute IRIs:**
280
+ - `j:2026-01-27` → `tag:mymail@domain.com,2026:journal:2026-01-27`
281
+ - `c:Event` → `tag:mymail@domain.com,2026:class:Event`
282
+ - `p:date` → `tag:mymail@domain.com,2026:property:date`
283
+ - `person:harry` → `tag:mymail@domain.com,2026:people:harry`
284
+
285
+ **Benefits:**
286
+ - **Lightweight**: No external ontology dependencies
287
+ - **Domain authority**: Use `tag:` URIs for personal namespaces
288
+ - **Hierarchical**: Build deep namespace structures
289
+ - **Streaming-safe**: Backward references only — a prefix may reference only prefixes declared before it (no forward references), so parsing stays single-pass
290
+
258
291
  ## API Reference
259
292
 
260
293
  ### `parse(markdown, options)`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.4.0",
3
+ "version": "0.4.1",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -24,7 +24,10 @@
24
24
  "rdfjs",
25
25
  "browser",
26
26
  "web-worker",
27
- "parser"
27
+ "parser",
28
+ "prefix-folding",
29
+ "curie",
30
+ "iri-authoring"
28
31
  ],
29
32
  "author": "davay42",
30
33
  "repository": {
package/src/parse.js CHANGED
@@ -182,6 +182,13 @@ function extractInlineCarriers(text, baseOffset = 0) {
182
182
  continue;
183
183
  }
184
184
 
185
+ const angleBracketCarrier = tryExtractAngleBracketCarrier(text, pos, baseOffset);
186
+ if (angleBracketCarrier) {
187
+ carriers.push(angleBracketCarrier);
188
+ pos = angleBracketCarrier.pos;
189
+ continue;
190
+ }
191
+
185
192
  const bracketCarrier = tryExtractBracketCarrier(text, pos, baseOffset);
186
193
  if (bracketCarrier) {
187
194
  if (bracketCarrier.skip) {
@@ -232,6 +239,31 @@ function tryExtractCodeCarrier(text, pos, baseOffset) {
232
239
  ranges.attrsRange, ranges.valueRange, ranges.range, ranges.pos);
233
240
  }
234
241
 
242
/**
 * Try to read an angle-bracket IRI carrier (`<scheme:rest>`) that starts
 * exactly at `pos`.
 *
 * @param {string} text - Inline text being scanned.
 * @param {number} pos - Index in `text` where the carrier must start.
 * @param {number} baseOffset - Offset added to every reported range.
 * @returns {object|null} The value returned by `createCarrier('link', ...)`,
 *   or `null` when `text[pos]` is not `<`, no closing `>` follows, or the
 *   bracketed content does not look like an absolute IRI.
 */
function tryExtractAngleBracketCarrier(text, pos, baseOffset) {
    // Only a carrier that begins exactly at `pos` is of interest, so test the
    // character directly instead of scanning the rest of the text for '<'.
    if (text[pos] !== '<') return null;
    const angleStart = pos;

    // Look for the closing angle bracket.
    const angleEnd = text.indexOf('>', angleStart);
    if (angleEnd === -1) return null;

    const url = text.substring(angleStart + 1, angleEnd);

    // Must start with a scheme and colon, and - as with CommonMark autolinks -
    // must not contain whitespace or another '<'. Without the second rule,
    // prose such as "<note: see below>" would be taken for an IRI.
    if (!/^[a-zA-Z][a-zA-Z0-9+.-]*:[^\s<]*$/.test(url)) {
        return null;
    }

    // NOTE(review): presumably reads an optional `{...}` annotation that
    // directly follows the '>' - confirm against extractAttributesFromText.
    const { attrs, attrsRange, finalSpanEnd } = extractAttributesFromText(text, angleEnd + 1, baseOffset);

    // For angle-bracket URLs, always provide the URL as text content.
    // The processing logic will handle whether to use it for literals or not.
    return createCarrier('link', url, attrs, attrsRange,
        [baseOffset + angleStart + 1, baseOffset + angleEnd],
        [baseOffset + angleStart, baseOffset + finalSpanEnd],
        finalSpanEnd, { url });
}
266
+
235
267
  function tryExtractBracketCarrier(text, pos, baseOffset) {
236
268
  const bracketStart = text.indexOf('[', pos);
237
269
  if (bracketStart === -1 || bracketStart !== pos) return null;
@@ -391,11 +423,18 @@ function resolveObject(sem, state) {
391
423
  }
392
424
  }
393
425
 
394
- function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state) {
426
+ function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state, carrier) {
395
427
  sem.types.forEach(t => {
396
428
  const typeIRI = typeof t === 'string' ? t : t.iri;
397
429
  const entryIndex = typeof t === 'string' ? null : t.entryIndex;
398
- const typeSubject = newSubject ? newSubject : (localObject || carrierO || S);
430
+
431
+ // For angle-bracket URLs, use the URL as the subject for type declarations ONLY when
432
+ // there's no explicit subject declaration. This implements {+URL} behavior.
433
+ let typeSubject = newSubject ? newSubject : (localObject || carrierO || S);
434
+ if (carrier?.type === 'link' && carrier?.url && carrier.text === carrier.url && !newSubject) {
435
+ typeSubject = carrierO; // Use URL as subject for type declarations only if no explicit subject
436
+ }
437
+
399
438
  const expandedType = expandIRI(typeIRI, state.ctx);
400
439
 
401
440
  emitQuad(
@@ -409,10 +448,15 @@ function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block
409
448
  });
410
449
  }
411
450
 
412
- function processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state) {
451
+ function processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state, carrier) {
413
452
  sem.predicates.forEach(pred => {
414
453
  const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
415
454
 
455
+ // Skip literal predicates for angle-bracket URLs - they only support ? and ! predicates
456
+ if (pred.form === '' && carrier?.type === 'link' && carrier?.url && carrier.text === carrier.url) {
457
+ return; // Angle-bracket URLs don't support literal predicates
458
+ }
459
+
416
460
  // Pre-bind subject/object roles for clarity
417
461
  const roles = {
418
462
  '': { subject: localObject || S, object: L },
@@ -462,8 +506,8 @@ function processAnnotation(carrier, sem, state, options = {}) {
462
506
  const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
463
507
  const newSubjectOrCarrierO = newSubject || carrierO;
464
508
 
465
- processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state);
466
- processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state);
509
+ processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state, carrier);
510
+ processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state, carrier);
467
511
  }
468
512
 
469
513
  // Helper functions for list item processing
@@ -744,8 +788,20 @@ export function parse(text, options = {}) {
744
788
 
745
789
  state.tokens = scanTokens(text);
746
790
 
747
- // Process prefix declarations first
748
- state.tokens.filter(t => t.type === 'prefix').forEach(t => state.ctx[t.prefix] = t.iri);
791
+ // Process prefix declarations first with prefix folding support
792
+ state.tokens.filter(t => t.type === 'prefix').forEach(t => {
793
+ // Check if the IRI value contains a CURIE that references a previously defined prefix
794
+ let resolvedIri = t.iri;
795
+ if (t.iri.includes(':')) {
796
+ const [potentialPrefix, ...referenceParts] = t.iri.split(':');
797
+ const reference = referenceParts.join(':'); // Preserve any additional colons in reference
798
+ if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
799
+ // This is a CURIE referencing an existing prefix - resolve it
800
+ resolvedIri = state.ctx[potentialPrefix] + reference;
801
+ }
802
+ }
803
+ state.ctx[t.prefix] = resolvedIri;
804
+ });
749
805
 
750
806
  // Process all other tokens
751
807
  for (let i = 0; i < state.tokens.length; i++) {