mdld-parse 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -141,11 +141,11 @@ Create fragment IRIs relative to current subject:
141
141
  ```markdown
142
142
  # Document {=ex:document}
143
143
  {=#summary}
144
- [Content] {name}
144
+ [Content] {label}
145
145
  ```
146
146
 
147
147
  ```turtle
148
- ex:document#summary schema:name "Content" .
148
+ ex:document#summary rdfs:label "Content" .
149
149
  ```
150
150
 
151
151
  Fragments replace any existing fragment and require a current subject.
@@ -157,11 +157,11 @@ Subject remains in scope until reset with `{=}` or new subject declared.
157
157
  Emit `rdf:type` triple:
158
158
 
159
159
  ```markdown
160
- ## Apollo 11 {=ex:apollo11 .SpaceMission .Event}
160
+ ## Apollo 11 {=ex:apollo11 .ex:SpaceMission .ex:Event}
161
161
  ```
162
162
 
163
163
  ```turtle
164
- ex:apollo11 a schema:SpaceMission, schema:Event .
164
+ ex:apollo11 a ex:SpaceMission, ex:Event .
165
165
  ```
166
166
 
167
167
  ### Literal Properties
@@ -171,15 +171,15 @@ Inline value carriers emit literal properties:
171
171
  ```markdown
172
172
  # Mission {=ex:apollo11}
173
173
 
174
- [Neil Armstrong] {commander}
175
- [1969] {year ^^xsd:gYear}
176
- [Historic mission] {description @en}
174
+ [Neil Armstrong] {ex:commander}
175
+ [1969] {ex:year ^^xsd:gYear}
176
+ [Historic mission] {ex:description @en}
177
177
  ```
178
178
 
179
179
  ```turtle
180
- ex:apollo11 schema:commander "Neil Armstrong" ;
181
- schema:year "1969"^^xsd:gYear ;
182
- schema:description "Historic mission"@en .
180
+ ex:apollo11 ex:commander "Neil Armstrong" ;
181
+ ex:year "1969"^^xsd:gYear ;
182
+ ex:description "Historic mission"@en .
183
183
  ```
184
184
 
185
185
  ### Object Properties
@@ -189,11 +189,11 @@ Links create relationships (use `?` prefix):
189
189
  ```markdown
190
190
  # Mission {=ex:apollo11}
191
191
 
192
- [NASA] {=ex:nasa ?organizer}
192
+ [NASA] {=ex:nasa ?ex:organizer}
193
193
  ```
194
194
 
195
195
  ```turtle
196
- ex:apollo11 schema:organizer ex:nasa .
196
+ ex:apollo11 ex:organizer ex:nasa .
197
197
  ```
198
198
 
199
199
  ### Resource Declaration
@@ -203,12 +203,12 @@ Declare resources inline with `{=iri}`:
203
203
  ```markdown
204
204
  # Mission {=ex:apollo11}
205
205
 
206
- [Neil Armstrong] {=ex:armstrong ?commander .Person}
206
+ [Neil Armstrong] {=ex:armstrong ?ex:commander .prov:Person}
207
207
  ```
208
208
 
209
209
  ```turtle
210
- ex:apollo11 schema:commander ex:armstrong .
211
- ex:armstrong a schema:Person .
210
+ ex:apollo11 ex:commander ex:armstrong .
211
+ ex:armstrong a prov:Person .
212
212
  ```
213
213
 
214
214
  ### Lists
@@ -218,15 +218,15 @@ Lists require explicit subjects per item.
218
218
  ```markdown
219
219
  # Recipe {=ex:recipe}
220
220
 
221
- Ingredients: {?ingredient .Ingredient}
222
- - Flour {=ex:flour name}
223
- - Water {=ex:water name}
221
+ Ingredients: {?ex:ingredient .ex:Ingredient}
222
+ - Flour {=ex:flour label}
223
+ - Water {=ex:water label}
224
224
  ```
225
225
 
226
226
  ```turtle
227
- ex:recipe schema:ingredient ex:flour, ex:water .
228
- ex:flour a schema:Ingredient ; schema:name "Flour" .
229
- ex:water a schema:Ingredient ; schema:name "Water" .
227
+ ex:recipe ex:ingredient ex:flour, ex:water .
228
+ ex:flour a ex:Ingredient ; rdfs:label "Flour" .
229
+ ex:water a ex:Ingredient ; rdfs:label "Water" .
230
230
  ```
231
231
 
232
232
  ### Code Blocks
@@ -236,14 +236,14 @@ Code blocks are value carriers:
236
236
  ````markdown
237
237
  # Example {=ex:example}
238
238
 
239
- ```javascript {=ex:code .SoftwareSourceCode text}
239
+ ```javascript {=ex:code .ex:SoftwareSourceCode ex:text}
240
240
  console.log("hello");
241
241
  ```
242
242
  ````
243
243
 
244
244
  ```turtle
245
- ex:code a schema:SoftwareSourceCode ;
246
- schema:text "console.log(\"hello\")" .
245
+ ex:code a ex:SoftwareSourceCode ;
246
+ ex:text "console.log(\"hello\")" .
247
247
  ```
248
248
 
249
249
  ### Blockquotes
@@ -251,11 +251,11 @@ ex:code a schema:SoftwareSourceCode ;
251
251
  ```markdown
252
252
  # Article {=ex:article}
253
253
 
254
- > MD-LD bridges Markdown and RDF. {abstract}
254
+ > MD-LD bridges Markdown and RDF. {comment}
255
255
  ```
256
256
 
257
257
  ```turtle
258
- ex:article schema:abstract "MD-LD bridges Markdown and RDF." .
258
+ ex:article rdfs:comment "MD-LD bridges Markdown and RDF." .
259
259
  ```
260
260
 
261
261
  ### Reverse Relations
@@ -265,13 +265,13 @@ Reverse the relationship direction:
265
265
  ```markdown
266
266
  # Part {=ex:part}
267
267
 
268
- Part of: {!hasPart}
268
+ Part of: {!ex:hasPart}
269
269
 
270
270
  - Book {=ex:book}
271
271
  ```
272
272
 
273
273
  ```turtle
274
- ex:book schema:hasPart ex:part .
274
+ ex:book ex:hasPart ex:part .
275
275
  ```
276
276
 
277
277
  ### Prefix Declarations
@@ -279,7 +279,6 @@ ex:book schema:hasPart ex:part .
279
279
  ```markdown
280
280
  [ex] <http://example.org/>
281
281
  [foaf] <http://xmlns.com/foaf/0.1/>
282
- [@vocab] <http://schema.org/>
283
282
 
284
283
  # Person {=ex:alice .foaf:Person}
285
284
  ```
@@ -326,7 +325,7 @@ Parse MD-LD markdown and return RDF quads with origin tracking.
326
325
 
327
326
  - `markdown` (string) — MD-LD formatted text
328
327
  - `options` (object, optional):
329
- - `context` (object) — Prefix mappings (default: `{ '@vocab': 'http://www.w3.org/2000/01/rdf-schema#', rdf, rdfs, xsd, schema }`)
328
+ - `context` (object) — Prefix mappings (default: `{ '@vocab': 'http://www.w3.org/2000/01/rdf-schema#', rdf, rdfs, xsd, sh, prov }`)
330
329
  - `dataFactory` (object) — Custom RDF/JS DataFactory
331
330
 
332
331
  **Returns:** `{ quads, origin, context }`
@@ -382,14 +381,14 @@ Apply RDF changes back to markdown with proper positioning.
382
381
  ```javascript
383
382
  const original = `# Article {=ex:article}
384
383
 
385
- [Alice] {author}`;
384
+ [Alice] {ex:author}`;
386
385
 
387
386
  const result = parse(original, { context: { ex: 'http://example.org/' } });
388
387
 
389
388
  // Add a new property
390
389
  const newQuad = {
391
390
  subject: { termType: 'NamedNode', value: 'http://example.org/article' },
392
- predicate: { termType: 'NamedNode', value: 'http://schema.org/datePublished' },
391
+ predicate: { termType: 'NamedNode', value: 'http://example.org/datePublished' },
393
392
  object: { termType: 'Literal', value: '2024-01-01' }
394
393
  };
395
394
 
@@ -437,20 +436,19 @@ const quads = [
437
436
  },
438
437
  {
439
438
  subject: { termType: 'NamedNode', value: 'http://example.org/article' },
440
- predicate: { termType: 'NamedNode', value: 'http://schema.org/author' },
439
+ predicate: { termType: 'NamedNode', value: 'http://example.org/author' },
441
440
  object: { termType: 'NamedNode', value: 'http://example.org/alice' }
442
441
  }
443
442
  ];
444
443
 
445
444
  const result = generate(quads, {
446
445
  ex: 'http://example.org/',
447
- schema: 'http://schema.org/'
448
446
  });
449
447
 
450
448
  console.log(result.text);
451
449
  // # Article {=ex:article .ex:Article}
452
450
  //
453
- // > alice {+ex:alice ?schema:author}
451
+ // [alice] {+ex:alice ?ex:author}
454
452
  ```
455
453
 
456
454
  ### `locate(quad, origin, text, context)`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.5.3",
3
+ "version": "0.5.5",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
package/src/generate.js CHANGED
@@ -164,7 +164,7 @@ function buildDeterministicMDLD(subjectGroups, context) {
164
164
  annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
165
165
  }
166
166
 
167
- const literalText = `> ${quad.object.value} {${annotation}}\n`;
167
+ const literalText = `[${quad.object.value}] {${annotation}}\n`;
168
168
  const literalBlockId = generateBlockId();
169
169
  const literalBlock = {
170
170
  id: literalBlockId,
@@ -204,7 +204,7 @@ function buildDeterministicMDLD(subjectGroups, context) {
204
204
  const objShort = shortenIRI(quad.object.value, context);
205
205
  const localName = extractLocalName(quad.object.value);
206
206
 
207
- const objectText = `> ${localName} {+${objShort} ?${predShort}}\n`;
207
+ const objectText = `[${localName}] {+${objShort} ?${predShort}}\n`;
208
208
  const objectBlockId = generateBlockId();
209
209
  const objectBlock = {
210
210
  id: objectBlockId,
package/src/parse.js CHANGED
@@ -10,7 +10,7 @@ import {
10
10
  } from './utils.js';
11
11
 
12
12
  const URL_REGEX = /^[a-zA-Z][a-zA-Z0-9+.-]*:/;
13
- const FENCE_REGEX = /^(`{3,})(.*)/;
13
+ const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;
14
14
  const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;
15
15
  const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
16
16
  const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
@@ -22,6 +22,29 @@ const INLINE_CARRIER_PATTERNS = {
22
22
  CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
23
23
  };
24
24
 
25
// Compiled closing-fence patterns keyed by fence character, so each RegExp is
// built once and then reused.
const FENCE_CLOSE_PATTERNS = new Map();

/**
 * Return the cached pattern that matches a run of three or more `fenceChar`
 * at the start of a string. The matched run is capture group 1.
 *
 * The pattern has no `g`/`y` flag, so the shared instance carries no
 * `lastIndex` state between calls.
 *
 * @param {string} fenceChar - The fence character.
 * @returns {RegExp} Pattern anchored at the start of the input.
 */
function getFenceClosePattern(fenceChar) {
  let pattern = FENCE_CLOSE_PATTERNS.get(fenceChar);
  if (!pattern) {
    // Escape regex metacharacters so the character is always matched
    // literally; unescaped, `*` would throw and `.` would match any text.
    const escaped = fenceChar.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    pattern = new RegExp(`^((?:${escaped}){3,})`);
    FENCE_CLOSE_PATTERNS.set(fenceChar, pattern);
  }
  return pattern;
}
34
+
35
/**
 * Split the text that follows an opening code fence into its language token
 * and its `{...}` attribute block.
 *
 * Leading whitespace is ignored and the language ends at the first whitespace
 * character or `{`, so ` js {a}`, `js\t{a}` and `js{a}` all yield `js`.
 *
 * @param {string} langAndAttrs - Text after the fence characters.
 * @returns {{lang: string, attrsText: (string|null)}} `lang` is '' when no
 *   language is given; `attrsText` is the first brace group (braces included)
 *   found after the language, or null when there is none.
 */
function parseLangAndAttrs(langAndAttrs) {
  const info = langAndAttrs.trimStart();
  // `*` lets this match the empty string, so the match is never null.
  const lang = info.match(/^[^\s{]*/)[0];
  const attrsText = info.slice(lang.length).match(/\{[^{}]*\}/)?.[0] || null;
  return { lang, attrsText };
}
47
+
25
48
  const semCache = {};
26
49
  const EMPTY_SEM = Object.freeze({ predicates: [], types: [], subject: null });
27
50
 
@@ -79,108 +102,104 @@ function scanTokens(text) {
79
102
  let pos = 0;
80
103
  let codeBlock = null;
81
104
 
82
- const processors = [
83
- {
84
- test: line => line.startsWith('```'),
85
- process: (line, lineStart, pos) => {
86
- if (!codeBlock) {
87
- const fenceMatch = line.match(FENCE_REGEX);
88
- const attrsText = fenceMatch[2].match(/\{[^{}]*\}/)?.[0] || null;
89
- const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
90
- const contentStart = lineStart + line.length + 1;
91
- const langAndAttrs = fenceMatch[2];
92
- const langEnd = langAndAttrs.indexOf(' ') > -1 ? langAndAttrs.indexOf(' ') :
93
- langAndAttrs.indexOf('{') > -1 ? langAndAttrs.indexOf('{') : langAndAttrs.length;
94
- codeBlock = {
95
- fence: fenceMatch[1],
96
- start: lineStart,
97
- content: [],
98
- lang: langAndAttrs.substring(0, langEnd),
99
- attrs: attrsText,
100
- attrsRange: attrsText && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length] : null,
101
- valueRangeStart: contentStart
102
- };
103
- } else if (line.startsWith(codeBlock.fence)) {
104
- const valueStart = codeBlock.valueRangeStart;
105
- const valueEnd = Math.max(valueStart, lineStart - 1);
106
- tokens.push({
107
- type: 'code',
108
- range: [codeBlock.start, lineStart],
109
- text: codeBlock.content.join('\n'),
110
- lang: codeBlock.lang,
111
- attrs: codeBlock.attrs,
112
- attrsRange: codeBlock.attrsRange,
113
- valueRange: [valueStart, valueEnd]
114
- });
115
- codeBlock = null;
116
- }
117
- return true;
118
- }
119
- },
120
- {
121
- test: () => codeBlock,
122
- process: line => {
123
- codeBlock.content.push(line);
124
- return true;
125
- }
126
- },
127
- {
128
- test: line => PREFIX_REGEX.test(line),
129
- process: (line, lineStart, pos) => {
130
- const match = PREFIX_REGEX.exec(line);
131
- tokens.push({ type: 'prefix', prefix: match[1], iri: match[2].trim() });
132
- return true;
133
- }
134
- },
135
- {
136
- test: line => HEADING_REGEX.test(line),
137
- process: (line, lineStart, pos) => {
138
- const match = HEADING_REGEX.exec(line);
139
- const attrs = match[3] || null;
140
- const afterHashes = match[1].length;
141
- const rangeInfo = calcRangeInfo(line, attrs, lineStart, afterHashes, match[2].length);
142
- tokens.push(createToken('heading', [lineStart, pos - 1], match[2].trim(), attrs,
143
- rangeInfo.attrsRange, rangeInfo.valueRange, { depth: match[1].length }));
144
- return true;
145
- }
146
- },
147
- {
148
- test: line => UNORDERED_LIST_REGEX.test(line),
149
- process: (line, lineStart, pos) => {
150
- const match = UNORDERED_LIST_REGEX.exec(line);
151
- tokens.push(createListToken('list', line, lineStart, pos, match, match[1].length));
152
- return true;
153
- }
154
- },
155
- {
156
- test: line => BLOCKQUOTE_REGEX.test(line),
157
- process: (line, lineStart, pos) => {
158
- const match = BLOCKQUOTE_REGEX.exec(line);
159
- const attrs = match[2] || null;
160
- const valueStartInLine = line.startsWith('> ') ? 2 : line.indexOf('>') + 1;
161
- const valueEndInLine = valueStartInLine + match[1].length;
162
- tokens.push(createToken('blockquote', [lineStart, pos - 1], match[1].trim(), attrs,
163
- calcAttrsRange(line, attrs, lineStart),
164
- [lineStart + valueStartInLine, lineStart + valueEndInLine]));
165
- return true;
166
- }
167
- },
168
- {
169
- test: line => line.trim(),
170
- process: (line, lineStart, pos) => {
171
- tokens.push(createToken('para', [lineStart, pos - 1], line.trim()));
172
- return true;
105
+ // Ordered line processors: the first entry whose test and process both succeed handles the line
106
+ const PROCESSORS = [
107
+ { type: 'fence', test: line => FENCE_REGEX.test(line.trim()), process: handleFence },
108
+ { type: 'content', test: () => codeBlock, process: line => codeBlock.content.push(line) },
109
+ { type: 'prefix', test: line => PREFIX_REGEX.test(line), process: handlePrefix },
110
+ { type: 'heading', test: line => HEADING_REGEX.test(line), process: handleHeading },
111
+ { type: 'list', test: line => UNORDERED_LIST_REGEX.test(line), process: handleList },
112
+ { type: 'blockquote', test: line => BLOCKQUOTE_REGEX.test(line), process: handleBlockquote },
113
+ { type: 'para', test: line => line.trim(), process: handlePara }
114
+ ];
115
+
116
+ function handleFence(line, lineStart, pos) {
117
+ const trimmedLine = line.trim();
118
+ if (!codeBlock) {
119
+ const fenceMatch = trimmedLine.match(FENCE_REGEX);
120
+ if (!fenceMatch) return false;
121
+
122
+ const { lang, attrsText } = parseLangAndAttrs(fenceMatch[2]);
123
+ const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
124
+ const contentStart = lineStart + line.length + 1;
125
+
126
+ codeBlock = {
127
+ fence: fenceMatch[1],
128
+ start: lineStart,
129
+ content: [],
130
+ lang,
131
+ attrs: attrsText,
132
+ attrsRange: attrsText && attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length] : null,
133
+ valueRangeStart: contentStart
134
+ };
135
+ } else {
136
+ const fenceChar = codeBlock.fence[0];
137
+ const expectedFence = fenceChar.repeat(codeBlock.fence.length);
138
+ const fenceMatch = trimmedLine.match(getFenceClosePattern(fenceChar));
139
+
140
+ if (fenceMatch && fenceMatch[1] === expectedFence) {
141
+ const valueStart = codeBlock.valueRangeStart;
142
+ const valueEnd = Math.max(valueStart, lineStart - 1);
143
+ tokens.push({
144
+ type: 'code',
145
+ range: [codeBlock.start, lineStart],
146
+ text: codeBlock.content.join('\n'),
147
+ lang: codeBlock.lang,
148
+ attrs: codeBlock.attrs,
149
+ attrsRange: codeBlock.attrsRange,
150
+ valueRange: [valueStart, valueEnd]
151
+ });
152
+ codeBlock = null;
173
153
  }
174
154
  }
175
- ];
155
+ return true;
156
+ }
157
+
158
+ function handlePrefix(line, lineStart, pos) {
159
+ const match = PREFIX_REGEX.exec(line);
160
+ tokens.push({ type: 'prefix', prefix: match[1], iri: match[2].trim() });
161
+ return true;
162
+ }
163
+
164
+ function handleHeading(line, lineStart, pos) {
165
+ const match = HEADING_REGEX.exec(line);
166
+ const attrs = match[3] || null;
167
+ const afterHashes = match[1].length;
168
+ const rangeInfo = calcRangeInfo(line, attrs, lineStart, afterHashes, match[2].length);
169
+ tokens.push(createToken('heading', [lineStart, pos - 1], match[2].trim(), attrs,
170
+ rangeInfo.attrsRange, rangeInfo.valueRange, { depth: match[1].length }));
171
+ return true;
172
+ }
173
+
174
+ function handleList(line, lineStart, pos) {
175
+ const match = UNORDERED_LIST_REGEX.exec(line);
176
+ tokens.push(createListToken('list', line, lineStart, pos, match, match[1].length));
177
+ return true;
178
+ }
179
+
180
+ function handleBlockquote(line, lineStart, pos) {
181
+ const match = BLOCKQUOTE_REGEX.exec(line);
182
+ const attrs = match[2] || null;
183
+ const valueStartInLine = line.startsWith('> ') ? 2 : line.indexOf('>') + 1;
184
+ const valueEndInLine = valueStartInLine + match[1].length;
185
+ tokens.push(createToken('blockquote', [lineStart, pos - 1], match[1].trim(), attrs,
186
+ calcAttrsRange(line, attrs, lineStart),
187
+ [lineStart + valueStartInLine, lineStart + valueEndInLine]));
188
+ return true;
189
+ }
190
+
191
+ function handlePara(line, lineStart, pos) {
192
+ tokens.push(createToken('para', [lineStart, pos - 1], line.trim()));
193
+ return true;
194
+ }
176
195
 
177
196
  for (let i = 0; i < lines.length; i++) {
178
197
  const line = lines[i];
179
198
  const lineStart = pos;
180
199
  pos += line.length + 1;
181
200
 
182
- // Try each processor until one handles the line
183
- for (const processor of processors) {
201
+ // Try each processor in order until one handles the line
202
+ for (const processor of PROCESSORS) {
184
203
  if (processor.test(line) && processor.process(line, lineStart, pos)) {
185
204
  break;
186
205
  }
package/src/utils.js CHANGED
@@ -25,16 +25,31 @@ export function hash(str) {
25
25
  return Math.abs(h).toString(16).slice(0, 12);
26
26
  }
27
27
 
28
+ const iriCache = new Map();
29
+
28
30
  export function expandIRI(term, ctx) {
29
31
  if (term == null) return null;
32
+
33
+ const cacheKey = `${term}|${ctx['@vocab'] || ''}|${Object.keys(ctx).filter(k => k !== '@vocab').sort().map(k => `${k}:${ctx[k]}`).join(',')}`;
34
+ if (iriCache.has(cacheKey)) {
35
+ return iriCache.get(cacheKey);
36
+ }
37
+
30
38
  const raw = typeof term === 'string' ? term : (typeof term === 'object' && typeof term.value === 'string') ? term.value : String(term);
31
39
  const t = raw.trim();
32
- if (t.match(/^https?:/)) return t;
33
- if (t.includes(':')) {
40
+ let result;
41
+
42
+ if (t.match(/^https?:/)) {
43
+ result = t;
44
+ } else if (t.includes(':')) {
34
45
  const [prefix, ref] = t.split(':', 2);
35
- return ctx[prefix] ? ctx[prefix] + ref : t;
46
+ result = ctx[prefix] ? ctx[prefix] + ref : t;
47
+ } else {
48
+ result = (ctx['@vocab'] || '') + t;
36
49
  }
37
- return (ctx['@vocab'] || '') + t;
50
+
51
+ iriCache.set(cacheKey, result);
52
+ return result;
38
53
  }
39
54
 
40
55
  export function shortenIRI(iri, ctx) {