mdld-parse 0.5.6 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,10 +22,10 @@ Energy level: [8] {my:energyLevel ^^xsd:integer}
22
22
 
23
23
  Met [Sam] {+my:sam .my:Person ?my:attendee} on my regular walk at [Central Park] {+my:central-park ?my:location .my:Place label @en} and talked about [Sunny] {my:weather} weather.
24
24
 
25
- Activities: {?my:hasActivity .my:Activity label}
25
+ Activities:
26
26
 
27
- - Walking {=#walking}
28
- - Reading {=#reading}
27
+ - **Walking** {+ex:walking ?my:hasActivity .my:Activity label}
28
+ - **Reading** {+ex:reading ?my:hasActivity .my:Activity label}
29
29
 
30
30
  ```
31
31
 
@@ -59,7 +59,7 @@ my:central-park a my:Place;
59
59
 
60
60
  ```
61
61
 
62
- Read the [FULL SPEC](./docs/Spec/Spec.md).
62
+ Read the [FULL SPEC](./spec/Spec.md).
63
63
 
64
64
  ## Core Features
65
65
 
@@ -69,7 +69,6 @@ Read the [FULL SPEC](./docs/Spec/Spec.md).
69
69
  - **Four predicate forms**: `p` (S→L), `?p` (S→O), `!p` (O→S)
70
70
  - **Type declarations**: `.Class` for rdf:type triples
71
71
  - **Datatypes & language**: `^^xsd:date` and `@en` support
72
- - **Lists**: Explicit subject declarations and numbered ordered lists with `rdf:List` support
73
72
  - **Fragments**: Built-in document structuring with `{=#fragment}`
74
73
  - **Round-trip serialization**: Markdown ↔ RDF ↔ Markdown preserves structure
75
74
 
@@ -213,14 +212,15 @@ ex:armstrong a prov:Person .
213
212
 
214
213
  ### Lists
215
214
 
216
- Lists require explicit subjects per item.
215
+ Lists are pure Markdown structure. Each list item requires explicit annotations:
217
216
 
218
217
  ```markdown
219
218
  # Recipe {=ex:recipe}
220
219
 
221
- Ingredients: {?ex:ingredient .ex:Ingredient}
222
- - Flour {=ex:flour label}
223
- - Water {=ex:water label}
220
+ Ingredients:
221
+
222
+ - **Flour** {+ex:flour ?ex:ingredient .ex:Ingredient label}
223
+ - **Water** {+ex:water ?ex:ingredient .ex:Ingredient label}
224
224
  ```
225
225
 
226
226
  ```turtle
@@ -229,6 +229,11 @@ ex:flour a ex:Ingredient ; rdfs:label "Flour" .
229
229
  ex:water a ex:Ingredient ; rdfs:label "Water" .
230
230
  ```
231
231
 
232
+ **Key Rules:**
233
+ - No semantic propagation from list scope
234
+ - Each item must have explicit annotations
235
+ - Use `+IRI` to maintain subject chaining for repeated object properties
236
+
232
237
  ### Code Blocks
233
238
 
234
239
  Code blocks are value carriers:
@@ -503,7 +508,7 @@ Only specific markdown elements can carry semantic values:
503
508
 
504
509
  **Block:**
505
510
  - Headings (`# Title`)
506
- - List items (`- item`, `1. item`) (single-level)
511
+ - List items (`- item`, `1. item`) — pure Markdown structure
507
512
  - Blockquotes (`> quote`)
508
513
  - Code blocks (` ```lang `)
509
514
 
@@ -579,14 +584,14 @@ Therefore, the algebra is **closed**.
579
584
 
580
585
  # Meeting Notes {=alice:meeting-2024-01-15 .alice:Meeting}
581
586
 
582
- Attendees: {?alice:attendee label}
587
+ Attendees:
583
588
 
584
- - Alice {=alice:alice}
585
- - Bob {=alice:bob}
589
+ - **Alice** {+alice:alice ?alice:attendee label}
590
+ - **Bob** {+alice:bob ?alice:attendee label}
586
591
 
587
- Action items: {?alice:actionItem label}
592
+ Action items:
588
593
 
589
- - Review proposal {=alice:task-1}
594
+ - **Review proposal** {+alice:task-1 ?alice:actionItem label}
590
595
  ```
591
596
 
592
597
  ### Developer Documentation
@@ -630,7 +635,7 @@ Tests validate:
630
635
  - Subject declaration and context
631
636
  - All predicate forms (p, ?p, !p)
632
637
  - Datatypes and language tags
633
- - List processing
638
+ - Explicit list item annotations
634
639
  - Code blocks and blockquotes
635
640
  - Round-trip serialization
636
641
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.5.6",
3
+ "version": "0.6.0",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
package/src/applyDiff.js CHANGED
@@ -15,25 +15,93 @@ import {
15
15
  addSoftFragmentToken,
16
16
  removeSoftFragmentToken,
17
17
  objectSignature,
18
- expandIRI
18
+ expandIRI,
19
+ DataFactory
19
20
  } from './utils.js';
20
21
 
21
22
  function getBlockById(base, blockId) {
22
- return blockId ? base?.blocks?.get(blockId) : null;
23
+ return blockId ? base?.quadMap?.get(blockId) : null;
23
24
  }
24
25
 
25
26
  function getEntryByQuadKey(base, quadKey) {
26
- return quadKey ? base?.quadIndex?.get(quadKey) : null;
27
+ return quadKey ? base?.quadMap?.get(quadKey) : null;
28
+ }
29
+
30
+ // Helper functions for cleaner term type checking
31
+ function isLiteral(term) {
32
+ return term?.termType === 'Literal';
33
+ }
34
+
35
+ function isNamedNode(term) {
36
+ return term?.termType === 'NamedNode';
37
+ }
38
+
39
+ function isRdfType(term) {
40
+ return term?.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
41
+ }
42
+
43
+ function createAnnotationForQuad(quad, ctx) {
44
+ const predShort = shortenIRI(quad.predicate.value, ctx);
45
+ if (isLiteral(quad.object)) {
46
+ const value = String(quad.object.value ?? '');
47
+ const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
48
+ return { text: `[${value}] {${ann}}`, isLiteral: true };
49
+ } else if (isNamedNode(quad.object)) {
50
+ const objectShort = shortenIRI(quad.object.value, ctx);
51
+ const objectAnn = createObjectAnnotation(objectShort, predShort);
52
+ return { text: objectAnn, isLiteral: false };
53
+ }
54
+ return null;
55
+ }
56
+
57
+ function createSubjectBlockForQuad(quad, ctx) {
58
+ const subjectShort = shortenIRI(quad.subject.value, ctx);
59
+ const predShort = shortenIRI(quad.predicate.value, ctx);
60
+ const subjectName = extractLocalName(quad.subject.value);
61
+
62
+ if (isNamedNode(quad.object)) {
63
+ // IRI object: create object reference
64
+ const objectShort = shortenIRI(quad.object.value, ctx);
65
+ return { text: `\n\n# ${subjectName.charAt(0).toUpperCase() + subjectName.slice(1)} {=${subjectShort}}\n[${objectShort}] {${predShort}}\n`, isNewSubject: true };
66
+ } else {
67
+ // Literal object: create property on separate line
68
+ const value = String(quad.object.value ?? '');
69
+ const annotation = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
70
+ return { text: `\n\n# ${subjectName.charAt(0).toUpperCase() + subjectName.slice(1)} {=${subjectShort}}\n[${value}] {${annotation}}\n`, isNewSubject: true };
71
+ }
72
+ }
73
+
74
+ function extractLocalName(iri) {
75
+ return iri.split('/').pop() || iri.split('#').pop() || iri;
27
76
  }
28
77
 
29
78
  function isValidQuad(quad) {
30
79
  return quad && quad.subject && quad.predicate && quad.object;
31
80
  }
32
81
 
82
+ function normalizeDiffQuads(quads, ctx) {
83
+ // Use DataFactory.fromQuad for proper RDF/JS compatibility
84
+ // But first expand any CURIEs in the quads to ensure proper matching
85
+ return quads.map(quad => {
86
+ // Expand CURIEs to full IRIs before normalization
87
+ const expandedQuad = {
88
+ subject: quad.subject.termType === 'NamedNode'
89
+ ? { ...quad.subject, value: expandIRI(quad.subject.value, ctx) }
90
+ : quad.subject,
91
+ predicate: quad.predicate.termType === 'NamedNode'
92
+ ? { ...quad.predicate, value: expandIRI(quad.predicate.value, ctx) }
93
+ : quad.predicate,
94
+ object: quad.object,
95
+ graph: quad.graph
96
+ };
97
+ return DataFactory.fromQuad(expandedQuad);
98
+ }).filter(isValidQuad);
99
+ }
100
+
33
101
  function createLiteralAnnotation(value, predicate, language, datatype, ctx) {
34
102
  let ann = predicate;
35
103
  if (language) ann += ` @${language}`;
36
- else if (datatype?.value && datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
104
+ else if (datatype?.value && datatype.value !== DataFactory.literal('').datatype.value) {
37
105
  ann += ` ^^${shortenIRI(datatype.value, ctx)}`;
38
106
  }
39
107
  return ann;
@@ -126,23 +194,24 @@ function removeTokenFromSlot(entry, tokens, ctx, quad) {
126
194
  }
127
195
 
128
196
  function addTokenToSlot(tokens, ctx, quad) {
129
- if (quad.predicate.value.endsWith('rdf-syntax-ns#type') && quad.object?.termType === 'NamedNode') {
197
+ // Use cleaner helper functions
198
+ if (isRdfType(quad.predicate) && isNamedNode(quad.object)) {
130
199
  const typeShort = shortenIRI(quad.object.value, ctx);
131
200
  const typeToken = typeShort.includes(':') || !typeShort.startsWith('http') ? `.${typeShort}` : null;
132
201
  if (typeToken && !tokens.includes(typeToken)) {
133
202
  return [...tokens, typeToken];
134
203
  }
135
- } else if (quad.object.termType === 'NamedNode') {
204
+ } else if (isNamedNode(quad.object)) {
136
205
  const objectShort = shortenIRI(quad.object.value, ctx);
137
206
  const isSoftFragment = quad.object.value.includes('#');
138
207
  const fragment = isSoftFragment ? quad.object.value.split('#')[1] : null;
139
208
 
140
- if (isSoftFragment) {
141
- return addSoftFragmentToken(tokens, fragment);
209
+ if (fragment) {
210
+ return addSoftFragmentToken(tokens, objectShort, fragment);
142
211
  } else {
143
212
  return addObjectToken(tokens, objectShort);
144
213
  }
145
- } else if (quad.object.termType === 'Literal') {
214
+ } else if (isLiteral(quad.object)) {
146
215
  const predShort = shortenIRI(quad.predicate.value, ctx);
147
216
  if (!tokens.includes(predShort)) {
148
217
  return [...tokens, predShort];
@@ -179,9 +248,9 @@ export function applyDiff({ text, diff, origin, options = {} }) {
179
248
 
180
249
 
181
250
  function planOperations(diff, base, ctx) {
182
- // Normalize quads once
183
- const normAdds = (diff.add || []).map(normalizeQuad).filter(isValidQuad);
184
- const normDeletes = (diff.delete || []).map(normalizeQuad).filter(isValidQuad);
251
+ // Normalize quads using DataFactory for proper RDF/JS compatibility
252
+ const normAdds = normalizeDiffQuads(diff.add || [], ctx);
253
+ const normDeletes = normalizeDiffQuads(diff.delete || [], ctx);
185
254
 
186
255
  const plan = {
187
256
  literalUpdates: [],
@@ -206,8 +275,7 @@ function planOperations(diff, base, ctx) {
206
275
  const key = JSON.stringify([quad.subject.value, objectSignature(quad.object)]);
207
276
  const quadKey = quadToKeyForOrigin(quad);
208
277
  const entry = getEntryByQuadKey(base, quadKey);
209
- const blockId = entry?.blockId || entry;
210
- const block = getBlockById(base, blockId);
278
+ const block = entry; // In unified structure, entry is the block
211
279
  if (block?.attrsRange) {
212
280
  anchors.set(key, { block, entry });
213
281
  }
@@ -215,18 +283,18 @@ function planOperations(diff, base, ctx) {
215
283
 
216
284
  // Detect literal updates early
217
285
  for (const deleteQuad of normDeletes) {
218
- if (deleteQuad.object.termType !== 'Literal') continue;
286
+ if (!isLiteral(deleteQuad.object)) continue;
219
287
 
220
288
  const k = JSON.stringify([deleteQuad.subject.value, deleteQuad.predicate.value]);
221
289
  const candidates = addBySP.get(k) || [];
222
290
  const addQuad = candidates.find(x =>
223
- x?.object?.termType === 'Literal' && !plan.consumedAdds.has(quadToKeyForOrigin(x))
291
+ isLiteral(x?.object) && !plan.consumedAdds.has(quadToKeyForOrigin(x))
224
292
  );
225
293
 
226
294
  if (!addQuad) continue;
227
295
 
228
296
  const entry = resolveOriginEntry(deleteQuad, base);
229
- const block = entry ? getBlockById(base, entry.blockId || entry) : null;
297
+ const block = entry; // In unified structure, the entry is the block
230
298
 
231
299
  if (block) {
232
300
  plan.literalUpdates.push({ deleteQuad, addQuad, entry, block });
@@ -236,13 +304,13 @@ function planOperations(diff, base, ctx) {
236
304
 
237
305
  // Find vacant slot occupations
238
306
  for (const quad of normAdds) {
239
- if (quad.object.termType !== 'Literal') continue;
307
+ if (!isLiteral(quad.object)) continue;
240
308
  if (plan.consumedAdds.has(quadToKeyForOrigin(quad))) continue;
241
309
 
242
- const vacantSlot = findVacantSlot(base?.quadIndex, quad.subject, quad.predicate);
310
+ const vacantSlot = findVacantSlot(base?.quadMap, quad.subject, quad.predicate);
243
311
  if (!vacantSlot) continue;
244
312
 
245
- const block = base?.blocks?.get(vacantSlot.blockId);
313
+ const block = vacantSlot; // In unified structure, the slot is the block
246
314
  if (block) {
247
315
  plan.vacantSlotOccupations.push({ quad, vacantSlot, block });
248
316
  plan.consumedAdds.add(quadToKeyForOrigin(quad));
@@ -251,7 +319,7 @@ function planOperations(diff, base, ctx) {
251
319
 
252
320
  // Plan remaining deletes
253
321
  for (const quad of normDeletes) {
254
- if (quad.object.termType === 'Literal') {
322
+ if (isLiteral(quad.object)) {
255
323
  const isUpdated = plan.literalUpdates.some(u =>
256
324
  u.deleteQuad.subject.value === quad.subject.value &&
257
325
  u.deleteQuad.predicate.value === quad.predicate.value &&
@@ -261,7 +329,7 @@ function planOperations(diff, base, ctx) {
261
329
  }
262
330
 
263
331
  const entry = resolveOriginEntry(quad, base);
264
- const block = entry ? getBlockById(base, entry.blockId || entry) : null;
332
+ const block = entry; // In unified structure, entry is the block
265
333
  if (block) {
266
334
  plan.deletes.push({ quad, entry, block });
267
335
  }
@@ -348,7 +416,7 @@ function materializeEdits(plan, text, ctx, base) {
348
416
  };
349
417
  vacantSlot.blockInfo = blockInfo;
350
418
  const key = quadToKeyForOrigin(quad);
351
- if (key) base.quadIndex.set(key, vacantSlot);
419
+ if (key) base.quadMap.set(key, vacantSlot);
352
420
  }
353
421
 
354
422
  const span = readSpan(block, text, 'attrs');
@@ -382,56 +450,45 @@ function materializeEdits(plan, text, ctx, base) {
382
450
  continue;
383
451
  }
384
452
 
385
- if (quad.object.termType === 'Literal' || quad.object.termType === 'NamedNode') {
453
+ if (isLiteral(quad.object) || isNamedNode(quad.object)) {
386
454
  if (!targetBlock) {
387
- const predShort = shortenIRI(quad.predicate.value, ctx);
388
- if (quad.object.termType === 'Literal') {
389
- const value = String(quad.object.value ?? '');
390
- const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
391
- edits.push({ start: text.length, end: text.length, text: `\n[${value}] {${ann}}` });
455
+ // No target block - check if subject already exists in document
456
+ const subjectExists = Array.from(base?.quadMap?.values() || [])
457
+ .some(block => block.subject?.value === quad.subject.value);
458
+
459
+ let annotation;
460
+ if (!subjectExists && isNamedNode(quad.object)) {
461
+ // New subject with IRI object - create subject block
462
+ annotation = createSubjectBlockForQuad(quad, ctx);
463
+ } else if (subjectExists) {
464
+ // Existing subject - create simple annotation
465
+ annotation = createAnnotationForQuad(quad, ctx);
392
466
  } else {
393
- const objectShort = shortenIRI(quad.object.value, ctx);
394
- edits.push({ start: text.length, end: text.length, text: createObjectAnnotation(objectShort, predShort) });
467
+ // New subject with literal - create subject block
468
+ annotation = createSubjectBlockForQuad(quad, ctx);
395
469
  }
396
- continue;
397
- }
398
470
 
399
- const span = readSpan(targetBlock, text, 'attrs');
400
- if (!span) continue;
401
-
402
- // Check if this is a subject-only block (like {=ex:order-123})
403
- const tokens = normalizeAttrsTokens(span.text);
404
- const hasSubjectToken = tokens.some(t => t.startsWith('='));
405
- const hasPredicateTokens = tokens.some(t => !t.startsWith('=') && !t.startsWith('.'));
406
-
407
- if (tokens.length === 1 && tokens[0].startsWith('=')) {
408
- // This is a subject-only block, create new annotation
409
- const predShort = shortenIRI(quad.predicate.value, ctx);
410
- if (quad.object.termType === 'Literal') {
411
- const value = String(quad.object.value ?? '');
412
- const ann = createLiteralAnnotation(value, predShort, quad.object.language, quad.object.datatype, ctx);
413
- edits.push({ start: text.length, end: text.length, text: `\n[${value}] {${ann}}` });
414
- } else {
415
- const objectShort = shortenIRI(quad.object.value, ctx);
416
- edits.push({ start: text.length, end: text.length, text: createObjectAnnotation(objectShort, predShort) });
471
+ if (annotation) {
472
+ edits.push({ start: text.length, end: text.length, text: annotation.text });
417
473
  }
418
474
  continue;
419
475
  }
420
476
 
421
- // Normal annotation block, add tokens
422
- const existingTokens = blockTokensFromEntries(targetBlock) || tokens;
423
- let updated = addTokenToSlot(existingTokens, ctx, quad);
477
+ // Insert annotation after target block's range
478
+ const annotation = createAnnotationForQuad(quad, ctx);
479
+ if (annotation) {
480
+ // Find the end of the target block's content, not just its range
481
+ const targetBlockEnd = targetBlock.range.end;
482
+ let insertPos = targetBlockEnd;
424
483
 
425
- // For literal predicates with datatypes, we need to add datatype token too
426
- if (quad.object.termType === 'Literal' && quad.object.datatype && quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
427
- const datatypeToken = `^^${shortenIRI(quad.object.datatype.value, ctx)}`;
428
- if (!updated.includes(datatypeToken)) {
429
- updated = [...updated, datatypeToken];
484
+ // Skip past the target block's content to find the right insertion point
485
+ while (insertPos < text.length && text[insertPos] !== '\n') {
486
+ insertPos++;
430
487
  }
431
- }
432
488
 
433
- if (updated.length !== existingTokens.length) {
434
- edits.push({ start: span.start, end: span.end, text: writeAttrsTokens(updated) });
489
+ // Insert after the target block's content
490
+ const finalInsertPos = insertPos < text.length ? insertPos : text.length;
491
+ edits.push({ start: finalInsertPos, end: finalInsertPos, text: `\n${annotation.text}` });
435
492
  }
436
493
  }
437
494
  }
@@ -450,7 +507,7 @@ function applyEdits(text, edits, ctx, base) {
450
507
 
451
508
  // Extract vacant slots before reparsing
452
509
  const vacantSlots = new Map();
453
- base?.quadIndex?.forEach((slot, key) => {
510
+ base?.quadMap?.forEach((slot, key) => {
454
511
  if (slot.isVacant) vacantSlots.set(key, slot);
455
512
  });
456
513
 
@@ -458,7 +515,7 @@ function applyEdits(text, edits, ctx, base) {
458
515
 
459
516
  // Merge vacant slots back
460
517
  vacantSlots.forEach((vacantSlot, key) => {
461
- if (!reparsed.origin.blocks.has(vacantSlot.blockId) && vacantSlot.blockInfo) {
518
+ if (!reparsed.origin.quadMap.has(vacantSlot.id) && vacantSlot.blockInfo) {
462
519
  const { blockInfo } = vacantSlot;
463
520
  const emptyBlock = {
464
521
  id: blockInfo.id,
@@ -469,12 +526,11 @@ function applyEdits(text, edits, ctx, base) {
469
526
  subject: blockInfo.subject || '',
470
527
  types: [],
471
528
  predicates: [],
472
- entries: [],
473
529
  context: blockInfo.context || { ...ctx }
474
530
  };
475
- reparsed.origin.blocks.set(vacantSlot.blockId, emptyBlock);
531
+ reparsed.origin.quadMap.set(vacantSlot.id, emptyBlock);
476
532
  }
477
- reparsed.origin.quadIndex.set(key, vacantSlot);
533
+ reparsed.origin.quadMap.set(key, vacantSlot);
478
534
  });
479
535
 
480
536
  return { text: result, origin: reparsed.origin };
@@ -483,11 +539,11 @@ function applyEdits(text, edits, ctx, base) {
483
539
  // Helper functions for origin lookup
484
540
  function resolveOriginEntry(quad, base) {
485
541
  const key = quadToKeyForOrigin(quad);
486
- let entry = key ? base?.quadIndex?.get(key) : null;
542
+ let entry = key ? base?.quadMap?.get(key) : null;
487
543
 
488
- if (!entry && quad.object?.termType === 'Literal') {
544
+ if (!entry && isLiteral(quad.object)) {
489
545
  // Fallback: search by value
490
- for (const [k, e] of base?.quadIndex || []) {
546
+ for (const [k, e] of base?.quadMap || []) {
491
547
  const parsed = parseQuadIndexKey(k);
492
548
  if (parsed && parsed.s === quad.subject.value &&
493
549
  parsed.p === quad.predicate.value &&
@@ -507,12 +563,21 @@ function findTargetBlock(quad, base, anchors) {
507
563
  const anchored = anchors.get(anchorKey);
508
564
  if (anchored?.block) return anchored.block;
509
565
 
510
- // Block affinity: prefer same block, then same subject
511
- for (const [, block] of base?.blocks || []) {
512
- if (block.subject === quad.subject.value && block.attrsRange) {
513
- return block;
514
- }
566
+ // Find the best position within the subject's section
567
+ // Look for blocks with the same subject and sort by position
568
+ const subjectBlocks = Array.from(base?.quadMap?.values() || [])
569
+ .filter(block => block.subject?.value === quad.subject.value)
570
+ .sort((a, b) => a.range.start - b.range.start);
571
+
572
+ if (subjectBlocks.length === 0) return null;
573
+
574
+ // Strategy: Find the last block with attrsRange to maintain consistency
575
+ // For identical subject blocks, prefer the first one to avoid creating duplicates
576
+ const blocksWithAttrs = subjectBlocks.filter(block => block.attrsRange);
577
+ if (blocksWithAttrs.length > 0) {
578
+ return blocksWithAttrs[blocksWithAttrs.length - 1]; // Return last matching block
515
579
  }
516
580
 
517
- return null;
581
+ // Fallback: return the last block in the subject's section
582
+ return subjectBlocks[subjectBlocks.length - 1];
518
583
  }
package/src/generate.js CHANGED
@@ -1,4 +1,17 @@
1
- import { shortenIRI, expandIRI, quadIndexKey, createSlotInfo, DEFAULT_CONTEXT } from './utils.js';
1
+ import { shortenIRI, expandIRI, quadIndexKey, createUnifiedSlot, DEFAULT_CONTEXT, DataFactory } from './utils.js';
2
+
3
+ // Helper functions for cleaner term type checking
4
+ function isLiteral(term) {
5
+ return term?.termType === 'Literal';
6
+ }
7
+
8
+ function isNamedNode(term) {
9
+ return term?.termType === 'NamedNode';
10
+ }
11
+
12
+ function isRdfType(term) {
13
+ return term?.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
14
+ }
2
15
 
3
16
 
4
17
  function extractLocalName(iri) {
@@ -25,37 +38,37 @@ export function generate(quads, context = {}) {
25
38
 
26
39
  const subjectGroups = groupQuadsBySubject(normalizedQuads);
27
40
 
28
- const { text, blocks, quadIndex } = buildDeterministicMDLD(subjectGroups, fullContext);
41
+ const { text, quadMap } = buildDeterministicMDLD(subjectGroups, fullContext);
29
42
 
30
43
  return {
31
44
  text,
32
- origin: { blocks, quadIndex },
45
+ origin: { quadMap },
33
46
  context: fullContext
34
47
  };
35
48
  }
36
49
 
37
50
  function normalizeAndSortQuads(quads) {
38
51
  return quads
39
- .map(quad => ({
40
- subject: { termType: quad.subject.termType, value: quad.subject.value },
41
- predicate: { termType: quad.predicate.termType, value: quad.predicate.value },
42
- object: quad.object.termType === 'Literal'
43
- ? {
44
- termType: 'Literal',
45
- value: quad.object.value,
46
- language: quad.object.language || null,
47
- datatype: quad.object.datatype || { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' }
48
- }
49
- : { termType: 'NamedNode', value: quad.object.value }
50
- }))
52
+ .map(quad => {
53
+ // Use DataFactory.fromTerm to ensure proper RDF/JS compatibility
54
+ const normSubject = DataFactory.fromTerm(quad.subject);
55
+ const normPredicate = DataFactory.fromTerm(quad.predicate);
56
+ const normObject = DataFactory.fromTerm(quad.object);
57
+
58
+ return {
59
+ subject: normSubject,
60
+ predicate: normPredicate,
61
+ object: normObject
62
+ };
63
+ })
51
64
  .sort((a, b) => {
52
65
  // Deterministic sorting: subject -> predicate -> object
53
66
  const sComp = a.subject.value.localeCompare(b.subject.value);
54
67
  if (sComp !== 0) return sComp;
55
68
  const pComp = a.predicate.value.localeCompare(b.predicate.value);
56
69
  if (pComp !== 0) return pComp;
57
- const oA = a.object.termType === 'Literal' ? a.object.value : a.object.value;
58
- const oB = b.object.termType === 'Literal' ? b.object.value : b.object.value;
70
+ const oA = isLiteral(a.object) ? a.object.value : a.object.value;
71
+ const oB = isLiteral(b.object) ? b.object.value : b.object.value;
59
72
  return oA.localeCompare(oB);
60
73
  });
61
74
  }
@@ -74,8 +87,7 @@ function groupQuadsBySubject(quads) {
74
87
  function buildDeterministicMDLD(subjectGroups, context) {
75
88
  let text = '';
76
89
  let currentPos = 0;
77
- const blocks = new Map();
78
- const quadIndex = new Map();
90
+ const quadMap = new Map();
79
91
 
80
92
  // Add prefixes first (deterministic order), but exclude default context prefixes
81
93
  const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
@@ -83,14 +95,6 @@ function buildDeterministicMDLD(subjectGroups, context) {
83
95
  // Skip default context prefixes - they're implicit in MDLD
84
96
  if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix]) {
85
97
  const prefixDecl = `[${prefix}] <${namespace}>\n`;
86
- const blockId = generateBlockId();
87
- blocks.set(blockId, {
88
- id: blockId,
89
- range: { start: currentPos, end: currentPos + prefixDecl.length },
90
- subject: null,
91
- entries: [{ kind: 'prefix', prefix, namespace, raw: prefixDecl.trim() }],
92
- carrierType: 'prefix'
93
- });
94
98
  text += prefixDecl;
95
99
  currentPos += prefixDecl.length;
96
100
  }
@@ -108,10 +112,10 @@ function buildDeterministicMDLD(subjectGroups, context) {
108
112
  const subjectQuads = subjectGroups.get(subjectIRI);
109
113
  const shortSubject = shortenIRI(subjectIRI, context);
110
114
 
111
- // Separate types, literals, and objects
112
- const types = subjectQuads.filter(q => q.predicate.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
113
- const literals = subjectQuads.filter(q => q.object.termType === 'Literal' && q.predicate.value !== 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
114
- const objects = subjectQuads.filter(q => q.object.termType === 'NamedNode' && q.predicate.value !== 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
115
+ // Separate types, literals, and objects using helper functions
116
+ const types = subjectQuads.filter(q => isRdfType(q.predicate));
117
+ const literals = subjectQuads.filter(q => isLiteral(q.object) && !isRdfType(q.predicate));
118
+ const objects = subjectQuads.filter(q => isNamedNode(q.object) && !isRdfType(q.predicate));
115
119
 
116
120
  // Generate heading
117
121
  const localSubjectName = extractLocalName(subjectIRI);
@@ -120,28 +124,23 @@ function buildDeterministicMDLD(subjectGroups, context) {
120
124
  : '';
121
125
 
122
126
  const headingText = `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n\n`;
123
- const blockId = generateBlockId();
127
+
124
128
  const headingBlock = {
125
- id: blockId,
129
+ id: generateBlockId(),
126
130
  range: { start: currentPos, end: currentPos + headingText.length },
127
131
  subject: subjectIRI,
128
- entries: [
129
- { kind: 'subject', raw: `=${shortSubject}`, expandedSubject: subjectIRI },
130
- ...types.map((t, i) => ({
131
- kind: 'type',
132
- raw: '.' + extractLocalName(t.object.value),
133
- expandedType: t.object.value,
134
- entryIndex: i
135
- }))
136
- ],
137
- carrierType: 'heading'
132
+ types: types.map(t => t.object.value),
133
+ predicates: [],
134
+ context: { ...context },
135
+ carrierType: 'heading',
136
+ attrsRange: { start: currentPos + headingText.indexOf('{'), end: currentPos + headingText.indexOf('}') + 1 },
137
+ valueRange: { start: currentPos + 2, end: currentPos + 2 + localSubjectName.length }
138
138
  };
139
- blocks.set(blockId, headingBlock);
140
139
 
141
- // Add type quads to index
140
+ // Add type quads to quadMap
142
141
  types.forEach((quad, i) => {
143
142
  const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
144
- quadIndex.set(key, createSlotInfo(blockId, i, {
143
+ quadMap.set(key, createUnifiedSlot(headingBlock, i, {
145
144
  kind: 'type',
146
145
  subject: quad.subject,
147
146
  predicate: quad.predicate,
@@ -158,34 +157,30 @@ function buildDeterministicMDLD(subjectGroups, context) {
158
157
  const predShort = shortenIRI(quad.predicate.value, context);
159
158
  let annotation = predShort;
160
159
 
160
+ // Use DataFactory XSD constants for datatype comparison
161
+ const xsdString = 'http://www.w3.org/2001/XMLSchema#string';
161
162
  if (quad.object.language) {
162
163
  annotation += ` @${quad.object.language}`;
163
- } else if (quad.object.datatype.value !== 'http://www.w3.org/2001/XMLSchema#string') {
164
+ } else if (quad.object.datatype.value !== xsdString) {
164
165
  annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
165
166
  }
166
167
 
167
168
  const literalText = `[${quad.object.value}] {${annotation}}\n`;
168
- const literalBlockId = generateBlockId();
169
169
  const literalBlock = {
170
- id: literalBlockId,
170
+ id: generateBlockId(),
171
171
  range: { start: currentPos, end: currentPos + literalText.length },
172
172
  subject: subjectIRI,
173
- entries: [{
174
- kind: 'property',
175
- raw: annotation,
176
- expandedPredicate: quad.predicate.value,
177
- form: '',
178
- entryIndex: 0
179
- }],
173
+ types: [],
174
+ predicates: [{ iri: quad.predicate.value, form: '' }],
175
+ context: { ...context },
180
176
  carrierType: 'span',
181
177
  valueRange: { start: currentPos + 1, end: currentPos + 1 + quad.object.value.length },
182
178
  attrsRange: { start: currentPos + literalText.indexOf('{'), end: currentPos + literalText.indexOf('}') + 1 }
183
179
  };
184
- blocks.set(literalBlockId, literalBlock);
185
180
 
186
- // Add to quad index
181
+ // Add to quadMap
187
182
  const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
188
- quadIndex.set(key, createSlotInfo(literalBlockId, 0, {
183
+ quadMap.set(key, createUnifiedSlot(literalBlock, 0, {
189
184
  kind: 'pred',
190
185
  subject: quad.subject,
191
186
  predicate: quad.predicate,
@@ -200,29 +195,25 @@ function buildDeterministicMDLD(subjectGroups, context) {
200
195
  // Add objects (deterministic order)
201
196
  const sortedObjects = objects.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
202
197
  for (const quad of sortedObjects) {
203
- const predShort = shortenIRI(quad.predicate.value, context);
204
198
  const objShort = shortenIRI(quad.object.value, context);
205
- const localName = extractLocalName(quad.object.value);
199
+ const predShort = shortenIRI(quad.predicate.value, context);
200
+ const objectText = `[${objShort}] {+${objShort} ?${predShort}}\n`;
206
201
 
207
- const objectText = `[${localName}] {+${objShort} ?${predShort}}\n`;
208
- const objectBlockId = generateBlockId();
209
202
  const objectBlock = {
210
- id: objectBlockId,
203
+ id: generateBlockId(),
211
204
  range: { start: currentPos, end: currentPos + objectText.length },
212
205
  subject: subjectIRI,
213
- entries: [{
214
- kind: 'object',
215
- raw: objShort,
216
- expandedObject: quad.object.value,
217
- entryIndex: 0
218
- }],
219
- carrierType: 'span'
206
+ types: [],
207
+ predicates: [{ iri: quad.predicate.value, form: '?' }],
208
+ context: { ...context },
209
+ carrierType: 'span',
210
+ valueRange: { start: currentPos + 1, end: currentPos + 1 + objShort.length },
211
+ attrsRange: { start: currentPos + objectText.indexOf('{'), end: currentPos + objectText.indexOf('}') + 1 }
220
212
  };
221
- blocks.set(objectBlockId, objectBlock);
222
213
 
223
- // Add to quad index
214
+ // Add to quadMap
224
215
  const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
225
- quadIndex.set(key, createSlotInfo(objectBlockId, 0, {
216
+ quadMap.set(key, createUnifiedSlot(objectBlock, 0, {
226
217
  kind: 'pred',
227
218
  subject: quad.subject,
228
219
  predicate: quad.predicate,
@@ -234,13 +225,11 @@ function buildDeterministicMDLD(subjectGroups, context) {
234
225
  currentPos += objectText.length;
235
226
  }
236
227
 
237
- if (sortedLiterals.length > 0 || sortedObjects.length > 0) {
238
- text += '\n';
239
- currentPos += 1;
240
- }
228
+ text += '\n';
229
+ currentPos += 1;
241
230
  }
242
231
 
243
- return { text: text.trim(), blocks, quadIndex };
232
+ return { text, quadMap };
244
233
  }
245
234
 
246
235
  function generateBlockId() {
package/src/index.js CHANGED
@@ -2,6 +2,7 @@ export { parse } from './parse.js';
2
2
  export { applyDiff } from './applyDiff.js';
3
3
  export { generate } from './generate.js';
4
4
  export { locate } from './locate.js';
5
+ export { render } from './render.js';
5
6
  export {
6
7
  DEFAULT_CONTEXT,
7
8
  DataFactory,
package/src/locate.js CHANGED
@@ -17,7 +17,7 @@ export function locate(quad, origin, text = '', context = {}) {
17
17
  origin = parseResult.origin;
18
18
  }
19
19
 
20
- if (!quad || !origin || !origin.quadIndex || !origin.blocks) {
20
+ if (!quad || !origin || !origin.quadMap) {
21
21
  return null;
22
22
  }
23
23
 
@@ -27,57 +27,40 @@ export function locate(quad, origin, text = '', context = {}) {
27
27
  return null;
28
28
  }
29
29
 
30
- // Generate the quad key to lookup in quadIndex
30
+ // Generate the quad key to lookup in quadMap
31
31
  const quadKey = quadIndexKey(normalizedQuad.subject, normalizedQuad.predicate, normalizedQuad.object);
32
32
 
33
- // Find the slot information in quadIndex
34
- const slotInfo = origin.quadIndex.get(quadKey);
33
+ // Find the slot information in quadMap
34
+ const slotInfo = origin.quadMap.get(quadKey);
35
35
  if (!slotInfo) {
36
36
  return null;
37
37
  }
38
38
 
39
- // Get the block information
40
- const block = origin.blocks.get(slotInfo.blockId);
41
- if (!block) {
42
- return null;
43
- }
39
+ // In unified structure, slotInfo contains all block information
40
+ const block = slotInfo;
44
41
 
45
- // Extract the actual text content based on carrier type and entry
42
+ // Extract the actual text content based on carrier type
46
43
  let contentRange = null;
47
44
  let content = '';
48
45
 
49
46
  if (block.carrierType === 'heading') {
50
- // For headings, use the block's main range
51
- contentRange = block.range;
52
- content = text.substring(block.range.start, block.range.end);
53
- } else if (block.carrierType === 'blockquote' || block.carrierType === 'list' || block.carrierType === 'span') {
54
- // For blockquotes, lists, and spans, extract from block range
55
- contentRange = block.range;
56
- content = text.substring(block.range.start, block.range.end);
57
-
58
- // For blockquotes, try to extract the specific carrier content from entries
59
- if (slotInfo.entryIndex != null && block.entries && block.entries[slotInfo.entryIndex]) {
60
- const entry = block.entries[slotInfo.entryIndex];
61
- if (entry.raw) {
62
- // For blockquotes, the entry.raw contains the full carrier text
63
- // Extract just the content part before the annotation
64
- const annotationStart = entry.raw.indexOf('{');
65
- if (annotationStart !== -1) {
66
- const carrierContent = entry.raw.substring(0, annotationStart).trim();
67
- // Find this content in the block text
68
- const contentStart = text.indexOf(carrierContent, block.range.start);
69
- if (contentStart !== -1) {
70
- const contentEnd = contentStart + carrierContent.length;
71
- contentRange = { start: contentStart, end: contentEnd };
72
- content = text.substring(contentStart, contentEnd);
73
- }
74
- }
75
- }
47
+ // For headings, use the value range for the heading text
48
+ contentRange = block.valueRange;
49
+ content = text.substring(block.valueRange.start, block.valueRange.end);
50
+ } else if (block.carrierType === 'emphasis' || block.carrierType === 'blockquote' || block.carrierType === 'list' || block.carrierType === 'span') {
51
+ // For emphasis, blockquotes, lists, and spans, use the value range
52
+ if (block.valueRange) {
53
+ contentRange = block.valueRange;
54
+ content = text.substring(block.valueRange.start, block.valueRange.end);
55
+ } else {
56
+ // Fallback to block range
57
+ contentRange = block.range;
58
+ content = text.substring(block.range.start, block.range.end);
76
59
  }
77
60
  }
78
61
 
79
62
  return {
80
- blockId: slotInfo.blockId,
63
+ blockId: slotInfo.id,
81
64
  entryIndex: slotInfo.entryIndex,
82
65
  kind: slotInfo.kind,
83
66
  subject: normalizedQuad.subject,
package/src/parse.js CHANGED
@@ -4,7 +4,7 @@ import {
4
4
  expandIRI,
5
5
  parseSemanticBlock,
6
6
  quadIndexKey,
7
- createSlotInfo,
7
+ createUnifiedSlot,
8
8
  createLiteral,
9
9
  hash
10
10
  } from './utils.js';
@@ -16,7 +16,6 @@ const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
16
16
  const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
17
17
  const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
18
18
  const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
19
- const LIST_CONTEXT_REGEX = /^(.+?)\s*\{([^}]+)\}$/;
20
19
  const INLINE_CARRIER_PATTERNS = {
21
20
  EMPHASIS: /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y,
22
21
  CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
@@ -87,13 +86,12 @@ function getCarriers(token) {
87
86
  return token._carriers || (token._carriers = extractInlineCarriers(token.text, token.range[0]));
88
87
  }
89
88
 
90
- const createListToken = (type, line, lineStart, pos, match, indent = null) => {
89
+ const createListToken = (type, line, lineStart, pos, match) => {
91
90
  const attrs = match[4] || null;
92
91
  const prefix = match[1].length + (match[2] ? match[2].length : 0);
93
92
  const rangeInfo = calcRangeInfo(line, attrs, lineStart, prefix, match[3].length);
94
- const extra = indent !== null ? { indent } : { indent: match[1].length };
95
93
  return createToken(type, [lineStart, pos - 1], match[3].trim(), attrs,
96
- rangeInfo.attrsRange, rangeInfo.valueRange, extra);
94
+ rangeInfo.attrsRange, rangeInfo.valueRange, { indent: match[1].length });
97
95
  };
98
96
 
99
97
  function scanTokens(text) {
@@ -173,7 +171,7 @@ function scanTokens(text) {
173
171
 
174
172
  function handleList(line, lineStart, pos) {
175
173
  const match = UNORDERED_LIST_REGEX.exec(line);
176
- tokens.push(createListToken('list', line, lineStart, pos, match, match[1].length));
174
+ tokens.push(createListToken('list', line, lineStart, pos, match));
177
175
  return true;
178
176
  }
179
177
 
@@ -277,14 +275,14 @@ function extractInlineCarriers(text, baseOffset = 0) {
277
275
  }
278
276
 
279
277
  function calcCarrierRanges(match, baseOffset, matchStart) {
280
- const valueStart = baseOffset + matchStart;
278
+ const valueStart = baseOffset + matchStart + match[0].indexOf(match[1]);
281
279
  const valueEnd = valueStart + match[1].length;
282
280
  const attrsStart = baseOffset + matchStart + match[0].indexOf('{');
283
281
  const attrsEnd = attrsStart + match[2].length + 2; // +2 for { and }
284
282
  return {
285
283
  valueRange: [valueStart, valueEnd],
286
284
  attrsRange: [attrsStart + 1, attrsEnd - 1], // Exclude braces
287
- range: [valueStart, attrsEnd],
285
+ range: [baseOffset + matchStart, attrsEnd],
288
286
  pos: matchStart + match[0].length // pos should be relative to current text, not document
289
287
  };
290
288
  }
@@ -345,7 +343,7 @@ function determineCarrierType(url) {
345
343
  return { carrierType: 'span', resourceIRI: null };
346
344
  }
347
345
 
348
- function createBlock(subject, types, predicates, entries, range, attrsRange, valueRange, carrierType, ctx) {
346
+ function createBlock(subject, types, predicates, range, attrsRange, valueRange, carrierType, ctx) {
349
347
  const expanded = {
350
348
  subject,
351
349
  types: types.map(t => expandIRI(typeof t === 'string' ? t : t.iri, ctx)),
@@ -364,23 +362,26 @@ function createBlock(subject, types, predicates, entries, range, attrsRange, val
364
362
  subject,
365
363
  types: expanded.types,
366
364
  predicates: expanded.predicates,
367
- entries: entries || [],
368
- context: { ...ctx }
365
+ context: ctx
369
366
  };
370
367
  }
371
368
 
372
- function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFactory, meta = null) {
369
+ function emitQuad(quads, quadMap, block, subject, predicate, object, dataFactory, meta = null) {
373
370
  if (!subject || !predicate || !object) return;
371
+
374
372
  const quad = dataFactory.quad(subject, predicate, object);
375
373
  quads.push(quad);
376
374
 
377
- const slotInfo = createSlotInfo(blockId, meta?.entryIndex, {
375
+ const unifiedSlot = createUnifiedSlot(block, meta?.entryIndex, {
378
376
  ...meta,
379
- subject, predicate, object
377
+ subject,
378
+ predicate,
379
+ object
380
380
  });
381
381
 
382
- quadIndex.set(quadIndexKey(quad.subject, quad.predicate, quad.object), slotInfo);
382
+ quadMap.set(quadIndexKey(quad.subject, quad.predicate, quad.object), unifiedSlot);
383
383
  }
384
+
384
385
  const resolveFragment = (fragment, state) => {
385
386
  if (!state.currentSubject) return null;
386
387
  const baseIRI = state.currentSubject.value.split('#')[0];
@@ -403,10 +404,10 @@ function resolveObject(sem, state) {
403
404
  return state.df.namedNode(expandIRI(sem.object, state.ctx));
404
405
  }
405
406
 
406
- const createTypeQuad = (typeIRI, subject, state, blockId, entryIndex = null) => {
407
+ const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
407
408
  const expandedType = expandIRI(typeIRI, state.ctx);
408
409
  emitQuad(
409
- state.quads, state.origin.quadIndex, blockId,
410
+ state.quads, state.origin.quadMap, block,
410
411
  subject,
411
412
  state.df.namedNode(expandIRI('rdf:type', state.ctx)),
412
413
  state.df.namedNode(expandedType),
@@ -419,9 +420,9 @@ function processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block
419
420
  sem.types.forEach(t => {
420
421
  const typeIRI = typeof t === 'string' ? t : t.iri;
421
422
  const entryIndex = typeof t === 'string' ? null : t.entryIndex;
422
- // Type subject priority: explicit subject > soft object > URL > current subject
423
+ // Type subject priority: explicit subject > soft object > carrier URL > current subject
423
424
  let typeSubject = newSubject || localObject || carrierO || S;
424
- createTypeQuad(typeIRI, typeSubject, state, block.id, entryIndex);
425
+ createTypeQuad(typeIRI, typeSubject, state, block, entryIndex);
425
426
  });
426
427
  }
427
428
 
@@ -452,7 +453,7 @@ function processPredicateAnnotations(sem, newSubject, previousSubject, localObje
452
453
  const role = determinePredicateRole(pred, carrier, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L);
453
454
  if (role) {
454
455
  const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
455
- emitQuad(state.quads, state.origin.quadIndex, block.id,
456
+ emitQuad(state.quads, state.origin.quadMap, block,
456
457
  role.subject, P, role.object, state.df,
457
458
  { kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex }
458
459
  );
@@ -480,11 +481,10 @@ function processAnnotation(carrier, sem, state, options = {}) {
480
481
  if (!S) return;
481
482
 
482
483
  const block = createBlock(
483
- S.value, sem.types, sem.predicates, sem.entries,
484
+ S.value, sem.types, sem.predicates,
484
485
  carrier.range, carrier.attrsRange || null, carrier.valueRange || null,
485
486
  carrier.type || null, state.ctx
486
487
  );
487
- state.origin.blocks.set(block.id, block);
488
488
 
489
489
  const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
490
490
  const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
@@ -494,171 +494,12 @@ function processAnnotation(carrier, sem, state, options = {}) {
494
494
  processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state, carrier);
495
495
  }
496
496
 
497
- export function findItemSubject(listToken, carriers, state) {
498
- const sem = parseSemCached(listToken.attrs);
499
- if (sem.subject && sem.subject !== 'RESET') {
500
- const subject = resolveSubject(sem, state);
501
- if (subject) {
502
- return {
503
- subject,
504
- carrier: { type: 'list', text: listToken.text, attrs: listToken.attrs, range: listToken.range }
505
- };
506
- }
507
- }
508
497
 
509
- for (const carrier of carriers) {
510
- const carrierSem = parseSemCached(carrier.attrs);
511
- if (carrierSem.subject && carrierSem.subject !== 'RESET') {
512
- const subject = resolveSubject(carrierSem, state);
513
- if (subject) {
514
- return { subject, carrier };
515
- }
516
- }
517
- }
518
498
 
519
- return null;
520
- }
521
499
 
522
- const processContextSem = ({ sem, itemSubject, contextSubject, inheritLiterals = false, state, blockId = 'list-context' }) => {
523
- sem.types.forEach(t => {
524
- const typeIRI = typeof t === 'string' ? t : t.iri;
525
- emitQuad(
526
- state.quads, state.origin.quadIndex, blockId,
527
- itemSubject,
528
- state.df.namedNode(expandIRI('rdf:type', state.ctx)),
529
- state.df.namedNode(expandIRI(typeIRI, state.ctx)),
530
- state.df
531
- );
532
- });
533
500
 
534
- sem.predicates.forEach(pred => {
535
- const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
536
- if (pred.form === '!') {
537
- emitQuad(state.quads, state.origin.quadIndex, blockId, itemSubject, P, contextSubject, state.df);
538
- } else if (pred.form === '?') {
539
- emitQuad(state.quads, state.origin.quadIndex, blockId, contextSubject, P, itemSubject, state.df);
540
- }
541
- });
542
501
 
543
- if (inheritLiterals) {
544
- const literalPredicates = sem.predicates.filter(p => p.form === '');
545
- if (literalPredicates.length > 0) {
546
- return {
547
- subject: null, object: null, types: [],
548
- predicates: literalPredicates.map(p => ({ iri: p.iri, form: p.form, entryIndex: p.entryIndex })),
549
- datatype: null, language: null, entries: []
550
- };
551
- }
552
- }
553
- return null;
554
- };
555
502
 
556
- const manageListStack = (token, state) => {
557
- while (state.listStack.length && token.indent < state.listStack[state.listStack.length - 1].indent) {
558
- state.listStack.pop();
559
- }
560
-
561
- if (state.pendingListContext) {
562
- state.listStack.push({
563
- indent: token.indent,
564
- anchorSubject: state.pendingListContext.subject,
565
- contextSubject: state.pendingListContext.subject,
566
- contextSem: state.pendingListContext.sem,
567
- contextText: state.pendingListContext.contextText,
568
- contextToken: state.pendingListContext.contextToken // Store context token for origins
569
- });
570
- state.pendingListContext = null;
571
- } else if (state.listStack.length === 0 || token.indent > state.listStack[state.listStack.length - 1].indent) {
572
- const parentFrame = state.listStack.length > 0 ? state.listStack[state.listStack.length - 1] : null;
573
- state.listStack.push({
574
- indent: token.indent,
575
- anchorSubject: parentFrame?.anchorSubject || null,
576
- contextSubject: parentFrame?.anchorSubject || null,
577
- contextSem: null
578
- });
579
- }
580
- };
581
-
582
- const combineSemanticInfo = (token, carriers, listFrame, state, itemSubject) => {
583
- const combinedSem = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
584
- const addSem = (sem) => {
585
- const entryIndex = combinedSem.entries.length;
586
- combinedSem.types.push(...sem.types);
587
- combinedSem.predicates.push(...sem.predicates);
588
- combinedSem.entries.push(...sem.entries.map(entry => ({ ...entry, entryIndex })));
589
- };
590
-
591
- if (listFrame?.contextSem) {
592
- const inheritedSem = processContextSem({ sem: listFrame.contextSem, itemSubject, contextSubject: listFrame.contextSubject, inheritLiterals: true, state });
593
- if (inheritedSem) addSem(inheritedSem);
594
- }
595
-
596
- if (token.attrs) addSem(parseSemCached(token.attrs));
597
- carriers.forEach(carrier => { if (carrier.attrs) addSem(parseSemCached(carrier.attrs)); });
598
-
599
- return combinedSem;
600
- };
601
-
602
- const processListItem = (token, state) => {
603
- const carriers = getCarriers(token);
604
- const itemInfo = findItemSubject(token, carriers, state);
605
- if (!itemInfo) return;
606
-
607
- const { subject: itemSubject } = itemInfo;
608
- if (state.listStack.length > 0) state.listStack[state.listStack.length - 1].anchorSubject = itemSubject;
609
-
610
- const listFrame = state.listStack[state.listStack.length - 1];
611
- const combinedSem = combineSemanticInfo(token, carriers, listFrame, state, itemSubject);
612
-
613
- if (combinedSem.entries.length > 0) {
614
- const prevSubject = state.currentSubject;
615
- state.currentSubject = itemSubject;
616
-
617
- processAnnotation({ type: 'list', text: token.text, range: token.range, attrsRange: token.attrsRange || null, valueRange: token.valueRange || null }, combinedSem, state, { preserveGlobalSubject: !state.listStack.length, implicitSubject: itemSubject });
618
-
619
- state.currentSubject = prevSubject;
620
- }
621
- };
622
-
623
-
624
- function processListContextFromParagraph(token, state) {
625
- const contextMatch = LIST_CONTEXT_REGEX.exec(token.text);
626
- if (!contextMatch) return;
627
-
628
- const contextSem = parseSemCached(`{${contextMatch[2]}}`);
629
- let contextSubject = state.currentSubject || state.documentSubject;
630
-
631
- if (!contextSubject && state.tokens) {
632
- for (let i = state.currentTokenIndex - 1; i >= 0; i--) {
633
- const prevToken = state.tokens[i];
634
- if (prevToken.type === 'heading' && prevToken.attrs) {
635
- const prevSem = parseSemCached(prevToken.attrs);
636
- if (prevSem.subject) {
637
- const resolvedSubject = resolveSubject(prevSem, state);
638
- if (resolvedSubject) {
639
- contextSubject = resolvedSubject.value;
640
- break;
641
- }
642
- }
643
- }
644
- }
645
- }
646
-
647
- const nextToken = state.tokens?.[state.currentTokenIndex + 1];
648
- if (state.listStack.length > 0 && nextToken && nextToken.type === 'list') {
649
- const currentFrame = state.listStack[state.listStack.length - 1];
650
- if (currentFrame.anchorSubject && nextToken.indent > currentFrame.indent) {
651
- contextSubject = currentFrame.anchorSubject;
652
- }
653
- }
654
-
655
- state.pendingListContext = {
656
- sem: contextSem,
657
- subject: contextSubject,
658
- contextText: contextMatch[1].replace(':', '').trim(),
659
- contextToken: token // Store the context token for origin ranges
660
- };
661
- }
662
503
 
663
504
  function processTokenAnnotations(token, state, tokenType) {
664
505
  if (token.attrs) {
@@ -692,13 +533,6 @@ function processStandaloneSubject(token, state) {
692
533
 
693
534
  const TOKEN_PROCESSORS = {
694
535
  heading: (token, state) => {
695
- if (token.attrs) {
696
- const headingSem = parseSemCached(token.attrs);
697
- if (headingSem.subject) {
698
- const subject = resolveSubject(headingSem, state);
699
- if (subject) state.documentSubject = subject;
700
- }
701
- }
702
536
  processTokenAnnotations(token, state, token.type);
703
537
  },
704
538
  code: (token, state) => {
@@ -709,12 +543,10 @@ const TOKEN_PROCESSORS = {
709
543
  },
710
544
  para: (token, state) => {
711
545
  processStandaloneSubject(token, state);
712
- processListContextFromParagraph(token, state);
713
546
  processTokenAnnotations(token, state, token.type);
714
547
  },
715
548
  list: (token, state) => {
716
- manageListStack(token, state);
717
- processListItem(token, state);
549
+ processTokenAnnotations(token, state, token.type);
718
550
  },
719
551
  };
720
552
 
@@ -723,11 +555,8 @@ export function parse(text, options = {}) {
723
555
  ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
724
556
  df: options.dataFactory || DataFactory,
725
557
  quads: [],
726
- origin: { blocks: new Map(), quadIndex: new Map() },
558
+ origin: { quadMap: new Map() },
727
559
  currentSubject: null,
728
- documentSubject: null,
729
- listStack: [],
730
- pendingListContext: null,
731
560
  tokens: null,
732
561
  currentTokenIndex: -1
733
562
  };
package/src/utils.js CHANGED
@@ -218,6 +218,15 @@ export const DataFactory = {
218
218
  fromQuad: (inQuad) => {
219
219
  if (inQuad instanceof Quad) return inQuad;
220
220
  if (inQuad.termType !== 'Quad') {
221
+ // Handle plain object quads by treating them as quads
222
+ if (inQuad.subject && inQuad.predicate && inQuad.object) {
223
+ return new Quad(
224
+ DataFactory.fromTerm(inQuad.subject),
225
+ DataFactory.fromTerm(inQuad.predicate),
226
+ DataFactory.fromTerm(inQuad.object),
227
+ DataFactory.fromTerm(inQuad.graph || DataFactory.defaultGraph())
228
+ );
229
+ }
221
230
  throw new Error(`Unexpected termType: ${inQuad.termType}`);
222
231
  }
223
232
  return new Quad(
@@ -265,9 +274,18 @@ export function expandIRI(term, ctx) {
265
274
  export function shortenIRI(iri, ctx) {
266
275
  if (!iri || !iri.startsWith('http')) return iri;
267
276
  if (ctx['@vocab'] && iri.startsWith(ctx['@vocab'])) return iri.substring(ctx['@vocab'].length);
277
+
278
+ // Find the best matching prefix - more precise matching
268
279
  for (const [prefix, namespace] of Object.entries(ctx)) {
269
280
  if (prefix !== '@vocab' && iri.startsWith(namespace)) {
270
- return prefix + ':' + iri.substring(namespace.length);
281
+ // Check if this is the best match (longest namespace)
282
+ const isBestMatch = Object.entries(ctx)
283
+ .filter(([p, ns]) => p !== '@vocab' && iri.startsWith(ns))
284
+ .every(([p, ns]) => namespace.length >= ns.length || (p === prefix && ns.length === namespace.length));
285
+
286
+ if (isBestMatch) {
287
+ return prefix + ':' + iri.substring(namespace.length);
288
+ }
271
289
  }
272
290
  }
273
291
  return iri;
@@ -407,16 +425,29 @@ export function parseQuadIndexKey(key) {
407
425
  }
408
426
  }
409
427
 
410
- // Direct slot management functions - no factory needed
411
- export function createSlotInfo(blockId, entryIndex, meta = {}) {
428
+ // Direct slot management functions - unified with block data
429
+ export function createUnifiedSlot(block, entryIndex, meta = {}) {
412
430
  const slotId = meta.subject && meta.predicate ? hash(`${meta.subject.value}|${meta.predicate.value}`) : null;
413
431
  return {
414
- blockId,
432
+ // Block metadata
433
+ id: block.id,
434
+ range: block.range,
435
+ attrsRange: block.attrsRange,
436
+ valueRange: block.valueRange,
437
+ carrierType: block.carrierType,
438
+ subject: block.subject,
439
+ types: block.types,
440
+ predicates: block.predicates,
441
+ context: block.context,
442
+
443
+ // Slot metadata
415
444
  entryIndex,
416
445
  slotId,
417
446
  isVacant: false,
418
447
  lastValue: null,
419
448
  vacantSince: null,
449
+
450
+ // Quad metadata
420
451
  ...meta
421
452
  };
422
453
  }
@@ -430,9 +461,9 @@ export function markSlotAsVacant(slotInfo, deletedValue) {
430
461
  } : null;
431
462
  }
432
463
 
433
- export function findVacantSlot(quadIndex, subject, predicate) {
464
+ export function findVacantSlot(quadMap, subject, predicate) {
434
465
  const targetSlotId = hash(`${subject.value}|${predicate.value}`);
435
- return Array.from(quadIndex.values())
466
+ return Array.from(quadMap.values())
436
467
  .find(slot => slot.slotId === targetSlotId && slot.isVacant);
437
468
  }
438
469