mdld-parse 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,26 +11,55 @@
11
11
  MD-LD allows you to author RDF graphs directly in Markdown using explicit `{...}` annotations:
12
12
 
13
13
  ```markdown
14
- # Apollo 11 {=ex:apollo11 .SpaceMission}
14
+ [my] <tag:alice@example.com,2026:>
15
15
 
16
- Launch: [1969-07-16] {startDate ^^xsd:date}
17
- Crew: [Neil Armstrong] {+ex:armstrong ?crewMember name}
18
- Description: [First crewed Moon landing] {description}
16
+ # 2024-07-18 {=my:journal-2024-07-18 .my:Event my:date ^^xsd:date}
17
+
18
+ ## A good day {label}
19
+
20
+ Mood: [Happy] {my:mood}
21
+ Energy level: [8] {my:energyLevel ^^xsd:integer}
22
+
23
+ Met [Sam] {+my:sam .my:Person ?my:attendee} on my regular walk at [Central Park] {+my:central-park ?my:location .my:Place label @en} and talked about [Sunny] {my:weather} weather.
24
+
25
+ Activities: {?my:hasActivity .my:Activity label}
26
+
27
+ - Walking {=#walking}
28
+ - Reading {=#reading}
19
29
 
20
- [Section] {+#overview ?hasPart}
21
- Overview: [Mission summary] {description}
22
30
  ```
23
31
 
24
32
  Generates valid RDF triples:
25
33
 
26
34
  ```turtle
27
- ex:apollo11 a schema:SpaceMission ;
28
- schema:startDate "1969-07-16"^^xsd:date ;
29
- schema:crewMember ex:armstrong ;
30
- schema:description "First crewed Moon landing" .
31
-
32
- ex:armstrong schema:name "Neil Armstrong" .
33
- ```
35
+ @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
36
+ @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
37
+ @prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
38
+ @prefix sh: <http://www.w3.org/ns/shacl#>.
39
+ @prefix prov: <http://www.w3.org/ns/prov#>.
40
+ @prefix ex: <http://example.org/>.
41
+ @prefix my: <tag:alice@example.com,2026:>.
42
+
43
+ my:journal-2024-07-18 a my:Event;
44
+ my:date "2024-07-18"^^xsd:date;
45
+ rdfs:label "A good day";
46
+ my:mood "Happy";
47
+ my:energyLevel 8;
48
+ my:attendee my:sam;
49
+ my:location my:central-park;
50
+ my:weather "Sunny";
51
+ my:hasActivity <tag:alice@example.com,2026:journal-2024-07-18#walking>, <tag:alice@example.com,2026:journal-2024-07-18#reading>.
52
+ my:sam a my:Person.
53
+ my:central-park a my:Place;
54
+ rdfs:label "Central Park"@en.
55
+ <tag:alice@example.com,2026:journal-2024-07-18#walking> a my:Activity;
56
+ rdfs:label "Walking".
57
+ <tag:alice@example.com,2026:journal-2024-07-18#reading> a my:Activity;
58
+ rdfs:label "Reading".
59
+
60
+ ```
61
+
62
+ Read the [FULL SPEC](./docs/Spec/Spec.md).
34
63
 
35
64
  ## Core Features
36
65
 
@@ -329,7 +358,7 @@ console.log(result.quads);
329
358
  // ]
330
359
  ```
331
360
 
332
- ### `serialize({ text, diff, origin, options })`
361
+ ### `applyDiff({ text, diff, origin, options })`
333
362
 
334
363
  Apply RDF changes back to markdown with proper positioning.
335
364
 
@@ -364,7 +393,7 @@ const newQuad = {
364
393
  object: { termType: 'Literal', value: '2024-01-01' }
365
394
  };
366
395
 
367
- const updated = serialize({
396
+ const updated = applyDiff({
368
397
  text: original,
369
398
  diff: { add: [newQuad] },
370
399
  origin: result.origin,
@@ -378,6 +407,92 @@ console.log(updated.text);
378
407
  // [2024-01-01] {datePublished}
379
408
  ```
380
409
 
410
+ ### `generate(quads, context)`
411
+
412
+ Generate deterministic MDLD from RDF quads with origin tracking.
413
+
414
+ **Parameters:**
415
+
416
+ - `quads` (array) — Array of RDF/JS Quads to convert
417
+ - `context` (object, optional) — Prefix mappings (default: `{}`)
418
+ - Merged with DEFAULT_CONTEXT for proper CURIE shortening
419
+ - Only user-defined prefixes are rendered in output
420
+
421
+ **Returns:** `{ text, origin, context }`
422
+
423
+ - `text` — Generated MDLD markdown
424
+ - `origin` — Origin tracking object with:
425
+ - `blocks` — Map of block IDs to source locations
426
+ - `quadIndex` — Map of quads to block IDs
427
+ - `context` — Final context used (includes defaults)
428
+
429
+ **Example:**
430
+
431
+ ```javascript
432
+ const quads = [
433
+ {
434
+ subject: { termType: 'NamedNode', value: 'http://example.org/article' },
435
+ predicate: { termType: 'NamedNode', value: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' },
436
+ object: { termType: 'NamedNode', value: 'http://example.org/Article' }
437
+ },
438
+ {
439
+ subject: { termType: 'NamedNode', value: 'http://example.org/article' },
440
+ predicate: { termType: 'NamedNode', value: 'http://schema.org/author' },
441
+ object: { termType: 'NamedNode', value: 'http://example.org/alice' }
442
+ }
443
+ ];
444
+
445
+ const result = generate(quads, {
446
+ ex: 'http://example.org/',
447
+ schema: 'http://schema.org/'
448
+ });
449
+
450
+ console.log(result.text);
451
+ // # article {=ex:article .ex:Article}
452
+ //
453
+ // > alice {+ex:alice ?schema:author}
454
+ ```
455
+
456
+ ### `locate(quad, origin, text, context)`
457
+
458
+ Locate the precise text range of a quad in MDLD text using origin tracking.
459
+
460
+ **Parameters:**
461
+
462
+ - `quad` (object) — The quad to locate (subject, predicate, object)
463
+ - `origin` (object, optional) — Origin object containing blocks and quadIndex
464
+ - `text` (string, optional) — MDLD text (auto-parsed if origin not provided)
465
+ - `context` (object, optional) — Context for parsing when text needs to be parsed
466
+
467
+ **Returns:** `{ blockId, entryIndex, kind, subject, predicate, object, range, content, blockRange, carrierType, isVacant }` or `null`
468
+
469
+ - `blockId` — ID of the containing block
470
+ - `entryIndex` — Position within block entries
471
+ - `range` — Precise character range of the quad content
472
+ - `content` — Actual text content at that range
473
+ - `blockRange` — Full range of the containing block
474
+ - `carrierType` — Type of carrier (heading, blockquote, list, span)
475
+ - `isVacant` — Whether the slot is marked as vacant
476
+
477
+ **Example:**
478
+
479
+ ```javascript
480
+ import { parse, locate } from './src/index.js';
481
+
482
+ const result = parse(mdldText, { context: { ex: 'http://example.org/' } });
483
+ const quad = result.quads[0]; // Find a quad to locate
484
+
485
+ // Pattern 1: With origin (most efficient)
486
+ const location1 = locate(quad, result.origin, mdldText);
487
+
488
+ // Pattern 2: Auto-parse text (convenient)
489
+ const location2 = locate(quad, null, mdldText, { ex: 'http://example.org/' });
490
+
491
+ console.log(location1.range); // { start: 38, end: 44 }
492
+ console.log(location1.content); // " Alice"
493
+ console.log(location1.carrierType); // "blockquote"
494
+ ```
495
+
381
496
  ## Value Carriers
382
497
 
383
498
  Only specific markdown elements can carry semantic values:
@@ -464,14 +579,14 @@ Therefore, the algebra is **closed**.
464
579
  ```markdown
465
580
  [alice] <tag:alice@example.com,2026:>
466
581
 
467
- # Meeting Notes {=alice:meeting-2024-01-15 .Meeting}
582
+ # Meeting Notes {=alice:meeting-2024-01-15 .alice:Meeting}
468
583
 
469
- Attendees: {?attendee name}
584
+ Attendees: {?alice:attendee label}
470
585
 
471
586
  - Alice {=alice:alice}
472
587
  - Bob {=alice:bob}
473
588
 
474
- Action items: {?actionItem name}
589
+ Action items: {?alice:actionItem label}
475
590
 
476
591
  - Review proposal {=alice:task-1}
477
592
  ```
@@ -479,14 +594,14 @@ Action items: {?actionItem name}
479
594
  ### Developer Documentation
480
595
 
481
596
  ````markdown
482
- # API Endpoint {=api:/users/:id .APIEndpoint}
597
+ # API Endpoint {=api:/users/:id .api:Endpoint}
483
598
 
484
- [GET] {method}
485
- [/users/:id] {path}
599
+ [GET] {api:method}
600
+ [/users/:id] {api:path}
486
601
 
487
602
  Example:
488
603
 
489
- ```bash {=api:/users/:id#example .CodeExample text}
604
+ ```bash {=api:/users/:id#example .api:CodeExample api:code}
490
605
  curl https://api.example.com/users/123
491
606
  ```
492
607
  ````
@@ -496,13 +611,13 @@ curl https://api.example.com/users/123
496
611
  ```markdown
497
612
  [alice] <tag:alice@example.com,2026:>
498
613
 
499
- # Paper {=alice:paper-semantic-markdown .ScholarlyArticle}
614
+ # Paper {=alice:paper-semantic-markdown .alice:ScholarlyArticle}
500
615
 
501
- [Semantic Web] {about}
502
- [Alice Johnson] {=alice:alice-johnson ?author}
503
- [2024-01] {datePublished ^^xsd:gYearMonth}
616
+ [Semantic Web] {label}
617
+ [Alice Johnson] {=alice:alice-johnson ?alice:author}
618
+ [2024-01] {alice:datePublished ^^xsd:gYearMonth}
504
619
 
505
- > This paper explores semantic markup in Markdown. {abstract @en}
620
+ > This paper explores semantic markup in Markdown. {comment @en}
506
621
  ```
507
622
 
508
623
  ## Testing
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.5.2",
3
+ "version": "0.5.3",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -12,7 +12,8 @@
12
12
  "src"
13
13
  ],
14
14
  "scripts": {
15
- "test": "node tests/index.js"
15
+ "test": "node tests/index.js",
16
+ "dev": "pnpx live-server"
16
17
  },
17
18
  "keywords": [
18
19
  "mdld",
@@ -158,7 +158,7 @@ function markEntryAsVacant(entry, quad) {
158
158
  return null;
159
159
  }
160
160
 
161
- export function serialize({ text, diff, origin, options = {} }) {
161
+ export function applyDiff({ text, diff, origin, options = {} }) {
162
162
  if (!diff || (!diff.add?.length && !diff.delete?.length)) {
163
163
  const reparsed = parse(text, { context: options.context || {} });
164
164
  return { text, origin: reparsed.origin };
@@ -0,0 +1,248 @@
1
+ import { shortenIRI, expandIRI, quadIndexKey, createSlotInfo, DEFAULT_CONTEXT } from './utils.js';
2
+
3
+
4
/**
 * Derive a short, human-readable label from an IRI.
 *
 * The label is whatever follows the last `#`; failing that, the last `/`;
 * failing that, the last `:`. Trailing separators are ignored first, so
 * `http://example.org/people/` yields `people` rather than falling through
 * to the scheme colon and returning `//example.org/people/`.
 *
 * @param {string} iri - IRI (or any string) to derive a label from
 * @returns {string} The local name, or the input itself when it consists
 *   only of separator characters (or is empty)
 */
function extractLocalName(iri) {
  // Drop trailing separators so that the last separator found below is
  // always followed by at least one character.
  const trimmed = iri.replace(/[#/:]+$/, '');
  if (trimmed === '') {
    return iri;
  }

  // Priority order matters: a fragment wins over a path segment, which wins
  // over a scheme/prefix colon.
  for (const separator of ['#', '/', ':']) {
    const position = trimmed.lastIndexOf(separator);
    if (position !== -1) {
      return trimmed.substring(position + 1);
    }
  }

  return trimmed;
}
14
+
15
/**
 * Turn a set of RDF quads into canonical MDLD markdown.
 *
 * The quads are normalized and sorted, grouped by subject, and then rendered
 * one subject at a time. The caller's prefixes are layered on top of
 * DEFAULT_CONTEXT, so a caller-supplied prefix wins over a default one.
 *
 * @param {Array} quads - RDF/JS-style quads ({ subject, predicate, object })
 * @param {Object} [context={}] - Prefix → namespace mappings
 * @returns {{ text: string, origin: { blocks: Map, quadIndex: Map }, context: Object }}
 *   The generated markdown, its origin-tracking maps, and the merged context
 */
export function generate(quads, context = {}) {
  const fullContext = { ...DEFAULT_CONTEXT, ...context };

  const bySubject = groupQuadsBySubject(normalizeAndSortQuads(quads));
  const rendered = buildDeterministicMDLD(bySubject, fullContext);

  return {
    text: rendered.text,
    origin: { blocks: rendered.blocks, quadIndex: rendered.quadIndex },
    context: fullContext
  };
}
36
+
37
/**
 * Copy quads into plain term objects and sort them into a stable order.
 *
 * Literal objects keep their value, get `language: null` when no language is
 * set, and default to the xsd:string datatype. Every other object is copied
 * as a NamedNode.
 * NOTE(review): non-literal objects (e.g. blank nodes) are therefore relabelled
 * as NamedNode; confirm that is intended before relying on termType downstream.
 *
 * Sort order is subject → predicate → object value, then object term type,
 * language and datatype. The last three keys break ties between literals that
 * share a lexical value, so the result does not depend on input order.
 * NOTE(review): comparisons use String.prototype.localeCompare with the
 * runtime's default locale, as the original did.
 *
 * @param {Iterable|null|undefined} quads - Quads to normalize; nullish is
 *   treated as empty
 * @returns {Array} New array of normalized quads in sorted order
 */
function normalizeAndSortQuads(quads) {
  const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';

  const compareText = (left, right) => left.localeCompare(right);

  const normalizeObject = (object) => (object.termType === 'Literal'
    ? {
      termType: 'Literal',
      value: object.value,
      language: object.language || null,
      datatype: object.datatype || { termType: 'NamedNode', value: XSD_STRING }
    }
    : { termType: 'NamedNode', value: object.value });

  return Array.from(quads ?? [])
    .map((quad) => ({
      subject: { termType: quad.subject.termType, value: quad.subject.value },
      predicate: { termType: quad.predicate.termType, value: quad.predicate.value },
      object: normalizeObject(quad.object)
    }))
    .sort((a, b) => compareText(a.subject.value, b.subject.value)
      || compareText(a.predicate.value, b.predicate.value)
      || compareText(a.object.value, b.object.value)
      // Tie-breakers: same lexical value, different kind of term.
      || compareText(a.object.termType, b.object.termType)
      || compareText(a.object.language || '', b.object.language || '')
      || compareText(a.object.datatype?.value || '', b.object.datatype?.value || ''));
}
62
+
63
/**
 * Bucket quads by the value of their subject term.
 *
 * Buckets appear in the Map in first-seen order and each bucket keeps the
 * quads in the order they were encountered.
 *
 * @param {Iterable} quads - Quads exposing `subject.value`
 * @returns {Map<string, Array>} Subject value → quads with that subject
 */
function groupQuadsBySubject(quads) {
  const bySubject = new Map();

  for (const quad of quads) {
    const subjectKey = quad.subject.value;
    let bucket = bySubject.get(subjectKey);
    if (!bucket) {
      bucket = [];
      bySubject.set(subjectKey, bucket);
    }
    bucket.push(quad);
  }

  return bySubject;
}
73
+
74
/**
 * Render grouped quads as MDLD text and record where each piece landed.
 *
 * Layout, in order:
 *   1. one `[prefix] <namespace>` line per context prefix that is not already
 *      implied by DEFAULT_CONTEXT, followed by a blank line if any was written;
 *   2. per subject (sorted): a `# label {=subject .Type ...}` heading, then one
 *      `> value {predicate ...}` line per literal and one
 *      `> label {+object ?predicate}` line per IRI object, followed by a blank
 *      line when the subject had any such line.
 *
 * Every emitted line is registered in `blocks` under a fresh block id with the
 * character range it occupies; every quad is registered in `quadIndex` under
 * `quadIndexKey(...)` with the slot produced by `createSlotInfo(...)`.
 *
 * Differences from the previous implementation (all bug fixes):
 *   - the blank line after the prefix section is written only when a prefix
 *     line was actually written; previously it was always written and then
 *     stripped by `trim()`, leaving every range off by one;
 *   - a caller prefix that overrides a default prefix with a different
 *     namespace is now declared, otherwise the emitted CURIEs would resolve
 *     against the default namespace;
 *   - type annotations are shortened with `shortenIRI` like every other term
 *     instead of being cut down to a bare local name;
 *   - heading text, heading entries and quadIndex slots share one type order;
 *   - `valueRange` starts after the two-character `"> "` marker, and
 *     `attrsRange` is computed from known lengths rather than by searching for
 *     braces (which broke when the literal value contained `{` or `}`).
 *
 * @param {Map<string, Array>} subjectGroups - Subject IRI → normalized quads
 * @param {Object} context - Merged prefix context used for CURIE shortening
 * @returns {{ text: string, blocks: Map, quadIndex: Map }}
 */
function buildDeterministicMDLD(subjectGroups, context) {
  const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
  const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';

  let text = '';
  let currentPos = 0;
  const blocks = new Map();
  const quadIndex = new Map();

  // Append a chunk to the output and report the range it occupies.
  const append = (chunk) => {
    const range = { start: currentPos, end: currentPos + chunk.length };
    text += chunk;
    currentPos = range.end;
    return range;
  };

  const byPredicate = (a, b) => a.predicate.value.localeCompare(b.predicate.value);
  const isTypeQuad = (quad) => quad.predicate.value === RDF_TYPE;

  // Prefix declarations, in prefix order.
  const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
  let renderedPrefixes = 0;
  for (const [prefix, namespace] of sortedPrefixes) {
    // Keywords such as @vocab are not prefixes; defaults are implicit in MDLD
    // as long as the caller has not re-pointed them at another namespace.
    if (prefix.startsWith('@') || DEFAULT_CONTEXT[prefix] === namespace) {
      continue;
    }

    const prefixDecl = `[${prefix}] <${namespace}>\n`;
    const blockId = generateBlockId();
    blocks.set(blockId, {
      id: blockId,
      range: append(prefixDecl),
      subject: null,
      entries: [{ kind: 'prefix', prefix, namespace, raw: prefixDecl.trim() }],
      carrierType: 'prefix'
    });
    renderedPrefixes += 1;
  }

  if (renderedPrefixes > 0) {
    append('\n');
  }

  // Subjects in sorted order.
  const sortedSubjects = Array.from(subjectGroups.keys()).sort();

  for (const subjectIRI of sortedSubjects) {
    const subjectQuads = subjectGroups.get(subjectIRI);
    const shortSubject = shortenIRI(subjectIRI, context);

    const types = subjectQuads.filter(isTypeQuad);
    const literals = subjectQuads
      .filter((quad) => !isTypeQuad(quad) && quad.object.termType === 'Literal')
      .sort(byPredicate);
    const objects = subjectQuads
      .filter((quad) => !isTypeQuad(quad) && quad.object.termType === 'NamedNode')
      .sort(byPredicate);

    // One ordering of the types drives the text, the entries and the index.
    const typeSlots = types
      .map((quad) => ({ quad, raw: `.${shortenIRI(quad.object.value, context)}` }))
      .sort((a, b) => (a.raw < b.raw ? -1 : a.raw > b.raw ? 1 : 0));
    const typeAnnotations = typeSlots.map((slot) => ` ${slot.raw}`).join('');

    // Heading
    const headingText = `# ${extractLocalName(subjectIRI)} {=${shortSubject}${typeAnnotations}}\n\n`;
    const headingBlockId = generateBlockId();
    blocks.set(headingBlockId, {
      id: headingBlockId,
      range: append(headingText),
      subject: subjectIRI,
      entries: [
        { kind: 'subject', raw: `=${shortSubject}`, expandedSubject: subjectIRI },
        ...typeSlots.map(({ quad, raw }, i) => ({
          kind: 'type',
          raw,
          expandedType: quad.object.value,
          entryIndex: i
        }))
      ],
      carrierType: 'heading'
    });

    typeSlots.forEach(({ quad }, i) => {
      const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
      quadIndex.set(key, createSlotInfo(headingBlockId, i, {
        kind: 'type',
        subject: quad.subject,
        predicate: quad.predicate,
        object: quad.object
      }));
    });

    // Literal properties
    for (const quad of literals) {
      let annotation = shortenIRI(quad.predicate.value, context);
      const datatypeIRI = quad.object.datatype?.value;
      if (quad.object.language) {
        annotation += ` @${quad.object.language}`;
      } else if (datatypeIRI && datatypeIRI !== XSD_STRING) {
        annotation += ` ^^${shortenIRI(datatypeIRI, context)}`;
      }

      const value = quad.object.value;
      const literalText = `> ${value} {${annotation}}\n`;
      const literalBlockId = generateBlockId();
      const range = append(literalText);

      // Line shape: "> " + value + " " + "{annotation}" + "\n"
      const valueStart = range.start + 2;
      const attrsStart = valueStart + value.length + 1;

      blocks.set(literalBlockId, {
        id: literalBlockId,
        range,
        subject: subjectIRI,
        entries: [{
          kind: 'property',
          raw: annotation,
          expandedPredicate: quad.predicate.value,
          form: '',
          entryIndex: 0
        }],
        carrierType: 'span',
        valueRange: { start: valueStart, end: valueStart + value.length },
        // Ends just before the trailing newline, i.e. right after "}".
        attrsRange: { start: attrsStart, end: range.end - 1 }
      });

      const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
      quadIndex.set(key, createSlotInfo(literalBlockId, 0, {
        kind: 'pred',
        subject: quad.subject,
        predicate: quad.predicate,
        object: quad.object,
        form: ''
      }));
    }

    // Object (IRI-valued) properties
    for (const quad of objects) {
      const predShort = shortenIRI(quad.predicate.value, context);
      const objShort = shortenIRI(quad.object.value, context);
      const localName = extractLocalName(quad.object.value);

      const objectText = `> ${localName} {+${objShort} ?${predShort}}\n`;
      const objectBlockId = generateBlockId();
      blocks.set(objectBlockId, {
        id: objectBlockId,
        range: append(objectText),
        subject: subjectIRI,
        entries: [{
          kind: 'object',
          raw: objShort,
          expandedObject: quad.object.value,
          entryIndex: 0
        }],
        carrierType: 'span'
      });

      const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
      quadIndex.set(key, createSlotInfo(objectBlockId, 0, {
        kind: 'pred',
        subject: quad.subject,
        predicate: quad.predicate,
        object: quad.object,
        form: '?'
      }));
    }

    if (literals.length > 0 || objects.length > 0) {
      append('\n');
    }
  }

  // The text never starts with whitespace here, so trimming only removes the
  // trailing blank lines and leaves all recorded start offsets valid.
  return { text: text.trim(), blocks, quadIndex };
}
245
+
246
/**
 * Produce a short pseudo-random block identifier.
 *
 * The id is up to eight characters taken from the base-36 rendering of
 * Math.random(), after its leading "0." has been skipped.
 *
 * @returns {string} Lower-case alphanumeric id of at most 8 characters
 */
function generateBlockId() {
  const base36 = Math.random().toString(36);
  return base36.slice(2, 10);
}
package/src/index.js CHANGED
@@ -1,5 +1,7 @@
1
1
  export { parse } from './parse.js';
2
- export { serialize } from './serialize.js';
2
+ export { applyDiff } from './applyDiff.js';
3
+ export { generate } from './generate.js';
4
+ export { locate } from './locate.js';
3
5
  export {
4
6
  DEFAULT_CONTEXT,
5
7
  DataFactory,
package/src/locate.js ADDED
@@ -0,0 +1,92 @@
1
+ import { parse } from './parse.js';
2
+ import { normalizeQuad, quadIndexKey } from './utils.js';
3
+
4
/**
 * Locate the text range of a quad in MDLD text using origin tracking.
 *
 * The quad is normalized, turned into a quadIndex key and looked up in
 * `origin.quadIndex`; the slot found there names the block that carries it.
 * For heading, blockquote, list and span carriers the reported range defaults
 * to the whole block. For blockquote, list and span carriers it is narrowed to
 * the carrier content (the part of the entry's `raw` text before `{`) when
 * that content is non-empty and lies entirely inside the block.
 * For any other carrier type `range` is `null` and `content` is `''`.
 *
 * @param {Object} quad - The quad to locate (subject, predicate, object)
 * @param {Object} [origin] - Origin object with `blocks` and `quadIndex` maps;
 *   when missing, `text` is parsed to obtain one
 * @param {string} [text=''] - MDLD text the ranges refer to; nullish is
 *   treated as empty
 * @param {Object} [context={}] - Context passed to `parse` when parsing `text`
 * @returns {Object|null} `{ blockId, entryIndex, kind, subject, predicate,
 *   object, range, content, blockRange, carrierType, isVacant }`, or `null`
 *   when the quad, its slot or its block cannot be resolved
 */
export function locate(quad, origin, text = '', context = {}) {
  // An explicit null/undefined for `text` must not crash the substring calls.
  const sourceText = text ?? '';

  let resolvedOrigin = origin;
  if (!resolvedOrigin && sourceText) {
    resolvedOrigin = parse(sourceText, { context }).origin;
  }

  if (!quad || !resolvedOrigin || !resolvedOrigin.quadIndex || !resolvedOrigin.blocks) {
    return null;
  }

  const normalizedQuad = normalizeQuad(quad);
  if (!normalizedQuad) {
    return null;
  }

  const quadKey = quadIndexKey(normalizedQuad.subject, normalizedQuad.predicate, normalizedQuad.object);
  const slotInfo = resolvedOrigin.quadIndex.get(quadKey);
  if (!slotInfo) {
    return null;
  }

  const block = resolvedOrigin.blocks.get(slotInfo.blockId);
  if (!block) {
    return null;
  }

  const isHeading = block.carrierType === 'heading';
  const isEntryCarrier = ['blockquote', 'list', 'span'].includes(block.carrierType);

  let contentRange = null;
  let content = '';

  if (isHeading || isEntryCarrier) {
    contentRange = block.range;
    content = sourceText.substring(block.range.start, block.range.end);
  }

  if (isEntryCarrier && slotInfo.entryIndex != null) {
    const raw = block.entries?.[slotInfo.entryIndex]?.raw;
    const annotationStart = raw ? raw.indexOf('{') : -1;

    if (annotationStart !== -1) {
      const carrierContent = raw.substring(0, annotationStart).trim();
      // An empty needle would "match" at the block start and produce a
      // meaningless zero-length range, so it is not searched for at all.
      const contentStart = carrierContent
        ? sourceText.indexOf(carrierContent, block.range.start)
        : -1;
      const contentEnd = contentStart + carrierContent.length;

      // Reject matches that spill past the block: they belong to later text.
      if (contentStart !== -1 && contentEnd <= block.range.end) {
        contentRange = { start: contentStart, end: contentEnd };
        content = sourceText.substring(contentStart, contentEnd);
      }
    }
  }

  return {
    blockId: slotInfo.blockId,
    entryIndex: slotInfo.entryIndex,
    kind: slotInfo.kind,
    subject: normalizedQuad.subject,
    predicate: normalizedQuad.predicate,
    object: normalizedQuad.object,
    range: contentRange,
    content,
    blockRange: block.range,
    carrierType: block.carrierType,
    isVacant: slotInfo.isVacant || false
  };
}
package/src/parse.js CHANGED
@@ -562,7 +562,12 @@ const manageListStack = (token, state) => {
562
562
 
563
563
  const combineSemanticInfo = (token, carriers, listFrame, state, itemSubject) => {
564
564
  const combinedSem = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
565
- const addSem = (sem) => { combinedSem.types.push(...sem.types); combinedSem.predicates.push(...sem.predicates); combinedSem.entries.push(...sem.entries); };
565
+ const addSem = (sem) => {
566
+ const entryIndex = combinedSem.entries.length;
567
+ combinedSem.types.push(...sem.types);
568
+ combinedSem.predicates.push(...sem.predicates);
569
+ combinedSem.entries.push(...sem.entries.map(entry => ({ ...entry, entryIndex })));
570
+ };
566
571
 
567
572
  if (listFrame?.contextSem) {
568
573
  const inheritedSem = processContextSem({ sem: listFrame.contextSem, itemSubject, contextSubject: listFrame.contextSubject, inheritLiterals: true, state });