mdld-parse 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -27
- package/package.json +3 -2
- package/src/{serialize.js → applyDiff.js} +1 -1
- package/src/generate.js +248 -0
- package/src/index.js +3 -1
- package/src/locate.js +92 -0
- package/src/parse.js +6 -1
package/README.md
CHANGED
|
@@ -11,26 +11,55 @@
|
|
|
11
11
|
MD-LD allows you to author RDF graphs directly in Markdown using explicit `{...}` annotations:
|
|
12
12
|
|
|
13
13
|
```markdown
|
|
14
|
-
|
|
14
|
+
[my] <tag:alice@example.com,2026:>
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
16
|
+
# 2024-07-18 {=my:journal-2024-07-18 .my:Event my:date ^^xsd:date}
|
|
17
|
+
|
|
18
|
+
## A good day {label}
|
|
19
|
+
|
|
20
|
+
Mood: [Happy] {my:mood}
|
|
21
|
+
Energy level: [8] {my:energyLevel ^^xsd:integer}
|
|
22
|
+
|
|
23
|
+
Met [Sam] {+my:sam .my:Person ?my:attendee} on my regular walk at [Central Park] {+my:central-park ?my:location .my:Place label @en} and talked about [Sunny] {my:weather} weather.
|
|
24
|
+
|
|
25
|
+
Activities: {?my:hasActivity .my:Activity label}
|
|
26
|
+
|
|
27
|
+
- Walking {=#walking}
|
|
28
|
+
- Reading {=#reading}
|
|
19
29
|
|
|
20
|
-
[Section] {+#overview ?hasPart}
|
|
21
|
-
Overview: [Mission summary] {description}
|
|
22
30
|
```
|
|
23
31
|
|
|
24
32
|
Generates valid RDF triples:
|
|
25
33
|
|
|
26
34
|
```turtle
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
ex:
|
|
33
|
-
|
|
35
|
+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
|
|
36
|
+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
|
|
37
|
+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
|
|
38
|
+
@prefix sh: <http://www.w3.org/ns/shacl#>.
|
|
39
|
+
@prefix prov: <http://www.w3.org/ns/prov#>.
|
|
40
|
+
@prefix ex: <http://example.org/>.
|
|
41
|
+
@prefix my: <tag:alice@example.com,2026:>.
|
|
42
|
+
|
|
43
|
+
my:journal-2024-07-18 a my:Event;
|
|
44
|
+
my:date "2024-07-18"^^xsd:date;
|
|
45
|
+
rdfs:label "A good day";
|
|
46
|
+
my:mood "Happy";
|
|
47
|
+
my:energyLevel 8;
|
|
48
|
+
my:attendee my:sam;
|
|
49
|
+
my:location my:central-park;
|
|
50
|
+
my:weather "Sunny";
|
|
51
|
+
my:hasActivity <tag:alice@example.com,2026:journal-2024-07-18#walking>, <tag:alice@example.com,2026:journal-2024-07-18#reading>.
|
|
52
|
+
my:sam a my:Person.
|
|
53
|
+
my:central-park a my:Place;
|
|
54
|
+
rdfs:label "Central Park"@en.
|
|
55
|
+
<tag:alice@example.com,2026:journal-2024-07-18#walking> a my:Activity;
|
|
56
|
+
rdfs:label "Walking".
|
|
57
|
+
<tag:alice@example.com,2026:journal-2024-07-18#reading> a my:Activity;
|
|
58
|
+
rdfs:label "Reading".
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Read the [FULL SPEC](./docs/Spec/Spec.md).
|
|
34
63
|
|
|
35
64
|
## Core Features
|
|
36
65
|
|
|
@@ -329,7 +358,7 @@ console.log(result.quads);
|
|
|
329
358
|
// ]
|
|
330
359
|
```
|
|
331
360
|
|
|
332
|
-
### `
|
|
361
|
+
### `applyDiff({ text, diff, origin, options })`
|
|
333
362
|
|
|
334
363
|
Apply RDF changes back to markdown with proper positioning.
|
|
335
364
|
|
|
@@ -364,7 +393,7 @@ const newQuad = {
|
|
|
364
393
|
object: { termType: 'Literal', value: '2024-01-01' }
|
|
365
394
|
};
|
|
366
395
|
|
|
367
|
-
const updated =
|
|
396
|
+
const updated = applyDiff({
|
|
368
397
|
text: original,
|
|
369
398
|
diff: { add: [newQuad] },
|
|
370
399
|
origin: result.origin,
|
|
@@ -378,6 +407,92 @@ console.log(updated.text);
|
|
|
378
407
|
// [2024-01-01] {datePublished}
|
|
379
408
|
```
|
|
380
409
|
|
|
410
|
+
### `generate(quads, context)`
|
|
411
|
+
|
|
412
|
+
Generate deterministic MDLD from RDF quads with origin tracking.
|
|
413
|
+
|
|
414
|
+
**Parameters:**
|
|
415
|
+
|
|
416
|
+
- `quads` (array) — Array of RDF/JS Quads to convert
|
|
417
|
+
- `context` (object, optional) — Prefix mappings (default: `{}`)
|
|
418
|
+
- Merged with DEFAULT_CONTEXT for proper CURIE shortening
|
|
419
|
+
- Only user-defined prefixes are rendered in output
|
|
420
|
+
|
|
421
|
+
**Returns:** `{ text, origin, context }`
|
|
422
|
+
|
|
423
|
+
- `text` — Generated MDLD markdown
|
|
424
|
+
- `origin` — Origin tracking object with:
|
|
425
|
+
- `blocks` — Map of block IDs to source locations
|
|
426
|
+
- `quadIndex` — Map of quads to block IDs
|
|
427
|
+
- `context` — Final context used (includes defaults)
|
|
428
|
+
|
|
429
|
+
**Example:**
|
|
430
|
+
|
|
431
|
+
```javascript
|
|
432
|
+
const quads = [
|
|
433
|
+
{
|
|
434
|
+
subject: { termType: 'NamedNode', value: 'http://example.org/article' },
|
|
435
|
+
predicate: { termType: 'NamedNode', value: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' },
|
|
436
|
+
object: { termType: 'NamedNode', value: 'http://example.org/Article' }
|
|
437
|
+
},
|
|
438
|
+
{
|
|
439
|
+
subject: { termType: 'NamedNode', value: 'http://example.org/article' },
|
|
440
|
+
predicate: { termType: 'NamedNode', value: 'http://schema.org/author' },
|
|
441
|
+
object: { termType: 'NamedNode', value: 'http://example.org/alice' }
|
|
442
|
+
}
|
|
443
|
+
];
|
|
444
|
+
|
|
445
|
+
const result = generate(quads, {
|
|
446
|
+
ex: 'http://example.org/',
|
|
447
|
+
schema: 'http://schema.org/'
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
console.log(result.text);
|
|
451
|
+
// # Article {=ex:article .ex:Article}
|
|
452
|
+
//
|
|
453
|
+
// > alice {+ex:alice ?schema:author}
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
### `locate(quad, origin, text, context)`
|
|
457
|
+
|
|
458
|
+
Locate the precise text range of a quad in MDLD text using origin tracking.
|
|
459
|
+
|
|
460
|
+
**Parameters:**
|
|
461
|
+
|
|
462
|
+
- `quad` (object) — The quad to locate (subject, predicate, object)
|
|
463
|
+
- `origin` (object, optional) — Origin object containing blocks and quadIndex
|
|
464
|
+
- `text` (string, optional) — MDLD text (auto-parsed if origin not provided)
|
|
465
|
+
- `context` (object, optional) — Context for parsing when text needs to be parsed
|
|
466
|
+
|
|
467
|
+
**Returns:** `{ blockId, entryIndex, kind, subject, predicate, object, range, content, blockRange, carrierType, isVacant }` or `null`
|
|
468
|
+
|
|
469
|
+
- `blockId` — ID of the containing block
|
|
470
|
+
- `entryIndex` — Position within block entries
|
|
471
|
+
- `range` — Precise character range of the quad content
|
|
472
|
+
- `content` — Actual text content at that range
|
|
473
|
+
- `blockRange` — Full range of the containing block
|
|
474
|
+
- `carrierType` — Type of carrier (heading, blockquote, list, span)
|
|
475
|
+
- `isVacant` — Whether the slot is marked as vacant
|
|
476
|
+
|
|
477
|
+
**Example:**
|
|
478
|
+
|
|
479
|
+
```javascript
|
|
480
|
+
import { parse, locate } from './src/index.js';
|
|
481
|
+
|
|
482
|
+
const result = parse(mdldText, { context: { ex: 'http://example.org/' } });
|
|
483
|
+
const quad = result.quads[0]; // Find a quad to locate
|
|
484
|
+
|
|
485
|
+
// Pattern 1: With origin (most efficient)
|
|
486
|
+
const location1 = locate(quad, result.origin, mdldText);
|
|
487
|
+
|
|
488
|
+
// Pattern 2: Auto-parse text (convenient)
|
|
489
|
+
const location2 = locate(quad, null, mdldText, { ex: 'http://example.org/' });
|
|
490
|
+
|
|
491
|
+
console.log(location1.range); // { start: 38, end: 44 }
|
|
492
|
+
console.log(location1.content); // " Alice"
|
|
493
|
+
console.log(location1.carrierType); // "blockquote"
|
|
494
|
+
```
|
|
495
|
+
|
|
381
496
|
## Value Carriers
|
|
382
497
|
|
|
383
498
|
Only specific markdown elements can carry semantic values:
|
|
@@ -464,14 +579,14 @@ Therefore, the algebra is **closed**.
|
|
|
464
579
|
```markdown
|
|
465
580
|
[alice] <tag:alice@example.com,2026:>
|
|
466
581
|
|
|
467
|
-
# Meeting Notes {=alice:meeting-2024-01-15 .Meeting}
|
|
582
|
+
# Meeting Notes {=alice:meeting-2024-01-15 .alice:Meeting}
|
|
468
583
|
|
|
469
|
-
Attendees: {?attendee
|
|
584
|
+
Attendees: {?alice:attendee label}
|
|
470
585
|
|
|
471
586
|
- Alice {=alice:alice}
|
|
472
587
|
- Bob {=alice:bob}
|
|
473
588
|
|
|
474
|
-
Action items: {?actionItem
|
|
589
|
+
Action items: {?alice:actionItem label}
|
|
475
590
|
|
|
476
591
|
- Review proposal {=alice:task-1}
|
|
477
592
|
```
|
|
@@ -479,14 +594,14 @@ Action items: {?actionItem name}
|
|
|
479
594
|
### Developer Documentation
|
|
480
595
|
|
|
481
596
|
````markdown
|
|
482
|
-
# API Endpoint {=api:/users/:id .
|
|
597
|
+
# API Endpoint {=api:/users/:id .api:Endpoint}
|
|
483
598
|
|
|
484
|
-
[GET] {method}
|
|
485
|
-
[/users/:id] {path}
|
|
599
|
+
[GET] {api:method}
|
|
600
|
+
[/users/:id] {api:path}
|
|
486
601
|
|
|
487
602
|
Example:
|
|
488
603
|
|
|
489
|
-
```bash {=api:/users/:id#example .CodeExample
|
|
604
|
+
```bash {=api:/users/:id#example .api:CodeExample api:code}
|
|
490
605
|
curl https://api.example.com/users/123
|
|
491
606
|
```
|
|
492
607
|
````
|
|
@@ -496,13 +611,13 @@ curl https://api.example.com/users/123
|
|
|
496
611
|
```markdown
|
|
497
612
|
[alice] <tag:alice@example.com,2026:>
|
|
498
613
|
|
|
499
|
-
# Paper {=alice:paper-semantic-markdown .ScholarlyArticle}
|
|
614
|
+
# Paper {=alice:paper-semantic-markdown .alice:ScholarlyArticle}
|
|
500
615
|
|
|
501
|
-
[Semantic Web] {
|
|
502
|
-
[Alice Johnson] {=alice:alice-johnson ?author}
|
|
503
|
-
[2024-01] {datePublished ^^xsd:gYearMonth}
|
|
616
|
+
[Semantic Web] {label}
|
|
617
|
+
[Alice Johnson] {=alice:alice-johnson ?alice:author}
|
|
618
|
+
[2024-01] {alice:datePublished ^^xsd:gYearMonth}
|
|
504
619
|
|
|
505
|
-
> This paper explores semantic markup in Markdown. {
|
|
620
|
+
> This paper explores semantic markup in Markdown. {comment @en}
|
|
506
621
|
```
|
|
507
622
|
|
|
508
623
|
## Testing
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.5.
|
|
3
|
+
"version": "0.5.3",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -12,7 +12,8 @@
|
|
|
12
12
|
"src"
|
|
13
13
|
],
|
|
14
14
|
"scripts": {
|
|
15
|
-
"test": "node tests/index.js"
|
|
15
|
+
"test": "node tests/index.js",
|
|
16
|
+
"dev": "pnpx live-server"
|
|
16
17
|
},
|
|
17
18
|
"keywords": [
|
|
18
19
|
"mdld",
|
|
@@ -158,7 +158,7 @@ function markEntryAsVacant(entry, quad) {
|
|
|
158
158
|
return null;
|
|
159
159
|
}
|
|
160
160
|
|
|
161
|
-
export function
|
|
161
|
+
export function applyDiff({ text, diff, origin, options = {} }) {
|
|
162
162
|
if (!diff || (!diff.add?.length && !diff.delete?.length)) {
|
|
163
163
|
const reparsed = parse(text, { context: options.context || {} });
|
|
164
164
|
return { text, origin: reparsed.origin };
|
package/src/generate.js
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import { shortenIRI, expandIRI, quadIndexKey, createSlotInfo, DEFAULT_CONTEXT } from './utils.js';
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
/**
 * Derive a short, human-readable name from an IRI.
 *
 * Separators are tried in priority order: fragment (`#`), path (`/`), then
 * scheme/tag (`:`). The text after the last occurrence of the first
 * separator found is returned.
 *
 * Trailing separators are ignored first, so `http://example.org/` yields
 * `example.org` instead of falling through to the scheme colon and
 * returning `//example.org/`.
 *
 * @param {string} iri - IRI (or any string) to shorten.
 * @returns {string} The local name; the input itself when it consists only
 *   of separators; the separator-trimmed input when nothing splits it.
 */
function extractLocalName(iri) {
  const separators = ['#', '/', ':'];

  // Ignore trailing separators so they cannot hide the real last segment.
  let end = iri.length;
  while (end > 0 && separators.includes(iri[end - 1])) {
    end -= 1;
  }
  if (end === 0) {
    return iri;
  }
  const trimmed = iri.substring(0, end);

  for (const sep of separators) {
    const lastSep = trimmed.lastIndexOf(sep);
    if (lastSep !== -1) {
      // `trimmed` never ends with a separator, so the remainder is non-empty.
      return trimmed.substring(lastSep + 1);
    }
  }
  return trimmed;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Turn RDF quads into canonical MDLD markdown.
 *
 * Pipeline: copy and sort the quads, bucket them by subject, then render the
 * buckets to text while recording where each quad ended up.
 *
 * @param {Array<Object>} quads - Quads with `subject`, `predicate` and `object` terms.
 * @param {Object} [context={}] - Prefix-to-namespace map; entries here override
 *   same-named entries of DEFAULT_CONTEXT.
 * @returns {{text: string, origin: {blocks: Map, quadIndex: Map}, context: Object}}
 *   The rendered text, the origin maps produced by the renderer, and the
 *   merged context that was used.
 */
export function generate(quads, context = {}) {
  // Later spread wins, so caller-supplied prefixes shadow the defaults.
  const mergedContext = { ...DEFAULT_CONTEXT, ...context };

  const bySubject = groupQuadsBySubject(normalizeAndSortQuads(quads));
  const rendered = buildDeterministicMDLD(bySubject, mergedContext);

  return {
    text: rendered.text,
    origin: { blocks: rendered.blocks, quadIndex: rendered.quadIndex },
    context: mergedContext
  };
}
|
|
36
|
+
|
|
37
|
+
/**
 * Copy quads into plain term objects and sort them into a stable order.
 *
 * Literal objects keep their value, language (or `null`) and datatype
 * (defaulting to xsd:string). Every non-literal object is copied as a
 * `NamedNode` term, whatever its original termType was.
 *
 * Sort order: subject, predicate, object value, then object termType,
 * language and datatype. The last three are tie-breakers so that literals
 * sharing a lexical value (e.g. "chat"@en and "chat"@fr) come out in the same
 * order regardless of the order they were passed in.
 *
 * @param {Array<Object>} quads - Quads with `subject`, `predicate`, `object` terms.
 * @returns {Array<Object>} A new, sorted array of normalized quads; the input is not modified.
 */
function normalizeAndSortQuads(quads) {
  const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';

  const normalized = quads.map(({ subject, predicate, object }) => ({
    subject: { termType: subject.termType, value: subject.value },
    predicate: { termType: predicate.termType, value: predicate.value },
    object: object.termType === 'Literal'
      ? {
        termType: 'Literal',
        value: object.value,
        language: object.language || null,
        datatype: object.datatype || { termType: 'NamedNode', value: XSD_STRING }
      }
      : { termType: 'NamedNode', value: object.value }
  }));

  // `||` falls through to the next key only when the previous comparison is 0.
  return normalized.sort((a, b) =>
    a.subject.value.localeCompare(b.subject.value)
    || a.predicate.value.localeCompare(b.predicate.value)
    || a.object.value.localeCompare(b.object.value)
    || a.object.termType.localeCompare(b.object.termType)
    || (a.object.language || '').localeCompare(b.object.language || '')
    || (a.object.datatype?.value || '').localeCompare(b.object.datatype?.value || ''));
}
|
|
62
|
+
|
|
63
|
+
/**
 * Bucket quads by the string value of their subject.
 *
 * @param {Array<Object>} quads - Quads whose `subject.value` is used as the key.
 * @returns {Map<string, Array<Object>>} Subject value → quads, with both keys
 *   and bucket contents in first-seen order.
 */
function groupQuadsBySubject(quads) {
  return quads.reduce((bySubject, quad) => {
    const key = quad.subject.value;
    const bucket = bySubject.get(key);
    if (bucket === undefined) {
      bySubject.set(key, [quad]);
    } else {
      bucket.push(quad);
    }
    return bySubject;
  }, new Map());
}
|
|
73
|
+
|
|
74
|
+
/**
 * Render subject-grouped quads as MDLD text and record origin data.
 *
 * Layout:
 *   1. One `[prefix] <namespace>` line per rendered prefix, sorted by prefix,
 *      followed by a blank line (only when at least one prefix was rendered).
 *   2. For each subject (sorted): a `# name {=subject .type ...}` heading,
 *      then `> value {predicate ...}` lines for literals and
 *      `> name {+object ?predicate}` lines for IRI objects, each group sorted
 *      by predicate.
 *
 * All recorded ranges are character offsets into the returned text. Only
 * trailing whitespace is trimmed from the result, so start offsets stay valid;
 * the range of the final block(s) may extend past the end of the trimmed text
 * because block ranges include their trailing newline(s).
 *
 * @param {Map<string, Array<Object>>} subjectGroups - Subject IRI → normalized quads.
 * @param {Object} context - Prefix-to-namespace map used for shortening IRIs.
 * @returns {{text: string, blocks: Map<string, Object>, quadIndex: Map<string, Object>}}
 */
function buildDeterministicMDLD(subjectGroups, context) {
  const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
  const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';

  let text = '';
  let currentPos = 0;
  const blocks = new Map();
  const quadIndex = new Map();

  // Append a chunk to the output and return the range it occupies.
  const append = (chunk) => {
    const range = { start: currentPos, end: currentPos + chunk.length };
    text += chunk;
    currentPos = range.end;
    return range;
  };

  const byPredicate = (a, b) => a.predicate.value.localeCompare(b.predicate.value);

  // Prefix declarations, in deterministic order.
  const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
  let renderedPrefixCount = 0;
  for (const [prefix, namespace] of sortedPrefixes) {
    // Keyword entries such as @vocab are not prefix declarations.
    if (prefix.startsWith('@')) continue;
    // Default prefixes are implicit in MDLD, but only while they still map to
    // the default namespace; a user override must be declared or the
    // generated CURIEs would resolve to the wrong IRIs when re-parsed.
    if (DEFAULT_CONTEXT[prefix] === namespace) continue;

    const prefixDecl = `[${prefix}] <${namespace}>\n`;
    const blockId = generateBlockId();
    blocks.set(blockId, {
      id: blockId,
      range: append(prefixDecl),
      subject: null,
      entries: [{ kind: 'prefix', prefix, namespace, raw: prefixDecl.trim() }],
      carrierType: 'prefix'
    });
    renderedPrefixCount += 1;
  }

  // Separate prefixes from content only if something was actually written.
  // Emitting the blank line unconditionally would put a leading newline in the
  // text, and trimming it afterwards would shift every recorded offset by one.
  if (renderedPrefixCount > 0) {
    append('\n');
  }

  // Subjects in deterministic order.
  const sortedSubjects = Array.from(subjectGroups.keys()).sort();

  for (const subjectIRI of sortedSubjects) {
    const subjectQuads = subjectGroups.get(subjectIRI);
    const shortSubject = shortenIRI(subjectIRI, context);

    // Separate types, literals, and objects.
    const types = subjectQuads.filter(q => q.predicate.value === RDF_TYPE);
    const literals = subjectQuads.filter(q => q.object.termType === 'Literal' && q.predicate.value !== RDF_TYPE);
    const objects = subjectQuads.filter(q => q.object.termType === 'NamedNode' && q.predicate.value !== RDF_TYPE);

    // Types are written as shortened IRIs (e.g. `.ex:Article`) so that the
    // annotation resolves back to the same IRI; a bare local name would not
    // carry the namespace.
    const typeTokens = types.map(t => '.' + shortenIRI(t.object.value, context));
    const typeAnnotations = typeTokens.length > 0
      ? ' ' + [...typeTokens].sort().join(' ')
      : '';

    // Heading block.
    const headingText = `# ${extractLocalName(subjectIRI)} {=${shortSubject}${typeAnnotations}}\n\n`;
    const blockId = generateBlockId();
    blocks.set(blockId, {
      id: blockId,
      range: append(headingText),
      subject: subjectIRI,
      entries: [
        { kind: 'subject', raw: `=${shortSubject}`, expandedSubject: subjectIRI },
        // NOTE(review): entryIndex counts within the type list, not within
        // this entries array (where the subject entry occupies index 0).
        // Kept as in the original; confirm against the parser's convention.
        ...types.map((t, i) => ({
          kind: 'type',
          raw: typeTokens[i],
          expandedType: t.object.value,
          entryIndex: i
        }))
      ],
      carrierType: 'heading'
    });

    // Index the type quads against the heading block.
    types.forEach((quad, i) => {
      const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
      quadIndex.set(key, createSlotInfo(blockId, i, {
        kind: 'type',
        subject: quad.subject,
        predicate: quad.predicate,
        object: quad.object
      }));
    });

    // Literals (deterministic order).
    const sortedLiterals = literals.sort(byPredicate);
    for (const quad of sortedLiterals) {
      let annotation = shortenIRI(quad.predicate.value, context);
      if (quad.object.language) {
        annotation += ` @${quad.object.language}`;
      } else if (quad.object.datatype.value !== XSD_STRING) {
        annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
      }

      const value = quad.object.value;
      const literalText = `> ${value} {${annotation}}\n`;
      const literalBlockId = generateBlockId();
      const range = append(literalText);

      // Offsets are derived from the known layout "> " + value + " {...}\n"
      // rather than searched for, so braces inside the value cannot mislead.
      const valueStart = range.start + 2; // skip "> "
      const attrsStart = valueStart + value.length + 1; // skip the separating space

      // NOTE(review): these lines are written with a `>` marker but recorded
      // as carrierType 'span', as in the original; confirm which one
      // consumers expect.
      blocks.set(literalBlockId, {
        id: literalBlockId,
        range,
        subject: subjectIRI,
        entries: [{
          kind: 'property',
          raw: annotation,
          expandedPredicate: quad.predicate.value,
          form: '',
          entryIndex: 0
        }],
        carrierType: 'span',
        valueRange: { start: valueStart, end: valueStart + value.length },
        attrsRange: { start: attrsStart, end: range.end - 1 } // exclude the trailing newline
      });

      const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
      quadIndex.set(key, createSlotInfo(literalBlockId, 0, {
        kind: 'pred',
        subject: quad.subject,
        predicate: quad.predicate,
        object: quad.object,
        form: ''
      }));
    }

    // IRI objects (deterministic order).
    const sortedObjects = objects.sort(byPredicate);
    for (const quad of sortedObjects) {
      const predShort = shortenIRI(quad.predicate.value, context);
      const objShort = shortenIRI(quad.object.value, context);
      const localName = extractLocalName(quad.object.value);

      const objectText = `> ${localName} {+${objShort} ?${predShort}}\n`;
      const objectBlockId = generateBlockId();
      blocks.set(objectBlockId, {
        id: objectBlockId,
        range: append(objectText),
        subject: subjectIRI,
        entries: [{
          kind: 'object',
          raw: objShort,
          expandedObject: quad.object.value,
          entryIndex: 0
        }],
        carrierType: 'span'
      });

      const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
      quadIndex.set(key, createSlotInfo(objectBlockId, 0, {
        kind: 'pred',
        subject: quad.subject,
        predicate: quad.predicate,
        object: quad.object,
        form: '?'
      }));
    }

    // Blank line between one subject's property lines and the next heading.
    if (sortedLiterals.length > 0 || sortedObjects.length > 0) {
      append('\n');
    }
  }

  // The text never starts with whitespace, so trimming the end alone gives
  // the same string as a full trim while making it explicit that offsets
  // are preserved.
  return { text: text.trimEnd(), blocks, quadIndex };
}
|
|
245
|
+
|
|
246
|
+
/**
 * Produce a random block identifier of exactly eight base-36 characters.
 *
 * The base-36 expansion of a random fraction can be shorter than eight digits
 * (0.5 is "0.i", 0 is "0"), which would otherwise give a short or even empty
 * ID; the result is right-padded with "0" to a fixed width.
 *
 * Not cryptographically secure and not guaranteed unique.
 *
 * @returns {string} Eight characters from [0-9a-z].
 */
function generateBlockId() {
  return Math.random().toString(36).substring(2, 10).padEnd(8, '0');
}
|
package/src/index.js
CHANGED
package/src/locate.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { parse } from './parse.js';
|
|
2
|
+
import { normalizeQuad, quadIndexKey } from './utils.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Locate the text range of a quad in MDLD text using origin tracking.
 *
 * The quad is normalized, turned into a quadIndex key, and looked up in
 * `origin.quadIndex`; the slot found there names the block in `origin.blocks`.
 * For heading, blockquote, list and span blocks the block range is reported;
 * for the latter three the range is narrowed to the carrier text when the
 * matching entry's `raw` contains a `{` annotation and the text before it can
 * be found inside the block. Other carrier types yield `range: null` and
 * empty `content`.
 *
 * @param {Object} quad - The quad to locate (subject, predicate, object).
 * @param {Object} [origin] - Origin object with `blocks` and `quadIndex` maps.
 *   When falsy and `text` is non-empty, `text` is parsed to obtain it.
 * @param {string} [text=''] - MDLD text the ranges refer to. `null` is treated as empty.
 * @param {Object} [context={}] - Context passed to the parser when `text` is parsed.
 * @returns {Object|null} `{ blockId, entryIndex, kind, subject, predicate,
 *   object, range, content, blockRange, carrierType, isVacant }`, or `null`
 *   when the quad, origin, index slot or block is missing.
 */
export function locate(quad, origin, text = '', context = {}) {
  // The default only covers `undefined`; guard an explicit null as well so
  // the substring calls below cannot throw.
  const source = text ?? '';

  // If origin not provided, parse the text to get it.
  const resolvedOrigin = !origin && source
    ? parse(source, { context }).origin
    : origin;

  if (!quad || !resolvedOrigin || !resolvedOrigin.quadIndex || !resolvedOrigin.blocks) {
    return null;
  }

  // Normalize the quad for consistent key generation.
  const normalizedQuad = normalizeQuad(quad);
  if (!normalizedQuad) {
    return null;
  }

  const quadKey = quadIndexKey(normalizedQuad.subject, normalizedQuad.predicate, normalizedQuad.object);
  const slotInfo = resolvedOrigin.quadIndex.get(quadKey);
  if (!slotInfo) {
    return null;
  }

  const block = resolvedOrigin.blocks.get(slotInfo.blockId);
  if (!block) {
    return null;
  }

  const narrowable = block.carrierType === 'blockquote'
    || block.carrierType === 'list'
    || block.carrierType === 'span';

  let contentRange = null;
  let content = '';

  if (block.carrierType === 'heading' || narrowable) {
    // Start from the whole block; narrowed below when possible.
    contentRange = block.range;
    content = source.substring(block.range.start, block.range.end);
  }

  if (narrowable && slotInfo.entryIndex != null && block.entries) {
    const entry = block.entries[slotInfo.entryIndex];
    const annotationStart = entry && entry.raw ? entry.raw.indexOf('{') : -1;
    if (annotationStart !== -1) {
      // The part of entry.raw before the annotation is the carrier text.
      const carrierContent = entry.raw.substring(0, annotationStart).trim();
      const contentStart = source.indexOf(carrierContent, block.range.start);
      const contentEnd = contentStart + carrierContent.length;
      // Accept the match only when it lies inside this block; without the
      // upper bound the search could land on identical text in a later block.
      if (contentStart !== -1 && contentEnd <= block.range.end) {
        contentRange = { start: contentStart, end: contentEnd };
        content = source.substring(contentStart, contentEnd);
      }
    }
  }

  return {
    blockId: slotInfo.blockId,
    entryIndex: slotInfo.entryIndex,
    kind: slotInfo.kind,
    subject: normalizedQuad.subject,
    predicate: normalizedQuad.predicate,
    object: normalizedQuad.object,
    range: contentRange,
    content,
    blockRange: block.range,
    carrierType: block.carrierType,
    isVacant: slotInfo.isVacant || false
  };
}
|
package/src/parse.js
CHANGED
|
@@ -562,7 +562,12 @@ const manageListStack = (token, state) => {
|
|
|
562
562
|
|
|
563
563
|
const combineSemanticInfo = (token, carriers, listFrame, state, itemSubject) => {
|
|
564
564
|
const combinedSem = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
565
|
-
const addSem = (sem) => {
|
|
565
|
+
const addSem = (sem) => {
|
|
566
|
+
const entryIndex = combinedSem.entries.length;
|
|
567
|
+
combinedSem.types.push(...sem.types);
|
|
568
|
+
combinedSem.predicates.push(...sem.predicates);
|
|
569
|
+
combinedSem.entries.push(...sem.entries.map(entry => ({ ...entry, entryIndex })));
|
|
570
|
+
};
|
|
566
571
|
|
|
567
572
|
if (listFrame?.contextSem) {
|
|
568
573
|
const inheritedSem = processContextSem({ sem: listFrame.contextSem, itemSubject, contextSubject: listFrame.contextSubject, inheritLiterals: true, state });
|