mdld-parse 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -59
- package/package.json +3 -2
- package/src/{serialize.js → applyDiff.js} +1 -1
- package/src/generate.js +248 -0
- package/src/index.js +3 -1
- package/src/locate.js +92 -0
- package/src/parse.js +120 -96
- package/src/utils.js +19 -4
package/README.md
CHANGED
|
@@ -11,26 +11,55 @@
|
|
|
11
11
|
MD-LD allows you to author RDF graphs directly in Markdown using explicit `{...}` annotations:
|
|
12
12
|
|
|
13
13
|
```markdown
|
|
14
|
-
|
|
14
|
+
[my] <tag:alice@example.com,2026:>
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
16
|
+
# 2024-07-18 {=my:journal-2024-07-18 .my:Event my:date ^^xsd:date}
|
|
17
|
+
|
|
18
|
+
## A good day {label}
|
|
19
|
+
|
|
20
|
+
Mood: [Happy] {my:mood}
|
|
21
|
+
Energy level: [8] {my:energyLevel ^^xsd:integer}
|
|
22
|
+
|
|
23
|
+
Met [Sam] {+my:sam .my:Person ?my:attendee} on my regular walk at [Central Park] {+my:central-park ?my:location .my:Place label @en} and talked about [Sunny] {my:weather} weather.
|
|
24
|
+
|
|
25
|
+
Activities: {?my:hasActivity .my:Activity label}
|
|
26
|
+
|
|
27
|
+
- Walking {=#walking}
|
|
28
|
+
- Reading {=#reading}
|
|
19
29
|
|
|
20
|
-
[Section] {+#overview ?hasPart}
|
|
21
|
-
Overview: [Mission summary] {description}
|
|
22
30
|
```
|
|
23
31
|
|
|
24
32
|
Generates valid RDF triples:
|
|
25
33
|
|
|
26
34
|
```turtle
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
ex:
|
|
33
|
-
|
|
35
|
+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
|
|
36
|
+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
|
|
37
|
+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
|
|
38
|
+
@prefix sh: <http://www.w3.org/ns/shacl#>.
|
|
39
|
+
@prefix prov: <http://www.w3.org/ns/prov#>.
|
|
40
|
+
@prefix ex: <http://example.org/>.
|
|
41
|
+
@prefix my: <tag:alice@example.com,2026:>.
|
|
42
|
+
|
|
43
|
+
my:journal-2024-07-18 a my:Event;
|
|
44
|
+
my:date "2024-07-18"^^xsd:date;
|
|
45
|
+
rdfs:label "A good day";
|
|
46
|
+
my:mood "Happy";
|
|
47
|
+
my:energyLevel 8;
|
|
48
|
+
my:attendee my:sam;
|
|
49
|
+
my:location my:central-park;
|
|
50
|
+
my:weather "Sunny";
|
|
51
|
+
my:hasActivity <tag:alice@example.com,2026:journal-2024-07-18#walking>, <tag:alice@example.com,2026:journal-2024-07-18#reading>.
|
|
52
|
+
my:sam a my:Person.
|
|
53
|
+
my:central-park a my:Place;
|
|
54
|
+
rdfs:label "Central Park"@en.
|
|
55
|
+
<tag:alice@example.com,2026:journal-2024-07-18#walking> a my:Activity;
|
|
56
|
+
rdfs:label "Walking".
|
|
57
|
+
<tag:alice@example.com,2026:journal-2024-07-18#reading> a my:Activity;
|
|
58
|
+
rdfs:label "Reading".
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Read the [FULL SPEC](./docs/Spec/Spec.md).
|
|
34
63
|
|
|
35
64
|
## Core Features
|
|
36
65
|
|
|
@@ -112,11 +141,11 @@ Create fragment IRIs relative to current subject:
|
|
|
112
141
|
```markdown
|
|
113
142
|
# Document {=ex:document}
|
|
114
143
|
{=#summary}
|
|
115
|
-
[Content] {
|
|
144
|
+
[Content] {label}
|
|
116
145
|
```
|
|
117
146
|
|
|
118
147
|
```turtle
|
|
119
|
-
ex:document#summary
|
|
148
|
+
ex:document#summary rdfs:label "Content" .
|
|
120
149
|
```
|
|
121
150
|
|
|
122
151
|
Fragments replace any existing fragment and require a current subject.
|
|
@@ -128,11 +157,11 @@ Subject remains in scope until reset with `{=}` or new subject declared.
|
|
|
128
157
|
Emit `rdf:type` triple:
|
|
129
158
|
|
|
130
159
|
```markdown
|
|
131
|
-
## Apollo 11 {=ex:apollo11 .SpaceMission .Event}
|
|
160
|
+
## Apollo 11 {=ex:apollo11 .ex:SpaceMission .ex:Event}
|
|
132
161
|
```
|
|
133
162
|
|
|
134
163
|
```turtle
|
|
135
|
-
ex:apollo11 a
|
|
164
|
+
ex:apollo11 a ex:SpaceMission, ex:Event .
|
|
136
165
|
```
|
|
137
166
|
|
|
138
167
|
### Literal Properties
|
|
@@ -142,15 +171,15 @@ Inline value carriers emit literal properties:
|
|
|
142
171
|
```markdown
|
|
143
172
|
# Mission {=ex:apollo11}
|
|
144
173
|
|
|
145
|
-
[Neil Armstrong] {commander}
|
|
146
|
-
[1969] {year ^^xsd:gYear}
|
|
147
|
-
[Historic mission] {description @en}
|
|
174
|
+
[Neil Armstrong] {ex:commander}
|
|
175
|
+
[1969] {ex:year ^^xsd:gYear}
|
|
176
|
+
[Historic mission] {ex:description @en}
|
|
148
177
|
```
|
|
149
178
|
|
|
150
179
|
```turtle
|
|
151
|
-
ex:apollo11
|
|
152
|
-
|
|
153
|
-
|
|
180
|
+
ex:apollo11 ex:commander "Neil Armstrong" ;
|
|
181
|
+
ex:year "1969"^^xsd:gYear ;
|
|
182
|
+
ex:description "Historic mission"@en .
|
|
154
183
|
```
|
|
155
184
|
|
|
156
185
|
### Object Properties
|
|
@@ -160,11 +189,11 @@ Links create relationships (use `?` prefix):
|
|
|
160
189
|
```markdown
|
|
161
190
|
# Mission {=ex:apollo11}
|
|
162
191
|
|
|
163
|
-
[NASA] {=ex:nasa ?organizer}
|
|
192
|
+
[NASA] {=ex:nasa ?ex:organizer}
|
|
164
193
|
```
|
|
165
194
|
|
|
166
195
|
```turtle
|
|
167
|
-
ex:apollo11
|
|
196
|
+
ex:apollo11 ex:organizer ex:nasa .
|
|
168
197
|
```
|
|
169
198
|
|
|
170
199
|
### Resource Declaration
|
|
@@ -174,12 +203,12 @@ Declare resources inline with `{=iri}`:
|
|
|
174
203
|
```markdown
|
|
175
204
|
# Mission {=ex:apollo11}
|
|
176
205
|
|
|
177
|
-
[Neil Armstrong] {=ex:armstrong ?commander .Person}
|
|
206
|
+
[Neil Armstrong] {=ex:armstrong ?ex:commander .prov:Person}
|
|
178
207
|
```
|
|
179
208
|
|
|
180
209
|
```turtle
|
|
181
|
-
ex:apollo11
|
|
182
|
-
ex:armstrong a
|
|
210
|
+
ex:apollo11 ex:commander ex:armstrong .
|
|
211
|
+
ex:armstrong a prov:Person .
|
|
183
212
|
```
|
|
184
213
|
|
|
185
214
|
### Lists
|
|
@@ -189,15 +218,15 @@ Lists require explicit subjects per item.
|
|
|
189
218
|
```markdown
|
|
190
219
|
# Recipe {=ex:recipe}
|
|
191
220
|
|
|
192
|
-
Ingredients: {?ingredient .Ingredient}
|
|
193
|
-
- Flour {=ex:flour
|
|
194
|
-
- Water {=ex:water
|
|
221
|
+
Ingredients: {?ex:ingredient .ex:Ingredient}
|
|
222
|
+
- Flour {=ex:flour label}
|
|
223
|
+
- Water {=ex:water label}
|
|
195
224
|
```
|
|
196
225
|
|
|
197
226
|
```turtle
|
|
198
|
-
ex:recipe
|
|
199
|
-
ex:flour a
|
|
200
|
-
ex:water a
|
|
227
|
+
ex:recipe ex:ingredient ex:flour, ex:water .
|
|
228
|
+
ex:flour a ex:Ingredient ; rdfs:label "Flour" .
|
|
229
|
+
ex:water a ex:Ingredient ; rdfs:label "Water" .
|
|
201
230
|
```
|
|
202
231
|
|
|
203
232
|
### Code Blocks
|
|
@@ -207,14 +236,14 @@ Code blocks are value carriers:
|
|
|
207
236
|
````markdown
|
|
208
237
|
# Example {=ex:example}
|
|
209
238
|
|
|
210
|
-
```javascript {=ex:code .SoftwareSourceCode text}
|
|
239
|
+
```javascript {=ex:code .ex:SoftwareSourceCode ex:text}
|
|
211
240
|
console.log("hello");
|
|
212
241
|
```
|
|
213
242
|
````
|
|
214
243
|
|
|
215
244
|
```turtle
|
|
216
|
-
ex:code a
|
|
217
|
-
|
|
245
|
+
ex:code a ex:SoftwareSourceCode ;
|
|
246
|
+
ex:text "console.log(\"hello\")" .
|
|
218
247
|
```
|
|
219
248
|
|
|
220
249
|
### Blockquotes
|
|
@@ -222,11 +251,11 @@ ex:code a schema:SoftwareSourceCode ;
|
|
|
222
251
|
```markdown
|
|
223
252
|
# Article {=ex:article}
|
|
224
253
|
|
|
225
|
-
> MD-LD bridges Markdown and RDF. {
|
|
254
|
+
> MD-LD bridges Markdown and RDF. {comment}
|
|
226
255
|
```
|
|
227
256
|
|
|
228
257
|
```turtle
|
|
229
|
-
ex:article
|
|
258
|
+
ex:article rdfs:comment "MD-LD bridges Markdown and RDF." .
|
|
230
259
|
```
|
|
231
260
|
|
|
232
261
|
### Reverse Relations
|
|
@@ -236,13 +265,13 @@ Reverse the relationship direction:
|
|
|
236
265
|
```markdown
|
|
237
266
|
# Part {=ex:part}
|
|
238
267
|
|
|
239
|
-
Part of: {!hasPart}
|
|
268
|
+
Part of: {!ex:hasPart}
|
|
240
269
|
|
|
241
270
|
- Book {=ex:book}
|
|
242
271
|
```
|
|
243
272
|
|
|
244
273
|
```turtle
|
|
245
|
-
ex:book
|
|
274
|
+
ex:book ex:hasPart ex:part .
|
|
246
275
|
```
|
|
247
276
|
|
|
248
277
|
### Prefix Declarations
|
|
@@ -250,7 +279,6 @@ ex:book schema:hasPart ex:part .
|
|
|
250
279
|
```markdown
|
|
251
280
|
[ex] <http://example.org/>
|
|
252
281
|
[foaf] <http://xmlns.com/foaf/0.1/>
|
|
253
|
-
[@vocab] <http://schema.org/>
|
|
254
282
|
|
|
255
283
|
# Person {=ex:alice .foaf:Person}
|
|
256
284
|
```
|
|
@@ -297,7 +325,7 @@ Parse MD-LD markdown and return RDF quads with origin tracking.
|
|
|
297
325
|
|
|
298
326
|
- `markdown` (string) — MD-LD formatted text
|
|
299
327
|
- `options` (object, optional):
|
|
300
|
-
- `context` (object) — Prefix mappings (default: `{ '@vocab': 'http://www.w3.org/2000/01/rdf-schema#', rdf, rdfs, xsd,
|
|
328
|
+
- `context` (object) — Prefix mappings (default: `{ '@vocab': 'http://www.w3.org/2000/01/rdf-schema#', rdf, rdfs, xsd, sh, prov }`)
|
|
301
329
|
- `dataFactory` (object) — Custom RDF/JS DataFactory
|
|
302
330
|
|
|
303
331
|
**Returns:** `{ quads, origin, context }`
|
|
@@ -329,7 +357,7 @@ console.log(result.quads);
|
|
|
329
357
|
// ]
|
|
330
358
|
```
|
|
331
359
|
|
|
332
|
-
### `
|
|
360
|
+
### `applyDiff({ text, diff, origin, options })`
|
|
333
361
|
|
|
334
362
|
Apply RDF changes back to markdown with proper positioning.
|
|
335
363
|
|
|
@@ -353,18 +381,18 @@ Apply RDF changes back to markdown with proper positioning.
|
|
|
353
381
|
```javascript
|
|
354
382
|
const original = `# Article {=ex:article}
|
|
355
383
|
|
|
356
|
-
[Alice] {author}`;
|
|
384
|
+
[Alice] {ex:author}`;
|
|
357
385
|
|
|
358
386
|
const result = parse(original, { context: { ex: 'http://example.org/' } });
|
|
359
387
|
|
|
360
388
|
// Add a new property
|
|
361
389
|
const newQuad = {
|
|
362
390
|
subject: { termType: 'NamedNode', value: 'http://example.org/article' },
|
|
363
|
-
predicate: { termType: 'NamedNode', value: 'http://
|
|
391
|
+
predicate: { termType: 'NamedNode', value: 'http://example.org/datePublished' },
|
|
364
392
|
object: { termType: 'Literal', value: '2024-01-01' }
|
|
365
393
|
};
|
|
366
394
|
|
|
367
|
-
const updated =
|
|
395
|
+
const updated = applyDiff({
|
|
368
396
|
text: original,
|
|
369
397
|
diff: { add: [newQuad] },
|
|
370
398
|
origin: result.origin,
|
|
@@ -378,6 +406,91 @@ console.log(updated.text);
|
|
|
378
406
|
// [2024-01-01] {datePublished}
|
|
379
407
|
```
|
|
380
408
|
|
|
409
|
+
### `generate(quads, context)`
|
|
410
|
+
|
|
411
|
+
Generate deterministic MDLD from RDF quads with origin tracking.
|
|
412
|
+
|
|
413
|
+
**Parameters:**
|
|
414
|
+
|
|
415
|
+
- `quads` (array) — Array of RDF/JS Quads to convert
|
|
416
|
+
- `context` (object, optional) — Prefix mappings (default: `{}`)
|
|
417
|
+
- Merged with DEFAULT_CONTEXT for proper CURIE shortening
|
|
418
|
+
- Only user-defined prefixes are rendered in output
|
|
419
|
+
|
|
420
|
+
**Returns:** `{ text, origin, context }`
|
|
421
|
+
|
|
422
|
+
- `text` — Generated MDLD markdown
|
|
423
|
+
- `origin` — Origin tracking object with:
|
|
424
|
+
- `blocks` — Map of block IDs to source locations
|
|
425
|
+
- `quadIndex` — Map of quads to block IDs
|
|
426
|
+
- `context` — Final context used (includes defaults)
|
|
427
|
+
|
|
428
|
+
**Example:**
|
|
429
|
+
|
|
430
|
+
```javascript
|
|
431
|
+
const quads = [
|
|
432
|
+
{
|
|
433
|
+
subject: { termType: 'NamedNode', value: 'http://example.org/article' },
|
|
434
|
+
predicate: { termType: 'NamedNode', value: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' },
|
|
435
|
+
object: { termType: 'NamedNode', value: 'http://example.org/Article' }
|
|
436
|
+
},
|
|
437
|
+
{
|
|
438
|
+
subject: { termType: 'NamedNode', value: 'http://example.org/article' },
|
|
439
|
+
predicate: { termType: 'NamedNode', value: 'http://example.org/author' },
|
|
440
|
+
object: { termType: 'NamedNode', value: 'http://example.org/alice' }
|
|
441
|
+
}
|
|
442
|
+
];
|
|
443
|
+
|
|
444
|
+
const result = generate(quads, {
|
|
445
|
+
ex: 'http://example.org/',
|
|
446
|
+
});
|
|
447
|
+
|
|
448
|
+
console.log(result.text);
|
|
449
|
+
// # Article {=ex:article .ex:Article}
|
|
450
|
+
//
|
|
451
|
+
// > alice {+ex:alice ?ex:author}
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
### `locate(quad, origin, text, context)`
|
|
455
|
+
|
|
456
|
+
Locate the precise text range of a quad in MDLD text using origin tracking.
|
|
457
|
+
|
|
458
|
+
**Parameters:**
|
|
459
|
+
|
|
460
|
+
- `quad` (object) — The quad to locate (subject, predicate, object)
|
|
461
|
+
- `origin` (object, optional) — Origin object containing blocks and quadIndex
|
|
462
|
+
- `text` (string, optional) — MDLD text (auto-parsed if origin not provided)
|
|
463
|
+
- `context` (object, optional) — Context for parsing when text needs to be parsed
|
|
464
|
+
|
|
465
|
+
**Returns:** `{ blockId, entryIndex, kind, subject, predicate, object, range, content, blockRange, carrierType, isVacant }` or `null`
|
|
466
|
+
|
|
467
|
+
- `blockId` — ID of the containing block
|
|
468
|
+
- `entryIndex` — Position within block entries
|
|
469
|
+
- `range` — Precise character range of the quad content
|
|
470
|
+
- `content` — Actual text content at that range
|
|
471
|
+
- `blockRange` — Full range of the containing block
|
|
472
|
+
- `carrierType` — Type of carrier (heading, blockquote, list, span)
|
|
473
|
+
- `isVacant` — Whether the slot is marked as vacant
|
|
474
|
+
|
|
475
|
+
**Example:**
|
|
476
|
+
|
|
477
|
+
```javascript
|
|
478
|
+
import { parse, locate } from './src/index.js';
|
|
479
|
+
|
|
480
|
+
const result = parse(mdldText, { context: { ex: 'http://example.org/' } });
|
|
481
|
+
const quad = result.quads[0]; // Find a quad to locate
|
|
482
|
+
|
|
483
|
+
// Pattern 1: With origin (most efficient)
|
|
484
|
+
const location1 = locate(quad, result.origin, mdldText);
|
|
485
|
+
|
|
486
|
+
// Pattern 2: Auto-parse text (convenient)
|
|
487
|
+
const location2 = locate(quad, null, mdldText, { ex: 'http://example.org/' });
|
|
488
|
+
|
|
489
|
+
console.log(location1.range); // { start: 38, end: 44 }
|
|
490
|
+
console.log(location1.content); // " Alice"
|
|
491
|
+
console.log(location1.carrierType); // "blockquote"
|
|
492
|
+
```
|
|
493
|
+
|
|
381
494
|
## Value Carriers
|
|
382
495
|
|
|
383
496
|
Only specific markdown elements can carry semantic values:
|
|
@@ -464,14 +577,14 @@ Therefore, the algebra is **closed**.
|
|
|
464
577
|
```markdown
|
|
465
578
|
[alice] <tag:alice@example.com,2026:>
|
|
466
579
|
|
|
467
|
-
# Meeting Notes {=alice:meeting-2024-01-15 .Meeting}
|
|
580
|
+
# Meeting Notes {=alice:meeting-2024-01-15 .alice:Meeting}
|
|
468
581
|
|
|
469
|
-
Attendees: {?attendee
|
|
582
|
+
Attendees: {?alice:attendee label}
|
|
470
583
|
|
|
471
584
|
- Alice {=alice:alice}
|
|
472
585
|
- Bob {=alice:bob}
|
|
473
586
|
|
|
474
|
-
Action items: {?actionItem
|
|
587
|
+
Action items: {?alice:actionItem label}
|
|
475
588
|
|
|
476
589
|
- Review proposal {=alice:task-1}
|
|
477
590
|
```
|
|
@@ -479,14 +592,14 @@ Action items: {?actionItem name}
|
|
|
479
592
|
### Developer Documentation
|
|
480
593
|
|
|
481
594
|
````markdown
|
|
482
|
-
# API Endpoint {=api:/users/:id .
|
|
595
|
+
# API Endpoint {=api:/users/:id .api:Endpoint}
|
|
483
596
|
|
|
484
|
-
[GET] {method}
|
|
485
|
-
[/users/:id] {path}
|
|
597
|
+
[GET] {api:method}
|
|
598
|
+
[/users/:id] {api:path}
|
|
486
599
|
|
|
487
600
|
Example:
|
|
488
601
|
|
|
489
|
-
```bash {=api:/users/:id#example .CodeExample
|
|
602
|
+
```bash {=api:/users/:id#example .api:CodeExample api:code}
|
|
490
603
|
curl https://api.example.com/users/123
|
|
491
604
|
```
|
|
492
605
|
````
|
|
@@ -496,13 +609,13 @@ curl https://api.example.com/users/123
|
|
|
496
609
|
```markdown
|
|
497
610
|
[alice] <tag:alice@example.com,2026:>
|
|
498
611
|
|
|
499
|
-
# Paper {=alice:paper-semantic-markdown .ScholarlyArticle}
|
|
612
|
+
# Paper {=alice:paper-semantic-markdown .alice:ScholarlyArticle}
|
|
500
613
|
|
|
501
|
-
[Semantic Web] {
|
|
502
|
-
[Alice Johnson] {=alice:alice-johnson ?author}
|
|
503
|
-
[2024-01] {datePublished ^^xsd:gYearMonth}
|
|
614
|
+
[Semantic Web] {label}
|
|
615
|
+
[Alice Johnson] {=alice:alice-johnson ?alice:author}
|
|
616
|
+
[2024-01] {alice:datePublished ^^xsd:gYearMonth}
|
|
504
617
|
|
|
505
|
-
> This paper explores semantic markup in Markdown. {
|
|
618
|
+
> This paper explores semantic markup in Markdown. {comment @en}
|
|
506
619
|
```
|
|
507
620
|
|
|
508
621
|
## Testing
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.5.
|
|
3
|
+
"version": "0.5.4",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -12,7 +12,8 @@
|
|
|
12
12
|
"src"
|
|
13
13
|
],
|
|
14
14
|
"scripts": {
|
|
15
|
-
"test": "node tests/index.js"
|
|
15
|
+
"test": "node tests/index.js",
|
|
16
|
+
"dev": "pnpx live-server"
|
|
16
17
|
},
|
|
17
18
|
"keywords": [
|
|
18
19
|
"mdld",
|
|
@@ -158,7 +158,7 @@ function markEntryAsVacant(entry, quad) {
|
|
|
158
158
|
return null;
|
|
159
159
|
}
|
|
160
160
|
|
|
161
|
-
export function
|
|
161
|
+
export function applyDiff({ text, diff, origin, options = {} }) {
|
|
162
162
|
if (!diff || (!diff.add?.length && !diff.delete?.length)) {
|
|
163
163
|
const reparsed = parse(text, { context: options.context || {} });
|
|
164
164
|
return { text, origin: reparsed.origin };
|
package/src/generate.js
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import { shortenIRI, expandIRI, quadIndexKey, createSlotInfo, DEFAULT_CONTEXT } from './utils.js';
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
/**
 * Derive a short, human-readable label from an IRI.
 *
 * Separators are tried in priority order (`#`, then `/`, then `:`) and the
 * text after the last occurrence of the first separator found is returned.
 * Trailing separators are ignored before splitting, so namespace-style IRIs
 * such as `http://example.org/` yield `example.org` rather than falling
 * through to a lower-priority separator and producing `//example.org/`.
 *
 * @param {string} iri - IRI (or any string) to shorten.
 * @returns {string} The local name, or `iri` unchanged when nothing splits it.
 */
function extractLocalName(iri) {
  // Strip trailing separators so the last usable separator is never at the end.
  const trimmed = iri.replace(/[#/:]+$/, '');
  if (trimmed === '') {
    return iri;
  }

  for (const sep of ['#', '/', ':']) {
    const lastSep = trimmed.lastIndexOf(sep);
    if (lastSep !== -1) {
      return trimmed.substring(lastSep + 1);
    }
  }

  // No separator inside the string: keep the caller's original value.
  return iri;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Convert RDF quads into MDLD markdown.
 *
 * The caller's prefix mappings are layered on top of DEFAULT_CONTEXT (caller
 * entries win on conflict). The quads are then normalized and sorted, grouped
 * by subject, and rendered to text together with origin-tracking maps.
 *
 * @param {Array<Object>} quads - RDF/JS-style quads ({ subject, predicate, object }).
 * @param {Object} [context={}] - Prefix-to-namespace mappings.
 * @returns {{ text: string, origin: { blocks: Map, quadIndex: Map }, context: Object }}
 *   Generated text, origin maps, and the merged context that was used.
 */
export function generate(quads, context = {}) {
  const mergedContext = Object.assign({}, DEFAULT_CONTEXT, context);
  const grouped = groupQuadsBySubject(normalizeAndSortQuads(quads));
  const rendered = buildDeterministicMDLD(grouped, mergedContext);

  return {
    text: rendered.text,
    origin: { blocks: rendered.blocks, quadIndex: rendered.quadIndex },
    context: mergedContext
  };
}
|
|
36
|
+
|
|
37
|
+
/**
 * Copy quads into plain term objects and sort them into a stable order.
 *
 * Subjects and predicates are reduced to `{ termType, value }`. Literal
 * objects keep value/language/datatype (language defaults to `null`, datatype
 * to xsd:string); every non-literal object is emitted as a NamedNode.
 * The input array and its quads are not mutated.
 *
 * Ordering is subject, then predicate, then object value, with term type,
 * datatype and language as tie-breakers. Comparison is by UTF-16 code unit
 * rather than `localeCompare`, so the result does not depend on the host
 * locale.
 *
 * @param {Array<Object>} quads - Quads with `subject`, `predicate`, `object` terms.
 * @returns {Array<Object>} New, sorted array of normalized quads.
 */
function normalizeAndSortQuads(quads) {
  const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';

  // Locale-independent three-way string comparison.
  const compare = (x, y) => {
    if (x < y) return -1;
    if (x > y) return 1;
    return 0;
  };

  const normalizeObject = (object) => {
    if (object.termType === 'Literal') {
      return {
        termType: 'Literal',
        value: object.value,
        language: object.language || null,
        datatype: object.datatype || { termType: 'NamedNode', value: XSD_STRING }
      };
    }
    // NOTE(review): blank-node objects are also coerced to NamedNode here,
    // matching the previous behavior — confirm this is intended.
    return { termType: 'NamedNode', value: object.value };
  };

  return quads
    .map((quad) => ({
      subject: { termType: quad.subject.termType, value: quad.subject.value },
      predicate: { termType: quad.predicate.termType, value: quad.predicate.value },
      object: normalizeObject(quad.object)
    }))
    .sort((a, b) =>
      compare(a.subject.value, b.subject.value)
      || compare(a.predicate.value, b.predicate.value)
      || compare(a.object.value, b.object.value)
      // Tie-breakers keep quads that differ only by kind/datatype/language
      // in a fixed order regardless of input order.
      || compare(a.object.termType, b.object.termType)
      || compare(a.object.datatype?.value ?? '', b.object.datatype?.value ?? '')
      || compare(a.object.language ?? '', b.object.language ?? ''));
}
|
|
62
|
+
|
|
63
|
+
/**
 * Bucket quads by the string value of their subject.
 *
 * Buckets appear in first-seen order and each bucket keeps its quads in
 * input order.
 *
 * @param {Array<Object>} quads - Quads exposing `subject.value`.
 * @returns {Map<string, Array<Object>>} Subject value -> quads with that subject.
 */
function groupQuadsBySubject(quads) {
  return quads.reduce((bySubject, quad) => {
    const subjectKey = quad.subject.value;
    const bucket = bySubject.get(subjectKey);
    if (bucket === undefined) {
      bySubject.set(subjectKey, [quad]);
    } else {
      bucket.push(quad);
    }
    return bySubject;
  }, new Map());
}
|
|
73
|
+
|
|
74
|
+
/**
 * Render subject-grouped quads as MDLD text and record where each quad lands.
 *
 * Output layout:
 *   1. one `[prefix] <namespace>` line per rendered prefix, then a blank line;
 *   2. per subject (sorted), a `# name {=subject .Type ...}` heading;
 *   3. that subject's literal quads as `[value] {predicate ...}` lines;
 *   4. that subject's NamedNode-object quads as `> name {+object ?predicate}` lines.
 *
 * Character offsets stored in block ranges refer to the returned text. The
 * blank line after the prefix section is only emitted when at least one
 * prefix line was written; otherwise the text would begin with a newline that
 * the final trim removes, shifting every recorded offset by one.
 *
 * @param {Map<string, Array<Object>>} subjectGroups - Subject IRI -> normalized quads.
 * @param {Object} context - Prefix mappings used for shortening IRIs.
 * @returns {{ text: string, blocks: Map, quadIndex: Map }}
 */
function buildDeterministicMDLD(subjectGroups, context) {
  const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
  const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';

  let text = '';
  let currentPos = 0;
  const blocks = new Map();
  const quadIndex = new Map();

  // Prefix declarations, in sorted order.
  const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
  let renderedPrefixCount = 0;
  for (const [prefix, namespace] of sortedPrefixes) {
    // Skip keyword entries (e.g. @vocab) and prefixes identical to the
    // defaults. A default prefix name remapped to a different namespace must
    // still be declared, otherwise the generated text would resolve it to the
    // default namespace.
    if (prefix.startsWith('@') || DEFAULT_CONTEXT[prefix] === namespace) {
      continue;
    }

    const prefixDecl = `[${prefix}] <${namespace}>\n`;
    const blockId = generateBlockId();
    blocks.set(blockId, {
      id: blockId,
      range: { start: currentPos, end: currentPos + prefixDecl.length },
      subject: null,
      entries: [{ kind: 'prefix', prefix, namespace, raw: prefixDecl.trim() }],
      carrierType: 'prefix'
    });
    text += prefixDecl;
    currentPos += prefixDecl.length;
    renderedPrefixCount += 1;
  }

  // Separate the prefix section from the body only if it is non-empty.
  if (renderedPrefixCount > 0) {
    text += '\n';
    currentPos += 1;
  }

  // Subjects in sorted order.
  const sortedSubjects = Array.from(subjectGroups.keys()).sort();

  for (const subjectIRI of sortedSubjects) {
    const subjectQuads = subjectGroups.get(subjectIRI);
    const shortSubject = shortenIRI(subjectIRI, context);

    // Partition the subject's quads: rdf:type, literal-valued, NamedNode-valued.
    const types = subjectQuads.filter(q => q.predicate.value === RDF_TYPE);
    const literals = subjectQuads.filter(q => q.object.termType === 'Literal' && q.predicate.value !== RDF_TYPE);
    const objects = subjectQuads.filter(q => q.object.termType === 'NamedNode' && q.predicate.value !== RDF_TYPE);

    // Heading. Types are written via shortenIRI (like the subject) so the
    // namespace is kept; a bare local name would lose it.
    const localSubjectName = extractLocalName(subjectIRI);
    const typeTokens = types.map(t => '.' + shortenIRI(t.object.value, context));
    const typeAnnotations = typeTokens.length > 0
      ? ' ' + [...typeTokens].sort().join(' ')
      : '';

    const headingText = `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n\n`;
    const blockId = generateBlockId();
    const headingBlock = {
      id: blockId,
      range: { start: currentPos, end: currentPos + headingText.length },
      subject: subjectIRI,
      entries: [
        { kind: 'subject', raw: `=${shortSubject}`, expandedSubject: subjectIRI },
        ...types.map((t, i) => ({
          kind: 'type',
          raw: typeTokens[i],
          expandedType: t.object.value,
          entryIndex: i
        }))
      ],
      carrierType: 'heading'
    };
    blocks.set(blockId, headingBlock);

    // Index each rdf:type quad against the heading block.
    types.forEach((quad, i) => {
      const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
      quadIndex.set(key, createSlotInfo(blockId, i, {
        kind: 'type',
        subject: quad.subject,
        predicate: quad.predicate,
        object: quad.object
      }));
    });

    text += headingText;
    currentPos += headingText.length;

    // Literal properties, ordered by predicate.
    // NOTE(review): literal values are written verbatim; values containing
    // `]` or newlines may not survive a re-parse — confirm escaping rules.
    const sortedLiterals = literals.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
    for (const quad of sortedLiterals) {
      const predShort = shortenIRI(quad.predicate.value, context);
      let annotation = predShort;

      if (quad.object.language) {
        annotation += ` @${quad.object.language}`;
      } else if (quad.object.datatype.value !== XSD_STRING) {
        annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
      }

      const literalText = `[${quad.object.value}] {${annotation}}\n`;
      // Offsets are computed from known lengths ("[" + value + "] {" +
      // annotation + "}") so braces inside the value cannot shift them.
      const attrsStart = currentPos + quad.object.value.length + 3;
      const literalBlockId = generateBlockId();
      const literalBlock = {
        id: literalBlockId,
        range: { start: currentPos, end: currentPos + literalText.length },
        subject: subjectIRI,
        entries: [{
          kind: 'property',
          raw: annotation,
          expandedPredicate: quad.predicate.value,
          form: '',
          entryIndex: 0
        }],
        carrierType: 'span',
        valueRange: { start: currentPos + 1, end: currentPos + 1 + quad.object.value.length },
        attrsRange: { start: attrsStart, end: attrsStart + annotation.length + 2 }
      };
      blocks.set(literalBlockId, literalBlock);

      const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
      quadIndex.set(key, createSlotInfo(literalBlockId, 0, {
        kind: 'pred',
        subject: quad.subject,
        predicate: quad.predicate,
        object: quad.object,
        form: ''
      }));

      text += literalText;
      currentPos += literalText.length;
    }

    // Object properties, ordered by predicate.
    const sortedObjects = objects.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
    for (const quad of sortedObjects) {
      const predShort = shortenIRI(quad.predicate.value, context);
      const objShort = shortenIRI(quad.object.value, context);
      const localName = extractLocalName(quad.object.value);

      const objectText = `> ${localName} {+${objShort} ?${predShort}}\n`;
      const objectBlockId = generateBlockId();
      const objectBlock = {
        id: objectBlockId,
        range: { start: currentPos, end: currentPos + objectText.length },
        subject: subjectIRI,
        entries: [{
          kind: 'object',
          raw: objShort,
          expandedObject: quad.object.value,
          entryIndex: 0
        }],
        // NOTE(review): the line is written as a `>` blockquote but recorded
        // as 'span' — confirm which carrier type consumers expect.
        carrierType: 'span'
      };
      blocks.set(objectBlockId, objectBlock);

      const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
      quadIndex.set(key, createSlotInfo(objectBlockId, 0, {
        kind: 'pred',
        subject: quad.subject,
        predicate: quad.predicate,
        object: quad.object,
        form: '?'
      }));

      text += objectText;
      currentPos += objectText.length;
    }

    // Blank line between one subject's properties and the next heading.
    if (sortedLiterals.length > 0 || sortedObjects.length > 0) {
      text += '\n';
      currentPos += 1;
    }
  }

  // The text never starts with whitespace, so trimming only drops trailing
  // newlines and leaves recorded start offsets valid.
  return { text: text.trim(), blocks, quadIndex };
}
|
|
245
|
+
|
|
246
|
+
/**
 * Create a random block identifier of exactly eight base-36 characters.
 *
 * Each character is drawn separately. Slicing `Math.random().toString(36)`
 * instead yields shorter strings for values with short base-36 expansions
 * (e.g. 0.5 -> "i") and an empty string when the draw is exactly 0.
 *
 * Not cryptographically secure; intended only as a map key.
 *
 * @returns {string} Eight characters from [0-9a-z].
 */
function generateBlockId() {
  const RADIX = 36;
  const ID_LENGTH = 8;

  let id = '';
  for (let i = 0; i < ID_LENGTH; i += 1) {
    id += Math.floor(Math.random() * RADIX).toString(RADIX);
  }
  return id;
}
|
package/src/index.js
CHANGED
package/src/locate.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { parse } from './parse.js';
|
|
2
|
+
import { normalizeQuad, quadIndexKey } from './utils.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Locate the text range of a quad in MDLD text using origin tracking.
 *
 * When `origin` is not supplied and `text` is non-empty, the text is parsed
 * with `context` to obtain an origin. The quad is normalized, looked up in
 * `origin.quadIndex`, and resolved to its block in `origin.blocks`.
 *
 * For heading, blockquote, list and span carriers the reported range starts
 * as the whole block. For the non-heading carriers it is narrowed to the
 * carrier content (the part of the entry's `raw` text before `{`) when that
 * content is found inside the block's own range. Other carrier types report
 * `range: null` and empty `content`.
 *
 * @param {Object} quad - The quad to locate (subject, predicate, object).
 * @param {Object} [origin] - Origin object with `blocks` and `quadIndex` maps.
 * @param {string} [text=''] - MDLD text; parsed when `origin` is not provided.
 * @param {Object} [context={}] - Context passed to the parser when parsing is needed.
 * @returns {Object|null} `{ blockId, entryIndex, kind, subject, predicate,
 *   object, range, content, blockRange, carrierType, isVacant }`, or `null`
 *   when the quad, origin, index entry or block is missing.
 */
export function locate(quad, origin, text = '', context = {}) {
  const source = text ?? '';

  // Fall back to parsing the text when no origin was handed in.
  const resolvedOrigin = !origin && source
    ? parse(source, { context }).origin
    : origin;

  if (!quad || !resolvedOrigin || !resolvedOrigin.quadIndex || !resolvedOrigin.blocks) {
    return null;
  }

  // Normalize so the lookup key is built the same way as the index keys.
  const normalizedQuad = normalizeQuad(quad);
  if (!normalizedQuad) {
    return null;
  }

  const quadKey = quadIndexKey(normalizedQuad.subject, normalizedQuad.predicate, normalizedQuad.object);
  const slotInfo = resolvedOrigin.quadIndex.get(quadKey);
  if (!slotInfo) {
    return null;
  }

  const block = resolvedOrigin.blocks.get(slotInfo.blockId);
  if (!block) {
    return null;
  }

  let contentRange = null;
  let content = '';

  const isHeading = block.carrierType === 'heading';
  const isInlineCarrier = block.carrierType === 'blockquote'
    || block.carrierType === 'list'
    || block.carrierType === 'span';

  if (isHeading || isInlineCarrier) {
    // Default: the whole block.
    contentRange = block.range;
    content = source.substring(block.range.start, block.range.end);
  }

  if (isInlineCarrier && slotInfo.entryIndex != null) {
    const entry = block.entries?.[slotInfo.entryIndex];
    const annotationStart = entry?.raw ? entry.raw.indexOf('{') : -1;

    if (annotationStart !== -1) {
      // Carrier content is whatever precedes the `{...}` annotation.
      const carrierContent = entry.raw.substring(0, annotationStart).trim();
      const contentStart = source.indexOf(carrierContent, block.range.start);
      const contentEnd = contentStart + carrierContent.length;

      // Accept the match only inside this block; an unbounded search could
      // hit identical text in a later block and report a range outside it.
      if (contentStart !== -1 && contentEnd <= block.range.end) {
        contentRange = { start: contentStart, end: contentEnd };
        content = source.substring(contentStart, contentEnd);
      }
    }
  }

  return {
    blockId: slotInfo.blockId,
    entryIndex: slotInfo.entryIndex,
    kind: slotInfo.kind,
    subject: normalizedQuad.subject,
    predicate: normalizedQuad.predicate,
    object: normalizedQuad.object,
    range: contentRange,
    content: content,
    blockRange: block.range,
    carrierType: block.carrierType,
    isVacant: slotInfo.isVacant || false
  };
}
|
package/src/parse.js
CHANGED
|
@@ -10,7 +10,7 @@ import {
|
|
|
10
10
|
} from './utils.js';
|
|
11
11
|
|
|
12
12
|
const URL_REGEX = /^[a-zA-Z][a-zA-Z0-9+.-]*:/;
|
|
13
|
-
const FENCE_REGEX = /^(`{3,})(.*)/;
|
|
13
|
+
const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;
|
|
14
14
|
const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;
|
|
15
15
|
const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
16
16
|
const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
|
|
@@ -22,6 +22,29 @@ const INLINE_CARRIER_PATTERNS = {
|
|
|
22
22
|
CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
|
|
23
23
|
};
|
|
24
24
|
|
|
25
|
+
// Closing-fence matchers, built once per fence character and reused.
const FENCE_CLOSE_PATTERNS = new Map();

/**
 * Return the regular expression that matches a run of three or more
 * `fenceChar` characters at the start of a (trimmed) line.
 *
 * Capture group 1 holds the whole run, so callers can compare its length
 * with the opening fence.
 *
 * @param {string} fenceChar - The single fence character ('`' or '~').
 * @returns {RegExp} Cached pattern for that character.
 */
function getFenceClosePattern(fenceChar) {
  let pattern = FENCE_CLOSE_PATTERNS.get(fenceChar);
  if (!pattern) {
    // Escape regex metacharacters so the character is always matched
    // literally (an unescaped '.' would match anything, '*' would throw).
    const literal = fenceChar.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    pattern = new RegExp(`^(${literal}{3,})`);
    FENCE_CLOSE_PATTERNS.set(fenceChar, pattern);
  }
  return pattern;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Split the info string that follows an opening code fence into its
 * language tag and its first `{...}` annotation.
 *
 * Leading whitespace is ignored, so "``` js {=x}" yields the same result as
 * "```js {=x}". The language tag ends at the first whitespace character or
 * opening brace.
 *
 * @param {string} langAndAttrs - Text after the fence characters.
 * @returns {{lang: string, attrsText: (string|null)}} `lang` is '' when no
 *   language is given; `attrsText` is the first brace group (braces
 *   included) found after the language, or null when there is none.
 */
function parseLangAndAttrs(langAndAttrs) {
  const info = (langAndAttrs ?? '').trimStart();
  const boundary = info.search(/[\s{]/);
  const langEnd = boundary === -1 ? info.length : boundary;
  const attrsMatch = info.slice(langEnd).match(/\{[^{}]*\}/);
  return {
    lang: info.slice(0, langEnd),
    attrsText: attrsMatch ? attrsMatch[0] : null
  };
}
|
|
47
|
+
|
|
25
48
|
const semCache = {};
|
|
26
49
|
const EMPTY_SEM = Object.freeze({ predicates: [], types: [], subject: null });
|
|
27
50
|
|
|
@@ -79,108 +102,104 @@ function scanTokens(text) {
|
|
|
79
102
|
let pos = 0;
|
|
80
103
|
let codeBlock = null;
|
|
81
104
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
const match = PREFIX_REGEX.exec(line);
|
|
131
|
-
tokens.push({ type: 'prefix', prefix: match[1], iri: match[2].trim() });
|
|
132
|
-
return true;
|
|
133
|
-
}
|
|
134
|
-
},
|
|
135
|
-
{
|
|
136
|
-
test: line => HEADING_REGEX.test(line),
|
|
137
|
-
process: (line, lineStart, pos) => {
|
|
138
|
-
const match = HEADING_REGEX.exec(line);
|
|
139
|
-
const attrs = match[3] || null;
|
|
140
|
-
const afterHashes = match[1].length;
|
|
141
|
-
const rangeInfo = calcRangeInfo(line, attrs, lineStart, afterHashes, match[2].length);
|
|
142
|
-
tokens.push(createToken('heading', [lineStart, pos - 1], match[2].trim(), attrs,
|
|
143
|
-
rangeInfo.attrsRange, rangeInfo.valueRange, { depth: match[1].length }));
|
|
144
|
-
return true;
|
|
145
|
-
}
|
|
146
|
-
},
|
|
147
|
-
{
|
|
148
|
-
test: line => UNORDERED_LIST_REGEX.test(line),
|
|
149
|
-
process: (line, lineStart, pos) => {
|
|
150
|
-
const match = UNORDERED_LIST_REGEX.exec(line);
|
|
151
|
-
tokens.push(createListToken('list', line, lineStart, pos, match, match[1].length));
|
|
152
|
-
return true;
|
|
153
|
-
}
|
|
154
|
-
},
|
|
155
|
-
{
|
|
156
|
-
test: line => BLOCKQUOTE_REGEX.test(line),
|
|
157
|
-
process: (line, lineStart, pos) => {
|
|
158
|
-
const match = BLOCKQUOTE_REGEX.exec(line);
|
|
159
|
-
const attrs = match[2] || null;
|
|
160
|
-
const valueStartInLine = line.startsWith('> ') ? 2 : line.indexOf('>') + 1;
|
|
161
|
-
const valueEndInLine = valueStartInLine + match[1].length;
|
|
162
|
-
tokens.push(createToken('blockquote', [lineStart, pos - 1], match[1].trim(), attrs,
|
|
163
|
-
calcAttrsRange(line, attrs, lineStart),
|
|
164
|
-
[lineStart + valueStartInLine, lineStart + valueEndInLine]));
|
|
165
|
-
return true;
|
|
166
|
-
}
|
|
167
|
-
},
|
|
168
|
-
{
|
|
169
|
-
test: line => line.trim(),
|
|
170
|
-
process: (line, lineStart, pos) => {
|
|
171
|
-
tokens.push(createToken('para', [lineStart, pos - 1], line.trim()));
|
|
172
|
-
return true;
|
|
105
|
+
// Line processors, tried in order; the first whose test passes and whose process() returns truthy handles the line
|
|
106
|
+
const PROCESSORS = [
|
|
107
|
+
{ type: 'fence', test: line => FENCE_REGEX.test(line.trim()), process: handleFence },
|
|
108
|
+
{ type: 'content', test: () => codeBlock, process: line => codeBlock.content.push(line) },
|
|
109
|
+
{ type: 'prefix', test: line => PREFIX_REGEX.test(line), process: handlePrefix },
|
|
110
|
+
{ type: 'heading', test: line => HEADING_REGEX.test(line), process: handleHeading },
|
|
111
|
+
{ type: 'list', test: line => UNORDERED_LIST_REGEX.test(line), process: handleList },
|
|
112
|
+
{ type: 'blockquote', test: line => BLOCKQUOTE_REGEX.test(line), process: handleBlockquote },
|
|
113
|
+
{ type: 'para', test: line => line.trim(), process: handlePara }
|
|
114
|
+
];
|
|
115
|
+
|
|
116
|
+
/**
 * Handle a line that looks like a code fence (``` or ~~~).
 *
 * With no block open, the line opens one and its info string is split into
 * language and `{...}` annotation. With a block open, the line closes it
 * only when it is a run of the same fence character, at least as long as
 * the opening run, followed by nothing but whitespace (the CommonMark
 * rule). Any other fence-like line is kept as code content, so shorter or
 * differently-typed fences nested inside a block are not lost.
 *
 * Reads and writes the enclosing `codeBlock` state and appends a `code`
 * token to `tokens` when a block is closed.
 *
 * @param {string} line - Raw source line (without the newline).
 * @param {number} lineStart - Offset of the line's first character.
 * @param {number} pos - Offset just past the line's newline (unused here).
 * @returns {boolean} true when the line was consumed, false when it turned
 *   out not to be a fence.
 */
function handleFence(line, lineStart, pos) {
  const trimmedLine = line.trim();

  if (!codeBlock) {
    const openMatch = trimmedLine.match(FENCE_REGEX);
    if (!openMatch) return false;

    const { lang, attrsText } = parseLangAndAttrs(openMatch[2]);
    const attrsStartInLine = attrsText ? line.indexOf(attrsText) : -1;
    const hasAttrsRange = Boolean(attrsText) && attrsStartInLine >= 0;

    codeBlock = {
      fence: openMatch[1],
      start: lineStart,
      content: [],
      lang,
      attrs: attrsText,
      attrsRange: hasAttrsRange
        ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrsText.length]
        : null,
      // Code content begins on the line after the opening fence.
      valueRangeStart: lineStart + line.length + 1
    };
    return true;
  }

  const closeMatch = trimmedLine.match(getFenceClosePattern(codeBlock.fence[0]));
  const closesBlock = closeMatch !== null
    && closeMatch[1].length >= codeBlock.fence.length
    && trimmedLine.slice(closeMatch[1].length).trim() === '';

  if (!closesBlock) {
    // Fence-like text inside an open block is ordinary code content.
    codeBlock.content.push(line);
    return true;
  }

  const valueStart = codeBlock.valueRangeStart;
  // Exclude the newline before the closing fence; clamp for empty blocks.
  const valueEnd = Math.max(valueStart, lineStart - 1);
  tokens.push({
    type: 'code',
    range: [codeBlock.start, lineStart],
    text: codeBlock.content.join('\n'),
    lang: codeBlock.lang,
    attrs: codeBlock.attrs,
    attrsRange: codeBlock.attrsRange,
    valueRange: [valueStart, valueEnd]
  });
  codeBlock = null;
  return true;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Record a `[prefix] <iri>` declaration line as a `prefix` token.
 *
 * @param {string} line - Line already known to match PREFIX_REGEX.
 * @param {number} lineStart - Offset of the line start (unused).
 * @param {number} pos - Offset past the line (unused).
 * @returns {boolean} Always true: the line is consumed.
 */
function handlePrefix(line, lineStart, pos) {
  const [, prefix, iri] = PREFIX_REGEX.exec(line);
  tokens.push({ type: 'prefix', prefix, iri: iri.trim() });
  return true;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Turn an ATX heading line into a `heading` token.
 *
 * @param {string} line - Line already known to match HEADING_REGEX.
 * @param {number} lineStart - Offset of the line's first character.
 * @param {number} pos - Offset just past the line's newline.
 * @returns {boolean} Always true: the line is consumed.
 */
function handleHeading(line, lineStart, pos) {
  const [, hashes, title, annotation] = HEADING_REGEX.exec(line);
  const attrs = annotation || null;
  const depth = hashes.length;
  const { attrsRange, valueRange } = calcRangeInfo(line, attrs, lineStart, depth, title.length);
  const heading = createToken(
    'heading',
    [lineStart, pos - 1],
    title.trim(),
    attrs,
    attrsRange,
    valueRange,
    { depth }
  );
  tokens.push(heading);
  return true;
}
|
|
173
|
+
|
|
174
|
+
/**
 * Turn a list-item line into a `list` token via createListToken.
 *
 * @param {string} line - Line already known to match UNORDERED_LIST_REGEX.
 * @param {number} lineStart - Offset of the line's first character.
 * @param {number} pos - Offset just past the line's newline.
 * @returns {boolean} Always true: the line is consumed.
 */
function handleList(line, lineStart, pos) {
  const listMatch = UNORDERED_LIST_REGEX.exec(line);
  // Width of the leading whitespace captured in group 1.
  const indentWidth = listMatch[1].length;
  const item = createListToken('list', line, lineStart, pos, listMatch, indentWidth);
  tokens.push(item);
  return true;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Turn a blockquote line into a `blockquote` token.
 *
 * The value range starts after "> " when the line begins with it,
 * otherwise right after the first '>' in the line, and spans the length of
 * the first capture group.
 *
 * @param {string} line - Line already known to match BLOCKQUOTE_REGEX.
 * @param {number} lineStart - Offset of the line's first character.
 * @param {number} pos - Offset just past the line's newline.
 * @returns {boolean} Always true: the line is consumed.
 */
function handleBlockquote(line, lineStart, pos) {
  const [, quoted, annotation] = BLOCKQUOTE_REGEX.exec(line);
  const attrs = annotation || null;

  let valueOffset = 2;
  if (!line.startsWith('> ')) {
    valueOffset = line.indexOf('>') + 1;
  }
  const valueStart = lineStart + valueOffset;

  const quote = createToken(
    'blockquote',
    [lineStart, pos - 1],
    quoted.trim(),
    attrs,
    calcAttrsRange(line, attrs, lineStart),
    [valueStart, valueStart + quoted.length]
  );
  tokens.push(quote);
  return true;
}
|
|
190
|
+
|
|
191
|
+
/**
 * Fallback processor: record any remaining line as a `para` token holding
 * the trimmed line text.
 *
 * @param {string} line - Raw source line.
 * @param {number} lineStart - Offset of the line's first character.
 * @param {number} pos - Offset just past the line's newline.
 * @returns {boolean} Always true: the line is consumed.
 */
function handlePara(line, lineStart, pos) {
  const paragraph = createToken('para', [lineStart, pos - 1], line.trim());
  tokens.push(paragraph);
  return true;
}
|
|
176
195
|
|
|
177
196
|
for (let i = 0; i < lines.length; i++) {
|
|
178
197
|
const line = lines[i];
|
|
179
198
|
const lineStart = pos;
|
|
180
199
|
pos += line.length + 1;
|
|
181
200
|
|
|
182
|
-
//
|
|
183
|
-
for (const processor of
|
|
201
|
+
// Try each processor in order and stop at the first one that handles the line
|
|
202
|
+
for (const processor of PROCESSORS) {
|
|
184
203
|
if (processor.test(line) && processor.process(line, lineStart, pos)) {
|
|
185
204
|
break;
|
|
186
205
|
}
|
|
@@ -562,7 +581,12 @@ const manageListStack = (token, state) => {
|
|
|
562
581
|
|
|
563
582
|
const combineSemanticInfo = (token, carriers, listFrame, state, itemSubject) => {
|
|
564
583
|
const combinedSem = { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
|
|
565
|
-
const addSem = (sem) => {
|
|
584
|
+
const addSem = (sem) => {
|
|
585
|
+
const entryIndex = combinedSem.entries.length;
|
|
586
|
+
combinedSem.types.push(...sem.types);
|
|
587
|
+
combinedSem.predicates.push(...sem.predicates);
|
|
588
|
+
combinedSem.entries.push(...sem.entries.map(entry => ({ ...entry, entryIndex })));
|
|
589
|
+
};
|
|
566
590
|
|
|
567
591
|
if (listFrame?.contextSem) {
|
|
568
592
|
const inheritedSem = processContextSem({ sem: listFrame.contextSem, itemSubject, contextSubject: listFrame.contextSubject, inheritLiterals: true, state });
|
package/src/utils.js
CHANGED
|
@@ -25,16 +25,31 @@ export function hash(str) {
|
|
|
25
25
|
return Math.abs(h).toString(16).slice(0, 12);
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
+
/**
 * Expand a term to a full IRI using a prefix context.
 *
 * - `null`/`undefined` terms yield `null`.
 * - Terms starting with `http:` or `https:` are returned as-is (trimmed).
 * - `prefix:reference` terms are expanded when `prefix` is a key of `ctx`
 *   itself with a non-empty value; the reference is everything after the
 *   FIRST colon, so references that themselves contain colons stay intact.
 *   Unknown prefixes leave the term unchanged.
 * - Any other term is appended to `ctx['@vocab']` (or '' when unset).
 *
 * No memoisation is done here. The expansion is a handful of string
 * operations, whereas a correct cache key must encode the term and the
 * whole context on every call, which costs more than the work it saves.
 *
 * @param {string|{value: string}|*} term - Term text, an object carrying the
 *   text in `.value`, or any value convertible with String().
 * @param {Object} ctx - Prefix map, optionally with an '@vocab' entry.
 * @returns {string|null} The expanded IRI, or null for a nullish term.
 */
export function expandIRI(term, ctx) {
  if (term == null) return null;

  let raw;
  if (typeof term === 'string') {
    raw = term;
  } else if (typeof term === 'object' && typeof term.value === 'string') {
    raw = term.value;
  } else {
    raw = String(term);
  }
  const t = raw.trim();

  if (/^https?:/.test(t)) return t;

  const colonAt = t.indexOf(':');
  if (colonAt === -1) {
    return (ctx['@vocab'] || '') + t;
  }

  const prefix = t.slice(0, colonAt);
  // Own properties only, so names such as "constructor" or "toString"
  // inherited from Object.prototype are never mistaken for prefixes.
  const base = Object.prototype.hasOwnProperty.call(ctx, prefix) ? ctx[prefix] : undefined;
  return base ? base + t.slice(colonAt + 1) : t;
}
|
|
39
54
|
|
|
40
55
|
export function shortenIRI(iri, ctx) {
|