mdld-parse 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -1
- package/src/generate.js +2 -1
- package/src/index.js +1 -1
- package/src/merge.js +11 -3
- package/src/parse.js +246 -72
- package/src/render.js +345 -345
- package/src/shared.js +212 -0
- package/src/utils.js +2 -9
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.7.1",
|
|
3
|
+
"version": "0.7.3",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -38,5 +38,8 @@
|
|
|
38
38
|
"homepage": "https://mdld.js.org",
|
|
39
39
|
"bugs": {
|
|
40
40
|
"url": "https://github.com/davay42/mdld-parse/issues"
|
|
41
|
+
},
|
|
42
|
+
"dependencies": {
|
|
43
|
+
"rdfa-parse": "^1.0.1"
|
|
41
44
|
}
|
|
42
45
|
}
|
package/src/generate.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { shortenIRI, expandIRI,
|
|
1
|
+
import { shortenIRI, expandIRI, DataFactory } from './utils.js';
|
|
2
|
+
import { DEFAULT_CONTEXT } from './shared.js';
|
|
2
3
|
|
|
3
4
|
// Helper functions for cleaner term type checking
|
|
4
5
|
function isLiteral(term) {
|
package/src/index.js
CHANGED
|
@@ -3,8 +3,8 @@ export { merge } from './merge.js';
|
|
|
3
3
|
export { generate } from './generate.js';
|
|
4
4
|
export { locate } from './locate.js';
|
|
5
5
|
export { render } from './render.js';
|
|
6
|
+
export { DEFAULT_CONTEXT } from './shared.js';
|
|
6
7
|
export {
|
|
7
|
-
DEFAULT_CONTEXT,
|
|
8
8
|
DataFactory,
|
|
9
9
|
hash,
|
|
10
10
|
expandIRI,
|
package/src/merge.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { parse } from './parse.js';
|
|
2
|
-
import { DEFAULT_CONTEXT } from './utils.js';
|
|
2
|
+
import { DEFAULT_CONTEXT } from './shared.js';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* Creates a unique key for quad identity matching
|
|
@@ -34,13 +34,14 @@ function normalizeInput(input, options, docContext) {
|
|
|
34
34
|
* Merges multiple MDLD documents with diff polarity resolution
|
|
35
35
|
* @param {Array<string|ParseResult>} docs
|
|
36
36
|
* @param {Object} options
|
|
37
|
-
* @returns {Object}
|
|
37
|
+
* @returns {Object} Merge result with quads, remove, statements, origin, and context
|
|
38
38
|
*/
|
|
39
39
|
export function merge(docs, options = {}) {
|
|
40
40
|
const sessionBuffer = new Map(); // Use Map instead of Set for proper quad storage
|
|
41
41
|
const sessionRemoveSet = new Set();
|
|
42
42
|
const allDocuments = [];
|
|
43
43
|
const quadIndex = new Map();
|
|
44
|
+
const allStatements = []; // Collect statements from all documents
|
|
44
45
|
|
|
45
46
|
// Process each document in order
|
|
46
47
|
for (let i = 0; i < docs.length; i++) {
|
|
@@ -57,10 +58,16 @@ export function merge(docs, options = {}) {
|
|
|
57
58
|
index: i,
|
|
58
59
|
input: typeof input === 'string' ? 'string' : 'ParseResult',
|
|
59
60
|
origin: doc.origin,
|
|
60
|
-
context: doc.context
|
|
61
|
+
context: doc.context,
|
|
62
|
+
statementsCount: doc.statements?.length || 0 // Track statements count
|
|
61
63
|
};
|
|
62
64
|
allDocuments.push(documentOrigin);
|
|
63
65
|
|
|
66
|
+
// Collect statements from this document
|
|
67
|
+
if (doc.statements && doc.statements.length > 0) {
|
|
68
|
+
allStatements.push(...doc.statements);
|
|
69
|
+
}
|
|
70
|
+
|
|
64
71
|
// Fold assertions into session buffer
|
|
65
72
|
for (const quad of doc.quads) {
|
|
66
73
|
const key = quadKey(quad);
|
|
@@ -125,6 +132,7 @@ export function merge(docs, options = {}) {
|
|
|
125
132
|
return {
|
|
126
133
|
quads: filteredQuads,
|
|
127
134
|
remove: filteredRemove,
|
|
135
|
+
statements: allStatements, // Include all collected statements
|
|
128
136
|
origin: mergeOrigin,
|
|
129
137
|
context: finalContext
|
|
130
138
|
};
|
package/src/parse.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import {
|
|
2
|
-
DEFAULT_CONTEXT,
|
|
3
2
|
DataFactory,
|
|
4
3
|
expandIRI,
|
|
5
4
|
parseSemanticBlock,
|
|
@@ -7,18 +6,7 @@ import {
|
|
|
7
6
|
createLiteral,
|
|
8
7
|
hash
|
|
9
8
|
} from './utils.js';
|
|
10
|
-
|
|
11
|
-
const URL_REGEX = /^[a-zA-Z][a-zA-Z0-9+.-]*:/;
|
|
12
|
-
const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;
|
|
13
|
-
const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;
|
|
14
|
-
const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
15
|
-
const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
|
|
16
|
-
const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
17
|
-
const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
|
|
18
|
-
const INLINE_CARRIER_PATTERNS = {
|
|
19
|
-
EMPHASIS: /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y,
|
|
20
|
-
CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
|
|
21
|
-
};
|
|
9
|
+
import { DEFAULT_CONTEXT, URL_REGEX, FENCE_REGEX, PREFIX_REGEX, HEADING_REGEX, UNORDERED_LIST_REGEX, BLOCKQUOTE_REGEX, STANDALONE_SUBJECT_REGEX, INLINE_CARRIER_PATTERNS } from './shared.js';
|
|
22
10
|
|
|
23
11
|
// Cache for fence regex patterns to avoid recreation
|
|
24
12
|
const FENCE_CLOSE_PATTERNS = new Map();
|
|
@@ -210,6 +198,12 @@ function createCarrier(type, text, attrs, attrsRange, valueRange, range, pos, ex
|
|
|
210
198
|
return { type, text, attrs, attrsRange, valueRange, range, pos, ...extra };
|
|
211
199
|
}
|
|
212
200
|
|
|
201
|
+
// Pre-compiled carrier patterns for better performance
|
|
202
|
+
const CARRIER_PATTERN_ARRAY = [
|
|
203
|
+
['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
|
|
204
|
+
['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
|
|
205
|
+
];
|
|
206
|
+
|
|
213
207
|
function extractInlineCarriers(text, baseOffset = 0) {
|
|
214
208
|
const carriers = [];
|
|
215
209
|
let pos = 0;
|
|
@@ -243,7 +237,8 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
243
237
|
const extractor = CARRIER_EXTRACTORS[text[pos]];
|
|
244
238
|
if (extractor) return extractor(text, pos, baseOffset);
|
|
245
239
|
|
|
246
|
-
|
|
240
|
+
// Use pre-compiled patterns instead of Object.entries()
|
|
241
|
+
for (const [type, pattern] of CARRIER_PATTERN_ARRAY) {
|
|
247
242
|
pattern.lastIndex = pos;
|
|
248
243
|
const match = pattern.exec(text);
|
|
249
244
|
if (match) {
|
|
@@ -342,6 +337,150 @@ function determineCarrierType(url) {
|
|
|
342
337
|
return { carrierType: 'span', resourceIRI: null };
|
|
343
338
|
}
|
|
344
339
|
|
|
340
|
+
function createBlockEntry(token, state) {
|
|
341
|
+
const blockId = token._blockId || hash(`${token.type}:${token.range?.[0]}:${token.range?.[1]}`);
|
|
342
|
+
token._blockId = blockId; // Store for later reference
|
|
343
|
+
|
|
344
|
+
const cleanText = extractCleanText(token);
|
|
345
|
+
|
|
346
|
+
const blockEntry = {
|
|
347
|
+
id: blockId,
|
|
348
|
+
type: token.type,
|
|
349
|
+
range: token.range,
|
|
350
|
+
text: cleanText,
|
|
351
|
+
subject: null,
|
|
352
|
+
types: [],
|
|
353
|
+
predicates: [],
|
|
354
|
+
carriers: [],
|
|
355
|
+
listLevel: token.indent || 0,
|
|
356
|
+
parentBlockId: state.blockStack.length > 0 ? state.blockStack[state.blockStack.length - 1] : null,
|
|
357
|
+
quadKeys: [] // Will be populated during quad emission
|
|
358
|
+
};
|
|
359
|
+
|
|
360
|
+
// Store block and add to document structure
|
|
361
|
+
state.origin.blocks.set(blockId, blockEntry);
|
|
362
|
+
state.origin.documentStructure.push(blockEntry);
|
|
363
|
+
|
|
364
|
+
return blockEntry;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
function extractCleanText(token) {
|
|
368
|
+
if (!token.text) return '';
|
|
369
|
+
|
|
370
|
+
let text = token.text;
|
|
371
|
+
|
|
372
|
+
// Remove semantic annotations
|
|
373
|
+
if (token.attrsRange) {
|
|
374
|
+
const beforeAttrs = text.substring(0, token.attrsRange[0] - (token.range?.[0] || 0));
|
|
375
|
+
const afterAttrs = text.substring(token.attrsRange[1] - (token.range?.[0] || 0));
|
|
376
|
+
text = beforeAttrs + afterAttrs;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
// Clean based on token type
|
|
380
|
+
switch (token.type) {
|
|
381
|
+
case 'heading':
|
|
382
|
+
return text.replace(/^#+\s*/, '').trim();
|
|
383
|
+
case 'list':
|
|
384
|
+
return text.replace(/^[-*+]\s*/, '').trim();
|
|
385
|
+
case 'blockquote':
|
|
386
|
+
return text.replace(/^>\s*/, '').trim();
|
|
387
|
+
default:
|
|
388
|
+
return text.trim();
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
function enrichBlockFromAnnotation(blockEntry, sem, carrier, state) {
|
|
393
|
+
// Update subject if available
|
|
394
|
+
if (sem.subject && sem.subject !== 'RESET') {
|
|
395
|
+
const resolvedSubject = resolveSubject(sem, state);
|
|
396
|
+
if (resolvedSubject) {
|
|
397
|
+
blockEntry.subject = resolvedSubject.value;
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
// Add types
|
|
402
|
+
if (sem.types && sem.types.length > 0) {
|
|
403
|
+
sem.types.forEach(t => {
|
|
404
|
+
const typeIRI = typeof t === 'string' ? t : t.iri;
|
|
405
|
+
const expanded = expandIRI(typeIRI, state.ctx);
|
|
406
|
+
if (!blockEntry.types.includes(expanded)) {
|
|
407
|
+
blockEntry.types.push(expanded);
|
|
408
|
+
}
|
|
409
|
+
});
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
// Add predicates
|
|
413
|
+
if (sem.predicates && sem.predicates.length > 0) {
|
|
414
|
+
sem.predicates.forEach(pred => {
|
|
415
|
+
const expandedPred = {
|
|
416
|
+
iri: expandIRI(pred.iri, state.ctx),
|
|
417
|
+
form: pred.form || '',
|
|
418
|
+
object: null // Will be filled during quad emission
|
|
419
|
+
};
|
|
420
|
+
blockEntry.predicates.push(expandedPred);
|
|
421
|
+
});
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
// Add carrier information
|
|
425
|
+
if (carrier) {
|
|
426
|
+
const carrierInfo = {
|
|
427
|
+
type: carrier.type,
|
|
428
|
+
range: carrier.range,
|
|
429
|
+
text: carrier.text,
|
|
430
|
+
subject: null,
|
|
431
|
+
predicates: []
|
|
432
|
+
};
|
|
433
|
+
|
|
434
|
+
// Extract carrier-specific semantics
|
|
435
|
+
if (carrier.attrs) {
|
|
436
|
+
const carrierSem = parseSemCached(carrier.attrs);
|
|
437
|
+
if (carrierSem.types) {
|
|
438
|
+
carrierInfo.predicates = carrierSem.predicates || [];
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
blockEntry.carriers.push(carrierInfo);
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
function processAnnotationWithBlockTracking(carrier, sem, state, options = {}) {
|
|
447
|
+
const { preserveGlobalSubject = false, implicitSubject = null } = options;
|
|
448
|
+
|
|
449
|
+
if (sem.subject === 'RESET') {
|
|
450
|
+
state.currentSubject = null;
|
|
451
|
+
return;
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
const previousSubject = state.currentSubject;
|
|
455
|
+
const newSubject = resolveSubject(sem, state);
|
|
456
|
+
const localObject = resolveObject(sem, state);
|
|
457
|
+
|
|
458
|
+
const effectiveSubject = implicitSubject || (newSubject && !preserveGlobalSubject ? newSubject : previousSubject);
|
|
459
|
+
if (newSubject && !preserveGlobalSubject && !implicitSubject) {
|
|
460
|
+
state.currentSubject = newSubject;
|
|
461
|
+
}
|
|
462
|
+
const S = preserveGlobalSubject ? (newSubject || previousSubject) : (implicitSubject || state.currentSubject);
|
|
463
|
+
if (!S) return;
|
|
464
|
+
|
|
465
|
+
const block = createBlock(
|
|
466
|
+
S.value, sem.types, sem.predicates,
|
|
467
|
+
carrier.range, carrier.attrsRange || null, carrier.valueRange || null,
|
|
468
|
+
carrier.type || null, state.ctx, carrier.text
|
|
469
|
+
);
|
|
470
|
+
|
|
471
|
+
const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
|
|
472
|
+
const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
|
|
473
|
+
const newSubjectOrCarrierO = newSubject || carrierO;
|
|
474
|
+
|
|
475
|
+
// Enrich current block with semantic information
|
|
476
|
+
if (state.currentBlock) {
|
|
477
|
+
enrichBlockFromAnnotation(state.currentBlock, sem, carrier, state);
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state, carrier);
|
|
481
|
+
processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state, carrier);
|
|
482
|
+
}
|
|
483
|
+
|
|
345
484
|
function createBlock(subject, types, predicates, range, attrsRange, valueRange, carrierType, ctx, text) {
|
|
346
485
|
const expanded = {
|
|
347
486
|
subject,
|
|
@@ -364,7 +503,7 @@ function createBlock(subject, types, predicates, range, attrsRange, valueRange,
|
|
|
364
503
|
};
|
|
365
504
|
}
|
|
366
505
|
|
|
367
|
-
function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null, statements = null, statementCandidates = null) {
|
|
506
|
+
function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predicate, object, dataFactory, meta = null, statements = null, statementCandidates = null, state = null) {
|
|
368
507
|
if (!subject || !predicate || !object) return;
|
|
369
508
|
|
|
370
509
|
const quad = dataFactory.quad(subject, predicate, object);
|
|
@@ -400,19 +539,27 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
|
|
|
400
539
|
// Detect rdf:Statement pattern during single-pass parsing
|
|
401
540
|
detectStatementPatternSinglePass(quad, dataFactory, meta, statements, statementCandidates);
|
|
402
541
|
|
|
403
|
-
// Create lean origin entry
|
|
542
|
+
// Create lean origin entry - avoid spread operator for better performance
|
|
404
543
|
const originEntry = {
|
|
405
544
|
blockId: block.id,
|
|
406
545
|
range: block.range,
|
|
407
546
|
carrierType: block.carrierType,
|
|
408
547
|
subject: subject.value,
|
|
409
548
|
predicate: predicate.value,
|
|
410
|
-
context: { ...block.context },
|
|
549
|
+
context: block.context, // Direct reference instead of spread
|
|
411
550
|
polarity: meta?.remove ? '-' : '+',
|
|
412
551
|
value: block.text || ''
|
|
413
552
|
};
|
|
414
553
|
|
|
415
554
|
quadIndex.set(quadKey, originEntry);
|
|
555
|
+
|
|
556
|
+
// Link block to this quad for reverse lookup during rendering
|
|
557
|
+
if (state.currentBlock && block.id === state.currentBlock.id) {
|
|
558
|
+
if (!state.currentBlock.quadKeys) {
|
|
559
|
+
state.currentBlock.quadKeys = [];
|
|
560
|
+
}
|
|
561
|
+
state.currentBlock.quadKeys.push(quadKey);
|
|
562
|
+
}
|
|
416
563
|
}
|
|
417
564
|
}
|
|
418
565
|
|
|
@@ -474,8 +621,10 @@ function detectStatementPatternSinglePass(quad, dataFactory, meta, statements =
|
|
|
474
621
|
|
|
475
622
|
const resolveFragment = (fragment, state) => {
|
|
476
623
|
if (!state.currentSubject) return null;
|
|
477
|
-
const
|
|
478
|
-
|
|
624
|
+
const subjectValue = state.currentSubject.value;
|
|
625
|
+
const hashIndex = subjectValue.indexOf('#');
|
|
626
|
+
const baseIRI = hashIndex > -1 ? subjectValue.slice(0, hashIndex) : subjectValue;
|
|
627
|
+
return state.df.namedNode(baseIRI + '#' + fragment);
|
|
479
628
|
};
|
|
480
629
|
|
|
481
630
|
function resolveSubject(sem, state) {
|
|
@@ -504,7 +653,8 @@ const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
|
|
|
504
653
|
state.df.namedNode(expandedType),
|
|
505
654
|
state.df,
|
|
506
655
|
{ kind: 'type', token: `.${typeIRI}`, expandedType, entryIndex: typeInfo.entryIndex, remove: typeInfo.remove },
|
|
507
|
-
state.statements, state.statementCandidates
|
|
656
|
+
state.statements, state.statementCandidates,
|
|
657
|
+
state
|
|
508
658
|
);
|
|
509
659
|
};
|
|
510
660
|
|
|
@@ -548,43 +698,16 @@ function processPredicateAnnotations(sem, newSubject, previousSubject, localObje
|
|
|
548
698
|
emitQuad(state.quads, state.quadBuffer, state.removeSet, state.origin.quadIndex, block,
|
|
549
699
|
role.subject, P, role.object, state.df,
|
|
550
700
|
{ kind: 'pred', token: `${pred.form}${pred.iri}`, form: pred.form, expandedPredicate: P.value, entryIndex: pred.entryIndex, remove: pred.remove || false },
|
|
551
|
-
state.statements, state.statementCandidates
|
|
701
|
+
state.statements, state.statementCandidates,
|
|
702
|
+
state
|
|
552
703
|
);
|
|
553
704
|
}
|
|
554
705
|
});
|
|
555
706
|
}
|
|
556
707
|
|
|
557
708
|
function processAnnotation(carrier, sem, state, options = {}) {
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
if (sem.subject === 'RESET') {
|
|
561
|
-
state.currentSubject = null;
|
|
562
|
-
return;
|
|
563
|
-
}
|
|
564
|
-
|
|
565
|
-
const previousSubject = state.currentSubject;
|
|
566
|
-
const newSubject = resolveSubject(sem, state);
|
|
567
|
-
const localObject = resolveObject(sem, state);
|
|
568
|
-
|
|
569
|
-
const effectiveSubject = implicitSubject || (newSubject && !preserveGlobalSubject ? newSubject : previousSubject);
|
|
570
|
-
if (newSubject && !preserveGlobalSubject && !implicitSubject) {
|
|
571
|
-
state.currentSubject = newSubject;
|
|
572
|
-
}
|
|
573
|
-
const S = preserveGlobalSubject ? (newSubject || previousSubject) : (implicitSubject || state.currentSubject);
|
|
574
|
-
if (!S) return;
|
|
575
|
-
|
|
576
|
-
const block = createBlock(
|
|
577
|
-
S.value, sem.types, sem.predicates,
|
|
578
|
-
carrier.range, carrier.attrsRange || null, carrier.valueRange || null,
|
|
579
|
-
carrier.type || null, state.ctx, carrier.text
|
|
580
|
-
);
|
|
581
|
-
|
|
582
|
-
const L = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
|
|
583
|
-
const carrierO = carrier.url ? state.df.namedNode(expandIRI(carrier.url, state.ctx)) : null;
|
|
584
|
-
const newSubjectOrCarrierO = newSubject || carrierO;
|
|
585
|
-
|
|
586
|
-
processTypeAnnotations(sem, newSubject, localObject, carrierO, S, block, state, carrier);
|
|
587
|
-
processPredicateAnnotations(sem, newSubject, previousSubject, localObject, newSubjectOrCarrierO, S, L, block, state, carrier);
|
|
709
|
+
// Use the enhanced block tracking version
|
|
710
|
+
processAnnotationWithBlockTracking(carrier, sem, state, options);
|
|
588
711
|
}
|
|
589
712
|
|
|
590
713
|
|
|
@@ -626,20 +749,60 @@ function processStandaloneSubject(token, state) {
|
|
|
626
749
|
|
|
627
750
|
const TOKEN_PROCESSORS = {
|
|
628
751
|
heading: (token, state) => {
|
|
752
|
+
const blockEntry = createBlockEntry(token, state);
|
|
753
|
+
state.currentBlock = blockEntry;
|
|
754
|
+
state.blockStack.push(blockEntry.id);
|
|
755
|
+
|
|
629
756
|
processTokenAnnotations(token, state, token.type);
|
|
757
|
+
|
|
758
|
+
state.blockStack.pop();
|
|
759
|
+
state.currentBlock = state.blockStack.length > 0 ?
|
|
760
|
+
state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
|
|
630
761
|
},
|
|
631
762
|
code: (token, state) => {
|
|
763
|
+
const blockEntry = createBlockEntry(token, state);
|
|
764
|
+
state.currentBlock = blockEntry;
|
|
765
|
+
state.blockStack.push(blockEntry.id);
|
|
766
|
+
|
|
632
767
|
processTokenAnnotations(token, state, token.type);
|
|
768
|
+
|
|
769
|
+
state.blockStack.pop();
|
|
770
|
+
state.currentBlock = state.blockStack.length > 0 ?
|
|
771
|
+
state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
|
|
633
772
|
},
|
|
634
773
|
blockquote: (token, state) => {
|
|
774
|
+
const blockEntry = createBlockEntry(token, state);
|
|
775
|
+
state.currentBlock = blockEntry;
|
|
776
|
+
state.blockStack.push(blockEntry.id);
|
|
777
|
+
|
|
635
778
|
processTokenAnnotations(token, state, token.type);
|
|
779
|
+
|
|
780
|
+
state.blockStack.pop();
|
|
781
|
+
state.currentBlock = state.blockStack.length > 0 ?
|
|
782
|
+
state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
|
|
636
783
|
},
|
|
637
784
|
para: (token, state) => {
|
|
785
|
+
const blockEntry = createBlockEntry(token, state);
|
|
786
|
+
state.currentBlock = blockEntry;
|
|
787
|
+
state.blockStack.push(blockEntry.id);
|
|
788
|
+
|
|
638
789
|
processStandaloneSubject(token, state);
|
|
639
790
|
processTokenAnnotations(token, state, token.type);
|
|
791
|
+
|
|
792
|
+
state.blockStack.pop();
|
|
793
|
+
state.currentBlock = state.blockStack.length > 0 ?
|
|
794
|
+
state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
|
|
640
795
|
},
|
|
641
796
|
list: (token, state) => {
|
|
797
|
+
const blockEntry = createBlockEntry(token, state);
|
|
798
|
+
state.currentBlock = blockEntry;
|
|
799
|
+
state.blockStack.push(blockEntry.id);
|
|
800
|
+
|
|
642
801
|
processTokenAnnotations(token, state, token.type);
|
|
802
|
+
|
|
803
|
+
state.blockStack.pop();
|
|
804
|
+
state.currentBlock = state.blockStack.length > 0 ?
|
|
805
|
+
state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
|
|
643
806
|
},
|
|
644
807
|
};
|
|
645
808
|
|
|
@@ -650,28 +813,37 @@ export function parse(text, options = {}) {
|
|
|
650
813
|
quads: [],
|
|
651
814
|
quadBuffer: new Map(),
|
|
652
815
|
removeSet: new Set(),
|
|
653
|
-
origin: {
|
|
816
|
+
origin: {
|
|
817
|
+
quadIndex: new Map(),
|
|
818
|
+
blocks: new Map(),
|
|
819
|
+
documentStructure: []
|
|
820
|
+
},
|
|
654
821
|
currentSubject: null,
|
|
655
822
|
tokens: null,
|
|
656
823
|
currentTokenIndex: -1,
|
|
657
824
|
statements: [],
|
|
658
|
-
statementCandidates: new Map() // Track incomplete rdf:Statement patterns
|
|
825
|
+
statementCandidates: new Map(), // Track incomplete rdf:Statement patterns
|
|
826
|
+
currentBlock: null,
|
|
827
|
+
blockStack: []
|
|
659
828
|
};
|
|
660
829
|
|
|
661
830
|
state.tokens = scanTokens(text);
|
|
662
831
|
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
if (
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
832
|
+
// Single loop instead of filter+forEach for better performance
|
|
833
|
+
for (const token of state.tokens) {
|
|
834
|
+
if (token.type === 'prefix') {
|
|
835
|
+
let resolvedIri = token.iri;
|
|
836
|
+
if (token.iri.includes(':')) {
|
|
837
|
+
const colonIndex = token.iri.indexOf(':');
|
|
838
|
+
const potentialPrefix = token.iri.substring(0, colonIndex);
|
|
839
|
+
const reference = token.iri.substring(colonIndex + 1);
|
|
840
|
+
if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
|
|
841
|
+
resolvedIri = state.ctx[potentialPrefix] + reference;
|
|
842
|
+
}
|
|
671
843
|
}
|
|
844
|
+
state.ctx[token.prefix] = resolvedIri;
|
|
672
845
|
}
|
|
673
|
-
|
|
674
|
-
});
|
|
846
|
+
}
|
|
675
847
|
|
|
676
848
|
for (let i = 0; i < state.tokens.length; i++) {
|
|
677
849
|
const token = state.tokens[i];
|
|
@@ -679,18 +851,20 @@ export function parse(text, options = {}) {
|
|
|
679
851
|
TOKEN_PROCESSORS[token.type]?.(token, state);
|
|
680
852
|
}
|
|
681
853
|
|
|
682
|
-
//
|
|
683
|
-
const removeArray = Array.from(state.removeSet);
|
|
854
|
+
// Optimize array operations - avoid Array.from() and filter()
|
|
684
855
|
const quadKeys = new Set();
|
|
685
|
-
state.quads
|
|
686
|
-
quadKeys.add(quadIndexKey(
|
|
687
|
-
}
|
|
856
|
+
for (const quad of state.quads) {
|
|
857
|
+
quadKeys.add(quadIndexKey(quad.subject, quad.predicate, quad.object));
|
|
858
|
+
}
|
|
688
859
|
|
|
689
|
-
//
|
|
690
|
-
const filteredRemove =
|
|
860
|
+
// Direct iteration instead of Array.from() + filter()
|
|
861
|
+
const filteredRemove = [];
|
|
862
|
+
for (const quad of state.removeSet) {
|
|
691
863
|
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
692
|
-
|
|
693
|
-
|
|
864
|
+
if (!quadKeys.has(key)) {
|
|
865
|
+
filteredRemove.push(quad);
|
|
866
|
+
}
|
|
867
|
+
}
|
|
694
868
|
|
|
695
869
|
return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
|
|
696
870
|
}
|