mdld-parse 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/merge.js +10 -2
- package/src/parse.js +38 -24
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.7.1",
|
|
3
|
+
"version": "0.7.2",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
package/src/merge.js
CHANGED
|
@@ -34,13 +34,14 @@ function normalizeInput(input, options, docContext) {
|
|
|
34
34
|
* Merges multiple MDLD documents with diff polarity resolution
|
|
35
35
|
* @param {Array<string|ParseResult>} docs
|
|
36
36
|
* @param {Object} options
|
|
37
|
-
* @returns {Object}
|
|
37
|
+
* @returns {Object} Merge result with quads, remove, statements, origin, and context
|
|
38
38
|
*/
|
|
39
39
|
export function merge(docs, options = {}) {
|
|
40
40
|
const sessionBuffer = new Map(); // Use Map instead of Set for proper quad storage
|
|
41
41
|
const sessionRemoveSet = new Set();
|
|
42
42
|
const allDocuments = [];
|
|
43
43
|
const quadIndex = new Map();
|
|
44
|
+
const allStatements = []; // Collect statements from all documents
|
|
44
45
|
|
|
45
46
|
// Process each document in order
|
|
46
47
|
for (let i = 0; i < docs.length; i++) {
|
|
@@ -57,10 +58,16 @@ export function merge(docs, options = {}) {
|
|
|
57
58
|
index: i,
|
|
58
59
|
input: typeof input === 'string' ? 'string' : 'ParseResult',
|
|
59
60
|
origin: doc.origin,
|
|
60
|
-
context: doc.context
|
|
61
|
+
context: doc.context,
|
|
62
|
+
statementsCount: doc.statements?.length || 0 // Track statements count
|
|
61
63
|
};
|
|
62
64
|
allDocuments.push(documentOrigin);
|
|
63
65
|
|
|
66
|
+
// Collect statements from this document
|
|
67
|
+
if (doc.statements && doc.statements.length > 0) {
|
|
68
|
+
allStatements.push(...doc.statements);
|
|
69
|
+
}
|
|
70
|
+
|
|
64
71
|
// Fold assertions into session buffer
|
|
65
72
|
for (const quad of doc.quads) {
|
|
66
73
|
const key = quadKey(quad);
|
|
@@ -125,6 +132,7 @@ export function merge(docs, options = {}) {
|
|
|
125
132
|
return {
|
|
126
133
|
quads: filteredQuads,
|
|
127
134
|
remove: filteredRemove,
|
|
135
|
+
statements: allStatements, // Include all collected statements
|
|
128
136
|
origin: mergeOrigin,
|
|
129
137
|
context: finalContext
|
|
130
138
|
};
|
package/src/parse.js
CHANGED
|
@@ -210,6 +210,12 @@ function createCarrier(type, text, attrs, attrsRange, valueRange, range, pos, ex
|
|
|
210
210
|
return { type, text, attrs, attrsRange, valueRange, range, pos, ...extra };
|
|
211
211
|
}
|
|
212
212
|
|
|
213
|
+
// Pre-compiled carrier patterns for better performance
|
|
214
|
+
const CARRIER_PATTERN_ARRAY = [
|
|
215
|
+
['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
|
|
216
|
+
['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
|
|
217
|
+
];
|
|
218
|
+
|
|
213
219
|
function extractInlineCarriers(text, baseOffset = 0) {
|
|
214
220
|
const carriers = [];
|
|
215
221
|
let pos = 0;
|
|
@@ -243,7 +249,8 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
243
249
|
const extractor = CARRIER_EXTRACTORS[text[pos]];
|
|
244
250
|
if (extractor) return extractor(text, pos, baseOffset);
|
|
245
251
|
|
|
246
|
-
|
|
252
|
+
// Use pre-compiled patterns instead of Object.entries()
|
|
253
|
+
for (const [type, pattern] of CARRIER_PATTERN_ARRAY) {
|
|
247
254
|
pattern.lastIndex = pos;
|
|
248
255
|
const match = pattern.exec(text);
|
|
249
256
|
if (match) {
|
|
@@ -400,14 +407,14 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
|
|
|
400
407
|
// Detect rdf:Statement pattern during single-pass parsing
|
|
401
408
|
detectStatementPatternSinglePass(quad, dataFactory, meta, statements, statementCandidates);
|
|
402
409
|
|
|
403
|
-
// Create lean origin entry
|
|
410
|
+
// Create lean origin entry - avoid spread operator for better performance
|
|
404
411
|
const originEntry = {
|
|
405
412
|
blockId: block.id,
|
|
406
413
|
range: block.range,
|
|
407
414
|
carrierType: block.carrierType,
|
|
408
415
|
subject: subject.value,
|
|
409
416
|
predicate: predicate.value,
|
|
410
|
-
context: { ...block.context },
|
|
417
|
+
context: block.context, // Direct reference instead of spread
|
|
411
418
|
polarity: meta?.remove ? '-' : '+',
|
|
412
419
|
value: block.text || ''
|
|
413
420
|
};
|
|
@@ -474,8 +481,10 @@ function detectStatementPatternSinglePass(quad, dataFactory, meta, statements =
|
|
|
474
481
|
|
|
475
482
|
const resolveFragment = (fragment, state) => {
|
|
476
483
|
if (!state.currentSubject) return null;
|
|
477
|
-
const
|
|
478
|
-
|
|
484
|
+
const subjectValue = state.currentSubject.value;
|
|
485
|
+
const hashIndex = subjectValue.indexOf('#');
|
|
486
|
+
const baseIRI = hashIndex > -1 ? subjectValue.slice(0, hashIndex) : subjectValue;
|
|
487
|
+
return state.df.namedNode(baseIRI + '#' + fragment);
|
|
479
488
|
};
|
|
480
489
|
|
|
481
490
|
function resolveSubject(sem, state) {
|
|
@@ -660,18 +669,21 @@ export function parse(text, options = {}) {
|
|
|
660
669
|
|
|
661
670
|
state.tokens = scanTokens(text);
|
|
662
671
|
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
if (
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
672
|
+
// Single loop instead of filter+forEach for better performance
|
|
673
|
+
for (const token of state.tokens) {
|
|
674
|
+
if (token.type === 'prefix') {
|
|
675
|
+
let resolvedIri = token.iri;
|
|
676
|
+
if (token.iri.includes(':')) {
|
|
677
|
+
const colonIndex = token.iri.indexOf(':');
|
|
678
|
+
const potentialPrefix = token.iri.substring(0, colonIndex);
|
|
679
|
+
const reference = token.iri.substring(colonIndex + 1);
|
|
680
|
+
if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
|
|
681
|
+
resolvedIri = state.ctx[potentialPrefix] + reference;
|
|
682
|
+
}
|
|
671
683
|
}
|
|
684
|
+
state.ctx[token.prefix] = resolvedIri;
|
|
672
685
|
}
|
|
673
|
-
|
|
674
|
-
});
|
|
686
|
+
}
|
|
675
687
|
|
|
676
688
|
for (let i = 0; i < state.tokens.length; i++) {
|
|
677
689
|
const token = state.tokens[i];
|
|
@@ -679,18 +691,20 @@ export function parse(text, options = {}) {
|
|
|
679
691
|
TOKEN_PROCESSORS[token.type]?.(token, state);
|
|
680
692
|
}
|
|
681
693
|
|
|
682
|
-
//
|
|
683
|
-
const removeArray = Array.from(state.removeSet);
|
|
694
|
+
// Optimize array operations - avoid Array.from() and filter()
|
|
684
695
|
const quadKeys = new Set();
|
|
685
|
-
state.quads
|
|
686
|
-
quadKeys.add(quadIndexKey(
|
|
687
|
-
}
|
|
696
|
+
for (const quad of state.quads) {
|
|
697
|
+
quadKeys.add(quadIndexKey(quad.subject, quad.predicate, quad.object));
|
|
698
|
+
}
|
|
688
699
|
|
|
689
|
-
//
|
|
690
|
-
const filteredRemove =
|
|
700
|
+
// Direct iteration instead of Array.from() + filter()
|
|
701
|
+
const filteredRemove = [];
|
|
702
|
+
for (const quad of state.removeSet) {
|
|
691
703
|
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
692
|
-
|
|
693
|
-
|
|
704
|
+
if (!quadKeys.has(key)) {
|
|
705
|
+
filteredRemove.push(quad);
|
|
706
|
+
}
|
|
707
|
+
}
|
|
694
708
|
|
|
695
709
|
return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
|
|
696
710
|
}
|