mdld-parse 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.7.1",
3
+ "version": "0.7.2",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
package/src/merge.js CHANGED
@@ -34,13 +34,14 @@ function normalizeInput(input, options, docContext) {
34
34
  * Merges multiple MDLD documents with diff polarity resolution
35
35
  * @param {Array<string|ParseResult>} docs
36
36
  * @param {Object} options
37
- * @returns {Object}
37
+ * @returns {Object} Merge result with quads, remove, statements, origin, and context
38
38
  */
39
39
  export function merge(docs, options = {}) {
40
40
  const sessionBuffer = new Map(); // Use Map instead of Set for proper quad storage
41
41
  const sessionRemoveSet = new Set();
42
42
  const allDocuments = [];
43
43
  const quadIndex = new Map();
44
+ const allStatements = []; // Collect statements from all documents
44
45
 
45
46
  // Process each document in order
46
47
  for (let i = 0; i < docs.length; i++) {
@@ -57,10 +58,16 @@ export function merge(docs, options = {}) {
57
58
  index: i,
58
59
  input: typeof input === 'string' ? 'string' : 'ParseResult',
59
60
  origin: doc.origin,
60
- context: doc.context
61
+ context: doc.context,
62
+ statementsCount: doc.statements?.length || 0 // Track statements count
61
63
  };
62
64
  allDocuments.push(documentOrigin);
63
65
 
66
+ // Collect statements from this document
67
+ if (doc.statements && doc.statements.length > 0) {
68
+ allStatements.push(...doc.statements);
69
+ }
70
+
64
71
  // Fold assertions into session buffer
65
72
  for (const quad of doc.quads) {
66
73
  const key = quadKey(quad);
@@ -125,6 +132,7 @@ export function merge(docs, options = {}) {
125
132
  return {
126
133
  quads: filteredQuads,
127
134
  remove: filteredRemove,
135
+ statements: allStatements, // Include all collected statements
128
136
  origin: mergeOrigin,
129
137
  context: finalContext
130
138
  };
package/src/parse.js CHANGED
@@ -210,6 +210,12 @@ function createCarrier(type, text, attrs, attrsRange, valueRange, range, pos, ex
210
210
  return { type, text, attrs, attrsRange, valueRange, range, pos, ...extra };
211
211
  }
212
212
 
213
+ // Pre-compiled carrier patterns for better performance
214
+ const CARRIER_PATTERN_ARRAY = [
215
+ ['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
216
+ ['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
217
+ ];
218
+
213
219
  function extractInlineCarriers(text, baseOffset = 0) {
214
220
  const carriers = [];
215
221
  let pos = 0;
@@ -243,7 +249,8 @@ function extractInlineCarriers(text, baseOffset = 0) {
243
249
  const extractor = CARRIER_EXTRACTORS[text[pos]];
244
250
  if (extractor) return extractor(text, pos, baseOffset);
245
251
 
246
- for (const [type, pattern] of Object.entries(INLINE_CARRIER_PATTERNS)) {
252
+ // Use pre-compiled patterns instead of Object.entries()
253
+ for (const [type, pattern] of CARRIER_PATTERN_ARRAY) {
247
254
  pattern.lastIndex = pos;
248
255
  const match = pattern.exec(text);
249
256
  if (match) {
@@ -400,14 +407,14 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
400
407
  // Detect rdf:Statement pattern during single-pass parsing
401
408
  detectStatementPatternSinglePass(quad, dataFactory, meta, statements, statementCandidates);
402
409
 
403
- // Create lean origin entry
410
+ // Create lean origin entry - avoid spread operator for better performance
404
411
  const originEntry = {
405
412
  blockId: block.id,
406
413
  range: block.range,
407
414
  carrierType: block.carrierType,
408
415
  subject: subject.value,
409
416
  predicate: predicate.value,
410
- context: { ...block.context },
417
+ context: block.context, // Direct reference instead of spread
411
418
  polarity: meta?.remove ? '-' : '+',
412
419
  value: block.text || ''
413
420
  };
@@ -474,8 +481,10 @@ function detectStatementPatternSinglePass(quad, dataFactory, meta, statements =
474
481
 
475
482
  const resolveFragment = (fragment, state) => {
476
483
  if (!state.currentSubject) return null;
477
- const baseIRI = state.currentSubject.value.split('#')[0];
478
- return state.df.namedNode(`${baseIRI}#${fragment}`);
484
+ const subjectValue = state.currentSubject.value;
485
+ const hashIndex = subjectValue.indexOf('#');
486
+ const baseIRI = hashIndex > -1 ? subjectValue.slice(0, hashIndex) : subjectValue;
487
+ return state.df.namedNode(baseIRI + '#' + fragment);
479
488
  };
480
489
 
481
490
  function resolveSubject(sem, state) {
@@ -660,18 +669,21 @@ export function parse(text, options = {}) {
660
669
 
661
670
  state.tokens = scanTokens(text);
662
671
 
663
- state.tokens.filter(t => t.type === 'prefix').forEach(t => {
664
- let resolvedIri = t.iri;
665
- if (t.iri.includes(':')) {
666
- const colonIndex = t.iri.indexOf(':');
667
- const potentialPrefix = t.iri.substring(0, colonIndex);
668
- const reference = t.iri.substring(colonIndex + 1);
669
- if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
670
- resolvedIri = state.ctx[potentialPrefix] + reference;
672
+ // Single loop instead of filter+forEach for better performance
673
+ for (const token of state.tokens) {
674
+ if (token.type === 'prefix') {
675
+ let resolvedIri = token.iri;
676
+ if (token.iri.includes(':')) {
677
+ const colonIndex = token.iri.indexOf(':');
678
+ const potentialPrefix = token.iri.substring(0, colonIndex);
679
+ const reference = token.iri.substring(colonIndex + 1);
680
+ if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
681
+ resolvedIri = state.ctx[potentialPrefix] + reference;
682
+ }
671
683
  }
684
+ state.ctx[token.prefix] = resolvedIri;
672
685
  }
673
- state.ctx[t.prefix] = resolvedIri;
674
- });
686
+ }
675
687
 
676
688
  for (let i = 0; i < state.tokens.length; i++) {
677
689
  const token = state.tokens[i];
@@ -679,18 +691,20 @@ export function parse(text, options = {}) {
679
691
  TOKEN_PROCESSORS[token.type]?.(token, state);
680
692
  }
681
693
 
682
- // Convert removeSet to array and ensure hard invariant: quads ∩ remove = ∅
683
- const removeArray = Array.from(state.removeSet);
694
+ // Optimize array operations - avoid Array.from() and filter()
684
695
  const quadKeys = new Set();
685
- state.quads.forEach(q => {
686
- quadKeys.add(quadIndexKey(q.subject, q.predicate, q.object));
687
- });
696
+ for (const quad of state.quads) {
697
+ quadKeys.add(quadIndexKey(quad.subject, quad.predicate, quad.object));
698
+ }
688
699
 
689
- // Filter removeArray to ensure no overlap with quads
690
- const filteredRemove = removeArray.filter(quad => {
700
+ // Direct iteration instead of Array.from() + filter()
701
+ const filteredRemove = [];
702
+ for (const quad of state.removeSet) {
691
703
  const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
692
- return !quadKeys.has(key);
693
- });
704
+ if (!quadKeys.has(key)) {
705
+ filteredRemove.push(quad);
706
+ }
707
+ }
694
708
 
695
709
  return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
696
710
  }