mdld-parse 0.7.2 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.7.2",
3
+ "version": "0.7.4",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -0,0 +1,30 @@
/**
 * Shared constants for the MD-LD parser and renderer.
 * Both sides import the same context map and line patterns from this module
 * so that CommonMark handling stays consistent between them.
 */

/**
 * Default prefix-to-namespace mappings.
 * `@vocab` and `rdfs` both map to the RDF Schema namespace.
 */
export const DEFAULT_CONTEXT = {
  '@vocab': 'http://www.w3.org/2000/01/rdf-schema#',
  rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
  rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
  xsd: 'http://www.w3.org/2001/XMLSchema#',
  sh: 'http://www.w3.org/ns/shacl#',
  prov: 'http://www.w3.org/ns/prov#',
};

// CommonMark patterns - shared between parser and renderer

/**
 * Matches a string that starts with one of the listed schemes followed by `:`.
 * Group 1 is the scheme. `urn:uuid:` values are covered by the `urn`
 * alternative (group 1 is then `urn`), so no separate `urn:uuid` entry is needed.
 */
export const URL_REGEX = /^(https?|ftp|mailto|tag|nih|urn|uuid|did|web|ipfs|ipns|data|file):/;

/**
 * Matches a line opening with three or more backticks or tildes.
 * Group 1 is the fence run, group 2 is the remainder of the line.
 */
export const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;

/**
 * Matches `[name] <value>` at the start of a line.
 * Group 1 is the bracket content, group 2 is the angle-bracket content.
 */
export const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;

/**
 * Matches a heading of one to six `#` characters.
 * Group 1 is the `#` run, group 2 the heading text, and group 3 the optional
 * trailing `{...}` block (undefined when absent).
 */
export const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;

/**
 * Matches a list item. Despite the name, the marker alternative also accepts
 * ordered markers such as `1.`.
 * Group 1 is the leading whitespace, group 2 the marker, group 3 the item
 * text, and group 4 the optional trailing `{...}` block.
 */
export const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;

/**
 * Matches a `>` blockquote line.
 * Group 1 is the quoted text, group 2 the optional trailing `{...}` block.
 */
export const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;

/**
 * Matches a line that consists only of a `{=...}` block (surrounding
 * whitespace allowed). Group 1 is the text after `=`.
 */
export const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;

/**
 * Pre-compiled carrier patterns for performance, as `[name, regex]` pairs.
 * Every regex is sticky (`y`): it only matches at its own `lastIndex` and
 * advances `lastIndex` after a successful `exec`/`test`, so callers must set
 * `lastIndex` before each use.
 * In both patterns group 1 is the wrapped text and group 2 the content of
 * the `{...}` block that follows it.
 */
export const CARRIER_PATTERN_ARRAY = [
  ['EMPHASIS', /[*_`]+(.+?)[*_`]+\s*\{([^}]+)\}/y],
  ['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y],
];
package/src/generate.js CHANGED
@@ -1,20 +1,26 @@
1
- import { shortenIRI, expandIRI, DEFAULT_CONTEXT, DataFactory } from './utils.js';
2
-
3
- // Helper functions for cleaner term type checking
4
- function isLiteral(term) {
5
- return term?.termType === 'Literal';
6
- }
7
-
8
- function isNamedNode(term) {
9
- return term?.termType === 'NamedNode';
10
- }
11
-
12
- function isRdfType(term) {
13
- return term?.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
14
- }
15
-
1
+ import { shortenIRI, expandIRI, DataFactory } from './utils.js';
2
+ import { DEFAULT_CONTEXT } from './constants.js';
3
+ import {
4
+ isLiteral,
5
+ collectUsedPrefixes,
6
+ sortQuadsByPredicate,
7
+ generatePrefixDeclaration,
8
+ generateLiteralText,
9
+ generateObjectText,
10
+ filterQuadsByType
11
+ } from './shared.js';
12
+
13
+ export function extractLocalName(iri, ctx = {}) {
14
+ if (!iri) return iri;
15
+
16
+ // First check whether the IRI starts with any namespace declared in the context
17
+ for (const [prefix, namespace] of Object.entries(ctx)) {
18
+ if (iri.startsWith(namespace) || iri.startsWith(namespace.slice(0, -1))) {
19
+ return iri.substring(namespace.length);
20
+ }
21
+ }
16
22
 
17
- function extractLocalName(iri) {
23
+ // Fallback to original logic for local names
18
24
  const separators = ['#', '/', ':'];
19
25
  for (const sep of separators) {
20
26
  const lastSep = iri.lastIndexOf(sep);
@@ -82,14 +88,14 @@ function groupQuadsBySubject(quads) {
82
88
 
83
89
  function buildDeterministicMDLD(subjectGroups, context) {
84
90
  let text = '';
91
+ const usedPrefixes = collectUsedPrefixes(subjectGroups, context);
85
92
 
86
93
  // Add prefixes first (deterministic order), but exclude default context prefixes and prefixes missing from usedPrefixes
87
94
  const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
88
95
  for (const [prefix, namespace] of sortedPrefixes) {
89
96
  // Skip default context prefixes - they're implicit in MDLD
90
- if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix]) {
91
- const prefixDecl = `[${prefix}] <${namespace}>\n`;
92
- text += prefixDecl;
97
+ if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix] && usedPrefixes.has(prefix)) {
98
+ text += generatePrefixDeclaration(prefix, namespace);
93
99
  }
94
100
  }
95
101
 
@@ -104,47 +110,25 @@ function buildDeterministicMDLD(subjectGroups, context) {
104
110
  const subjectQuads = subjectGroups.get(subjectIRI);
105
111
  const shortSubject = shortenIRI(subjectIRI, context);
106
112
 
107
- // Separate types, literals, and objects using helper functions
108
- const types = subjectQuads.filter(q => isRdfType(q.predicate));
109
- const literals = subjectQuads.filter(q => isLiteral(q.object) && !isRdfType(q.predicate));
110
- const objects = subjectQuads.filter(q => isNamedNode(q.object) && !isRdfType(q.predicate));
113
+ // Separate types, literals, and objects using shared utility
114
+ const { types, literals, objects } = filterQuadsByType(subjectQuads);
111
115
 
112
116
  // Generate heading
113
- const localSubjectName = extractLocalName(subjectIRI);
117
+ const localSubjectName = extractLocalName(subjectIRI, context);
114
118
  const typeAnnotations = types.length > 0
115
- ? ' ' + types.map(t => '.' + extractLocalName(t.object.value)).sort().join(' ')
119
+ ? ' ' + types.map(t => '.' + shortenIRI(t.object.value, context)).sort().join(' ')
116
120
  : '';
117
121
 
118
- const headingText = `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n\n`;
119
-
120
- text += headingText;
121
-
122
- // Add literals (deterministic order)
123
- const sortedLiterals = literals.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
124
- for (const quad of sortedLiterals) {
125
- const predShort = shortenIRI(quad.predicate.value, context);
126
- let annotation = predShort;
127
-
128
- // Use DataFactory XSD constants for datatype comparison
129
- const xsdString = 'http://www.w3.org/2001/XMLSchema#string';
130
- if (quad.object.language) {
131
- annotation += ` @${quad.object.language}`;
132
- } else if (quad.object.datatype.value !== xsdString) {
133
- annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
134
- }
122
+ text += `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n`;
135
123
 
136
- const literalText = `[${quad.object.value}] {${annotation}}\n`;
137
- text += literalText;
138
- }
124
+ // Add literals and objects using shared utilities
125
+ sortQuadsByPredicate(literals).forEach(quad => {
126
+ text += generateLiteralText(quad, context);
127
+ });
139
128
 
140
- // Add objects (deterministic order)
141
- const sortedObjects = objects.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
142
- for (const quad of sortedObjects) {
143
- const objShort = shortenIRI(quad.object.value, context);
144
- const predShort = shortenIRI(quad.predicate.value, context);
145
- const objectText = `[${objShort}] {+${objShort} ?${predShort}}\n`;
146
- text += objectText;
147
- }
129
+ sortQuadsByPredicate(objects).forEach(quad => {
130
+ text += generateObjectText(quad, context);
131
+ });
148
132
 
149
133
  text += '\n';
150
134
  }
package/src/index.js CHANGED
@@ -3,8 +3,8 @@ export { merge } from './merge.js';
3
3
  export { generate } from './generate.js';
4
4
  export { locate } from './locate.js';
5
5
  export { render } from './render.js';
6
+ export { DEFAULT_CONTEXT } from './constants.js';
6
7
  export {
7
- DEFAULT_CONTEXT,
8
8
  DataFactory,
9
9
  hash,
10
10
  expandIRI,
package/src/locate.js CHANGED
@@ -18,21 +18,6 @@ export function locate(quad, origin) {
18
18
  return null;
19
19
  }
20
20
 
21
- // Find the origin entry in quadIndex
22
- const entry = origin.quadIndex.get(quadKey);
23
- if (!entry) {
24
- return null;
25
- }
26
-
27
- // Return the lean origin entry structure
28
- return {
29
- blockId: entry.blockId,
30
- range: entry.range,
31
- carrierType: entry.carrierType,
32
- subject: entry.subject,
33
- predicate: entry.predicate,
34
- context: entry.context,
35
- value: entry.value,
36
- polarity: entry.polarity
37
- };
21
+ // Return the origin entry directly - no need to create new object
22
+ return origin.quadIndex.get(quadKey) || null;
38
23
  }
package/src/merge.js CHANGED
@@ -1,15 +1,14 @@
1
1
  import { parse } from './parse.js';
2
- import { DEFAULT_CONTEXT } from './utils.js';
2
+ import { quadIndexKey } from './shared.js';
3
+ import { DEFAULT_CONTEXT } from './constants.js';
3
4
 
4
5
  /**
5
- * Creates a unique key for quad identity matching
6
+ * Creates a unique key for quad identity matching by delegating to the shared quadIndexKey utility
6
7
  * @param {Quad} quad
7
8
  * @returns {string}
8
9
  */
9
10
  function quadKey(quad) {
10
- const datatype = quad.object.datatype?.value || '';
11
- const language = quad.object.language || '';
12
- return `${quad.subject.value}|${quad.predicate.value}|${quad.object.value}|${datatype}|${language}`;
11
+ return quadIndexKey(quad.subject, quad.predicate, quad.object);
13
12
  }
14
13
 
15
14
  /**