mdld-parse 0.7.2 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/constants.js +30 -0
- package/src/generate.js +37 -53
- package/src/index.js +1 -1
- package/src/locate.js +2 -17
- package/src/merge.js +4 -5
- package/src/parse.js +222 -282
- package/src/render.js +320 -357
- package/src/shared.js +529 -0
- package/src/utils.js +2 -9
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.7.2",
|
|
3
|
+
"version": "0.7.4",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
package/src/constants.js
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared utilities for MD-LD Parser and Renderer
|
|
3
|
+
* Ensures DRY code and consistent CommonMark processing
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
export const DEFAULT_CONTEXT = {
|
|
9
|
+
'@vocab': "http://www.w3.org/2000/01/rdf-schema#",
|
|
10
|
+
rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
|
|
11
|
+
rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
|
|
12
|
+
xsd: 'http://www.w3.org/2001/XMLSchema#',
|
|
13
|
+
sh: "http://www.w3.org/ns/shacl#",
|
|
14
|
+
prov: 'http://www.w3.org/ns/prov#'
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
// CommonMark patterns - shared between parser and renderer
|
|
18
|
+
export const URL_REGEX = /^(https?|ftp|mailto|tag|nih|urn|uuid|did|web|ipfs|ipns|data|file|urn:uuid):/;
|
|
19
|
+
export const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;
|
|
20
|
+
export const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;
|
|
21
|
+
export const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
22
|
+
export const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
|
|
23
|
+
export const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
24
|
+
export const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
|
|
25
|
+
|
|
26
|
+
// Pre-compiled carrier patterns for performance
|
|
27
|
+
export const CARRIER_PATTERN_ARRAY = [
|
|
28
|
+
['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
|
|
29
|
+
['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
|
|
30
|
+
];
|
package/src/generate.js
CHANGED
|
@@ -1,20 +1,26 @@
|
|
|
1
|
-
import { shortenIRI, expandIRI,
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
1
|
+
import { shortenIRI, expandIRI, DataFactory } from './utils.js';
|
|
2
|
+
import { DEFAULT_CONTEXT } from './constants.js';
|
|
3
|
+
import {
|
|
4
|
+
isLiteral,
|
|
5
|
+
collectUsedPrefixes,
|
|
6
|
+
sortQuadsByPredicate,
|
|
7
|
+
generatePrefixDeclaration,
|
|
8
|
+
generateLiteralText,
|
|
9
|
+
generateObjectText,
|
|
10
|
+
filterQuadsByType
|
|
11
|
+
} from './shared.js';
|
|
12
|
+
|
|
13
|
+
export function extractLocalName(iri, ctx = {}) {
|
|
14
|
+
if (!iri) return iri;
|
|
15
|
+
|
|
16
|
+
// Check for exact prefix matches first
|
|
17
|
+
for (const [prefix, namespace] of Object.entries(ctx)) {
|
|
18
|
+
if (iri.startsWith(namespace) || iri.startsWith(namespace.slice(0, -1))) {
|
|
19
|
+
return iri.substring(namespace.length);
|
|
20
|
+
}
|
|
21
|
+
}
|
|
16
22
|
|
|
17
|
-
|
|
23
|
+
// Fallback to original logic for local names
|
|
18
24
|
const separators = ['#', '/', ':'];
|
|
19
25
|
for (const sep of separators) {
|
|
20
26
|
const lastSep = iri.lastIndexOf(sep);
|
|
@@ -82,14 +88,14 @@ function groupQuadsBySubject(quads) {
|
|
|
82
88
|
|
|
83
89
|
function buildDeterministicMDLD(subjectGroups, context) {
|
|
84
90
|
let text = '';
|
|
91
|
+
const usedPrefixes = collectUsedPrefixes(subjectGroups, context);
|
|
85
92
|
|
|
86
93
|
// Add prefixes first (deterministic order), but exclude default context prefixes
|
|
87
94
|
const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
|
|
88
95
|
for (const [prefix, namespace] of sortedPrefixes) {
|
|
89
96
|
// Skip default context prefixes - they're implicit in MDLD
|
|
90
|
-
if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix]) {
|
|
91
|
-
|
|
92
|
-
text += prefixDecl;
|
|
97
|
+
if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix] && usedPrefixes.has(prefix)) {
|
|
98
|
+
text += generatePrefixDeclaration(prefix, namespace);
|
|
93
99
|
}
|
|
94
100
|
}
|
|
95
101
|
|
|
@@ -104,47 +110,25 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
104
110
|
const subjectQuads = subjectGroups.get(subjectIRI);
|
|
105
111
|
const shortSubject = shortenIRI(subjectIRI, context);
|
|
106
112
|
|
|
107
|
-
// Separate types, literals, and objects using
|
|
108
|
-
const types
|
|
109
|
-
const literals = subjectQuads.filter(q => isLiteral(q.object) && !isRdfType(q.predicate));
|
|
110
|
-
const objects = subjectQuads.filter(q => isNamedNode(q.object) && !isRdfType(q.predicate));
|
|
113
|
+
// Separate types, literals, and objects using shared utility
|
|
114
|
+
const { types, literals, objects } = filterQuadsByType(subjectQuads);
|
|
111
115
|
|
|
112
116
|
// Generate heading
|
|
113
|
-
const localSubjectName = extractLocalName(subjectIRI);
|
|
117
|
+
const localSubjectName = extractLocalName(subjectIRI, context);
|
|
114
118
|
const typeAnnotations = types.length > 0
|
|
115
|
-
? ' ' + types.map(t => '.' +
|
|
119
|
+
? ' ' + types.map(t => '.' + shortenIRI(t.object.value, context)).sort().join(' ')
|
|
116
120
|
: '';
|
|
117
121
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
text += headingText;
|
|
121
|
-
|
|
122
|
-
// Add literals (deterministic order)
|
|
123
|
-
const sortedLiterals = literals.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
|
|
124
|
-
for (const quad of sortedLiterals) {
|
|
125
|
-
const predShort = shortenIRI(quad.predicate.value, context);
|
|
126
|
-
let annotation = predShort;
|
|
127
|
-
|
|
128
|
-
// Use DataFactory XSD constants for datatype comparison
|
|
129
|
-
const xsdString = 'http://www.w3.org/2001/XMLSchema#string';
|
|
130
|
-
if (quad.object.language) {
|
|
131
|
-
annotation += ` @${quad.object.language}`;
|
|
132
|
-
} else if (quad.object.datatype.value !== xsdString) {
|
|
133
|
-
annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
|
|
134
|
-
}
|
|
122
|
+
text += `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n`;
|
|
135
123
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
124
|
+
// Add literals and objects using shared utilities
|
|
125
|
+
sortQuadsByPredicate(literals).forEach(quad => {
|
|
126
|
+
text += generateLiteralText(quad, context);
|
|
127
|
+
});
|
|
139
128
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
const objShort = shortenIRI(quad.object.value, context);
|
|
144
|
-
const predShort = shortenIRI(quad.predicate.value, context);
|
|
145
|
-
const objectText = `[${objShort}] {+${objShort} ?${predShort}}\n`;
|
|
146
|
-
text += objectText;
|
|
147
|
-
}
|
|
129
|
+
sortQuadsByPredicate(objects).forEach(quad => {
|
|
130
|
+
text += generateObjectText(quad, context);
|
|
131
|
+
});
|
|
148
132
|
|
|
149
133
|
text += '\n';
|
|
150
134
|
}
|
package/src/index.js
CHANGED
|
@@ -3,8 +3,8 @@ export { merge } from './merge.js';
|
|
|
3
3
|
export { generate } from './generate.js';
|
|
4
4
|
export { locate } from './locate.js';
|
|
5
5
|
export { render } from './render.js';
|
|
6
|
+
export { DEFAULT_CONTEXT } from './constants.js';
|
|
6
7
|
export {
|
|
7
|
-
DEFAULT_CONTEXT,
|
|
8
8
|
DataFactory,
|
|
9
9
|
hash,
|
|
10
10
|
expandIRI,
|
package/src/locate.js
CHANGED
|
@@ -18,21 +18,6 @@ export function locate(quad, origin) {
|
|
|
18
18
|
return null;
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
-
//
|
|
22
|
-
|
|
23
|
-
if (!entry) {
|
|
24
|
-
return null;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
// Return the lean origin entry structure
|
|
28
|
-
return {
|
|
29
|
-
blockId: entry.blockId,
|
|
30
|
-
range: entry.range,
|
|
31
|
-
carrierType: entry.carrierType,
|
|
32
|
-
subject: entry.subject,
|
|
33
|
-
predicate: entry.predicate,
|
|
34
|
-
context: entry.context,
|
|
35
|
-
value: entry.value,
|
|
36
|
-
polarity: entry.polarity
|
|
37
|
-
};
|
|
21
|
+
// Return the origin entry directly - no need to create new object
|
|
22
|
+
return origin.quadIndex.get(quadKey) || null;
|
|
38
23
|
}
|
package/src/merge.js
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
import { parse } from './parse.js';
|
|
2
|
-
import {
|
|
2
|
+
import { quadIndexKey } from './shared.js';
|
|
3
|
+
import { DEFAULT_CONTEXT } from './constants.js';
|
|
3
4
|
|
|
4
5
|
/**
|
|
5
|
-
* Creates a unique key for quad identity matching
|
|
6
|
+
* Creates a unique key for quad identity matching - using shared utility
|
|
6
7
|
* @param {Quad} quad
|
|
7
8
|
* @returns {string}
|
|
8
9
|
*/
|
|
9
10
|
function quadKey(quad) {
|
|
10
|
-
|
|
11
|
-
const language = quad.object.language || '';
|
|
12
|
-
return `${quad.subject.value}|${quad.predicate.value}|${quad.object.value}|${datatype}|${language}`;
|
|
11
|
+
return quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
13
12
|
}
|
|
14
13
|
|
|
15
14
|
/**
|