mdld-parse 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/shared.js ADDED
@@ -0,0 +1,212 @@
1
+ /**
2
+ * Shared utilities for MD-LD Parser and Renderer
3
+ * Ensures DRY code and consistent CommonMark processing
4
+ */
5
+
6
/**
 * Default prefix-to-namespace mapping.
 * `@vocab` points at the RDFS namespace, the same IRI as the `rdfs` prefix.
 */
export const DEFAULT_CONTEXT = {
  '@vocab': 'http://www.w3.org/2000/01/rdf-schema#',
  rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
  rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
  xsd: 'http://www.w3.org/2001/XMLSchema#',
  sh: 'http://www.w3.org/ns/shacl#',
  prov: 'http://www.w3.org/ns/prov#'
};
14
+
15
// CommonMark patterns - shared between parser and renderer

/**
 * Matches a string that starts with one of the recognised schemes followed by `:`.
 * Capture 1 is the scheme. `urn:uuid:...` is covered by the `urn` alternative, so no
 * separate `urn:uuid` alternative is needed (it could never be reached).
 */
export const URL_REGEX = /^(https?|ftp|mailto|tag|nih|urn|uuid|did|web|ipfs|ipns|data|file):/;

/** Fence opener: capture 1 is a run of 3+ backticks or tildes, capture 2 the rest of the line. */
export const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;

/** Prefix declaration `[name] <iri>`: capture 1 is the bracket text, capture 2 the angle-bracket text. */
export const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;

/** Heading: capture 1 the `#` run (1-6), capture 2 the text, capture 3 an optional trailing `{...}` block. */
export const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;

/**
 * List item: capture 1 indentation, capture 2 the marker, capture 3 the text,
 * capture 4 an optional trailing `{...}` block.
 * Despite the name, the marker alternation also accepts ordered markers of the form `N.`.
 */
export const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;

/** Blockquote: capture 1 the quoted text, capture 2 an optional trailing `{...}` block. */
export const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;

/** A line consisting only of `{=...}` (surrounding whitespace allowed); capture 1 is the text after `{=`. */
export const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
23
+
24
// Inline carrier patterns - shared extraction logic
/**
 * Patterns for an inline span followed by a `{...}` block.
 * Capture 1 is the span text, capture 2 the text inside the braces.
 *
 * Both patterns are sticky (`y`): they only match at `lastIndex`, and `exec`/`test`
 * update `lastIndex`, so set it explicitly before each use.
 */
export const INLINE_CARRIER_PATTERNS = {
  EMPHASIS: /[*_`]+(.+?)[*_`]+\s*\{([^}]+)\}/y,
  CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
};

/**
 * The same patterns as `[name, regex]` pairs, in declaration order.
 * Derived from INLINE_CARRIER_PATTERNS so the two can never drift apart; each entry is
 * a separate RegExp instance so its `lastIndex` is independent of the object's patterns.
 */
export const CARRIER_PATTERN_ARRAY = Object.entries(INLINE_CARRIER_PATTERNS).map(
  ([name, pattern]) => [name, new RegExp(pattern.source, pattern.flags)]
);
35
+
36
// Cache for fence regex patterns, keyed by the fence character passed to getFenceClosePattern
export const FENCE_CLOSE_PATTERNS = new Map();

/**
 * Returns a cached RegExp matching a run of three or more `fenceChar` at the start of a string.
 * Capture 1 is the matched run.
 *
 * The character is escaped before being embedded in the pattern, so regex metacharacters
 * are matched literally. For the backtick and `~` the resulting pattern is unchanged.
 *
 * @param {string} fenceChar - The fence character.
 * @returns {RegExp} The (shared, cached) pattern for that character.
 */
export function getFenceClosePattern(fenceChar) {
  if (!FENCE_CLOSE_PATTERNS.has(fenceChar)) {
    const literal = String(fenceChar).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    FENCE_CLOSE_PATTERNS.set(fenceChar, new RegExp(`^(${literal}{3,})`));
  }
  return FENCE_CLOSE_PATTERNS.get(fenceChar);
}
45
+
46
// Range calculation utilities - shared between parser and renderer

/**
 * Computes absolute ranges for a line's value and its attribute block.
 *
 * The value is taken to start after the prefix plus ALL whitespace that follows it.
 * Counting the full whitespace run (rather than assuming a single space) keeps the
 * range aligned with patterns that consume the separator with `\s+`.
 *
 * @param {string} line - The full line text.
 * @param {?string} attrs - The attribute block text as it appears in `line`, or a falsy value.
 * @param {number} lineStart - Absolute offset of the start of `line`.
 * @param {number} prefixLength - Length of the prefix (indent + marker) within `line`.
 * @param {number} valueLength - Length of the value text.
 * @returns {{valueRange: number[], attrsRange: ?number[]}} Absolute `[start, end]` ranges.
 */
export function calcRangeInfo(line, attrs, lineStart, prefixLength, valueLength) {
  const wsLength = line.slice(prefixLength).match(/^\s+/)?.[0].length ?? 0;
  const valueStartInLine = prefixLength + wsLength;
  return {
    valueRange: [lineStart + valueStartInLine, lineStart + valueStartInLine + valueLength],
    attrsRange: calcAttrsRange(line, attrs, lineStart)
  };
}

/**
 * Locates the last occurrence of `attrs` in `line` and returns its absolute range.
 *
 * @param {string} line - The full line text.
 * @param {?string} attrs - The attribute block text to find.
 * @param {number} lineStart - Absolute offset of the start of `line`.
 * @returns {?number[]} `[start, end]`, or null when `attrs` is falsy or not found in `line`.
 */
export function calcAttrsRange(line, attrs, lineStart) {
  if (!attrs) return null;
  const attrsStartInLine = line.lastIndexOf(attrs);
  return attrsStartInLine >= 0 ? [lineStart + attrsStartInLine, lineStart + attrsStartInLine + attrs.length] : null;
}
62
+
63
// Token creation utilities - shared structure

/**
 * Builds a token object.
 *
 * Keys from `extra` are spread last, so they override the standard fields on collision.
 * A `_carriers` slot is then defined as non-enumerable and writable, initialised to null,
 * which keeps it out of `Object.keys` and `JSON.stringify` output.
 *
 * @param {string} type
 * @param {*} range
 * @param {string} text
 * @param {*} [attrs=null]
 * @param {*} [attrsRange=null]
 * @param {*} [valueRange=null]
 * @param {Object} [extra={}] - Additional enumerable properties to copy onto the token.
 * @returns {Object} The new token.
 */
export function createToken(type, range, text, attrs = null, attrsRange = null, valueRange = null, extra = {}) {
  const carriersSlot = { value: null, writable: true, enumerable: false };
  return Object.defineProperty(
    { type, range, text, attrs, attrsRange, valueRange, ...extra },
    '_carriers',
    carriersSlot
  );
}
71
+
72
/**
 * Builds a carrier record from the given fields.
 * Keys from `extra` are applied last and therefore override the standard fields on collision.
 *
 * @returns {Object} A plain object with the standard fields followed by any extras.
 */
export function createCarrier(type, text, attrs, attrsRange, valueRange, range, pos, extra = {}) {
  const standardFields = { type, text, attrs, attrsRange, valueRange, range, pos };
  return { ...standardFields, ...extra };
}
75
+
76
// List token creation - shared logic

/**
 * Builds a list-item token from a list-pattern match.
 *
 * Reads `match[1]` (indentation), `match[2]` (marker), `match[3]` (item text) and
 * `match[4]` (optional attribute block). The token range is `[lineStart, pos - 1]`
 * and the indentation width is attached as `indent`.
 *
 * @param {string} type - Token type to assign.
 * @param {string} line - The full line text.
 * @param {number} lineStart - Absolute offset of the start of `line`.
 * @param {number} pos - Position used to derive the end of the token range.
 * @param {Array} match - The list-pattern match for `line`.
 * @returns {Object} The token produced by `createToken`.
 */
export function createListToken(type, line, lineStart, pos, match) {
  const indentWidth = match[1].length;
  const markerWidth = match[2] ? match[2].length : 0;
  const itemText = match[3];
  const attrs = match[4] || null;

  const { attrsRange, valueRange } = calcRangeInfo(
    line,
    attrs,
    lineStart,
    indentWidth + markerWidth,
    itemText.length
  );

  return createToken(
    type,
    [lineStart, pos - 1],
    itemText.trim(),
    attrs,
    attrsRange,
    valueRange,
    { indent: indentWidth }
  );
}
84
+
85
// Semantic block parsing - shared between parser and renderer

/**
 * Cache of parsed semantic blocks, keyed by the raw attribute string.
 * Created without a prototype so that keys such as "constructor" or "__proto__"
 * are never answered by inherited Object.prototype members.
 */
export const semCache = Object.create(null);

/** Shared result returned for a missing/empty attribute string. */
export const EMPTY_SEM = Object.freeze({ predicates: [], types: [], subject: null });

/**
 * Returns the parsed form of `attrs`, parsing at most once per distinct string.
 *
 * The parsed object is frozen (shallowly) before being cached, and the same instance
 * is returned on every later call with the same string.
 *
 * NOTE(review): `parseSemanticBlock` is neither defined nor imported in the part of
 * this module visible here — confirm it is in scope, otherwise the first cache miss
 * throws a ReferenceError.
 *
 * @param {?string} attrs - Raw attribute string.
 * @returns {Object} `EMPTY_SEM` when `attrs` is falsy, otherwise the cached parse result.
 */
export function parseSemCached(attrs) {
  if (!attrs) return EMPTY_SEM;
  let sem = semCache[attrs];
  if (!sem) {
    sem = Object.freeze(parseSemanticBlock(attrs));
    semCache[attrs] = sem;
  }
  return sem;
}
98
+
99
// Indentation utilities - shared for list processing

/**
 * Computes the nesting level of a block from the leading whitespace of its source text.
 *
 * The block's range may be either a `{ start, end }` object or a `[start, end]` array
 * (the shape used by the token helpers in this module). Every leading whitespace
 * character counts as one unit; a tab is not expanded.
 *
 * @param {{range: (number[]|{start: number, end: number})}} block - Block with a source range.
 * @param {string} sourceText - Full source text the range refers to.
 * @param {number} [spacesPerLevel=2] - Whitespace characters per nesting level; must be positive.
 * @returns {number} The nesting level, or 0 when the range or source text is missing.
 */
export function getIndentLevel(block, sourceText, spacesPerLevel = 2) {
  if (!block.range || !sourceText) return 0;

  const { range } = block;
  const [start, end] = Array.isArray(range) ? range : [range.start, range.end];
  const text = sourceText.substring(start, end);
  const indentSpaces = text.match(/^\s*/)[0].length;

  return Math.floor(indentSpaces / spacesPerLevel);
}
111
+
112
// Content extraction utilities - shared between parser and renderer

/**
 * Returns the trimmed source text covered by `range`, with the span covered by
 * `attrsRange` cut out when one is given.
 *
 * Both ranges are absolute `[start, end]` offsets into `sourceText`; the attribute
 * offsets are rebased onto the extracted segment before cutting.
 *
 * @param {string} sourceText - Full source text.
 * @param {?number[]} range - Absolute range of the content.
 * @param {?number[]} [attrsRange=null] - Absolute range of the annotation to remove.
 * @returns {string} The extracted text, or '' when `range` or `sourceText` is missing.
 */
export function extractContentFromRange(sourceText, range, attrsRange = null) {
  if (!range || !sourceText) return '';

  const segment = sourceText.substring(range[0], range[1]);
  if (!attrsRange) return segment.trim();

  // Remove MD-LD annotations, preserve content
  const head = segment.substring(0, attrsRange[0] - range[0]);
  const tail = segment.substring(attrsRange[1] - range[0]);
  return (head + tail).trim();
}
127
+
128
// List marker utilities - shared for advanced list processing

/**
 * Identifies the list marker at the start of a block's source text.
 *
 * Recognises the bullet markers `-`, `*`, `+` and ordered markers made of digits
 * followed by `.` or `)`, each followed by at least one whitespace character.
 * The block's range may be a `{ start, end }` object or a `[start, end]` array.
 *
 * @param {{range: (number[]|{start: number, end: number})}} block - Block with a source range.
 * @param {string} sourceText - Full source text the range refers to.
 * @returns {?{type: string, marker: string, indent: number}} Marker info
 *   (`type` is 'dash', 'asterisk', 'plus' or 'ordered'), or null when the range or
 *   source text is missing or the text does not start with a list marker.
 */
export function getListMarker(block, sourceText) {
  if (!block.range || !sourceText) return null;

  const { range } = block;
  const [start, end] = Array.isArray(range) ? range : [range.start, range.end];
  const text = sourceText.substring(start, end);
  const markerMatch = text.match(/^(\s*)([-*+]|\d+[.)])\s+/);

  if (!markerMatch) return null;

  const [, indent, marker] = markerMatch;
  const bulletTypes = { '-': 'dash', '*': 'asterisk', '+': 'plus' };

  return {
    // Anything that is not a bullet character is a digit-based (ordered) marker.
    type: bulletTypes[marker] ?? 'ordered',
    marker,
    indent: indent.length
  };
}
145
+
146
// CommonMark line processors - shared between parser and renderer
/**
 * Ordered line classifiers. Each entry has a `test(line)` predicate and a `process`
 * slot; every `process` is null here, i.e. no handler is attached in this table.
 * The final predicate accepts any line, so a first-match scan always finds an entry.
 */
export const PROCESSORS = [
  line => line.startsWith('```'),              // Code blocks handled separately
  line => line.startsWith('`'),                // Code spans handled separately
  line => PREFIX_REGEX.test(line),             // Prefixes handled separately
  line => HEADING_REGEX.test(line),            // Headings handled separately
  line => UNORDERED_LIST_REGEX.test(line),     // Lists handled separately
  line => BLOCKQUOTE_REGEX.test(line),         // Blockquotes handled separately
  line => STANDALONE_SUBJECT_REGEX.test(line), // Standalone subjects handled separately
  line => line.trim() === '',                  // Empty lines handled separately
  line => true                                 // Default: paragraph
].map(test => ({ test, process: null }));
158
+
159
// HTML escaping - shared utility

/**
 * Escapes the five HTML-significant characters `&`, `<`, `>`, `"` and `'`.
 * A single pass over the input is used, so entities produced for one character
 * are never re-escaped by a later replacement.
 *
 * @param {?string} text - Text to escape.
 * @returns {string} The escaped text, or '' when `text` is falsy.
 */
export function escapeHtml(text) {
  if (!text) return '';

  const entityFor = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#x27;'
  };
  return text.replace(/[&<>"']/g, (ch) => entityFor[ch]);
}
169
+
170
// Quad key generation - shared between parser and renderer

/**
 * Builds a `|`-separated string key from a subject, predicate and object.
 *
 * The key has five fields: the three `.value`s, then the object's datatype value and
 * language, each rendered as an empty string when absent or falsy.
 *
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @param {{value: string, datatype: ?{value: string}, language: ?string}} object
 * @returns {string} `subject|predicate|object|datatype|language`
 */
export function quadIndexKey(subject, predicate, object) {
  const datatypeIri = object.datatype ? object.datatype.value || '' : '';
  const languageTag = object.language ? object.language : '';
  return `${subject.value}|${predicate.value}|${object.value}|${datatypeIri}|${languageTag}`;
}
176
+
177
// IRI expansion and shortening - shared utilities

/**
 * Passes `iri` through `expandIRI` and then `shortenIRI`, both with the same context.
 *
 * NOTE(review): `expandIRI` and `shortenIRI` are neither defined nor imported in the
 * part of this module visible here — confirm they are in scope.
 *
 * @param {string} iri - IRI to process.
 * @param {Object} ctx - Context handed to both helpers.
 * @returns {*} Whatever `shortenIRI` returns for the expanded IRI.
 */
export function expandAndShortenIRI(iri, ctx) {
  return shortenIRI(expandIRI(iri, ctx), ctx);
}
182
+
183
// Subject resolution utilities - shared between parser and renderer

/**
 * Classifies a subject declaration string.
 *
 * Checks are applied in order: falsy input, `=#` prefix, `+` prefix, the exact
 * string `RESET`, and finally everything else.
 *
 * @param {?string} subjectDecl - The subject declaration.
 * @returns {string} 'none', 'fragment', 'soft-object', 'reset' or 'full-iri'.
 */
export function resolveSubjectType(subjectDecl) {
  if (!subjectDecl) return 'none';

  const prefixKinds = [
    ['=#', 'fragment'],
    ['+', 'soft-object']
  ];
  const byPrefix = prefixKinds.find(([prefix]) => subjectDecl.startsWith(prefix));
  if (byPrefix) return byPrefix[1];

  return subjectDecl === 'RESET' ? 'reset' : 'full-iri';
}
201
+
202
// Fragment resolution - shared logic

/**
 * Resolves a fragment declaration against the current subject.
 *
 * The first two characters of `fragment` (the `=#` marker) are dropped, any existing
 * `#...` part is removed from the current subject's IRI, and the two are joined with `#`.
 *
 * @param {string} fragment - Fragment declaration beginning with `=#`.
 * @param {?{value: string}} currentSubject - The current subject term.
 * @returns {string} The resolved IRI.
 * @throws {Error} When `currentSubject` is falsy.
 */
export function resolveFragment(fragment, currentSubject) {
  if (!currentSubject) {
    throw new Error('Fragment requires current subject');
  }

  const name = fragment.substring(2); // Remove =#
  const [withoutFragment] = currentSubject.value.split('#');
  return `${withoutFragment}#${name}`;
}
package/src/utils.js CHANGED
@@ -1,11 +1,4 @@
1
- export const DEFAULT_CONTEXT = {
2
- '@vocab': "http://www.w3.org/2000/01/rdf-schema#",
3
- rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
4
- rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
5
- xsd: 'http://www.w3.org/2001/XMLSchema#',
6
- sh: "http://www.w3.org/ns/shacl#",
7
- prov: 'http://www.w3.org/ns/prov#'
8
- };
1
+ import { URL_REGEX, DEFAULT_CONTEXT } from './shared.js';
9
2
 
10
3
  // Base Term class for RDF/JS compatibility
11
4
  export class Term {
@@ -258,7 +251,7 @@ export function expandIRI(term, ctx) {
258
251
  const t = raw.trim();
259
252
  let result;
260
253
 
261
- if (t.match(/^https?:/)) {
254
+ if (t.match(URL_REGEX)) {
262
255
  result = t;
263
256
  } else if (t.includes(':')) {
264
257
  const [prefix, ref] = t.split(':', 2);