mdld-parse 0.7.3 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -4
- package/src/constants.js +30 -0
- package/src/generate.js +36 -53
- package/src/index.js +1 -1
- package/src/locate.js +2 -17
- package/src/merge.js +32 -22
- package/src/parse.js +103 -325
- package/src/render.js +8 -45
- package/src/shared.js +360 -43
- package/src/utils.js +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdld-parse",
|
|
3
|
-
"version": "0.7.3",
|
|
3
|
+
"version": "0.7.5",
|
|
4
4
|
"description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -38,8 +38,5 @@
|
|
|
38
38
|
"homepage": "https://mdld.js.org",
|
|
39
39
|
"bugs": {
|
|
40
40
|
"url": "https://github.com/davay42/mdld-parse/issues"
|
|
41
|
-
},
|
|
42
|
-
"dependencies": {
|
|
43
|
-
"rdfa-parse": "^1.0.1"
|
|
44
41
|
}
|
|
45
42
|
}
|
package/src/constants.js
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared utilities for MD-LD Parser and Renderer
|
|
3
|
+
* Ensures DRY code and consistent CommonMark processing
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
export const DEFAULT_CONTEXT = {
|
|
9
|
+
'@vocab': "http://www.w3.org/2000/01/rdf-schema#",
|
|
10
|
+
rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
|
|
11
|
+
rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
|
|
12
|
+
xsd: 'http://www.w3.org/2001/XMLSchema#',
|
|
13
|
+
sh: "http://www.w3.org/ns/shacl#",
|
|
14
|
+
prov: 'http://www.w3.org/ns/prov#'
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
// CommonMark patterns - shared between parser and renderer
|
|
18
|
+
export const URL_REGEX = /^(https?|ftp|mailto|tag|nih|urn|uuid|did|web|ipfs|ipns|data|file|urn:uuid):/;
|
|
19
|
+
export const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;
|
|
20
|
+
export const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;
|
|
21
|
+
export const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
22
|
+
export const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
|
|
23
|
+
export const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
|
|
24
|
+
export const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
|
|
25
|
+
|
|
26
|
+
// Pre-compiled carrier patterns for performance
|
|
27
|
+
export const CARRIER_PATTERN_ARRAY = [
|
|
28
|
+
['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
|
|
29
|
+
['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
|
|
30
|
+
];
|
package/src/generate.js
CHANGED
|
@@ -1,21 +1,26 @@
|
|
|
1
1
|
import { shortenIRI, expandIRI, DataFactory } from './utils.js';
|
|
2
|
-
import { DEFAULT_CONTEXT } from './
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
function
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
2
|
+
import { DEFAULT_CONTEXT } from './constants.js';
|
|
3
|
+
import {
|
|
4
|
+
isLiteral,
|
|
5
|
+
collectUsedPrefixes,
|
|
6
|
+
sortQuadsByPredicate,
|
|
7
|
+
generatePrefixDeclaration,
|
|
8
|
+
generateLiteralText,
|
|
9
|
+
generateObjectText,
|
|
10
|
+
filterQuadsByType
|
|
11
|
+
} from './shared.js';
|
|
12
|
+
|
|
13
|
+
export function extractLocalName(iri, ctx = {}) {
|
|
14
|
+
if (!iri) return iri;
|
|
15
|
+
|
|
16
|
+
// Check for exact prefix matches first
|
|
17
|
+
for (const [prefix, namespace] of Object.entries(ctx)) {
|
|
18
|
+
if (iri.startsWith(namespace) || iri.startsWith(namespace.slice(0, -1))) {
|
|
19
|
+
return iri.substring(namespace.length);
|
|
20
|
+
}
|
|
21
|
+
}
|
|
17
22
|
|
|
18
|
-
|
|
23
|
+
// Fallback to original logic for local names
|
|
19
24
|
const separators = ['#', '/', ':'];
|
|
20
25
|
for (const sep of separators) {
|
|
21
26
|
const lastSep = iri.lastIndexOf(sep);
|
|
@@ -83,14 +88,14 @@ function groupQuadsBySubject(quads) {
|
|
|
83
88
|
|
|
84
89
|
function buildDeterministicMDLD(subjectGroups, context) {
|
|
85
90
|
let text = '';
|
|
91
|
+
const usedPrefixes = collectUsedPrefixes(subjectGroups, context);
|
|
86
92
|
|
|
87
93
|
// Add prefixes first (deterministic order), but exclude default context prefixes
|
|
88
94
|
const sortedPrefixes = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
|
|
89
95
|
for (const [prefix, namespace] of sortedPrefixes) {
|
|
90
96
|
// Skip default context prefixes - they're implicit in MDLD
|
|
91
|
-
if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix]) {
|
|
92
|
-
|
|
93
|
-
text += prefixDecl;
|
|
97
|
+
if (prefix !== '@vocab' && !prefix.startsWith('@') && !DEFAULT_CONTEXT[prefix] && usedPrefixes.has(prefix)) {
|
|
98
|
+
text += generatePrefixDeclaration(prefix, namespace);
|
|
94
99
|
}
|
|
95
100
|
}
|
|
96
101
|
|
|
@@ -105,47 +110,25 @@ function buildDeterministicMDLD(subjectGroups, context) {
|
|
|
105
110
|
const subjectQuads = subjectGroups.get(subjectIRI);
|
|
106
111
|
const shortSubject = shortenIRI(subjectIRI, context);
|
|
107
112
|
|
|
108
|
-
// Separate types, literals, and objects using
|
|
109
|
-
const types
|
|
110
|
-
const literals = subjectQuads.filter(q => isLiteral(q.object) && !isRdfType(q.predicate));
|
|
111
|
-
const objects = subjectQuads.filter(q => isNamedNode(q.object) && !isRdfType(q.predicate));
|
|
113
|
+
// Separate types, literals, and objects using shared utility
|
|
114
|
+
const { types, literals, objects } = filterQuadsByType(subjectQuads);
|
|
112
115
|
|
|
113
116
|
// Generate heading
|
|
114
|
-
const localSubjectName = extractLocalName(subjectIRI);
|
|
117
|
+
const localSubjectName = extractLocalName(subjectIRI, context);
|
|
115
118
|
const typeAnnotations = types.length > 0
|
|
116
|
-
? ' ' + types.map(t => '.' +
|
|
119
|
+
? ' ' + types.map(t => '.' + shortenIRI(t.object.value, context)).sort().join(' ')
|
|
117
120
|
: '';
|
|
118
121
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
text += headingText;
|
|
122
|
-
|
|
123
|
-
// Add literals (deterministic order)
|
|
124
|
-
const sortedLiterals = literals.sort((a, b) => a.predicate.value.localeCompare(b.predicate.value));
|
|
125
|
-
for (const quad of sortedLiterals) {
|
|
126
|
-
const predShort = shortenIRI(quad.predicate.value, context);
|
|
127
|
-
let annotation = predShort;
|
|
128
|
-
|
|
129
|
-
// Use DataFactory XSD constants for datatype comparison
|
|
130
|
-
const xsdString = 'http://www.w3.org/2001/XMLSchema#string';
|
|
131
|
-
if (quad.object.language) {
|
|
132
|
-
annotation += ` @${quad.object.language}`;
|
|
133
|
-
} else if (quad.object.datatype.value !== xsdString) {
|
|
134
|
-
annotation += ` ^^${shortenIRI(quad.object.datatype.value, context)}`;
|
|
135
|
-
}
|
|
122
|
+
text += `# ${localSubjectName} {=${shortSubject}${typeAnnotations}}\n`;
|
|
136
123
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
124
|
+
// Add literals and objects using shared utilities
|
|
125
|
+
sortQuadsByPredicate(literals).forEach(quad => {
|
|
126
|
+
text += generateLiteralText(quad, context);
|
|
127
|
+
});
|
|
140
128
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
const objShort = shortenIRI(quad.object.value, context);
|
|
145
|
-
const predShort = shortenIRI(quad.predicate.value, context);
|
|
146
|
-
const objectText = `[${objShort}] {+${objShort} ?${predShort}}\n`;
|
|
147
|
-
text += objectText;
|
|
148
|
-
}
|
|
129
|
+
sortQuadsByPredicate(objects).forEach(quad => {
|
|
130
|
+
text += generateObjectText(quad, context);
|
|
131
|
+
});
|
|
149
132
|
|
|
150
133
|
text += '\n';
|
|
151
134
|
}
|
package/src/index.js
CHANGED
|
@@ -3,7 +3,7 @@ export { merge } from './merge.js';
|
|
|
3
3
|
export { generate } from './generate.js';
|
|
4
4
|
export { locate } from './locate.js';
|
|
5
5
|
export { render } from './render.js';
|
|
6
|
-
export { DEFAULT_CONTEXT } from './
|
|
6
|
+
export { DEFAULT_CONTEXT } from './constants.js';
|
|
7
7
|
export {
|
|
8
8
|
DataFactory,
|
|
9
9
|
hash,
|
package/src/locate.js
CHANGED
|
@@ -18,21 +18,6 @@ export function locate(quad, origin) {
|
|
|
18
18
|
return null;
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
-
//
|
|
22
|
-
|
|
23
|
-
if (!entry) {
|
|
24
|
-
return null;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
// Return the lean origin entry structure
|
|
28
|
-
return {
|
|
29
|
-
blockId: entry.blockId,
|
|
30
|
-
range: entry.range,
|
|
31
|
-
carrierType: entry.carrierType,
|
|
32
|
-
subject: entry.subject,
|
|
33
|
-
predicate: entry.predicate,
|
|
34
|
-
context: entry.context,
|
|
35
|
-
value: entry.value,
|
|
36
|
-
polarity: entry.polarity
|
|
37
|
-
};
|
|
21
|
+
// Return the origin entry directly - no need to create new object
|
|
22
|
+
return origin.quadIndex.get(quadKey) || null;
|
|
38
23
|
}
|
package/src/merge.js
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
import { parse } from './parse.js';
|
|
2
|
-
import {
|
|
2
|
+
import { quadToKeyForOrigin } from './utils.js';
|
|
3
|
+
import { DEFAULT_CONTEXT } from './constants.js';
|
|
3
4
|
|
|
4
5
|
/**
|
|
5
|
-
* Creates a unique key for quad identity matching
|
|
6
|
+
* Creates a unique key for quad identity matching - using shared utility
|
|
6
7
|
* @param {Quad} quad
|
|
7
8
|
* @returns {string}
|
|
8
9
|
*/
|
|
9
10
|
function quadKey(quad) {
|
|
10
|
-
|
|
11
|
-
const language = quad.object.language || '';
|
|
12
|
-
return `${quad.subject.value}|${quad.predicate.value}|${quad.object.value}|${datatype}|${language}`;
|
|
11
|
+
return quadToKeyForOrigin(quad);
|
|
13
12
|
}
|
|
14
13
|
|
|
15
14
|
/**
|
|
@@ -42,6 +41,7 @@ export function merge(docs, options = {}) {
|
|
|
42
41
|
const allDocuments = [];
|
|
43
42
|
const quadIndex = new Map();
|
|
44
43
|
const allStatements = []; // Collect statements from all documents
|
|
44
|
+
const accumulatedContext = new Map(); // Track all unique prefixes across documents
|
|
45
45
|
|
|
46
46
|
// Process each document in order
|
|
47
47
|
for (let i = 0; i < docs.length; i++) {
|
|
@@ -53,6 +53,16 @@ export function merge(docs, options = {}) {
|
|
|
53
53
|
// Normalize input to ParseResult
|
|
54
54
|
const doc = normalizeInput(input, options, docContext);
|
|
55
55
|
|
|
56
|
+
// Accumulate context from this document
|
|
57
|
+
if (doc.context) {
|
|
58
|
+
for (const [prefix, namespace] of Object.entries(doc.context)) {
|
|
59
|
+
// Don't override default context entries unless explicitly provided in options
|
|
60
|
+
if (!accumulatedContext.has(prefix) && !DEFAULT_CONTEXT[prefix]) {
|
|
61
|
+
accumulatedContext.set(prefix, namespace);
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
56
66
|
// Create document origin
|
|
57
67
|
const documentOrigin = {
|
|
58
68
|
index: i,
|
|
@@ -74,14 +84,12 @@ export function merge(docs, options = {}) {
|
|
|
74
84
|
sessionBuffer.set(key, quad);
|
|
75
85
|
|
|
76
86
|
// Create quad origin with document index and polarity
|
|
77
|
-
const existingOrigin = doc.origin.quadIndex.get(
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
});
|
|
84
|
-
}
|
|
87
|
+
const existingOrigin = doc.origin.quadIndex.get(key);
|
|
88
|
+
quadIndex.set(key, {
|
|
89
|
+
...(existingOrigin || {}),
|
|
90
|
+
documentIndex: i,
|
|
91
|
+
polarity: '+'
|
|
92
|
+
});
|
|
85
93
|
}
|
|
86
94
|
|
|
87
95
|
// Fold retractions
|
|
@@ -97,14 +105,12 @@ export function merge(docs, options = {}) {
|
|
|
97
105
|
}
|
|
98
106
|
|
|
99
107
|
// Create quad origin for remove quads
|
|
100
|
-
const existingOrigin = doc.origin.quadIndex.get(
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
});
|
|
107
|
-
}
|
|
108
|
+
const existingOrigin = doc.origin.quadIndex.get(key);
|
|
109
|
+
quadIndex.set(key, {
|
|
110
|
+
...(existingOrigin || {}),
|
|
111
|
+
documentIndex: i,
|
|
112
|
+
polarity: '-'
|
|
113
|
+
});
|
|
108
114
|
}
|
|
109
115
|
}
|
|
110
116
|
|
|
@@ -119,7 +125,11 @@ export function merge(docs, options = {}) {
|
|
|
119
125
|
};
|
|
120
126
|
|
|
121
127
|
// Build final context (union of all contexts)
|
|
122
|
-
const finalContext = {
|
|
128
|
+
const finalContext = {
|
|
129
|
+
...DEFAULT_CONTEXT,
|
|
130
|
+
...options.context,
|
|
131
|
+
...Object.fromEntries(accumulatedContext)
|
|
132
|
+
};
|
|
123
133
|
|
|
124
134
|
// Enforce hard invariant
|
|
125
135
|
const quadKeys = new Set(finalQuads.map(quadKey));
|
package/src/parse.js
CHANGED
|
@@ -1,93 +1,128 @@
|
|
|
1
1
|
import {
|
|
2
2
|
DataFactory,
|
|
3
3
|
expandIRI,
|
|
4
|
-
parseSemanticBlock,
|
|
5
4
|
quadIndexKey,
|
|
6
5
|
createLiteral,
|
|
7
6
|
hash
|
|
8
7
|
} from './utils.js';
|
|
9
|
-
import {
|
|
8
|
+
import {
|
|
9
|
+
DEFAULT_CONTEXT,
|
|
10
|
+
URL_REGEX,
|
|
11
|
+
FENCE_REGEX,
|
|
12
|
+
PREFIX_REGEX,
|
|
13
|
+
HEADING_REGEX,
|
|
14
|
+
UNORDERED_LIST_REGEX,
|
|
15
|
+
BLOCKQUOTE_REGEX,
|
|
16
|
+
STANDALONE_SUBJECT_REGEX,
|
|
17
|
+
CARRIER_PATTERN_ARRAY,
|
|
18
|
+
|
|
19
|
+
} from './constants.js';
|
|
20
|
+
import {
|
|
10
21
|
|
|
11
|
-
|
|
12
|
-
|
|
22
|
+
getFenceClosePattern,
|
|
23
|
+
calcRangeInfo,
|
|
24
|
+
calcAttrsRange,
|
|
25
|
+
createToken,
|
|
26
|
+
createCarrier,
|
|
27
|
+
createListToken,
|
|
28
|
+
parseSemCached,
|
|
29
|
+
parseLangAndAttrs,
|
|
30
|
+
findMatchingBracket,
|
|
31
|
+
extractUrlFromBrackets,
|
|
32
|
+
extractAttributesFromText,
|
|
33
|
+
determineCarrierType,
|
|
34
|
+
calcCarrierRanges,
|
|
35
|
+
extractCleanText,
|
|
36
|
+
RDF_TYPE,
|
|
37
|
+
RDF_STATEMENT,
|
|
38
|
+
RDF_SUBJECT,
|
|
39
|
+
RDF_PREDICATE,
|
|
40
|
+
RDF_OBJECT,
|
|
41
|
+
createLeanOriginEntry,
|
|
42
|
+
resolveSubject,
|
|
43
|
+
resolveObject,
|
|
44
|
+
processTokenWithBlockTracking
|
|
45
|
+
} from './shared.js';
|
|
13
46
|
|
|
14
|
-
function getFenceClosePattern(fenceChar) {
|
|
15
|
-
if (!FENCE_CLOSE_PATTERNS.has(fenceChar)) {
|
|
16
|
-
FENCE_CLOSE_PATTERNS.set(fenceChar, new RegExp(`^(${fenceChar}{3,})`));
|
|
17
|
-
}
|
|
18
|
-
return FENCE_CLOSE_PATTERNS.get(fenceChar);
|
|
19
|
-
}
|
|
20
47
|
|
|
21
|
-
function
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
48
|
+
export function parse(text, options = {}) {
|
|
49
|
+
const state = {
|
|
50
|
+
ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
|
|
51
|
+
df: options.dataFactory || DataFactory,
|
|
52
|
+
quads: [],
|
|
53
|
+
quadBuffer: new Map(),
|
|
54
|
+
removeSet: new Set(),
|
|
55
|
+
origin: {
|
|
56
|
+
quadIndex: new Map(),
|
|
57
|
+
blocks: new Map(),
|
|
58
|
+
documentStructure: []
|
|
59
|
+
},
|
|
60
|
+
currentSubject: null,
|
|
61
|
+
tokens: null,
|
|
62
|
+
currentTokenIndex: -1,
|
|
63
|
+
statements: [],
|
|
64
|
+
statementCandidates: new Map(),
|
|
65
|
+
currentBlock: null,
|
|
66
|
+
blockStack: []
|
|
31
67
|
};
|
|
32
|
-
}
|
|
33
68
|
|
|
34
|
-
|
|
35
|
-
|
|
69
|
+
state.tokens = scanTokens(text);
|
|
70
|
+
|
|
71
|
+
// Single-pass processing: resolve prefixes AND process tokens together
|
|
72
|
+
for (let i = 0; i < state.tokens.length; i++) {
|
|
73
|
+
const token = state.tokens[i];
|
|
74
|
+
state.currentTokenIndex = i;
|
|
75
|
+
|
|
76
|
+
// Handle prefix tokens immediately during main pass
|
|
77
|
+
if (token.type === 'prefix') {
|
|
78
|
+
let resolvedIri = token.iri;
|
|
79
|
+
if (token.iri.includes(':')) {
|
|
80
|
+
const colonIndex = token.iri.indexOf(':');
|
|
81
|
+
const potentialPrefix = token.iri.substring(0, colonIndex);
|
|
82
|
+
const reference = token.iri.substring(colonIndex + 1);
|
|
83
|
+
if (state.ctx[potentialPrefix] && potentialPrefix !== '@vocab') {
|
|
84
|
+
resolvedIri = state.ctx[potentialPrefix] + reference;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
state.ctx[token.prefix] = resolvedIri;
|
|
88
|
+
continue; // Skip token processor for prefixes
|
|
89
|
+
}
|
|
36
90
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
let sem = semCache[attrs];
|
|
40
|
-
if (!sem) {
|
|
41
|
-
sem = Object.freeze(parseSemanticBlock(attrs));
|
|
42
|
-
semCache[attrs] = sem;
|
|
91
|
+
// Process all other tokens
|
|
92
|
+
TOKEN_PROCESSORS[token.type]?.(token, state);
|
|
43
93
|
}
|
|
44
|
-
return sem;
|
|
45
|
-
}
|
|
46
94
|
|
|
47
|
-
|
|
48
|
-
const
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
valueRange: [lineStart + valueStartInLine, lineStart + valueStartInLine + valueLength],
|
|
53
|
-
attrsRange: calcAttrsRange(line, attrs, lineStart)
|
|
54
|
-
};
|
|
55
|
-
}
|
|
95
|
+
// Optimized quad filtering - use Set.has() instead of array.includes()
|
|
96
|
+
const quadKeys = new Set();
|
|
97
|
+
for (const quad of state.quads) {
|
|
98
|
+
quadKeys.add(quadIndexKey(quad.subject, quad.predicate, quad.object));
|
|
99
|
+
}
|
|
56
100
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
const
|
|
60
|
-
|
|
61
|
-
|
|
101
|
+
// Direct Set iteration - more efficient than filter()
|
|
102
|
+
const filteredRemove = [];
|
|
103
|
+
for (const quad of state.removeSet) {
|
|
104
|
+
const key = quadIndexKey(quad.subject, quad.predicate, quad.object);
|
|
105
|
+
if (!quadKeys.has(key)) {
|
|
106
|
+
filteredRemove.push(quad);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
62
109
|
|
|
63
|
-
|
|
64
|
-
const token = { type, range, text, attrs, attrsRange, valueRange, ...extra };
|
|
65
|
-
Object.defineProperty(token, '_carriers', {
|
|
66
|
-
enumerable: false, writable: true, value: null
|
|
67
|
-
});
|
|
68
|
-
return token;
|
|
110
|
+
return { quads: state.quads, remove: filteredRemove, statements: state.statements, origin: state.origin, context: state.ctx };
|
|
69
111
|
}
|
|
70
112
|
|
|
113
|
+
|
|
114
|
+
// Cache for fence regex patterns - using shared utility
|
|
115
|
+
|
|
71
116
|
function getCarriers(token) {
|
|
72
117
|
if (token.type === 'code') return [];
|
|
73
118
|
return token._carriers || (token._carriers = extractInlineCarriers(token.text, token.range[0]));
|
|
74
119
|
}
|
|
75
120
|
|
|
76
|
-
const createListToken = (type, line, lineStart, pos, match) => {
|
|
77
|
-
const attrs = match[4] || null;
|
|
78
|
-
const prefix = match[1].length + (match[2] ? match[2].length : 0);
|
|
79
|
-
const rangeInfo = calcRangeInfo(line, attrs, lineStart, prefix, match[3].length);
|
|
80
|
-
return createToken(type, [lineStart, pos - 1], match[3].trim(), attrs,
|
|
81
|
-
rangeInfo.attrsRange, rangeInfo.valueRange, { indent: match[1].length });
|
|
82
|
-
};
|
|
83
|
-
|
|
84
121
|
function scanTokens(text) {
|
|
85
122
|
const tokens = [];
|
|
86
123
|
const lines = text.split('\n');
|
|
87
124
|
let pos = 0;
|
|
88
125
|
let codeBlock = null;
|
|
89
|
-
|
|
90
|
-
// Direct lookup instead of linear search
|
|
91
126
|
const PROCESSORS = [
|
|
92
127
|
{ type: 'fence', test: line => FENCE_REGEX.test(line.trim()), process: handleFence },
|
|
93
128
|
{ type: 'content', test: () => codeBlock, process: line => codeBlock.content.push(line) },
|
|
@@ -194,16 +229,6 @@ function scanTokens(text) {
|
|
|
194
229
|
return tokens;
|
|
195
230
|
}
|
|
196
231
|
|
|
197
|
-
function createCarrier(type, text, attrs, attrsRange, valueRange, range, pos, extra = {}) {
|
|
198
|
-
return { type, text, attrs, attrsRange, valueRange, range, pos, ...extra };
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
// Pre-compiled carrier patterns for better performance
|
|
202
|
-
const CARRIER_PATTERN_ARRAY = [
|
|
203
|
-
['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
|
|
204
|
-
['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
|
|
205
|
-
];
|
|
206
|
-
|
|
207
232
|
function extractInlineCarriers(text, baseOffset = 0) {
|
|
208
233
|
const carriers = [];
|
|
209
234
|
let pos = 0;
|
|
@@ -268,74 +293,6 @@ function extractInlineCarriers(text, baseOffset = 0) {
|
|
|
268
293
|
return carriers;
|
|
269
294
|
}
|
|
270
295
|
|
|
271
|
-
function calcCarrierRanges(match, baseOffset, matchStart) {
|
|
272
|
-
const valueStart = baseOffset + matchStart + match[0].indexOf(match[1]);
|
|
273
|
-
const valueEnd = valueStart + match[1].length;
|
|
274
|
-
const attrsStart = baseOffset + matchStart + match[0].indexOf('{');
|
|
275
|
-
const attrsEnd = attrsStart + match[2].length + 2; // +2 for { and }
|
|
276
|
-
return {
|
|
277
|
-
valueRange: [valueStart, valueEnd],
|
|
278
|
-
attrsRange: [attrsStart + 1, attrsEnd - 1], // Exclude braces
|
|
279
|
-
range: [baseOffset + matchStart, attrsEnd],
|
|
280
|
-
pos: matchStart + match[0].length // pos should be relative to current text, not document
|
|
281
|
-
};
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
function findMatchingBracket(text, bracketStart) {
|
|
285
|
-
let bracketDepth = 1;
|
|
286
|
-
let bracketEnd = bracketStart + 1;
|
|
287
|
-
|
|
288
|
-
while (bracketEnd < text.length && bracketDepth > 0) {
|
|
289
|
-
if (text[bracketEnd] === '[') bracketDepth++;
|
|
290
|
-
else if (text[bracketEnd] === ']') bracketDepth--;
|
|
291
|
-
bracketEnd++;
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
return bracketDepth > 0 ? null : bracketEnd;
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
function extractUrlFromBrackets(text, bracketEnd) {
|
|
298
|
-
let url = null;
|
|
299
|
-
let spanEnd = bracketEnd;
|
|
300
|
-
|
|
301
|
-
if (text[spanEnd] === '(') {
|
|
302
|
-
const parenEnd = text.indexOf(')', spanEnd);
|
|
303
|
-
if (parenEnd !== -1) {
|
|
304
|
-
url = text.substring(spanEnd + 1, parenEnd);
|
|
305
|
-
spanEnd = parenEnd + 1;
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
return { url, spanEnd };
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
function extractAttributesFromText(text, spanEnd, baseOffset) {
|
|
313
|
-
let attrs = null;
|
|
314
|
-
let attrsRange = null;
|
|
315
|
-
const remaining = text.substring(spanEnd);
|
|
316
|
-
|
|
317
|
-
const wsMatch = remaining.match(/^\s+/);
|
|
318
|
-
const attrsStart = wsMatch ? wsMatch[0].length : 0;
|
|
319
|
-
|
|
320
|
-
if (remaining[attrsStart] === '{') {
|
|
321
|
-
const braceEnd = remaining.indexOf('}', attrsStart);
|
|
322
|
-
if (braceEnd !== -1) {
|
|
323
|
-
attrs = remaining.substring(attrsStart, braceEnd + 1);
|
|
324
|
-
const absStart = baseOffset + spanEnd + attrsStart;
|
|
325
|
-
attrsRange = [absStart, absStart + attrs.length];
|
|
326
|
-
spanEnd += braceEnd + 1;
|
|
327
|
-
}
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
return { attrs, attrsRange, finalSpanEnd: spanEnd };
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
function determineCarrierType(url) {
|
|
334
|
-
if (url && !url.startsWith('=')) {
|
|
335
|
-
return { carrierType: 'link', resourceIRI: url };
|
|
336
|
-
}
|
|
337
|
-
return { carrierType: 'span', resourceIRI: null };
|
|
338
|
-
}
|
|
339
296
|
|
|
340
297
|
function createBlockEntry(token, state) {
|
|
341
298
|
const blockId = token._blockId || hash(`${token.type}:${token.range?.[0]}:${token.range?.[1]}`);
|
|
@@ -364,31 +321,6 @@ function createBlockEntry(token, state) {
|
|
|
364
321
|
return blockEntry;
|
|
365
322
|
}
|
|
366
323
|
|
|
367
|
-
function extractCleanText(token) {
|
|
368
|
-
if (!token.text) return '';
|
|
369
|
-
|
|
370
|
-
let text = token.text;
|
|
371
|
-
|
|
372
|
-
// Remove semantic annotations
|
|
373
|
-
if (token.attrsRange) {
|
|
374
|
-
const beforeAttrs = text.substring(0, token.attrsRange[0] - (token.range?.[0] || 0));
|
|
375
|
-
const afterAttrs = text.substring(token.attrsRange[1] - (token.range?.[0] || 0));
|
|
376
|
-
text = beforeAttrs + afterAttrs;
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
// Clean based on token type
|
|
380
|
-
switch (token.type) {
|
|
381
|
-
case 'heading':
|
|
382
|
-
return text.replace(/^#+\s*/, '').trim();
|
|
383
|
-
case 'list':
|
|
384
|
-
return text.replace(/^[-*+]\s*/, '').trim();
|
|
385
|
-
case 'blockquote':
|
|
386
|
-
return text.replace(/^>\s*/, '').trim();
|
|
387
|
-
default:
|
|
388
|
-
return text.trim();
|
|
389
|
-
}
|
|
390
|
-
}
|
|
391
|
-
|
|
392
324
|
function enrichBlockFromAnnotation(blockEntry, sem, carrier, state) {
|
|
393
325
|
// Update subject if available
|
|
394
326
|
if (sem.subject && sem.subject !== 'RESET') {
|
|
@@ -539,17 +471,8 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
|
|
|
539
471
|
// Detect rdf:Statement pattern during single-pass parsing
|
|
540
472
|
detectStatementPatternSinglePass(quad, dataFactory, meta, statements, statementCandidates);
|
|
541
473
|
|
|
542
|
-
// Create lean origin entry
|
|
543
|
-
const originEntry =
|
|
544
|
-
blockId: block.id,
|
|
545
|
-
range: block.range,
|
|
546
|
-
carrierType: block.carrierType,
|
|
547
|
-
subject: subject.value,
|
|
548
|
-
predicate: predicate.value,
|
|
549
|
-
context: block.context, // Direct reference instead of spread
|
|
550
|
-
polarity: meta?.remove ? '-' : '+',
|
|
551
|
-
value: block.text || ''
|
|
552
|
-
};
|
|
474
|
+
// Create lean origin entry using shared utility
|
|
475
|
+
const originEntry = createLeanOriginEntry(block, subject, predicate, meta);
|
|
553
476
|
|
|
554
477
|
quadIndex.set(quadKey, originEntry);
|
|
555
478
|
|
|
@@ -563,13 +486,6 @@ function emitQuad(quads, quadBuffer, removeSet, quadIndex, block, subject, predi
|
|
|
563
486
|
}
|
|
564
487
|
}
|
|
565
488
|
|
|
566
|
-
// Extract RDF constants once at module level for efficiency
|
|
567
|
-
const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
|
|
568
|
-
const RDF_STATEMENT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement';
|
|
569
|
-
const RDF_SUBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject';
|
|
570
|
-
const RDF_PREDICATE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate';
|
|
571
|
-
const RDF_OBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#object';
|
|
572
|
-
|
|
573
489
|
function detectStatementPatternSinglePass(quad, dataFactory, meta, statements = null, statementCandidates = null) {
|
|
574
490
|
// Skip if not called from parse context (for testing compatibility)
|
|
575
491
|
if (!statements || !statementCandidates) return;
|
|
@@ -619,30 +535,6 @@ function detectStatementPatternSinglePass(quad, dataFactory, meta, statements =
|
|
|
619
535
|
}
|
|
620
536
|
}
|
|
621
537
|
|
|
622
|
-
const resolveFragment = (fragment, state) => {
|
|
623
|
-
if (!state.currentSubject) return null;
|
|
624
|
-
const subjectValue = state.currentSubject.value;
|
|
625
|
-
const hashIndex = subjectValue.indexOf('#');
|
|
626
|
-
const baseIRI = hashIndex > -1 ? subjectValue.slice(0, hashIndex) : subjectValue;
|
|
627
|
-
return state.df.namedNode(baseIRI + '#' + fragment);
|
|
628
|
-
};
|
|
629
|
-
|
|
630
|
-
function resolveSubject(sem, state) {
|
|
631
|
-
if (!sem.subject) return null;
|
|
632
|
-
if (sem.subject === 'RESET') {
|
|
633
|
-
state.currentSubject = null;
|
|
634
|
-
return null;
|
|
635
|
-
}
|
|
636
|
-
if (sem.subject.startsWith('=#')) return resolveFragment(sem.subject.substring(2), state);
|
|
637
|
-
return state.df.namedNode(expandIRI(sem.subject, state.ctx));
|
|
638
|
-
}
|
|
639
|
-
|
|
640
|
-
function resolveObject(sem, state) {
|
|
641
|
-
if (!sem.object) return null;
|
|
642
|
-
if (sem.object.startsWith('#')) return resolveFragment(sem.object.substring(1), state);
|
|
643
|
-
return state.df.namedNode(expandIRI(sem.object, state.ctx));
|
|
644
|
-
}
|
|
645
|
-
|
|
646
538
|
const createTypeQuad = (typeIRI, subject, state, block, entryIndex = null) => {
|
|
647
539
|
const expandedType = expandIRI(typeIRI, state.ctx);
|
|
648
540
|
const typeInfo = typeof entryIndex === 'object' ? entryIndex : { entryIndex, remove: false };
|
|
@@ -748,123 +640,9 @@ function processStandaloneSubject(token, state) {
|
|
|
748
640
|
}
|
|
749
641
|
|
|
750
642
|
const TOKEN_PROCESSORS = {
|
|
751
|
-
heading: (token, state) =>
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
processTokenAnnotations(token, state, token.type);
|
|
757
|
-
|
|
758
|
-
state.blockStack.pop();
|
|
759
|
-
state.currentBlock = state.blockStack.length > 0 ?
|
|
760
|
-
state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
|
|
761
|
-
},
|
|
762
|
-
code: (token, state) => {
|
|
763
|
-
const blockEntry = createBlockEntry(token, state);
|
|
764
|
-
state.currentBlock = blockEntry;
|
|
765
|
-
state.blockStack.push(blockEntry.id);
|
|
766
|
-
|
|
767
|
-
processTokenAnnotations(token, state, token.type);
|
|
768
|
-
|
|
769
|
-
state.blockStack.pop();
|
|
770
|
-
state.currentBlock = state.blockStack.length > 0 ?
|
|
771
|
-
state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
|
|
772
|
-
},
|
|
773
|
-
blockquote: (token, state) => {
|
|
774
|
-
const blockEntry = createBlockEntry(token, state);
|
|
775
|
-
state.currentBlock = blockEntry;
|
|
776
|
-
state.blockStack.push(blockEntry.id);
|
|
777
|
-
|
|
778
|
-
processTokenAnnotations(token, state, token.type);
|
|
779
|
-
|
|
780
|
-
state.blockStack.pop();
|
|
781
|
-
state.currentBlock = state.blockStack.length > 0 ?
|
|
782
|
-
state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
|
|
783
|
-
},
|
|
784
|
-
para: (token, state) => {
|
|
785
|
-
const blockEntry = createBlockEntry(token, state);
|
|
786
|
-
state.currentBlock = blockEntry;
|
|
787
|
-
state.blockStack.push(blockEntry.id);
|
|
788
|
-
|
|
789
|
-
processStandaloneSubject(token, state);
|
|
790
|
-
processTokenAnnotations(token, state, token.type);
|
|
791
|
-
|
|
792
|
-
state.blockStack.pop();
|
|
793
|
-
state.currentBlock = state.blockStack.length > 0 ?
|
|
794
|
-
state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
|
|
795
|
-
},
|
|
796
|
-
list: (token, state) => {
|
|
797
|
-
const blockEntry = createBlockEntry(token, state);
|
|
798
|
-
state.currentBlock = blockEntry;
|
|
799
|
-
state.blockStack.push(blockEntry.id);
|
|
800
|
-
|
|
801
|
-
processTokenAnnotations(token, state, token.type);
|
|
802
|
-
|
|
803
|
-
state.blockStack.pop();
|
|
804
|
-
state.currentBlock = state.blockStack.length > 0 ?
|
|
805
|
-
state.origin.blocks.get(state.blockStack[state.blockStack.length - 1]) : null;
|
|
806
|
-
},
|
|
643
|
+
heading: (token, state) => processTokenWithBlockTracking(token, state, processTokenAnnotations, createBlockEntry),
|
|
644
|
+
code: (token, state) => processTokenWithBlockTracking(token, state, processTokenAnnotations, createBlockEntry),
|
|
645
|
+
blockquote: (token, state) => processTokenWithBlockTracking(token, state, processTokenAnnotations, createBlockEntry),
|
|
646
|
+
para: (token, state) => processTokenWithBlockTracking(token, state, processTokenAnnotations, createBlockEntry, [processStandaloneSubject]),
|
|
647
|
+
list: (token, state) => processTokenWithBlockTracking(token, state, processTokenAnnotations, createBlockEntry),
|
|
807
648
|
};
|
|
808
|
-
|
|
809
|
-
/**
 * Parse MD-LD source text into quads plus origin metadata.
 *
 * Runs in three steps: collect prefix declarations into the context,
 * dispatch every token to its entry in TOKEN_PROCESSORS, then drop removal
 * requests for quads that the same document also asserts.
 *
 * @param {string} text - MD-LD source.
 * @param {object} [options]
 * @param {object} [options.context] - Prefix map merged over DEFAULT_CONTEXT.
 * @param {object} [options.dataFactory] - Term factory; defaults to DataFactory.
 * @returns {{quads: Array, remove: Array, statements: Array, origin: object, context: object}}
 */
export function parse(text, options = {}) {
    const state = {
        ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
        df: options.dataFactory || DataFactory,
        quads: [],
        quadBuffer: new Map(),
        removeSet: new Set(),
        origin: {
            quadIndex: new Map(),
            blocks: new Map(),
            documentStructure: []
        },
        currentSubject: null,
        tokens: null,
        currentTokenIndex: -1,
        statements: [],
        statementCandidates: new Map(), // Track incomplete rdf:Statement patterns
        currentBlock: null,
        blockStack: []
    };

    state.tokens = scanTokens(text);

    // Step 1: register prefixes first so later tokens can use any of them.
    for (const token of state.tokens) {
        if (token.type !== 'prefix') continue;

        const colonAt = token.iri.indexOf(':');
        let target = token.iri;
        if (colonAt !== -1) {
            // A declaration may itself be written as a CURIE of a known prefix.
            const head = token.iri.substring(0, colonAt);
            const tail = token.iri.substring(colonAt + 1);
            if (state.ctx[head] && head !== '@vocab') {
                target = state.ctx[head] + tail;
            }
        }
        state.ctx[token.prefix] = target;
    }

    // Step 2: dispatch tokens; types without a processor are ignored.
    for (const [index, token] of state.tokens.entries()) {
        state.currentTokenIndex = index;
        TOKEN_PROCESSORS[token.type]?.(token, state);
    }

    // Step 3: a removal is only reported when the quad is not also asserted.
    const keyOf = (quad) => quadIndexKey(quad.subject, quad.predicate, quad.object);
    const assertedKeys = new Set();
    for (const quad of state.quads) {
        assertedKeys.add(keyOf(quad));
    }

    const remove = [];
    for (const quad of state.removeSet) {
        if (!assertedKeys.has(keyOf(quad))) {
            remove.push(quad);
        }
    }

    return {
        quads: state.quads,
        remove,
        statements: state.statements,
        origin: state.origin,
        context: state.ctx
    };
}
|
package/src/render.js
CHANGED
|
@@ -6,7 +6,12 @@ import {
|
|
|
6
6
|
parseSemanticBlock,
|
|
7
7
|
hash
|
|
8
8
|
} from './utils.js';
|
|
9
|
-
import {
|
|
9
|
+
import {
|
|
10
|
+
escapeHtml,
|
|
11
|
+
getIndentLevel,
|
|
12
|
+
processPredicates
|
|
13
|
+
} from './shared.js';
|
|
14
|
+
import { DEFAULT_CONTEXT } from './constants.js';
|
|
10
15
|
|
|
11
16
|
/**
|
|
12
17
|
* Render MD-LD to HTML+RDFa
|
|
@@ -302,17 +307,6 @@ function parseMarkdownList(markdownList, blocks, state) {
|
|
|
302
307
|
return html;
|
|
303
308
|
}
|
|
304
309
|
|
|
305
|
-
/**
 * Count the leading whitespace characters of a block's source slice.
 *
 * @param {{range?: {start: number, end: number}}} block - Block with an optional source range.
 * @param {string} sourceText - Full document text the range refers to.
 * @returns {number} Number of leading whitespace characters, or 0 when the
 *   range or the source text is missing.
 */
function getIndentLevel(block, sourceText) {
    const { range } = block;
    if (!(range && sourceText)) return 0;

    const snippet = sourceText.substring(range.start, range.end);
    // `^\s*` always matches (possibly the empty string), so exec never returns null.
    return /^\s*/.exec(snippet)[0].length;
}
|
|
315
|
-
|
|
316
310
|
/**
|
|
317
311
|
* Render a single block
|
|
318
312
|
*/
|
|
@@ -408,26 +402,9 @@ function buildRDFaAttrsFromBlock(block, ctx) {
|
|
|
408
402
|
attrs.push(`typeof="${escapeHtml(types)}"`);
|
|
409
403
|
}
|
|
410
404
|
|
|
411
|
-
// Predicates
|
|
405
|
+
// Predicates using shared utility
|
|
412
406
|
if (block.predicates && block.predicates.length > 0) {
|
|
413
|
-
const literalProps =
|
|
414
|
-
const objectProps = [];
|
|
415
|
-
const reverseProps = [];
|
|
416
|
-
|
|
417
|
-
block.predicates.forEach(pred => {
|
|
418
|
-
const iri = typeof pred === 'string' ? pred : pred.iri;
|
|
419
|
-
const expanded = expandIRI(iri, ctx);
|
|
420
|
-
const shortened = shortenIRI(expanded, ctx);
|
|
421
|
-
const form = typeof pred === 'string' ? '' : (pred.form || '');
|
|
422
|
-
|
|
423
|
-
if (form === '!') {
|
|
424
|
-
reverseProps.push(shortened);
|
|
425
|
-
} else if (form === '?') {
|
|
426
|
-
objectProps.push(shortened);
|
|
427
|
-
} else {
|
|
428
|
-
literalProps.push(shortened);
|
|
429
|
-
}
|
|
430
|
-
});
|
|
407
|
+
const { literalProps, objectProps, reverseProps } = processPredicates(block.predicates, ctx);
|
|
431
408
|
|
|
432
409
|
if (literalProps.length > 0) {
|
|
433
410
|
attrs.push(`property="${escapeHtml(literalProps.join(' '))}"`);
|
|
@@ -474,17 +451,3 @@ function wrapWithRDFaContext(html, ctx) {
|
|
|
474
451
|
|
|
475
452
|
return `<div${prefixDecl}${vocabDecl}>${html}</div>`;
|
|
476
453
|
}
|
|
477
|
-
|
|
478
|
-
/**
 * Escape HTML special characters so the text is safe inside element
 * content and quoted attribute values.
 *
 * @param {*} text - Value to escape; falsy values are treated as ''.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeHtml(text) {
    // Each special character maps to its entity; a map that sends a character
    // to itself would escape nothing.
    const entityFor = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    return String(text || '').replace(/[&<>"']/g, (ch) => entityFor[ch]);
}
|
package/src/shared.js
CHANGED
|
@@ -1,37 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
* Ensures DRY code and consistent CommonMark processing
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
/** Prefix map applied before any document-level prefix declarations. */
export const DEFAULT_CONTEXT = {
    '@vocab': "http://www.w3.org/2000/01/rdf-schema#",
    rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
    xsd: 'http://www.w3.org/2001/XMLSchema#',
    sh: "http://www.w3.org/ns/shacl#",
    prov: 'http://www.w3.org/ns/prov#'
};

// Line-level patterns shared between parser and renderer.

/** Matches a string that starts with one of the recognised IRI schemes. */
export const URL_REGEX = /^(https?|ftp|mailto|tag|nih|urn|uuid|did|web|ipfs|ipns|data|file|urn:uuid):/;

/** Fence opener: group 1 is the fence run, group 2 the rest of the line. */
export const FENCE_REGEX = /^(`{3,}|~{3,})(.*)/;

/** Prefix declaration `[prefix] <iri>`: groups are prefix and IRI. */
export const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;

/** Heading: hashes, title text, optional trailing `{...}` annotation. */
export const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;

/** List item: indent, marker (bullet or `N.`), text, optional `{...}` annotation. */
export const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;

/** Blockquote: quoted text and optional trailing `{...}` annotation. */
export const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;

/** A line consisting only of a `{=subject}` declaration. */
export const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;

// Inline carrier patterns. They are sticky (`y`), so matching starts at
// `lastIndex` and callers must set it before each use.
export const INLINE_CARRIER_PATTERNS = {
    EMPHASIS: /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y,
    CODE_SPAN: /``(.+?)``\s*\{([^}]+)\}/y
};

// The same patterns as [name, regex] pairs. These are separate RegExp objects
// from the ones above, so their `lastIndex` state is independent.
export const CARRIER_PATTERN_ARRAY = [
    ['EMPHASIS', /[*__`]+(.+?)[*__`]+\s*\{([^}]+)\}/y],
    ['CODE_SPAN', /``(.+?)``\s*\{([^}]+)\}/y]
];
|
|
1
|
+
import { DEFAULT_CONTEXT, STANDALONE_SUBJECT_REGEX, FENCE_REGEX, PREFIX_REGEX, HEADING_REGEX, UNORDERED_LIST_REGEX, BLOCKQUOTE_REGEX } from './constants.js';
import { parseSemanticBlock, expandIRI, shortenIRI, hash } from './utils.js';
|
|
35
3
|
|
|
36
4
|
// Cache for fence regex patterns
|
|
37
5
|
export const FENCE_CLOSE_PATTERNS = new Map();
|
|
@@ -156,6 +124,172 @@ export const PROCESSORS = [
|
|
|
156
124
|
{ test: line => true, process: null } // Default: paragraph
|
|
157
125
|
];
|
|
158
126
|
|
|
127
|
+
/**
 * Token scanning processors - shared between parser and renderer.
 *
 * Each entry pairs a token `type` with a `test(line)` predicate. Every
 * `process` slot is null here; per the original notes it is supplied by
 * parse.js.
 */
export const TOKEN_PROCESSORS = [
    {
        type: 'fence',
        test: (line) => FENCE_REGEX.test(line.trim()),
        process: null
    },
    {
        // Never matches by line test.
        type: 'content',
        test: () => false,
        process: null
    },
    {
        type: 'prefix',
        test: (line) => PREFIX_REGEX.test(line),
        process: null
    },
    {
        type: 'heading',
        test: (line) => HEADING_REGEX.test(line),
        process: null
    },
    {
        type: 'list',
        test: (line) => UNORDERED_LIST_REGEX.test(line),
        process: null
    },
    {
        type: 'blockquote',
        test: (line) => BLOCKQUOTE_REGEX.test(line),
        process: null
    },
    {
        // Returns the trimmed line itself (truthy for non-blank lines), not a boolean.
        type: 'para',
        test: (line) => line.trim(),
        process: null
    }
];
|
|
137
|
+
|
|
138
|
+
// Language and attributes parsing
|
|
139
|
+
/**
 * Split a fence info string into its language tag and annotation block.
 *
 * The language ends at the first space or `{`, whichever comes first. The
 * annotation is the first brace pair without nested braces found after it.
 *
 * @param {string} langAndAttrs - Text following the opening fence.
 * @returns {{lang: string, attrsText: (string|null)}}
 */
export function parseLangAndAttrs(langAndAttrs) {
    const cut = langAndAttrs.search(/[ {]/);
    const langEnd = cut === -1 ? langAndAttrs.length : cut;

    const rest = langAndAttrs.substring(langEnd);
    const braces = rest.match(/\{[^{}]*\}/);

    return {
        lang: langAndAttrs.substring(0, langEnd),
        attrsText: braces?.[0] || null
    };
}
|
|
151
|
+
|
|
152
|
+
// Carrier extraction utilities
|
|
153
|
+
/**
 * Find the end of a bracketed span, honouring nested `[` ... `]` pairs.
 *
 * @param {string} text - Text being scanned.
 * @param {number} bracketStart - Index of the opening `[`.
 * @returns {(number|null)} Index just past the matching `]`, or null when the
 *   bracket is never closed.
 */
export function findMatchingBracket(text, bracketStart) {
    let depth = 1;

    for (let i = bracketStart + 1; i < text.length; i += 1) {
        const ch = text[i];
        if (ch === '[') {
            depth += 1;
        } else if (ch === ']') {
            depth -= 1;
            if (depth === 0) return i + 1;
        }
    }

    return null;
}
|
|
165
|
+
|
|
166
|
+
/**
 * Read an optional `(url)` group that directly follows a bracketed span.
 *
 * @param {string} text - Text being scanned.
 * @param {number} bracketEnd - Index just past the closing `]`.
 * @returns {{url: (string|null), spanEnd: number}} The URL text and the index
 *   just past `)`. When there is no complete `(...)` group, `url` is null and
 *   `spanEnd` equals `bracketEnd`.
 */
export function extractUrlFromBrackets(text, bracketEnd) {
    const none = { url: null, spanEnd: bracketEnd };
    if (text[bracketEnd] !== '(') return none;

    const closeAt = text.indexOf(')', bracketEnd);
    if (closeAt === -1) return none;

    return {
        url: text.substring(bracketEnd + 1, closeAt),
        spanEnd: closeAt + 1
    };
}
|
|
180
|
+
|
|
181
|
+
/**
 * Read an optional `{...}` annotation that follows a span, allowing
 * whitespace in between.
 *
 * @param {string} text - Text being scanned.
 * @param {number} spanEnd - Index in `text` where the span ended.
 * @param {number} baseOffset - Offset added to produce `attrsRange`.
 * @returns {{attrs: (string|null), attrsRange: (number[]|null), finalSpanEnd: number}}
 *   `attrs` includes both braces and `attrsRange` is `[start, end]` shifted by
 *   `baseOffset`. When no complete annotation follows, both are null and
 *   `finalSpanEnd` equals `spanEnd`.
 */
export function extractAttributesFromText(text, spanEnd, baseOffset) {
    const tail = text.substring(spanEnd);
    // `^\s*` always matches, so this is the count of leading whitespace.
    const openAt = /^\s*/.exec(tail)[0].length;

    const closeAt = tail[openAt] === '{' ? tail.indexOf('}', openAt) : -1;
    if (closeAt === -1) {
        return { attrs: null, attrsRange: null, finalSpanEnd: spanEnd };
    }

    const attrs = tail.substring(openAt, closeAt + 1);
    const absStart = baseOffset + spanEnd + openAt;

    return {
        attrs,
        attrsRange: [absStart, absStart + attrs.length],
        finalSpanEnd: spanEnd + closeAt + 1
    };
}
|
|
201
|
+
|
|
202
|
+
/**
 * Classify a bracketed span by the URL that followed it.
 *
 * @param {(string|null)} url - Text found between the parentheses, if any.
 * @returns {{carrierType: string, resourceIRI: (string|null)}} A `link`
 *   carrying the URL when it is non-empty and does not start with `=`;
 *   otherwise a `span` with no resource IRI.
 */
export function determineCarrierType(url) {
    const isLink = Boolean(url) && !url.startsWith('=');
    return isLink
        ? { carrierType: 'link', resourceIRI: url }
        : { carrierType: 'span', resourceIRI: null };
}
|
|
208
|
+
|
|
209
|
+
/**
 * Compute ranges for an inline carrier match.
 *
 * @param {Array} match - Regex match: [0] whole carrier, [1] value text,
 *   [2] annotation text without braces.
 * @param {number} baseOffset - Offset added to every range.
 * @param {number} matchStart - Index of the match within the current text.
 * @returns {{valueRange: number[], attrsRange: number[], range: number[], pos: number}}
 *   `attrsRange` excludes the braces, `range` spans the carrier through the
 *   closing brace, and `pos` is relative to the current text.
 */
export function calcCarrierRanges(match, baseOffset, matchStart) {
    const [whole, value, attrs] = match;
    const docStart = baseOffset + matchStart;

    const valueStart = docStart + whole.indexOf(value);
    const valueEnd = valueStart + value.length;

    // Find the annotation by its full `{attrs}` text, searching from the end.
    // Taking the first `{` would land inside the value for input such as
    // `*a{b* {p}`. Fall back to the first brace if the exact text is absent.
    const bracedAt = whole.lastIndexOf(`{${attrs}}`);
    const openBrace = bracedAt !== -1 ? bracedAt : whole.indexOf('{');

    const attrsStart = docStart + openBrace;
    const attrsEnd = attrsStart + attrs.length + 2; // +2 for { and }

    return {
        valueRange: [valueStart, valueEnd],
        attrsRange: [attrsStart + 1, attrsEnd - 1], // Exclude braces
        range: [docStart, attrsEnd],
        pos: matchStart + whole.length // relative to current text, not document
    };
}
|
|
221
|
+
|
|
222
|
+
// Clean text extraction utilities
|
|
223
|
+
/**
 * Return a token's human-readable text with the annotation and the block
 * marker removed.
 *
 * @param {object} token - Token with `text`, `type`, and optionally `range`
 *   and `attrsRange` (`attrsRange` is rebased by `range[0]` before slicing).
 * @returns {string} Trimmed text; '' when the token has no text.
 */
export function extractCleanText(token) {
    if (!token.text) return '';

    let text = token.text;

    // Cut the annotation out of the text.
    if (token.attrsRange) {
        const origin = token.range?.[0] || 0;
        const beforeAttrs = text.substring(0, token.attrsRange[0] - origin);
        const afterAttrs = text.substring(token.attrsRange[1] - origin);
        text = beforeAttrs + afterAttrs;
    }

    switch (token.type) {
        case 'heading':
            return text.replace(/^#+\s*/, '').trim();
        case 'list':
            // Strip indentation plus either a bullet or an ordered marker.
            // The ordered marker must be followed by whitespace or end of
            // text, so content such as "2.5 kg" is left alone.
            return text.replace(/^\s*(?:[-*+]\s*|\d+\.(?:\s+|$))/, '').trim();
        case 'blockquote':
            return text.replace(/^>\s*/, '').trim();
        default:
            return text.trim();
    }
}
|
|
247
|
+
|
|
248
|
+
// Quad emission utilities
|
|
249
|
+
/** IRI of `rdf:type`. */
export const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';

/** IRI of the `rdf:Statement` class. */
export const RDF_STATEMENT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement';

/** IRI of `rdf:subject`. */
export const RDF_SUBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject';

/** IRI of `rdf:predicate`. */
export const RDF_PREDICATE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate';

/** IRI of `rdf:object`. */
export const RDF_OBJECT = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#object';
|
|
254
|
+
|
|
255
|
+
/**
 * Build an origin record that links a subject/predicate pair to the block
 * it came from.
 *
 * @param {object} block - Source block (`id`, `range`, `carrierType`, `context`, `text`).
 * @param {{value: string}} subject - Subject term.
 * @param {{value: string}} predicate - Predicate term.
 * @param {(object|null)} [meta] - Optional flags; `meta.remove` marks a removal.
 * @returns {object} Origin entry. `context` is the block's own object, not a
 *   copy, and `polarity` is '-' for removals and '+' otherwise.
 */
export function createLeanOriginEntry(block, subject, predicate, meta = null) {
    const { id, range, carrierType, context, text } = block;
    const isRemoval = Boolean(meta?.remove);

    return {
        blockId: id,
        range,
        carrierType,
        subject: subject.value,
        predicate: predicate.value,
        context, // shared reference, intentionally not spread
        polarity: isRemoval ? '-' : '+',
        value: text || ''
    };
}
|
|
267
|
+
|
|
268
|
+
// Fragment resolution utilities
|
|
269
|
+
/**
 * Build a named node for `fragment` relative to the current subject.
 *
 * Any existing `#...` part of the subject IRI is dropped before the new
 * fragment is appended.
 *
 * @param {string} fragment - Fragment identifier without the leading `#`.
 * @param {({value: string}|null)} currentSubject - Subject providing the base IRI.
 * @param {{namedNode: Function}} dataFactory - Factory used to create the node.
 * @returns {(object|null)} The named node, or null when there is no current subject.
 */
export function resolveFragment(fragment, currentSubject, dataFactory) {
    if (!currentSubject) return null;

    // Everything before the first '#', or the whole IRI when there is none.
    const [base] = currentSubject.value.split('#', 1);
    return dataFactory.namedNode(`${base}#${fragment}`);
}
|
|
276
|
+
|
|
277
|
+
/**
 * Resolve the subject declared in a parsed annotation.
 *
 * @param {{subject?: string}} sem - Parsed annotation.
 * @param {object} state - Parser state (`currentSubject`, `df`, `ctx`).
 * @returns {(object|null)} The subject node. Returns null when no subject is
 *   declared, and also for `RESET`, which additionally clears
 *   `state.currentSubject`.
 */
export function resolveSubject(sem, state) {
    const { subject } = sem;
    if (!subject) return null;

    if (subject === 'RESET') {
        state.currentSubject = null;
        return null;
    }

    // `=#name` is a fragment relative to the current subject.
    return subject.startsWith('=#')
        ? resolveFragment(subject.substring(2), state.currentSubject, state.df)
        : state.df.namedNode(expandIRI(subject, state.ctx));
}
|
|
286
|
+
|
|
287
|
+
/**
 * Resolve the object IRI declared in a parsed annotation.
 *
 * @param {{object?: string}} sem - Parsed annotation.
 * @param {object} state - Parser state (`currentSubject`, `df`, `ctx`).
 * @returns {(object|null)} The object node, or null when none is declared.
 */
export function resolveObject(sem, state) {
    const target = sem.object;
    if (!target) return null;

    // `#name` is a fragment relative to the current subject.
    return target.startsWith('#')
        ? resolveFragment(target.substring(1), state.currentSubject, state.df)
        : state.df.namedNode(expandIRI(target, state.ctx));
}
|
|
292
|
+
|
|
159
293
|
// HTML escaping - shared utility
|
|
160
294
|
export function escapeHtml(text) {
|
|
161
295
|
if (!text) return '';
|
|
@@ -167,6 +301,77 @@ export function escapeHtml(text) {
|
|
|
167
301
|
.replace(/'/g, ''');
|
|
168
302
|
}
|
|
169
303
|
|
|
304
|
+
// RDF term type checking utilities - shared across modules
|
|
305
|
+
/**
 * @param {*} term - Candidate RDF term; may be null or undefined.
 * @returns {boolean} True when `term.termType` is 'Literal'.
 */
export function isLiteral(term) {
    return term != null && term.termType === 'Literal';
}
|
|
308
|
+
|
|
309
|
+
/**
 * @param {*} term - Candidate RDF term; may be null or undefined.
 * @returns {boolean} True when `term.termType` is 'NamedNode'.
 */
export function isNamedNode(term) {
    return term != null && term.termType === 'NamedNode';
}
|
|
312
|
+
|
|
313
|
+
/**
 * @param {*} term - Candidate predicate term; may be null or undefined.
 * @returns {boolean} True when `term.value` is the `rdf:type` IRI.
 */
export function isRdfType(term) {
    return term != null && term.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
}
|
|
316
|
+
|
|
317
|
+
// IRI prefix extraction utility
|
|
318
|
+
/**
 * Return the part of the shortened IRI that precedes its first colon.
 *
 * NOTE(review): if `shortenIRI` returns an absolute IRI unchanged when no
 * prefix applies, this yields the scheme (e.g. `http`) - confirm against
 * `shortenIRI`.
 *
 * @param {string} iri - IRI to inspect.
 * @param {object} context - Prefix map passed to `shortenIRI`.
 * @returns {(string|null)} Text before the first `:` of the shortened form,
 *   or null for an empty IRI or a shortened form without a colon.
 */
export function getPrefixFromIRI(iri, context) {
    if (!iri) return null;

    const compact = shortenIRI(iri, context);
    const colonAt = compact.indexOf(':');
    return colonAt === -1 ? null : compact.substring(0, colonAt);
}
|
|
326
|
+
|
|
327
|
+
// Prefix collection utility - used by generate.js
|
|
328
|
+
/**
 * Collect every prefix referenced by the given quads.
 *
 * Checks each quad's subject and predicate, its object when it is a named
 * node, and the object's datatype when one is present.
 *
 * @param {Map} subjectGroups - Map whose values are iterables of quads.
 * @param {object} context - Prefix map used for shortening.
 * @returns {Set<string>} Prefixes found.
 */
export function collectUsedPrefixes(subjectGroups, context) {
    const found = new Set();
    const note = (iri) => {
        const prefix = getPrefixFromIRI(iri, context);
        if (prefix) found.add(prefix);
    };

    for (const subjectQuads of subjectGroups.values()) {
        for (const { subject, predicate, object } of subjectQuads) {
            note(subject.value);
            note(predicate.value);

            if (isNamedNode(object)) {
                note(object.value);
            }

            if (object.datatype && object.datatype.value) {
                note(object.datatype.value);
            }
        }
    }

    return found;
}
|
|
357
|
+
|
|
358
|
+
// Token processing utility - eliminates duplication in TOKEN_PROCESSORS
|
|
359
|
+
/**
 * Process one token while its block is the current block.
 *
 * The block is created and pushed onto `state.blockStack`, the extra
 * processors run, then the annotation processor, and finally the block is
 * popped and the enclosing block (if any) is restored as current.
 *
 * @param {object} token - Token being processed.
 * @param {object} state - Parser state (`blockStack`, `currentBlock`, `origin.blocks`).
 * @param {Function} processAnnotations - Called as `(token, state, token.type)`.
 * @param {Function} createBlockEntry - Called as `(token, state)`; returns a block with an `id`.
 * @param {Function[]} [additionalProcessors] - Called as `(token, state)` before annotations.
 * @returns {void}
 */
export function processTokenWithBlockTracking(token, state, processAnnotations, createBlockEntry, additionalProcessors = []) {
    const entry = createBlockEntry(token, state);
    state.currentBlock = entry;
    state.blockStack.push(entry.id);

    // Extra processors run before annotation handling.
    additionalProcessors.forEach((run) => run(token, state));
    processAnnotations(token, state, token.type);

    state.blockStack.pop();

    const { blockStack } = state;
    if (blockStack.length > 0) {
        const parentId = blockStack[blockStack.length - 1];
        state.currentBlock = state.origin.blocks.get(parentId);
    } else {
        state.currentBlock = null;
    }
}
|
|
374
|
+
|
|
170
375
|
// Quad key generation - shared between parser and renderer
|
|
171
376
|
export function quadIndexKey(subject, predicate, object) {
|
|
172
377
|
const datatype = object.datatype?.value || '';
|
|
@@ -199,14 +404,126 @@ export function resolveSubjectType(subjectDecl) {
|
|
|
199
404
|
return 'full-iri';
|
|
200
405
|
}
|
|
201
406
|
|
|
202
|
-
//
|
|
203
|
-
export
|
|
204
|
-
|
|
205
|
-
|
|
407
|
+
// Constants - shared across modules (bundle-size optimized)
|
|
408
|
+
/** IRI of the `xsd:string` datatype. */
export const XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string';
|
|
409
|
+
|
|
410
|
+
// Optimized sorting utilities - inline for better minification
|
|
411
|
+
/**
 * Sort quads by predicate IRI using `localeCompare`.
 *
 * @param {Array} quads - Quads to sort; the array is sorted IN PLACE.
 * @returns {Array} The same array instance, now sorted.
 */
export function sortQuadsByPredicate(quads) {
    const byPredicate = (left, right) => left.predicate.value.localeCompare(right.predicate.value);
    return quads.sort(byPredicate);
}
|
|
414
|
+
|
|
415
|
+
// Optimized text generation - template literals for smaller bundle
|
|
416
|
+
/**
 * Render one MD-LD prefix declaration line.
 *
 * @param {string} prefix - Prefix label.
 * @param {string} namespace - Namespace IRI.
 * @returns {string} `[prefix] <namespace>` followed by a newline.
 */
export const generatePrefixDeclaration = (prefix, namespace) => {
    const line = `[${prefix}] <${namespace}>`;
    return `${line}\n`;
};
|
|
417
|
+
|
|
418
|
+
/**
 * Render a literal-valued quad as an MD-LD annotated span.
 *
 * The annotation is the shortened predicate, followed by ` @lang` for
 * language-tagged literals, or by ` ^^datatype` when the datatype is
 * something other than `xsd:string`.
 *
 * @param {object} quad - Quad whose object is a literal.
 * @param {object} context - Prefix map used for shortening.
 * @returns {string} `[value] {annotation}` followed by a newline.
 */
export function generateLiteralText(quad, context) {
    const { predicate, object } = quad;
    let annotation = shortenIRI(predicate.value, context);

    // `datatype` is treated as optional elsewhere in this module, so a
    // literal without one is rendered as a plain string instead of throwing.
    const datatypeIRI = object.datatype?.value;

    if (object.language) {
        annotation += ` @${object.language}`;
    } else if (datatypeIRI && datatypeIRI !== XSD_STRING) {
        annotation += ` ^^${shortenIRI(datatypeIRI, context)}`;
    }

    return `[${object.value}] {${annotation}}\n`;
}
|
|
430
|
+
|
|
431
|
+
/**
 * Render an IRI-valued quad as an MD-LD annotated span.
 *
 * @param {object} quad - Quad whose object is an IRI.
 * @param {object} context - Prefix map used for shortening.
 * @returns {string} `[obj] {+obj ?pred}` followed by a newline, where both
 *   IRIs are shortened.
 */
export const generateObjectText = ({ object, predicate }, context) => {
    const target = shortenIRI(object.value, context);
    const relation = shortenIRI(predicate.value, context);
    const annotation = `+${target} ?${relation}`;
    return `[${target}] {${annotation}}\n`;
};
|
|
436
|
+
|
|
437
|
+
// Optimized quad filtering - destructuring for smaller minified output
|
|
438
|
+
/**
 * Partition a subject's quads into type assertions, literal-valued and
 * IRI-valued statements.
 *
 * A quad with an `rdf:type` predicate always lands in `types`. Quads whose
 * object is neither a literal nor a named node appear in none of the
 * returned lists.
 *
 * @param {Iterable} subjectQuads - Quads sharing a subject.
 * @returns {{types: Array, literals: Array, objects: Array}}
 */
export function filterQuadsByType(subjectQuads) {
    const buckets = { types: [], literals: [], objects: [] };

    const bucketFor = (quad) => {
        if (isRdfType(quad.predicate)) return buckets.types;
        if (isLiteral(quad.object)) return buckets.literals;
        if (isNamedNode(quad.object)) return buckets.objects;
        return null;
    };

    for (const quad of subjectQuads) {
        const bucket = bucketFor(quad);
        if (bucket) bucket.push(quad);
    }

    return buckets;
}
|
|
451
|
+
|
|
452
|
+
// Predicate processing utilities - common RDFa patterns
|
|
453
|
+
/**
 * Group a block's predicates by annotation form.
 *
 * Each predicate is expanded and then re-shortened against `ctx`. Form `!`
 * goes to `reverseProps`, form `?` to `objectProps`, and everything else
 * (including bare strings) to `literalProps`.
 *
 * @param {Array<(string|{iri: string, form?: string})>} predicates
 * @param {object} ctx - Prefix map.
 * @returns {{literalProps: string[], objectProps: string[], reverseProps: string[]}}
 */
export function processPredicates(predicates, ctx) {
    const literalProps = [];
    const objectProps = [];
    const reverseProps = [];

    // A Map is used so unexpected form strings cannot hit Object.prototype keys.
    const targetByForm = new Map([
        ['!', reverseProps],
        ['?', objectProps]
    ]);

    predicates.forEach((pred) => {
        const isBare = typeof pred === 'string';
        const iri = isBare ? pred : pred.iri;
        const expanded = expandIRI(iri, ctx);
        const shortened = shortenIRI(expanded, ctx);
        const form = isBare ? '' : (pred.form || '');

        const target = targetByForm.get(form) ?? literalProps;
        target.push(shortened);
    });

    return { literalProps, objectProps, reverseProps };
}
|
|
475
|
+
|
|
476
|
+
// Deterministic sorting utilities - ensure consistent output
|
|
477
|
+
/**
 * Sort items by a string key using `localeCompare`.
 *
 * @param {Array} array - Items to sort; the array is sorted IN PLACE.
 * @param {Function} keyFn - Maps an item to its string sort key.
 * @returns {Array} The same array instance, now sorted.
 */
export function sortDeterministic(array, keyFn) {
    const compareByKey = (left, right) => keyFn(left).localeCompare(keyFn(right));
    return array.sort(compareByKey);
}
|
|
484
|
+
|
|
485
|
+
/**
 * Sort quads by subject, then predicate, then object value.
 *
 * Values are compared with `localeCompare`. The object comparison uses
 * `object.value` for every term type.
 *
 * @param {Array} quads - Quads to sort; the array is sorted IN PLACE.
 * @returns {Array} The same array instance, now sorted.
 */
export function sortQuadsDeterministically(quads) {
    // `||` moves on to the next key only when the previous comparison is a tie (0).
    return quads.sort((a, b) =>
        a.subject.value.localeCompare(b.subject.value)
        || a.predicate.value.localeCompare(b.predicate.value)
        || a.object.value.localeCompare(b.object.value));
}
|
|
497
|
+
|
|
498
|
+
// Optimized deterministic prefix generation
|
|
499
|
+
/**
 * Render prefix declarations for the prefixes a document actually uses.
 *
 * Entries are visited in `localeCompare` order of their prefix. An entry is
 * skipped when it is a keyword (starts with `@`), is one of DEFAULT_CONTEXT's
 * own prefixes, or is not in `usedPrefixes`.
 *
 * @param {object} context - Prefix map.
 * @param {Set<string>} usedPrefixes - Prefixes referenced by the output.
 * @returns {string} Concatenated declaration lines; '' when none apply.
 */
export function generateDeterministicPrefixes(context, usedPrefixes) {
    // Own-property check: a plain `DEFAULT_CONTEXT[prefix]` lookup would also
    // see inherited members, so a prefix named `constructor` or `toString`
    // would be treated as built in and dropped.
    const isBuiltIn = (prefix) => Object.prototype.hasOwnProperty.call(DEFAULT_CONTEXT, prefix);

    const sortedEntries = Object.entries(context).sort(([a], [b]) => a.localeCompare(b));
    let text = '';

    for (const [prefix, namespace] of sortedEntries) {
        // `@vocab` is covered by the `@` keyword test.
        if (prefix.startsWith('@') || isBuiltIn(prefix) || !usedPrefixes.has(prefix)) continue;
        text += generatePrefixDeclaration(prefix, namespace);
    }

    return text;
}
|
|
511
|
+
|
|
512
|
+
// Memory-efficient block creation
|
|
513
|
+
/**
 * Create a block record for a token and register it in `state.origin.blocks`.
 *
 * The block id is a hash of the token's range and the first 50 characters
 * of its text. `hash` comes from './utils.js'.
 *
 * @param {object} token - Token with `type`, `range` and optionally `text`.
 * @param {object} state - Parser state (`currentSubject`, `ctx`, `origin.blocks`).
 * @returns {object} The newly registered block.
 */
export function createOptimizedBlockEntry(token, state) {
    // Tokens may have no text; hash an empty preview instead of throwing.
    const preview = (token.text ?? '').slice(0, 50);
    const id = hash(`${token.range[0]}-${token.range[1]}-${preview}`);

    const block = {
        id,
        type: token.type,
        carrierType: token.type,
        range: token.range,
        text: token.text,
        carriers: [],
        predicates: [],
        subject: state.currentSubject,
        // Snapshot the context so later prefix changes do not alter this block.
        context: { ...state.ctx }
    };

    state.origin.blocks.set(id, block);
    return block;
}
|
package/src/utils.js
CHANGED