mdld-parse 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -11
- package/package.json +5 -5
- package/src/index.js +10 -0
- package/src/parse.js +788 -0
- package/src/serialize.js +531 -0
- package/src/utils.js +305 -0
- package/index.js +0 -1364
package/src/utils.js
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
/**
 * Default mapping from prefix to namespace IRI.
 *
 * The `@vocab` entry holds the namespace for terms written without a prefix;
 * every other key is a prefix name. Key order is significant to any consumer
 * that iterates the entries in order.
 */
export const DEFAULT_CONTEXT = {
    ['@vocab']: 'http://schema.org/',
    ['rdf']: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    ['rdfs']: 'http://www.w3.org/2000/01/rdf-schema#',
    ['xsd']: 'http://www.w3.org/2001/XMLSchema#',
    ['schema']: 'http://schema.org/'
};
|
|
8
|
+
|
|
9
|
+
/**
 * Minimal factory producing plain-object RDF terms and quads.
 *
 * Every method returns a fresh object literal; nothing is cached or frozen.
 */
export const DataFactory = {
    /** Wraps an IRI string in a `NamedNode` term. */
    namedNode: (iri) => {
        return { termType: 'NamedNode', value: iri };
    },

    /** Creates a `BlankNode`; when no label is given a random `b…` label is generated. */
    blankNode: (label = `b${Math.random().toString(36).slice(2, 11)}`) => {
        return { termType: 'BlankNode', value: label };
    },

    /**
     * Creates a `Literal`.
     *
     * @param {*} value - Lexical value stored on the term.
     * @param {string|object} [languageOrDatatype] - A string is treated as a
     *   language tag (datatype becomes rdf:langString); any other truthy value
     *   is used as the datatype term; otherwise the datatype is xsd:string.
     */
    literal: (value, languageOrDatatype) => {
        const hasLanguageTag = typeof languageOrDatatype === 'string';
        const datatype = hasLanguageTag
            ? DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#langString')
            : languageOrDatatype || DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#string');
        const language = hasLanguageTag ? languageOrDatatype : '';
        return { termType: 'Literal', value, language, datatype };
    },

    /** Bundles four terms into a quad; a missing graph becomes a `NamedNode` with an empty value. */
    quad: (subject, predicate, object, graph) => {
        return { subject, predicate, object, graph: graph || DataFactory.namedNode('') };
    }
};
|
|
20
|
+
|
|
21
|
+
/**
 * Computes a short hexadecimal digest of a string using a shift-and-add
 * accumulator seeded with 5381 (in the style of djb2).
 *
 * Not cryptographic. The result is the absolute value of the accumulator in
 * base 16, truncated to at most 12 characters.
 *
 * @param {string} str - Text to hash; read one UTF-16 code unit at a time.
 * @returns {string} Lower-case hexadecimal digest.
 */
export function hash(str) {
    let acc = 5381;
    let index = 0;
    while (index < str.length) {
        // `<<` truncates to 32 bits, but the additions that follow do not.
        const shifted = acc << 5;
        acc = shifted + acc + str.charCodeAt(index);
        index += 1;
    }
    const hex = Math.abs(acc).toString(16);
    return hex.slice(0, 12);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Expands a compact term into a full IRI using a prefix context.
 *
 * Resolution order:
 *   1. `null`/`undefined` yields `null`.
 *   2. A term starting with `http:` or `https:` is returned (trimmed) unchanged.
 *   3. `prefix:reference` is expanded when `prefix` is an own key of `ctx`
 *      with a non-empty string namespace; otherwise the term is returned as is.
 *   4. A term without a colon is appended to `ctx['@vocab']` (or to '').
 *
 * Only the FIRST colon separates prefix from reference, so colons inside the
 * reference are kept (`ex:a:b` -> `<ns>a:b`). Prefixes are looked up as own
 * properties only, so names such as `constructor` never resolve through
 * `Object.prototype`.
 *
 * @param {string|{value: string}|*} term - Compact term, a term-like object
 *   with a string `value`, or any other value (stringified).
 * @param {Object<string, string>} [ctx] - Prefix map; a missing context is
 *   treated as empty.
 * @returns {string|null} The expanded IRI, or `null` for a nullish term.
 */
export function expandIRI(term, ctx) {
    if (term == null) return null;

    let raw;
    if (typeof term === 'string') {
        raw = term;
    } else if (typeof term === 'object' && typeof term.value === 'string') {
        raw = term.value;
    } else {
        raw = String(term);
    }

    const t = raw.trim();
    if (/^https?:/.test(t)) return t;

    const context = ctx ?? {};
    const separator = t.indexOf(':');
    if (separator !== -1) {
        const prefix = t.slice(0, separator);
        // Everything after the first colon belongs to the reference.
        const ref = t.slice(separator + 1);
        const namespace = Object.prototype.hasOwnProperty.call(context, prefix) ? context[prefix] : undefined;
        return typeof namespace === 'string' && namespace !== '' ? namespace + ref : t;
    }

    return (context['@vocab'] || '') + t;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Compacts a full IRI using a prefix context (the inverse of expansion).
 *
 * - Values that are not strings, are empty, or do not start with `http` are
 *   returned untouched.
 * - An IRI inside the `@vocab` namespace is returned as the bare local name.
 * - Otherwise the first context entry (in insertion order) whose namespace is
 *   a prefix of the IRI wins and the result is `prefix:local`.
 * - When nothing matches the IRI is returned unchanged.
 *
 * Context entries whose namespace is empty or not a string are ignored; an
 * empty namespace would otherwise match every IRI.
 *
 * @param {string|*} iri - IRI to compact.
 * @param {Object<string, string>} [ctx] - Prefix map; a missing context is
 *   treated as empty.
 * @returns {string|*} The compacted form, or the input when it cannot be compacted.
 */
export function shortenIRI(iri, ctx) {
    if (typeof iri !== 'string' || !iri.startsWith('http')) return iri;

    const context = ctx ?? {};
    const vocab = context['@vocab'];
    if (typeof vocab === 'string' && vocab !== '' && iri.startsWith(vocab)) {
        return iri.substring(vocab.length);
    }

    for (const [prefix, namespace] of Object.entries(context)) {
        if (prefix === '@vocab') continue;
        // Skip unusable namespaces instead of letting startsWith('') match everything.
        if (typeof namespace !== 'string' || namespace === '') continue;
        if (iri.startsWith(namespace)) {
            return prefix + ':' + iri.substring(namespace.length);
        }
    }

    return iri;
}
|
|
49
|
+
|
|
50
|
+
// Token pattern definitions for semantic block parsing.
// Keys are matched with startsWith() in insertion order, so a longer sigil
// ('+#') must stay ahead of the shorter sigil it begins with ('+').
const TOKEN_PATTERNS = {
    '=#': { kind: 'fragment', extract: t => t.substring(2) },
    '+#': { kind: 'softFragment', extract: t => t.substring(2) },
    '+': { kind: 'object', extract: t => t.substring(1) },
    '^^': { kind: 'datatype', extract: t => t.substring(2) },
    '@': { kind: 'language', extract: t => t.substring(1) },
    '.': { kind: 'type', extract: t => t.substring(1) },
    '!': { kind: 'property', form: '!', extract: t => t.substring(1) },
    '?': { kind: 'property', form: '?', extract: t => t.substring(1) }
};

// Builds a fresh, empty parse result so no two callers share the same arrays.
function createEmptySemanticResult() {
    return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
}

// Records one sigil-prefixed token on the summary fields of `result` and on its `entry`.
function applyPatternToken(result, entry, config, value, entryIndex) {
    switch (config.kind) {
        case 'fragment':
            result.subject = `=#${value}`;
            entry.fragment = value;
            break;
        case 'softFragment':
            result.object = `#${value}`;
            entry.fragment = value;
            break;
        case 'object':
            result.object = value;
            entry.iri = value;
            break;
        case 'datatype':
            // A language tag already seen wins over a datatype in the summary.
            if (!result.language) result.datatype = value;
            entry.datatype = value;
            break;
        case 'language':
            result.language = value;
            result.datatype = null;
            entry.language = value;
            break;
        case 'type':
            result.types.push({ iri: value, entryIndex });
            entry.iri = value;
            break;
        case 'property':
            result.predicates.push({ iri: value, form: config.form, entryIndex });
            entry.iri = value;
            entry.form = config.form;
            break;
        default:
            break;
    }
}

/**
 * Parses a `{...}` semantic attribute block into a summary plus a per-token
 * entry list.
 *
 * Whitespace-separated tokens are classified by their leading sigil:
 *   `=`        subject reset (subject becomes the string 'RESET')
 *   `=iri`     subject declaration
 *   `=#frag`   fragment subject (subject becomes '=#frag')
 *   `+iri`     object, `+#frag` soft-fragment object (object becomes '#frag')
 *   `^^dt`     datatype, `@lang` language (language overrides datatype)
 *   `.Type`    type
 *   `!p`, `?p` property with form '!' / '?'; a bare token is a property with form ''
 *
 * Each entry carries `relRange`, the token's `{ start, end }` character
 * offsets within the trimmed input text, so that
 * `text.trim().slice(start, end) === entry.raw`. The offsets account for the
 * opening brace (when present) and for any whitespace that follows it.
 *
 * Any exception is logged with `console.error` and an empty result is returned.
 *
 * @param {*} raw - Block text, normally including its surrounding braces.
 * @returns {{subject: ?string, object: ?string, types: Array, predicates: Array,
 *   datatype: ?string, language: ?string, entries: Array}} Parse result.
 */
export function parseSemanticBlock(raw) {
    try {
        const src = String(raw || '').trim();
        const inner = src.replace(/^\{|\}$/g, '');
        const cleaned = inner.trim();
        const result = createEmptySemanticResult();
        if (!cleaned) return result;

        // Position of `cleaned` inside `src`: the brace (if any) plus the
        // whitespace that trim() removed after it.
        const braceWidth = src.startsWith('{') ? 1 : 0;
        const baseOffset = braceWidth + (inner.length - inner.trimStart().length);
        const patterns = Object.entries(TOKEN_PATTERNS);

        for (const match of cleaned.matchAll(/\S+/g)) {
            const token = match[0];
            const relStart = baseOffset + match.index;
            const relEnd = relStart + token.length;
            const entryIndex = result.entries.length;

            // A lone '=' resets the subject.
            if (token === '=') {
                result.subject = 'RESET';
                result.entries.push({ kind: 'subjectReset', relRange: { start: relStart, end: relEnd }, raw: token });
                continue;
            }

            // '=iri' declares a subject; '=#frag' is left to the pattern table.
            if (token.startsWith('=') && !token.startsWith('=#')) {
                const iri = token.substring(1);
                result.subject = iri;
                result.entries.push({ kind: 'subject', iri, relRange: { start: relStart, end: relEnd }, raw: token });
                continue;
            }

            const matched = patterns.find(([sigil]) => token.startsWith(sigil));
            if (matched) {
                const config = matched[1];
                const entry = { kind: config.kind, relRange: { start: relStart, end: relEnd }, raw: token };
                applyPatternToken(result, entry, config, config.extract(token), entryIndex);
                result.entries.push(entry);
                continue;
            }

            // No sigil: a plain property.
            result.predicates.push({ iri: token, form: '', entryIndex });
            result.entries.push({ kind: 'property', iri: token, form: '', relRange: { start: relStart, end: relEnd }, raw: token });
        }

        return result;
    } catch (error) {
        console.error(`Error parsing semantic block ${raw}:`, error);
        return createEmptySemanticResult();
    }
}
|
|
153
|
+
|
|
154
|
+
/**
 * Builds a string key identifying a (subject, predicate, object) triple.
 *
 * The key is a JSON array `[subject.value, predicate.value, objectKey]`,
 * where `objectKey` is itself a JSON string describing the object term.
 * Literals contribute their value, language and datatype IRI (missing ones
 * become ''); every other term contributes its term type and value.
 *
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @param {{termType: string, value: string}} object - Must not be nullish.
 * @returns {string} JSON-encoded key.
 */
export function quadIndexKey(subject, predicate, object) {
    let descriptor;
    if (object.termType === 'Literal') {
        descriptor = {
            t: 'Literal',
            v: object.value,
            lang: object.language || '',
            dt: object.datatype?.value || ''
        };
    } else {
        descriptor = { t: object.termType, v: object.value };
    }
    const objKey = JSON.stringify(descriptor);
    return JSON.stringify([subject.value, predicate.value, objKey]);
}
|
|
160
|
+
|
|
161
|
+
/**
 * Returns a shallow copy of a quad whose literal object always carries a
 * string `language` and a `datatype` term.
 *
 * For a literal object, a non-string `language` becomes '' and a missing
 * `datatype` becomes an xsd:string NamedNode. Non-literal objects are passed
 * through. The input quad and its object are not mutated.
 *
 * @param {object|null|undefined} q - Quad-like object.
 * @returns {object|null} Normalized copy, or `null` for a falsy input.
 */
export function normalizeQuad(q) {
    if (!q) return null;

    const { subject, predicate, object } = q;
    const isLiteral = object?.termType === 'Literal';
    if (!isLiteral) {
        return { ...q, subject, predicate, object };
    }

    const language = typeof object.language === 'string' ? object.language : '';
    const datatype = object.datatype || { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' };
    const normalizedObject = { ...object, language, datatype };
    return { ...q, subject, predicate, object: normalizedObject };
}
|
|
171
|
+
|
|
172
|
+
/**
 * Produces a JSON string that identifies an object term.
 *
 * Literals are described by value, language and datatype IRI (missing ones
 * become ''); other terms by term type and value. A falsy term yields ''.
 *
 * @param {object|null|undefined} o - Term to describe.
 * @returns {string} JSON signature, or '' when no term is given.
 */
export function objectSignature(o) {
    if (!o) return '';

    const descriptor = o.termType === 'Literal'
        ? { t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' }
        : { t: o.termType, v: o.value };
    return JSON.stringify(descriptor);
}
|
|
179
|
+
|
|
180
|
+
/**
 * Normalizes a quad with `normalizeQuad` and returns its `quadIndexKey`.
 *
 * @param {object|null|undefined} q - Quad-like object.
 * @returns {string|null} Index key, or `null` when normalization yields nothing.
 */
export function quadToKeyForOrigin(q) {
    const normalized = normalizeQuad(q);
    if (!normalized) return null;
    return quadIndexKey(normalized.subject, normalized.predicate, normalized.object);
}
|
|
184
|
+
|
|
185
|
+
/**
 * Decodes a key made of a JSON array `[s, p, objectKey]` whose third element
 * is itself JSON text.
 *
 * @param {string} key - JSON-encoded key.
 * @returns {{s: *, p: *, o: *}|null} The decoded parts, or `null` when either
 *   level of JSON cannot be parsed or destructured.
 */
export function parseQuadIndexKey(key) {
    let decoded = null;
    try {
        const [s, p, objKey] = JSON.parse(key);
        // The object part is stored as nested JSON text and needs a second parse.
        const o = JSON.parse(objKey);
        decoded = { s, p, o };
    } catch {
        decoded = null;
    }
    return decoded;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Derives a slot identifier from a subject/predicate pair by hashing
 * `"<subject.value>|<predicate.value>"`.
 *
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @returns {string} Whatever `hash` returns for the joined key.
 */
export function createSemanticSlotId(subject, predicate) {
    const slotKey = `${subject.value}|${predicate.value}`;
    return hash(slotKey);
}
|
|
197
|
+
|
|
198
|
+
// Consolidated quad management
/**
 * Creates a stateless helper object for slot bookkeeping. None of the helpers
 * mutates its arguments; each returns a new slot record (or `null`).
 *
 * @returns {{createSlot: Function, markVacant: Function, findVacant: Function, occupy: Function}}
 */
export function createQuadManager() {
    /**
     * Builds a slot record. `slotId` is derived from `meta.subject` and
     * `meta.predicate` when both are truthy, otherwise it is `null`. Fields of
     * `meta` are spread last and therefore override the defaults.
     */
    const createSlot = (blockId, entryIndex, meta = {}) => {
        const hasSlotKey = meta.subject && meta.predicate;
        const slotId = hasSlotKey ? createSemanticSlotId(meta.subject, meta.predicate) : null;
        const defaults = { blockId, entryIndex, slotId, isVacant: false, lastValue: null, vacantSince: null };
        return { ...defaults, ...meta };
    };

    /** Returns a copy of the slot flagged vacant, remembering the removed value and the current time. */
    const markVacant = (slotInfo, deletedValue) => {
        return slotInfo
            ? { ...slotInfo, isVacant: true, lastValue: deletedValue, vacantSince: Date.now() }
            : null;
    };

    /** Returns the first vacant slot in `quadIndex.values()` whose `slotId` matches the pair, if any. */
    const findVacant = (quadIndex, subject, predicate) => {
        const wantedId = createSemanticSlotId(subject, predicate);
        const slots = Array.from(quadIndex.values());
        return slots.find((candidate) => candidate.slotId === wantedId && candidate.isVacant);
    };

    /** Returns a copy of a vacant slot marked as filled with `newValue`; `null` if the slot is missing or not vacant. */
    const occupy = (slotInfo, newValue) => {
        if (!(slotInfo && slotInfo.isVacant)) return null;
        return { ...slotInfo, isVacant: false, lastValue: newValue, vacantSince: null };
    };

    return { createSlot, markVacant, findVacant, occupy };
}
|
|
241
|
+
|
|
242
|
+
// Backward compatibility exports
/**
 * Standalone form of `createQuadManager().createSlot`.
 *
 * @param {*} blockId
 * @param {*} entryIndex
 * @param {object} [meta={}] - Extra fields forwarded to `createSlot`.
 * @returns {object} The slot record produced by `createSlot`.
 */
export function createSlotInfo(blockId, entryIndex, meta = {}) {
    const manager = createQuadManager();
    return manager.createSlot(blockId, entryIndex, meta);
}
|
|
246
|
+
|
|
247
|
+
/**
 * Standalone form of `createQuadManager().markVacant`.
 *
 * @param {object|null|undefined} slotInfo - Slot record to flag.
 * @param {*} deletedValue - Value forwarded as the removed value.
 * @returns {object|null} Whatever `markVacant` returns.
 */
export function markSlotAsVacant(slotInfo, deletedValue) {
    const manager = createQuadManager();
    return manager.markVacant(slotInfo, deletedValue);
}
|
|
250
|
+
|
|
251
|
+
/**
 * Standalone form of `createQuadManager().findVacant`.
 *
 * @param {{values: Function}} quadIndex - Collection exposing `values()`.
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @returns {object|undefined} Whatever `findVacant` returns.
 */
export function findVacantSlot(quadIndex, subject, predicate) {
    const manager = createQuadManager();
    return manager.findVacant(quadIndex, subject, predicate);
}
|
|
254
|
+
|
|
255
|
+
/**
 * Standalone form of `createQuadManager().occupy`.
 *
 * @param {object|null|undefined} slotInfo - Slot record to fill.
 * @param {*} newValue - Value forwarded as the new value.
 * @returns {object|null} Whatever `occupy` returns.
 */
export function occupySlot(slotInfo, newValue) {
    const manager = createQuadManager();
    return manager.occupy(slotInfo, newValue);
}
|
|
258
|
+
|
|
259
|
+
/**
 * Splits the text of an attribute block into its whitespace-separated tokens.
 *
 * An opening `{` (with any whitespace before it) and a closing `}` (with any
 * whitespace after it) are removed first. Falsy input yields an empty list.
 *
 * @param {*} attrsText - Block text, with or without braces.
 * @returns {string[]} Tokens in source order.
 */
export function normalizeAttrsTokens(attrsText) {
    const text = String(attrsText || '');
    const inner = text.replace(/^\s*\{|\}\s*$/g, '').trim();
    if (!inner) {
        return [];
    }
    const pieces = inner.split(/\s+/);
    return pieces.filter(Boolean);
}
|
|
263
|
+
|
|
264
|
+
/**
 * Serializes tokens back into a braced attribute block.
 *
 * Tokens are joined with single spaces, the joined text is trimmed, and the
 * result is wrapped in `{` and `}`.
 *
 * @param {string[]} tokens
 * @returns {string} The block text, `{}` for an empty list.
 */
export function writeAttrsTokens(tokens) {
    const body = tokens.join(' ').trim();
    return '{' + body + '}';
}
|
|
267
|
+
|
|
268
|
+
/**
 * Removes the first token accepted by `matchFn`.
 *
 * The input array is never modified. When nothing matches, the same array
 * instance is handed back inside the result.
 *
 * @param {string[]} tokens
 * @param {(token: string, index: number, all: string[]) => boolean} matchFn
 * @returns {{tokens: string[], removed: boolean}}
 */
export function removeOneToken(tokens, matchFn) {
    const position = tokens.findIndex(matchFn);
    if (position === -1) {
        return { tokens, removed: false };
    }
    const remaining = [...tokens];
    remaining.splice(position, 1);
    return { tokens: remaining, removed: true };
}
|
|
272
|
+
|
|
273
|
+
// Consolidated token management
/**
 * Adds or removes one token, building its text from a token type and a value.
 *
 * Token text: `+value` for 'object', `+#value` for 'softFragment', otherwise
 * `value` unchanged.
 *
 * NOTE: the return shape depends on the action. 'add' returns an array (the
 * same instance when the token is already present), 'remove' returns the
 * `{ tokens, removed }` object from `removeOneToken`, and any other action
 * returns the input array.
 */
function manageToken(tokens, action, tokenType, value) {
    let token = value;
    if (tokenType === 'object') {
        token = `+${value}`;
    } else if (tokenType === 'softFragment') {
        token = `+#${value}`;
    }

    if (action === 'add') {
        return tokens.includes(token) ? tokens : [...tokens, token];
    }
    if (action === 'remove') {
        return removeOneToken(tokens, (candidate) => candidate === token);
    }
    return tokens;
}
|
|
284
|
+
|
|
285
|
+
/**
 * Adds the object token `+<iri>` through `manageToken` with action 'add'.
 *
 * @param {string[]} tokens
 * @param {string} iri
 * @returns {string[]} Whatever `manageToken` returns for 'add'.
 */
export function addObjectToken(tokens, iri) {
    const updated = manageToken(tokens, 'add', 'object', iri);
    return updated;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Removes the object token `+<iri>` through `manageToken` with action 'remove'.
 *
 * @param {string[]} tokens
 * @param {string} iri
 * @returns {*} Whatever `manageToken` returns for 'remove' (a
 *   `{ tokens, removed }` result rather than a bare array).
 */
export function removeObjectToken(tokens, iri) {
    const outcome = manageToken(tokens, 'remove', 'object', iri);
    return outcome;
}
|
|
292
|
+
|
|
293
|
+
/**
 * Adds the soft-fragment token `+#<fragment>` through `manageToken` with action 'add'.
 *
 * @param {string[]} tokens
 * @param {string} fragment
 * @returns {string[]} Whatever `manageToken` returns for 'add'.
 */
export function addSoftFragmentToken(tokens, fragment) {
    const updated = manageToken(tokens, 'add', 'softFragment', fragment);
    return updated;
}
|
|
296
|
+
|
|
297
|
+
/**
 * Removes the soft-fragment token `+#<fragment>` through `manageToken` with action 'remove'.
 *
 * @param {string[]} tokens
 * @param {string} fragment
 * @returns {*} Whatever `manageToken` returns for 'remove' (a
 *   `{ tokens, removed }` result rather than a bare array).
 */
export function removeSoftFragmentToken(tokens, fragment) {
    const outcome = manageToken(tokens, 'remove', 'softFragment', fragment);
    return outcome;
}
|
|
300
|
+
|
|
301
|
+
/**
 * Builds a literal term through the supplied data factory.
 *
 * A truthy `datatype` takes precedence: it is expanded with `expandIRI`
 * against `context`, wrapped in a named node and passed as the second
 * argument. Otherwise a truthy `language` is passed as the second argument.
 * With neither, the factory is called with the value alone.
 *
 * @param {*} value - Lexical value.
 * @param {string} [datatype] - Compact or full datatype IRI.
 * @param {string} [language] - Language tag.
 * @param {object} context - Prefix map used to expand `datatype`.
 * @param {{literal: Function, namedNode: Function}} dataFactory
 * @returns {*} Whatever `dataFactory.literal` returns.
 */
export function createLiteral(value, datatype, language, context, dataFactory) {
    if (datatype) {
        const datatypeIri = expandIRI(datatype, context);
        const datatypeNode = dataFactory.namedNode(datatypeIri);
        return dataFactory.literal(value, datatypeNode);
    }
    return language
        ? dataFactory.literal(value, language)
        : dataFactory.literal(value);
}
|