mdld-parse 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/utils.js ADDED
@@ -0,0 +1,305 @@
1
/**
 * Default prefix-to-namespace mapping.
 *
 * `@vocab` is the namespace prepended to terms that carry no prefix; every
 * other key is a prefix label mapped to its namespace IRI. Key order is kept
 * stable because prefix lookups iterate the entries in insertion order.
 */
export const DEFAULT_CONTEXT = {
  // Namespace for un-prefixed terms.
  '@vocab': 'http://schema.org/',

  // Named prefixes.
  rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
  rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
  xsd: 'http://www.w3.org/2001/XMLSchema#',
  schema: 'http://schema.org/'
};
8
+
9
/**
 * Minimal factory producing plain-object RDF terms and quads.
 *
 * Every method returns a fresh object literal; nothing is cached or frozen.
 */
export const DataFactory = {
  /** Wraps `v` as a NamedNode term. */
  namedNode(v) {
    return { termType: 'NamedNode', value: v };
  },

  /** Wraps `v` as a BlankNode term; when omitted, a random `b…` label is generated. */
  blankNode(v = `b${Math.random().toString(36).slice(2, 11)}`) {
    return { termType: 'BlankNode', value: v };
  },

  /**
   * Builds a Literal term.
   *
   * @param {*} v - Lexical value.
   * @param {string|object} [lang] - A string (including the empty string) is
   *   treated as a language tag and forces the rdf:langString datatype; any
   *   other truthy value is used as the datatype term; otherwise the datatype
   *   defaults to xsd:string.
   */
  literal(v, lang) {
    const hasLanguageTag = typeof lang === 'string';
    const datatype = hasLanguageTag
      ? DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#langString')
      : (lang || DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#string'));
    return {
      termType: 'Literal',
      value: v,
      language: hasLanguageTag ? lang : '',
      datatype
    };
  },

  /** Builds a quad; a falsy graph is replaced by a NamedNode with an empty value. */
  quad(s, p, o, g) {
    const graph = g || DataFactory.namedNode('');
    return { subject: s, predicate: p, object: o, graph };
  }
};
20
+
21
/**
 * Computes a short hexadecimal digest of a string.
 *
 * Starts from 5381 and, for each UTF-16 code unit, folds it in as
 * `(acc << 5) + acc + unit`. The absolute value is rendered in base 16 and
 * cut to at most 12 characters.
 *
 * @param {string} str - Input text.
 * @returns {string} Lower-case hex digest, at most 12 characters long.
 */
export function hash(str) {
  let acc = 5381;
  let pos = 0;
  while (pos < str.length) {
    const shifted = acc << 5;
    acc = (shifted + acc) + str.charCodeAt(pos);
    pos += 1;
  }
  const hex = Math.abs(acc).toString(16);
  return hex.slice(0, 12);
}
26
+
27
/**
 * Expands a compact term into a full IRI using a prefix context.
 *
 * Resolution order:
 *   1. `null`/`undefined` yields `null`.
 *   2. Terms starting with `http:` or `https:` are returned unchanged (trimmed).
 *   3. `prefix:reference` is expanded when `prefix` is an own key of `ctx`
 *      with a truthy namespace; otherwise the term is returned as-is.
 *   4. Anything else is prefixed with `ctx['@vocab']` (or nothing if unset).
 *
 * @param {string|{value: string}|*} term - Compact term, an object carrying a
 *   string `value`, or any other value (stringified).
 * @param {Object<string, string>} [ctx] - Prefix-to-namespace mapping; a
 *   missing context behaves like an empty one.
 * @returns {string|null} The expanded IRI, or `null` for a nullish term.
 */
export function expandIRI(term, ctx) {
  if (term == null) return null;

  let raw;
  if (typeof term === 'string') {
    raw = term;
  } else if (typeof term === 'object' && typeof term.value === 'string') {
    raw = term.value;
  } else {
    raw = String(term);
  }

  const t = raw.trim();
  if (/^https?:/.test(t)) return t;

  const context = ctx ?? {};
  const colonAt = t.indexOf(':');
  if (colonAt !== -1) {
    // Split on the FIRST colon only, so references that themselves contain
    // colons (e.g. `ex:a:b`) keep their full tail.
    const prefix = t.slice(0, colonAt);
    const ref = t.slice(colonAt + 1);
    // Own-property check keeps inherited members such as `constructor` or
    // `toString` from being mistaken for declared prefixes.
    const namespace = Object.prototype.hasOwnProperty.call(context, prefix)
      ? context[prefix]
      : undefined;
    return namespace ? namespace + ref : t;
  }

  return (context['@vocab'] || '') + t;
}
38
+
39
/**
 * Compacts a full IRI using a prefix context.
 *
 * Values that are falsy or do not start with `http` are returned untouched.
 * A match against `ctx['@vocab']` wins and yields the bare local name;
 * otherwise the first context entry (in insertion order) whose namespace is a
 * prefix of the IRI yields `prefix:local`. With no match the IRI is returned
 * unchanged.
 *
 * @param {string} iri - IRI to shorten.
 * @param {Object<string, string>} ctx - Prefix-to-namespace mapping.
 * @returns {string} The compact form, or the input when nothing applies.
 */
export function shortenIRI(iri, ctx) {
  const isHttpIri = Boolean(iri) && iri.startsWith('http');
  if (!isHttpIri) return iri;

  const vocab = ctx['@vocab'];
  if (vocab && iri.startsWith(vocab)) {
    return iri.substring(vocab.length);
  }

  const hit = Object.entries(ctx).find(
    ([label, ns]) => label !== '@vocab' && iri.startsWith(ns)
  );
  if (!hit) return iri;

  const [label, ns] = hit;
  return label + ':' + iri.substring(ns.length);
}
49
+
50
// Token pattern definitions for semantic block parsing.
// Patterns are tried in insertion order and the first matching prefix wins,
// so a longer prefix ('+#') must stay ahead of its shorter form ('+').
const TOKEN_PATTERNS = {
  '=#': { kind: 'fragment', extract: t => t.substring(2) },
  '+#': { kind: 'softFragment', extract: t => t.substring(2) },
  '+': { kind: 'object', extract: t => t.substring(1) },
  '^^': { kind: 'datatype', extract: t => t.substring(2) },
  '@': { kind: 'language', extract: t => t.substring(1) },
  '.': { kind: 'type', extract: t => t.substring(1) },
  '!': { kind: 'property', form: '!', extract: t => t.substring(1) },
  '?': { kind: 'property', form: '?', extract: t => t.substring(1) }
};

// Builds a fresh, empty parse result so callers never share arrays.
function createEmptySemanticBlock() {
  return { subject: null, object: null, types: [], predicates: [], datatype: null, language: null, entries: [] };
}

/**
 * Parses a `{...}` semantic annotation block into its structured parts.
 *
 * The block is split on whitespace and each token is classified:
 *   - `=`           resets the subject (`subject` becomes the string 'RESET')
 *   - `=IRI`        declares the subject
 *   - `=#frag`      fragment subject (`subject` becomes `=#frag`)
 *   - `+#frag`      soft-fragment object (`object` becomes `#frag`)
 *   - `+IRI`        object
 *   - `^^dt`        datatype (ignored on the result once a language is set)
 *   - `@lang`       language (clears any datatype on the result)
 *   - `.Type`       type
 *   - `!p` / `?p`   property with form '!' / '?'
 *   - anything else property with an empty form
 *
 * Every token also yields an entry in `entries` carrying its `raw` text and a
 * `relRange` of offsets into the trimmed block text, with the opening `{`
 * counted as position 0. Whitespace between `{` and the first token is
 * included in the offsets, so `trimmedRaw.slice(start, end)` is the token.
 * NOTE(review): for input without an opening brace the offsets are still
 * shifted by one, as before — confirm whether callers ever pass such input.
 *
 * @param {*} raw - Block text; falsy values are treated as empty.
 * @returns {{subject: ?string, object: ?string, types: Array, predicates: Array,
 *   datatype: ?string, language: ?string, entries: Array}} Parse result. On an
 *   unexpected error the failure is logged with `console.error` and an empty
 *   result is returned.
 */
export function parseSemanticBlock(raw) {
  try {
    const src = String(raw || '').trim();
    const inner = src.replace(/^\{|\}$/g, '');
    const cleaned = inner.trim();
    if (!cleaned) return createEmptySemanticBlock();

    // 1 accounts for the opening brace; the remainder is the padding that
    // `trim()` removed in front of the first token.
    const baseOffset = 1 + (inner.length - inner.trimStart().length);

    const result = createEmptySemanticBlock();
    const patterns = Object.entries(TOKEN_PATTERNS);
    const re = /\S+/g;
    let m;

    while ((m = re.exec(cleaned)) !== null) {
      const token = m[0];
      const start = baseOffset + m.index;
      const relRange = { start, end: start + token.length };
      const entryIndex = result.entries.length;

      // A lone '=' resets the subject.
      if (token === '=') {
        result.subject = 'RESET';
        result.entries.push({ kind: 'subjectReset', relRange, raw: token });
        continue;
      }

      // '=IRI' declares a subject; '=#…' is left for the pattern table.
      if (token.startsWith('=') && !token.startsWith('=#')) {
        const iri = token.substring(1);
        result.subject = iri;
        result.entries.push({ kind: 'subject', iri, relRange, raw: token });
        continue;
      }

      const matched = patterns.find(([prefix]) => token.startsWith(prefix));

      // No recognised prefix: a plain property with an empty form.
      if (!matched) {
        result.predicates.push({ iri: token, form: '', entryIndex });
        result.entries.push({ kind: 'property', iri: token, form: '', relRange, raw: token });
        continue;
      }

      const config = matched[1];
      const extracted = config.extract(token);
      const entry = { kind: config.kind, relRange, raw: token };

      switch (config.kind) {
        case 'fragment':
          result.subject = `=#${extracted}`;
          entry.fragment = extracted;
          break;
        case 'softFragment':
          result.object = `#${extracted}`;
          entry.fragment = extracted;
          break;
        case 'object':
          result.object = extracted;
          entry.iri = extracted;
          break;
        case 'datatype':
          // A language already seen takes precedence over a datatype.
          if (!result.language) result.datatype = extracted;
          entry.datatype = extracted;
          break;
        case 'language':
          result.language = extracted;
          result.datatype = null;
          entry.language = extracted;
          break;
        case 'type':
          result.types.push({ iri: extracted, entryIndex });
          entry.iri = extracted;
          break;
        case 'property':
          result.predicates.push({ iri: extracted, form: config.form, entryIndex });
          entry.iri = extracted;
          entry.form = config.form;
          break;
        default:
          break;
      }

      result.entries.push(entry);
    }

    return result;
  } catch (error) {
    console.error(`Error parsing semantic block ${raw}:`, error);
    return createEmptySemanticBlock();
  }
}
153
+
154
/**
 * Builds the string key that identifies a (subject, predicate, object) triple.
 *
 * The object is first serialised to JSON on its own: literals contribute
 * value, language and datatype IRI; other terms contribute term type and
 * value. The final key is the JSON array `[subject.value, predicate.value,
 * objectJson]`.
 *
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @param {{termType: string, value: string, language?: string, datatype?: {value: string}}} object
 * @returns {string} JSON-encoded key.
 */
export function quadIndexKey(subject, predicate, object) {
  let descriptor;
  if (object.termType === 'Literal') {
    descriptor = {
      t: 'Literal',
      v: object.value,
      lang: object.language || '',
      dt: object.datatype?.value || ''
    };
  } else {
    descriptor = { t: object.termType, v: object.value };
  }
  const objKey = JSON.stringify(descriptor);
  return JSON.stringify([subject.value, predicate.value, objKey]);
}
160
+
161
/**
 * Returns a shallow copy of a quad whose literal object has explicit
 * `language` and `datatype` fields.
 *
 * For a Literal object, a non-string `language` becomes `''` and a missing
 * `datatype` becomes an xsd:string NamedNode. Non-literal objects are carried
 * over by reference. The input quad is not modified.
 *
 * @param {object|null|undefined} q - Quad-like object.
 * @returns {object|null} Normalised copy, or `null` for a falsy input.
 */
export function normalizeQuad(q) {
  if (!q) return null;

  const term = q.object;
  const copy = { ...q, subject: q.subject, predicate: q.predicate, object: term };
  if (term?.termType !== 'Literal') return copy;

  const hasLanguage = typeof term.language === 'string';
  copy.object = {
    ...term,
    language: hasLanguage ? term.language : '',
    datatype: term.datatype || { termType: 'NamedNode', value: 'http://www.w3.org/2001/XMLSchema#string' }
  };
  return copy;
}
171
+
172
/**
 * Serialises an RDF object term to a comparable JSON string.
 *
 * Literals include value, language and datatype IRI (missing parts become
 * empty strings); other terms include term type and value.
 *
 * @param {object|null|undefined} o - Term to describe.
 * @returns {string} JSON signature, or `''` for a falsy term.
 */
export function objectSignature(o) {
  if (!o) return '';

  const isLiteral = o.termType === 'Literal';
  const descriptor = isLiteral
    ? { t: 'Literal', v: o.value, lang: o.language || '', dt: o.datatype?.value || '' }
    : { t: o.termType, v: o.value };
  return JSON.stringify(descriptor);
}
179
+
180
/**
 * Produces the index key for a quad after normalising it.
 *
 * @param {object|null|undefined} q - Quad-like object.
 * @returns {string|null} The key from `quadIndexKey`, or `null` when
 *   `normalizeQuad` yields nothing.
 */
export function quadToKeyForOrigin(q) {
  const normalized = normalizeQuad(q);
  if (!normalized) return null;

  const { subject, predicate, object } = normalized;
  return quadIndexKey(subject, predicate, object);
}
184
+
185
/**
 * Decodes a key of the form `JSON([subject, predicate, objectJson])`.
 *
 * @param {string} key - Encoded key.
 * @returns {{s: *, p: *, o: *}|null} Decoded parts with the object descriptor
 *   parsed from its nested JSON, or `null` when any decoding step throws.
 */
export function parseQuadIndexKey(key) {
  let decoded = null;
  try {
    const parts = JSON.parse(key);
    const [s, p, encodedObject] = parts;
    decoded = { s, p, o: JSON.parse(encodedObject) };
  } catch {
    decoded = null;
  }
  return decoded;
}
193
+
194
/**
 * Derives a slot identifier from a subject/predicate pair.
 *
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @returns {string} `hash` of the two values joined with `|`.
 */
export function createSemanticSlotId(subject, predicate) {
  const { value: subjectValue } = subject;
  const { value: predicateValue } = predicate;
  return hash(`${subjectValue}|${predicateValue}`);
}
197
+
198
+ // Consolidated quad management
199
/**
 * Creates a stateless helper object for managing quad slots.
 *
 * None of the returned operations mutate their arguments; each returns a new
 * slot object (or `null`/`undefined` when there is nothing to return).
 *
 * @returns {{createSlot: Function, markVacant: Function, findVacant: Function, occupy: Function}}
 */
export function createQuadManager() {
  // Builds a new occupied slot. Keys in `meta` are spread last, so they
  // override the defaults of the same name.
  function createSlot(blockId, entryIndex, meta = {}) {
    const hasPair = meta.subject && meta.predicate;
    const slotId = hasPair ? createSemanticSlotId(meta.subject, meta.predicate) : null;
    const defaults = {
      blockId,
      entryIndex,
      slotId,
      isVacant: false,
      lastValue: null,
      vacantSince: null
    };
    return { ...defaults, ...meta };
  }

  // Returns a vacant copy of the slot, remembering the removed value and the
  // time of removal; `null` when no slot is given.
  function markVacant(slotInfo, deletedValue) {
    if (!slotInfo) return null;
    return { ...slotInfo, isVacant: true, lastValue: deletedValue, vacantSince: Date.now() };
  }

  // Returns the first vacant slot in `quadIndex` whose slotId matches the
  // subject/predicate pair, or `undefined` when there is none.
  function findVacant(quadIndex, subject, predicate) {
    const wantedId = createSemanticSlotId(subject, predicate);
    const slots = Array.from(quadIndex.values());
    for (const candidate of slots) {
      if (candidate.slotId === wantedId && candidate.isVacant) return candidate;
    }
    return undefined;
  }

  // Returns an occupied copy of a vacant slot holding `newValue`; `null` when
  // the slot is missing or not vacant.
  function occupy(slotInfo, newValue) {
    const canOccupy = Boolean(slotInfo) && Boolean(slotInfo.isVacant);
    if (!canOccupy) return null;
    return { ...slotInfo, isVacant: false, lastValue: newValue, vacantSince: null };
  }

  return { createSlot, markVacant, findVacant, occupy };
}
241
+
242
+ // Backward compatibility exports
243
/**
 * Backward-compatible alias for `createQuadManager().createSlot`.
 *
 * @param {*} blockId
 * @param {*} entryIndex
 * @param {object} [meta={}] - Extra fields merged into the slot.
 * @returns {object} The new slot.
 */
export function createSlotInfo(blockId, entryIndex, meta = {}) {
  const { createSlot } = createQuadManager();
  return createSlot(blockId, entryIndex, meta);
}
246
+
247
/**
 * Backward-compatible alias for `createQuadManager().markVacant`.
 *
 * @param {object|null} slotInfo - Slot to vacate.
 * @param {*} deletedValue - Value being removed from the slot.
 * @returns {object|null} Vacant copy of the slot, or `null` without a slot.
 */
export function markSlotAsVacant(slotInfo, deletedValue) {
  const { markVacant } = createQuadManager();
  return markVacant(slotInfo, deletedValue);
}
250
+
251
/**
 * Backward-compatible alias for `createQuadManager().findVacant`.
 *
 * @param {{values: Function}} quadIndex - Collection whose `values()` yields slots.
 * @param {{value: string}} subject
 * @param {{value: string}} predicate
 * @returns {object|undefined} First matching vacant slot, if any.
 */
export function findVacantSlot(quadIndex, subject, predicate) {
  const { findVacant } = createQuadManager();
  return findVacant(quadIndex, subject, predicate);
}
254
+
255
/**
 * Backward-compatible alias for `createQuadManager().occupy`.
 *
 * @param {object|null} slotInfo - Vacant slot to fill.
 * @param {*} newValue - Value now held by the slot.
 * @returns {object|null} Occupied copy, or `null` if the slot is missing or not vacant.
 */
export function occupySlot(slotInfo, newValue) {
  const { occupy } = createQuadManager();
  return occupy(slotInfo, newValue);
}
258
+
259
/**
 * Splits attribute text into its whitespace-separated tokens.
 *
 * An optional leading `{` and trailing `}` (with surrounding whitespace) are
 * stripped first.
 *
 * @param {*} attrsText - Attribute text; falsy values are treated as empty.
 * @returns {string[]} Tokens in order; empty array when nothing remains.
 */
export function normalizeAttrsTokens(attrsText) {
  const text = String(attrsText || '');
  const inner = text.replace(/^\s*\{|\}\s*$/g, '').trim();
  if (!inner) return [];
  return inner.split(/\s+/).filter(Boolean);
}
263
+
264
/**
 * Serialises tokens back into `{...}` attribute text.
 *
 * @param {string[]} tokens - Tokens to join with single spaces.
 * @returns {string} The joined, trimmed tokens wrapped in braces.
 */
export function writeAttrsTokens(tokens) {
  const body = tokens.join(' ').trim();
  return `{${body}}`;
}
267
+
268
/**
 * Removes the first token accepted by `matchFn`.
 *
 * @param {Array} tokens - Source tokens; never modified.
 * @param {Function} matchFn - Predicate passed to `findIndex`.
 * @returns {{tokens: Array, removed: boolean}} On a match, a new array
 *   without that token and `removed: true`; otherwise the original array
 *   instance and `removed: false`.
 */
export function removeOneToken(tokens, matchFn) {
  const at = tokens.findIndex(matchFn);
  if (at === -1) {
    return { tokens, removed: false };
  }
  const remaining = [...tokens];
  remaining.splice(at, 1);
  return { tokens: remaining, removed: true };
}
272
+
273
+ // Consolidated token management
274
/**
 * Shared implementation behind the add/remove token helpers.
 *
 * The token text is `+value` for type 'object', `+#value` for type
 * 'softFragment', and `value` itself for any other type.
 *
 * Return shape depends on the action:
 *   - 'add'    returns an array (the same instance if the token is present)
 *   - 'remove' returns the `{ tokens, removed }` result of `removeOneToken`
 *   - other    returns `tokens` unchanged
 */
function manageToken(tokens, action, tokenType, value) {
  let token = value;
  if (tokenType === 'object') {
    token = `+${value}`;
  } else if (tokenType === 'softFragment') {
    token = `+#${value}`;
  }

  if (action === 'add') {
    const alreadyPresent = tokens.includes(token);
    return alreadyPresent ? tokens : [...tokens, token];
  }
  if (action === 'remove') {
    return removeOneToken(tokens, t => t === token);
  }
  return tokens;
}
284
+
285
/**
 * Ensures the object token `+iri` is present.
 *
 * @param {string[]} tokens - Current tokens; not modified.
 * @param {string} iri - Object IRI (without the `+` marker).
 * @returns {string[]} Result of the shared 'add' action for an object token.
 */
export function addObjectToken(tokens, iri) {
  const updated = manageToken(tokens, 'add', 'object', iri);
  return updated;
}
288
+
289
/**
 * Removes the first object token `+iri`.
 *
 * @param {string[]} tokens - Current tokens; not modified.
 * @param {string} iri - Object IRI (without the `+` marker).
 * @returns {{tokens: string[], removed: boolean}} Result of the shared
 *   'remove' action for an object token.
 */
export function removeObjectToken(tokens, iri) {
  const outcome = manageToken(tokens, 'remove', 'object', iri);
  return outcome;
}
292
+
293
/**
 * Ensures the soft-fragment token `+#fragment` is present.
 *
 * @param {string[]} tokens - Current tokens; not modified.
 * @param {string} fragment - Fragment name (without the `+#` marker).
 * @returns {string[]} Result of the shared 'add' action for a soft fragment.
 */
export function addSoftFragmentToken(tokens, fragment) {
  const updated = manageToken(tokens, 'add', 'softFragment', fragment);
  return updated;
}
296
+
297
/**
 * Removes the first soft-fragment token `+#fragment`.
 *
 * @param {string[]} tokens - Current tokens; not modified.
 * @param {string} fragment - Fragment name (without the `+#` marker).
 * @returns {{tokens: string[], removed: boolean}} Result of the shared
 *   'remove' action for a soft fragment.
 */
export function removeSoftFragmentToken(tokens, fragment) {
  const outcome = manageToken(tokens, 'remove', 'softFragment', fragment);
  return outcome;
}
300
+
301
/**
 * Creates a literal term through the supplied data factory.
 *
 * A truthy `datatype` takes precedence: it is expanded with `expandIRI`
 * against `context` and wrapped as a named node. Otherwise a truthy
 * `language` is passed as the second argument. With neither, the factory is
 * called with the value alone.
 *
 * @param {*} value - Lexical value.
 * @param {string} [datatype] - Compact or full datatype IRI.
 * @param {string} [language] - Language tag.
 * @param {object} context - Prefix context used to expand `datatype`.
 * @param {{literal: Function, namedNode: Function}} dataFactory
 * @returns {*} Whatever `dataFactory.literal` returns.
 */
export function createLiteral(value, datatype, language, context, dataFactory) {
  if (datatype) {
    const datatypeIri = expandIRI(datatype, context);
    const datatypeNode = dataFactory.namedNode(datatypeIri);
    return dataFactory.literal(value, datatypeNode);
  }
  return language
    ? dataFactory.literal(value, language)
    : dataFactory.literal(value);
}