mdld-parse 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENCE +167 -0
- package/README.md +345 -190
- package/index.js +264 -248
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -35,56 +35,43 @@ function expandIRI(term, ctx) {
|
|
|
35
35
|
return (ctx['@vocab'] || '') + t;
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
function parseAnnotation(raw) {
|
|
38
|
+
/**
 * Parse a semantic block such as `{=ex:thing .Type name ?knows ^^xsd:date @en}`.
 *
 * Whitespace-separated tokens are classified by their leading sigil:
 *   `=`        alone resets the subject (reported as the sentinel 'RESET')
 *   `=iri`     subject declaration
 *   `^^iri`    literal datatype
 *   `@lang`    literal language
 *   `.iri`     type
 *   `^?iri`, `^iri`, `?iri`, `iri`   predicates, with `form` set to the sigil
 *                                    ('' when there is none)
 *
 * A sigil with nothing after it (e.g. a lone `.` or `?`) is ignored, so it can
 * neither create an empty IRI nor overwrite an earlier datatype/language.
 *
 * @param {string} raw - The block text, with or without the surrounding braces.
 * @returns {{subject: ?string, types: string[], predicates: {iri: string, form: string}[], datatype: ?string, language: ?string}}
 *   The parsed block. Never throws: on failure (e.g. `raw` is not a string)
 *   the error is logged with console.error and an empty result is returned.
 */
function parseSemanticBlock(raw) {
  const emptyResult = () => ({ subject: null, types: [], predicates: [], datatype: null, language: null });
  // Two-character sigils come first so `^^` and `^?` are not mistaken for `^`.
  const sigils = ['^^', '^?', '=', '@', '.', '^', '?'];

  try {
    const cleaned = raw.replace(/^\{|\}$/g, '').trim();
    if (!cleaned) return emptyResult();

    const result = emptyResult();

    for (const token of cleaned.split(/\s+/)) {
      if (token === '=') {
        result.subject = 'RESET';
        continue;
      }

      const sigil = sigils.find((candidate) => token.startsWith(candidate)) ?? '';
      const payload = token.substring(sigil.length);
      if (!payload) continue; // bare sigil: nothing to record

      switch (sigil) {
        case '=':
          result.subject = payload;
          break;
        case '^^':
          result.datatype = payload;
          break;
        case '@':
          result.language = payload;
          break;
        case '.':
          result.types.push(payload);
          break;
        default:
          // '^?', '^', '?' and '' (plain token) are all predicate forms.
          result.predicates.push({ iri: payload, form: sigil });
      }
    }

    return result;
  } catch (error) {
    console.error(`Error parsing semantic block ${raw}:`, error);
    return emptyResult();
  }
}
|
|
86
74
|
|
|
87
|
-
// Token scanning - consolidated helpers
|
|
88
75
|
function scanTokens(text) {
|
|
89
76
|
const tokens = [];
|
|
90
77
|
const lines = text.split('\n');
|
|
@@ -96,7 +83,6 @@ function scanTokens(text) {
|
|
|
96
83
|
const lineStart = pos;
|
|
97
84
|
pos += line.length + 1;
|
|
98
85
|
|
|
99
|
-
// Code blocks
|
|
100
86
|
if (line.startsWith('```')) {
|
|
101
87
|
if (!codeBlock) {
|
|
102
88
|
const fence = line.match(/^(`{3,})(.*)/);
|
|
@@ -104,7 +90,7 @@ function scanTokens(text) {
|
|
|
104
90
|
fence: fence[1],
|
|
105
91
|
start: lineStart,
|
|
106
92
|
content: [],
|
|
107
|
-
lang: fence[2].trim().split(
|
|
93
|
+
lang: fence[2].trim().split(/[\s{]/)[0],
|
|
108
94
|
attrs: fence[2].match(/\{[^}]+\}/)?.[0]
|
|
109
95
|
};
|
|
110
96
|
} else if (line.startsWith(codeBlock.fence)) {
|
|
@@ -125,94 +111,77 @@ function scanTokens(text) {
|
|
|
125
111
|
continue;
|
|
126
112
|
}
|
|
127
113
|
|
|
128
|
-
// Prefix declarations
|
|
129
114
|
const prefixMatch = line.match(/^\[([^\]]+)\]\s*\{:\s*([^}]+)\}/);
|
|
130
115
|
if (prefixMatch) {
|
|
131
116
|
tokens.push({ type: 'prefix', prefix: prefixMatch[1], iri: prefixMatch[2].trim() });
|
|
132
117
|
continue;
|
|
133
118
|
}
|
|
134
119
|
|
|
135
|
-
// Headings
|
|
136
120
|
const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
137
121
|
if (headingMatch) {
|
|
138
122
|
tokens.push({
|
|
139
123
|
type: 'heading',
|
|
140
124
|
depth: headingMatch[1].length,
|
|
141
|
-
range: [lineStart, pos],
|
|
125
|
+
range: [lineStart, pos - 1],
|
|
142
126
|
text: headingMatch[2].trim(),
|
|
143
127
|
attrs: headingMatch[3]
|
|
144
128
|
});
|
|
145
129
|
continue;
|
|
146
130
|
}
|
|
147
131
|
|
|
148
|
-
// Lists
|
|
149
132
|
const listMatch = line.match(/^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
150
133
|
if (listMatch) {
|
|
151
134
|
tokens.push({
|
|
152
135
|
type: 'list',
|
|
153
136
|
indent: listMatch[1].length,
|
|
154
|
-
range: [lineStart, pos],
|
|
137
|
+
range: [lineStart, pos - 1],
|
|
155
138
|
text: listMatch[3].trim(),
|
|
156
139
|
attrs: listMatch[4]
|
|
157
140
|
});
|
|
158
141
|
continue;
|
|
159
142
|
}
|
|
160
143
|
|
|
161
|
-
// Blockquotes
|
|
162
144
|
const blockquoteMatch = line.match(/^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/);
|
|
163
145
|
if (blockquoteMatch) {
|
|
164
146
|
tokens.push({
|
|
165
147
|
type: 'blockquote',
|
|
166
|
-
range: [lineStart, pos],
|
|
148
|
+
range: [lineStart, pos - 1],
|
|
167
149
|
text: blockquoteMatch[1].trim(),
|
|
168
150
|
attrs: blockquoteMatch[2]
|
|
169
151
|
});
|
|
170
152
|
continue;
|
|
171
153
|
}
|
|
172
154
|
|
|
173
|
-
// Paragraphs
|
|
174
155
|
if (line.trim()) {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
attrs: paraMatch[2] || null
|
|
182
|
-
});
|
|
183
|
-
}
|
|
156
|
+
tokens.push({
|
|
157
|
+
type: 'para',
|
|
158
|
+
range: [lineStart, pos - 1],
|
|
159
|
+
text: line.trim(),
|
|
160
|
+
attrs: null
|
|
161
|
+
});
|
|
184
162
|
}
|
|
185
163
|
}
|
|
186
164
|
|
|
187
165
|
return tokens;
|
|
188
166
|
}
|
|
189
167
|
|
|
190
|
-
//
|
|
191
|
-
function
|
|
192
|
-
const
|
|
168
|
+
// Extract inline carriers: [text] {attrs}, [text](url) {attrs}, [text](=iri) {attrs}
|
|
169
|
+
function extractInlineCarriers(text, baseOffset = 0) {
|
|
170
|
+
const carriers = [];
|
|
193
171
|
let pos = 0;
|
|
194
172
|
|
|
195
173
|
while (pos < text.length) {
|
|
196
174
|
const bracketStart = text.indexOf('[', pos);
|
|
197
|
-
if (bracketStart === -1)
|
|
198
|
-
if (pos < text.length) spans.push({ type: 'text', text: text.substring(pos) });
|
|
199
|
-
break;
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
if (bracketStart > pos) spans.push({ type: 'text', text: text.substring(pos, bracketStart) });
|
|
175
|
+
if (bracketStart === -1) break;
|
|
203
176
|
|
|
204
177
|
const bracketEnd = text.indexOf(']', bracketStart);
|
|
205
|
-
if (bracketEnd === -1)
|
|
206
|
-
spans.push({ type: 'text', text: text.substring(bracketStart) });
|
|
207
|
-
break;
|
|
208
|
-
}
|
|
178
|
+
if (bracketEnd === -1) break;
|
|
209
179
|
|
|
210
|
-
const
|
|
180
|
+
const carrierText = text.substring(bracketStart + 1, bracketEnd);
|
|
211
181
|
let spanEnd = bracketEnd + 1;
|
|
212
182
|
let url = null;
|
|
213
|
-
let attrs = null;
|
|
214
183
|
|
|
215
|
-
//
|
|
184
|
+
// Check for (url) or (=iri)
|
|
216
185
|
if (text[spanEnd] === '(') {
|
|
217
186
|
const parenEnd = text.indexOf(')', spanEnd);
|
|
218
187
|
if (parenEnd !== -1) {
|
|
@@ -221,17 +190,32 @@ function extractInlineValue(text, baseOffset = 0) {
|
|
|
221
190
|
}
|
|
222
191
|
}
|
|
223
192
|
|
|
224
|
-
//
|
|
193
|
+
// Check for {attrs}
|
|
194
|
+
let attrs = null;
|
|
225
195
|
const attrsMatch = text.substring(spanEnd).match(/^\s*\{([^}]+)\}/);
|
|
226
196
|
if (attrsMatch) {
|
|
227
197
|
attrs = `{${attrsMatch[1]}}`;
|
|
228
198
|
spanEnd += attrsMatch[0].length;
|
|
229
199
|
}
|
|
230
200
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
201
|
+
// Determine type and resource
|
|
202
|
+
let carrierType = 'span';
|
|
203
|
+
let resourceIRI = null;
|
|
204
|
+
|
|
205
|
+
if (url) {
|
|
206
|
+
if (url.startsWith('=')) {
|
|
207
|
+
carrierType = 'resource';
|
|
208
|
+
resourceIRI = url.substring(1);
|
|
209
|
+
} else {
|
|
210
|
+
carrierType = 'link';
|
|
211
|
+
resourceIRI = url;
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
carriers.push({
|
|
216
|
+
type: carrierType,
|
|
217
|
+
text: carrierText,
|
|
218
|
+
url: resourceIRI,
|
|
235
219
|
attrs: attrs,
|
|
236
220
|
range: [baseOffset + bracketStart, baseOffset + spanEnd]
|
|
237
221
|
});
|
|
@@ -239,23 +223,26 @@ function extractInlineValue(text, baseOffset = 0) {
|
|
|
239
223
|
pos = spanEnd;
|
|
240
224
|
}
|
|
241
225
|
|
|
242
|
-
return
|
|
226
|
+
return carriers;
|
|
243
227
|
}
|
|
244
228
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
229
|
+
/**
 * Build the origin record for one annotated carrier.
 *
 * Type and predicate terms are expanded with expandIRI against `ctx`. The id
 * is hash() of the subject joined by '|' with the JSON of
 * { subject, types, predicates } (expanded); the range is not part of it.
 *
 * @param {string} subject - Subject IRI the block belongs to.
 * @param {string[]} types - Unexpanded type terms.
 * @param {{iri: string, form: string}[]} predicates - Unexpanded predicates.
 * @param {number[]} range - [start, end] offsets of the carrier.
 * @param {object} ctx - Prefix/vocab context; a shallow copy is stored on the block.
 * @returns {{id: *, range: {start: number, end: number}, subject: string, types: string[], predicates: {iri: string, form: string}[], context: object}}
 */
function createBlock(subject, types, predicates, range, ctx) {
  const expandedTypes = types.map((typeTerm) => expandIRI(typeTerm, ctx));
  const expandedPredicates = predicates.map((predicate) => {
    return { iri: expandIRI(predicate.iri, ctx), form: predicate.form };
  });

  // Key order (subject, types, predicates) matters: it feeds the hash.
  const fingerprint = JSON.stringify({
    subject,
    types: expandedTypes,
    predicates: expandedPredicates,
  });

  return {
    id: hash([subject, fingerprint].join('|')),
    range: { start: range[0], end: range[1] },
    subject,
    types: expandedTypes,
    predicates: expandedPredicates,
    context: { ...ctx },
  };
}
|
|
@@ -264,205 +251,233 @@ function emitQuad(quads, quadIndex, blockId, subject, predicate, object, dataFac
|
|
|
264
251
|
if (!subject || !predicate || !object) return;
|
|
265
252
|
const quad = dataFactory.quad(subject, predicate, object);
|
|
266
253
|
quads.push(quad);
|
|
267
|
-
|
|
254
|
+
const key = JSON.stringify([
|
|
255
|
+
quad.subject.value,
|
|
256
|
+
quad.predicate.value,
|
|
257
|
+
quad.object.termType === 'Literal' ? quad.object.value : quad.object.value
|
|
258
|
+
]);
|
|
259
|
+
quadIndex.set(key, blockId);
|
|
268
260
|
}
|
|
269
261
|
|
|
270
|
-
function
|
|
271
|
-
if (datatype)
|
|
272
|
-
|
|
262
|
+
/**
 * Create a literal term through the supplied data factory.
 *
 * A truthy `datatype` wins: it is expanded with expandIRI against `context`
 * and wrapped in a named node. Otherwise a truthy `language` produces a
 * language-tagged literal, and with neither a plain literal is created.
 *
 * @param {string} value - Lexical value of the literal.
 * @param {?string} datatype - Datatype term (prefixed or full), or a falsy value.
 * @param {?string} language - Language tag, or a falsy value.
 * @param {object} context - Prefix/vocab context passed to expandIRI.
 * @param {object} dataFactory - Factory providing `literal` and `namedNode`.
 * @returns {*} Whatever `dataFactory.literal` returns.
 */
function createLiteral(value, datatype, language, context, dataFactory) {
  if (datatype) {
    const datatypeNode = dataFactory.namedNode(expandIRI(datatype, context));
    return dataFactory.literal(value, datatypeNode);
  }

  return language
    ? dataFactory.literal(value, language)
    : dataFactory.literal(value);
}
|
|
275
271
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
const originalSubject = state.currentSubject;
|
|
281
|
-
|
|
282
|
-
// Handle subject declaration
|
|
283
|
-
if (ann.subject === 'RESET') {
|
|
272
|
+
/**
 * Apply one parsed semantic block to its carrier: update the current subject,
 * record an origin block, and emit type and predicate quads.
 *
 * Steps, in order:
 *  1. A 'RESET' subject clears state.currentSubject and stops.
 *  2. Any other declared subject becomes state.currentSubject.
 *  3. With no current subject nothing is emitted.
 *  4. An origin block is stored in state.origin.blocks.
 *  5. Each type is asserted on the carrier's URL node when it has one,
 *     otherwise on the current subject.
 *  6. Each predicate is routed by its form:
 *       ''   subject → literal (carrier text)
 *       '?'  subject → object  (only when the carrier has a URL)
 *       '^'  nothing is emitted
 *       '^?' object → subject  (only when the carrier has a URL)
 *
 * @param {{text: string, range: number[], url: (string|undefined)}} carrier
 * @param {object} sem - Result of parseSemanticBlock.
 * @param {object} state - Parser state (ctx, df, quads, origin, currentSubject); mutated.
 * @returns {void}
 */
function processAnnotation(carrier, sem, state) {
  const declaredSubject = sem.subject;

  if (declaredSubject === 'RESET') {
    state.currentSubject = null;
    return;
  }

  if (declaredSubject) {
    state.currentSubject = state.df.namedNode(expandIRI(declaredSubject, state.ctx));
  }

  const subjectNode = state.currentSubject;
  if (!subjectNode) {
    return; // nothing can be emitted without a subject
  }

  const originBlock = createBlock(
    subjectNode.value,
    sem.types,
    sem.predicates,
    carrier.range,
    state.ctx
  );
  state.origin.blocks.set(originBlock.id, originBlock);

  const literalNode = createLiteral(carrier.text, sem.datatype, sem.language, state.ctx, state.df);
  const objectNode = carrier.url
    ? state.df.namedNode(expandIRI(carrier.url, state.ctx))
    : null;

  const emit = (s, p, o) => {
    emitQuad(state.quads, state.origin.quadIndex, originBlock.id, s, p, o, state.df);
  };

  for (const typeTerm of sem.types) {
    const typeTarget = objectNode || subjectNode;
    const rdfTypeNode = state.df.namedNode(expandIRI('rdf:type', state.ctx));
    const typeNode = state.df.namedNode(expandIRI(typeTerm, state.ctx));
    emit(typeTarget, rdfTypeNode, typeNode);
  }

  for (const predicate of sem.predicates) {
    const predicateNode = state.df.namedNode(expandIRI(predicate.iri, state.ctx));

    switch (predicate.form) {
      case '':
        emit(subjectNode, predicateNode, literalNode);
        break;
      case '?':
        if (objectNode) emit(subjectNode, predicateNode, objectNode);
        break;
      case '^?':
        if (objectNode) emit(objectNode, predicateNode, subjectNode);
        break;
      default:
        // '^' (reverse literal) has no valid quad, so nothing is emitted.
        break;
    }
  }
}
|
|
343
339
|
|
|
344
|
-
//
|
|
345
|
-
function
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
340
|
+
/**
 * Apply a list-context annotation (a paragraph ending in `{...}` that is
 * followed by list items) to every list item that declares a subject.
 *
 * For each item:
 *  - the item subject is the first inline carrier that is a `[text](=iri)`
 *    resource, or whose `{...}` block declares a subject other than RESET;
 *    items without one are skipped;
 *  - every context type is asserted on the item subject;
 *  - every context predicate links context and item: `^` and `^?` forms emit
 *    item → context, all other forms emit context → item;
 *  - the item's own `{...}` attributes and its inline carriers' attributes are
 *    then processed with the item as the current subject.
 *
 * state.currentSubject is restored after each item, even if processing throws.
 * Context quads are indexed under the fixed block id 'list-context'.
 *
 * @param {object} contextSem - Parsed semantic block of the context paragraph.
 * @param {object[]} listTokens - List tokens ({text, range, attrs}) that follow it.
 * @param {object} state - Parser state (ctx, df, quads, origin, currentSubject); mutated.
 * @returns {void}
 */
function processListContext(contextSem, listTokens, state) {
  const contextSubject = state.currentSubject;
  if (!contextSubject) return;

  listTokens.forEach(listToken => {
    // NOTE(review): scanTokens stores the item text without indent/list marker
    // while range[0] is the line start, so carrier ranges computed here look
    // shifted by the marker width - confirm before relying on them for edits.
    const carriers = extractInlineCarriers(listToken.text, listToken.range[0]);

    // Find the item subject from the first carrier that declares one.
    let itemSubject = null;
    for (const carrier of carriers) {
      if (carrier.url && carrier.type === 'resource') {
        // [text](=iri) declares a subject
        itemSubject = state.df.namedNode(expandIRI(carrier.url, state.ctx));
        break;
      }
      if (carrier.attrs) {
        const carrierSem = parseSemanticBlock(carrier.attrs);
        if (carrierSem.subject && carrierSem.subject !== 'RESET') {
          itemSubject = state.df.namedNode(expandIRI(carrierSem.subject, state.ctx));
          break;
        }
      }
    }

    if (!itemSubject) return; // List items must declare subjects

    // Apply context types to the item.
    contextSem.types.forEach(typeIRI => {
      emitQuad(
        state.quads,
        state.origin.quadIndex,
        'list-context',
        itemSubject,
        state.df.namedNode(expandIRI('rdf:type', state.ctx)),
        state.df.namedNode(expandIRI(typeIRI, state.ctx)),
        state.df
      );
    });

    // Emit context relationships.
    contextSem.predicates.forEach(pred => {
      const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));

      if (pred.form === '^' || pred.form === '^?') {
        // Reverse: item → context
        emitQuad(state.quads, state.origin.quadIndex, 'list-context',
          itemSubject, P, contextSubject, state.df);
      } else {
        // Forward: context → item
        emitQuad(state.quads, state.origin.quadIndex, 'list-context',
          contextSubject, P, itemSubject, state.df);
      }
    });

    // Process the item's own annotations with the item as current subject.
    const prevSubject = state.currentSubject;
    state.currentSubject = itemSubject;
    try {
      if (listToken.attrs) {
        const itemSem = parseSemanticBlock(listToken.attrs);
        // The literal is the item text with every [label](target) link
        // reduced to its label (global flag: all links, not just the first).
        const plainText = listToken.text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
        processAnnotation({ type: 'list', text: plainText, range: listToken.range }, itemSem, state);
      }

      carriers.forEach(carrier => {
        if (carrier.attrs) {
          processAnnotation(carrier, parseSemanticBlock(carrier.attrs), state);
        }
      });
    } finally {
      // Never leak the item subject into the surrounding document state.
      state.currentSubject = prevSubject;
    }
  });
}
|
|
408
424
|
|
|
409
|
-
// Main parsing function
|
|
410
425
|
export function parse(text, options = {}) {
|
|
411
426
|
const state = {
|
|
412
427
|
ctx: { ...DEFAULT_CONTEXT, ...(options.context || {}) },
|
|
413
428
|
df: options.dataFactory || DataFactory,
|
|
414
429
|
quads: [],
|
|
415
430
|
origin: { blocks: new Map(), quadIndex: new Map() },
|
|
416
|
-
currentSubject: null
|
|
417
|
-
listContext: null
|
|
431
|
+
currentSubject: null
|
|
418
432
|
};
|
|
419
433
|
|
|
420
434
|
const tokens = scanTokens(text);
|
|
435
|
+
|
|
436
|
+
// Apply prefix declarations
|
|
421
437
|
tokens.filter(t => t.type === 'prefix').forEach(t => state.ctx[t.prefix] = t.iri);
|
|
422
438
|
|
|
423
439
|
for (let i = 0; i < tokens.length; i++) {
|
|
424
440
|
const token = tokens[i];
|
|
425
|
-
const nextToken = tokens[i + 1];
|
|
426
441
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
442
|
+
if (token.type === 'heading' && token.attrs) {
|
|
443
|
+
const sem = parseSemanticBlock(token.attrs);
|
|
444
|
+
const carrier = { type: 'heading', text: token.text, range: token.range };
|
|
445
|
+
processAnnotation(carrier, sem, state);
|
|
446
|
+
} else if (token.type === 'code' && token.attrs) {
|
|
447
|
+
const sem = parseSemanticBlock(token.attrs);
|
|
448
|
+
const carrier = { type: 'code', text: token.text, range: token.range };
|
|
449
|
+
processAnnotation(carrier, sem, state);
|
|
450
|
+
} else if (token.type === 'blockquote' && token.attrs) {
|
|
451
|
+
const sem = parseSemanticBlock(token.attrs);
|
|
452
|
+
const carrier = { type: 'blockquote', text: token.text, range: token.range };
|
|
453
|
+
processAnnotation(carrier, sem, state);
|
|
454
|
+
} else if (token.type === 'para') {
|
|
455
|
+
// Check for list context
|
|
456
|
+
const followingLists = [];
|
|
457
|
+
let j = i + 1;
|
|
458
|
+
while (j < tokens.length && tokens[j].type === 'list') {
|
|
459
|
+
followingLists.push(tokens[j]);
|
|
460
|
+
j++;
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
// Check if this paragraph ends with {attrs} and is followed by lists
|
|
464
|
+
const contextMatch = token.text.match(/^(.+?)\s*\{([^}]+)\}$/);
|
|
465
|
+
if (contextMatch && followingLists.length > 0) {
|
|
466
|
+
// This is a list context annotation
|
|
467
|
+
const contextSem = parseSemanticBlock(`{${contextMatch[2]}}`);
|
|
468
|
+
processListContext(contextSem, followingLists, state);
|
|
469
|
+
i = j - 1;
|
|
470
|
+
continue;
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
// Process inline carriers
|
|
474
|
+
const carriers = extractInlineCarriers(token.text, token.range[0]);
|
|
475
|
+
carriers.forEach(carrier => {
|
|
476
|
+
if (carrier.attrs) {
|
|
477
|
+
const sem = parseSemanticBlock(carrier.attrs);
|
|
478
|
+
processAnnotation(carrier, sem, state);
|
|
458
479
|
}
|
|
459
|
-
|
|
460
|
-
case 'list':
|
|
461
|
-
if (state.listContext) processListItem(token, state);
|
|
462
|
-
break;
|
|
463
|
-
case 'blockquote':
|
|
464
|
-
if (state.currentSubject) processAnnotation(token, state, token.text);
|
|
465
|
-
break;
|
|
480
|
+
});
|
|
466
481
|
}
|
|
467
482
|
}
|
|
468
483
|
|
|
@@ -472,19 +487,16 @@ export function parse(text, options = {}) {
|
|
|
472
487
|
/**
 * Shorten a full IRI using the context.
 *
 * Resolution order:
 *  1. Values that are not strings, or do not start with 'http', are returned
 *     unchanged.
 *  2. An IRI under `ctx['@vocab']` is returned relative to it (no prefix).
 *  3. Otherwise the prefix whose namespace is the longest match is used,
 *     giving `prefix:local`. Empty or non-string namespaces are ignored, since
 *     an empty namespace would otherwise match every IRI.
 *  4. With no match the IRI is returned as is.
 *
 * @param {*} iri - The IRI to shorten.
 * @param {object} ctx - Map of prefix → namespace IRI, plus optional '@vocab'.
 * @returns {*} The shortened form, or `iri` itself when nothing applies.
 */
function shortenIRI(iri, ctx) {
  if (typeof iri !== 'string' || !iri.startsWith('http')) return iri;

  const vocab = ctx['@vocab'];
  if (typeof vocab === 'string' && vocab && iri.startsWith(vocab)) {
    return iri.substring(vocab.length);
  }

  let bestPrefix = null;
  let bestNamespace = '';
  for (const [prefix, namespace] of Object.entries(ctx)) {
    if (prefix === '@vocab') continue;
    if (typeof namespace !== 'string' || namespace === '') continue;
    // Strictly longer only, so the first of two equal namespaces wins.
    if (iri.startsWith(namespace) && namespace.length > bestNamespace.length) {
      bestPrefix = prefix;
      bestNamespace = namespace;
    }
  }

  if (bestPrefix === null) return iri;
  return `${bestPrefix}:${iri.substring(bestNamespace.length)}`;
}
|
|
490
502
|
|
|
@@ -497,7 +509,12 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
497
509
|
|
|
498
510
|
if (diff.delete) {
|
|
499
511
|
diff.delete.forEach(quad => {
|
|
500
|
-
|
|
512
|
+
if (!quad || !quad.subject) return;
|
|
513
|
+
const key = JSON.stringify([
|
|
514
|
+
quad.subject.value,
|
|
515
|
+
quad.predicate.value,
|
|
516
|
+
quad.object.termType === 'Literal' ? quad.object.value : quad.object.value
|
|
517
|
+
]);
|
|
501
518
|
const blockId = origin?.quadIndex.get(key);
|
|
502
519
|
if (!blockId) return;
|
|
503
520
|
|
|
@@ -507,7 +524,7 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
507
524
|
const start = block.range.start;
|
|
508
525
|
const end = block.range.end;
|
|
509
526
|
const before = text.substring(Math.max(0, start - 1), start);
|
|
510
|
-
const after = text.substring(end, end + 1);
|
|
527
|
+
const after = text.substring(end, Math.min(end + 1, text.length));
|
|
511
528
|
const deleteStart = before === '\n' ? start - 1 : start;
|
|
512
529
|
const deleteEnd = after === '\n' ? end + 1 : end;
|
|
513
530
|
|
|
@@ -536,7 +553,6 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
536
553
|
}
|
|
537
554
|
|
|
538
555
|
const newLine = `\n[${objText}] {${pred}}`;
|
|
539
|
-
|
|
540
556
|
edits.push({ start: insertPos, end: insertPos, text: newLine });
|
|
541
557
|
});
|
|
542
558
|
}
|
|
@@ -549,4 +565,4 @@ export function serialize({ text, diff, origin, options = {} }) {
|
|
|
549
565
|
return { text: result, origin };
|
|
550
566
|
}
|
|
551
567
|
|
|
552
|
-
export default { parse, serialize,
|
|
568
|
+
export default { parse, serialize, parseSemanticBlock };
|