mdld-parse 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # MD-LD Parse v0.4.1
1
+ # MD-LD
2
2
 
3
3
  **Markdown-Linked Data (MD-LD)** — a deterministic, streaming-friendly RDF authoring format that extends Markdown with explicit `{...}` annotations.
4
4
 
@@ -184,33 +184,20 @@ ex:armstrong a schema:Person .
184
184
 
185
185
  ### Lists
186
186
 
187
- Lists require explicit subjects per item. Use numbered ordered lists for `rdf:List` structures:
187
+ Lists require explicit subjects per item.
188
188
 
189
189
  ```markdown
190
190
  # Recipe {=ex:recipe}
191
191
 
192
192
  Ingredients: {?ingredient .Ingredient}
193
-
194
193
  - Flour {=ex:flour name}
195
194
  - Water {=ex:water name}
196
-
197
- ## Status Values {=ex:statusValues}
198
- Status values: {?ex:in .ex:StatusType label}
199
- 1. Active {=ex:Active}
200
- 2. Pending {=ex:Pending}
201
- 3. Inactive {=ex:Inactive}
202
195
  ```
203
196
 
204
197
  ```turtle
205
198
  ex:recipe schema:ingredient ex:flour, ex:water .
206
199
  ex:flour a schema:Ingredient ; schema:name "Flour" .
207
200
  ex:water a schema:Ingredient ; schema:name "Water" .
208
-
209
- # Ordered list generates W3C RDF Collections
210
- ex:statusValues ex:in ex:statusValues#list-1-1 .
211
- ex:statusValues#list-1-1 rdf:first ex:Active ; rdf:rest ex:statusValues#list-1-2 .
212
- ex:statusValues#list-1-2 rdf:first ex:Pending ; rdf:rest ex:statusValues#list-1-3 .
213
- ex:statusValues#list-1-3 rdf:first ex:Inactive ; rdf:rest rdf:nil .
214
201
  ```
215
202
 
216
203
  ### Code Blocks
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdld-parse",
3
- "version": "0.5.0",
3
+ "version": "0.5.1",
4
4
  "description": "A standards-compliant parser for **MD-LD (Markdown-Linked Data)** — a human-friendly RDF authoring format that extends Markdown with semantic annotations.",
5
5
  "type": "module",
6
6
  "main": "index.js",
package/src/parse.js CHANGED
@@ -13,8 +13,7 @@ const URL_REGEX = /^[a-zA-Z][a-zA-Z0-9+.-]*:/;
13
13
  const FENCE_REGEX = /^(`{3,})(.*)/;
14
14
  const PREFIX_REGEX = /^\[([^\]]+)\]\s*<([^>]+)>/;
15
15
  const HEADING_REGEX = /^(#{1,6})\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
16
- const UNORDERED_LIST_REGEX = /^(\s*)([-*+])\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
17
- const ORDERED_LIST_REGEX = /^(\s*)(\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
16
+ const UNORDERED_LIST_REGEX = /^(\s*)([-*+]|\d+\.)\s+(.+?)(?:\s*(\{[^}]+\}))?\s*$/;
18
17
  const BLOCKQUOTE_REGEX = /^>\s+(.+?)(?:\s*(\{[^}]+\}))?$/;
19
18
  const STANDALONE_SUBJECT_REGEX = /^\s*\{=(.*?)\}\s*$/;
20
19
  const LIST_CONTEXT_REGEX = /^(.+?)\s*\{([^}]+)\}$/;
@@ -69,7 +68,7 @@ const createListToken = (type, line, lineStart, pos, match, indent = null) => {
69
68
  const attrs = match[4] || null;
70
69
  const prefix = match[1].length + (match[2] ? match[2].length : 0);
71
70
  const rangeInfo = calcRangeInfo(line, attrs, lineStart, prefix, match[3].length);
72
- const extra = indent !== null ? { indent } : { indent: match[1].length, number: parseInt(match[2]) };
71
+ const extra = indent !== null ? { indent } : { indent: match[1].length };
73
72
  return createToken(type, [lineStart, pos - 1], match[3].trim(), attrs,
74
73
  rangeInfo.attrsRange, rangeInfo.valueRange, extra);
75
74
  };
@@ -149,15 +148,7 @@ function scanTokens(text) {
149
148
  test: line => UNORDERED_LIST_REGEX.test(line),
150
149
  process: (line, lineStart, pos) => {
151
150
  const match = UNORDERED_LIST_REGEX.exec(line);
152
- tokens.push(createListToken('unordered-list', line, lineStart, pos, match, match[1].length));
153
- return true;
154
- }
155
- },
156
- {
157
- test: line => ORDERED_LIST_REGEX.test(line),
158
- process: (line, lineStart, pos) => {
159
- const match = ORDERED_LIST_REGEX.exec(line);
160
- tokens.push(createListToken('ordered-list', line, lineStart, pos, match));
151
+ tokens.push(createListToken('list', line, lineStart, pos, match, match[1].length));
161
152
  return true;
162
153
  }
163
154
  },
@@ -507,19 +498,6 @@ export function findItemSubject(listToken, carriers, state) {
507
498
  return null;
508
499
  }
509
500
 
510
- function hasOwnPredicates(listToken, carriers) {
511
- if (listToken.attrs) {
512
- const attrs = parseSemCached(listToken.attrs);
513
- if (attrs.predicates.some(p => !p.subject && p.iri !== 'RESET')) {
514
- return true;
515
- }
516
- }
517
- return carriers.some(carrier => {
518
- const carrierAttrs = parseSemCached(carrier.attrs);
519
- return carrierAttrs.predicates.some(p => !p.subject && p.iri !== 'RESET');
520
- });
521
- }
522
-
523
501
  const processContextSem = ({ sem, itemSubject, contextSubject, inheritLiterals = false, state, blockId = 'list-context' }) => {
524
502
  sem.types.forEach(t => {
525
503
  const typeIRI = typeof t === 'string' ? t : t.iri;
@@ -616,190 +594,6 @@ const processListItem = (token, state) => {
616
594
  }
617
595
  };
618
596
 
619
- const applyListAnchorAnnotations = (itemSubject, contextSem, state, listItemText, contextToken) => {
620
- // Use the context token's ranges for proper origin tracking
621
- const baseToken = contextToken || { range: [0, 0], attrsRange: [0, 0] };
622
-
623
- const paragraphText = baseToken.text || '';
624
- const annotationMatch = paragraphText.match(/\{[^}]+\}/);
625
-
626
- let annotationStart;
627
- if (annotationMatch && baseToken.range) {
628
- // Found annotation in paragraph, calculate its absolute position
629
- const relativeStart = paragraphText.indexOf(annotationMatch[0]);
630
- annotationStart = baseToken.range[0] + relativeStart;
631
- } else {
632
- // Fallback to start of token
633
- annotationStart = baseToken.range ? baseToken.range[0] : 0;
634
- }
635
-
636
- // Apply types with proper ranges
637
- contextSem.types.forEach(type => {
638
- const entry = contextSem.entries.find(e => e.kind === 'type' && e.iri === type.iri);
639
- if (entry && entry.relRange) {
640
- // Calculate absolute range: annotation start + relative range within annotation
641
- const typeRange = [annotationStart + entry.relRange.start, annotationStart + entry.relRange.end];
642
-
643
- emitQuad(state.quads, state.origin.quadIndex, 'list-anchor-type',
644
- itemSubject,
645
- state.df.namedNode(expandIRI('rdf:type', state.ctx)),
646
- state.df.namedNode(expandIRI(type.iri, state.ctx)),
647
- state.df,
648
- { type: 'list-anchor', range: typeRange, entryIndex: type.entryIndex }
649
- );
650
- }
651
- });
652
-
653
- // Apply predicates with proper ranges
654
- contextSem.predicates.forEach(pred => {
655
- if (pred.form !== '?' && pred.form !== '!') { // Skip context predicates
656
- const entry = contextSem.entries.find(e => e.kind === 'property' && e.iri === pred.iri);
657
- if (entry && entry.relRange) {
658
- // Calculate absolute range: annotation start + relative range within annotation
659
- const predRange = [annotationStart + entry.relRange.start, annotationStart + entry.relRange.end];
660
-
661
- const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
662
-
663
- // For literal predicates, the value comes from the list item text
664
- let objectValue;
665
- if (pred.form === '') {
666
- objectValue = state.df.literal(listItemText || '');
667
- } else {
668
- // For other forms, this would need more complex handling
669
- objectValue = state.df.literal(listItemText || '');
670
- }
671
-
672
- emitQuad(state.quads, state.origin.quadIndex, 'list-anchor-predicate',
673
- itemSubject, P, objectValue, state.df,
674
- { type: 'list-anchor', range: predRange, entryIndex: pred.entryIndex }
675
- );
676
- }
677
- }
678
- });
679
- }
680
-
681
- function processOrderedListItem(token, state) {
682
- if (!state.isProcessingOrderedList) {
683
- state.listCounter = (state.listCounter || 0) + 1;
684
- state.rdfListIndex = 0;
685
- state.firstListNode = null;
686
- state.previousListNode = null;
687
- state.contextConnected = false;
688
- state.isProcessingOrderedList = true;
689
- }
690
-
691
- generateRdfListTriples(token, state);
692
-
693
- const listFrame = state.listStack[state.listStack.length - 1];
694
- if (listFrame?.contextSem) {
695
- const carriers = getCarriers(token);
696
- const itemInfo = findItemSubject(token, carriers, state);
697
- if (itemInfo?.subject) {
698
- applyListAnchorAnnotations(itemInfo.subject, listFrame.contextSem, state, token.text, listFrame.contextToken);
699
- }
700
- }
701
-
702
- if (listFrame?.contextSem && listFrame?.contextSubject && !state.contextConnected) {
703
- listFrame.contextSem.predicates.forEach(pred => {
704
- if (pred.form === '?') {
705
- const P = state.df.namedNode(expandIRI(pred.iri, state.ctx));
706
- const firstListNode = state.firstListNode;
707
- if (firstListNode) {
708
- emitQuad(state.quads, state.origin.quadIndex, 'ordered-list-context',
709
- listFrame.contextSubject, P, state.df.namedNode(firstListNode), state.df);
710
- state.contextConnected = true;
711
- }
712
- }
713
- });
714
- }
715
- }
716
-
717
- function generateRdfListTriples(token, state) {
718
- const carriers = getCarriers(token);
719
- const listIndex = (state.rdfListIndex || 0) + 1;
720
- state.rdfListIndex = listIndex;
721
- const listNodeName = `list-${state.listCounter}-${listIndex}`;
722
-
723
- const listFrame = state.listStack[state.listStack.length - 1];
724
- const contextSubject = listFrame?.contextSubject || state.currentSubject || state.documentSubject;
725
- const baseIRI = contextSubject ? contextSubject.value : (state.ctx[''] || '');
726
-
727
- const listNodeIri = baseIRI.includes('#')
728
- ? `${baseIRI.split('#')[0]}#${listNodeName}`
729
- : `${baseIRI}#${listNodeName}`;
730
-
731
- if (!state.firstListNode) state.firstListNode = listNodeIri;
732
-
733
- // Emit rdf:type triple with origin tracking
734
- emitQuad(state.quads, state.origin.quadIndex, 'ordered-list-rdf-type',
735
- DataFactory.namedNode(listNodeIri),
736
- DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
737
- DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#List'),
738
- DataFactory,
739
- { type: 'ordered-list', range: token.valueRange || token.range, listNodeName }
740
- );
741
-
742
- const itemInfo = findItemSubject(token, carriers, state);
743
- let firstObject;
744
- if (itemInfo?.value) {
745
- firstObject = itemInfo.value;
746
- } else if (itemInfo?.subject) {
747
- firstObject = itemInfo.subject;
748
- } else {
749
- firstObject = DataFactory.literal(token.text);
750
- }
751
-
752
- // Determine the appropriate range based on object type
753
- let originRange;
754
- if (itemInfo?.subject) {
755
- // For IRIs, target the annotation range
756
- originRange = token.attrsRange || token.valueRange || token.range;
757
- } else {
758
- // For literals, target the value range
759
- originRange = token.valueRange || token.range;
760
- }
761
-
762
- // Emit rdf:first triple with origin tracking
763
- emitQuad(state.quads, state.origin.quadIndex, 'ordered-list-rdf-first',
764
- DataFactory.namedNode(listNodeIri),
765
- DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#first'),
766
- firstObject,
767
- DataFactory,
768
- { type: 'ordered-list', range: originRange, listNodeName }
769
- );
770
-
771
- if (state.previousListNode) {
772
- // Find and remove the previous rdf:rest -> rdf:nil quad, then emit a new one
773
- const prevRestQuadIndex = state.quads.findIndex(q =>
774
- q.subject.value === state.previousListNode &&
775
- q.predicate.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'
776
- );
777
- if (prevRestQuadIndex !== -1) {
778
- // Remove the old quad
779
- state.quads.splice(prevRestQuadIndex, 1);
780
-
781
- // Emit new rdf:rest quad with proper origin tracking
782
- emitQuad(state.quads, state.origin.quadIndex, 'ordered-list-rdf-rest-update',
783
- DataFactory.namedNode(state.previousListNode),
784
- DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'),
785
- DataFactory.namedNode(listNodeIri),
786
- DataFactory,
787
- { type: 'ordered-list', range: token.valueRange || token.range, listNodeName: state.previousListNode }
788
- );
789
- }
790
- }
791
-
792
- // Emit rdf:rest triple with origin tracking
793
- emitQuad(state.quads, state.origin.quadIndex, 'ordered-list-rdf-rest',
794
- DataFactory.namedNode(listNodeIri),
795
- DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'),
796
- DataFactory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'),
797
- DataFactory,
798
- { type: 'ordered-list', range: token.valueRange || token.range, listNodeName }
799
- );
800
-
801
- state.previousListNode = listNodeIri;
802
- }
803
597
 
804
598
  function processListContextFromParagraph(token, state) {
805
599
  const contextMatch = LIST_CONTEXT_REGEX.exec(token.text);
@@ -825,7 +619,7 @@ function processListContextFromParagraph(token, state) {
825
619
  }
826
620
 
827
621
  const nextToken = state.tokens?.[state.currentTokenIndex + 1];
828
- if (state.listStack.length > 0 && nextToken && (nextToken.type === 'unordered-list' || nextToken.type === 'ordered-list')) {
622
+ if (state.listStack.length > 0 && nextToken && nextToken.type === 'list') {
829
623
  const currentFrame = state.listStack[state.listStack.length - 1];
830
624
  if (currentFrame.anchorSubject && nextToken.indent > currentFrame.indent) {
831
625
  contextSubject = currentFrame.anchorSubject;
@@ -872,7 +666,6 @@ function processStandaloneSubject(token, state) {
872
666
 
873
667
  const TOKEN_PROCESSORS = {
874
668
  heading: (token, state) => {
875
- state.isProcessingOrderedList = false;
876
669
  if (token.attrs) {
877
670
  const headingSem = parseSemCached(token.attrs);
878
671
  if (headingSem.subject) {
@@ -883,30 +676,20 @@ const TOKEN_PROCESSORS = {
883
676
  processTokenAnnotations(token, state, token.type);
884
677
  },
885
678
  code: (token, state) => {
886
- state.isProcessingOrderedList = false;
887
679
  processTokenAnnotations(token, state, token.type);
888
680
  },
889
681
  blockquote: (token, state) => {
890
- state.isProcessingOrderedList = false;
891
682
  processTokenAnnotations(token, state, token.type);
892
683
  },
893
684
  para: (token, state) => {
894
- if (!token.text.includes('{?') && !token.text.includes('{!')) {
895
- state.isProcessingOrderedList = false;
896
- }
897
685
  processStandaloneSubject(token, state);
898
686
  processListContextFromParagraph(token, state);
899
687
  processTokenAnnotations(token, state, token.type);
900
688
  },
901
- 'unordered-list': (token, state) => {
902
- state.isProcessingOrderedList = false;
689
+ list: (token, state) => {
903
690
  manageListStack(token, state);
904
691
  processListItem(token, state);
905
692
  },
906
- 'ordered-list': (token, state) => {
907
- manageListStack(token, state);
908
- processOrderedListItem(token, state);
909
- }
910
693
  };
911
694
 
912
695
  export function parse(text, options = {}) {
@@ -920,13 +703,7 @@ export function parse(text, options = {}) {
920
703
  listStack: [],
921
704
  pendingListContext: null,
922
705
  tokens: null,
923
- currentTokenIndex: -1,
924
- listCounter: 0,
925
- rdfListIndex: 0,
926
- firstListNode: null,
927
- previousListNode: null,
928
- contextConnected: false,
929
- isProcessingOrderedList: false
706
+ currentTokenIndex: -1
930
707
  };
931
708
 
932
709
  state.tokens = scanTokens(text);
package/src/serialize.js CHANGED
@@ -177,175 +177,6 @@ export function serialize({ text, diff, origin, options = {} }) {
177
177
  return applyEdits(text, edits, ctx, base);
178
178
  }
179
179
 
180
- function determineListOperation(listData, normAdds, normDeletes, base) {
181
- // Check if this is list modification (has both adds and deletes for list structure)
182
- const hasListAdds = normAdds.some(quad =>
183
- quad.subject.value.includes('#list-') ||
184
- quad.predicate.value.endsWith('#first') ||
185
- quad.predicate.value.endsWith('#rest') ||
186
- quad.predicate.value.endsWith('#type') && quad.object.value.endsWith('#List')
187
- );
188
-
189
- const hasListDeletes = normDeletes.some(quad =>
190
- quad.subject.value.includes('#list-') ||
191
- quad.predicate.value.endsWith('#first') ||
192
- quad.predicate.value.endsWith('#rest') ||
193
- quad.predicate.value.endsWith('#type') && quad.object.value.endsWith('#List')
194
- );
195
-
196
- if (hasListAdds || hasListDeletes) {
197
- return { type: 'modify' };
198
- }
199
-
200
- // Check if this is a list creation (all list structure quads are in adds)
201
- const allListQuads = [];
202
- listData.items.forEach(item => {
203
- if (item.typeQuad) allListQuads.push(item.typeQuad);
204
- if (item.firstQuad) allListQuads.push(item.firstQuad);
205
- if (item.restQuad) allListQuads.push(item.restQuad);
206
- });
207
- if (listData.headQuad) allListQuads.push(listData.headQuad);
208
-
209
- const allInAdds = allListQuads.every(quad =>
210
- normAdds.some(add =>
211
- add.subject.value === quad.subject.value &&
212
- add.predicate.value === quad.predicate.value &&
213
- add.object.value === quad.object.value
214
- )
215
- );
216
-
217
- if (allInAdds) {
218
- return { type: 'create' };
219
- }
220
-
221
- // Check if this is a list deletion (all list structure quads are in deletes)
222
- const allInDeletes = allListQuads.every(quad =>
223
- normDeletes.some(del =>
224
- del.subject.value === quad.subject.value &&
225
- del.predicate.value === quad.predicate.value &&
226
- del.object.value === quad.object.value
227
- )
228
- );
229
-
230
- if (allInDeletes) {
231
- return { type: 'delete' };
232
- }
233
-
234
- // Default to modify for any list changes
235
- return { type: 'modify' };
236
- }
237
-
238
- function detectRdfLists(quads, base, ctx) {
239
- const lists = new Map();
240
-
241
- // Find all rdf:List instances (W3C standard approach)
242
- const listNodes = quads.filter(q =>
243
- q.predicate.value.endsWith('#type') &&
244
- q.object.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#List'
245
- );
246
-
247
- // Find parent subjects that point to list nodes
248
- const parentToListNodes = new Map();
249
-
250
- for (const listNode of listNodes) {
251
- const listSubject = listNode.subject.value;
252
-
253
- // Find the parent subject that points to this list node
254
- const parentQuad = quads.find(q =>
255
- q.object.value === listSubject &&
256
- q.subject.value !== listSubject // Don't match self-references
257
- );
258
-
259
- if (parentQuad) {
260
- const parentSubject = parentQuad.subject.value;
261
- if (!parentToListNodes.has(parentSubject)) {
262
- parentToListNodes.set(parentSubject, []);
263
- }
264
- parentToListNodes.get(parentSubject).push(listSubject);
265
- }
266
- }
267
-
268
- // For each parent, build the complete list
269
- for (const [parentSubject, listNodeSubjects] of parentToListNodes) {
270
- const allListItems = [];
271
-
272
- for (const listNodeSubject of listNodeSubjects) {
273
- // Find all quads for this list node
274
- const listNodeQuads = quads.filter(q => q.subject.value === listNodeSubject);
275
-
276
- const firstQuad = listNodeQuads.find(q =>
277
- q.predicate.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first'
278
- );
279
-
280
- const restQuad = listNodeQuads.find(q =>
281
- q.predicate.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'
282
- );
283
-
284
- if (firstQuad) {
285
- allListItems.push({
286
- listNode: listNodeSubject,
287
- item: firstQuad.object,
288
- typeQuad: listNode,
289
- firstQuad,
290
- restQuad
291
- });
292
- }
293
- }
294
-
295
- // Sort items by following rdf:rest chains to maintain order
296
- const orderedItems = [];
297
- const processed = new Set();
298
-
299
- // Find the head (item not referenced as a rest)
300
- const restTargets = new Set(
301
- allListItems
302
- .filter(item => item.restQuad?.object?.value !== 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil')
303
- .map(item => item.restQuad.object.value)
304
- );
305
-
306
- const headItems = allListItems.filter(item => !restTargets.has(item.listNode));
307
-
308
- if (headItems.length > 0) {
309
- // Start from head and follow rest chain
310
- let currentItem = headItems[0];
311
- let index = 0;
312
-
313
- while (currentItem && !processed.has(currentItem.listNode)) {
314
- currentItem.itemIndex = index++;
315
- orderedItems.push(currentItem);
316
- processed.add(currentItem.listNode);
317
-
318
- // Find next item in chain
319
- const nextListNode = currentItem.restQuad?.object?.value;
320
- currentItem = allListItems.find(item => item.listNode === nextListNode);
321
- }
322
- } else {
323
- // Fallback: sort by list node number
324
- allListItems.sort((a, b) => {
325
- const aNum = parseInt(a.listNode.split('-').pop()) || 0;
326
- const bNum = parseInt(b.listNode.split('-').pop()) || 0;
327
- return aNum - bNum;
328
- });
329
-
330
- allListItems.forEach((item, index) => {
331
- item.itemIndex = index;
332
- orderedItems.push(item);
333
- });
334
- }
335
-
336
- // Store the unified list under the parent subject
337
- lists.set(parentSubject, {
338
- subject: parentSubject,
339
- items: orderedItems,
340
- headQuad: quads.find(q =>
341
- q.subject.value === parentSubject &&
342
- listNodeSubjects.includes(q.object.value)
343
- )
344
- });
345
- }
346
-
347
- return lists;
348
- }
349
180
 
350
181
  function planOperations(diff, base, ctx) {
351
182
  // Normalize quads once
@@ -357,7 +188,6 @@ function planOperations(diff, base, ctx) {
357
188
  vacantSlotOccupations: [],
358
189
  deletes: [],
359
190
  adds: [],
360
- listOperations: [], // NEW: Support for ordered list operations
361
191
  consumedAdds: new Set()
362
192
  };
363
193
 
@@ -445,55 +275,6 @@ function planOperations(diff, base, ctx) {
445
275
  plan.adds.push({ quad, targetBlock });
446
276
  }
447
277
 
448
- // NEW: Detect RDF lists and plan list operations
449
- const allQuads = [...normAdds, ...normDeletes];
450
- const rdfLists = detectRdfLists(allQuads, base, ctx);
451
-
452
- for (const [listSubject, listData] of rdfLists) {
453
- const operation = determineListOperation(listData, normAdds, normDeletes, base);
454
- if (operation) {
455
- plan.listOperations.push({
456
- type: 'rdf-list',
457
- subject: listSubject,
458
- items: listData.items,
459
- operation,
460
- headQuad: listData.headQuad,
461
- normAdds, // Pass through for reconstruction
462
- normDeletes // Pass through for reconstruction
463
- });
464
-
465
- // Mark all list-related quads as consumed to prevent double processing
466
- const allListQuads = [];
467
- listData.items.forEach(item => {
468
- if (item.typeQuad) allListQuads.push(item.typeQuad);
469
- if (item.firstQuad) allListQuads.push(item.firstQuad);
470
- if (item.restQuad) allListQuads.push(item.restQuad);
471
- });
472
- if (listData.headQuad) allListQuads.push(listData.headQuad);
473
-
474
- // Also mark list item annotation quads as consumed
475
- Array.from(base.quadIndex.entries()).forEach(([key, entry]) => {
476
- const parsed = parseQuadIndexKey(key);
477
- if (parsed && listData.items.some(item => item.item.value === parsed.s)) {
478
- plan.consumedAdds.add(key);
479
- }
480
- });
481
-
482
- // Also mark any quads with list item subjects as consumed
483
- normAdds.forEach(quad => {
484
- if (listData.items.some(item => item.item.value === quad.subject.value)) {
485
- const quadKey = quadToKeyForOrigin(quad);
486
- plan.consumedAdds.add(quadKey);
487
- }
488
- });
489
-
490
- allListQuads.forEach(quad => {
491
- const quadKey = quadToKeyForOrigin(quad);
492
- plan.consumedAdds.add(quadKey);
493
- });
494
- }
495
- }
496
-
497
278
  return plan;
498
279
  }
499
280
 
@@ -596,7 +377,6 @@ function materializeEdits(plan, text, ctx, base) {
596
377
 
597
378
  // Materialize adds
598
379
  for (const { quad, targetBlock } of plan.adds) {
599
- // Skip if this quad was consumed by list operations
600
380
  const quadKey = quadToKeyForOrigin(quad);
601
381
  if (plan.consumedAdds.has(quadKey)) {
602
382
  continue;
@@ -656,310 +436,9 @@ function materializeEdits(plan, text, ctx, base) {
656
436
  }
657
437
  }
658
438
 
659
- // NEW: Materialize list operations
660
- if (plan.listOperations) {
661
- for (const listOp of plan.listOperations) {
662
- const listEdits = materializeListOperation(listOp, text, ctx, base);
663
- edits.push(...listEdits);
664
- }
665
- }
666
-
667
- return edits;
668
- }
669
-
670
- function findInsertionPointForSubject(subject, text, base) {
671
- // Try to find the subject's block in the origin data
672
- for (const [blockId, block] of base.blocks.entries()) {
673
- if (block.subject === subject && block.range) {
674
- // Found the subject block, insert after it
675
- return block.range.end;
676
- }
677
- }
678
-
679
- // Fallback: try to find the subject in the text directly
680
- const subjectShort = subject.includes('#') ? subject.split('#').pop() : subject.split('/').pop();
681
- const subjectRegex = new RegExp(`##\\s+[^{]*\\{=\\s*${subjectShort.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\s*[^}]*\\}`, 'm');
682
- const match = text.match(subjectRegex);
683
-
684
- if (match && match.index !== undefined) {
685
- // Find the end of this line
686
- const lineEnd = text.indexOf('\n', match.index);
687
- return lineEnd !== -1 ? lineEnd + 1 : match.index + match.length;
688
- }
689
-
690
- // Final fallback: end of document
691
- return text.length;
692
- }
693
-
694
- function findExistingListRange(subject, text, base) {
695
- // Try to find existing list items under this subject in the origin data
696
- for (const [blockId, block] of base.blocks.entries()) {
697
- if (block.subject === subject && block.range) {
698
- // Look for ordered list items that come after this subject
699
- const linesAfter = text.substring(block.range.end).split('\n');
700
- let listStart = -1;
701
- let listEnd = -1;
702
-
703
- for (let i = 0; i < linesAfter.length; i++) {
704
- const line = linesAfter[i].trim();
705
-
706
- // Check if this line starts a numbered list
707
- if (/^\d+\./.test(line)) {
708
- if (listStart === -1) {
709
- listStart = block.range.end + linesAfter.slice(0, i).join('\n').length + i;
710
- }
711
- listEnd = block.range.end + linesAfter.slice(0, i + 1).join('\n').length + i + 1;
712
- } else if (listStart !== -1 && !line.startsWith(' ') && line !== '') {
713
- // End of list
714
- break;
715
- }
716
- }
717
-
718
- if (listStart !== -1 && listEnd !== -1) {
719
- return { start: listStart, end: listEnd };
720
- }
721
- }
722
- }
723
-
724
- return null;
725
- }
726
-
727
- function reconstructListFromDiff(listData, normAdds, normDeletes, base) {
728
- // Start with the existing list items
729
- let currentItems = [...listData.items];
730
-
731
- // Apply deletes first - remove items that are being deleted
732
- const deletedItems = new Set();
733
- normDeletes.forEach(quad => {
734
- // Find list items that match the delete quad
735
- currentItems.forEach(item => {
736
- if ((item.firstQuad && quadMatches(item.firstQuad, quad)) ||
737
- (item.typeQuad && quadMatches(item.typeQuad, quad)) ||
738
- (item.restQuad && quadMatches(item.restQuad, quad))) {
739
- deletedItems.add(item.listNode);
740
- }
741
- });
742
- });
743
-
744
- // Remove deleted items
745
- currentItems = currentItems.filter(item => !deletedItems.has(item.listNode));
746
-
747
- // Apply adds - add new list items
748
- normAdds.forEach(quad => {
749
- if (quad.predicate.value.endsWith('#type') &&
750
- quad.object.value === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#List') {
751
- // This is a new list node
752
- const listNodeQuads = normAdds.filter(q => q.subject.value === quad.subject.value);
753
- const firstQuad = listNodeQuads.find(q => q.predicate.value.endsWith('#first'));
754
-
755
- if (firstQuad) {
756
- currentItems.push({
757
- listNode: quad.subject.value,
758
- item: firstQuad.object,
759
- typeQuad: quad,
760
- firstQuad,
761
- restQuad: listNodeQuads.find(q => q.predicate.value.endsWith('#rest'))
762
- });
763
- }
764
- }
765
- });
766
-
767
- // Apply modifications to existing items
768
- normAdds.forEach(quad => {
769
- currentItems.forEach(item => {
770
- if (quadMatches(item.firstQuad, quad)) {
771
- item.firstQuad = quad;
772
- item.item = quad.object;
773
- }
774
- });
775
- });
776
-
777
- // Reorder by list node name to maintain some order
778
- currentItems.sort((a, b) => {
779
- const aNum = parseInt(a.listNode.split('-').pop()) || 0;
780
- const bNum = parseInt(b.listNode.split('-').pop()) || 0;
781
- return aNum - bNum;
782
- });
783
-
784
- // Reassign indices
785
- currentItems.forEach((item, index) => {
786
- item.itemIndex = index;
787
- });
788
-
789
- return {
790
- subject: listData.subject,
791
- items: currentItems,
792
- headQuad: listData.headQuad
793
- };
794
- }
795
-
796
- function quadMatches(quad1, quad2) {
797
- if (!quad1 || !quad2) return false;
798
- return quad1.subject.value === quad2.subject.value &&
799
- quad1.predicate.value === quad2.predicate.value &&
800
- quad1.object.value === quad2.object.value;
801
- }
802
-
803
- function materializeListOperation(listOp, text, ctx, base) {
804
- const edits = [];
805
- const { subject, items, operation, normAdds, normDeletes } = listOp;
806
-
807
- switch (operation.type) {
808
- case 'create':
809
- // Create new ordered list
810
- const listText = serializeListItems(items, ctx, base);
811
- const contextText = serializeListContext(subject, ctx, base);
812
-
813
- // Find insertion point under the correct subject
814
- const insertPoint = findInsertionPointForSubject(subject, text, base);
815
-
816
- let fullListText = '';
817
- if (contextText) {
818
- fullListText += '\n' + contextText;
819
- }
820
- fullListText += '\n' + listText.map(item => item.text).join('\n');
821
-
822
- edits.push({
823
- start: insertPoint,
824
- end: insertPoint,
825
- text: fullListText
826
- });
827
- break;
828
-
829
- case 'modify':
830
- // For modifications, find and replace the existing list entirely
831
- const listData = { subject, items, headQuad: listOp.headQuad };
832
- const reconstructedList = reconstructListFromDiff(listData, normAdds, normDeletes, base);
833
- const modifyListText = serializeListItems(reconstructedList.items, ctx, base);
834
-
835
- // Use the parent subject for finding the existing list
836
- const parentSubject = listOp.headQuad?.subject?.value || subject;
837
-
838
- // Find existing list range to replace
839
- const existingListRange = findExistingListRange(parentSubject, text, base);
840
-
841
- let modifyFullListText = '\n' + modifyListText.map(item => item.text).join('\n');
842
-
843
- if (existingListRange) {
844
- // Replace existing list
845
- edits.push({
846
- start: existingListRange.start,
847
- end: existingListRange.end,
848
- text: modifyFullListText
849
- });
850
- } else {
851
- // Insert after subject (fallback)
852
- const modifyInsertPoint = findInsertionPointForSubject(parentSubject, text, base);
853
- edits.push({
854
- start: modifyInsertPoint,
855
- end: modifyInsertPoint,
856
- text: modifyFullListText
857
- });
858
- }
859
- break;
860
-
861
- case 'delete':
862
- // Find and delete the entire list
863
- // TODO: Implement proper list range detection
864
- break;
865
- }
866
-
867
439
  return edits;
868
440
  }
869
441
 
870
- function serializeListItems(items, ctx, base) {
871
- const serializedItems = [];
872
-
873
- for (let i = 0; i < items.length; i++) {
874
- const { item, listNode } = items[i];
875
-
876
- // Find list item annotations (types, predicates) from origin data
877
- const itemAnnotations = [];
878
-
879
- // Find type annotations for this list item
880
- Array.from(base.quadIndex.entries()).forEach(([key, entry]) => {
881
- const parsed = parseQuadIndexKey(key);
882
- if (parsed?.s === item.value &&
883
- parsed?.p.endsWith('#type') &&
884
- entry.type === 'list-anchor-type') {
885
- itemAnnotations.push({
886
- type: 'type',
887
- iri: parsed.o.v,
888
- range: entry.range
889
- });
890
- }
891
- });
892
-
893
- // Find predicate annotations for this list item
894
- Array.from(base.quadIndex.entries()).forEach(([key, entry]) => {
895
- const parsed = parseQuadIndexKey(key);
896
- if (parsed?.s === item.value &&
897
- entry.type === 'list-anchor-predicate') {
898
- itemAnnotations.push({
899
- type: 'predicate',
900
- iri: parsed.p,
901
- value: parsed.o.v,
902
- range: entry.range
903
- });
904
- }
905
- });
906
-
907
- // Serialize the list item as proper ordered list syntax
908
- let itemText = `${i + 1}. `;
909
-
910
- if (item.termType === 'NamedNode') {
911
- const itemShort = shortenIRI(item.value, ctx);
912
- itemText += `${itemShort}`;
913
-
914
- // Add subject annotation if we have one
915
- itemText += ` {=${itemShort}}`;
916
- } else {
917
- itemText += item.value;
918
- }
919
-
920
- // Add list anchor annotations
921
- const annotationTokens = [];
922
- for (const ann of itemAnnotations) {
923
- if (ann.type === 'type') {
924
- const typeShort = shortenIRI(ann.iri, ctx);
925
- annotationTokens.push(`.${typeShort}`);
926
- } else if (ann.type === 'predicate') {
927
- const predShort = shortenIRI(ann.iri, ctx);
928
- annotationTokens.push(predShort);
929
- }
930
- }
931
-
932
- if (annotationTokens.length > 0) {
933
- itemText += ` {${annotationTokens.join(' ')}}`;
934
- }
935
-
936
- serializedItems.push({
937
- text: itemText,
938
- listNode,
939
- item,
940
- annotations: itemAnnotations
941
- });
942
- }
943
-
944
- return serializedItems;
945
- }
946
-
947
- function serializeListContext(listSubject, ctx, base) {
948
- // Find context quads for this list
949
- const contextQuads = Array.from(base.quadIndex.entries())
950
- .filter(([key, entry]) => {
951
- const parsed = parseQuadIndexKey(key);
952
- return parsed?.s === listSubject &&
953
- parsed?.p.endsWith('#in');
954
- });
955
-
956
- if (contextQuads.length === 0) return null;
957
-
958
- // For now, return a generic context
959
- // TODO: Reconstruct actual context text from origin data
960
- return "Status values: {?sh:in .ex:StatusType label}";
961
- }
962
-
963
442
  function applyEdits(text, edits, ctx, base) {
964
443
  let result = text;
965
444