mdld-parse 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/serialize.js CHANGED
@@ -177,6 +177,176 @@ export function serialize({ text, diff, origin, options = {} }) {
177
177
  return applyEdits(text, edits, ctx, base);
178
178
  }
179
179
 
180
/**
 * Classify what a diff does to one detected RDF list.
 *
 * - `modify`: list-structure quads appear on BOTH sides of the diff, or the
 *   list is only partially covered by the adds / deletes.
 * - `create`: every structure quad of the list (type/first/rest of each item
 *   plus the head quad) is present in `normAdds`.
 * - `delete`: every structure quad of the list is present in `normDeletes`.
 *
 * @param {{items: Array<object>, headQuad?: object}} listData - List record
 *   with per-item `typeQuad` / `firstQuad` / `restQuad` and an optional
 *   `headQuad`.
 * @param {Array<object>} normAdds - Normalized quads being added.
 * @param {Array<object>} normDeletes - Normalized quads being removed.
 * @param {object} base - Unused here; kept so existing call sites stay valid.
 * @returns {{type: 'create' | 'delete' | 'modify'}} The operation descriptor.
 */
function determineListOperation(listData, normAdds, normDeletes, base) {
  const isListStructureQuad = (quad) =>
    quad.subject.value.includes('#list-') ||
    quad.predicate.value.endsWith('#first') ||
    quad.predicate.value.endsWith('#rest') ||
    (quad.predicate.value.endsWith('#type') && quad.object.value.endsWith('#List'));

  // A modification touches list structure on both sides of the diff. Using
  // "either side" here would classify every list as `modify`, because a list
  // can only be detected from structure quads found in the adds or deletes.
  if (normAdds.some(isListStructureQuad) && normDeletes.some(isListStructureQuad)) {
    return { type: 'modify' };
  }

  // Collect every structure quad that belongs to this list.
  const structureQuads = [];
  for (const item of listData.items) {
    if (item.typeQuad) structureQuads.push(item.typeQuad);
    if (item.firstQuad) structureQuads.push(item.firstQuad);
    if (item.restQuad) structureQuads.push(item.restQuad);
  }
  if (listData.headQuad) structureQuads.push(listData.headQuad);

  // `every` is vacuously true on an empty array; with nothing to compare the
  // safe answer is the default.
  if (structureQuads.length === 0) {
    return { type: 'modify' };
  }

  // Compare quads by the values of their three terms.
  const keyOf = (quad) =>
    JSON.stringify([quad.subject.value, quad.predicate.value, quad.object.value]);
  const isFullyCoveredBy = (quads) => {
    const keys = new Set(quads.map(keyOf));
    return structureQuads.every((quad) => keys.has(keyOf(quad)));
  };

  if (isFullyCoveredBy(normAdds)) {
    return { type: 'create' };
  }

  if (isFullyCoveredBy(normDeletes)) {
    return { type: 'delete' };
  }

  // Anything else is a partial change to the list.
  return { type: 'modify' };
}
237
+
238
/**
 * Find RDF lists in a set of quads and group their nodes by owning subject.
 *
 * A list node is any subject typed `rdf:List`. Each node is attached to the
 * subject of the first quad whose object is that node. Items are ordered by
 * following `rdf:rest` links from the head node; when no head can be found
 * (every node is the target of some `rdf:rest`), nodes are ordered by the
 * numeric suffix after the last `-` in the node identifier.
 *
 * NOTE(review): a node whose first inbound quad is an `rdf:rest` link is
 * grouped under the preceding list node rather than under the list's owner —
 * confirm this matches the quads the parser emits.
 *
 * @param {Array<object>} quads - Quads exposing `subject.value`,
 *   `predicate.value` and `object.value`.
 * @param {object} base - Unused here; kept so existing call sites stay valid.
 * @param {object} ctx - Unused here; kept so existing call sites stay valid.
 * @returns {Map<string, {subject: string, items: Array<object>, headQuad: object}>}
 *   Lists keyed by owning subject. Each item carries `listNode`, `item`,
 *   `typeQuad`, `firstQuad`, `restQuad` and `itemIndex`.
 */
function detectRdfLists(quads, base, ctx) {
  const RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
  const RDF_LIST = `${RDF_NS}List`;
  const RDF_FIRST = `${RDF_NS}first`;
  const RDF_REST = `${RDF_NS}rest`;
  const RDF_NIL = `${RDF_NS}nil`;

  const lists = new Map();

  // One type quad per list node. The same node may be typed more than once in
  // the input; keeping only the first avoids emitting duplicate items.
  const typeQuadByNode = new Map();
  for (const quad of quads) {
    if (
      quad.predicate.value.endsWith('#type') &&
      quad.object.value === RDF_LIST &&
      !typeQuadByNode.has(quad.subject.value)
    ) {
      typeQuadByNode.set(quad.subject.value, quad);
    }
  }

  // Group list nodes under the subject that points at them.
  const parentToListNodes = new Map();
  for (const listSubject of typeQuadByNode.keys()) {
    const parentQuad = quads.find(
      (q) => q.object.value === listSubject && q.subject.value !== listSubject, // skip self-references
    );
    if (!parentQuad) continue;

    const parentSubject = parentQuad.subject.value;
    if (!parentToListNodes.has(parentSubject)) {
      parentToListNodes.set(parentSubject, []);
    }
    parentToListNodes.get(parentSubject).push(listSubject);
  }

  for (const [parentSubject, listNodeSubjects] of parentToListNodes) {
    // Collect one item per list node that has an rdf:first value.
    const allListItems = [];
    for (const listNodeSubject of listNodeSubjects) {
      const listNodeQuads = quads.filter((q) => q.subject.value === listNodeSubject);
      const firstQuad = listNodeQuads.find((q) => q.predicate.value === RDF_FIRST);
      const restQuad = listNodeQuads.find((q) => q.predicate.value === RDF_REST);

      if (firstQuad) {
        allListItems.push({
          listNode: listNodeSubject,
          item: firstQuad.object,
          // Look the type quad up per node; there is no loop variable in
          // scope here that could provide it.
          typeQuad: typeQuadByNode.get(listNodeSubject),
          firstQuad,
          restQuad,
        });
      }
    }

    // Nodes that are the target of an rdf:rest link cannot be the head. Items
    // without an rdf:rest quad contribute no target.
    const restTargets = new Set();
    for (const item of allListItems) {
      const target = item.restQuad?.object?.value;
      if (target !== undefined && target !== RDF_NIL) {
        restTargets.add(target);
      }
    }

    const orderedItems = [];
    const headItems = allListItems.filter((item) => !restTargets.has(item.listNode));

    if (headItems.length > 0) {
      // Walk the rdf:rest chain from the first head; `processed` stops cycles.
      const processed = new Set();
      let currentItem = headItems[0];
      let index = 0;

      while (currentItem && !processed.has(currentItem.listNode)) {
        currentItem.itemIndex = index++;
        orderedItems.push(currentItem);
        processed.add(currentItem.listNode);

        const nextListNode = currentItem.restQuad?.object?.value;
        currentItem = allListItems.find((item) => item.listNode === nextListNode);
      }
    } else {
      // Fallback: order by the numeric suffix of the list node identifier.
      const suffixOf = (item) => Number.parseInt(item.listNode.split('-').pop(), 10) || 0;
      allListItems.sort((a, b) => suffixOf(a) - suffixOf(b));

      allListItems.forEach((item, index) => {
        item.itemIndex = index;
        orderedItems.push(item);
      });
    }

    lists.set(parentSubject, {
      subject: parentSubject,
      items: orderedItems,
      headQuad: quads.find(
        (q) => q.subject.value === parentSubject && listNodeSubjects.includes(q.object.value),
      ),
    });
  }

  return lists;
}
349
+
180
350
  function planOperations(diff, base, ctx) {
181
351
  // Normalize quads once
182
352
  const normAdds = (diff.add || []).map(normalizeQuad).filter(isValidQuad);
@@ -187,6 +357,7 @@ function planOperations(diff, base, ctx) {
187
357
  vacantSlotOccupations: [],
188
358
  deletes: [],
189
359
  adds: [],
360
+ listOperations: [], // NEW: Support for ordered list operations
190
361
  consumedAdds: new Set()
191
362
  };
192
363
 
@@ -274,6 +445,55 @@ function planOperations(diff, base, ctx) {
274
445
  plan.adds.push({ quad, targetBlock });
275
446
  }
276
447
 
448
+ // NEW: Detect RDF lists and plan list operations
449
+ const allQuads = [...normAdds, ...normDeletes];
450
+ const rdfLists = detectRdfLists(allQuads, base, ctx);
451
+
452
+ for (const [listSubject, listData] of rdfLists) {
453
+ const operation = determineListOperation(listData, normAdds, normDeletes, base);
454
+ if (operation) {
455
+ plan.listOperations.push({
456
+ type: 'rdf-list',
457
+ subject: listSubject,
458
+ items: listData.items,
459
+ operation,
460
+ headQuad: listData.headQuad,
461
+ normAdds, // Pass through for reconstruction
462
+ normDeletes // Pass through for reconstruction
463
+ });
464
+
465
+ // Mark all list-related quads as consumed to prevent double processing
466
+ const allListQuads = [];
467
+ listData.items.forEach(item => {
468
+ if (item.typeQuad) allListQuads.push(item.typeQuad);
469
+ if (item.firstQuad) allListQuads.push(item.firstQuad);
470
+ if (item.restQuad) allListQuads.push(item.restQuad);
471
+ });
472
+ if (listData.headQuad) allListQuads.push(listData.headQuad);
473
+
474
+ // Also mark list item annotation quads as consumed
475
+ Array.from(base.quadIndex.entries()).forEach(([key, entry]) => {
476
+ const parsed = parseQuadIndexKey(key);
477
+ if (parsed && listData.items.some(item => item.item.value === parsed.s)) {
478
+ plan.consumedAdds.add(key);
479
+ }
480
+ });
481
+
482
+ // Also mark any quads with list item subjects as consumed
483
+ normAdds.forEach(quad => {
484
+ if (listData.items.some(item => item.item.value === quad.subject.value)) {
485
+ const quadKey = quadToKeyForOrigin(quad);
486
+ plan.consumedAdds.add(quadKey);
487
+ }
488
+ });
489
+
490
+ allListQuads.forEach(quad => {
491
+ const quadKey = quadToKeyForOrigin(quad);
492
+ plan.consumedAdds.add(quadKey);
493
+ });
494
+ }
495
+ }
496
+
277
497
  return plan;
278
498
  }
279
499
 
@@ -376,6 +596,12 @@ function materializeEdits(plan, text, ctx, base) {
376
596
 
377
597
  // Materialize adds
378
598
  for (const { quad, targetBlock } of plan.adds) {
599
+ // Skip if this quad was consumed by list operations
600
+ const quadKey = quadToKeyForOrigin(quad);
601
+ if (plan.consumedAdds.has(quadKey)) {
602
+ continue;
603
+ }
604
+
379
605
  if (quad.object.termType === 'Literal' || quad.object.termType === 'NamedNode') {
380
606
  if (!targetBlock) {
381
607
  const predShort = shortenIRI(quad.predicate.value, ctx);
@@ -430,9 +656,310 @@ function materializeEdits(plan, text, ctx, base) {
430
656
  }
431
657
  }
432
658
 
659
+ // NEW: Materialize list operations
660
+ if (plan.listOperations) {
661
+ for (const listOp of plan.listOperations) {
662
+ const listEdits = materializeListOperation(listOp, text, ctx, base);
663
+ edits.push(...listEdits);
664
+ }
665
+ }
666
+
667
+ return edits;
668
+ }
669
+
670
/**
 * Choose the text offset at which new content for `subject` should be inserted.
 *
 * Resolution order:
 *   1. the end of the first origin block whose `subject` matches and that has
 *      a `range`;
 *   2. the start of the line following a `## ... {=<short> ...}` heading,
 *      where `<short>` is the part of the subject after its last `#`
 *      (or last `/` when it has no `#`);
 *   3. the end of the document.
 *
 * @param {string} subject - Subject identifier to locate.
 * @param {string} text - Document text.
 * @param {{blocks: Map<*, {subject: string, range?: {end: number}}>}} base -
 *   Origin data; `blocks` is iterated via `entries()`.
 * @returns {number} Offset into `text`.
 */
function findInsertionPointForSubject(subject, text, base) {
  // Preferred: insert right after the subject's block in the origin data.
  for (const [, block] of base.blocks.entries()) {
    if (block.subject === subject && block.range) {
      return block.range.end;
    }
  }

  // Fallback: look for a heading that declares the subject.
  const subjectShort = subject.includes('#') ? subject.split('#').pop() : subject.split('/').pop();
  const escapedShort = subjectShort.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const headingPattern = new RegExp(`##\\s+[^{]*\\{=\\s*${escapedShort}\\s*[^}]*\\}`, 'm');
  const match = text.match(headingPattern);

  if (match && match.index !== undefined) {
    const lineEnd = text.indexOf('\n', match.index);
    // With no newline after the heading, the heading is on the last line, so
    // the end of that line is the end of the document. (`match.length` is the
    // number of capture slots in the match array, not a text length.)
    return lineEnd !== -1 ? lineEnd + 1 : text.length;
  }

  // Final fallback: end of document.
  return text.length;
}
693
+
694
/**
 * Locate the numbered list that follows a subject's block in the document.
 *
 * For each origin block whose `subject` matches and that has a `range`, the
 * text after `range.end` is scanned line by line. The range starts at the
 * first line whose trimmed content begins with `<digits>.` and ends just after
 * the last such line (including its newline when there is one). Blank lines
 * and indented lines inside the list do not terminate it; the first other
 * non-blank, non-indented line does.
 *
 * @param {string} subject - Subject identifier owning the list.
 * @param {string} text - Document text.
 * @param {{blocks: Map<*, {subject: string, range?: {end: number}}>}} base -
 *   Origin data; `blocks` is iterated via `entries()`.
 * @returns {{start: number, end: number} | null} Offsets into `text`, or
 *   `null` when no numbered list follows any matching block.
 */
function findExistingListRange(subject, text, base) {
  for (const [, block] of base.blocks.entries()) {
    if (block.subject !== subject || !block.range) continue;

    let listStart = -1;
    let listEnd = -1;
    // Running offset of the current line's first character. Tracking it
    // directly avoids recomputing (and double-counting) newline positions.
    let offset = block.range.end;

    for (const rawLine of text.substring(block.range.end).split('\n')) {
      const lineStart = offset;
      offset += rawLine.length + 1; // +1 for the newline consumed by split
      const trimmed = rawLine.trim();

      if (/^\d+\./.test(trimmed)) {
        if (listStart === -1) {
          listStart = lineStart;
        }
        // The final line of the document has no trailing newline.
        listEnd = Math.min(offset, text.length);
      } else if (listStart !== -1 && trimmed !== '' && !/^\s/.test(rawLine)) {
        // A non-blank line that is not indented ends the list. Indentation
        // has to be tested on the untrimmed line.
        break;
      }
    }

    if (listStart !== -1 && listEnd !== -1) {
      return { start: listStart, end: listEnd };
    }
  }

  return null;
}
726
+
727
/**
 * Rebuild a list's items by applying a diff to the detected items.
 *
 * Steps, in order:
 *   1. drop items whose first/type/rest quad matches a deleted quad;
 *   2. append an item for every added `rdf:List` node that has an added
 *      `#first` quad and is not already present;
 *   3. refresh `firstQuad` / `item` of items whose first quad matches an added
 *      quad;
 *   4. order items by the numeric suffix after the last `-` in the list node
 *      identifier and renumber `itemIndex` from 0.
 *
 * The input items are copied, so objects owned by the caller are not mutated.
 *
 * @param {{subject: string, items: Array<object>, headQuad?: object}} listData -
 *   Detected list.
 * @param {Array<object>} normAdds - Normalized quads being added.
 * @param {Array<object>} normDeletes - Normalized quads being removed.
 * @param {object} base - Unused here; kept so existing call sites stay valid.
 * @returns {{subject: string, items: Array<object>, headQuad: object | undefined}}
 *   The reconstructed list.
 */
function reconstructListFromDiff(listData, normAdds, normDeletes, base) {
  const RDF_LIST = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#List';

  // Shallow-copy each item so index/first-quad updates stay local.
  let currentItems = listData.items.map((item) => ({ ...item }));

  // 1. Deletes: an item goes away when any of its structure quads is deleted.
  const deletedNodes = new Set();
  for (const quad of normDeletes) {
    for (const item of currentItems) {
      if (
        quadMatches(item.firstQuad, quad) ||
        quadMatches(item.typeQuad, quad) ||
        quadMatches(item.restQuad, quad)
      ) {
        deletedNodes.add(item.listNode);
      }
    }
  }
  currentItems = currentItems.filter((item) => !deletedNodes.has(item.listNode));

  // 2. Adds: append list nodes introduced by the diff. Nodes already in the
  //    list are skipped; appending them again would duplicate the item.
  const knownNodes = new Set(currentItems.map((item) => item.listNode));
  for (const quad of normAdds) {
    const isListNode = quad.predicate.value.endsWith('#type') && quad.object.value === RDF_LIST;
    if (!isListNode || knownNodes.has(quad.subject.value)) continue;

    const listNodeQuads = normAdds.filter((q) => q.subject.value === quad.subject.value);
    const firstQuad = listNodeQuads.find((q) => q.predicate.value.endsWith('#first'));
    if (!firstQuad) continue;

    currentItems.push({
      listNode: quad.subject.value,
      item: firstQuad.object,
      typeQuad: quad,
      firstQuad,
      restQuad: listNodeQuads.find((q) => q.predicate.value.endsWith('#rest')),
    });
    knownNodes.add(quad.subject.value);
  }

  // 3. Point surviving items at the added quad instance for their first value.
  for (const quad of normAdds) {
    for (const item of currentItems) {
      if (quadMatches(item.firstQuad, quad)) {
        item.firstQuad = quad;
        item.item = quad.object;
      }
    }
  }

  // 4. Order by list node suffix, then renumber.
  const suffixOf = (item) => Number.parseInt(item.listNode.split('-').pop(), 10) || 0;
  currentItems.sort((a, b) => suffixOf(a) - suffixOf(b));
  currentItems.forEach((item, index) => {
    item.itemIndex = index;
  });

  return {
    subject: listData.subject,
    items: currentItems,
    headQuad: listData.headQuad,
  };
}
795
+
796
/**
 * Tell whether two quads carry the same subject, predicate and object values.
 *
 * Only the `.value` of each term is compared.
 *
 * @param {object | null | undefined} quad1 - First quad.
 * @param {object | null | undefined} quad2 - Second quad.
 * @returns {boolean} `true` when both quads are present and all three term
 *   values are strictly equal; `false` otherwise.
 */
function quadMatches(quad1, quad2) {
  if (quad1 && quad2) {
    const termNames = ['subject', 'predicate', 'object'];
    return termNames.every((name) => quad1[name].value === quad2[name].value);
  }
  return false;
}
802
+
803
/**
 * Turn one planned list operation into text edits.
 *
 * - `create`: inserts the optional context line and the serialized items at
 *   the insertion point found for the list subject.
 * - `modify`: reconstructs the list from the diff and replaces the existing
 *   numbered list; when no existing list is found, inserts the items at the
 *   insertion point found for the parent subject.
 * - `delete`: not implemented yet; produces no edits.
 *
 * @param {{subject: string, items: Array<object>, operation: {type: string},
 *   headQuad?: object, normAdds: Array<object>, normDeletes: Array<object>}} listOp -
 *   Planned operation.
 * @param {string} text - Document text.
 * @param {object} ctx - Passed through to the serialization helpers.
 * @param {object} base - Origin data passed through to the helpers.
 * @returns {Array<{start: number, end: number, text: string}>} Edits to apply.
 */
function materializeListOperation(listOp, text, ctx, base) {
  const edits = [];
  const { subject, items, operation, normAdds, normDeletes } = listOp;
  const joinItemTexts = (serialized) => serialized.map((entry) => entry.text).join('\n');

  // Each case gets its own block so its `const` bindings are not shared with
  // (or in the temporal dead zone of) the other cases.
  switch (operation.type) {
    case 'create': {
      const listText = serializeListItems(items, ctx, base);
      const contextText = serializeListContext(subject, ctx, base);

      // Insert under the subject the list belongs to.
      const insertPoint = findInsertionPointForSubject(subject, text, base);

      let fullListText = '';
      if (contextText) {
        fullListText += `\n${contextText}`;
      }
      fullListText += `\n${joinItemTexts(listText)}`;

      edits.push({
        start: insertPoint,
        end: insertPoint,
        text: fullListText,
      });
      break;
    }

    case 'modify': {
      // Rebuild the whole list and swap it in for the existing one.
      const listData = { subject, items, headQuad: listOp.headQuad };
      const reconstructedList = reconstructListFromDiff(listData, normAdds, normDeletes, base);
      const renderedItems = joinItemTexts(serializeListItems(reconstructedList.items, ctx, base));

      // The existing list is looked up under the parent subject.
      const parentSubject = listOp.headQuad?.subject?.value || subject;
      const existingListRange = findExistingListRange(parentSubject, text, base);

      if (existingListRange) {
        // The range begins at the first list line, so the replacement must
        // not add a leading newline. Keep the line terminator the replaced
        // span had so the following text stays on its own line.
        const replacedSpan = text.slice(existingListRange.start, existingListRange.end);
        const terminator = renderedItems !== '' && replacedSpan.endsWith('\n') ? '\n' : '';
        edits.push({
          start: existingListRange.start,
          end: existingListRange.end,
          text: renderedItems + terminator,
        });
      } else {
        // Fallback: insert on a new line after the subject.
        const modifyInsertPoint = findInsertionPointForSubject(parentSubject, text, base);
        edits.push({
          start: modifyInsertPoint,
          end: modifyInsertPoint,
          text: `\n${renderedItems}`,
        });
      }
      break;
    }

    case 'delete':
      // TODO: Implement proper list range detection and remove the list.
      break;
  }

  return edits;
}
435
869
 
870
/**
 * Render list items as numbered-list lines with their anchor annotations.
 *
 * Each item becomes `<n>. <value>`. For a `NamedNode` item the value is its
 * shortened IRI followed by a `{=<short>}` subject annotation. Annotations
 * recorded in `base.quadIndex` for the item (entries of type
 * `list-anchor-type` with a `#type` predicate, then entries of type
 * `list-anchor-predicate`) are appended as one `{...}` group.
 *
 * @param {Array<{item: {termType?: string, value: string}, listNode: string}>} items -
 *   Items in output order.
 * @param {object} ctx - Passed to `shortenIRI`.
 * @param {{quadIndex: Map<string, {type?: string, range?: *}>}} base - Origin
 *   data; read only when `items` is non-empty.
 * @returns {Array<{text: string, listNode: string, item: object, annotations: Array<object>}>}
 *   One record per input item.
 */
function serializeListItems(items, ctx, base) {
  if (items.length === 0) {
    return [];
  }

  // Index the anchor annotations by subject in one pass instead of scanning
  // and re-parsing the whole quad index twice for every item.
  const typeAnnotations = new Map();
  const predicateAnnotations = new Map();
  const remember = (bySubject, subjectValue, annotation) => {
    if (!bySubject.has(subjectValue)) {
      bySubject.set(subjectValue, []);
    }
    bySubject.get(subjectValue).push(annotation);
  };

  for (const [key, entry] of base.quadIndex.entries()) {
    const entryType = entry?.type;
    if (entryType !== 'list-anchor-type' && entryType !== 'list-anchor-predicate') continue;

    const parsed = parseQuadIndexKey(key);
    if (!parsed) continue;

    if (entryType === 'list-anchor-type') {
      if (parsed.p?.endsWith('#type')) {
        remember(typeAnnotations, parsed.s, {
          type: 'type',
          iri: parsed.o.v,
          range: entry.range,
        });
      }
    } else {
      remember(predicateAnnotations, parsed.s, {
        type: 'predicate',
        iri: parsed.p,
        value: parsed.o.v,
        range: entry.range,
      });
    }
  }

  return items.map(({ item, listNode }, position) => {
    // Type annotations come first, then predicate annotations.
    const itemAnnotations = [
      ...(typeAnnotations.get(item.value) ?? []),
      ...(predicateAnnotations.get(item.value) ?? []),
    ];

    let itemText = `${position + 1}. `;
    if (item.termType === 'NamedNode') {
      const itemShort = shortenIRI(item.value, ctx);
      itemText += `${itemShort} {=${itemShort}}`;
    } else {
      itemText += item.value;
    }

    const annotationTokens = [];
    for (const ann of itemAnnotations) {
      if (ann.type === 'type') {
        annotationTokens.push(`.${shortenIRI(ann.iri, ctx)}`);
      } else if (ann.type === 'predicate') {
        annotationTokens.push(shortenIRI(ann.iri, ctx));
      }
    }
    if (annotationTokens.length > 0) {
      itemText += ` {${annotationTokens.join(' ')}}`;
    }

    return {
      text: itemText,
      listNode,
      item,
      annotations: itemAnnotations,
    };
  });
}
946
+
947
/**
 * Produce the context line that introduces a list, if the origin data records
 * one for the list subject.
 *
 * A context exists when `base.quadIndex` holds at least one key that parses to
 * the given subject with a predicate ending in `#in`.
 *
 * NOTE(review): the returned text is a fixed placeholder, not the context
 * recorded in the origin data — see the TODO below.
 *
 * @param {string} listSubject - Subject the list belongs to.
 * @param {object} ctx - Unused here; kept so existing call sites stay valid.
 * @param {{quadIndex: Map<string, *>}} base - Origin data.
 * @returns {string | null} The context line, or `null` when no context quad
 *   exists for the subject.
 */
function serializeListContext(listSubject, ctx, base) {
  // Count the context quads recorded for this list; every key is inspected.
  let contextQuadCount = 0;
  for (const [key] of base.quadIndex.entries()) {
    const parsed = parseQuadIndexKey(key);
    if (parsed?.s === listSubject && parsed?.p.endsWith('#in')) {
      contextQuadCount += 1;
    }
  }

  if (contextQuadCount === 0) {
    return null;
  }

  // For now, return a generic context
  // TODO: Reconstruct actual context text from origin data
  return "Status values: {?sh:in .ex:StatusType label}";
}
962
+
436
963
  function applyEdits(text, edits, ctx, base) {
437
964
  let result = text;
438
965