@type-editor/changeset 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.editorconfig +8 -0
- package/.prettierignore +8 -0
- package/.prettierrc +9 -0
- package/LICENSE +48 -0
- package/README.md +153 -0
- package/dist/index.cjs +1 -0
- package/dist/index.d.ts +107 -0
- package/dist/index.js +1 -0
- package/eslint.config.ts +69 -0
- package/package.json +54 -0
- package/src/Change.ts +456 -0
- package/src/ChangeSet.ts +578 -0
- package/src/Span.ts +170 -0
- package/src/compute-diff.ts +100 -0
- package/src/default-encoder.ts +41 -0
- package/src/index.ts +8 -0
- package/src/max-simplify-distance.ts +7 -0
- package/src/myers-diff/run-myers-diff.ts +261 -0
- package/src/simplify-changes/expand-to-word-boundaries.ts +43 -0
- package/src/simplify-changes/fill-change.ts +99 -0
- package/src/simplify-changes/get-text.ts +69 -0
- package/src/simplify-changes/has-word-boundary.ts +34 -0
- package/src/simplify-changes/is-letter.ts +62 -0
- package/src/simplify-changes/simplify-adjacent-changes.ts +111 -0
- package/src/simplify-changes.ts +42 -0
- package/src/tokenizer/tokenize-block-node.ts +47 -0
- package/src/tokenizer/tokenize-fragment.ts +46 -0
- package/src/tokenizer/tokenize-textNode.ts +31 -0
- package/src/types/ChangeJSON.ts +23 -0
- package/src/types/ChangeSetConfig.ts +19 -0
- package/src/types/TokenEncoder.ts +52 -0
- package/src/types/TrimmedRange.ts +10 -0
- package/tsconfig.json +27 -0
- package/typedoc.json +11 -0
- package/vite.config.ts +54 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import {Change} from '../Change';
|
|
2
|
+
import {Span} from '../Span';
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Creates a merged change that spans from fromB to toB, filling gaps between changes.
|
|
7
|
+
*
|
|
8
|
+
* This function takes a sequence of changes and creates a single change that covers
|
|
9
|
+
* a broader range, filling any gaps between the original changes. The gaps are filled
|
|
10
|
+
* with spans using the data from adjacent changes.
|
|
11
|
+
*
|
|
12
|
+
* @param changes - The array of changes to merge.
|
|
13
|
+
* @param fromB - The start position in the new document.
|
|
14
|
+
* @param toB - The end position in the new document.
|
|
15
|
+
* @returns A new Change covering the expanded range.
|
|
16
|
+
*/
|
|
17
|
+
export function fillChange(changes: ReadonlyArray<Change>, fromB: number, toB: number): Change {
|
|
18
|
+
// Calculate the corresponding positions in the old document
|
|
19
|
+
const fromA: number = changes[0].fromA - (changes[0].fromB - fromB);
|
|
20
|
+
const lastChange: Change = changes[changes.length - 1];
|
|
21
|
+
const toA: number = lastChange.toA + (toB - lastChange.toB);
|
|
22
|
+
|
|
23
|
+
// Initialize span arrays
|
|
24
|
+
let deletedSpans: ReadonlyArray<Span> = Span.none;
|
|
25
|
+
let insertedSpans: ReadonlyArray<Span> = Span.none;
|
|
26
|
+
|
|
27
|
+
// Get initial data for filling gaps (prefer actual changes over empty ones)
|
|
28
|
+
const initialSpans = changes[0].deleted.length ? changes[0].deleted :
|
|
29
|
+
changes[0].inserted.length ? changes[0].inserted : null;
|
|
30
|
+
let deletedData = initialSpans?.[0]?.data;
|
|
31
|
+
let insertedData = (changes[0].inserted.length
|
|
32
|
+
? changes[0].inserted
|
|
33
|
+
: changes[0].deleted.length ? changes[0].deleted : null)?.[0]?.data;
|
|
34
|
+
|
|
35
|
+
let positionA = fromA;
|
|
36
|
+
let positionB = fromB;
|
|
37
|
+
|
|
38
|
+
for (let i = 0; i <= changes.length; i++) {
|
|
39
|
+
const currentChange: Change | null = i === changes.length ? null : changes[i];
|
|
40
|
+
const endA: number = currentChange ? currentChange.fromA : toA;
|
|
41
|
+
const endB: number = currentChange ? currentChange.fromB : toB;
|
|
42
|
+
|
|
43
|
+
// Fill gap in old document if there is one
|
|
44
|
+
if (endA > positionA) {
|
|
45
|
+
deletedSpans = Span.join(
|
|
46
|
+
deletedSpans,
|
|
47
|
+
[new Span(endA - positionA, deletedData)],
|
|
48
|
+
combineSpanData
|
|
49
|
+
);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Fill gap in new document if there is one
|
|
53
|
+
if (endB > positionB) {
|
|
54
|
+
insertedSpans = Span.join(
|
|
55
|
+
insertedSpans,
|
|
56
|
+
[new Span(endB - positionB, insertedData)],
|
|
57
|
+
combineSpanData
|
|
58
|
+
);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// If we've processed all changes, we're done
|
|
62
|
+
if (!currentChange) {
|
|
63
|
+
break;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// Add the actual change spans
|
|
67
|
+
deletedSpans = Span.join(deletedSpans, currentChange.deleted, combineSpanData);
|
|
68
|
+
insertedSpans = Span.join(insertedSpans, currentChange.inserted, combineSpanData);
|
|
69
|
+
|
|
70
|
+
// Update data for the next gap fill
|
|
71
|
+
if (deletedSpans.length > 0) {
|
|
72
|
+
deletedData = deletedSpans[deletedSpans.length - 1].data;
|
|
73
|
+
}
|
|
74
|
+
if (insertedSpans.length > 0) {
|
|
75
|
+
insertedData = insertedSpans[insertedSpans.length - 1].data;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Move positions forward
|
|
79
|
+
positionA = currentChange.toA;
|
|
80
|
+
positionB = currentChange.toB;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
return new Change(fromA, toA, fromB, toB, deletedSpans, insertedSpans);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* Combines two span data values by returning the value if they're equal, or null otherwise.
|
|
88
|
+
*
|
|
89
|
+
* This function is used as a callback for Span.join operations to determine if adjacent
|
|
90
|
+
* spans with the same data can be merged.
|
|
91
|
+
*
|
|
92
|
+
* @template T - The type of data being compared.
|
|
93
|
+
* @param a - The first data value.
|
|
94
|
+
* @param b - The second data value.
|
|
95
|
+
* @returns The data value if both are equal, null otherwise.
|
|
96
|
+
*/
|
|
97
|
+
function combineSpanData<T>(a: T, b: T): T | null {
|
|
98
|
+
return a === b ? a : null;
|
|
99
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import type {Fragment, Node} from '@type-editor/model';
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Extracts text content from a document fragment range.
|
|
6
|
+
*
|
|
7
|
+
* Converts a range of document nodes into a string representation for
|
|
8
|
+
* character-level analysis. Non-text elements (images, widgets, etc.) are
|
|
9
|
+
* represented as spaces to prevent them from being considered part of words.
|
|
10
|
+
*
|
|
11
|
+
* @param fragment - The document fragment to extract text from.
|
|
12
|
+
* @param start - The start position of the range (inclusive).
|
|
13
|
+
* @param end - The end position of the range (exclusive).
|
|
14
|
+
* @returns The text content with non-text nodes replaced by spaces.
|
|
15
|
+
*/
|
|
16
|
+
export function getText(fragment: Fragment, start: number, end: number): string {
|
|
17
|
+
const resultParts: Array<string> = [];
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Recursively extracts text from a fragment, handling different node types.
|
|
21
|
+
*
|
|
22
|
+
* @param frag - The fragment to process.
|
|
23
|
+
* @param rangeStart - The start position relative to the fragment.
|
|
24
|
+
* @param rangeEnd - The end position relative to the fragment.
|
|
25
|
+
*/
|
|
26
|
+
function extractText(frag: Fragment, rangeStart: number, rangeEnd: number): void {
|
|
27
|
+
let offset = 0;
|
|
28
|
+
|
|
29
|
+
for (let i = 0; i < frag.childCount; i++) {
|
|
30
|
+
const child: Node = frag.child(i);
|
|
31
|
+
const childEnd: number = offset + child.nodeSize;
|
|
32
|
+
const overlapStart: number = Math.max(offset, rangeStart);
|
|
33
|
+
const overlapEnd: number = Math.min(childEnd, rangeEnd);
|
|
34
|
+
|
|
35
|
+
// Only process nodes that overlap with our range
|
|
36
|
+
if (overlapStart < overlapEnd) {
|
|
37
|
+
if (child.isText) {
|
|
38
|
+
// Extract the relevant portion of text content
|
|
39
|
+
const textStart: number = Math.max(0, rangeStart - offset);
|
|
40
|
+
const textEnd: number = Math.min(child.text.length, rangeEnd - offset);
|
|
41
|
+
resultParts.push(child.text.slice(textStart, textEnd));
|
|
42
|
+
} else if (child.isLeaf) {
|
|
43
|
+
// Leaf nodes (like images) are represented as spaces
|
|
44
|
+
resultParts.push(' ');
|
|
45
|
+
} else {
|
|
46
|
+
// Non-leaf block nodes: add space before if at start
|
|
47
|
+
if (overlapStart === offset) {
|
|
48
|
+
resultParts.push(' ');
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// Recursively process the node's content
|
|
52
|
+
const contentStart: number = Math.max(0, overlapStart - offset - 1);
|
|
53
|
+
const contentEnd: number = Math.min(child.content.size, overlapEnd - offset);
|
|
54
|
+
extractText(child.content, contentStart, contentEnd);
|
|
55
|
+
|
|
56
|
+
// Add space after if at end
|
|
57
|
+
if (overlapEnd === childEnd) {
|
|
58
|
+
resultParts.push(' ');
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
offset = childEnd;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
extractText(fragment, start, end);
|
|
68
|
+
return resultParts.join('');
|
|
69
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import {isLetter} from './is-letter';
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Checks if there's a word boundary between two changes.
|
|
6
|
+
*
|
|
7
|
+
* A word boundary is detected when there's a transition from non-letter to letter
|
|
8
|
+
* or vice versa between the end of one change and the start of the next.
|
|
9
|
+
*
|
|
10
|
+
* @param text - The text to analyze.
|
|
11
|
+
* @param textStart - The offset where the analyzed text starts in the document.
|
|
12
|
+
* @param fromPos - The start position to check from.
|
|
13
|
+
* @param toPos - The end position to check to.
|
|
14
|
+
* @param contextEnd - The end of the text context (to avoid out-of-bounds).
|
|
15
|
+
* @returns True if a word boundary is found, false otherwise.
|
|
16
|
+
*/
|
|
17
|
+
export function hasWordBoundary(text: string,
|
|
18
|
+
textStart: number,
|
|
19
|
+
fromPos: number,
|
|
20
|
+
toPos: number,
|
|
21
|
+
contextEnd: number): boolean {
|
|
22
|
+
// Check each position in the range for non-letter characters
|
|
23
|
+
for (let pos = fromPos; pos < toPos; pos++) {
|
|
24
|
+
const isLetterChar: boolean = pos >= contextEnd ? false : isLetter(text.charCodeAt(pos - textStart));
|
|
25
|
+
|
|
26
|
+
// A word boundary exists if any character in the range is not a letter
|
|
27
|
+
if (!isLetterChar) {
|
|
28
|
+
return true;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
return false;
|
|
33
|
+
}
|
|
34
|
+
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
|
|
2
|
+
/** Unicode letter detection regex (if supported by the runtime). */
|
|
3
|
+
let unicodeLetterRegex: RegExp | undefined;
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Initialize Unicode letter detection regex if the runtime supports it.
|
|
7
|
+
* Falls back to alternative detection methods if not available.
|
|
8
|
+
*/
|
|
9
|
+
try {
|
|
10
|
+
unicodeLetterRegex = new RegExp('[\\p{Alphabetic}_]', 'u');
|
|
11
|
+
} catch (_) {
|
|
12
|
+
// Unicode property escapes not supported
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Regular expression matching common single-case script characters.
|
|
18
|
+
* These characters are always word characters but don't have distinct
|
|
19
|
+
* upper/lowercase forms (Hebrew, Arabic, CJK, etc.).
|
|
20
|
+
*/
|
|
21
|
+
const nonASCIISingleCaseWordChar = /[\u00df\u0587\u0590-\u05f4\u0600-\u06ff\u3040-\u309f\u30a0-\u30ff\u3400-\u4db5\u4e00-\u9fcc\uac00-\ud7af]/;
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* ASCII character code ranges for word characters.
|
|
26
|
+
*/
|
|
27
|
+
const ASCII_DIGIT_START = 48; // '0'
|
|
28
|
+
const ASCII_DIGIT_END = 57; // '9'
|
|
29
|
+
const ASCII_UPPER_START = 65; // 'A'
|
|
30
|
+
const ASCII_UPPER_END = 90; // 'Z'
|
|
31
|
+
const ASCII_LOWER_START = 97; // 'a'
|
|
32
|
+
const ASCII_LOWER_END = 122; // 'z'
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Determines whether a character code represents a letter or digit.
|
|
37
|
+
*
|
|
38
|
+
* For ASCII characters, checks if the code is in the alphanumeric range.
|
|
39
|
+
* For non-ASCII characters, uses Unicode properties if available, otherwise
|
|
40
|
+
* checks for case changes or single-case script membership.
|
|
41
|
+
*
|
|
42
|
+
* @param code - The character code to test.
|
|
43
|
+
* @returns True if the character is a letter or digit, false otherwise.
|
|
44
|
+
*/
|
|
45
|
+
export function isLetter(code: number): boolean {
|
|
46
|
+
// Fast path for ASCII characters
|
|
47
|
+
if (code < 128) {
|
|
48
|
+
return (code >= ASCII_DIGIT_START && code <= ASCII_DIGIT_END)
|
|
49
|
+
|| (code >= ASCII_UPPER_START && code <= ASCII_UPPER_END)
|
|
50
|
+
|| (code >= ASCII_LOWER_START && code <= ASCII_LOWER_END);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const char = String.fromCharCode(code);
|
|
54
|
+
|
|
55
|
+
// Use Unicode properties if available
|
|
56
|
+
if (unicodeLetterRegex) {
|
|
57
|
+
return unicodeLetterRegex.test(char);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Fallback: check for case changes or single-case script membership
|
|
61
|
+
return char.toUpperCase() !== char.toLowerCase() || nonASCIISingleCaseWordChar.test(char);
|
|
62
|
+
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import type {Node} from '@type-editor/model';
|
|
2
|
+
|
|
3
|
+
import {Change} from '../Change';
|
|
4
|
+
import {MAX_SIMPLIFY_DISTANCE} from '../max-simplify-distance';
|
|
5
|
+
import {expandToWordBoundaries} from './expand-to-word-boundaries';
|
|
6
|
+
import {fillChange} from './fill-change';
|
|
7
|
+
import {getText} from './get-text';
|
|
8
|
+
import {hasWordBoundary} from './has-word-boundary';
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Processes a group of adjacent changes and adds simplified versions to the target array.
|
|
13
|
+
*
|
|
14
|
+
* This function examines changes in a group to determine if they should be merged.
|
|
15
|
+
* Changes are merged if they're within the same word (no word boundary between them).
|
|
16
|
+
* Mixed insertions/deletions are expanded to word boundaries unless they're single
|
|
17
|
+
* character replacements.
|
|
18
|
+
*
|
|
19
|
+
* @param changes - The complete array of changes.
|
|
20
|
+
* @param from - The start index in the changes array (inclusive).
|
|
21
|
+
* @param to - The end index in the changes array (exclusive).
|
|
22
|
+
* @param doc - The document node to analyze.
|
|
23
|
+
* @param target - The array to add simplified changes to.
|
|
24
|
+
*/
|
|
25
|
+
export function simplifyAdjacentChanges(changes: ReadonlyArray<Change>,
|
|
26
|
+
from: number,
|
|
27
|
+
to: number,
|
|
28
|
+
doc: Node,
|
|
29
|
+
target: Array<Change>): void {
|
|
30
|
+
// Get text context around the changes for word boundary detection
|
|
31
|
+
const contextStart: number = Math.max(0, changes[from].fromB - MAX_SIMPLIFY_DISTANCE);
|
|
32
|
+
const contextEnd: number = Math.min(doc.content.size, changes[to - 1].toB + MAX_SIMPLIFY_DISTANCE);
|
|
33
|
+
const text: string = getText(doc.content, contextStart, contextEnd);
|
|
34
|
+
|
|
35
|
+
for (let i = from; i < to; i++) {
|
|
36
|
+
const groupStartIndex: number = i;
|
|
37
|
+
let lastChange: Change = changes[i];
|
|
38
|
+
let totalDeleted: number = lastChange.lenA;
|
|
39
|
+
let totalInserted: number = lastChange.lenB;
|
|
40
|
+
|
|
41
|
+
// Try to merge consecutive changes that are within the same word
|
|
42
|
+
while (i < to - 1) {
|
|
43
|
+
const nextChange: Change = changes[i + 1];
|
|
44
|
+
|
|
45
|
+
// Check if there's a word boundary between the current and next change
|
|
46
|
+
const hasWindowBoundary: boolean = hasWordBoundary(
|
|
47
|
+
text,
|
|
48
|
+
contextStart,
|
|
49
|
+
lastChange.toB,
|
|
50
|
+
nextChange.fromB,
|
|
51
|
+
contextEnd
|
|
52
|
+
);
|
|
53
|
+
|
|
54
|
+
if (hasWindowBoundary) {
|
|
55
|
+
break;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Accumulate the change metrics
|
|
59
|
+
totalDeleted += nextChange.lenA;
|
|
60
|
+
totalInserted += nextChange.lenB;
|
|
61
|
+
lastChange = nextChange;
|
|
62
|
+
i++;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Determine if we should expand this change group to word boundaries
|
|
66
|
+
const shouldExpandToWords: boolean =
|
|
67
|
+
totalInserted > 0
|
|
68
|
+
&& totalDeleted > 0
|
|
69
|
+
&& !(totalInserted === 1 && totalDeleted === 1);
|
|
70
|
+
|
|
71
|
+
if (shouldExpandToWords) {
|
|
72
|
+
// Expand the range to word boundaries
|
|
73
|
+
const [expandedFrom, expandedTo] = expandToWordBoundaries(
|
|
74
|
+
text,
|
|
75
|
+
contextStart,
|
|
76
|
+
contextEnd,
|
|
77
|
+
changes[groupStartIndex].fromB,
|
|
78
|
+
changes[i].toB
|
|
79
|
+
);
|
|
80
|
+
|
|
81
|
+
// Create a merged change covering the expanded range
|
|
82
|
+
const mergedChange: Change = fillChange(
|
|
83
|
+
changes.slice(groupStartIndex, i + 1),
|
|
84
|
+
expandedFrom,
|
|
85
|
+
expandedTo
|
|
86
|
+
);
|
|
87
|
+
|
|
88
|
+
// Try to merge with the previous change in target if they're adjacent
|
|
89
|
+
const previousChange: Change | null = target.length > 0 ? target[target.length - 1] : null;
|
|
90
|
+
|
|
91
|
+
if (previousChange?.toA === mergedChange.fromA) {
|
|
92
|
+
// Merge with previous change
|
|
93
|
+
target[target.length - 1] = new Change(
|
|
94
|
+
previousChange.fromA,
|
|
95
|
+
mergedChange.toA,
|
|
96
|
+
previousChange.fromB,
|
|
97
|
+
mergedChange.toB,
|
|
98
|
+
previousChange.deleted.concat(mergedChange.deleted),
|
|
99
|
+
previousChange.inserted.concat(mergedChange.inserted)
|
|
100
|
+
);
|
|
101
|
+
} else {
|
|
102
|
+
target.push(mergedChange);
|
|
103
|
+
}
|
|
104
|
+
} else {
|
|
105
|
+
// Add changes individually without expansion
|
|
106
|
+
for (let j = groupStartIndex; j <= i; j++) {
|
|
107
|
+
target.push(changes[j]);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type {Node} from '@type-editor/model';
|
|
2
|
+
|
|
3
|
+
import type {Change} from './Change';
|
|
4
|
+
import {MAX_SIMPLIFY_DISTANCE} from './max-simplify-distance';
|
|
5
|
+
import {simplifyAdjacentChanges} from './simplify-changes/simplify-adjacent-changes';
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Simplifies a set of changes for presentation.
|
|
10
|
+
*
|
|
11
|
+
* This function makes changes more readable by expanding insertions and deletions
|
|
12
|
+
* that occur within the same word to cover entire words. This prevents confusing
|
|
13
|
+
* partial-word changes while maintaining accuracy.
|
|
14
|
+
*
|
|
15
|
+
* The algorithm:
|
|
16
|
+
* 1. Groups nearby changes (within MAX_SIMPLIFY_DISTANCE)
|
|
17
|
+
* 2. For mixed insertions/deletions in a group, expands to word boundaries
|
|
18
|
+
* 3. Preserves single-character replacements as-is
|
|
19
|
+
* 4. Merges adjacent changes when appropriate
|
|
20
|
+
*
|
|
21
|
+
* @param changes - The array of changes to simplify.
|
|
22
|
+
* @param doc - The document node (new version) to analyze.
|
|
23
|
+
* @returns A new array of simplified changes.
|
|
24
|
+
*/
|
|
25
|
+
export function simplifyChanges(changes: ReadonlyArray<Change>, doc: Node): Array<Change> {
|
|
26
|
+
const result: Array<Change> = [];
|
|
27
|
+
|
|
28
|
+
for (let i = 0; i < changes.length; i++) {
|
|
29
|
+
const groupStart: number = i;
|
|
30
|
+
let groupEnd: number = changes[i].toB;
|
|
31
|
+
|
|
32
|
+
// Group adjacent changes that are within MAX_SIMPLIFY_DISTANCE
|
|
33
|
+
while (i < changes.length - 1 && changes[i + 1].fromB <= groupEnd + MAX_SIMPLIFY_DISTANCE) {
|
|
34
|
+
groupEnd = changes[++i].toB;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Process each group of adjacent changes
|
|
38
|
+
simplifyAdjacentChanges(changes, groupStart, i + 1, doc, result);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
return result;
|
|
42
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type {Node} from '@type-editor/model';
|
|
2
|
+
|
|
3
|
+
import type {TokenEncoder} from '../types/TokenEncoder';
|
|
4
|
+
import {tokenizeFragment} from './tokenize-fragment';
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Tokenize a block (non-leaf) node by encoding its boundaries and recursively
|
|
9
|
+
* tokenizing its content.
|
|
10
|
+
*
|
|
11
|
+
* @param blockNode - The block node to tokenize.
|
|
12
|
+
* @param encoder - The encoder to use for converting nodes to tokens.
|
|
13
|
+
* @param rangeStart - The start position in the document.
|
|
14
|
+
* @param rangeEnd - The end position in the document.
|
|
15
|
+
* @param nodeOffset - The offset of this node in the document.
|
|
16
|
+
* @param nodeEndOffset - The end offset of this node in the document.
|
|
17
|
+
* @param target - The array to append tokens to.
|
|
18
|
+
*/
|
|
19
|
+
export function tokenizeBlockNode<T>(blockNode: Node,
|
|
20
|
+
encoder: TokenEncoder<T>,
|
|
21
|
+
rangeStart: number,
|
|
22
|
+
rangeEnd: number,
|
|
23
|
+
nodeOffset: number,
|
|
24
|
+
nodeEndOffset: number,
|
|
25
|
+
target: Array<T>): void {
|
|
26
|
+
// Add start token if we're at the beginning of the node
|
|
27
|
+
if (rangeStart === nodeOffset) {
|
|
28
|
+
target.push(encoder.encodeNodeStart(blockNode));
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
// Recursively tokenize the node's content
|
|
32
|
+
const contentStart = Math.max(nodeOffset + 1, rangeStart) - nodeOffset - 1;
|
|
33
|
+
const contentEnd = Math.min(nodeEndOffset - 1, rangeEnd) - nodeOffset - 1;
|
|
34
|
+
|
|
35
|
+
tokenizeFragment(
|
|
36
|
+
blockNode.content,
|
|
37
|
+
encoder,
|
|
38
|
+
contentStart,
|
|
39
|
+
contentEnd,
|
|
40
|
+
target
|
|
41
|
+
);
|
|
42
|
+
|
|
43
|
+
// Add end token if we're at the end of the node
|
|
44
|
+
if (rangeEnd === nodeEndOffset) {
|
|
45
|
+
target.push(encoder.encodeNodeEnd(blockNode));
|
|
46
|
+
}
|
|
47
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type {Fragment, Node} from '@type-editor/model';
|
|
2
|
+
|
|
3
|
+
import type {TokenEncoder} from '../types/TokenEncoder';
|
|
4
|
+
import {tokenizeBlockNode} from './tokenize-block-node';
|
|
5
|
+
import {tokenizeTextNode} from './tokenize-textNode';
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Convert the given range of a fragment to tokens for diff comparison.
|
|
10
|
+
* Recursively processes the fragment tree, encoding text characters and node boundaries.
|
|
11
|
+
*
|
|
12
|
+
* @param fragment - The fragment to tokenize.
|
|
13
|
+
* @param encoder - The encoder to use for converting nodes and characters to tokens.
|
|
14
|
+
* @param start - The start offset within the fragment.
|
|
15
|
+
* @param end - The end offset within the fragment.
|
|
16
|
+
* @param target - The array to append tokens to.
|
|
17
|
+
* @returns The target array with all tokens appended.
|
|
18
|
+
*/
|
|
19
|
+
export function tokenizeFragment<T>(fragment: Fragment,
|
|
20
|
+
encoder: TokenEncoder<T>,
|
|
21
|
+
start: number,
|
|
22
|
+
end: number,
|
|
23
|
+
target: Array<T>): Array<T> {
|
|
24
|
+
let currentOffset = 0;
|
|
25
|
+
|
|
26
|
+
for (let i = 0; i < fragment.childCount; i++) {
|
|
27
|
+
const child: Node = fragment.child(i);
|
|
28
|
+
const childEndOffset: number = currentOffset + child.nodeSize;
|
|
29
|
+
const rangeStart: number = Math.max(currentOffset, start);
|
|
30
|
+
const rangeEnd: number = Math.min(childEndOffset, end);
|
|
31
|
+
|
|
32
|
+
if (rangeStart < rangeEnd) {
|
|
33
|
+
if (child.isText) {
|
|
34
|
+
tokenizeTextNode(child, encoder, rangeStart, rangeEnd, currentOffset, target);
|
|
35
|
+
} else if (child.isLeaf) {
|
|
36
|
+
target.push(encoder.encodeNodeStart(child));
|
|
37
|
+
} else {
|
|
38
|
+
tokenizeBlockNode(child, encoder, rangeStart, rangeEnd, currentOffset, childEndOffset, target);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
currentOffset = childEndOffset;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
return target;
|
|
46
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type {Node} from '@type-editor/model';
|
|
2
|
+
|
|
3
|
+
import type {TokenEncoder} from '../types/TokenEncoder';
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Tokenize a text node by encoding each character within the specified range.
|
|
8
|
+
*
|
|
9
|
+
* @param textNode - The text node to tokenize.
|
|
10
|
+
* @param encoder - The encoder to use for converting characters to tokens.
|
|
11
|
+
* @param rangeStart - The start position in the document.
|
|
12
|
+
* @param rangeEnd - The end position in the document.
|
|
13
|
+
* @param nodeOffset - The offset of this node in the document.
|
|
14
|
+
* @param target - The array to append tokens to.
|
|
15
|
+
*/
|
|
16
|
+
export function tokenizeTextNode<T>(textNode: Node,
|
|
17
|
+
encoder: TokenEncoder<T>,
|
|
18
|
+
rangeStart: number,
|
|
19
|
+
rangeEnd: number,
|
|
20
|
+
nodeOffset: number,
|
|
21
|
+
target: Array<T>): void {
|
|
22
|
+
const text = textNode.text;
|
|
23
|
+
if (!text) {
|
|
24
|
+
return;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
for (let j = rangeStart; j < rangeEnd; j++) {
|
|
28
|
+
const charCode: number = text.charCodeAt(j - nodeOffset);
|
|
29
|
+
target.push(encoder.encodeCharacter(charCode, textNode.marks));
|
|
30
|
+
}
|
|
31
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
|
|
2
|
+
/**
 * JSON-serializable representation of a Change.
 *
 * Describes a change between two document versions (A = old, B = new),
 * including the affected ranges in each version and the deleted/inserted
 * content spans, with spans reduced to plain `{length, data}` objects.
 *
 * @template Data - The type of metadata associated with each span.
 */
export interface ChangeJSON<Data> {
    /** The start position in document A (the old version) where the change begins. */
    fromA: number;
    /** The end position in document A where the change ends. */
    toA: number;
    /** The start position in document B (the new version) where the change begins. */
    fromB: number;
    /** The end position in document B where the change ends. */
    toB: number;
    /** The spans that were deleted from document A, as plain length/data pairs. */
    deleted: ReadonlyArray<{length: number, data: Data}>;
    /** The spans that were inserted into document B, as plain length/data pairs. */
    inserted: ReadonlyArray<{length: number, data: Data}>;
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type {Node} from '@type-editor/model';
|
|
2
|
+
|
|
3
|
+
import type {TokenEncoder} from './TokenEncoder';
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
/**
 * Configuration options for a ChangeSet.
 *
 * @template Data - The type of metadata associated with changes.
 */
export interface ChangeSetConfig<Data> {
    /** The starting document that changes are tracked from. */
    doc: Node;
    /**
     * Function to combine metadata from adjacent spans.
     * NOTE(review): the declared return type is `Data`, not `Data | null`,
     * although sibling span-combining helpers in this package return null
     * for incompatible data — confirm the intended contract against
     * ChangeSet's usage before relying on either behavior.
     */
    combine: (dataA: Data, dataB: Data) => Data;
    /** Encoder for tokenizing document content during diff operations. */
    encoder: TokenEncoder<any>;

}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import type {Mark, Node} from '@type-editor/model';
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
/**
 * A token encoder can be passed when creating a `ChangeSet` in order
 * to influence the way the library runs its diffing algorithm. The
 * encoder determines how document tokens (such as nodes and
 * characters) are encoded and compared.
 *
 * Note that both the encoding and the comparison may run a lot, and
 * doing non-trivial work in these functions could impact
 * performance — keep them cheap and allocation-free where possible.
 *
 * @template T - The token type produced by the encoder and consumed by
 * `compareTokens`.
 */
export interface TokenEncoder<T> {

    /**
     * Encode a given character, with the given marks applied.
     *
     * @param char - The character code (as from `charCodeAt`) to encode.
     * @param marks - The marks applied to the character.
     * @returns The encoded representation of the character.
     */
    encodeCharacter(char: number, marks: ReadonlyArray<Mark>): T;

    /**
     * Encode the start of a node or, if this is a leaf node, the
     * entire node (leaf nodes are represented by this single token).
     *
     * @param node - The node to encode.
     * @returns The encoded representation of the node start.
     */
    encodeNodeStart(node: Node): T;

    /**
     * Encode the end token for the given node. It is valid to encode
     * every end token in the same way.
     *
     * @param node - The node to encode the end token for.
     * @returns The encoded representation of the node end.
     */
    encodeNodeEnd(node: Node): T;

    /**
     * Compare the given tokens. Should return true when they count as
     * equal for diffing purposes.
     *
     * @param a - The first token to compare.
     * @param b - The second token to compare.
     * @returns True if the tokens are equal, false otherwise.
     */
    compareTokens(a: T, b: T): boolean;
}
|