redis-diff_match_patch 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2195 @@
1
+ --[[
2
+ * Diff Match and Patch
3
+ *
4
+ * Copyright 2006 Google Inc.
5
+ * http://code.google.com/p/google-diff-match-patch/
6
+ *
7
+ * Based on the JavaScript implementation by Neil Fraser.
8
+ * Ported to Lua by Duncan Cross.
9
+ *
10
+ * Licensed under the Apache License, Version 2.0 (the "License");
11
+ * you may not use this file except in compliance with the License.
12
+ * You may obtain a copy of the License at
13
+ *
14
+ * http://www.apache.org/licenses/LICENSE-2.0
15
+ *
16
+ * Unless required by applicable law or agreed to in writing, software
17
+ * distributed under the License is distributed on an "AS IS" BASIS,
18
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ * See the License for the specific language governing permissions and
20
+ * limitations under the License.
21
+ --]]
22
+
23
+ --[[
24
+ -- Lua 5.1 and earlier requires the external BitOp library.
25
+ -- This library is built-in from Lua 5.2 and later as 'bit32'.
26
+ require 'bit' -- <http://bitop.luajit.org/>
27
+ local band, bor, lshift
28
+ = bit.band, bit.bor, bit.lshift
29
+ --]]
30
+
31
+ local band, bor, lshift
32
+ = bit32.band, bit32.bor, bit32.lshift
33
+ local type, setmetatable, ipairs, select
34
+ = type, setmetatable, ipairs, select
35
+ local unpack, tonumber, error
36
+ = unpack, tonumber, error
37
+ local strsub, strbyte, strchar, gmatch, gsub
38
+ = string.sub, string.byte, string.char, string.gmatch, string.gsub
39
+ local strmatch, strfind, strformat
40
+ = string.match, string.find, string.format
41
+ local tinsert, tremove, tconcat
42
+ = table.insert, table.remove, table.concat
43
+ local max, min, floor, ceil, abs
44
+ = math.max, math.min, math.floor, math.ceil, math.abs
45
+ local clock = os.clock
46
+
47
+ module 'diff_match_patch'
48
+
49
+
50
+ -- Utility functions.
51
+
52
+ local percentEncode_pattern = '[^A-Za-z0-9%-=;\',./~!@#$%&*%(%)_%+ %?]' -- matches one character that is NOT in the listed set of characters left unencoded
53
+ local function percentEncode_replace(v) -- presumably the gsub replacement used with percentEncode_pattern; verify against the call site
54
+ return strformat('%%%02X', strbyte(v)) -- '%' followed by the byte value of v's first character as two uppercase hex digits
55
+ end
56
+
57
+ local function tsplice(t, idx, deletions, ...) -- in-place splice: drop `deletions` entries starting at idx, then insert the extra arguments at idx
58
+ local insertions = select('#', ...) -- number of extra arguments (nil arguments are counted too)
59
+ for i = 1, deletions do
60
+ tremove(t, idx) -- later entries shift down, so idx always names the next entry to drop
61
+ end
62
+ for i = insertions, 1, -1 do -- insert last-to-first at the same idx so the values end up in argument order
63
+ -- Keep the parentheses around select: they truncate its results to the single i-th value.
64
+ tinsert(t, idx, (select(i, ...)))
65
+ end
66
+ end
67
+
68
+ local function strelement(str, i) -- one-character substring of str at position i
69
+ return strsub(str, i, i) -- string.sub rules apply: a negative i counts from the end, and i == 0 or an i past the end yields ''
70
+ end
71
+
72
+ local function indexOf(a, b, start) -- position of the first plain-text occurrence of b in a, searching from start; nil when absent
73
+ if (#b == 0) then
74
+ return nil -- an empty needle is reported as "not found"
75
+ end
76
+ return strfind(a, b, start, true) -- 4th argument true = plain search (no pattern magic); the end index comes back as a second result
77
+ end
78
+
79
+ local htmlEncode_pattern = '[&<>\n]' -- the four characters diff_prettyHtml rewrites before emitting text
80
+ local htmlEncode_replace = { -- lookup table handed to gsub: matched character -> replacement text
81
+ ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['\n'] = '&para;<br>'
82
+ }
83
+
84
+ -- Public API Functions, forward-declared as locals so the `function` statements further down assign to them.
85
+ -- (Exported at the end of the script)
86
+
87
+ local diff_main,
88
+ diff_cleanupSemantic,
89
+ diff_cleanupEfficiency,
90
+ diff_levenshtein,
91
+ diff_prettyHtml
92
+
93
+ local match_main
94
+
95
+ local patch_make,
96
+ patch_toText,
97
+ patch_fromText,
98
+ patch_apply
99
+
100
+ --[[
101
+ * The data structure representing a diff is an array of tuples:
102
+ * {{DIFF_DELETE, 'Hello'}, {DIFF_INSERT, 'Goodbye'}, {DIFF_EQUAL, ' world.'}}
103
+ * which means: delete 'Hello', add 'Goodbye' and keep ' world.'
104
+ * In each tuple, index 1 is one of the DIFF_* constants and index 2 is the text.
105
+ --]]
105
+ local DIFF_DELETE = -1
106
+ local DIFF_INSERT = 1
107
+ local DIFF_EQUAL = 0
108
+ -- Tunable defaults; settings() below returns the current values or replaces them.
109
+ -- Number of seconds to map a diff before giving up (0 or less for no limit).
110
+ local Diff_Timeout = 1.0
111
+ -- Cost of an empty edit operation in terms of edit characters.
112
+ local Diff_EditCost = 4
113
+ -- At what point is no match declared (0.0 = perfection, 1.0 = very loose).
114
+ local Match_Threshold = 0.5
115
+ -- How far to search for a match (0 = exact location, 1000+ = broad match).
116
+ -- A match this many characters away from the expected location will add
117
+ -- 1.0 to the score (0.0 is a perfect match).
118
+ local Match_Distance = 1000
119
+ -- When deleting a large block of text (over ~64 characters), how close do
120
+ -- the contents have to be to match the expected contents. (0.0 = perfection,
121
+ -- 1.0 = very loose). Note that Match_Threshold controls how closely the
122
+ -- end points of a delete need to match.
123
+ local Patch_DeleteThreshold = 0.5
124
+ -- Chunk size for context length.
125
+ local Patch_Margin = 4
126
+ -- The number of bits in an int.
127
+ local Match_MaxBits = 32
128
+
129
+ function settings(new) -- setter when given a table (returns nothing); getter when `new` is nil or false
130
+ if new then
131
+ Diff_Timeout = new.Diff_Timeout or Diff_Timeout -- `or` keeps the current value only for nil/false fields; 0 is truthy in Lua, so 0 is accepted
132
+ Diff_EditCost = new.Diff_EditCost or Diff_EditCost
133
+ Match_Threshold = new.Match_Threshold or Match_Threshold
134
+ Match_Distance = new.Match_Distance or Match_Distance
135
+ Patch_DeleteThreshold = new.Patch_DeleteThreshold or Patch_DeleteThreshold
136
+ Patch_Margin = new.Patch_Margin or Patch_Margin
137
+ Match_MaxBits = new.Match_MaxBits or Match_MaxBits
138
+ else
139
+ return { -- a fresh table on every call; editing it does not change the live settings
140
+ Diff_Timeout = Diff_Timeout;
141
+ Diff_EditCost = Diff_EditCost;
142
+ Match_Threshold = Match_Threshold;
143
+ Match_Distance = Match_Distance;
144
+ Patch_DeleteThreshold = Patch_DeleteThreshold;
145
+ Patch_Margin = Patch_Margin;
146
+ Match_MaxBits = Match_MaxBits;
147
+ }
148
+ end
149
+ end
150
+
151
+ -- ---------------------------------------------------------------------------
152
+ -- DIFF API
153
+ -- ---------------------------------------------------------------------------
154
+
155
+ -- The private diff functions, forward-declared as locals. A helper defined below with a plain `function` statement must be listed here, otherwise it is created in the module environment instead of staying private.
156
+ local _diff_compute,
157
+ _diff_bisect, _diff_bisectSplit,
158
+ _diff_halfMatchI,
159
+ _diff_halfMatch,
160
+ _diff_cleanupSemanticScore,
161
+ _diff_cleanupSemanticLossless,
162
+ _diff_cleanupMerge,
163
+ _diff_commonPrefix,
164
+ _diff_commonSuffix,
165
+ _diff_commonOverlap,
166
+ _diff_xIndex,
167
+ _diff_text1,
168
+ _diff_text2,
169
+ _diff_toDelta,
170
+ _diff_fromDelta
171
+
172
+ --[[
173
+ * Find the differences between two texts. Simplifies the problem by stripping
174
+ * any common prefix or suffix off the texts before diffing. Raises an error when either text is nil.
175
+ * @param {string} text1 Old string to be diffed.
176
+ * @param {string} text2 New string to be diffed.
177
+ * @param {boolean} opt_checklines Has no effect in Lua.
178
+ * @param {number} opt_deadline Optional time when the diff should be complete
179
+ * by. Used internally for recursive calls. Users should set Diff_Timeout
180
+ * (through settings()) instead.
181
+ * @return {Array.<Array.<number|string>>} Array of diff tuples.
182
+ --]]
183
+ function diff_main(text1, text2, opt_checklines, opt_deadline)
184
+ -- Set a deadline by which time the diff must be complete.
185
+ if opt_deadline == nil then
186
+ if Diff_Timeout <= 0 then
187
+ opt_deadline = 2 ^ 31 -- no time limit requested: use a clock value that is effectively never reached
188
+ else
189
+ opt_deadline = clock() + Diff_Timeout
190
+ end
191
+ end
192
+ local deadline = opt_deadline
193
+
194
+ -- Check for null inputs (both arguments; the second test used to repeat text1).
195
+ if text1 == nil or text2 == nil then
196
+ error('Null inputs. (diff_main)')
197
+ end
198
+
199
+ -- Check for equality (speedup).
200
+ if text1 == text2 then
201
+ if #text1 > 0 then
202
+ return {{DIFF_EQUAL, text1}}
203
+ end
204
+ return {} -- two empty strings: nothing to report
205
+ end
206
+
207
+ -- LUANOTE: Due to the lack of Unicode support, Lua is incapable of
208
+ -- implementing the line-mode speedup.
209
+ local checklines = false -- the caller's opt_checklines is deliberately ignored
210
+
211
+ -- Trim off common prefix (speedup).
212
+ local commonlength = _diff_commonPrefix(text1, text2)
213
+ local commonprefix
214
+ if commonlength > 0 then
215
+ commonprefix = strsub(text1, 1, commonlength)
216
+ text1 = strsub(text1, commonlength + 1)
217
+ text2 = strsub(text2, commonlength + 1)
218
+ end
219
+
220
+ -- Trim off common suffix (speedup).
221
+ commonlength = _diff_commonSuffix(text1, text2)
222
+ local commonsuffix
223
+ if commonlength > 0 then
224
+ commonsuffix = strsub(text1, -commonlength)
225
+ text1 = strsub(text1, 1, -commonlength - 1)
226
+ text2 = strsub(text2, 1, -commonlength - 1)
227
+ end
228
+
229
+ -- Compute the diff on the middle block.
230
+ local diffs = _diff_compute(text1, text2, checklines, deadline)
231
+
232
+ -- Restore the prefix and suffix.
233
+ if commonprefix then
234
+ tinsert(diffs, 1, {DIFF_EQUAL, commonprefix})
235
+ end
236
+ if commonsuffix then
237
+ diffs[#diffs + 1] = {DIFF_EQUAL, commonsuffix}
238
+ end
239
+
240
+ _diff_cleanupMerge(diffs) -- modifies diffs in place before it is returned
241
+ return diffs
242
+ end
243
+
244
+ --[[
245
+ * Reduce the number of edits by eliminating semantically trivial equalities.
246
+ * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
247
+ --]]
248
+ function diff_cleanupSemantic(diffs) -- rewrites diffs in place; nothing is returned
249
+ local changes = false
250
+ local equalities = {} -- Stack of indices where equalities are found.
251
+ local equalitiesLength = 0 -- Keeping our own length var is faster.
252
+ local lastequality = nil
253
+ -- Always equal to diffs[equalities[equalitiesLength]][2]
254
+ local pointer = 1 -- Index of current position.
255
+ -- Number of characters that changed prior to the equality.
256
+ local length_insertions1 = 0
257
+ local length_deletions1 = 0
258
+ -- Number of characters that changed after the equality.
259
+ local length_insertions2 = 0
260
+ local length_deletions2 = 0
261
+
262
+ while diffs[pointer] do
263
+ if diffs[pointer][1] == DIFF_EQUAL then -- Equality found.
264
+ equalitiesLength = equalitiesLength + 1
265
+ equalities[equalitiesLength] = pointer
266
+ length_insertions1 = length_insertions2
267
+ length_deletions1 = length_deletions2
268
+ length_insertions2 = 0
269
+ length_deletions2 = 0
270
+ lastequality = diffs[pointer][2]
271
+ else -- An insertion or deletion.
272
+ if diffs[pointer][1] == DIFF_INSERT then
273
+ length_insertions2 = length_insertions2 + #(diffs[pointer][2])
274
+ else
275
+ length_deletions2 = length_deletions2 + #(diffs[pointer][2])
276
+ end
277
+ -- Eliminate an equality that is smaller or equal to the edits on both
278
+ -- sides of it.
279
+ if lastequality
280
+ and (#lastequality <= max(length_insertions1, length_deletions1))
281
+ and (#lastequality <= max(length_insertions2, length_deletions2)) then
282
+ -- Duplicate record.
283
+ tinsert(diffs, equalities[equalitiesLength],
284
+ {DIFF_DELETE, lastequality})
285
+ -- Change second copy to insert.
286
+ diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT
287
+ -- Throw away the equality we just deleted.
288
+ equalitiesLength = equalitiesLength - 1
289
+ -- Throw away the previous equality (it needs to be reevaluated).
290
+ equalitiesLength = equalitiesLength - 1 -- NOTE(review): this can take the counter below 0 (e.g. 1 -> -1)
291
+ pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0 -- 0 here plus the increment below restarts the scan at index 1
292
+ length_insertions1, length_deletions1 = 0, 0 -- Reset the counters.
293
+ length_insertions2, length_deletions2 = 0, 0
294
+ lastequality = nil
295
+ changes = true
296
+ end
297
+ end
298
+ pointer = pointer + 1
299
+ end
300
+
301
+ -- Normalize the diff.
302
+ if changes then
303
+ _diff_cleanupMerge(diffs)
304
+ end
305
+ _diff_cleanupSemanticLossless(diffs)
306
+
307
+ -- Find any overlaps between deletions and insertions.
308
+ -- e.g: <del>abcxxx</del><ins>xxxdef</ins>
309
+ -- -> <del>abc</del>xxx<ins>def</ins>
310
+ -- e.g: <del>xxxabc</del><ins>defxxx</ins>
311
+ -- -> <ins>def</ins>xxx<del>abc</del>
312
+ -- Only extract an overlap if it is at least half as long as the deletion or the insertion.
313
+ pointer = 2 -- start at 2 because each step looks at the pair (pointer - 1, pointer)
314
+ while diffs[pointer] do
315
+ if (diffs[pointer - 1][1] == DIFF_DELETE and
316
+ diffs[pointer][1] == DIFF_INSERT) then
317
+ local deletion = diffs[pointer - 1][2]
318
+ local insertion = diffs[pointer][2]
319
+ local overlap_length1 = _diff_commonOverlap(deletion, insertion) -- end of deletion vs start of insertion
320
+ local overlap_length2 = _diff_commonOverlap(insertion, deletion) -- end of insertion vs start of deletion
321
+ if (overlap_length1 >= overlap_length2) then
322
+ if (overlap_length1 >= #deletion / 2 or
323
+ overlap_length1 >= #insertion / 2) then
324
+ -- Overlap found. Insert an equality and trim the surrounding edits.
325
+ tinsert(diffs, pointer,
326
+ {DIFF_EQUAL, strsub(insertion, 1, overlap_length1)})
327
+ diffs[pointer - 1][2] =
328
+ strsub(deletion, 1, #deletion - overlap_length1)
329
+ diffs[pointer + 1][2] = strsub(insertion, overlap_length1 + 1)
330
+ pointer = pointer + 1 -- step over the equality that was just inserted
331
+ end
332
+ else
333
+ if (overlap_length2 >= #deletion / 2 or
334
+ overlap_length2 >= #insertion / 2) then
335
+ -- Reverse overlap found.
336
+ -- Insert an equality and swap and trim the surrounding edits.
337
+ tinsert(diffs, pointer,
338
+ {DIFF_EQUAL, strsub(deletion, 1, overlap_length2)})
339
+ diffs[pointer - 1] = {DIFF_INSERT,
340
+ strsub(insertion, 1, #insertion - overlap_length2)}
341
+ diffs[pointer + 1] = {DIFF_DELETE,
342
+ strsub(deletion, overlap_length2 + 1)}
343
+ pointer = pointer + 1 -- step over the equality that was just inserted
344
+ end
345
+ end
346
+ pointer = pointer + 1
347
+ end
348
+ pointer = pointer + 1
349
+ end
350
+ end
351
+
352
+ --[[
353
+ * Reduce the number of edits by eliminating operationally trivial equalities.
354
+ * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
355
+ --]]
356
+ function diff_cleanupEfficiency(diffs) -- rewrites diffs in place; nothing is returned
357
+ local changes = false
358
+ -- Stack of indices where equalities are found.
359
+ local equalities = {}
360
+ -- Keeping our own length var is faster.
361
+ local equalitiesLength = 0
362
+ -- Always equal to diffs[equalities[equalitiesLength]][2]
363
+ local lastequality = nil
364
+ -- Index of current position.
365
+ local pointer = 1
366
+
367
+ -- The following four are really booleans but are stored as numbers because
368
+ -- they are used at one point like this:
369
+ --
370
+ -- (pre_ins + pre_del + post_ins + post_del) == 3
371
+ --
372
+ -- ...i.e. checking that 3 of them are true and 1 of them is false.
373
+
374
+ -- Is there an insertion operation before the last equality.
375
+ local pre_ins = 0
376
+ -- Is there a deletion operation before the last equality.
377
+ local pre_del = 0
378
+ -- Is there an insertion operation after the last equality.
379
+ local post_ins = 0
380
+ -- Is there a deletion operation after the last equality.
381
+ local post_del = 0
382
+
383
+ while diffs[pointer] do
384
+ if diffs[pointer][1] == DIFF_EQUAL then -- Equality found.
385
+ local diffText = diffs[pointer][2]
386
+ if (#diffText < Diff_EditCost) and (post_ins == 1 or post_del == 1) then
387
+ -- Candidate found.
388
+ equalitiesLength = equalitiesLength + 1
389
+ equalities[equalitiesLength] = pointer
390
+ pre_ins, pre_del = post_ins, post_del
391
+ lastequality = diffText
392
+ else
393
+ -- Not a candidate, and can never become one.
394
+ equalitiesLength = 0
395
+ lastequality = nil
396
+ end
397
+ post_ins, post_del = 0, 0
398
+ else -- An insertion or deletion.
399
+ if diffs[pointer][1] == DIFF_DELETE then
400
+ post_del = 1
401
+ else
402
+ post_ins = 1
403
+ end
404
+ --[[
405
+ * Five types to be split:
406
+ * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
407
+ * <ins>A</ins>X<ins>C</ins><del>D</del>
408
+ * <ins>A</ins><del>B</del>X<ins>C</ins>
409
+ * <del>A</del>X<ins>C</ins><del>D</del>
410
+ * <ins>A</ins><del>B</del>X<del>C</del>
411
+ --]]
412
+ if lastequality and (
413
+ (pre_ins+pre_del+post_ins+post_del == 4)
414
+ or
415
+ (
416
+ (#lastequality < Diff_EditCost / 2)
417
+ and
418
+ (pre_ins+pre_del+post_ins+post_del == 3)
419
+ )) then
420
+ -- Duplicate record.
421
+ tinsert(diffs, equalities[equalitiesLength],
422
+ {DIFF_DELETE, lastequality})
423
+ -- Change second copy to insert.
424
+ diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT
425
+ -- Throw away the equality we just deleted.
426
+ equalitiesLength = equalitiesLength - 1
427
+ lastequality = nil
428
+ if (pre_ins == 1) and (pre_del == 1) then
429
+ -- No changes made which could affect previous entry, keep going.
430
+ post_ins, post_del = 1, 1
431
+ equalitiesLength = 0
432
+ else
433
+ -- Throw away the previous equality.
434
+ equalitiesLength = equalitiesLength - 1
435
+ pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0 -- 0 here plus the increment below restarts the scan at index 1
436
+ post_ins, post_del = 0, 0
437
+ end
438
+ changes = true
439
+ end
440
+ end
441
+ pointer = pointer + 1
442
+ end
443
+
444
+ if changes then
445
+ _diff_cleanupMerge(diffs) -- only needed when at least one equality was split above
446
+ end
447
+ end
448
+
449
+ --[[
450
+ * Compute the Levenshtein distance; the number of inserted, deleted or
451
+ * substituted characters.
452
+ * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
453
+ * @return {number} Number of changes.
454
+ --]]
455
+ function diff_levenshtein(diffs) -- lengths are taken with #, i.e. counted in bytes
456
+ local levenshtein = 0
457
+ local insertions, deletions = 0, 0 -- running totals for the current run of edits between equalities
458
+ for x, diff in ipairs(diffs) do
459
+ local op, data = diff[1], diff[2]
460
+ if (op == DIFF_INSERT) then
461
+ insertions = insertions + #data
462
+ elseif (op == DIFF_DELETE) then
463
+ deletions = deletions + #data
464
+ elseif (op == DIFF_EQUAL) then
465
+ -- A deletion and an insertion is one substitution.
466
+ levenshtein = levenshtein + max(insertions, deletions)
467
+ insertions = 0
468
+ deletions = 0
469
+ end
470
+ end
471
+ levenshtein = levenshtein + max(insertions, deletions) -- account for edits after the last equality
472
+ return levenshtein
473
+ end
474
+
475
+ --[[
476
+ * Convert a diff array into a pretty HTML report.
477
+ * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
478
+ * @return {string} HTML representation.
479
+ --]]
480
+ function diff_prettyHtml(diffs) -- builds one HTML fragment per tuple and joins them
481
+ local html = {}
482
+ for x, diff in ipairs(diffs) do
483
+ local op = diff[1] -- Operation (insert, delete, equal)
484
+ local data = diff[2] -- Text of change.
485
+ local text = gsub(data, htmlEncode_pattern, htmlEncode_replace) -- only gsub's first result (the string) is kept
486
+ if op == DIFF_INSERT then
487
+ html[x] = '<ins style="background:#e6ffe6;">' .. text .. '</ins>'
488
+ elseif op == DIFF_DELETE then
489
+ html[x] = '<del style="background:#ffe6e6;">' .. text .. '</del>'
490
+ elseif op == DIFF_EQUAL then
491
+ html[x] = '<span>' .. text .. '</span>'
492
+ end -- NOTE(review): any other op value leaves html[x] unset
493
+ end
494
+ return tconcat(html)
495
+ end
496
+
497
+ -- ---------------------------------------------------------------------------
498
+ -- UNOFFICIAL/PRIVATE DIFF FUNCTIONS
499
+ -- ---------------------------------------------------------------------------
500
+
501
+ --[[
502
+ * Find the differences between two texts. Assumes that the texts do not
503
+ * have any common prefix or suffix.
504
+ * @param {string} text1 Old string to be diffed.
505
+ * @param {string} text2 New string to be diffed.
506
+ * @param {boolean} checklines Has no effect in Lua.
507
+ * @param {number} deadline Time when the diff should be complete by.
508
+ * @return {Array.<Array.<number|string>>} Array of diff tuples.
509
+ * @private
510
+ --]]
511
+ function _diff_compute(text1, text2, checklines, deadline) -- tries the cheap special cases first, then half-match, then bisect
512
+ if #text1 == 0 then
513
+ -- Just add some text (speedup).
514
+ return {{DIFF_INSERT, text2}}
515
+ end
516
+
517
+ if #text2 == 0 then
518
+ -- Just delete some text (speedup).
519
+ return {{DIFF_DELETE, text1}}
520
+ end
521
+
522
+ local diffs
523
+
524
+ local longtext = (#text1 > #text2) and text1 or text2
525
+ local shorttext = (#text1 > #text2) and text2 or text1
526
+ local i = indexOf(longtext, shorttext) -- only the start index is kept
527
+
528
+ if i ~= nil then
529
+ -- Shorter text is inside the longer text (speedup).
530
+ diffs = {
531
+ {DIFF_INSERT, strsub(longtext, 1, i - 1)}, -- an empty string when i == 1
532
+ {DIFF_EQUAL, shorttext},
533
+ {DIFF_INSERT, strsub(longtext, i + #shorttext)} -- an empty string when shorttext ends longtext
534
+ }
535
+ -- Swap insertions for deletions if diff is reversed.
536
+ if #text1 > #text2 then
537
+ diffs[1][1], diffs[3][1] = DIFF_DELETE, DIFF_DELETE
538
+ end
539
+ return diffs
540
+ end
541
+
542
+ if #shorttext == 1 then
543
+ -- Single character string.
544
+ -- After the previous speedup, the character can't be an equality.
545
+ return {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}}
546
+ end
547
+ longtext, shorttext = nil, nil -- Garbage collect.
548
+
549
+ -- Check to see if the problem can be split in two.
550
+ do
551
+ local
552
+ text1_a, text1_b,
553
+ text2_a, text2_b,
554
+ mid_common = _diff_halfMatch(text1, text2) -- five results, or a single nil (text1_a is then nil)
555
+
556
+ if text1_a then
557
+ -- A half-match was found, sort out the return data.
558
+ -- Send both pairs off for separate processing.
559
+ local diffs_a = diff_main(text1_a, text2_a, checklines, deadline)
560
+ local diffs_b = diff_main(text1_b, text2_b, checklines, deadline)
561
+ -- Merge the results.
562
+ local diffs_a_len = #diffs_a
563
+ diffs = diffs_a -- diffs_a is reused as the result table, not copied
564
+ diffs[diffs_a_len + 1] = {DIFF_EQUAL, mid_common}
565
+ for i, b_diff in ipairs(diffs_b) do -- this i shadows the outer i from indexOf
566
+ diffs[diffs_a_len + 1 + i] = b_diff
567
+ end
568
+ return diffs
569
+ end
570
+ end
571
+
572
+ return _diff_bisect(text1, text2, deadline)
573
+ end
574
+
575
+ --[[
576
+ * Find the 'middle snake' of a diff, split the problem in two
577
+ * and return the recursively constructed diff.
578
+ * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
579
+ * @param {string} text1 Old string to be diffed.
580
+ * @param {string} text2 New string to be diffed.
581
+ * @param {number} deadline Time at which to bail if not yet complete.
582
+ * @return {Array.<Array.<number|string>>} Array of diff tuples.
583
+ * @private
584
+ --]]
585
+ function _diff_bisect(text1, text2, deadline) -- falls back to one delete plus one insert when no split is found or time runs out
586
+ -- Cache the text lengths to prevent multiple calls.
587
+ local text1_length = #text1
588
+ local text2_length = #text2
589
+ local _sub, _element -- NOTE(review): neither name is used anywhere in this function
590
+ local max_d = ceil((text1_length + text2_length) / 2)
591
+ local v_offset = max_d
592
+ local v_length = 2 * max_d
593
+ local v1 = {}
594
+ local v2 = {}
595
+ -- Setting all elements to -1 is faster in Lua than mixing integers and nil.
596
+ for x = 0, v_length - 1 do -- v1 and v2 are indexed from 0, not 1
597
+ v1[x] = -1
598
+ v2[x] = -1
599
+ end
600
+ v1[v_offset + 1] = 0
601
+ v2[v_offset + 1] = 0
602
+ local delta = text1_length - text2_length
603
+ -- If the total number of characters is odd, then
604
+ -- the front path will collide with the reverse path.
605
+ local front = (delta % 2 ~= 0)
606
+ -- Offsets for start and end of k loop.
607
+ -- Prevents mapping of space beyond the grid.
608
+ local k1start = 0
609
+ local k1end = 0
610
+ local k2start = 0
611
+ local k2end = 0
612
+ for d = 0, max_d - 1 do
613
+ -- Bail out if deadline is reached.
614
+ if clock() > deadline then
615
+ break
616
+ end
617
+
618
+ -- Walk the front path one step.
619
+ for k1 = -d + k1start, d - k1end, 2 do
620
+ local k1_offset = v_offset + k1
621
+ local x1
622
+ if (k1 == -d) or ((k1 ~= d) and
623
+ (v1[k1_offset - 1] < v1[k1_offset + 1])) then
624
+ x1 = v1[k1_offset + 1]
625
+ else
626
+ x1 = v1[k1_offset - 1] + 1
627
+ end
628
+ local y1 = x1 - k1
629
+ while (x1 <= text1_length) and (y1 <= text2_length)
630
+ and (strelement(text1, x1) == strelement(text2, y1)) do -- at x1 == 0 and y1 == 0 both sides are '', so the walk steps on to index 1
631
+ x1 = x1 + 1
632
+ y1 = y1 + 1
633
+ end
634
+ v1[k1_offset] = x1
635
+ if x1 > text1_length + 1 then
636
+ -- Ran off the right of the graph.
637
+ k1end = k1end + 2
638
+ elseif y1 > text2_length + 1 then
639
+ -- Ran off the bottom of the graph.
640
+ k1start = k1start + 2
641
+ elseif front then
642
+ local k2_offset = v_offset + delta - k1
643
+ if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] ~= -1 then
644
+ -- Mirror x2 onto top-left coordinate system.
645
+ local x2 = text1_length - v2[k2_offset] + 1
646
+ if x1 > x2 then
647
+ -- Overlap detected.
648
+ return _diff_bisectSplit(text1, text2, x1, y1, deadline)
649
+ end
650
+ end
651
+ end
652
+ end
653
+
654
+ -- Walk the reverse path one step.
655
+ for k2 = -d + k2start, d - k2end, 2 do
656
+ local k2_offset = v_offset + k2
657
+ local x2
658
+ if (k2 == -d) or ((k2 ~= d) and
659
+ (v2[k2_offset - 1] < v2[k2_offset + 1])) then
660
+ x2 = v2[k2_offset + 1]
661
+ else
662
+ x2 = v2[k2_offset - 1] + 1
663
+ end
664
+ local y2 = x2 - k2
665
+ while (x2 <= text1_length) and (y2 <= text2_length)
666
+ and (strelement(text1, -x2) == strelement(text2, -y2)) do -- negative indices: this walk compares characters counted from the end
667
+ x2 = x2 + 1
668
+ y2 = y2 + 1
669
+ end
670
+ v2[k2_offset] = x2
671
+ if x2 > text1_length + 1 then
672
+ -- Ran off the left of the graph.
673
+ k2end = k2end + 2
674
+ elseif y2 > text2_length + 1 then
675
+ -- Ran off the top of the graph.
676
+ k2start = k2start + 2
677
+ elseif not front then
678
+ local k1_offset = v_offset + delta - k2
679
+ if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] ~= -1 then
680
+ local x1 = v1[k1_offset]
681
+ local y1 = v_offset + x1 - k1_offset
682
+ -- Mirror x2 onto top-left coordinate system.
683
+ x2 = text1_length - x2 + 1
684
+ if x1 > x2 then
685
+ -- Overlap detected.
686
+ return _diff_bisectSplit(text1, text2, x1, y1, deadline)
687
+ end
688
+ end
689
+ end
690
+ end
691
+ end
692
+ -- Diff took too long and hit the deadline or
693
+ -- number of diffs equals number of characters, no commonality at all.
694
+ return {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}}
695
+ end
696
+
697
+ --[[
698
+ * Given the location of the 'middle snake', split the diff in two parts
699
+ * and recurse.
700
+ * @param {string} text1 Old string to be diffed.
701
+ * @param {string} text2 New string to be diffed.
702
+ * @param {number} x Index of split point in text1.
703
+ * @param {number} y Index of split point in text2.
704
+ * @param {number} deadline Time at which to bail if not yet complete.
705
+ * @return {Array.<Array.<number|string>>} Array of diff tuples.
706
+ * @private
707
+ --]]
708
+ function _diff_bisectSplit(text1, text2, x, y, deadline) -- x and y are the first positions that belong to the second halves
709
+ local text1a = strsub(text1, 1, x - 1)
710
+ local text2a = strsub(text2, 1, y - 1)
711
+ local text1b = strsub(text1, x)
712
+ local text2b = strsub(text2, y)
713
+
714
+ -- Compute both diffs serially.
715
+ local diffs = diff_main(text1a, text2a, false, deadline) -- the shared deadline is passed down so the recursion keeps one time budget
716
+ local diffsb = diff_main(text1b, text2b, false, deadline)
717
+
718
+ local diffs_len = #diffs
719
+ for i, v in ipairs(diffsb) do -- append the second half's tuples to the first half's table
720
+ diffs[diffs_len + i] = v
721
+ end
722
+ return diffs
723
+ end
724
+
725
+ --[[
726
+ * Determine the common prefix of two strings.
727
+ * @param {string} text1 First string.
728
+ * @param {string} text2 Second string.
729
+ * @return {number} The number of characters common to the start of each
730
+ * string.
731
+ --]]
732
+ function _diff_commonPrefix(text1, text2) -- lengths and positions are in bytes
733
+ -- Quick check for common null cases.
734
+ if (#text1 == 0) or (#text2 == 0) or (strbyte(text1, 1) ~= strbyte(text2, 1))
735
+ then
736
+ return 0
737
+ end
738
+ -- Binary search.
739
+ -- Performance analysis: http://neil.fraser.name/news/2007/10/09/
740
+ local pointermin = 1 -- the first byte is already known to match (checked above)
741
+ local pointermax = min(#text1, #text2)
742
+ local pointermid = pointermax
743
+ local pointerstart = 1
744
+ while (pointermin < pointermid) do
745
+ if (strsub(text1, pointerstart, pointermid)
746
+ == strsub(text2, pointerstart, pointermid)) then
747
+ pointermin = pointermid
748
+ pointerstart = pointermin -- later comparisons start at the end of the range just confirmed equal
749
+ else
750
+ pointermax = pointermid
751
+ end
752
+ pointermid = floor(pointermin + (pointermax - pointermin) / 2)
753
+ end
754
+ return pointermid
755
+ end
756
+
757
+ --[[
758
+ * Determine the common suffix of two strings.
759
+ * @param {string} text1 First string.
760
+ * @param {string} text2 Second string.
761
+ * @return {number} The number of characters common to the end of each string.
762
+ --]]
763
+ function _diff_commonSuffix(text1, text2) -- mirror image of _diff_commonPrefix, using negative (from-the-end) indices
764
+ -- Quick check for common null cases.
765
+ if (#text1 == 0) or (#text2 == 0)
766
+ or (strbyte(text1, -1) ~= strbyte(text2, -1)) then
767
+ return 0
768
+ end
769
+ -- Binary search.
770
+ -- Performance analysis: http://neil.fraser.name/news/2007/10/09/
771
+ local pointermin = 1 -- the last byte is already known to match (checked above)
772
+ local pointermax = min(#text1, #text2)
773
+ local pointermid = pointermax
774
+ local pointerend = 1
775
+ while (pointermin < pointermid) do
776
+ if (strsub(text1, -pointermid, -pointerend)
777
+ == strsub(text2, -pointermid, -pointerend)) then
778
+ pointermin = pointermid
779
+ pointerend = pointermin -- later comparisons stop at the start of the range just confirmed equal
780
+ else
781
+ pointermax = pointermid
782
+ end
783
+ pointermid = floor(pointermin + (pointermax - pointermin) / 2)
784
+ end
785
+ return pointermid
786
+ end
787
+
788
+ --[[
789
+ * Determine if the suffix of one string is the prefix of another.
790
+ * @param {string} text1 First string.
791
+ * @param {string} text2 Second string.
792
+ * @return {number} The number of characters common to the end of the first
793
+ * string and the start of the second string.
794
+ * @private
795
+ --]]
796
+ function _diff_commonOverlap(text1, text2) -- length of the longest suffix of text1 that is also a prefix of text2
797
+ -- Cache the text lengths to prevent multiple calls.
798
+ local text1_length = #text1
799
+ local text2_length = #text2
800
+ -- Eliminate the null case.
801
+ if text1_length == 0 or text2_length == 0 then
802
+ return 0
803
+ end
804
+ -- Truncate the longer string.
805
+ if text1_length > text2_length then
806
+ text1 = strsub(text1, text1_length - text2_length + 1) -- keep only the tail of text1
807
+ elseif text1_length < text2_length then
808
+ text2 = strsub(text2, 1, text1_length) -- keep only the head of text2
809
+ end
810
+ local text_length = min(text1_length, text2_length) -- both strings now have this length
811
+ -- Quick check for the worst case.
812
+ if text1 == text2 then
813
+ return text_length
814
+ end
815
+
816
+ -- Start by looking for a single character match
817
+ -- and increase length until no match is found.
818
+ -- Performance analysis: http://neil.fraser.name/news/2010/11/04/
819
+ local best = 0
820
+ local length = 1
821
+ while true do
822
+ local pattern = strsub(text1, text_length - length + 1) -- the last `length` characters of text1
823
+ local found = strfind(text2, pattern, 1, true) -- plain search, so `pattern` is matched literally
824
+ if found == nil then
825
+ return best
826
+ end
827
+ length = length + found - 1
828
+ if found == 1 or strsub(text1, text_length - length + 1) ==
829
+ strsub(text2, 1, length) then
830
+ best = length
831
+ length = length + 1
832
+ end
833
+ end
834
+ end
835
+
836
+ --[[
837
+ * Does a substring of shorttext exist within longtext such that the substring
838
+ * is at least half the length of longtext?
839
+ * This speedup can produce non-minimal diffs.
840
+ * Called by _diff_halfMatch, once for each of its two seed positions.
841
+ * @param {string} longtext Longer string.
842
+ * @param {string} shorttext Shorter string.
843
+ * @param {number} i Start index of quarter length substring within longtext.
844
+ * @return {?Array.<string>} Five element Array, containing the prefix of
845
+ * longtext, the suffix of longtext, the prefix of shorttext, the suffix
846
+ * of shorttext and the common middle. Or nil if there was no match.
847
+ * @private
848
+ --]]
849
+ function _diff_halfMatchI(longtext, shorttext, i)
850
+ -- Start with a 1/4 length substring at position i as a seed.
851
+ local seed = strsub(longtext, i, i + floor(#longtext / 4)) -- NOTE(review): strsub's end index is inclusive, so the seed is floor(#longtext / 4) + 1 characters long
852
+ local j = 0 -- LUANOTE: do not change to 1, was originally -1
853
+ local best_common = ''
854
+ local best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b
855
+ while true do
856
+ j = indexOf(shorttext, seed, j + 1) -- next occurrence of the seed after the previous hit
857
+ if (j == nil) then
858
+ break
859
+ end
860
+ local prefixLength = _diff_commonPrefix(strsub(longtext, i),
861
+ strsub(shorttext, j))
862
+ local suffixLength = _diff_commonSuffix(strsub(longtext, 1, i - 1),
863
+ strsub(shorttext, 1, j - 1))
864
+ if #best_common < suffixLength + prefixLength then -- strictly longer only, so the earliest of equally long matches is kept
865
+ best_common = strsub(shorttext, j - suffixLength, j - 1)
866
+ .. strsub(shorttext, j, j + prefixLength - 1)
867
+ best_longtext_a = strsub(longtext, 1, i - suffixLength - 1)
868
+ best_longtext_b = strsub(longtext, i + prefixLength)
869
+ best_shorttext_a = strsub(shorttext, 1, j - suffixLength - 1)
870
+ best_shorttext_b = strsub(shorttext, j + prefixLength)
871
+ end
872
+ end
873
+ if #best_common * 2 >= #longtext then
874
+ return {best_longtext_a, best_longtext_b,
875
+ best_shorttext_a, best_shorttext_b, best_common}
876
+ else
877
+ return nil
878
+ end
879
+ end
880
+
881
+ --[[
882
+ * Do the two texts share a substring which is at least half the length of the
883
+ * longer text? Always answers nil when Diff_Timeout <= 0.
884
+ * @param {string} text1 First string.
885
+ * @param {string} text2 Second string.
886
+ * @return Five separate values (not a table): the prefix of
887
+ * text1, the suffix of text1, the prefix of text2, the suffix of
888
+ * text2 and the common middle. A single nil if there was no match.
889
+ * @private
890
+ --]]
891
+ function _diff_halfMatch(text1, text2)
892
+ if Diff_Timeout <= 0 then
893
+ -- Don't risk returning a non-optimal diff if we have unlimited time.
894
+ return nil
895
+ end
896
+ local longtext = (#text1 > #text2) and text1 or text2
897
+ local shorttext = (#text1 > #text2) and text2 or text1
898
+ if (#longtext < 4) or (#shorttext * 2 < #longtext) then
899
+ return nil -- Pointless.
900
+ end
901
+
902
+ -- First check if the second quarter is the seed for a half-match.
903
+ local hm1 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 4))
904
+ -- Check again based on the third quarter.
905
+ local hm2 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 2))
906
+ local hm
907
+ if not hm1 and not hm2 then
908
+ return nil
909
+ elseif not hm2 then
910
+ hm = hm1
911
+ elseif not hm1 then
912
+ hm = hm2
913
+ else
914
+ -- Both matched. Select the longest.
915
+ hm = (#hm1[5] > #hm2[5]) and hm1 or hm2 -- on a tie hm2 is chosen
916
+ end
917
+
918
+ -- A half-match was found, sort out the return data.
919
+ local text1_a, text1_b, text2_a, text2_b
920
+ if (#text1 > #text2) then
921
+ text1_a, text1_b = hm[1], hm[2] -- text1 was the long text, so slots 1-2 belong to it
922
+ text2_a, text2_b = hm[3], hm[4]
923
+ else
924
+ text2_a, text2_b = hm[1], hm[2] -- text2 was the long text, so slots 1-2 belong to it
925
+ text1_a, text1_b = hm[3], hm[4]
926
+ end
927
+ local mid_common = hm[5]
928
+ return text1_a, text1_b, text2_a, text2_b, mid_common
929
+ end
930
+
931
--[[
* Rate how well the boundary between two adjacent strings lines up with a
* logical boundary in the text.
* Only the last character of 'one' and the first character of 'two' are
* classified, using Lua's native character classes; the blank-line check
* additionally looks at the end of 'one' and the start of 'two'.
* Scores range from 6 (best) to 0 (worst).
* @param {string} one First string.
* @param {string} two Second string.
* @return {number} The score.
* @private
--]]
function _diff_cleanupSemanticScore(one, two)
  if (#one == 0) or (#two == 0) then
    -- A boundary at the edge of the text is the best possible.
    return 6
  end

  -- Each port of this function behaves slightly differently because every
  -- language defines things like 'whitespace' in its own way. The function
  -- is largely cosmetic, so native features are used on purpose.
  local last, first = strsub(one, -1), strsub(two, 1, 1)
  local nonword1 = strmatch(last, '%W') ~= nil
  local nonword2 = strmatch(first, '%W') ~= nil
  local space1 = nonword1 and strmatch(last, '%s') ~= nil
  local space2 = nonword2 and strmatch(first, '%s') ~= nil
  -- A "line break" here is any whitespace character that is also a control
  -- character (%c), so a tab qualifies as well as CR and LF.
  local break1 = space1 and strmatch(last, '%c') ~= nil
  local break2 = space2 and strmatch(first, '%c') ~= nil
  local blank1 = break1 and strmatch(one, '\n\r?\n$') ~= nil
  local blank2 = break2 and strmatch(two, '^\r?\n\r?\n') ~= nil

  if blank1 or blank2 then
    return 5  -- Blank lines.
  end
  if break1 or break2 then
    return 4  -- Line breaks.
  end
  if nonword1 and not space1 and space2 then
    return 3  -- End of sentences.
  end
  if space1 or space2 then
    return 2  -- Whitespace.
  end
  if nonword1 or nonword2 then
    return 1  -- Non-alphanumeric.
  end
  return 0
end
980
+
981
--[[
* Find single edits that have an equality on both sides and slide them
* sideways so that the edit lines up with a word boundary.
* e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
* The diff list is modified in place; equalities that become empty are
* removed from it.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
--]]
function _diff_cleanupSemanticLossless(diffs)
  local idx = 2
  -- The first and last elements are never the middle of a triple, so the
  -- scan runs from the second element to the second-to-last one.
  while diffs[idx + 1] do
    local before, after = diffs[idx - 1], diffs[idx + 1]
    if (before[1] == DIFF_EQUAL) and (after[1] == DIFF_EQUAL) then
      -- A single edit surrounded by equalities.
      local current = diffs[idx]
      local left, mid, right = before[2], current[2], after[2]

      -- Step 1: slide the edit as far left as it will go.
      local overlap = _diff_commonSuffix(left, mid)
      if overlap > 0 then
        local moved = strsub(mid, -overlap)
        left = strsub(left, 1, -overlap - 1)
        mid = moved .. strsub(mid, 1, -overlap - 1)
        right = moved .. right
      end

      -- Step 2: walk right one character at a time, keeping the best fit.
      local best_left, best_mid, best_right = left, mid, right
      local best_score = _diff_cleanupSemanticScore(left, mid)
          + _diff_cleanupSemanticScore(mid, right)
      while strbyte(mid, 1) == strbyte(right, 1) do
        -- Both strings start with the same character, so it can rotate
        -- from the front of the edit to its back.
        local ch = strsub(mid, 1, 1)
        left = left .. ch
        mid = strsub(mid, 2) .. ch
        right = strsub(right, 2)
        local score = _diff_cleanupSemanticScore(left, mid)
            + _diff_cleanupSemanticScore(mid, right)
        -- The >= encourages trailing rather than leading whitespace on edits.
        if score >= best_score then
          best_score = score
          best_left, best_mid, best_right = left, mid, right
        end
      end

      if before[2] ~= best_left then
        -- The edit moved; write the new triple back into the list.
        if #best_left > 0 then
          before[2] = best_left
        else
          tremove(diffs, idx - 1)
          idx = idx - 1
        end
        current[2] = best_mid
        if #best_right > 0 then
          after[2] = best_right
        else
          tremove(diffs, idx + 1)
          idx = idx - 1
        end
      end
    end
    idx = idx + 1
  end
end
1050
+
1051
--[[
* Reorder and merge like edit sections. Merge equalities.
* Any edit section can move as long as it doesn't cross an equality.
* Works in place in two passes: the first collapses each run of inserts and
* deletes into at most one delete followed by one insert (factoring shared
* prefixes and suffixes out into the neighbouring equalities); the second
* slides single edits over an adjacent equality when that removes the
* equality. If the second pass changed anything the whole procedure is
* repeated via a tail call.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
--]]
function _diff_cleanupMerge(diffs)
  -- Sentinel equality so that a trailing run of edits still gets flushed.
  diffs[#diffs + 1] = {DIFF_EQUAL, ''}

  local idx = 1
  local n_del, n_ins = 0, 0
  local del_text, ins_text = '', ''
  while diffs[idx] do
    local op = diffs[idx][1]
    if op == DIFF_INSERT then
      n_ins = n_ins + 1
      ins_text = ins_text .. diffs[idx][2]
      idx = idx + 1
    elseif op == DIFF_DELETE then
      n_del = n_del + 1
      del_text = del_text .. diffs[idx][2]
      idx = idx + 1
    elseif op == DIFF_EQUAL then
      -- An equality ends the current run of edits; tidy the run up.
      if n_del + n_ins > 1 then
        if (n_del > 0) and (n_ins > 0) then
          -- Factor out any common prefix.
          local shared = _diff_commonPrefix(ins_text, del_text)
          if shared > 0 then
            local run_start = idx - n_del - n_ins
            local head = strsub(ins_text, 1, shared)
            if (run_start > 1)
                and (diffs[run_start - 1][1] == DIFF_EQUAL) then
              diffs[run_start - 1][2] = diffs[run_start - 1][2] .. head
            else
              -- No equality precedes the run: create one at the front.
              tinsert(diffs, 1, {DIFF_EQUAL, head})
              idx = idx + 1
            end
            ins_text = strsub(ins_text, shared + 1)
            del_text = strsub(del_text, shared + 1)
          end
          -- Factor out any common suffix into the equality we stand on.
          shared = _diff_commonSuffix(ins_text, del_text)
          if shared ~= 0 then
            diffs[idx][2] = strsub(ins_text, -shared) .. diffs[idx][2]
            ins_text = strsub(ins_text, 1, -shared - 1)
            del_text = strsub(del_text, 1, -shared - 1)
          end
        end
        -- Replace the whole run with the merged record(s).
        local run_start = idx - n_del - n_ins
        if n_del == 0 then
          tsplice(diffs, run_start, n_ins, {DIFF_INSERT, ins_text})
        elseif n_ins == 0 then
          tsplice(diffs, run_start, n_del, {DIFF_DELETE, del_text})
        else
          tsplice(diffs, run_start, n_del + n_ins,
              {DIFF_DELETE, del_text}, {DIFF_INSERT, ins_text})
        end
        -- Step past the merged record(s) and the equality itself.
        idx = run_start
            + (n_del > 0 and 1 or 0) + (n_ins > 0 and 1 or 0) + 1
      elseif (idx > 1) and (diffs[idx - 1][1] == DIFF_EQUAL) then
        -- Two equalities in a row: fold this one into the previous one.
        diffs[idx - 1][2] = diffs[idx - 1][2] .. diffs[idx][2]
        tremove(diffs, idx)
      else
        idx = idx + 1
      end
      n_ins, n_del = 0, 0
      del_text, ins_text = '', ''
    end
  end
  if diffs[#diffs][2] == '' then
    diffs[#diffs] = nil  -- Drop the sentinel again.
  end

  -- Second pass: look for single edits surrounded on both sides by
  -- equalities which can be shifted sideways to eliminate an equality.
  -- e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
  local shifted = false
  idx = 2
  -- The first and last elements cannot be the middle of a triple.
  while idx < #diffs do
    local before, after = diffs[idx - 1], diffs[idx + 1]
    if (before[1] == DIFF_EQUAL) and (after[1] == DIFF_EQUAL) then
      local current = diffs[idx]
      local cur_text, prev_text, next_text = current[2], before[2], after[2]
      if strsub(cur_text, -#prev_text) == prev_text then
        -- The edit ends with the previous equality: shift it left over it.
        current[2] = prev_text .. strsub(cur_text, 1, -#prev_text - 1)
        after[2] = prev_text .. next_text
        tremove(diffs, idx - 1)
        shifted = true
      elseif strsub(cur_text, 1, #next_text) == next_text then
        -- The edit starts with the next equality: shift it right over it.
        before[2] = prev_text .. next_text
        current[2] = strsub(cur_text, #next_text + 1) .. next_text
        tremove(diffs, idx + 1)
        shifted = true
      end
    end
    idx = idx + 1
  end
  -- If shifts were made, the diff needs reordering and another shift sweep.
  if shifted then
    -- LUANOTE: there is no return value, but 'return' is required to make
    -- this a tail call.
    return _diff_cleanupMerge(diffs)
  end
end
1167
+
1168
--[[
* loc is a location in text1, compute and return the equivalent location in
* text2.
* e.g. 'The cat' vs 'The big cat', 1->1, 5->8
* Locations are 1-based. A location that falls inside a deletion maps to the
* position in text2 where that deletion starts, whether or not the deletion
* is the last diff in the list. An empty diff list maps loc to itself.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
* @param {number} loc Location within text1.
* @return {number} Location within text2.
--]]
function _diff_xIndex(diffs, loc)
  local chars1, chars2 = 1, 1
  local last_chars1, last_chars2 = 1, 1
  -- The diff whose text1 span reaches past loc; stays nil when the loop
  -- runs off the end of the list without overshooting.
  local containing
  for _, diff in ipairs(diffs) do
    local op, len = diff[1], #diff[2]
    if op ~= DIFF_INSERT then  -- Equality or deletion.
      chars1 = chars1 + len
    end
    if op ~= DIFF_DELETE then  -- Equality or insertion.
      chars2 = chars2 + len
    end
    if chars1 > loc then  -- Overshot the location.
      containing = diff
      break
    end
    last_chars1 = chars1
    last_chars2 = chars2
  end
  -- Was the location deleted? Decide from the diff that actually contains
  -- loc rather than from whether another diff follows it, so a deletion at
  -- the very end of the list is handled as well.
  if containing and (containing[1] == DIFF_DELETE) then
    return last_chars2
  end
  -- Add the remaining character length.
  return last_chars2 + (loc - last_chars1)
end
1203
+
1204
--[[
* Rebuild the source text from a diff: the texts of every tuple that is not
* an insertion (equalities and deletions), concatenated in order.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
* @return {string} Source text.
--]]
function _diff_text1(diffs)
  local parts, count = {}, 0
  for _, diff in ipairs(diffs) do
    if diff[1] ~= DIFF_INSERT then
      count = count + 1
      parts[count] = diff[2]
    end
  end
  return tconcat(parts)
end
1218
+
1219
--[[
* Rebuild the destination text from a diff: the texts of every tuple that is
* not a deletion (equalities and insertions), concatenated in order.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
* @return {string} Destination text.
--]]
function _diff_text2(diffs)
  local parts, count = {}, 0
  for _, diff in ipairs(diffs) do
    if diff[1] ~= DIFF_DELETE then
      count = count + 1
      parts[count] = diff[2]
    end
  end
  return tconcat(parts)
end
1233
+
1234
--[[
* Crush the diff into an encoded string which describes the operations
* required to transform text1 into text2.
* E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
* Each tuple becomes one token and tokens are joined with tabs. Equalities
* and deletions are written as their length only; inserted text is written
* out after being passed through the file's percent-encoding substitution.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
* @return {string} Delta text.
--]]
function _diff_toDelta(diffs)
  local tokens = {}
  for idx, diff in ipairs(diffs) do
    local op, data = diff[1], diff[2]
    if op == DIFF_INSERT then
      -- Keep only gsub's first result (the substituted string).
      local escaped = gsub(data, percentEncode_pattern, percentEncode_replace)
      tokens[idx] = '+' .. escaped
    elseif op == DIFF_DELETE then
      tokens[idx] = '-' .. #data
    elseif op == DIFF_EQUAL then
      tokens[idx] = '=' .. #data
    end
  end
  return tconcat(tokens, '\t')
end
1256
+
1257
--[[
* Given the original text1, and an encoded string which describes the
* operations required to transform text1 into text2, compute the full diff.
* Tokens are separated by tabs and empty tokens are skipped. '+' tokens carry
* %xx-escaped text to insert; '-' and '=' tokens carry a character count
* that is consumed from text1.
* @param {string} text1 Source string for the diff.
* @param {string} delta Delta text.
* @return {Array.<Array.<number|string>>} Array of diff tuples.
* @throws {Error} If invalid input: a malformed %xx escape, a missing or
*     negative count, an unknown operation character, or counts that do not
*     add up to the length of text1.
--]]
function _diff_fromDelta(text1, delta)
  local diffs, count = {}, 0  -- Keeping our own length var is faster.
  local cursor = 1  -- Next unread position in text1.
  for token in gmatch(delta, '[^\t]+') do
    -- The first character of a token names the operation; the rest is its
    -- parameter.
    local op, param = strsub(token, 1, 1), strsub(token, 2)
    if op == '+' then
      local bad_escape = false
      local decoded = gsub(param, '%%(.?.?)', function(hex)
        local code = tonumber(hex, 16)
        if (#hex ~= 2) or (code == nil) then
          bad_escape = true
          return ''
        end
        return strchar(code)
      end)
      if bad_escape then
        -- Malformed URI sequence.
        error('Illegal escape in _diff_fromDelta: ' .. param)
      end
      count = count + 1
      diffs[count] = {DIFF_INSERT, decoded}
    elseif (op == '-') or (op == '=') then
      local n = tonumber(param)
      if (n == nil) or (n < 0) then
        error('Invalid number in _diff_fromDelta: ' .. param)
      end
      local chunk = strsub(text1, cursor, cursor + n - 1)
      cursor = cursor + n
      local kind
      if op == '=' then
        kind = DIFF_EQUAL
      else
        kind = DIFF_DELETE
      end
      count = count + 1
      diffs[count] = {kind, chunk}
    else
      error('Invalid diff operation in _diff_fromDelta: ' .. token)
    end
  end
  -- Every character of text1 must be accounted for, no more and no less.
  if cursor ~= #text1 + 1 then
    error('Delta length (' .. (cursor - 1)
        .. ') does not equal source text length (' .. #text1 .. ').')
  end
  return diffs
end
1314
+
1315
+ -- ---------------------------------------------------------------------------
1316
+ -- MATCH API
1317
+ -- ---------------------------------------------------------------------------
1318
+
1319
-- Forward declarations: the private match helpers are assigned further down
-- but are referenced by functions defined before them.
local _match_alphabet, _match_bitap
1320
+
1321
--[[
* Locate the best instance of 'pattern' in 'text' near 'loc'.
* Cheap cases are answered directly: identical strings match at 1, an empty
* text never matches, and an exact occurrence at the (clamped) location is
* returned as is. Everything else is handed to the fuzzy Bitap search.
* @param {string} text The text to search.
* @param {string} pattern The pattern to search for.
* @param {number} loc The location to search around; clamped to the range
*     1..#text before use.
* @return {number} Best match index or -1.
* @throws {Error} If any argument is nil.
--]]
function match_main(text, pattern, loc)
  if (text == nil) or (pattern == nil) or (loc == nil) then
    error('Null inputs. (match_main)')
  end

  if text == pattern then
    -- Shortcut (potentially not guaranteed by the algorithm)
    return 1
  end
  if #text == 0 then
    -- Nothing to match.
    return -1
  end

  loc = max(1, min(loc, #text))
  local window = strsub(text, loc, loc + #pattern - 1)
  if window ~= pattern then
    -- Not an exact hit at loc: do a fuzzy compare.
    return _match_bitap(text, pattern, loc)
  end
  -- Perfect match at the perfect spot! (Includes case of null pattern)
  return loc
end
1350
+
1351
+ -- ---------------------------------------------------------------------------
1352
+ -- UNOFFICIAL/PRIVATE MATCH FUNCTIONS
1353
+ -- ---------------------------------------------------------------------------
1354
+
1355
--[[
* Initialise the alphabet for the Bitap algorithm.
* Every character of the pattern is mapped to a bit mask in which bit
* (#pattern - position) is set for each 1-based position the character
* occupies, i.e. the first character owns the highest bit.
* @param {string} pattern The text to encode.
* @return {Object} Hash of character locations.
* @private
--]]
function _match_alphabet(pattern)
  local masks = {}
  -- Bit assigned to the character currently being visited.
  local bit_pos = #pattern - 1
  for ch in gmatch(pattern, '.') do
    masks[ch] = bor(masks[ch] or 0, lshift(1, bit_pos))
    bit_pos = bit_pos - 1
  end
  return masks
end
1370
+
1371
--[[
* Locate the best instance of 'pattern' in 'text' near 'loc' using the
* Bitap algorithm.
* Candidates are scored by error ratio plus distance from loc (scaled by
* Match_Distance); only candidates scoring at or below Match_Threshold are
* accepted. With Match_Distance == 0 only a match exactly at loc can be
* accepted.
* @param {string} text The text to search.
* @param {string} pattern The pattern to search for.
* @param {number} loc The location to search around.
* @return {number} Best match index or -1.
* @throws {Error} If the pattern is longer than Match_MaxBits.
* @private
--]]
function _match_bitap(text, pattern, loc)
  if #pattern > Match_MaxBits then
    error('Pattern too long.')
  end

  -- Initialise the alphabet.
  local s = _match_alphabet(pattern)

  --[[
  * Compute and return the score for a match with e errors and x location.
  * Accesses loc and pattern through being a closure.
  * @param {number} e Number of errors in match.
  * @param {number} x Location of match.
  * @return {number} Overall score for match (0.0 = good, 1.0 = bad).
  * @private
  --]]
  local function _match_bitapScore(e, x)
    local accuracy = e / #pattern
    local proximity = abs(loc - x)
    if (Match_Distance == 0) then
      -- Dodge divide by zero error. No distance is tolerated at all, so a
      -- match exactly at loc is judged on accuracy alone and a match
      -- anywhere else gets the worst possible score.
      if proximity == 0 then
        return accuracy
      end
      return 1
    end
    return accuracy + (proximity / Match_Distance)
  end

  -- Highest score beyond which we give up.
  local score_threshold = Match_Threshold
  -- Is there a nearby exact match? (speedup)
  local best_loc = indexOf(text, pattern, loc)
  if best_loc then
    score_threshold = min(_match_bitapScore(0, best_loc), score_threshold)
    -- LUANOTE: Ideally we'd also check from the other direction, but Lua
    -- doesn't have an efficient lastIndexOf function.
  end

  -- Initialise the bit arrays.
  -- The top pattern bit: set in rd[j] when the whole pattern has matched.
  local matchmask = lshift(1, #pattern - 1)
  best_loc = -1

  local bin_min, bin_mid
  local bin_max = #pattern + #text
  local last_rd
  for d = 0, #pattern - 1, 1 do
    -- Scan for the best match; each iteration allows for one more error.
    -- Run a binary search to determine how far from 'loc' we can stray at
    -- this error level.
    bin_min = 0
    bin_mid = bin_max
    while (bin_min < bin_mid) do
      if (_match_bitapScore(d, loc + bin_mid) <= score_threshold) then
        bin_min = bin_mid
      else
        bin_max = bin_mid
      end
      bin_mid = floor(bin_min + (bin_max - bin_min) / 2)
    end
    -- Use the result from this iteration as the maximum for the next.
    bin_max = bin_mid
    local start = max(1, loc - bin_mid + 1)
    local finish = min(loc + bin_mid, #text) + #pattern

    -- rd[j] holds the match state for text position j - 1.
    local rd = {}
    for j = start, finish do
      rd[j] = 0
    end
    rd[finish + 1] = lshift(1, d) - 1
    -- NOTE: the loop bounds are fixed when the loop starts, so the
    -- assignment to 'start' further down does not shorten this scan.
    for j = finish, start, -1 do
      local charMatch = s[strsub(text, j - 1, j - 1)] or 0
      if (d == 0) then  -- First pass: exact match.
        rd[j] = band(bor((rd[j + 1] * 2), 1), charMatch)
      else
        -- Subsequent passes: fuzzy match.
        -- Functions instead of operators make this hella messy.
        rd[j] = bor(
                band(
                  bor(
                    lshift(rd[j + 1], 1),
                    1
                  ),
                  charMatch
                ),
                bor(
                  bor(
                    lshift(bor(last_rd[j + 1], last_rd[j]), 1),
                    1
                  ),
                  last_rd[j + 1]
                )
              )
      end
      if (band(rd[j], matchmask) ~= 0) then
        local score = _match_bitapScore(d, j - 1)
        -- This match will almost certainly be better than any existing
        -- match. But check anyway.
        if (score <= score_threshold) then
          -- Told you so.
          score_threshold = score
          best_loc = j - 1
          if (best_loc > loc) then
            -- When passing loc, don't exceed our current distance from loc.
            start = max(1, loc * 2 - best_loc)
          else
            -- Already passed loc, downhill from here on in.
            break
          end
        end
      end
    end
    -- No hope for a (better) match at greater error levels.
    if (_match_bitapScore(d + 1, loc) > score_threshold) then
      break
    end
    last_rd = rd
  end
  return best_loc
end
1497
+
1498
+ -- -----------------------------------------------------------------------------
1499
+ -- PATCH API
1500
+ -- -----------------------------------------------------------------------------
1501
+
1502
-- Forward declarations: the private patch helpers are assigned further down
-- but are referenced by the public patch functions defined before them.
local _new_patch_obj,
      _patch_addContext,
      _patch_addPadding,
      _patch_appendText,
      _patch_deepCopy,
      _patch_splitMax
1508
+
1509
--[[
* Compute a list of patches to turn text1 into text2.
* Use diffs if provided, otherwise compute it ourselves.
* The call format is recognised from the argument types:
* Method 1:
* a = text1, b = text2
* Method 2:
* a = diffs
* Method 3 (optimal):
* a = text1, b = diffs
* Method 4 (deprecated, use method 3):
* a = text1, b = text2, c = diffs
*
* @param {string|Array.<Array.<number|string>>} a text1 (methods 1,3,4) or
*     Array of diff tuples for text1 to text2 (method 2).
* @param {string|Array.<Array.<number|string>>} opt_b text2 (methods 1,4) or
*     Array of diff tuples for text1 to text2 (method 3) or nil (method 2).
* @param {string|Array.<Array.<number|string>>} opt_c Array of diff tuples
*     for text1 to text2 (method 4) or nil (methods 1,2,3).
* @return {Array.<_new_patch_obj>} Array of patch objects.
* @throws {Error} If the argument types match none of the four formats.
--]]
function patch_make(a, opt_b, opt_c)
  local text1, diffs
  -- Describe the call by its argument types so the format can be matched
  -- in one comparison.
  local shape = type(a) .. ',' .. type(opt_b) .. ',' .. type(opt_c)
  if shape == 'string,string,nil' then
    -- Method 1: text1, text2
    -- Compute diffs from text1 and text2.
    text1 = a
    diffs = diff_main(text1, opt_b, true)
    if #diffs > 2 then
      diff_cleanupSemantic(diffs)
      diff_cleanupEfficiency(diffs)
    end
  elseif shape == 'table,nil,nil' then
    -- Method 2: diffs
    -- Compute text1 from diffs.
    diffs = a
    text1 = _diff_text1(diffs)
  elseif shape == 'string,table,nil' then
    -- Method 3: text1, diffs
    text1 = a
    diffs = opt_b
  elseif shape == 'string,string,table' then
    -- Method 4: text1, text2, diffs
    -- text2 is not used.
    text1 = a
    diffs = opt_c
  else
    error('Unknown call format to patch_make.')
  end

  if diffs[1] == nil then
    return {}  -- Get rid of the null case.
  end

  local patches = {}
  local patch = _new_patch_obj()
  local patch_diff_count = 0  -- Keeping our own length var is faster.
  local pos1 = 0  -- Number of characters into the text1 string.
  local pos2 = 0  -- Number of characters into the text2 string.
  -- Start with text1 (before_text) and apply the diffs until we arrive at
  -- text2 (after_text). The patches are recreated one by one so that the
  -- context of each can be determined.
  local before_text, after_text = text1, text1
  for x, diff in ipairs(diffs) do
    local op, data = diff[1], diff[2]
    local len = #data

    if (patch_diff_count == 0) and (op ~= DIFF_EQUAL) then
      -- A new patch starts here.
      patch.start1 = pos1 + 1
      patch.start2 = pos2 + 1
    end

    if op == DIFF_INSERT then
      patch_diff_count = patch_diff_count + 1
      patch.diffs[patch_diff_count] = diff
      patch.length2 = patch.length2 + len
      after_text = strsub(after_text, 1, pos2)
          .. data .. strsub(after_text, pos2 + 1)
    elseif op == DIFF_DELETE then
      patch.length1 = patch.length1 + len
      patch_diff_count = patch_diff_count + 1
      patch.diffs[patch_diff_count] = diff
      after_text = strsub(after_text, 1, pos2)
          .. strsub(after_text, pos2 + len + 1)
    elseif op == DIFF_EQUAL then
      if (len <= Patch_Margin * 2)
          and (patch_diff_count ~= 0) and (#diffs ~= x) then
        -- Small equality inside a patch.
        patch_diff_count = patch_diff_count + 1
        patch.diffs[patch_diff_count] = diff
        patch.length1 = patch.length1 + len
        patch.length2 = patch.length2 + len
      elseif (len >= Patch_Margin * 2) and (patch_diff_count ~= 0) then
        -- Time for a new patch: close the one in progress.
        _patch_addContext(patch, before_text)
        patches[#patches + 1] = patch
        patch = _new_patch_obj()
        patch_diff_count = 0
        -- Unlike Unidiff, our patch lists have a rolling context.
        -- http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
        -- Update the pre-patch text & position to reflect the application
        -- of the just completed patch.
        before_text = after_text
        pos1 = pos2
      end
    end

    -- Update the current character count.
    if op ~= DIFF_INSERT then
      pos1 = pos1 + len
    end
    if op ~= DIFF_DELETE then
      pos2 = pos2 + len
    end
  end

  -- Pick up the leftover patch if not empty.
  if patch_diff_count > 0 then
    _patch_addContext(patch, before_text)
    patches[#patches + 1] = patch
  end

  return patches
end
1638
+
1639
--[[
* Merge a set of patches onto the text. Return a patched text, as well
* as a list of true/false values indicating which patches were applied.
* The patches passed in are deep-copied first, so the caller's objects are
* not modified.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @param {string} text Old text.
* @return {string, Array.<boolean>} Two return values: the new text and an
*     array of boolean values, one per patch that was attempted after
*     _patch_splitMax has processed the list.
--]]
function patch_apply(patches, text)
  if patches[1] == nil then
    return text, {}
  end

  -- Deep copy the patches so that no changes are made to originals.
  patches = _patch_deepCopy(patches)

  -- Pad both ends of the text; the padding is stripped again at the end.
  local nullPadding = _patch_addPadding(patches)
  text = nullPadding .. text .. nullPadding

  _patch_splitMax(patches)
  -- delta keeps track of the offset between the expected and actual location
  -- of the previous patch. If there are patches expected at positions 10 and
  -- 20, but the first patch was found at 12, delta is 2 and the second patch
  -- has an effective expected position of 22.
  local delta = 0
  local results = {}
  for x, patch in ipairs(patches) do
    local expected_loc = patch.start2 + delta
    local text1 = _diff_text1(patch.diffs)
    local start_loc
    local end_loc = -1
    if #text1 > Match_MaxBits then
      -- _patch_splitMax will only provide an oversized pattern in
      -- the case of a monster delete. Match its head and its tail
      -- separately, each limited to Match_MaxBits characters.
      start_loc = match_main(text,
          strsub(text1, 1, Match_MaxBits), expected_loc)
      if start_loc ~= -1 then
        end_loc = match_main(text, strsub(text1, -Match_MaxBits),
            expected_loc + #text1 - Match_MaxBits)
        if end_loc == -1 or start_loc >= end_loc then
          -- Can't find valid trailing context. Drop this patch.
          start_loc = -1
        end
      end
    else
      start_loc = match_main(text, text1, expected_loc)
    end
    if start_loc == -1 then
      -- No match found. :(
      results[x] = false
      -- Subtract the delta for this failed patch from subsequent patches.
      -- start2 of every later patch assumes this patch changed the text
      -- length by (length2 - length1); since it was not applied, exactly
      -- that difference has to be taken back out.
      delta = delta - (patch.length2 - patch.length1)
    else
      -- Found a match. :)
      results[x] = true
      delta = start_loc - expected_loc
      local text2
      if end_loc == -1 then
        text2 = strsub(text, start_loc, start_loc + #text1 - 1)
      else
        text2 = strsub(text, start_loc, end_loc + Match_MaxBits - 1)
      end
      if text1 == text2 then
        -- Perfect match, just shove the replacement text in.
        text = strsub(text, 1, start_loc - 1) .. _diff_text2(patch.diffs)
            .. strsub(text, start_loc + #text1)
      else
        -- Imperfect match. Run a diff to get a framework of equivalent
        -- indices.
        local diffs = diff_main(text1, text2, false)
        if (#text1 > Match_MaxBits)
            and (diff_levenshtein(diffs) / #text1 > Patch_DeleteThreshold) then
          -- The end points match, but the content is unacceptably bad.
          results[x] = false
        else
          _diff_cleanupSemanticLossless(diffs)
          local index1 = 1  -- Position within text1 (the expected text).
          local index2      -- Equivalent position within text2.
          for y, mod in ipairs(patch.diffs) do
            if mod[1] ~= DIFF_EQUAL then
              index2 = _diff_xIndex(diffs, index1)
            end
            if mod[1] == DIFF_INSERT then
              text = strsub(text, 1, start_loc + index2 - 2)
                  .. mod[2] .. strsub(text, start_loc + index2 - 1)
            elseif mod[1] == DIFF_DELETE then
              text = strsub(text, 1, start_loc + index2 - 2) .. strsub(text,
                  start_loc + _diff_xIndex(diffs, index1 + #mod[2] - 1))
            end
            if mod[1] ~= DIFF_DELETE then
              index1 = index1 + #mod[2]
            end
          end
        end
      end
    end
  end
  -- Strip the padding off.
  text = strsub(text, #nullPadding + 1, -#nullPadding - 1)
  return text, results
end
1740
+
1741
--[[
* Take a list of patches and return a textual representation.
* Each patch appends its own text fragments to a shared buffer, which is
* joined once at the end.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {string} Text representation of patches.
--]]
function patch_toText(patches)
  local pieces = {}
  for _, patch in ipairs(patches) do
    _patch_appendText(patch, pieces)
  end
  return tconcat(pieces)
end
1753
+
1754
--[[
* Parse a textual representation of patches and return a list of patch objects.
* Each patch begins with a header "@@ -start1,length1 +start2,length2 @@"
* (an omitted length means 1) followed by body lines whose first character
* selects the operation: '-' deletion, '+' insertion, ' ' equality.
* Body text is %xx-decoded; blank lines are skipped.
* @param {string} textline Text representation of patches.
* @return {Array.<_new_patch_obj>} Array of patch objects.
* @throws {Error} If a header is malformed, a body line has an unknown
*     operation character, or a '%' is not followed by two hex digits.
--]]
function patch_fromText(textline)
  local patches = {}
  if (#textline == 0) then
    return patches
  end
  local text = {}
  for line in gmatch(textline, '([^\n]*)') do
    text[#text + 1] = line
  end
  local textPointer = 1
  while (textPointer <= #text) do
    local start1, length1, start2, length2
        = strmatch(text[textPointer], '^@@ %-(%d+),?(%d*) %+(%d+),?(%d*) @@$')
    if (start1 == nil) then
      error('Invalid patch string: "' .. text[textPointer] .. '"')
    end
    local patch = _new_patch_obj()
    patches[#patches + 1] = patch

    -- An empty length capture makes tonumber return nil, hence the default 1.
    -- A zero-length side has its start moved forward by one.
    start1 = tonumber(start1)
    length1 = tonumber(length1) or 1
    if (length1 == 0) then
      start1 = start1 + 1
    end
    patch.start1 = start1
    patch.length1 = length1

    start2 = tonumber(start2)
    length2 = tonumber(length2) or 1
    if (length2 == 0) then
      start2 = start2 + 1
    end
    patch.start2 = start2
    patch.length2 = length2

    textPointer = textPointer + 1

    while true do
      local line = text[textPointer]
      if (line == nil) then
        break
      end
      local sign; sign, line = strsub(line, 1, 1), strsub(line, 2)

      local invalidDecode = false
      local decoded = gsub(line, '%%(.?.?)',
        function(c)
          -- Accept exactly two hex digits. tonumber(c, 16) alone is too
          -- lenient: it tolerates a sign or leading whitespace, so "% 1"
          -- would decode silently and "%-1" would blow up inside strchar.
          if not strmatch(c, '^%x%x$') then
            invalidDecode = true
            return ''
          end
          return strchar(tonumber(c, 16))
        end)
      if invalidDecode then
        -- Malformed URI sequence.
        error('Illegal escape in patch_fromText: ' .. line)
      end

      line = decoded

      if (sign == '-') then
        -- Deletion.
        patch.diffs[#patch.diffs + 1] = {DIFF_DELETE, line}
      elseif (sign == '+') then
        -- Insertion.
        patch.diffs[#patch.diffs + 1] = {DIFF_INSERT, line}
      elseif (sign == ' ') then
        -- Minor equality.
        patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, line}
      elseif (sign == '@') then
        -- Start of next patch; leave textPointer on the header line.
        break
      elseif (sign == '') then
        -- Blank line: nothing to record.
      else
        -- Unknown operation character.
        error('Invalid patch mode "' .. sign .. '" in: ' .. line)
      end
      textPointer = textPointer + 1
    end
  end
  return patches
end
1844
+
1845
+ -- ---------------------------------------------------------------------------
1846
+ -- UNOFFICIAL/PRIVATE PATCH FUNCTIONS
1847
+ -- ---------------------------------------------------------------------------
1848
+
1849
-- Metatable shared by all patch objects.
local patch_meta = {}

-- tostring(patch) yields the text that _patch_appendText writes for the patch.
function patch_meta.__tostring(patch)
  local pieces = {}
  _patch_appendText(patch, pieces)
  return tconcat(pieces)
end
1856
+
1857
--[[
* Create a fresh patch object.
* Fields: diffs (empty list of {operation, text} pairs), start1 and start2
* (both 1), length1 and length2 (both 0). The object carries patch_meta as
* its metatable.
* @constructor
--]]
function _new_patch_obj()
  local obj = {
    diffs = {},
    start1 = 1,
    start2 = 1,
    length1 = 0,
    length2 = 0,
  }
  return setmetatable(obj, patch_meta)
end
1875
+
1876
--[[
* Surround a patch with equal-text context taken from the source text.
* The context grows in steps of Patch_Margin while the covered pattern is
* empty or occurs more than once in the text, but stops once the pattern
* reaches Match_MaxBits minus two margins. The patch is modified in place.
* @param {_new_patch_obj} patch The patch to grow.
* @param {string} text Source text.
* @private
--]]
function _patch_addContext(patch, text)
  if #text == 0 then
    return
  end

  -- Bounds of the changed span, captured before the patch is mutated.
  local first = patch.start2
  local last = first + patch.length1 - 1
  local limit = Match_MaxBits - Patch_Margin - Patch_Margin

  -- LUANOTE: there is no lastIndexOf, so uniqueness is tested by looking for
  -- a second occurrence after the first one.
  local function occurs_twice(candidate)
    local hit = indexOf(text, candidate)
    if hit == nil then
      return false
    end
    return indexOf(text, candidate, hit + 1) ~= nil
  end

  local pattern = strsub(text, first, last)
  local padding = 0
  local repeated = occurs_twice(pattern)
  while (#pattern == 0 or repeated) and (#pattern < limit) do
    padding = padding + Patch_Margin
    pattern = strsub(text, max(1, first - padding), last + padding)
    repeated = occurs_twice(pattern)
  end
  -- One extra margin on top of what was needed.
  padding = padding + Patch_Margin

  -- Leading context becomes a new first equality.
  local prefix = strsub(text, max(1, first - padding), first - 1)
  if #prefix > 0 then
    tinsert(patch.diffs, 1, {DIFF_EQUAL, prefix})
  end
  -- Trailing context becomes a new last equality.
  local suffix = strsub(text, last + 1, last + padding)
  if #suffix > 0 then
    patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, suffix}
  end

  -- The patch now starts earlier and covers more text on both sides.
  local grown = #prefix + #suffix
  patch.start1 = patch.start1 - #prefix
  patch.start2 = patch.start2 - #prefix
  patch.length1 = patch.length1 + grown
  patch.length2 = patch.length2 + grown
end
1933
+
1934
--[[
* Build an independent copy of a list of patches.
* Each patch and each of its {operation, text} diff pairs is recreated, so
* changing the copy leaves the input untouched.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {Array.<_new_patch_obj>} Array of patch objects.
--]]
function _patch_deepCopy(patches)
  -- Duplicate one diff list, pair by pair.
  local function clone_diffs(diffs)
    local out = {}
    for n, entry in ipairs(diffs) do
      out[n] = {entry[1], entry[2]}
    end
    return out
  end

  local clones = {}
  for index, source in ipairs(patches) do
    local clone = _new_patch_obj()
    clone.diffs = clone_diffs(source.diffs)
    clone.start1, clone.start2 = source.start1, source.start2
    clone.length1, clone.length2 = source.length1, source.length2
    clones[index] = clone
  end
  return clones
end
1956
+
1957
--[[
* Add some padding on text start and end so that edges can match something.
* The padding consists of the characters with codes 1 .. Patch_Margin.
* All patches are shifted forward by the padding length, and the first and
* last patch get (or have extended) an equality holding the padding.
* The patches are modified in place. An empty patch list is left alone and
* only the padding string is returned.
* Intended to be called only from within patch_apply.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {string} The padding string added to each side.
--]]
function _patch_addPadding(patches)
  local paddingLength = Patch_Margin
  local padChars = {}
  for x = 1, paddingLength do
    padChars[x] = strchar(x)
  end
  local nullPadding = tconcat(padChars)

  -- With no patches there is no first or last patch to pad.
  if patches[1] == nil then
    return nullPadding
  end

  -- Bump all the patches forward.
  for _, patch in ipairs(patches) do
    patch.start1 = patch.start1 + paddingLength
    patch.start2 = patch.start2 + paddingLength
  end

  -- Add some padding on start of first diff.
  local patch = patches[1]
  local diffs = patch.diffs
  local firstDiff = diffs[1]
  if (firstDiff == nil) or (firstDiff[1] ~= DIFF_EQUAL) then
    -- Add nullPadding equality.
    tinsert(diffs, 1, {DIFF_EQUAL, nullPadding})
    -- Undo the bump; a patch without leading context is expected to start
    -- at 1 (indices are 1-based in this port).
    patch.start1 = patch.start1 - paddingLength
    patch.start2 = patch.start2 - paddingLength
    patch.length1 = patch.length1 + paddingLength
    patch.length2 = patch.length2 + paddingLength
  elseif (paddingLength > #firstDiff[2]) then
    -- Grow first equality with the tail of the padding.
    local extraLength = paddingLength - #firstDiff[2]
    firstDiff[2] = strsub(nullPadding, #firstDiff[2] + 1) .. firstDiff[2]
    patch.start1 = patch.start1 - extraLength
    patch.start2 = patch.start2 - extraLength
    patch.length1 = patch.length1 + extraLength
    patch.length2 = patch.length2 + extraLength
  end

  -- Add some padding on end of last diff.
  patch = patches[#patches]
  diffs = patch.diffs
  local lastDiff = diffs[#diffs]
  if (lastDiff == nil) or (lastDiff[1] ~= DIFF_EQUAL) then
    -- Add nullPadding equality.
    diffs[#diffs + 1] = {DIFF_EQUAL, nullPadding}
    patch.length1 = patch.length1 + paddingLength
    patch.length2 = patch.length2 + paddingLength
  elseif (paddingLength > #lastDiff[2]) then
    -- Grow last equality with the head of the padding.
    local extraLength = paddingLength - #lastDiff[2]
    lastDiff[2] = lastDiff[2] .. strsub(nullPadding, 1, extraLength)
    patch.length1 = patch.length1 + extraLength
    patch.length2 = patch.length2 + extraLength
  end

  return nullPadding
end
2016
+
2017
--[[
* Break up every patch whose length1 exceeds Match_MaxBits into a run of
* smaller patches, each carrying up to Patch_Margin characters of context
* from its neighbours. The list is edited in place; the diff list of each
* oversized patch is consumed in the process.
* Intended to be called only from within patch_apply.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
--]]
function _patch_splitMax(patches)
  local limit = Match_MaxBits
  local idx = 1
  while patches[idx] ~= nil do
    local oversized = patches[idx]
    if oversized.length1 > limit then
      -- Take the big patch out; its pieces are inserted in its place.
      tremove(patches, idx)
      idx = idx - 1
      local pending = oversized.diffs
      local pos1, pos2 = oversized.start1, oversized.start2
      local carry = ''
      while pending[1] do
        -- Start one of the smaller patches.
        local piece = _new_patch_obj()
        local pieceDiffs = piece.diffs
        local hasChange = false
        piece.start1 = pos1 - #carry
        piece.start2 = pos2 - #carry
        if carry ~= '' then
          piece.length1 = #carry
          piece.length2 = #carry
          pieceDiffs[#pieceDiffs + 1] = {DIFF_EQUAL, carry}
        end
        while pending[1] and (piece.length1 < limit - Patch_Margin) do
          local head = pending[1]
          local op, body = head[1], head[2]
          if op == DIFF_INSERT then
            -- Insertions do not consume source text; move them over whole.
            piece.length2 = piece.length2 + #body
            pos2 = pos2 + #body
            pieceDiffs[#pieceDiffs + 1] = head
            tremove(pending, 1)
            hasChange = true
          elseif op == DIFF_DELETE and #pieceDiffs == 1
              and pieceDiffs[1][1] == DIFF_EQUAL
              and #body > 2 * limit then
            -- A very large deletion right after the context passes through
            -- as a single chunk.
            piece.length1 = piece.length1 + #body
            pos1 = pos1 + #body
            hasChange = true
            pieceDiffs[#pieceDiffs + 1] = {op, body}
            tremove(pending, 1)
          else
            -- Deletion or equality: take only what still fits.
            local taken = strsub(body, 1, limit - piece.length1 - Patch_Margin)
            piece.length1 = piece.length1 + #taken
            pos1 = pos1 + #taken
            if op == DIFF_EQUAL then
              piece.length2 = piece.length2 + #taken
              pos2 = pos2 + #taken
            else
              hasChange = true
            end
            pieceDiffs[#pieceDiffs + 1] = {op, taken}
            if taken == body then
              tremove(pending, 1)
            else
              -- Leave the unconsumed remainder at the head of the queue.
              head[2] = strsub(body, #taken + 1)
            end
          end
        end
        -- Head context for the next piece: the tail of this piece's result.
        carry = strsub(_diff_text2(pieceDiffs), -Patch_Margin)
        -- End context for this piece: the start of what is still pending.
        local trailing = strsub(_diff_text1(pending), 1, Patch_Margin)
        if trailing ~= '' then
          piece.length1 = piece.length1 + #trailing
          piece.length2 = piece.length2 + #trailing
          local lastDiff = pieceDiffs[#pieceDiffs]
          if lastDiff and lastDiff[1] == DIFF_EQUAL then
            lastDiff[2] = lastDiff[2] .. trailing
          else
            pieceDiffs[#pieceDiffs + 1] = {DIFF_EQUAL, trailing}
          end
        end
        -- Pieces holding nothing but context are dropped.
        if hasChange then
          idx = idx + 1
          tinsert(patches, idx, piece)
        end
      end
    end
    idx = idx + 1
  end
end
2115
+
2116
--[[
* Append the GNU-diff-style rendering of one patch to a buffer.
* Header: @@ -382,8 +481,9 @@
* A side with length 1 shows only its start; a side with length 0 shows
* start - 1 together with the length. Each diff follows on its own line,
* prefixed with '+', '-' or ' ', its text escaped through
* percentEncode_pattern / percentEncode_replace.
* @param {_new_patch_obj} patch The patch to render.
* @param {Array.<string>} text Buffer that receives the rendered pieces.
* @return {Array.<string>} The same buffer that was passed in.
--]]
function _patch_appendText(patch, text)
  -- Render one side's coordinates for the header.
  local function coords(start, length)
    if length == 1 then
      return start
    end
    if length == 0 then
      return (start - 1) .. ',' .. length
    end
    return start .. ',' .. length
  end

  local header1 = coords(patch.start1, patch.length1)
  local header2 = coords(patch.start2, patch.length2)
  text[#text + 1] = '@@ -' .. header1 .. ' +' .. header2 .. ' @@\n'

  local prefixFor = {
    [DIFF_INSERT] = '+',
    [DIFF_DELETE] = '-',
    [DIFF_EQUAL] = ' ',
  }
  -- Declared outside the loop: an unrecognised operation keeps the prefix
  -- of the preceding diff.
  local op
  -- Escape the body of the patch with %xx notation.
  for _, diff in ipairs(patch.diffs) do
    op = prefixFor[diff[1]] or op
    local escaped = gsub(diff[2], percentEncode_pattern, percentEncode_replace)
    text[#text + 1] = op .. escaped .. '\n'
  end

  return text
end
2158
+
2159
-- Public API: diff operation tags.
_M.DIFF_DELETE, _M.DIFF_INSERT, _M.DIFF_EQUAL
    = DIFF_DELETE, DIFF_INSERT, DIFF_EQUAL

-- Public API: diff functions.
_M.diff_main, _M.diff_cleanupSemantic, _M.diff_cleanupEfficiency
    = diff_main, diff_cleanupSemantic, diff_cleanupEfficiency
_M.diff_levenshtein, _M.diff_prettyHtml
    = diff_levenshtein, diff_prettyHtml

-- Public API: match function.
_M.match_main = match_main

-- Public API: patch functions.
_M.patch_make, _M.patch_toText
    = patch_make, patch_toText
_M.patch_fromText, _M.patch_apply
    = patch_fromText, patch_apply

-- Non-API functions, exported without their leading underscore for testing
-- purposes etc.
_M.diff_commonPrefix, _M.diff_commonSuffix, _M.diff_commonOverlap
    = _diff_commonPrefix, _diff_commonSuffix, _diff_commonOverlap
_M.diff_halfMatch, _M.diff_bisect, _M.diff_cleanupMerge
    = _diff_halfMatch, _diff_bisect, _diff_cleanupMerge
_M.diff_cleanupSemanticLossless = _diff_cleanupSemanticLossless
_M.diff_text1, _M.diff_text2
    = _diff_text1, _diff_text2
_M.diff_toDelta, _M.diff_fromDelta, _M.diff_xIndex
    = _diff_toDelta, _diff_fromDelta, _diff_xIndex
_M.match_alphabet, _M.match_bitap
    = _match_alphabet, _match_bitap
_M.new_patch_obj = _new_patch_obj
_M.patch_addContext, _M.patch_splitMax, _M.patch_addPadding
    = _patch_addContext, _patch_splitMax, _patch_addPadding