redis-diff_match_patch 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2283 @@
1
+ --[[
2
+ * Diff Match and Patch
3
+ *
4
+ * Copyright 2006 Google Inc.
5
+ * http://code.google.com/p/google-diff-match-patch/
6
+ *
7
+ * Based on the JavaScript implementation by Neil Fraser.
8
+ * Ported to Lua by Duncan Cross.
9
+ *
10
+ * Licensed under the Apache License, Version 2.0 (the "License");
11
+ * you may not use this file except in compliance with the License.
12
+ * You may obtain a copy of the License at
13
+ *
14
+ * http://www.apache.org/licenses/LICENSE-2.0
15
+ *
16
+ * Unless required by applicable law or agreed to in writing, software
17
+ * distributed under the License is distributed on an "AS IS" BASIS,
18
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ * See the License for the specific language governing permissions and
20
+ * limitations under the License.
21
+ --]]
22
+
23
+ --[[
24
+ -- Lua 5.1 and earlier requires the external BitOp library.
25
+ -- This library is built-in from Lua 5.2 and later as 'bit32'.
26
+ require 'bit' -- <http://bitop.luajit.org/>
27
+ local band, bor, lshift
28
+ = bit.band, bit.bor, bit.lshift
29
+ --]]
30
+
31
+
32
-- Bitwise AND of two values given as hex strings, computed bit by bit on
-- their binary-string forms.
-- Hex2Bin and Bin2Hex are defined outside this excerpt; presumably they
-- convert hex <-> binary strings (TODO confirm against their definitions).
--   v  -> hex string to be masked
--   m  -> hex string mask
-- Returns Bin2Hex() of the 32-character masked bit string.
function BMAnd(v, m)
  local bv = Hex2Bin(v)
  local bm = Hex2Bin(m)

  -- Left-pad both operands, one zero nibble at a time, to at least 32 bits.
  while (string.len(bv) < 32) do
    bv = "0000"..bv
  end
  while (string.len(bm) < 32) do
    bm = "0000"..bm
  end

  -- Collect the result bits in a table and join them once at the end.
  local bits = {}
  for i = 1, 32 do
    -- Declared local so the helper does not create the globals cv/cm.
    local cv = string.sub(bv, i, i)
    local cm = string.sub(bm, i, i)
    if cv == "1" and cm == "1" then
      bits[i] = "1"
    else
      bits[i] = "0"
    end
  end

  return Bin2Hex(table.concat(bits))
end
71
+
72
-- Bitwise OR of two values given as hex strings, computed bit by bit on
-- their binary-string forms.
-- Hex2Bin and Bin2Hex are defined outside this excerpt; presumably they
-- convert hex <-> binary strings (TODO confirm against their definitions).
--   v  -> hex string to be masked
--   m  -> hex string mask
-- Returns Bin2Hex() of the 32-character combined bit string.
function BMOr(v, m)
  local bv = Hex2Bin(v)
  local bm = Hex2Bin(m)

  -- Left-pad both operands, one zero nibble at a time, to at least 32 bits.
  while (string.len(bv) < 32) do
    bv = "0000"..bv
  end
  while (string.len(bm) < 32) do
    bm = "0000"..bm
  end

  -- Collect the result bits in a table and join them once at the end.
  local bits = {}
  for i = 1, 32 do
    -- Declared local so the helper does not create the globals cv/cm.
    local cv = string.sub(bv, i, i)
    local cm = string.sub(bm, i, i)
    if cv == "1" or cm == "1" then
      bits[i] = "1"
    else
      bits[i] = "0"
    end
  end

  return Bin2Hex(table.concat(bits))
end
108
+
109
-- Logical left shift of a value given as a hex string, performed on its
-- binary-string form.
-- Hex2Bin and Bin2Hex are defined outside this excerpt; presumably they
-- convert hex <-> binary strings (TODO confirm against their definitions).
--   v  -> hex string value to be shifted
--   nb -> number of bits to shift to the left
-- Returns Bin2Hex() of the shifted 32-character bit string.
function BShLeft(v, nb)
  local bits = Hex2Bin(v)

  -- Left-pad with zero nibbles until the operand spans at least 32 bits.
  while string.len(bits) < 32 do
    bits = "0000" .. bits
  end

  -- Drop the nb leading bits (and anything beyond position 32) ...
  local kept = string.sub(bits, nb + 1, 32)

  -- ... then zero-fill on the right back up to 32 bits.
  local missing = 32 - string.len(kept)
  if missing > 0 then
    kept = kept .. string.rep("0", missing)
  end

  return Bin2Hex(kept)
end
128
+
129
+ local band, bor, lshift
130
+ = BMAnd, BMOr, BShLeft
131
+ local type, setmetatable, ipairs, select
132
+ = type, setmetatable, ipairs, select
133
+ local unpack, tonumber, error
134
+ = unpack, tonumber, error
135
+ local strsub, strbyte, strchar, gmatch, gsub
136
+ = string.sub, string.byte, string.char, string.gmatch, string.gsub
137
+ local strmatch, strfind, strformat
138
+ = string.match, string.find, string.format
139
+ local tinsert, tremove, tconcat
140
+ = table.insert, table.remove, table.concat
141
+ local max, min, floor, ceil, abs
142
+ = math.max, math.min, math.floor, math.ceil, math.abs
143
+
144
+
145
+
146
+ -- Utility functions.
147
+
148
-- Character class matching every byte that is NOT in the whitelist of
-- characters left unescaped by percent-encoding.
local percentEncode_pattern = '[^A-Za-z0-9%-=;\',./~!@#$%&*%(%)_%+ %?]'

-- gsub callback: turns one matched character into its '%XX' escape, where
-- XX is the upper-case hexadecimal value of the character's first byte.
local function percentEncode_replace(ch)
  return strformat('%%%02X', strbyte(ch))
end
152
+
153
-- In-place splice on an array table: removes `deletions` elements starting
-- at position `idx`, then inserts the extra arguments at `idx`, keeping the
-- order in which they were passed.
local function tsplice(t, idx, deletions, ...)
  local extra = {...}
  local extra_count = select('#', ...)

  -- Each removal shifts the tail down, so the same index is removed again.
  local remaining = deletions
  while remaining >= 1 do
    tremove(t, idx)
    remaining = remaining - 1
  end

  -- Insert back to front at the same index so the final order matches the
  -- argument order.
  local n = extra_count
  while n >= 1 do
    tinsert(t, idx, extra[n])
    n = n - 1
  end
end
163
+
164
-- Returns the single character of `str` at position `i` (negative positions
-- count from the end, as with string.sub); '' when `i` is out of range.
local function strelement(str, i)
  local ch = strsub(str, i, i)
  return ch
end
167
+
168
-- Plain-text (no pattern matching) search for `b` inside `a`, starting at
-- position `start`.  Returns whatever string.find returns (start and end
-- index of the first occurrence), or nil when `b` is empty or not found.
local function indexOf(a, b, start)
  if #b ~= 0 then
    return strfind(a, b, start, true)
  end
  return nil
end
174
+
175
-- Characters that must be escaped when rendering text as HTML.
local htmlEncode_pattern = '[&<>\n]'
-- Replacement for each character matched by htmlEncode_pattern
-- (used as the gsub replacement table).
local htmlEncode_replace = {
  ['&'] = '&amp;',
  ['<'] = '&lt;',
  ['>'] = '&gt;',
  ['\n'] = '&para;<br>',
}
179
+
180
+ -- Public API Functions
181
+ -- (Exported at the end of the script)
182
+
183
-- Forward declarations of the public API.  They are declared here as locals
-- so that the `function name(...)` definitions further down assign to these
-- locals instead of creating globals.

-- Diff API.
local diff_main
local diff_cleanupSemantic
local diff_cleanupEfficiency
local diff_levenshtein
local diff_prettyHtml

-- Match API.
local match_main

-- Patch API.
local patch_make
local patch_toText
local patch_fromText
local patch_apply
195
+
196
--[[
 * A diff is represented as an array of {operation, text} tuples, e.g.
 *   {{DIFF_DELETE, 'Hello'}, {DIFF_INSERT, 'Goodbye'}, {DIFF_EQUAL, ' world.'}}
 * which reads: delete 'Hello', add 'Goodbye' and keep ' world.'
--]]
local DIFF_DELETE, DIFF_INSERT, DIFF_EQUAL = -1, 1, 0

-- Tunable settings (readable and writable through settings()).

-- Diff_Timeout: number of seconds to map a diff before giving up
--   (0 for infinity).
-- Diff_EditCost: cost of an empty edit operation in terms of edit
--   characters.
local Diff_Timeout, Diff_EditCost = 1.0, 4

-- Match_Threshold: at what point is no match declared
--   (0.0 = perfection, 1.0 = very loose).
-- Match_Distance: how far to search for a match (0 = exact location,
--   1000+ = broad match).  A match this many characters away from the
--   expected location will add 1.0 to the score (0.0 is a perfect match).
local Match_Threshold, Match_Distance = 0.5, 1000

-- Patch_DeleteThreshold: when deleting a large block of text (over ~64
--   characters), how close do the contents have to be to match the expected
--   contents (0.0 = perfection, 1.0 = very loose).  Note that
--   Match_Threshold controls how closely the end points of a delete need to
--   match.
-- Patch_Margin: chunk size for context length.
local Patch_DeleteThreshold, Patch_Margin = 0.5, 4

-- Match_MaxBits: the number of bits in an int.
local Match_MaxBits = 32
224
+
225
-- Getter/setter for the module's tunable settings.
--   settings()    -> returns a fresh table holding the current values.
--   settings(new) -> for every recognised key present in `new`, replaces the
--                    current value; keys that are nil or false in `new`
--                    leave the current value untouched.  Returns nothing.
function settings(new)
  if not new then
    -- Read mode: snapshot of the current values.
    return {
      Diff_Timeout = Diff_Timeout,
      Diff_EditCost = Diff_EditCost,
      Match_Threshold = Match_Threshold,
      Match_Distance = Match_Distance,
      Patch_DeleteThreshold = Patch_DeleteThreshold,
      Patch_Margin = Patch_Margin,
      Match_MaxBits = Match_MaxBits,
    }
  end

  -- Write mode: override only what the caller supplied.
  Diff_Timeout = new.Diff_Timeout or Diff_Timeout
  Diff_EditCost = new.Diff_EditCost or Diff_EditCost
  Match_Threshold = new.Match_Threshold or Match_Threshold
  Match_Distance = new.Match_Distance or Match_Distance
  Patch_DeleteThreshold = new.Patch_DeleteThreshold or Patch_DeleteThreshold
  Patch_Margin = new.Patch_Margin or Patch_Margin
  Match_MaxBits = new.Match_MaxBits or Match_MaxBits
end
246
+
247
+ -- ---------------------------------------------------------------------------
248
+ -- DIFF API
249
+ -- ---------------------------------------------------------------------------
250
+
251
+ -- The private diff functions
252
-- Forward declarations of the private diff helpers.  Declaring them as
-- locals here makes the later `function _diff_xxx(...)` definitions assign
-- to these locals rather than to globals.  _diff_bisectSplit is included so
-- that its definition and its call inside _diff_bisect stay file-local too.
local _diff_compute,
      _diff_bisect,
      _diff_bisectSplit,
      _diff_halfMatchI,
      _diff_halfMatch,
      _diff_cleanupSemanticScore,
      _diff_cleanupSemanticLossless,
      _diff_cleanupMerge,
      _diff_commonPrefix,
      _diff_commonSuffix,
      _diff_commonOverlap,
      _diff_xIndex,
      _diff_text1,
      _diff_text2,
      _diff_toDelta,
      _diff_fromDelta
267
+
268
--[[
 * Find the differences between two texts.  Simplifies the problem by
 * stripping any common prefix or suffix off the texts before diffing.
 * Raises an error if either text is nil.
 * @param {string} text1 Old string to be diffed.
 * @param {string} text2 New string to be diffed.
 * @param {boolean} opt_checklines Has no effect in Lua; it is accepted but
 *     ignored (the line-mode speedup is not implemented).
 * @return {Array.<Array.<number|string>>} Array of diff tuples.
--]]
function diff_main(text1, text2, opt_checklines)
  -- Check for null inputs (both arguments, not just the first one).
  if text1 == nil or text2 == nil then
    error('Null inputs. (diff_main)')
  end

  -- Check for equality (speedup).
  if text1 == text2 then
    if #text1 > 0 then
      return {{DIFF_EQUAL, text1}}
    end
    return {}
  end

  -- LUANOTE: Due to the lack of Unicode support, Lua is incapable of
  -- implementing the line-mode speedup.
  local checklines = false

  -- Trim off common prefix (speedup).
  local commonlength = _diff_commonPrefix(text1, text2)
  local commonprefix
  if commonlength > 0 then
    commonprefix = strsub(text1, 1, commonlength)
    text1 = strsub(text1, commonlength + 1)
    text2 = strsub(text2, commonlength + 1)
  end

  -- Trim off common suffix (speedup).
  commonlength = _diff_commonSuffix(text1, text2)
  local commonsuffix
  if commonlength > 0 then
    commonsuffix = strsub(text1, -commonlength)
    text1 = strsub(text1, 1, -commonlength - 1)
    text2 = strsub(text2, 1, -commonlength - 1)
  end

  -- Compute the diff on the middle block.
  local diffs = _diff_compute(text1, text2, checklines)

  -- Restore the prefix and suffix.
  if commonprefix then
    tinsert(diffs, 1, {DIFF_EQUAL, commonprefix})
  end
  if commonsuffix then
    diffs[#diffs + 1] = {DIFF_EQUAL, commonsuffix}
  end

  -- Normalise the assembled list before returning it.
  _diff_cleanupMerge(diffs)
  return diffs
end
328
+
329
--[[
 * Reduce the number of edits by eliminating semantically trivial equalities.
 * The diff list is modified in place; nothing is returned.
 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
--]]
function diff_cleanupSemantic(diffs)
  local changed = false
  -- Stack of indices (into diffs) where equalities were found; the stack
  -- height is tracked by hand in eq_top.
  local eq_stack, eq_top = {}, 0
  -- Text of the equality on top of the stack, or nil.
  local last_eq = nil
  -- Index of the tuple currently being examined.
  local pos = 1
  -- Characters inserted/deleted before and after the last equality.
  local ins_before, del_before = 0, 0
  local ins_after, del_after = 0, 0

  while diffs[pos] do
    local op, text = diffs[pos][1], diffs[pos][2]
    if op == DIFF_EQUAL then
      -- Equality found: remember it and shift the "after" counters.
      eq_top = eq_top + 1
      eq_stack[eq_top] = pos
      ins_before, del_before = ins_after, del_after
      ins_after, del_after = 0, 0
      last_eq = text
    else
      -- An insertion or deletion.
      if op == DIFF_INSERT then
        ins_after = ins_after + #text
      else
        del_after = del_after + #text
      end
      -- Eliminate an equality that is smaller than or equal to the edits on
      -- both sides of it.
      if last_eq
          and (#last_eq <= max(ins_before, del_before))
          and (#last_eq <= max(ins_after, del_after)) then
        local eq_index = eq_stack[eq_top]
        -- Duplicate the record as a deletion ...
        tinsert(diffs, eq_index, {DIFF_DELETE, last_eq})
        -- ... and turn the original copy into an insertion.
        diffs[eq_index + 1][1] = DIFF_INSERT
        -- Pop the equality just removed and the one before it (the latter
        -- needs to be re-evaluated).
        eq_top = eq_top - 2
        if eq_top > 0 then
          pos = eq_stack[eq_top]
        else
          pos = 0
        end
        -- Reset the counters.
        ins_before, del_before = 0, 0
        ins_after, del_after = 0, 0
        last_eq = nil
        changed = true
      end
    end
    pos = pos + 1
  end

  -- Normalize the diff.
  if changed then
    _diff_cleanupMerge(diffs)
  end
  _diff_cleanupSemanticLossless(diffs)

  -- Find any overlaps between deletions and insertions.
  -- e.g: <del>abcxxx</del><ins>xxxdef</ins>
  --   -> <del>abc</del>xxx<ins>def</ins>
  -- e.g: <del>xxxabc</del><ins>defxxx</ins>
  --   -> <ins>def</ins>xxx<del>abc</del>
  -- Only extract an overlap if it is as big as the edit ahead or behind it.
  pos = 2
  while diffs[pos] do
    local prev, cur = diffs[pos - 1], diffs[pos]
    if prev[1] == DIFF_DELETE and cur[1] == DIFF_INSERT then
      local deletion, insertion = prev[2], cur[2]
      local fwd = _diff_commonOverlap(deletion, insertion)
      local rev = _diff_commonOverlap(insertion, deletion)
      if fwd >= rev then
        if fwd >= #deletion / 2 or fwd >= #insertion / 2 then
          -- Overlap found.  Insert an equality and trim the surrounding
          -- edits.
          tinsert(diffs, pos, {DIFF_EQUAL, strsub(insertion, 1, fwd)})
          diffs[pos - 1][2] = strsub(deletion, 1, #deletion - fwd)
          diffs[pos + 1][2] = strsub(insertion, fwd + 1)
          pos = pos + 1
        end
      else
        if rev >= #deletion / 2 or rev >= #insertion / 2 then
          -- Reverse overlap found.
          -- Insert an equality and swap and trim the surrounding edits.
          tinsert(diffs, pos, {DIFF_EQUAL, strsub(deletion, 1, rev)})
          diffs[pos - 1] = {DIFF_INSERT,
              strsub(insertion, 1, #insertion - rev)}
          diffs[pos + 1] = {DIFF_DELETE, strsub(deletion, rev + 1)}
          pos = pos + 1
        end
      end
      pos = pos + 1
    end
    pos = pos + 1
  end
end
436
+
437
--[[
 * Reduce the number of edits by eliminating operationally trivial equalities.
 * The diff list is modified in place; nothing is returned.
 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
--]]
function diff_cleanupEfficiency(diffs)
  local changed = false
  -- Stack of indices (into diffs) of candidate equalities; the stack height
  -- is tracked by hand in eq_top.
  local eq_stack, eq_top = {}, 0
  -- Text of the equality on top of the stack, or nil.
  local last_eq = nil
  -- Index of the tuple currently being examined.
  local pos = 1

  -- The four flags below are really booleans, kept as 0/1 so that they can
  -- be summed to count how many of them are set:
  --   pre_ins / pre_del   : an insertion / deletion precedes the last equality
  --   post_ins / post_del : an insertion / deletion follows the last equality
  local pre_ins, pre_del = 0, 0
  local post_ins, post_del = 0, 0

  while diffs[pos] do
    local op = diffs[pos][1]
    if op == DIFF_EQUAL then
      -- Equality found.
      local text = diffs[pos][2]
      if (#text < Diff_EditCost) and (post_ins == 1 or post_del == 1) then
        -- Candidate found.
        eq_top = eq_top + 1
        eq_stack[eq_top] = pos
        pre_ins, pre_del = post_ins, post_del
        last_eq = text
      else
        -- Not a candidate, and can never become one.
        eq_top = 0
        last_eq = nil
      end
      post_ins, post_del = 0, 0
    else
      -- An insertion or deletion.
      if op == DIFF_DELETE then
        post_del = 1
      else
        post_ins = 1
      end
      --[[
       * Five types to be split:
       * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
       * <ins>A</ins>X<ins>C</ins><del>D</del>
       * <ins>A</ins><del>B</del>X<ins>C</ins>
       * <del>A</del>X<ins>C</ins><del>D</del>
       * <ins>A</ins><del>B</del>X<del>C</del>
      --]]
      local flags_set = pre_ins + pre_del + post_ins + post_del
      if last_eq and (
          (flags_set == 4)
          or ((#last_eq < Diff_EditCost / 2) and (flags_set == 3))) then
        local eq_index = eq_stack[eq_top]
        -- Duplicate the record as a deletion ...
        tinsert(diffs, eq_index, {DIFF_DELETE, last_eq})
        -- ... and turn the original copy into an insertion.
        diffs[eq_index + 1][1] = DIFF_INSERT
        -- Throw away the equality we just deleted.
        eq_top = eq_top - 1
        last_eq = nil
        if (pre_ins == 1) and (pre_del == 1) then
          -- No changes made which could affect previous entry, keep going.
          post_ins, post_del = 1, 1
          eq_top = 0
        else
          -- Throw away the previous equality and rewind to the one before.
          eq_top = eq_top - 1
          if eq_top > 0 then
            pos = eq_stack[eq_top]
          else
            pos = 0
          end
          post_ins, post_del = 0, 0
        end
        changed = true
      end
    end
    pos = pos + 1
  end

  if changed then
    _diff_cleanupMerge(diffs)
  end
end
533
+
534
--[[
 * Compute the Levenshtein distance; the number of inserted, deleted or
 * substituted characters.
 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
 * @return {number} Number of changes.
--]]
function diff_levenshtein(diffs)
  local distance = 0
  -- Lengths of the insertions/deletions seen since the last equality.
  local ins_run, del_run = 0, 0
  for _, tuple in ipairs(diffs) do
    local op, text = tuple[1], tuple[2]
    if op == DIFF_EQUAL then
      -- An equality closes the current run; a deletion paired with an
      -- insertion counts as one substitution, hence the max().
      distance = distance + max(ins_run, del_run)
      ins_run, del_run = 0, 0
    elseif op == DIFF_DELETE then
      del_run = del_run + #text
    elseif op == DIFF_INSERT then
      ins_run = ins_run + #text
    end
  end
  -- Account for a trailing run that was not closed by an equality.
  return distance + max(ins_run, del_run)
end
559
+
560
--[[
 * Convert a diff array into a pretty HTML report.
 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
 * @return {string} HTML representation.
--]]
function diff_prettyHtml(diffs)
  local parts = {}
  for index, tuple in ipairs(diffs) do
    local op, raw = tuple[1], tuple[2]
    -- Escape &, <, > and newlines (only gsub's first result is kept).
    local escaped = gsub(raw, htmlEncode_pattern, htmlEncode_replace)
    -- Pick the wrapper element for this operation.
    local open_tag, close_tag
    if op == DIFF_INSERT then
      open_tag, close_tag = '<ins style="background:#e6ffe6;">', '</ins>'
    elseif op == DIFF_DELETE then
      open_tag, close_tag = '<del style="background:#ffe6e6;">', '</del>'
    elseif op == DIFF_EQUAL then
      open_tag, close_tag = '<span>', '</span>'
    end
    -- Tuples with an unrecognised operation produce no output slot.
    if open_tag then
      parts[index] = open_tag .. escaped .. close_tag
    end
  end
  return tconcat(parts)
end
581
+
582
+ -- ---------------------------------------------------------------------------
583
+ -- UNOFFICIAL/PRIVATE DIFF FUNCTIONS
584
+ -- ---------------------------------------------------------------------------
585
+
586
--[[
 * Find the differences between two texts.  Assumes that the texts do not
 * have any common prefix or suffix.
 * @param {string} text1 Old string to be diffed.
 * @param {string} text2 New string to be diffed.
 * @param {boolean} checklines Has no effect in Lua; it is only forwarded to
 *     the recursive diff_main calls.
 * @return {Array.<Array.<number|string>>} Array of diff tuples.
 * @private
--]]
function _diff_compute(text1, text2, checklines)
  -- One side empty: the whole other side is a single edit (speedup).
  if #text1 == 0 then
    return {{DIFF_INSERT, text2}}
  end
  if #text2 == 0 then
    return {{DIFF_DELETE, text1}}
  end

  local text1_longer = #text1 > #text2
  local longtext, shorttext
  if text1_longer then
    longtext, shorttext = text1, text2
  else
    longtext, shorttext = text2, text1
  end

  local at = indexOf(longtext, shorttext)
  if at ~= nil then
    -- Shorter text is inside the longer text (speedup).  The surrounding
    -- pieces are deletions when text1 is the longer one, insertions
    -- otherwise.
    local edit_op = text1_longer and DIFF_DELETE or DIFF_INSERT
    return {
      {edit_op, strsub(longtext, 1, at - 1)},
      {DIFF_EQUAL, shorttext},
      {edit_op, strsub(longtext, at + #shorttext)}
    }
  end

  if #shorttext == 1 then
    -- Single character string.
    -- After the previous speedup, the character can't be an equality.
    return {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}}
  end

  -- Check to see if the problem can be split in two.
  local text1_a, text1_b, text2_a, text2_b, mid_common =
      _diff_halfMatch(text1, text2)
  if text1_a then
    -- A half-match was found: diff both halves separately, then join them
    -- around the common middle.
    local merged = diff_main(text1_a, text2_a, checklines)
    local tail = diff_main(text1_b, text2_b, checklines)
    local mid_index = #merged + 1
    merged[mid_index] = {DIFF_EQUAL, mid_common}
    for offset, tuple in ipairs(tail) do
      merged[mid_index + offset] = tuple
    end
    return merged
  end

  return _diff_bisect(text1, text2)
end
658
+
659
--[[
 * Find the 'middle snake' of a diff, split the problem in two
 * and return the recursively constructed diff.
 * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
 * @param {string} text1 Old string to be diffed.
 * @param {string} text2 New string to be diffed.
 * @return {Array.<Array.<number|string>>} Array of diff tuples.
 * @private
--]]
function _diff_bisect(text1, text2)
  -- Cache the text lengths to prevent multiple calls.
  local len1, len2 = #text1, #text2
  local max_d = ceil((len1 + len2) / 2)
  local offset = max_d
  local width = 2 * max_d
  -- fwd/rev hold, per diagonal, the furthest x reached by the front and
  -- the reverse path.
  local fwd, rev = {}, {}
  -- Setting all elements to -1 is faster in Lua than mixing integers and nil.
  for slot = 0, width - 1 do
    fwd[slot], rev[slot] = -1, -1
  end
  fwd[offset + 1], rev[offset + 1] = 0, 0
  local delta = len1 - len2
  -- If the total number of characters is odd, then
  -- the front path will collide with the reverse path.
  local front = (delta % 2 ~= 0)
  -- Offsets for start and end of k loop.
  -- Prevents mapping of space beyond the grid.
  local k1start, k1end = 0, 0
  local k2start, k2end = 0, 0

  for d = 0, max_d - 1 do
    -- Walk the front path one step.
    for k1 = -d + k1start, d - k1end, 2 do
      local at = offset + k1
      local x1
      -- The short-circuit order matters: neighbours outside the table are
      -- never compared.
      if (k1 == -d) or ((k1 ~= d) and (fwd[at - 1] < fwd[at + 1])) then
        x1 = fwd[at + 1]
      else
        x1 = fwd[at - 1] + 1
      end
      local y1 = x1 - k1
      -- Follow the diagonal while the characters agree.
      while (x1 <= len1) and (y1 <= len2)
          and (strsub(text1, x1, x1) == strsub(text2, y1, y1)) do
        x1 = x1 + 1
        y1 = y1 + 1
      end
      fwd[at] = x1
      if x1 > len1 + 1 then
        -- Ran off the right of the graph.
        k1end = k1end + 2
      elseif y1 > len2 + 1 then
        -- Ran off the bottom of the graph.
        k1start = k1start + 2
      elseif front then
        local mirror = offset + delta - k1
        if mirror >= 0 and mirror < width and rev[mirror] ~= -1 then
          -- Mirror x2 onto top-left coordinate system.
          local x2 = len1 - rev[mirror] + 1
          if x1 > x2 then
            -- Overlap detected.
            return _diff_bisectSplit(text1, text2, x1, y1)
          end
        end
      end
    end

    -- Walk the reverse path one step.
    for k2 = -d + k2start, d - k2end, 2 do
      local at = offset + k2
      local x2
      if (k2 == -d) or ((k2 ~= d) and (rev[at - 1] < rev[at + 1])) then
        x2 = rev[at + 1]
      else
        x2 = rev[at - 1] + 1
      end
      local y2 = x2 - k2
      -- Same diagonal walk, but counting characters from the end.
      while (x2 <= len1) and (y2 <= len2)
          and (strsub(text1, -x2, -x2) == strsub(text2, -y2, -y2)) do
        x2 = x2 + 1
        y2 = y2 + 1
      end
      rev[at] = x2
      if x2 > len1 + 1 then
        -- Ran off the left of the graph.
        k2end = k2end + 2
      elseif y2 > len2 + 1 then
        -- Ran off the top of the graph.
        k2start = k2start + 2
      elseif not front then
        local mirror = offset + delta - k2
        if mirror >= 0 and mirror < width and fwd[mirror] ~= -1 then
          local x1 = fwd[mirror]
          local y1 = offset + x1 - mirror
          -- Mirror x2 onto top-left coordinate system.
          x2 = len1 - x2 + 1
          if x1 > x2 then
            -- Overlap detected.
            return _diff_bisectSplit(text1, text2, x1, y1)
          end
        end
      end
    end
  end

  -- The paths never overlapped within max_d steps: the number of diffs
  -- equals the number of characters, no commonality at all.
  return {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}}
end
774
+
775
--[[
 * Given the location of the 'middle snake', split the diff in two parts
 * and recurse.
 * @param {string} text1 Old string to be diffed.
 * @param {string} text2 New string to be diffed.
 * @param {number} x Index of split point in text1.
 * @param {number} y Index of split point in text2.
 * @return {Array.<Array.<number|string>>} Array of diff tuples.
 * @private
--]]
function _diff_bisectSplit(text1, text2, x, y)
  -- Diff everything before the split point, then everything from it on.
  local head = diff_main(strsub(text1, 1, x - 1), strsub(text2, 1, y - 1),
      false)
  local tail = diff_main(strsub(text1, x), strsub(text2, y), false)

  -- Append the second result to the first and return the combined list.
  local head_len = #head
  for offset, tuple in ipairs(tail) do
    head[head_len + offset] = tuple
  end
  return head
end
801
+
802
--[[
 * Determine the common prefix of two strings.
 * @param {string} text1 First string.
 * @param {string} text2 Second string.
 * @return {number} The number of characters common to the start of each
 *     string.
--]]
function _diff_commonPrefix(text1, text2)
  local len1, len2 = #text1, #text2
  -- Quick exits: an empty input, or differing first bytes.
  if len1 == 0 or len2 == 0 then
    return 0
  end
  if strbyte(text1, 1) ~= strbyte(text2, 1) then
    return 0
  end

  -- Binary search for the longest matching prefix.
  -- Performance analysis: http://neil.fraser.name/news/2007/10/09/
  local lo = 1
  local hi = min(len1, len2)
  local mid = hi
  -- Everything before `from` is already known to match, so only the
  -- remainder is compared on each probe.
  local from = 1
  while lo < mid do
    local same = strsub(text1, from, mid) == strsub(text2, from, mid)
    if same then
      lo = mid
      from = lo
    else
      hi = mid
    end
    mid = floor(lo + (hi - lo) / 2)
  end
  return mid
end
833
+
834
--[[
 * Determine the common suffix of two strings.
 * @param {string} text1 First string.
 * @param {string} text2 Second string.
 * @return {number} The number of characters common to the end of each string.
--]]
function _diff_commonSuffix(text1, text2)
  local len1, len2 = #text1, #text2
  -- Quick exits: an empty input, or differing last bytes.
  if len1 == 0 or len2 == 0 then
    return 0
  end
  if strbyte(text1, -1) ~= strbyte(text2, -1) then
    return 0
  end

  -- Binary search for the longest matching suffix, using negative
  -- (from-the-end) string indices.
  -- Performance analysis: http://neil.fraser.name/news/2007/10/09/
  local lo = 1
  local hi = min(len1, len2)
  local mid = hi
  -- The last `upto - 1` characters are already known to match.
  local upto = 1
  while lo < mid do
    local same = strsub(text1, -mid, -upto) == strsub(text2, -mid, -upto)
    if same then
      lo = mid
      upto = lo
    else
      hi = mid
    end
    mid = floor(lo + (hi - lo) / 2)
  end
  return mid
end
864
+
865
--[[
 * Determine if the suffix of one string is the prefix of another.
 * @param {string} text1 First string.
 * @param {string} text2 Second string.
 * @return {number} The number of characters common to the end of the first
 *     string and the start of the second string.
 * @private
--]]
function _diff_commonOverlap(text1, text2)
  -- Cache the text lengths to prevent multiple calls.
  local len1, len2 = #text1, #text2
  -- Eliminate the null case.
  if len1 == 0 or len2 == 0 then
    return 0
  end
  -- Truncate the longer string so that both have the same length: keep the
  -- tail of text1 or the head of text2.
  if len1 > len2 then
    text1 = strsub(text1, len1 - len2 + 1)
  elseif len1 < len2 then
    text2 = strsub(text2, 1, len1)
  end
  local span = min(len1, len2)
  -- Quick check for the worst case.
  if text1 == text2 then
    return span
  end

  -- Start by looking for a single character match
  -- and increase length until no match is found.
  -- Performance analysis: http://neil.fraser.name/news/2010/11/04/
  local best, length = 0, 1
  while true do
    local tail = strsub(text1, span - length + 1)
    -- Plain (non-pattern) search; only the start index is kept.
    local found = strfind(text2, tail, 1, true)
    if found == nil then
      return best
    end
    length = length + found - 1
    if found == 1
        or strsub(text1, span - length + 1) == strsub(text2, 1, length) then
      best = length
      length = length + 1
    end
  end
end
912
+
913
--[[
 * Does a substring of shorttext exist within longtext such that the substring
 * is at least half the length of longtext?
 * This speedup can produce non-minimal diffs.
 * @param {string} longtext Longer string.
 * @param {string} shorttext Shorter string.
 * @param {number} i Start index of the seed substring within longtext.
 * @return {?Array.<string>} Five element Array, containing the prefix of
 *     longtext, the suffix of longtext, the prefix of shorttext, the suffix
 *     of shorttext and the common middle.  Or nil if there was no match.
 * @private
--]]
function _diff_halfMatchI(longtext, shorttext, i)
  -- Seed: the substring of longtext from i through i + floor(#longtext / 4).
  local seed = strsub(longtext, i, i + floor(#longtext / 4))
  local best_common = ''
  local best_long_a, best_long_b, best_short_a, best_short_b

  -- Visit every occurrence of the seed in shorttext and grow the match in
  -- both directions, keeping the longest common run found.
  local j = indexOf(shorttext, seed, 1)
  while j ~= nil do
    local prefix_len = _diff_commonPrefix(strsub(longtext, i),
                                          strsub(shorttext, j))
    local suffix_len = _diff_commonSuffix(strsub(longtext, 1, i - 1),
                                          strsub(shorttext, 1, j - 1))
    if #best_common < suffix_len + prefix_len then
      best_common = strsub(shorttext, j - suffix_len, j - 1)
          .. strsub(shorttext, j, j + prefix_len - 1)
      best_long_a = strsub(longtext, 1, i - suffix_len - 1)
      best_long_b = strsub(longtext, i + prefix_len)
      best_short_a = strsub(shorttext, 1, j - suffix_len - 1)
      best_short_b = strsub(shorttext, j + prefix_len)
    end
    j = indexOf(shorttext, seed, j + 1)
  end

  -- Only a common run covering at least half of longtext counts.
  if #best_common * 2 < #longtext then
    return nil
  end
  return {best_long_a, best_long_b,
          best_short_a, best_short_b, best_common}
end
957
+
958
--[[
* Decide whether the two texts share a substring that is at least half as
* long as the longer of the two.
* The check is skipped (nil) when Diff_Timeout is zero or negative, when the
* longer text has fewer than 4 characters, or when the shorter text is less
* than half as long as the longer one.
* @param {string} text1 First string.
* @param {string} text2 Second string.
* @return {?string...} Five return values: the prefix of text1, the suffix
*     of text1, the prefix of text2, the suffix of text2 and the common
*     middle. A single nil if there was no match.
* @private
--]]
function _diff_halfMatch(text1, text2)
  if Diff_Timeout <= 0 then
    -- With unlimited time there is no reason to risk a non-optimal diff.
    return nil
  end

  local text1_is_longer = #text1 > #text2
  local longtext, shorttext
  if text1_is_longer then
    longtext, shorttext = text1, text2
  else
    longtext, shorttext = text2, text1
  end
  if (#longtext < 4) or (#shorttext * 2 < #longtext) then
    return nil  -- Pointless.
  end

  -- Seed once from the second quarter and once from the third quarter.
  local hm1 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 4))
  local hm2 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 2))

  local hm
  if hm1 and hm2 then
    -- Both seeds matched; keep the longer common middle (ties go to hm2).
    hm = (#hm1[5] > #hm2[5]) and hm1 or hm2
  else
    hm = hm1 or hm2
    if not hm then
      return nil
    end
  end

  -- hm is ordered long/short; hand it back ordered text1/text2.
  if text1_is_longer then
    return hm[1], hm[2], hm[3], hm[4], hm[5]
  end
  return hm[3], hm[4], hm[1], hm[2], hm[5]
end
1007
+
1008
--[[
* Score how well the boundary between two adjacent strings lines up with a
* logical boundary, judged by the last character of 'one' and the first
* character of 'two'.
* Scores range from 6 (best) to 0 (worst).
* @param {string} one First string.
* @param {string} two Second string.
* @return {number} The score.
* @private
--]]
function _diff_cleanupSemanticScore(one, two)
  if (#one == 0) or (#two == 0) then
    -- A boundary at the very edge of the text is the best possible.
    return 6
  end

  -- The classification relies on Lua's own character classes (%W, %s, %c),
  -- so it may differ slightly from other ports. The purpose of this
  -- function is largely cosmetic.
  local tail, head = strsub(one, -1), strsub(two, 1, 1)
  local symbol1, symbol2 = strmatch(tail, '%W'), strmatch(head, '%W')
  local space1 = symbol1 and strmatch(tail, '%s')
  local space2 = symbol2 and strmatch(head, '%s')
  local break1 = space1 and strmatch(tail, '%c')
  local break2 = space2 and strmatch(head, '%c')

  if break1 or break2 then
    local blank1 = break1 and strmatch(one, '\n\r?\n$')
    local blank2 = break2 and strmatch(two, '^\r?\n\r?\n')
    if blank1 or blank2 then
      -- Five points for blank lines.
      return 5
    end
    -- Four points for line breaks.
    return 4
  end
  if symbol1 and not space1 and space2 then
    -- Three points for end of sentences.
    return 3
  end
  if space1 or space2 then
    -- Two points for whitespace.
    return 2
  end
  if symbol1 or symbol2 then
    -- One point for non-alphanumeric.
    return 1
  end
  return 0
end
1057
+
1058
--[[
* Look for single edits surrounded on both sides by equalities
* which can be shifted sideways to align the edit to a word boundary.
* e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
* The diff list is modified in place; nothing is returned.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
--]]
function _diff_cleanupSemanticLossless(diffs)
  local pointer = 2
  -- Intentionally ignore the first and last element (don't need checking).
  while diffs[pointer + 1] do
    local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1]
    if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then
      -- This is a single edit surrounded by equalities.
      local equality1 = prevDiff[2]
      local edit = diffs[pointer][2]
      local equality2 = nextDiff[2]

      -- First, shift the edit as far left as possible.
      local commonOffset = _diff_commonSuffix(equality1, edit)
      if commonOffset > 0 then
        local commonString = strsub(edit, -commonOffset)
        equality1 = strsub(equality1, 1, -commonOffset - 1)
        edit = commonString .. strsub(edit, 1, -commonOffset - 1)
        equality2 = commonString .. equality2
      end

      -- Second, step character by character right, looking for the best fit.
      local bestEquality1 = equality1
      local bestEdit = edit
      local bestEquality2 = equality2
      local bestScore = _diff_cleanupSemanticScore(equality1, edit)
          + _diff_cleanupSemanticScore(edit, equality2)

      -- strbyte yields no value for an empty string, so two empty strings
      -- would compare nil == nil and the loop would never end. The explicit
      -- length check rules that case out.
      while (#edit > 0) and (strbyte(edit, 1) == strbyte(equality2, 1)) do
        equality1 = equality1 .. strsub(edit, 1, 1)
        edit = strsub(edit, 2) .. strsub(equality2, 1, 1)
        equality2 = strsub(equality2, 2)
        local score = _diff_cleanupSemanticScore(equality1, edit)
            + _diff_cleanupSemanticScore(edit, equality2)
        -- The >= encourages trailing rather than leading whitespace on edits.
        if score >= bestScore then
          bestScore = score
          bestEquality1 = equality1
          bestEdit = edit
          bestEquality2 = equality2
        end
      end

      if prevDiff[2] ~= bestEquality1 then
        -- We have an improvement, save it back to the diff.
        if #bestEquality1 > 0 then
          diffs[pointer - 1][2] = bestEquality1
        else
          -- The leading equality was consumed entirely; drop it.
          tremove(diffs, pointer - 1)
          pointer = pointer - 1
        end
        diffs[pointer][2] = bestEdit
        if #bestEquality2 > 0 then
          diffs[pointer + 1][2] = bestEquality2
        else
          -- The trailing equality was consumed entirely; drop it.
          tremove(diffs, pointer + 1)
          pointer = pointer - 1
        end
      end
    end
    pointer = pointer + 1
  end
end
1127
+
1128
--[[
* Reorder and merge like edit sections. Merge equalities.
* Any edit section can move as long as it doesn't cross an equality.
* The diff list is rewritten in place. When the second pass shifts anything,
* the whole routine runs again via a tail call.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
--]]
function _diff_cleanupMerge(diffs)
  -- A dummy trailing equality makes sure the final run of edits is flushed.
  diffs[#diffs + 1] = {DIFF_EQUAL, ''}

  local cursor = 1
  local deletes, inserts = 0, 0
  local deleted, inserted = '', ''
  while diffs[cursor] do
    local entry = diffs[cursor]
    local op = entry[1]
    if op == DIFF_INSERT then
      inserts = inserts + 1
      inserted = inserted .. entry[2]
      cursor = cursor + 1
    elseif op == DIFF_DELETE then
      deletes = deletes + 1
      deleted = deleted .. entry[2]
      cursor = cursor + 1
    elseif op == DIFF_EQUAL then
      -- Upon reaching an equality, check for prior redundancies.
      local run = deletes + inserts
      if run > 1 then
        if (deletes > 0) and (inserts > 0) then
          -- Factor out any common prefix.
          local shared = _diff_commonPrefix(inserted, deleted)
          if shared > 0 then
            local run_start = cursor - run
            if (run_start > 1)
                and (diffs[run_start - 1][1] == DIFF_EQUAL) then
              -- Grow the equality that precedes the run.
              local before = diffs[run_start - 1]
              before[2] = before[2] .. strsub(inserted, 1, shared)
            else
              -- No preceding equality: put one at the head of the list.
              tinsert(diffs, 1, {DIFF_EQUAL, strsub(inserted, 1, shared)})
              cursor = cursor + 1
            end
            inserted = strsub(inserted, shared + 1)
            deleted = strsub(deleted, shared + 1)
          end
          -- Factor out any common suffix into the equality at the cursor.
          shared = _diff_commonSuffix(inserted, deleted)
          if shared ~= 0 then
            diffs[cursor][2] = strsub(inserted, -shared) .. diffs[cursor][2]
            inserted = strsub(inserted, 1, -shared - 1)
            deleted = strsub(deleted, 1, -shared - 1)
          end
        end
        -- Replace the whole run with at most one delete and one insert.
        if deletes == 0 then
          tsplice(diffs, cursor - inserts, inserts, {DIFF_INSERT, inserted})
        elseif inserts == 0 then
          tsplice(diffs, cursor - deletes, deletes, {DIFF_DELETE, deleted})
        else
          tsplice(diffs, cursor - run, run,
              {DIFF_DELETE, deleted}, {DIFF_INSERT, inserted})
        end
        -- Step past the merged records and the equality itself.
        cursor = cursor - deletes - inserts
            + (deletes > 0 and 1 or 0) + (inserts > 0 and 1 or 0) + 1
      elseif (cursor > 1) and (diffs[cursor - 1][1] == DIFF_EQUAL) then
        -- Merge this equality with the previous one.
        diffs[cursor - 1][2] = diffs[cursor - 1][2] .. diffs[cursor][2]
        tremove(diffs, cursor)
      else
        cursor = cursor + 1
      end
      inserts, deletes = 0, 0
      deleted, inserted = '', ''
    end
  end
  if diffs[#diffs][2] == '' then
    diffs[#diffs] = nil  -- Remove the dummy entry at the end.
  end

  -- Second pass: look for single edits surrounded on both sides by equalities
  -- which can be shifted sideways to eliminate an equality.
  -- e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
  local shifted = false
  cursor = 2
  -- Intentionally ignore the first and last element (don't need checking).
  while cursor < #diffs do
    local left, right = diffs[cursor - 1], diffs[cursor + 1]
    if (left[1] == DIFF_EQUAL) and (right[1] == DIFF_EQUAL) then
      -- This is a single edit surrounded by equalities.
      local middle = diffs[cursor]
      local body, lead, trail = middle[2], left[2], right[2]
      if strsub(body, -#lead) == lead then
        -- Shift the edit over the previous equality.
        middle[2] = lead .. strsub(body, 1, -#lead - 1)
        right[2] = lead .. trail
        tremove(diffs, cursor - 1)
        shifted = true
      elseif strsub(body, 1, #trail) == trail then
        -- Shift the edit over the next equality.
        left[2] = lead .. trail
        middle[2] = strsub(body, #trail + 1) .. trail
        tremove(diffs, cursor + 1)
        shifted = true
      end
    end
    cursor = cursor + 1
  end

  -- If shifts were made, the diff needs reordering and another shift sweep.
  if shifted then
    -- LUANOTE: no return value, but 'return' is needed to get a tail call.
    return _diff_cleanupMerge(diffs)
  end
end
1244
+
1245
--[[
* loc is a location in text1, compute and return the equivalent location in
* text2.
* e.g. 'The cat' vs 'The big cat', 1->1, 5->8
* Locations are 1-based. If the location falls inside deleted text, the
* position in text2 where the deletion happened is returned. An empty diff
* list maps every location onto itself.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
* @param {number} loc Location within text1.
* @return {number} Location within text2.
--]]
function _diff_xIndex(diffs, loc)
  local chars1, chars2 = 1, 1
  local last_chars1, last_chars2 = 1, 1
  -- The diff whose text carried chars1 past loc, if there was one. It is
  -- tracked explicitly because "another diff follows" is not a reliable
  -- sign of an overshoot: the overshooting diff may be the last one.
  local overshot
  for _, diff in ipairs(diffs) do
    local op, length = diff[1], #diff[2]
    if op ~= DIFF_INSERT then  -- Equality or deletion.
      chars1 = chars1 + length
    end
    if op ~= DIFF_DELETE then  -- Equality or insertion.
      chars2 = chars2 + length
    end
    if chars1 > loc then  -- Overshot the location.
      overshot = diff
      break
    end
    last_chars1, last_chars2 = chars1, chars2
  end
  -- Was the location deleted?
  if overshot and (overshot[1] == DIFF_DELETE) then
    return last_chars2
  end
  -- Add the remaining character length.
  return last_chars2 + (loc - last_chars1)
end
1280
+
1281
--[[
* Rebuild the source text by joining the text of every diff that is not an
* insertion (i.e. all equalities and deletions), in order.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
* @return {string} Source text.
--]]
function _diff_text1(diffs)
  local pieces = {}
  for _, entry in ipairs(diffs) do
    local op, chunk = entry[1], entry[2]
    if op ~= DIFF_INSERT then
      pieces[#pieces + 1] = chunk
    end
  end
  return tconcat(pieces)
end
1295
+
1296
--[[
* Rebuild the destination text by joining the text of every diff that is
* not a deletion (i.e. all equalities and insertions), in order.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
* @return {string} Destination text.
--]]
function _diff_text2(diffs)
  local pieces = {}
  for _, entry in ipairs(diffs) do
    local op, chunk = entry[1], entry[2]
    if op ~= DIFF_DELETE then
      pieces[#pieces + 1] = chunk
    end
  end
  return tconcat(pieces)
end
1310
+
1311
--[[
* Crush the diff into an encoded string which describes the operations
* required to transform text1 into text2.
* E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
* Operations are tab-separated. Deletions and equalities are written as a
* length; inserted text is written out after being passed through gsub with
* percentEncode_pattern / percentEncode_replace.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
* @return {string} Delta text.
--]]
function _diff_toDelta(diffs)
  local tokens = {}
  for index, entry in ipairs(diffs) do
    local op, data = entry[1], entry[2]
    local token
    if op == DIFF_INSERT then
      -- Only the first result of gsub (the string) takes part in '..'.
      token = '+' .. gsub(data, percentEncode_pattern, percentEncode_replace)
    elseif op == DIFF_DELETE then
      token = '-' .. #data
    elseif op == DIFF_EQUAL then
      token = '=' .. #data
    end
    -- Tokens keep the index of their diff.
    tokens[index] = token
  end
  return tconcat(tokens, '\t')
end
1333
+
1334
--[[
* Given the original text1, and an encoded string which describes the
* operations required to transform text1 into text2, compute the full diff.
* Tokens are separated by tabs; empty tokens are skipped.
* @param {string} text1 Source string for the diff.
* @param {string} delta Delta text.
* @return {Array.<Array.<number|string>>} Array of diff tuples.
* @throws {Error} If an insert token holds a malformed %xx escape, a length
*     is not a non-negative number, a token starts with an unknown
*     operation, or the delta does not consume exactly all of text1.
--]]
function _diff_fromDelta(text1, delta)
  local diffs = {}
  local count = 0   -- Keeping our own length var is faster.
  local cursor = 1  -- Position in text1.
  for token in gmatch(delta, '[^\t]+') do
    -- The first character of each token names the operation (insert,
    -- delete, equality); the remainder is its parameter.
    local op, param = strsub(token, 1, 1), strsub(token, 2)
    if op == '+' then
      local malformed = false
      local decoded = gsub(param, '%%(.?.?)', function(hex)
        local code = tonumber(hex, 16)
        if (#hex ~= 2) or (code == nil) then
          malformed = true
          return ''
        end
        return strchar(code)
      end)
      if malformed then
        -- Malformed URI sequence.
        error('Illegal escape in _diff_fromDelta: ' .. param)
      end
      count = count + 1
      diffs[count] = {DIFF_INSERT, decoded}
    elseif (op == '-') or (op == '=') then
      local n = tonumber(param)
      if (n == nil) or (n < 0) then
        error('Invalid number in _diff_fromDelta: ' .. param)
      end
      -- Both operations consume the next n characters of text1.
      local piece = strsub(text1, cursor, cursor + n - 1)
      cursor = cursor + n
      local kind
      if op == '=' then
        kind = DIFF_EQUAL
      else
        kind = DIFF_DELETE
      end
      count = count + 1
      diffs[count] = {kind, piece}
    else
      error('Invalid diff operation in _diff_fromDelta: ' .. token)
    end
  end
  if cursor ~= #text1 + 1 then
    error('Delta length (' .. (cursor - 1)
        .. ') does not equal source text length (' .. #text1 .. ').')
  end
  return diffs
end
1391
+
1392
+ -- ---------------------------------------------------------------------------
1393
+ -- MATCH API
1394
+ -- ---------------------------------------------------------------------------
1395
+
1396
+ local _match_bitap, _match_alphabet
1397
+
1398
--[[
* Locate the best instance of 'pattern' in 'text' near 'loc'.
* Exact hits are answered directly; anything else is handed to the fuzzy
* Bitap search.
* @param {string} text The text to search.
* @param {string} pattern The pattern to search for.
* @param {number} loc The location to search around.
* @return {number} Best match index or -1.
* @throws {Error} If any argument is nil.
--]]
function match_main(text, pattern, loc)
  -- Check for null inputs.
  if text == nil or pattern == nil or loc == nil then
    error('Null inputs. (match_main)')
  end

  if text == pattern then
    -- Shortcut (potentially not guaranteed by the algorithm)
    return 1
  end
  if #text == 0 then
    -- Nothing to match.
    return -1
  end

  -- Clamp the requested location into the text.
  local anchor = max(1, min(loc, #text))
  if strsub(text, anchor, anchor + #pattern - 1) ~= pattern then
    -- Do a fuzzy compare.
    return _match_bitap(text, pattern, anchor)
  end
  -- Perfect match at the perfect spot! (Includes case of null pattern)
  return anchor
end
1427
+
1428
+ -- ---------------------------------------------------------------------------
1429
+ -- UNOFFICIAL/PRIVATE MATCH FUNCTIONS
1430
+ -- ---------------------------------------------------------------------------
1431
+
1432
--[[
* Initialise the alphabet for the Bitap algorithm.
* Each character of the pattern maps to a bitmask with one bit per position
* at which it occurs; the first character owns the highest bit and the last
* character the lowest.
* @param {string} pattern The text to encode.
* @return {Object} Hash of character locations.
* @private
--]]
function _match_alphabet(pattern)
  local masks = {}
  -- Shift amount for the current character; counts down to 0.
  local shift = #pattern - 1
  for char in gmatch(pattern, '.') do
    masks[char] = bor(masks[char] or 0, lshift(1, shift))
    shift = shift - 1
  end
  return masks
end
1447
+
1448
--[[
* Locate the best instance of 'pattern' in 'text' near 'loc' using the
* Bitap algorithm.
* A candidate is scored by its error ratio plus its distance from loc
* measured in units of Match_Distance; candidates scoring above
* Match_Threshold are rejected. With Match_Distance set to 0 only a match
* located exactly at loc is acceptable.
* @param {string} text The text to search.
* @param {string} pattern The pattern to search for.
* @param {number} loc The location to search around.
* @return {number} Best match index or -1.
* @throws {Error} If the pattern is longer than Match_MaxBits.
* @private
--]]
function _match_bitap(text, pattern, loc)
  if #pattern > Match_MaxBits then
    error('Pattern too long.')
  end

  -- Initialise the alphabet.
  local s = _match_alphabet(pattern)

  --[[
  * Compute and return the score for a match with e errors and x location.
  * Accesses loc and pattern through being a closure.
  * @param {number} e Number of errors in match.
  * @param {number} x Location of match.
  * @return {number} Overall score for match (0.0 = good, 1.0 = bad).
  * @private
  --]]
  local function _match_bitapScore(e, x)
    local accuracy = e / #pattern
    local proximity = abs(loc - x)
    if (Match_Distance == 0) then
      -- Dodge divide by zero error. A zero distance means "exact location
      -- only": a hit at loc is judged on accuracy alone, and any other
      -- position gets the worst score.
      if proximity == 0 then
        return accuracy
      end
      return 1
    end
    return accuracy + (proximity / Match_Distance)
  end

  -- Highest score beyond which we give up.
  local score_threshold = Match_Threshold
  -- Is there a nearby exact match? (speedup)
  local best_loc = indexOf(text, pattern, loc)
  if best_loc then
    score_threshold = min(_match_bitapScore(0, best_loc), score_threshold)
    -- LUANOTE: Ideally we'd also check from the other direction, but Lua
    -- doesn't have an efficient lastIndexOf function.
  end

  -- Initialise the bit arrays.
  local matchmask = lshift(1, #pattern - 1)
  best_loc = -1

  local bin_min, bin_mid
  local bin_max = #pattern + #text
  local last_rd
  for d = 0, #pattern - 1, 1 do
    -- Scan for the best match; each iteration allows for one more error.
    -- Run a binary search to determine how far from 'loc' we can stray at this
    -- error level.
    bin_min = 0
    bin_mid = bin_max
    while (bin_min < bin_mid) do
      if (_match_bitapScore(d, loc + bin_mid) <= score_threshold) then
        bin_min = bin_mid
      else
        bin_max = bin_mid
      end
      bin_mid = floor(bin_min + (bin_max - bin_min) / 2)
    end
    -- Use the result from this iteration as the maximum for the next.
    bin_max = bin_mid
    local start = max(1, loc - bin_mid + 1)
    local finish = min(loc + bin_mid, #text) + #pattern

    local rd = {}
    for j = start, finish do
      rd[j] = 0
    end
    rd[finish + 1] = lshift(1, d) - 1
    -- NOTE: the bounds of a numeric 'for' are evaluated once, so the update
    -- to 'start' further down does not shorten this loop.
    for j = finish, start, -1 do
      local charMatch = s[strsub(text, j - 1, j - 1)] or 0
      if (d == 0) then  -- First pass: exact match.
        rd[j] = band(bor((rd[j + 1] * 2), 1), charMatch)
      else
        -- Subsequent passes: fuzzy match.
        -- Functions instead of operators make this hella messy.
        rd[j] = bor(
                band(
                  bor(
                    lshift(rd[j + 1], 1),
                    1
                  ),
                  charMatch
                ),
                bor(
                  bor(
                    lshift(bor(last_rd[j + 1], last_rd[j]), 1),
                    1
                  ),
                  last_rd[j + 1]
                )
              )
      end
      if (band(rd[j], matchmask) ~= 0) then
        local score = _match_bitapScore(d, j - 1)
        -- This match will almost certainly be better than any existing match.
        -- But check anyway.
        if (score <= score_threshold) then
          -- Told you so.
          score_threshold = score
          best_loc = j - 1
          if (best_loc > loc) then
            -- When passing loc, don't exceed our current distance from loc.
            start = max(1, loc * 2 - best_loc)
          else
            -- Already passed loc, downhill from here on in.
            break
          end
        end
      end
    end
    -- No hope for a (better) match at greater error levels.
    if (_match_bitapScore(d + 1, loc) > score_threshold) then
      break
    end
    last_rd = rd
  end
  return best_loc
end
1574
+
1575
+ -- -----------------------------------------------------------------------------
1576
+ -- PATCH API
1577
+ -- -----------------------------------------------------------------------------
1578
+
1579
+ local _patch_addContext,
1580
+ _patch_deepCopy,
1581
+ _patch_addPadding,
1582
+ _patch_splitMax,
1583
+ _patch_appendText,
1584
+ _new_patch_obj
1585
+
1586
--[[
* Compute a list of patches to turn text1 into text2.
* Use diffs if provided, otherwise compute it ourselves.
* The call format is recognised from the types of the three arguments:
* Method 1:
* a = text1, b = text2
* Method 2:
* a = diffs
* Method 3 (optimal):
* a = text1, b = diffs
* Method 4 (deprecated, use method 3):
* a = text1, b = text2, c = diffs
*
* @param {string|Array.<Array.<number|string>>} a text1 (methods 1,3,4) or
* Array of diff tuples for text1 to text2 (method 2).
* @param {string|Array.<Array.<number|string>>} opt_b text2 (methods 1,4) or
* Array of diff tuples for text1 to text2 (method 3) or undefined (method 2).
* @param {string|Array.<Array.<number|string>>} opt_c Array of diff tuples for
* text1 to text2 (method 4) or undefined (methods 1,2,3).
* @return {Array.<_new_patch_obj>} Array of patch objects.
* @throws {Error} If the argument types match none of the four methods.
--]]
function patch_make(a, opt_b, opt_c)
  local text1, diffs
  -- Fold the three argument types into one key and dispatch on it.
  local shape = type(a) .. '/' .. type(opt_b) .. '/' .. type(opt_c)
  if shape == 'string/string/nil' then
    -- Method 1: text1, text2
    -- Compute diffs from text1 and text2.
    text1 = a
    diffs = diff_main(text1, opt_b, true)
    if (#diffs > 2) then
      diff_cleanupSemantic(diffs)
      diff_cleanupEfficiency(diffs)
    end
  elseif shape == 'table/nil/nil' then
    -- Method 2: diffs
    -- Compute text1 from diffs.
    diffs = a
    text1 = _diff_text1(diffs)
  elseif shape == 'string/table/nil' then
    -- Method 3: text1, diffs
    text1, diffs = a, opt_b
  elseif shape == 'string/string/table' then
    -- Method 4: text1, text2, diffs
    -- text2 is not used.
    text1, diffs = a, opt_c
  else
    error('Unknown call format to patch_make.')
  end

  if (diffs[1] == nil) then
    return {}  -- Get rid of the null case.
  end

  local patches = {}
  local current = _new_patch_obj()
  local pending = 0  -- Number of diffs in 'current'; own counter is faster.
  local offset1 = 0  -- Number of characters into the text1 string.
  local offset2 = 0  -- Number of characters into the text2 string.
  -- Start with text1 ('before') and apply the diffs until we arrive at
  -- text2 ('after'). The patches are recreated one by one to determine
  -- context info.
  local before, after = text1, text1
  for x, diff in ipairs(diffs) do
    local op, chunk = diff[1], diff[2]
    local size = #chunk

    if (pending == 0) and (op ~= DIFF_EQUAL) then
      -- A new patch starts here.
      current.start1 = offset1 + 1
      current.start2 = offset2 + 1
    end

    if op == DIFF_INSERT then
      pending = pending + 1
      current.diffs[pending] = diff
      current.length2 = current.length2 + size
      after = strsub(after, 1, offset2) .. chunk .. strsub(after, offset2 + 1)
    elseif op == DIFF_DELETE then
      pending = pending + 1
      current.diffs[pending] = diff
      current.length1 = current.length1 + size
      after = strsub(after, 1, offset2) .. strsub(after, offset2 + size + 1)
    elseif op == DIFF_EQUAL then
      if (size <= Patch_Margin * 2) and (pending ~= 0) and (#diffs ~= x) then
        -- Small equality inside a patch.
        pending = pending + 1
        current.diffs[pending] = diff
        current.length1 = current.length1 + size
        current.length2 = current.length2 + size
      elseif (size >= Patch_Margin * 2) and (pending ~= 0) then
        -- Time for a new patch.
        _patch_addContext(current, before)
        patches[#patches + 1] = current
        current = _new_patch_obj()
        pending = 0
        -- Unlike Unidiff, our patch lists have a rolling context.
        -- http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
        -- Update prepatch text & pos to reflect the application of the
        -- just completed patch.
        before = after
        offset1 = offset2
      end
    end

    -- Update the current character count.
    if op ~= DIFF_INSERT then
      offset1 = offset1 + size
    end
    if op ~= DIFF_DELETE then
      offset2 = offset2 + size
    end
  end

  -- Pick up the leftover patch if not empty.
  if pending > 0 then
    _patch_addContext(current, before)
    patches[#patches + 1] = current
  end

  return patches
end
1715
+
1716
--[[
* Merge a set of patches onto the text. Return a patched text, as well
* as a list of true/false values indicating which patches were applied.
* The patches passed in are not modified; the function works on a copy made
* with _patch_deepCopy.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @param {string} text Old text.
* @return {Array.<string|Array.<boolean>>} Two return values, the
*      new text and an array of boolean values.
--]]
function patch_apply(patches, text)
  if patches[1] == nil then
    return text, {}
  end

  -- Deep copy the patches so that no changes are made to originals.
  patches = _patch_deepCopy(patches)

  local nullPadding = _patch_addPadding(patches)
  text = nullPadding .. text .. nullPadding

  _patch_splitMax(patches)
  -- delta keeps track of the offset between the expected and actual location
  -- of the previous patch. If there are patches expected at positions 10 and
  -- 20, but the first patch was found at 12, delta is 2 and the second patch
  -- has an effective expected position of 22.
  local delta = 0
  local results = {}
  for x, patch in ipairs(patches) do
    local expected_loc = patch.start2 + delta
    local text1 = _diff_text1(patch.diffs)
    local start_loc
    local end_loc = -1
    if #text1 > Match_MaxBits then
      -- _patch_splitMax will only provide an oversized pattern in
      -- the case of a monster delete.
      start_loc = match_main(text,
          strsub(text1, 1, Match_MaxBits), expected_loc)
      if start_loc ~= -1 then
        end_loc = match_main(text, strsub(text1, -Match_MaxBits),
            expected_loc + #text1 - Match_MaxBits)
        if end_loc == -1 or start_loc >= end_loc then
          -- Can't find valid trailing context. Drop this patch.
          start_loc = -1
        end
      end
    else
      start_loc = match_main(text, text1, expected_loc)
    end
    if start_loc == -1 then
      -- No match found. :(
      results[x] = false
      -- Subtract the delta for this failed patch from subsequent patches.
      -- A patch that is not applied does not change the text length, so the
      -- growth it would have caused (length2 - length1) is taken back out.
      delta = delta - (patch.length2 - patch.length1)
    else
      -- Found a match. :)
      results[x] = true
      delta = start_loc - expected_loc
      local text2
      if end_loc == -1 then
        text2 = strsub(text, start_loc, start_loc + #text1 - 1)
      else
        text2 = strsub(text, start_loc, end_loc + Match_MaxBits - 1)
      end
      if text1 == text2 then
        -- Perfect match, just shove the replacement text in.
        text = strsub(text, 1, start_loc - 1) .. _diff_text2(patch.diffs)
            .. strsub(text, start_loc + #text1)
      else
        -- Imperfect match. Run a diff to get a framework of equivalent
        -- indices.
        local diffs = diff_main(text1, text2, false)
        if (#text1 > Match_MaxBits)
            and (diff_levenshtein(diffs) / #text1 > Patch_DeleteThreshold) then
          -- The end points match, but the content is unacceptably bad.
          results[x] = false
        else
          _diff_cleanupSemanticLossless(diffs)
          local index1 = 1
          local index2
          for y, mod in ipairs(patch.diffs) do
            if mod[1] ~= DIFF_EQUAL then
              -- Map the position in the patch's text1 onto the matched text.
              index2 = _diff_xIndex(diffs, index1)
            end
            if mod[1] == DIFF_INSERT then
              text = strsub(text, 1, start_loc + index2 - 2)
                  .. mod[2] .. strsub(text, start_loc + index2 - 1)
            elseif mod[1] == DIFF_DELETE then
              text = strsub(text, 1, start_loc + index2 - 2) .. strsub(text,
                  start_loc + _diff_xIndex(diffs, index1 + #mod[2] - 1))
            end
            if mod[1] ~= DIFF_DELETE then
              index1 = index1 + #mod[2]
            end
          end
        end
      end
    end
  end
  -- Strip the padding off.
  text = strsub(text, #nullPadding + 1, -#nullPadding - 1)
  return text, results
end
1817
+
1818
--[[
* Serialize a list of patches into one string.
* Every patch is rendered through _patch_appendText into a shared buffer and
* the buffer is then joined without any separator.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {string} Text representation of patches.
--]]
function patch_toText(patches)
  local chunks = {}
  for _, entry in ipairs(patches) do
    _patch_appendText(entry, chunks)
  end
  return tconcat(chunks)
end
1830
+
1831
--[[
* Parse a textual representation of patches and return a list of patch objects.
*
* The input is split on newlines. Each patch starts with a header line of the
* form "@@ -start1,length1 +start2,length2 @@" (an omitted length means 1),
* followed by body lines whose first character selects the operation
* ('-' delete, '+' insert, ' ' equality) and whose remainder is %xx-escaped
* text. Empty lines inside a patch body are ignored.
*
* @param {string} textline Text representation of patches.
* @return {Array.<_new_patch_obj>} Array of patch objects.
* @throws {Error} If the header is malformed, a body line has an unknown
*     leading character, or a '%' is not followed by exactly two hex digits.
--]]
function patch_fromText(textline)
  local patches = {}
  if (#textline == 0) then
    return patches
  end
  local text = {}
  for line in gmatch(textline, '([^\n]*)') do
    text[#text + 1] = line
  end

  -- Decoder for a single "%xx" escape, shared by every body line.
  -- The capture is validated against '^%x%x$' rather than by calling
  -- tonumber(c, 16) alone: tonumber tolerates surrounding whitespace and a
  -- sign, so sequences such as "% 1", "%1 " or "%+1" would otherwise be
  -- accepted (and swallow a character) instead of being rejected.
  local invalidDecode = false
  local function decodeEscape(c)
    if not strmatch(c, '^%x%x$') then
      invalidDecode = true
      return ''
    end
    return strchar(tonumber(c, 16))
  end

  local textPointer = 1
  while (textPointer <= #text) do
    local start1, length1, start2, length2
        = strmatch(text[textPointer], '^@@ %-(%d+),?(%d*) %+(%d+),?(%d*) @@$')
    if (start1 == nil) then
      error('Invalid patch string: "' .. text[textPointer] .. '"')
    end
    local patch = _new_patch_obj()
    patches[#patches + 1] = patch

    -- A zero-length range is serialized with its start decremented by one
    -- (see _patch_appendText), so undo that here.
    start1 = tonumber(start1)
    length1 = tonumber(length1) or 1
    if (length1 == 0) then
      start1 = start1 + 1
    end
    patch.start1 = start1
    patch.length1 = length1

    start2 = tonumber(start2)
    length2 = tonumber(length2) or 1
    if (length2 == 0) then
      start2 = start2 + 1
    end
    patch.start2 = start2
    patch.length2 = length2

    textPointer = textPointer + 1

    -- Consume body lines until the input ends or the next header starts.
    while true do
      local line = text[textPointer]
      if (line == nil) then
        break
      end
      local sign; sign, line = strsub(line, 1, 1), strsub(line, 2)

      invalidDecode = false
      -- gsub returns two values; only the decoded string is kept.
      local decoded = gsub(line, '%%(.?.?)', decodeEscape)
      if invalidDecode then
        -- Malformed URI sequence.
        error('Illegal escape in patch_fromText: ' .. line)
      end

      line = decoded

      if (sign == '-') then
        -- Deletion.
        patch.diffs[#patch.diffs + 1] = {DIFF_DELETE, line}
      elseif (sign == '+') then
        -- Insertion.
        patch.diffs[#patch.diffs + 1] = {DIFF_INSERT, line}
      elseif (sign == ' ') then
        -- Minor equality.
        patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, line}
      elseif (sign == '@') then
        -- Start of next patch; leave textPointer on the header line.
        break
      elseif (sign == '') then
        -- Blank line: skipped.
      else
        -- Unknown operation marker.
        error('Invalid patch mode "' .. sign .. '" in: ' .. line)
      end
      textPointer = textPointer + 1
    end
  end
  return patches
end
1921
+
1922
+ -- ---------------------------------------------------------------------------
1923
+ -- UNOFFICIAL/PRIVATE PATCH FUNCTIONS
1924
+ -- ---------------------------------------------------------------------------
1925
+
1926
-- Metatable shared by all patch objects.
local patch_meta = {}

-- tostring(patch): render the patch through _patch_appendText into a fresh
-- buffer and join the pieces into one string.
function patch_meta.__tostring(patch)
  local pieces = {}
  _patch_appendText(patch, pieces)
  return tconcat(pieces)
end
1933
+
1934
--[[
* Create a fresh patch object.
* Fields:
*   diffs   {Array.<Array.<number|string>>} list of {operation, text} pairs,
*           initially empty
*   start1  {number} initially 1
*   start2  {number} initially 1
*   length1 {number} initially 0
*   length2 {number} initially 0
* The object uses patch_meta as its metatable.
* @constructor
--]]
function _new_patch_obj()
  local obj = {
    diffs = {},
    start1 = 1,
    start2 = 1,
    length1 = 0,
    length2 = 0,
  }
  return setmetatable(obj, patch_meta)
end
1952
+
1953
--[[
* Surround a patch with equalities taken from the text around it.
* The context window is widened in steps of Patch_Margin for as long as the
* covered pattern is empty or occurs at least twice in the text, but never
* once the pattern has reached Match_MaxBits - 2 * Patch_Margin characters.
* One further Patch_Margin is then added, the resulting prefix and suffix are
* attached to patch.diffs as equalities, and the patch start/length fields
* are adjusted. Does nothing when text is empty.
* @param {_new_patch_obj} patch The patch to grow.
* @param {string} text Source text.
* @private
--]]
function _patch_addContext(patch, text)
  if #text == 0 then
    return
  end

  -- Span currently covered by the patch; these fields are only rewritten at
  -- the very end, so they can be cached.
  local first = patch.start2
  local last = patch.start2 + patch.length1 - 1

  -- LUANOTE: Lua's lack of a lastIndexOf function results in slightly
  -- different logic here than in other language ports: uniqueness is tested
  -- by looking for a second match after the first one.
  local function isAmbiguous(candidate)
    local hit = indexOf(text, candidate)
    if hit == nil then
      return false
    end
    return indexOf(text, candidate, hit + 1) ~= nil
  end

  local sizeLimit = Match_MaxBits - Patch_Margin - Patch_Margin
  local pattern = strsub(text, first, last)
  local padding = 0
  local ambiguous = isAmbiguous(pattern)
  while (#pattern == 0 or ambiguous) and (#pattern < sizeLimit) do
    padding = padding + Patch_Margin
    pattern = strsub(text, max(1, first - padding), last + padding)
    ambiguous = isAmbiguous(pattern)
  end
  -- Add one chunk for good luck.
  padding = padding + Patch_Margin

  -- Leading context.
  local prefix = strsub(text, max(1, first - padding), first - 1)
  if #prefix > 0 then
    tinsert(patch.diffs, 1, {DIFF_EQUAL, prefix})
  end
  -- Trailing context.
  local suffix = strsub(text, last + 1, last + padding)
  if #suffix > 0 then
    patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, suffix}
  end

  -- Move the start points back over the prefix and widen both lengths.
  local grown = #prefix + #suffix
  patch.start1 = patch.start1 - #prefix
  patch.start2 = patch.start2 - #prefix
  patch.length1 = patch.length1 + grown
  patch.length2 = patch.length2 + grown
end
2010
+
2011
--[[
* Duplicate a list of patches.
* Each patch is rebuilt with _new_patch_obj; its start/length fields are
* copied and every {operation, text} pair in its diffs gets a new table, so
* the copies share no tables with the originals.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {Array.<_new_patch_obj>} Array of patch objects.
--]]
function _patch_deepCopy(patches)
  local copies = {}
  for index, source in ipairs(patches) do
    local clone = _new_patch_obj()
    clone.start1, clone.start2 = source.start1, source.start2
    clone.length1, clone.length2 = source.length1, source.length2

    local clonedDiffs = {}
    for pos, entry in ipairs(source.diffs) do
      clonedDiffs[pos] = {entry[1], entry[2]}
    end
    clone.diffs = clonedDiffs

    copies[index] = clone
  end
  return copies
end
2033
+
2034
--[[
* Add some padding on text start and end so that edges can match something.
* Intended to be called only from within patch_apply.
*
* The padding string consists of the characters with codes 1..Patch_Margin.
* Every patch is shifted forward by the padding length; the first patch is
* given (or has its leading equality grown to) a full padding prefix and the
* last patch a full padding suffix. The patches are modified in place.
* An empty patch list is left untouched and only the padding is returned.
*
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {string} The padding string added to each side.
--]]
function _patch_addPadding(patches)
  local paddingLength = Patch_Margin
  local nullPadding = ''
  for x = 1, paddingLength do
    nullPadding = nullPadding .. strchar(x)
  end

  -- Nothing to bump or pad; without this guard patches[1] would be nil and
  -- indexing it below would raise an error.
  if patches[1] == nil then
    return nullPadding
  end

  -- Bump all the patches forward.
  for x, patch in ipairs(patches) do
    patch.start1 = patch.start1 + paddingLength
    patch.start2 = patch.start2 + paddingLength
  end

  -- Add some padding on start of first diff.
  local patch = patches[1]
  local diffs = patch.diffs
  local firstDiff = diffs[1]
  if (firstDiff == nil) or (firstDiff[1] ~= DIFF_EQUAL) then
    -- Add nullPadding equality.
    tinsert(diffs, 1, {DIFF_EQUAL, nullPadding})
    -- Undo the bump: the patch now begins with the padding itself, so its
    -- start returns to the pre-bump value (expected to be the first
    -- position, i.e. 1 with this port's 1-based indices).
    patch.start1 = patch.start1 - paddingLength
    patch.start2 = patch.start2 - paddingLength
    patch.length1 = patch.length1 + paddingLength
    patch.length2 = patch.length2 + paddingLength
  elseif (paddingLength > #firstDiff[2]) then
    -- Grow first equality with the tail of the padding that it lacks.
    local extraLength = paddingLength - #firstDiff[2]
    firstDiff[2] = strsub(nullPadding, #firstDiff[2] + 1) .. firstDiff[2]
    patch.start1 = patch.start1 - extraLength
    patch.start2 = patch.start2 - extraLength
    patch.length1 = patch.length1 + extraLength
    patch.length2 = patch.length2 + extraLength
  end

  -- Add some padding on end of last diff.
  patch = patches[#patches]
  diffs = patch.diffs
  local lastDiff = diffs[#diffs]
  if (lastDiff == nil) or (lastDiff[1] ~= DIFF_EQUAL) then
    -- Add nullPadding equality.
    diffs[#diffs + 1] = {DIFF_EQUAL, nullPadding}
    patch.length1 = patch.length1 + paddingLength
    patch.length2 = patch.length2 + paddingLength
  elseif (paddingLength > #lastDiff[2]) then
    -- Grow last equality with the head of the padding that it lacks.
    local extraLength = paddingLength - #lastDiff[2]
    lastDiff[2] = lastDiff[2] .. strsub(nullPadding, 1, extraLength)
    patch.length1 = patch.length1 + extraLength
    patch.length2 = patch.length2 + extraLength
  end

  return nullPadding
end
2093
+
2094
--[[
* Break up every patch whose length1 exceeds Match_MaxBits into a run of
* smaller patches, replacing it in the list in place.
* Each smaller patch starts with up to Patch_Margin characters of context
* carried over from the previous piece and ends with up to Patch_Margin
* characters of context taken from what remains of the big patch.
* Pieces that contain no insertion or deletion are dropped.
* Intended to be called only from within patch_apply.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
--]]
function _patch_splitMax(patches)
  local maxSize = Match_MaxBits
  local x = 1
  while patches[x] ~= nil do
    if patches[x].length1 > maxSize then
      local bigpatch = patches[x]
      -- Take the oversized patch out; its pieces are inserted from slot x on.
      tremove(patches, x)
      x = x - 1

      local start1, start2 = bigpatch.start1, bigpatch.start2
      local precontext = ''
      -- Diffs of the big patch that are still to be distributed.
      local pending = bigpatch.diffs

      while pending[1] do
        -- Create one of several smaller patches.
        local piece = _new_patch_obj()
        local empty = true
        piece.start1 = start1 - #precontext
        piece.start2 = start2 - #precontext
        if precontext ~= '' then
          piece.length1, piece.length2 = #precontext, #precontext
          piece.diffs[#piece.diffs + 1] = {DIFF_EQUAL, precontext}
        end

        while pending[1] and (piece.length1 < maxSize - Patch_Margin) do
          local head = pending[1]
          local diff_type, diff_text = head[1], head[2]
          if diff_type == DIFF_INSERT then
            -- Insertions are harmless: move the whole diff across.
            piece.length2 = piece.length2 + #diff_text
            start2 = start2 + #diff_text
            piece.diffs[#piece.diffs + 1] = head
            tremove(pending, 1)
            empty = false
          elseif (diff_type == DIFF_DELETE) and (#piece.diffs == 1)
              and (piece.diffs[1][1] == DIFF_EQUAL)
              and (#diff_text > 2 * maxSize) then
            -- This is a large deletion. Let it pass in one chunk.
            piece.length1 = piece.length1 + #diff_text
            start1 = start1 + #diff_text
            empty = false
            piece.diffs[#piece.diffs + 1] = {diff_type, diff_text}
            tremove(pending, 1)
          else
            -- Deletion or equality.
            -- Only take as much as still fits into this piece.
            local chunk = strsub(diff_text, 1,
                maxSize - piece.length1 - Patch_Margin)
            piece.length1 = piece.length1 + #chunk
            start1 = start1 + #chunk
            if diff_type == DIFF_EQUAL then
              piece.length2 = piece.length2 + #chunk
              start2 = start2 + #chunk
            else
              empty = false
            end
            piece.diffs[#piece.diffs + 1] = {diff_type, chunk}
            if chunk == diff_text then
              -- The whole diff was consumed.
              tremove(pending, 1)
            else
              -- Leave the unconsumed remainder for the next round.
              head[2] = strsub(diff_text, #chunk + 1)
            end
          end
        end

        -- Compute the head context for the next patch.
        precontext = strsub(_diff_text2(piece.diffs), -Patch_Margin)
        -- Append the end context for this patch.
        local postcontext = strsub(_diff_text1(pending), 1, Patch_Margin)
        if postcontext ~= '' then
          piece.length1 = piece.length1 + #postcontext
          piece.length2 = piece.length2 + #postcontext
          local tail = piece.diffs[#piece.diffs]
          if tail and (tail[1] == DIFF_EQUAL) then
            tail[2] = tail[2] .. postcontext
          else
            piece.diffs[#piece.diffs + 1] = {DIFF_EQUAL, postcontext}
          end
        end

        if not empty then
          x = x + 1
          tinsert(patches, x, piece)
        end
      end
    end
    x = x + 1
  end
end
2192
+
2193
--[[
* Append a GNU-diff-like rendering of one patch to a buffer table.
* Header: @@ -382,8 +481,9 @@
* The length is omitted from a coordinate pair when it is 1, and the start is
* written one lower when the length is 0. The header is followed by one
* line per diff: an operation character ('+', '-' or ' ') and the diff text
* escaped through percentEncode_pattern / percentEncode_replace.
* @param {_new_patch_obj} patch The patch to render.
* @param {Array.<string>} text Buffer the pieces are appended to.
* @return {Array.<string>} The same buffer table that was passed in.
--]]
function _patch_appendText(patch, text)
  -- Format one "start,length" coordinate pair of the header.
  local function coords(start, length)
    if length == 1 then
      return start
    end
    return ((length == 0) and (start - 1) or start) .. ',' .. length
  end

  local header = '@@ -' .. coords(patch.start1, patch.length1)
      .. ' +' .. coords(patch.start2, patch.length2) .. ' @@\n'
  text[#text + 1] = header

  -- Declared outside the loop on purpose: an entry with an unrecognized
  -- operation keeps whatever marker the previous entry had.
  local marker
  -- Escape the body of the patch with %xx notation.
  for _, entry in ipairs(patch.diffs) do
    local kind = entry[1]
    if kind == DIFF_INSERT then
      marker = '+'
    elseif kind == DIFF_DELETE then
      marker = '-'
    elseif kind == DIFF_EQUAL then
      marker = ' '
    end
    local escaped = gsub(entry[2], percentEncode_pattern, percentEncode_replace)
    text[#text + 1] = marker .. escaped .. '\n'
  end

  return text
end
2235
+
2236
-- Redis script entry point: diff the strings stored at two keys.
--   KEYS[1]  key holding the original text
--   KEYS[2]  key holding the new text
--   KEYS[3]  optional key that receives the serialized patch
-- Replies with the patch text produced by patch_toText.
if not KEYS[1] or not KEYS[2] then
  return redis.error_reply('ERR diff_match_patch: two source keys are required')
end

local text1 = redis.call("get", KEYS[1])
local text2 = redis.call("get", KEYS[2])

-- GET on a missing key comes back to Lua as false, not as a string. Report
-- that explicitly instead of handing a non-string to patch_make.
if not text1 then
  return redis.error_reply('ERR diff_match_patch: no such key: ' .. KEYS[1])
end
if not text2 then
  return redis.error_reply('ERR diff_match_patch: no such key: ' .. KEYS[2])
end

local patches = patch_make(text1, text2)
local patch_text = patch_toText(patches)

if KEYS[3] then
  redis.call("set", KEYS[3], patch_text)
end

return patch_text
2246
+
2247
+ -- -- Expose the API
2248
+ -- _M.DIFF_DELETE = DIFF_DELETE
2249
+ -- _M.DIFF_INSERT = DIFF_INSERT
2250
+ -- _M.DIFF_EQUAL = DIFF_EQUAL
2251
+ --
2252
+ -- _M.diff_main = diff_main
2253
+ -- _M.diff_cleanupSemantic = diff_cleanupSemantic
2254
+ -- _M.diff_cleanupEfficiency = diff_cleanupEfficiency
2255
+ -- _M.diff_levenshtein = diff_levenshtein
2256
+ -- _M.diff_prettyHtml = diff_prettyHtml
2257
+ --
2258
+ -- _M.match_main = match_main
2259
+ --
2260
+ -- _M.patch_make = patch_make
2261
+ -- _M.patch_toText = patch_toText
2262
+ -- _M.patch_fromText = patch_fromText
2263
+ -- _M.patch_apply = patch_apply
2264
+ --
2265
+ -- -- Expose some non-API functions as well, for testing purposes etc.
2266
+ -- _M.diff_commonPrefix = _diff_commonPrefix
2267
+ -- _M.diff_commonSuffix = _diff_commonSuffix
2268
+ -- _M.diff_commonOverlap = _diff_commonOverlap
2269
+ -- _M.diff_halfMatch = _diff_halfMatch
2270
+ -- _M.diff_bisect = _diff_bisect
2271
+ -- _M.diff_cleanupMerge = _diff_cleanupMerge
2272
+ -- _M.diff_cleanupSemanticLossless = _diff_cleanupSemanticLossless
2273
+ -- _M.diff_text1 = _diff_text1
2274
+ -- _M.diff_text2 = _diff_text2
2275
+ -- _M.diff_toDelta = _diff_toDelta
2276
+ -- _M.diff_fromDelta = _diff_fromDelta
2277
+ -- _M.diff_xIndex = _diff_xIndex
2278
+ -- _M.match_alphabet = _match_alphabet
2279
+ -- _M.match_bitap = _match_bitap
2280
+ -- _M.new_patch_obj = _new_patch_obj
2281
+ -- _M.patch_addContext = _patch_addContext
2282
+ -- _M.patch_splitMax = _patch_splitMax
2283
+ -- _M.patch_addPadding = _patch_addPadding