diff_match_patch 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,23 @@
1
+ Copyright (c) 2011, Jorge Kalmbach <kalmbach.at.gmail.com>
2
+
3
+ Permission is hereby granted, free of charge, to any
4
+ person obtaining a copy of this software and associated
5
+ documentation files (the "Software"), to deal in the
6
+ Software without restriction, including without limitation
7
+ the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the
9
+ Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice
13
+ shall be included in all copies or substantial portions of
14
+ the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
17
+ KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
18
+ WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
19
+ PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
20
+ OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,11 @@
1
+ # DiffMatchPatch
2
+
3
+ A Ruby implementation of the Google diff-match-patch library.
4
+ http://code.google.com/p/google-diff-match-patch/
5
+
6
+ The Diff Match and Patch libraries offer robust algorithms to perform the operations required for synchronizing plain text.
7
+
8
+ This work was inspired by the diff_match_patch-ruby module.
9
+ (https://github.com/reima/diff_match_patch-ruby)
10
+
11
+ Copyright (c) 2011, Jorge Kalmbach <kalmbach.at.gmail.com>
@@ -0,0 +1,8 @@
1
require 'rake/testtask'

# Declare the test task, with the 'test' directory added to its load path.
Rake::TestTask.new { |test_task| test_task.libs << 'test' }

# Running rake without arguments runs the tests.
desc "Run tests"
task default: :test
@@ -0,0 +1,1626 @@
1
+ require 'patch_obj'
2
+
3
+ # Class containing the diff, match and patch methods.
4
+ # Also contains the behaviour settings.
5
+ class DiffMatchPatch
6
+ attr_accessor :diff_timeout
7
+ attr_accessor :diff_editCost
8
+ attr_accessor :match_threshold
9
+ attr_accessor :match_distance
10
+ attr_accessor :patch_deleteThreshold
11
+ attr_accessor :patch_margin
12
+ attr_reader :match_maxBits
13
+
14
def initialize
  # Sets every tuning knob to its default value. Most of them can be
  # overridden afterwards through the accessors declared on the class.

  # Diff: seconds a diff may run before giving up; 0 means no limit.
  @diff_timeout = 1
  # Diff: what an empty edit operation costs, measured in edit characters.
  @diff_editCost = 4
  # Match: cut-off at which no match is declared
  # (0.0 = perfection, 1.0 = very loose).
  @match_threshold = 0.5
  # Match: how far to search around the expected location
  # (0 = exact location, 1000+ = broad match). A match this many characters
  # away adds 1.0 to the score, where 0.0 is a perfect match.
  @match_distance = 1000
  # Patch: when deleting a large block of text (over ~64 characters), how
  # closely the contents must match the expected contents
  # (0.0 = perfection, 1.0 = very loose). The end points of a delete are
  # governed by the match threshold instead.
  @patch_deleteThreshold = 0.5
  # Patch: chunk size used for context length.
  @patch_margin = 4

  # Match: the number of bits in an int. Setting it to 0 disables patch
  # splitting; 32 avoids long patches in certain pathological cases, and
  # multiple short patches are much faster than long ones.
  @match_maxBits = 32
end
42
+
43
+
44
+ # Find the differences between two texts. Simplifies the problem by
45
+ # stripping any common prefix or suffix off the texts before diffing.
46
def diff_main(text1, text2, checklines=true, deadline=nil)
  # Returns an array of [operation, text] pairs (:equal, :insert, :delete)
  # describing how to turn text1 into text2.
  # Raises ArgumentError when either text is nil.

  # No explicit deadline: derive one from the configured timeout.
  deadline = Time.now + diff_timeout if deadline.nil? && diff_timeout > 0

  raise ArgumentError, 'Null inputs. (diff_main)' if text1.nil? || text2.nil?

  # Identical texts need no diffing at all.
  if text1 == text2
    return text1.empty? ? [] : [[:equal, text1]]
  end

  checklines = true if checklines.nil?

  # Peel off the shared head so the expensive work sees less text.
  head_length = diff_commonPrefix(text1, text2)
  unless head_length.zero?
    head = text1[0...head_length]
    text1 = text1[head_length..-1]
    text2 = text2[head_length..-1]
  end

  # Same for the shared tail.
  tail_length = diff_commonSuffix(text1, text2)
  unless tail_length.zero?
    tail = text1[-tail_length..-1]
    text1 = text1[0...-tail_length]
    text2 = text2[0...-tail_length]
  end

  # Diff what is left in the middle.
  diffs = diff_compute(text1, text2, checklines, deadline)

  # Put the peeled-off parts back as equalities, then normalise.
  diffs.unshift([:equal, head]) if head
  diffs.push([:equal, tail]) if tail
  diff_cleanupMerge(diffs)

  diffs
end
91
+
92
+ # Find the differences between two texts. Assumes that the texts do not
93
+ # have any common prefix or suffix.
94
def diff_compute(text1, text2, checklines, deadline)
  # Picks the cheapest strategy that can diff the two texts, trying the
  # trivial cases first and falling back to bisection last.

  # One side empty: the whole other side is a single edit.
  return [[:insert, text2]] if text1.empty?
  return [[:delete, text1]] if text2.empty?

  smaller, larger = [text1, text2].sort_by(&:length)
  offset = larger.index(smaller)
  if offset
    # The smaller text sits intact inside the larger one. The surrounding
    # text is deleted when text1 is the larger side, inserted otherwise.
    edit = text1.length > text2.length ? :delete : :insert
    return [[edit, larger[0...offset]],
            [:equal, smaller],
            [edit, larger[(offset + smaller.length)..-1]]]
  end

  # A single character that was not found above cannot be an equality.
  return [[:delete, text1], [:insert, text2]] if smaller.length == 1

  # Try to split the problem around a large common substring.
  half_match = diff_halfMatch(text1, text2)
  if half_match
    text1_a, text1_b, text2_a, text2_b, mid_common = half_match
    diffs_a = diff_main(text1_a, text2_a, checklines, deadline)
    diffs_b = diff_main(text1_b, text2_b, checklines, deadline)
    return diffs_a + [[:equal, mid_common]] + diffs_b
  end

  if checklines && text1.length > 100 && text2.length > 100
    diff_lineMode(text1, text2, deadline)
  else
    diff_bisect(text1, text2, deadline)
  end
end
143
+
144
+ # Do a quick line-level diff on both strings, then rediff the parts for
145
+ # greater accuracy.
146
+ # This speedup can produce non-minimal diffs.
147
def diff_lineMode(text1, text2, deadline)
  # Diffs whole lines first, then re-diffs each delete/insert run character
  # by character. The result may be non-minimal.

  # Encode each distinct line as one character and diff the encodings.
  text1, text2, line_array = diff_linesToChars(text1, text2)
  diffs = diff_main(text1, text2, false, deadline)

  # Decode back to real text and drop freak matches (e.g. blank lines).
  diff_charsToLines(diffs, line_array)
  diff_cleanupSemantic(diffs)

  # A trailing empty equality acts as a sentinel so that the last run of
  # edits is flushed like any other.
  diffs.push([:equal, ''])
  index = 0
  deletes = 0
  inserts = 0
  deleted_text = ''
  inserted_text = ''

  while index < diffs.length
    op, data = diffs[index]
    case op
    when :insert
      inserts += 1
      inserted_text += data
    when :delete
      deletes += 1
      deleted_text += data
    when :equal
      if deletes >= 1 && inserts >= 1
        # Swap the collected run for a finer, character-level diff of it.
        refined = diff_main(deleted_text, inserted_text, false, deadline)
        run_start = index - deletes - inserts
        diffs[run_start, deletes + inserts] = refined
        index = run_start + refined.length
      end
      inserts = 0
      deletes = 0
      deleted_text = ''
      inserted_text = ''
    end
    index += 1
  end

  diffs.pop # Drop the sentinel again.
  diffs
end
197
+
198
+ # Find the 'middle snake' of a diff, split the problem in two
199
+ # and return the recursively constructed diff.
200
+ # See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
201
def diff_bisect(text1, text2, deadline)
  # Walks a forward and a reverse path through the edit graph one step at a
  # time. Where they overlap, the texts are split at that point and each half
  # is diffed separately. If the deadline passes or no overlap is found, the
  # result is a plain delete-everything / insert-everything pair.
  len1 = text1.length
  len2 = text2.length
  max_d = (len1 + len2 + 1) / 2
  v_offset = max_d
  v_length = 2 * max_d
  forward = Array.new(v_length, -1)
  backward = Array.new(v_length, -1)
  forward[v_offset + 1] = 0
  backward[v_offset + 1] = 0
  delta = len1 - len2

  # With an odd length difference the front path is the one that collides
  # with the reverse path, so the overlap test happens in the forward walk.
  front = delta.odd?

  # Offsets that trim the k loops so they do not map space beyond the grid.
  k1start = 0
  k1end = 0
  k2start = 0
  k2end = 0

  (0...max_d).each do |d|
    # Give up once the deadline has passed.
    break if deadline && Time.now >= deadline

    # Advance the forward path by one step.
    (k1start - d).step(d - k1end, 2) do |k1|
      k1_offset = v_offset + k1
      use_next = k1 == -d || (k1 != d && forward[k1_offset - 1] < forward[k1_offset + 1])
      x1 = use_next ? forward[k1_offset + 1] : forward[k1_offset - 1] + 1
      y1 = x1 - k1

      # Follow the run of matching characters.
      while x1 < len1 && y1 < len2 && text1[x1] == text2[y1]
        x1 += 1
        y1 += 1
      end
      forward[k1_offset] = x1

      if x1 > len1
        # Ran off the right of the graph.
        k1end += 2
      elsif y1 > len2
        # Ran off the bottom of the graph.
        k1start += 2
      elsif front
        k2_offset = v_offset + delta - k1
        if k2_offset >= 0 && k2_offset < v_length && backward[k2_offset] != -1
          # Mirror the reverse x onto the top-left coordinate system.
          x2 = len1 - backward[k2_offset]
          # Overlap detected.
          return diff_bisectSplit(text1, text2, x1, y1, deadline) if x1 >= x2
        end
      end
    end

    # Advance the reverse path by one step.
    (k2start - d).step(d - k2end, 2) do |k2|
      k2_offset = v_offset + k2
      use_next = k2 == -d || (k2 != d && backward[k2_offset - 1] < backward[k2_offset + 1])
      x2 = use_next ? backward[k2_offset + 1] : backward[k2_offset - 1] + 1
      y2 = x2 - k2

      # Follow the run of matching characters, counted from the text ends.
      while x2 < len1 && y2 < len2 && text1[-x2 - 1] == text2[-y2 - 1]
        x2 += 1
        y2 += 1
      end
      backward[k2_offset] = x2

      if x2 > len1
        # Ran off the left of the graph.
        k2end += 2
      elsif y2 > len2
        # Ran off the top of the graph.
        k2start += 2
      elsif !front
        k1_offset = v_offset + delta - k2
        if k1_offset >= 0 && k1_offset < v_length && forward[k1_offset] != -1
          x1 = forward[k1_offset]
          y1 = v_offset + x1 - k1_offset
          # Mirror the reverse x onto the top-left coordinate system.
          mirrored_x2 = len1 - x2
          # Overlap detected.
          return diff_bisectSplit(text1, text2, x1, y1, deadline) if x1 >= mirrored_x2
        end
      end
    end
  end

  # Either the deadline was hit or the texts share nothing at all.
  [[:delete, text1], [:insert, text2]]
end
304
+
305
+ # Given the location of the 'middle snake', split the diff in two parts
306
+ # and recurse.
307
def diff_bisectSplit(text1, text2, x, y, deadline)
  # Cuts text1 at x and text2 at y, diffs the two front pieces and the two
  # back pieces one after the other (line mode off), and returns the
  # concatenated result.
  front_diffs = diff_main(text1[0...x], text2[0...y], false, deadline)
  back_diffs = diff_main(text1[x..-1], text2[y..-1], false, deadline)

  front_diffs + back_diffs
end
319
+
320
+ # Split two texts into an array of strings. Reduce the texts to a string
321
+ # of hashes where each Unicode character represents one line.
322
def diff_linesToChars(text1, text2)
  # Returns [encoded_text1, encoded_text2, line_array]. In the encoded
  # strings each character's code point is the index of a line in line_array.
  line_array = [''] # Index 0 holds an empty string, so real lines start at 1.
  line_hash = {}    # Maps a line to its index in line_array.

  encoded = [text1, text2].map do |text|
    text.each_line.reduce('') do |chars, line|
      line_index = line_hash[line]
      unless line_index
        # First sighting of this line: register it under the next free index.
        line_index = line_array.length
        line_hash[line] = line_index
        line_array.push(line)
      end
      chars + line_index.chr(Encoding::UTF_8)
    end
  end

  encoded.push(line_array)
end
342
+
343
+ # Rehydrate the text in a diff from a string of line hashes to real lines of text.
344
def diff_charsToLines(diffs, line_array)
  # Replaces, in place, the text of every diff entry: each character is
  # looked up by code point in line_array and the results are concatenated.
  diffs.each do |entry|
    line_indexes = entry[1].each_char.map(&:ord)
    entry[1] = line_indexes.map { |line_index| line_array[line_index] }.join
  end
end
349
+
350
+ # Determine the common prefix of two strings.
351
def diff_commonPrefix(text1, text2)
  # Returns the number of leading characters the two strings share.

  # Nothing in common if either is empty or the first characters differ.
  return 0 if text1.empty? || text2.empty? || text1[0] != text2[0]

  # Binary search on the prefix length. Everything before `low` is already
  # known to match, so each probe only compares the slice low...probe.
  low = 0
  high = [text1.length, text2.length].min
  probe = high

  while low < probe
    if text1[low, probe - low] == text2[low, probe - low]
      low = probe
    else
      high = probe
    end
    probe = low + (high - low) / 2
  end

  probe
end
374
+
375
+ # Determine the common suffix of two strings.
376
def diff_commonSuffix(text1, text2)
  # Returns the number of trailing characters the two strings share.

  # Nothing in common if either is empty or the last characters differ.
  return 0 if text1.empty? || text2.empty? || text1[-1] != text2[-1]

  # Binary search on the suffix length. The last `low` characters are already
  # known to match, so each probe only compares the characters before them.
  low = 0
  high = [text1.length, text2.length].min
  probe = high

  while low < probe
    if text1[-probe, probe - low] == text2[-probe, probe - low]
      low = probe
    else
      high = probe
    end
    probe = low + (high - low) / 2
  end

  probe
end
399
+
400
+ # Determine if the suffix of one string is the prefix of another.
401
+ def diff_commonOverlap(text1, text2)
402
+ # Cache the text lengths to prevent multiple calls.
403
+ text1_length = text1.length
404
+ text2_length = text2.length
405
+
406
+ # Eliminate the null case.
407
+ return 0 if text1_length.zero? || text2_length.zero?
408
+
409
+ # Truncate the longer string.
410
+ if text1_length > text2_length
411
+ text1 = text1[-text2_length..-1]
412
+ else
413
+ text2 = text2[0...text1_length]
414
+ end
415
+ text_length = [text1_length, text2_length].min
416
+
417
+ # Quick check for the whole case.
418
+ return text_length if text1 == text2
419
+
420
+ # Start by looking for a single character match
421
+ # and increase length until no match is found.
422
+ # Performance analysis: http://neil.fraser.name/news/2010/11/04/
423
+ best = 0
424
+ length = 1
425
+ loop do
426
+ pattern = text1[(text_length - length)..-1]
427
+ found = text2.index(pattern)
428
+
429
+ return best if found.nil?
430
+
431
+ length += found
432
+ if found == 0 || text1[(text_length - length)..-1] == text2[0..length]
433
+ best = length
434
+ length += 1
435
+ end
436
+ end
437
+ end
438
+
439
+ # Does a substring of shorttext exist within longtext such that the
440
+ # substring is at least half the length of longtext?
441
def diff_halfMatchI(longtext, shorttext, i)
  # Uses the quarter-length slice of longtext starting at i as a seed, finds
  # every occurrence of it in shorttext and extends each hit in both
  # directions. Returns [long_a, long_b, short_a, short_b, common] when the
  # best common stretch is at least half of longtext, otherwise nil.
  seed = longtext[i, longtext.length / 4]
  best_common = ''
  best_longtext_a = nil
  best_longtext_b = nil
  best_shorttext_a = nil
  best_shorttext_b = nil

  hit = shorttext.index(seed, 0)
  while hit
    prefix_length = diff_commonPrefix(longtext[i..-1], shorttext[hit..-1])
    suffix_length = diff_commonSuffix(longtext[0...i], shorttext[0...hit])
    if best_common.length < suffix_length + prefix_length
      best_common = shorttext[(hit - suffix_length)...(hit + prefix_length)]
      best_longtext_a = longtext[0...(i - suffix_length)]
      best_longtext_b = longtext[(i + prefix_length)..-1]
      best_shorttext_a = shorttext[0...(hit - suffix_length)]
      best_shorttext_b = shorttext[(hit + prefix_length)..-1]
    end
    hit = shorttext.index(seed, hit + 1)
  end

  return nil unless best_common.length * 2 >= longtext.length

  [best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common]
end
461
+
462
+ # Do the two texts share a substring which is at least half the length of the
463
+ # longer text?
464
+ # This speedup can produce non-minimal diffs.
465
def diff_halfMatch(text1, text2)
  # Looks for a substring shared by both texts that is at least half the
  # length of the longer one. Returns
  # [text1_a, text1_b, text2_a, text2_b, mid_common] or nil.
  # This speedup can produce non-minimal diffs.

  # With unlimited time there is no reason to risk a non-optimal diff.
  return nil if diff_timeout <= 0

  shorttext, longtext = [text1, text2].sort_by(&:length)
  # Pointless when the texts are too short or too different in size.
  return nil if longtext.length < 4 || shorttext.length * 2 < longtext.length

  # Seed once from the second quarter and once from the third quarter.
  hm1 = diff_halfMatchI(longtext, shorttext, (longtext.length + 3) / 4)
  hm2 = diff_halfMatchI(longtext, shorttext, (longtext.length + 1) / 2)

  hm = if hm1 && hm2
         # Both matched: keep the one with the longer common part.
         hm1[4].length > hm2[4].length ? hm1 : hm2
       else
         hm1 || hm2
       end
  return nil if hm.nil?

  # The helper reports long-text pieces first; put text1's pieces first.
  long_a, long_b, short_a, short_b, mid_common = hm
  if text1.length > text2.length
    [long_a, long_b, short_a, short_b, mid_common]
  else
    [short_a, short_b, long_a, long_b, mid_common]
  end
end
497
+
498
+ # Reduce the number of edits by eliminating semantically trivial equalities.
499
def diff_cleanupSemantic(diffs)
  # Rewrites `diffs` in place in three stages:
  #   1. equalities no longer than the edits on both sides become a
  #      delete + insert pair;
  #   2. the lossless boundary shift is applied;
  #   3. overlaps between an adjacent deletion and insertion are pulled out
  #      as equalities.
  changes = false
  equalities = []     # Stack of indices where equalities are found.
  last_equality = nil # Text of the equality on top of the stack.
  pointer = 0         # Index of current position.
  # Number of characters that changed prior to the equality.
  length_insertions1 = 0
  length_deletions1 = 0
  # Number of characters that changed after the equality.
  length_insertions2 = 0
  length_deletions2 = 0

  while pointer < diffs.length
    if diffs[pointer][0] == :equal # Equality found.
      equalities.push(pointer)
      length_insertions1 = length_insertions2
      length_deletions1 = length_deletions2
      length_insertions2 = 0
      length_deletions2 = 0
      last_equality = diffs[pointer][1]
    else # An insertion or deletion.
      if diffs[pointer][0] == :insert
        length_insertions2 += diffs[pointer][1].length
      else
        length_deletions2 += diffs[pointer][1].length
      end

      if last_equality &&
         last_equality.length <= [length_insertions1, length_deletions1].max &&
         last_equality.length <= [length_insertions2, length_deletions2].max
        # Duplicate record.
        diffs[equalities.last, 0] = [[:delete, last_equality]]

        # Change second copy to insert.
        diffs[equalities.last + 1][0] = :insert

        # Throw away the equality we just deleted.
        equalities.pop
        # Throw away the previous equality (it needs to be reevaluated).
        equalities.pop
        pointer = equalities.last || -1

        # Reset the counters.
        length_insertions1 = 0
        length_deletions1 = 0
        length_insertions2 = 0
        length_deletions2 = 0
        last_equality = nil

        changes = true
      end
    end
    pointer += 1
  end

  # Normalize the diff.
  diff_cleanupMerge(diffs) if changes
  diff_cleanupSemanticLossless(diffs)

  # Find any overlaps between deletions and insertions.
  # e.g: <del>abcxxx</del><ins>xxxdef</ins>
  #   -> <del>abc</del>xxx<ins>def</ins>
  # e.g: <del>xxxabc</del><ins>defxxx</ins>
  #   -> <ins>def</ins>xxx<del>abc</del>
  # Only extract an overlap if it is as big as the edit ahead or behind it.
  pointer = 1
  while pointer < diffs.length
    if diffs[pointer - 1][0] == :delete && diffs[pointer][0] == :insert
      deletion = diffs[pointer - 1][1]
      insertion = diffs[pointer][1]
      overlap_length1 = diff_commonOverlap(deletion, insertion)
      overlap_length2 = diff_commonOverlap(insertion, deletion)
      if overlap_length1 >= overlap_length2
        if overlap_length1 >= deletion.length / 2.0 ||
           overlap_length1 >= insertion.length / 2.0
          # Overlap found. Insert an equality and trim the surrounding edits.
          diffs[pointer, 0] = [[:equal, insertion[0...overlap_length1]]]
          diffs[pointer - 1][0] = :delete
          # Use an explicit end index: `0...-overlap` would be `0...0` for
          # a zero overlap and silently drop the whole deletion.
          diffs[pointer - 1][1] = deletion[0...(deletion.length - overlap_length1)]
          diffs[pointer + 1][0] = :insert
          diffs[pointer + 1][1] = insertion[overlap_length1..-1]
          pointer += 1
        end
      else
        if overlap_length2 >= deletion.length / 2.0 ||
           overlap_length2 >= insertion.length / 2.0
          # Reverse overlap found. Insert an equality and swap and trim the
          # surrounding edits.
          diffs[pointer, 0] = [[:equal, deletion[0...overlap_length2]]]
          diffs[pointer - 1][0] = :insert
          diffs[pointer - 1][1] = insertion[0...(insertion.length - overlap_length2)]
          diffs[pointer + 1][0] = :delete
          diffs[pointer + 1][1] = deletion[overlap_length2..-1]
          pointer += 1
        end
      end
      pointer += 1
    end
    pointer += 1
  end
end
600
+
601
+ # Given two strings, compute a score representing whether the
602
+ # internal boundary falls on logical boundaries.
603
+ # Scores range from 5 (best) to 0 (worst).
604
def diff_cleanupSemanticScore(one, two)
  # Scores the boundary between `one` and `two`:
  #   5 - either string is empty (an edge),
  #   4 - blank line at the boundary,
  #   3 - line break, 2 - whitespace, 1 - non-alphanumeric, 0 - otherwise.
  # Each level is only considered when the previous one applies.

  # Edges are the best.
  return 5 if one.empty? || two.empty?

  non_word_character = /[^a-zA-Z0-9]/
  whitespace = /\s/
  linebreak = /[\r\n]/
  # \z and \A anchor to the string itself. Ruby's $ and ^ match at every
  # line boundary, so they would also fire on blank lines in the middle of
  # either string.
  line_end = /\n\r?\n\z/
  line_start = /\A\r?\n\r?\n/

  before = one[-1]
  after = two[0]

  # Each port of this function behaves slightly differently due to subtle
  # differences in each language's definition of things like 'whitespace'.
  # Since the purpose is largely cosmetic, native features are used.
  return 0 unless before =~ non_word_character || after =~ non_word_character
  return 1 unless before =~ whitespace || after =~ whitespace
  return 2 unless before =~ linebreak || after =~ linebreak
  return 3 unless one =~ line_end || two =~ line_start

  4
end
642
+
643
+ # Look for single edits surrounded on both sides by equalities
644
+ # which can be shifted sideways to align the edit to a word boundary.
645
+ # e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
646
def diff_cleanupSemanticLossless(diffs)
  # For every single edit that sits between two equalities, slides the edit
  # sideways to the position with the best boundary score and writes the
  # result back into `diffs` in place.
  # e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
  pointer = 1
  # Intentionally ignore the first and last element (don't need checking).
  while pointer < diffs.length - 1
    if diffs[pointer - 1][0] == :equal && diffs[pointer + 1][0] == :equal
      # This is a single edit surrounded by equalities.
      equality1 = diffs[pointer - 1][1]
      edit = diffs[pointer][1]
      equality2 = diffs[pointer + 1][1]

      # First, shift the edit as far left as possible.
      common_offset = diff_commonSuffix(equality1, edit)
      if common_offset != 0
        common_string = edit[-common_offset..-1]
        equality1 = equality1[0...-common_offset]
        edit = common_string + edit[0...-common_offset]
        equality2 = common_string + equality2
      end

      # Second, step character by character right, looking for the best fit.
      best_equality1 = equality1
      best_edit = edit
      best_equality2 = equality2
      best_score = diff_cleanupSemanticScore(equality1, edit) +
                   diff_cleanupSemanticScore(edit, equality2)
      # The emptiness guards matter: ''[0] is nil, so two empty strings
      # would otherwise compare equal and nil would be appended to a String.
      while !edit.empty? && !equality2.empty? && edit[0] == equality2[0]
        equality1 += edit[0]
        edit = edit[1..-1] + equality2[0]
        equality2 = equality2[1..-1]
        score = diff_cleanupSemanticScore(equality1, edit) +
                diff_cleanupSemanticScore(edit, equality2)
        # The >= encourages trailing rather than leading whitespace on edits.
        if score >= best_score
          best_score = score
          best_equality1 = equality1
          best_edit = edit
          best_equality2 = equality2
        end
      end

      if diffs[pointer - 1][1] != best_equality1
        # We have an improvement, save it back to the diff.
        if best_equality1.empty?
          # The leading equality vanished; the edit moves down one slot.
          diffs[pointer - 1, 1] = []
          pointer -= 1
        else
          diffs[pointer - 1][1] = best_equality1
        end

        diffs[pointer][1] = best_edit

        if best_equality2.empty?
          diffs[pointer + 1, 1] = []
          pointer -= 1
        else
          diffs[pointer + 1][1] = best_equality2
        end
      end
    end

    pointer += 1
  end
end
709
+
710
+ # Reduce the number of edits by eliminating operationally trivial equalities.
711
def diff_cleanupEfficiency(diffs)
  # Rewrites `diffs` in place: short equalities (below diff_editCost) that
  # are surrounded by edits are turned into a delete + insert pair, and the
  # list is merged afterwards if anything changed.
  changes = false
  equalities = []    # Stack of indices where equalities are found.
  last_equality = '' # Text of the candidate equality on top of the stack.
  pointer = 0        # Index of current position.
  pre_ins = false    # Is there an insertion operation before the last equality.
  pre_del = false    # Is there a deletion operation before the last equality.
  post_ins = false   # Is there an insertion operation after the last equality.
  post_del = false   # Is there a deletion operation after the last equality.

  while pointer < diffs.length
    if diffs[pointer][0] == :equal # Equality found.
      if diffs[pointer][1].length < diff_editCost && (post_ins || post_del)
        # Candidate found.
        equalities.push(pointer)
        pre_ins = post_ins
        pre_del = post_del
        last_equality = diffs[pointer][1]
      else
        # Not a candidate, and can never become one.
        equalities.clear
        last_equality = ''
      end
      post_ins = false
      post_del = false
    else # An insertion or deletion.
      if diffs[pointer][0] == :delete
        post_del = true
      else
        post_ins = true
      end

      # Five types to be split:
      # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
      # <ins>A</ins>X<ins>C</ins><del>D</del>
      # <ins>A</ins><del>B</del>X<ins>C</ins>
      # <del>A</del>X<ins>C</ins><del>D</del>
      # <ins>A</ins><del>B</del>X<del>C</del>

      if !last_equality.empty? &&
         ((pre_ins && pre_del && post_ins && post_del) ||
          ((last_equality.length < diff_editCost / 2) &&
           [pre_ins, pre_del, post_ins, post_del].count(true) == 3))
        # Duplicate record.
        diffs[equalities.last, 0] = [[:delete, last_equality]]
        # Change second copy to insert.
        diffs[equalities.last + 1][0] = :insert
        equalities.pop # Throw away the equality we just deleted.
        last_equality = ''
        if pre_ins && pre_del
          # No changes made which could affect previous entry, keep going.
          post_ins = true
          post_del = true
          equalities.clear
        else
          # Throw away the previous equality (if any) and step back to the
          # one before it. With an empty stack the scan restarts from the
          # beginning, so edits before the split are seen again.
          equalities.pop
          pointer = equalities.last || -1
          post_ins = false
          post_del = false
        end
        changes = true
      end
    end
    pointer += 1
  end

  diff_cleanupMerge(diffs) if changes
end
783
+
784
+ # Reorder and merge like edit sections. Merge equalities.
785
+ # Any edit section can move as long as it doesn't cross an equality.
786
def diff_cleanupMerge(diffs)
  # Normalises `diffs` in place. The first pass merges runs of edits into
  # at most one delete plus one insert, and joins neighbouring equalities.
  # The second pass shifts single edits over an adjacent equality when that
  # removes an equality, then runs the whole cleanup again.
  diffs.push([:equal, '']) # Sentinel so the final run is flushed too.
  index = 0
  deletes = 0
  inserts = 0
  deleted = ''
  inserted = ''

  while index < diffs.length
    case diffs[index][0]
    when :insert
      inserts += 1
      inserted += diffs[index][1]
      index += 1
    when :delete
      deletes += 1
      deleted += diffs[index][1]
      index += 1
    when :equal
      run_length = deletes + inserts
      if run_length > 1
        if deletes != 0 && inserts != 0
          # Factor out any common prefix of the two merged texts.
          shared = diff_commonPrefix(inserted, deleted)
          if shared != 0
            before_run = index - run_length - 1
            if before_run >= 0 && diffs[before_run][0] == :equal
              diffs[before_run][1] += inserted[0...shared]
            else
              diffs.unshift([:equal, inserted[0...shared]])
              index += 1
            end
            inserted = inserted[shared..-1]
            deleted = deleted[shared..-1]
          end
          # Factor out any common suffix into the equality we are sitting on.
          shared = diff_commonSuffix(inserted, deleted)
          if shared != 0
            diffs[index][1] = inserted[-shared..-1] + diffs[index][1]
            inserted = inserted[0...-shared]
            deleted = deleted[0...-shared]
          end
        end

        # Replace the whole run with the merged record(s).
        merged = []
        merged.push([:delete, deleted]) unless deletes.zero?
        merged.push([:insert, inserted]) unless inserts.zero?
        run_start = index - run_length
        diffs[run_start, run_length] = merged
        index = run_start + merged.length + 1
      elsif index != 0 && diffs[index - 1][0] == :equal
        # Merge this equality with the previous one.
        diffs[index - 1][1] += diffs[index][1]
        diffs[index, 1] = []
      else
        index += 1
      end
      inserts = 0
      deletes = 0
      deleted = ''
      inserted = ''
    end
  end

  # Drop the sentinel if it is still there as an empty entry.
  diffs.pop if diffs.last[1].empty?

  # Second pass: look for single edits surrounded on both sides by
  # equalities which can be shifted sideways to eliminate an equality.
  # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
  shifted = false
  index = 1

  # The first and last element never need checking.
  while index < diffs.length - 1
    previous = diffs[index - 1]
    current = diffs[index]
    following = diffs[index + 1]
    if previous[0] == :equal && following[0] == :equal
      if current[1][-previous[1].length..-1] == previous[1]
        # Shift the edit over the previous equality.
        current[1] = previous[1] + current[1][0...-previous[1].length]
        following[1] = previous[1] + following[1]
        diffs[index - 1, 1] = []
        shifted = true
      elsif current[1][0...following[1].length] == following[1]
        # Shift the edit over the next equality.
        previous[1] += following[1]
        current[1] = current[1][following[1].length..-1] + following[1]
        diffs[index + 1, 1] = []
        shifted = true
      end
    end
    index += 1
  end

  # If shifts were made, the diff needs reordering and another shift sweep.
  diff_cleanupMerge(diffs) if shifted
end
895
+
896
# Translate a location in text1 into the equivalent location in text2,
# e.g. 'The cat' vs 'The big cat': 1 -> 1, 5 -> 8.
#
# diffs - Array of [operation, text] pairs (:equal, :insert or :delete).
# loc   - Integer offset into text1.
#
# Returns the Integer offset into text2. A location at or beyond the end of
# text1 is mapped relative to the end of the last diff.
def diff_xIndex(diffs, loc)
  chars1 = 0
  chars2 = 0
  last_chars1 = 0
  last_chars2 = 0

  # Walk the diffs until text1 has been consumed past loc, remembering the
  # totals as they were just before the diff that overshoots.
  overshoot = diffs.find do |op, data|
    chars1 += data.length if op != :insert
    chars2 += data.length if op != :delete
    if chars1 > loc
      true
    else
      last_chars1 = chars1
      last_chars2 = chars2
      false
    end
  end

  # overshoot is nil when loc lies at or past the end of text1; indexing the
  # diff list with nil used to raise TypeError in that case.
  if overshoot && overshoot[0] == :delete
    # The location was deleted.
    last_chars2
  else
    # Add the remaining character length.
    last_chars2 + (loc - last_chars1)
  end
end
927
+
928
# Convert a diff array into a pretty HTML report.
#
# diffs - Array of [operation, text] pairs.
#
# Returns a String in which insertions are wrapped in <ins>, deletions in
# <del> and equalities in <span>. '&', '<' and '>' are HTML-escaped and every
# newline is rendered as '&para;<br>'.
def diff_prettyHtml(diffs)
  diffs.map do |op, data|
    # The newline pattern must be double-quoted: '\n' in single quotes is a
    # literal backslash followed by 'n' and never matches a real line break.
    text = data.gsub('&', '&amp;')
               .gsub('<', '&lt;')
               .gsub('>', '&gt;')
               .gsub("\n", '&para;<br>')
    case op
    when :insert
      "<ins style=\"background:#e6ffe6;\">#{text}</ins>"
    when :delete
      "<del style=\"background:#ffe6e6;\">#{text}</del>"
    when :equal
      "<span>#{text}</span>"
    end
  end.join
end
942
+
943
# Rebuild the source text from a diff: the concatenation of every equality
# and deletion, in order. Insertions contribute nothing.
def diff_text1(diffs)
  diffs.reject { |op, _| op == :insert }.map { |_, data| data }.join
end
953
+
954
# Rebuild the destination text from a diff: the concatenation of every
# equality and insertion, in order. Deletions contribute nothing.
def diff_text2(diffs)
  diffs.reject { |op, _| op == :delete }.map { |_, data| data }.join
end
964
+
965
# Compute the Levenshtein distance of a diff: the number of inserted, deleted
# or substituted characters.
#
# Insertions and deletions are tallied per run of edits; each equality closes
# the run and contributes the larger of the two tallies, because a deletion
# paired with an insertion counts as a single substitution.
def diff_levenshtein(diffs)
  distance = 0
  pending_inserts = 0
  pending_deletes = 0

  diffs.each do |op, data|
    if op == :insert
      pending_inserts += data.length
    elsif op == :delete
      pending_deletes += data.length
    elsif op == :equal
      distance += pending_inserts > pending_deletes ? pending_inserts : pending_deletes
      pending_inserts = 0
      pending_deletes = 0
    end
  end

  # Account for a trailing run of edits that no equality closed.
  distance + (pending_inserts > pending_deletes ? pending_inserts : pending_deletes)
end
988
+
989
# Crush the diff into an encoded string which describes the operations
# required to transform text1 into text2.
# E.g. =3\t-2\t+ing -> keep 3 chars, delete 2 chars, insert 'ing'.
# Operations are tab-separated. Inserted text is escaped using %XX notation
# (one %XX per byte of the character); spaces are left unescaped.
#
# diffs - Array of [operation, text] pairs.
#
# Returns the delta String.
def diff_toDelta(diffs)
  # Characters outside this set are percent-encoded. The encoding is done by
  # hand because URI.encode was removed in Ruby 3.0.
  unsafe = /[^0-9A-Za-z_.;!~*'(),\/?:@&=+$\#-]/

  tokens = diffs.map do |op, data|
    case op
    when :insert
      '+' + data.gsub(unsafe) { |char| char.bytes.map { |byte| format('%%%02X', byte) }.join }
    when :delete
      '-' + data.length.to_s
    when :equal
      '=' + data.length.to_s
    end
  end

  # Spaces were escaped above along with everything else; restore them.
  tokens.join("\t").gsub('%20', ' ')
end
1005
+
1006
# Given the original text1, and an encoded string which describes the
# operations required to transform text1 into text2, compute the full diff.
#
# text1 - The source String the delta was computed against.
# delta - Tab-separated delta String ("=n", "-n" and "+text" tokens).
#
# Returns an Array of [operation, text] pairs.
# Raises ArgumentError on an unknown operation, on a count that is not a
# non-negative integer, or when the delta does not consume exactly
# text1.length characters.
def diff_fromDelta(text1, delta)
  # Deltas should be composed of a subset of ASCII chars. The converted copy
  # is discarded; the call only serves to raise on non-ASCII input.
  delta.encode('ascii')
  diffs = []
  pointer = 0 # Cursor in text1

  delta.split("\t").each do |token|
    # Blank tokens (e.g. from doubled tabs) carry no operation.
    next if token.empty?

    # Each token begins with a one character parameter which specifies the
    # operation of this token (delete, insert, equality).
    param = token[1..-1]
    case token[0]
    when '+'
      # Percent-decode byte-wise, then reinterpret the bytes as UTF-8.
      # Done by hand because URI.decode was removed in Ruby 3.0.
      raw = param.dup.force_encoding(Encoding::BINARY)
      decoded = raw.gsub(/%(\h\h)/) { Regexp.last_match(1).hex.chr }
      diffs.push([:insert, decoded.force_encoding(Encoding::UTF_8)])
    when '-', '='
      begin
        n = Integer(param)
      rescue ArgumentError
        raise ArgumentError, "Invalid number in diff_fromDelta: #{param.inspect}"
      end
      # A bare `raise` here would produce a RuntimeError; report bad counts
      # with the same exception class as every other malformed delta.
      raise ArgumentError, "Negative number in diff_fromDelta: #{param.inspect}" if n < 0

      text = text1[pointer...(pointer + n)]
      pointer += n
      diffs.push([token[0] == '=' ? :equal : :delete, text])
    else
      raise ArgumentError, "Invalid diff operation in diff_fromDelta: #{token.inspect}"
    end
  end

  if pointer != text1.length
    raise ArgumentError, "Delta length (#{pointer}) does not equal " \
                         "source text length #{text1.length}"
  end
  diffs
end
1047
+
1048
# Locate the best instance of 'pattern' in 'text' near 'loc'.
#
# Returns 0 when text and pattern are identical, -1 when text is empty, loc
# (clamped to 0..text.length) when pattern sits exactly there, and otherwise
# whatever the fuzzy match_bitap search reports.
# Raises ArgumentError when text or pattern is nil.
def match_main(text, pattern, loc)
  # Check for null inputs.
  if text.nil? || pattern.nil?
    raise ArgumentError.new("Null input. (match_main)")
  end

  # Clamp loc into the text.
  loc = text.length if loc > text.length
  loc = 0 if loc < 0

  # Shortcut (potentially not guaranteed by the algorithm).
  return 0 if text == pattern
  # Nothing to match.
  return -1 if text.empty?
  # Perfect match at the perfect spot! (Includes case of empty pattern.)
  return loc if text[loc, pattern.length] == pattern

  # Do a fuzzy compare.
  match_bitap(text, pattern, loc)
end
1070
+
1071
# Locate the best instance of 'pattern' in 'text' near 'loc' using the
# Bitap algorithm.
#
# text    - String to search.
# pattern - String to look for; at most match_maxBits characters long.
# loc     - Integer location to search around.
#
# Returns the Integer index of the best match, or -1 when nothing scores
# within match_threshold.
# Raises ArgumentError when the pattern is longer than match_maxBits.
def match_bitap(text, pattern, loc)
  # `raise`, not `throw`: throw is catch/throw control flow and would surface
  # as an uncaught-throw error with a wrapped message.
  raise ArgumentError, 'Pattern too long' if pattern.length > match_maxBits

  # Initialise the alphabet.
  s = match_alphabet(pattern)

  # Compute and return the score for a match with e errors and x location.
  # Lower is better.
  match_bitapScore = lambda do |e, x|
    accuracy = e.to_f / pattern.length
    proximity = (loc - x).abs
    if match_distance == 0
      # Dodge divide by zero error.
      proximity == 0 ? accuracy : 1.0
    else
      accuracy + (proximity.to_f / match_distance)
    end
  end

  # Highest score beyond which we give up.
  score_threshold = match_threshold
  # Is there a nearby exact match? (speedup)
  best_loc = text.index(pattern, loc)
  if best_loc
    score_threshold = [match_bitapScore[0, best_loc], score_threshold].min
    # What about in the other direction? (speedup)
    best_loc = text.rindex(pattern, loc + pattern.length)
    if best_loc
      score_threshold = [match_bitapScore[0, best_loc], score_threshold].min
    end
  end

  # Initialise the bit arrays.
  match_mask = 1 << (pattern.length - 1)
  best_loc = -1

  bin_max = pattern.length + text.length
  # Bit array of the previous error level; unused while d == 0.
  last_rd = nil
  pattern.length.times do |d|
    # Scan for the best match; each iteration allows for one more error.
    # Run a binary search to determine how far from 'loc' we can stray at this
    # error level.
    bin_min = 0
    bin_mid = bin_max
    while bin_min < bin_mid
      if match_bitapScore[d, loc + bin_mid] <= score_threshold
        bin_min = bin_mid
      else
        bin_max = bin_mid
      end
      bin_mid = (bin_max - bin_min) / 2 + bin_min
    end

    # Use the result from this iteration as the maximum for the next.
    bin_max = bin_mid
    start = [1, loc - bin_mid + 1].max
    finish = [loc + bin_mid, text.length].min + pattern.length

    rd = Array.new(finish + 2, 0)
    rd[finish + 1] = (1 << d) - 1
    finish.downto(start) do |j|
      # Positions past the end of text yield nil and therefore a zero mask.
      char_match = s[text[j - 1]] || 0
      if d == 0 # First pass: exact match.
        rd[j] = ((rd[j + 1] << 1) | 1) & char_match
      else # Subsequent passes: fuzzy match.
        rd[j] = ((rd[j + 1] << 1) | 1) & char_match |
                (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
      end
      if (rd[j] & match_mask).nonzero?
        score = match_bitapScore[d, j - 1]
        # This match will almost certainly be better than any existing match.
        # But check anyway.
        if score <= score_threshold
          # Told you so.
          score_threshold = score
          best_loc = j - 1
          if best_loc > loc
            # When passing loc, don't exceed our current distance from loc.
            start = [1, 2 * loc - best_loc].max
          else
            # Already passed loc, downhill from here on in.
            break
          end
        end
      end
    end

    # No hope for a (better) match at greater error levels.
    break if match_bitapScore[d + 1, loc] > score_threshold

    last_rd = rd
  end

  best_loc
end
1170
+
1171
# Build the alphabet table for the Bitap algorithm.
#
# Returns a Hash mapping each character of pattern to an Integer bitmask with
# one bit set per position at which that character occurs; the first
# character of the pattern owns the highest bit.
def match_alphabet(pattern)
  highest_bit = pattern.length - 1
  pattern.each_char.with_index.each_with_object({}) do |(char, position), masks|
    masks[char] = (masks[char] || 0) | (1 << (highest_bit - position))
  end
end
1180
+
1181
# Parse a textual representation of patches and return a list of patch
# objects.
#
# textline - String holding zero or more patches, each introduced by a
#            "@@ -start1,length1 +start2,length2 @@" header and followed by
#            lines prefixed with '-', '+' or ' ' whose payload is
#            percent-encoded.
#
# Returns an Array of PatchObj (empty for an empty string).
# Raises ArgumentError on a malformed header or an unknown line prefix.
def patch_fromText(textline)
  return [] if textline.empty?

  patches = []
  text = textline.split("\n")
  text_pointer = 0
  patch_header = /^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/
  while text_pointer < text.length
    m = text[text_pointer].match(patch_header)
    if m.nil?
      raise ArgumentError.new("Invalid patch string: #{text[text_pointer]}")
    end
    patch = PatchObj.new
    patches.push(patch)

    # Header starts are 1-based unless the length is zero; an omitted length
    # means a length of one.
    patch.start1 = m[1].to_i
    if m[2].empty?
      patch.start1 -= 1
      patch.length1 = 1
    elsif m[2] == '0'
      patch.length1 = 0
    else
      patch.start1 -= 1
      patch.length1 = m[2].to_i
    end

    patch.start2 = m[3].to_i
    if m[4].empty?
      patch.start2 -= 1
      patch.length2 = 1
    elsif m[4] == '0'
      patch.length2 = 0
    else
      patch.start2 -= 1
      patch.length2 = m[4].to_i
    end
    text_pointer += 1

    while text_pointer < text.length
      if text[text_pointer].empty?
        # Blank lines are skipped.
        text_pointer += 1
        next
      end

      sign = text[text_pointer][0]
      # Percent-decode byte-wise, then reinterpret the bytes as UTF-8.
      # Done by hand because URI.decode was removed in Ruby 3.0.
      raw = text[text_pointer][1..-1].force_encoding(Encoding::BINARY)
      line = raw.gsub(/%(\h\h)/) { Regexp.last_match(1).hex.chr }
                .force_encoding(Encoding::UTF_8)

      case sign
      when '-'
        # Deletion.
        patch.diffs.push([:delete, line])
      when '+'
        # Insertion.
        patch.diffs.push([:insert, line])
      when ' '
        # Minor equality.
        patch.diffs.push([:equal, line])
      when '@'
        # Start of next patch.
        break
      else
        raise ArgumentError.new("Invalid patch mode \"#{sign}\" in: #{line}")
      end
      text_pointer += 1
    end
  end

  patches
end
1253
+
1254
# Serialise a list of patches: the string form (to_s) of every patch,
# concatenated in order with no separator.
def patch_toText(patches)
  patches.map(&:to_s).join
end
1258
+
1259
# Increase the context until it is unique,
# but don't let the pattern expand beyond match_maxBits.
#
# patch - Patch object whose start1/start2/length1/length2/diffs are updated
#         in place with the surrounding equalities.
# text  - The String the patch applies to.
#
# Returns nothing meaningful; does nothing when text is empty.
def patch_addContext(patch, text)
  return if text.empty?

  pattern = text[patch.start2, patch.length1]
  padding = 0

  # Look for the first and last matches of pattern in text. If two different
  # matches are found, increase the pattern length.
  while text.index(pattern) != text.rindex(pattern) &&
        pattern.length < match_maxBits - 2 * patch_margin
    padding += patch_margin
    pattern = text[[0, patch.start2 - padding].max...(patch.start2 + patch.length1 + padding)]
  end

  # Add one chunk for good luck.
  padding += patch_margin

  # Slicing past the end of text yields nil; normalise to '' once so that
  # both the emptiness checks and the length arithmetic below are safe.
  prefix = text[[0, patch.start2 - padding].max...patch.start2].to_s
  suffix = text[patch.start2 + patch.length1, padding].to_s

  # Add the prefix and the suffix.
  patch.diffs.unshift([:equal, prefix]) unless prefix.empty?
  patch.diffs.push([:equal, suffix]) unless suffix.empty?

  # Roll back the start points.
  patch.start1 -= prefix.length
  patch.start2 -= prefix.length

  # Extend the lengths.
  patch.length1 += prefix.length + suffix.length
  patch.length2 += prefix.length + suffix.length
end
1293
+
1294
# Compute a list of patches to turn text1 into text2.
# The diff is used when supplied and computed otherwise.
#
# Accepted call formats:
#   patch_make(text1, text2)         - diff computed here and cleaned up
#   patch_make(diffs)                - text1 reconstructed from the diffs
#   patch_make(text1, diffs)         - optimal
#   patch_make(text1, text2, diffs)  - deprecated; text2 is ignored
#
# Returns an Array of PatchObj (empty when the diff is empty).
# Raises ArgumentError for any other combination of arguments.
def patch_make(*args)
  first, second, third = args

  text1, diffs =
    if args.length == 1 && first.is_a?(Array)
      # Only the diffs are known: recover text1 from them.
      [diff_text1(first), first]
    elsif args.length == 2 && first.is_a?(String) && second.is_a?(String)
      # Two texts: compute the diff ourselves.
      computed = diff_main(first, second, true)
      if computed.length > 2
        diff_cleanupSemantic(computed)
        diff_cleanupEfficiency(computed)
      end
      [first, computed]
    elsif args.length == 2 && first.is_a?(String) && second.is_a?(Array)
      [first, second]
    elsif args.length == 3 && first.is_a?(String) && second.is_a?(String) &&
          third.is_a?(Array)
      # Deprecated format: the second text is not needed.
      [first, third]
    else
      raise ArgumentError.new('Unknown call format to patch_make.')
    end

  return [] if diffs.empty? # Get rid of the null case.

  patches = []
  patch = PatchObj.new
  char_count1 = 0 # Number of characters into the text1 string.
  char_count2 = 0 # Number of characters into the text2 string.
  # The texts are rebuilt as patches are made, to determine context info.
  prepatch_text = text1
  postpatch_text = text1
  final_index = diffs.length - 1

  diffs.each_with_index do |diff, x|
    diff_type, diff_text = diff
    size = diff_text.length

    if patch.diffs.empty? && diff_type != :equal
      # A new patch starts here.
      patch.start1 = char_count1
      patch.start2 = char_count2
    end

    if diff_type == :insert
      patch.diffs.push(diff)
      patch.length2 += size
      postpatch_text = postpatch_text[0...char_count2] + diff_text +
                       postpatch_text[char_count2..-1]
    elsif diff_type == :delete
      patch.length1 += size
      patch.diffs.push(diff)
      postpatch_text = postpatch_text[0...char_count2] +
                       postpatch_text[(char_count2 + size)..-1]
    elsif diff_type == :equal
      if size <= 2 * patch_margin && !patch.diffs.empty? && x != final_index
        # Small equality inside a patch.
        patch.diffs.push(diff)
        patch.length1 += size
        patch.length2 += size
      elsif size >= 2 * patch_margin && !patch.diffs.empty?
        # Time for a new patch.
        patch_addContext(patch, prepatch_text)
        patches.push(patch)
        patch = PatchObj.new
        # Unlike Unidiff, our patch lists have a rolling context.
        # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
        # Update prepatch text & pos to reflect the application of the
        # just completed patch.
        prepatch_text = postpatch_text
        char_count1 = char_count2
      end
    end

    # Update the current character count.
    char_count1 += size unless diff_type == :insert
    char_count2 += size unless diff_type == :delete
  end

  # Pick up the leftover patch if not empty.
  unless patch.diffs.empty?
    patch_addContext(patch, prepatch_text)
    patches.push(patch)
  end

  patches
end
1404
+
1405
# Merge a set of patches onto the text. Return a patched text, as well
# as a list of true/false values indicating which patches were applied.
#
# patches - Array of patch objects; they are deep-copied, so the caller's
#           objects are not modified.
# text    - The String to patch.
#
# Returns a two-element Array: [patched_text, results].
def patch_apply(patches, text)
  return [text, []] if patches.empty?

  # Deep copy the patches so that no changes are made to originals.
  patches = Marshal.load(Marshal.dump(patches))

  null_padding = patch_addPadding(patches)
  text = null_padding + text + null_padding
  patch_splitMax(patches)

  # delta keeps track of the offset between the expected and actual location
  # of the previous patch. If there are patches expected at positions 10 and
  # 20, but the first patch was found at 12, delta is 2 and the second patch
  # has an effective expected position of 22.
  delta = 0
  results = []
  patches.each_with_index do |patch, x|
    expected_loc = patch.start2 + delta
    text1 = diff_text1(patch.diffs)
    end_loc = -1
    if text1.length > match_maxBits
      # patch_splitMax will only provide an oversized pattern in the case of
      # a monster delete.
      start_loc = match_main(text, text1[0, match_maxBits], expected_loc)
      if start_loc != -1
        end_loc = match_main(text, text1[(text1.length - match_maxBits)..-1],
                             expected_loc + text1.length - match_maxBits)
        if end_loc == -1 || start_loc >= end_loc
          # Can't find valid trailing context. Drop this patch.
          start_loc = -1
        end
      end
    else
      start_loc = match_main(text, text1, expected_loc)
    end

    if start_loc == -1
      # No match found. :(
      results[x] = false
      # Subtract the delta for this failed patch from subsequent patches.
      delta -= patch.length2 - patch.length1
    else
      # Found a match. :)
      results[x] = true
      delta = start_loc - expected_loc
      # end_loc + match_maxBits is an end POSITION, so slice with a range;
      # using it as a length would overshoot by start_loc characters.
      text2 = if end_loc == -1
                text[start_loc, text1.length]
              else
                text[start_loc...(end_loc + match_maxBits)]
              end

      if text1 == text2
        # Perfect match, just shove the replacement text in.
        text = text[0, start_loc] + diff_text2(patch.diffs) + text[(start_loc + text1.length)..-1]
      else
        # Imperfect match.
        # Run a diff to get a framework of equivalent indices.
        diffs = diff_main(text1, text2, false)
        if text1.length > match_maxBits &&
           diff_levenshtein(diffs).to_f / text1.length > patch_deleteThreshold
          # The end points match, but the content is unacceptably bad.
          results[x] = false
        else
          diff_cleanupSemanticLossless(diffs)
          index1 = 0
          index2 = nil
          patch.diffs.each do |op, data|
            # Map the position in text1 onto the text actually found.
            index2 = diff_xIndex(diffs, index1) if op != :equal
            if op == :insert # Insertion
              text = text[0, start_loc + index2] + data + text[(start_loc + index2)..-1]
            elsif op == :delete # Deletion
              text = text[0, start_loc + index2] +
                     text[(start_loc + diff_xIndex(diffs, index1 + data.length))..-1]
            end
            index1 += data.length if op != :delete
          end
        end
      end
    end
  end

  # Strip the padding off.
  text = text[null_padding.length...-null_padding.length]
  [text, results]
end
1490
+
1491
# Add some padding on text start and end so that edges can match
# something. Intended to be called only from within patch_apply.
#
# Every patch is shifted forward by patch_margin characters; the first patch
# gets (or has extended) a leading padding equality and the last patch a
# trailing one. The patches are modified in place.
#
# Returns the padding String: the characters with code points
# 1..patch_margin.
def patch_addPadding(patches)
  pad_size = patch_margin
  null_padding = (1..pad_size).map { |code| code.chr(Encoding::UTF_8) }.join

  # Bump all the patches forward.
  patches.each do |each_patch|
    each_patch.start1 += pad_size
    each_patch.start2 += pad_size
  end

  # Add some padding on start of first diff.
  head = patches.first
  leading = head.diffs.first
  if leading.nil? || leading[0] != :equal
    # Add nullPadding equality.
    head.diffs.unshift([:equal, null_padding])
    head.start1 -= pad_size # Should be 0.
    head.start2 -= pad_size # Should be 0.
    head.length1 += pad_size
    head.length2 += pad_size
  elsif pad_size > leading[1].length
    # Grow first equality.
    present = leading[1].length
    missing = pad_size - present
    leading[1] = null_padding[present..-1] + leading[1]
    head.start1 -= missing
    head.start2 -= missing
    head.length1 += missing
    head.length2 += missing
  end

  # Add some padding on end of last diff.
  tail = patches.last
  trailing = tail.diffs.last
  if trailing.nil? || trailing[0] != :equal
    # Add nullPadding equality.
    tail.diffs.push([:equal, null_padding])
    tail.length1 += pad_size
    tail.length2 += pad_size
  elsif pad_size > trailing[1].length
    # Grow last equality.
    missing = pad_size - trailing[1].length
    trailing[1] = trailing[1] + null_padding[0, missing]
    tail.length1 += missing
    tail.length2 += missing
  end

  null_padding
end
1541
+
1542
# Look through the patches and break up any which are longer than the
# maximum limit of the match algorithm (match_maxBits).
#
# patches - Array of patch objects, modified in place: every patch whose
#           length1 exceeds the limit is replaced by several smaller patches
#           that overlap by up to patch_margin characters of context.
def patch_splitMax(patches)
  patch_size = match_maxBits

  x = 0
  while x < patches.length
    if patches[x].length1 > patch_size
      big_patch = patches[x]
      # Remove the big old patch.
      patches[x, 1] = []
      x -= 1
      start1 = big_patch.start1
      start2 = big_patch.start2
      pre_context = ''
      while !big_patch.diffs.empty?
        # Create one of several smaller patches.
        patch = PatchObj.new
        empty = true
        patch.start1 = start1 - pre_context.length
        patch.start2 = start2 - pre_context.length
        unless pre_context.empty?
          patch.length1 = patch.length2 = pre_context.length
          patch.diffs.push([:equal, pre_context])
        end

        while !big_patch.diffs.empty? && patch.length1 < patch_size - patch_margin
          diff = big_patch.diffs.first
          if diff[0] == :insert
            # Insertions are harmless.
            patch.length2 += diff[1].length
            start2 += diff[1].length
            patch.diffs.push(big_patch.diffs.shift)
            empty = false
          elsif diff[0] == :delete && patch.diffs.length == 1 &&
                patch.diffs.first[0] == :equal && diff[1].length > 2 * patch_size
            # This is a large deletion. Let it pass in one chunk.
            patch.length1 += diff[1].length
            start1 += diff[1].length
            empty = false
            patch.diffs.push(big_patch.diffs.shift)
          else
            # Deletion or equality. Only take as much as we can stomach.
            diff_text = diff[1][0, patch_size - patch.length1 - patch_margin]
            patch.length1 += diff_text.length
            start1 += diff_text.length
            if diff[0] == :equal
              patch.length2 += diff_text.length
              start2 += diff_text.length
            else
              empty = false
            end
            patch.diffs.push([diff[0], diff_text])
            if diff_text == big_patch.diffs.first[1]
              big_patch.diffs.shift
            else
              big_patch.diffs.first[1] = big_patch.diffs.first[1][diff_text.length..-1]
            end
          end
        end

        # Compute the head context for the next patch: the last patch_margin
        # characters of this patch's text2, or all of it when it is shorter.
        # (A negative-start slice returns nil for a short string, which used
        # to discard the context entirely.)
        patched_text = diff_text2(patch.diffs)
        pre_context = if patched_text.length > patch_margin
                        patched_text[-patch_margin..-1]
                      else
                        patched_text
                      end

        # Append the end context for this patch.
        post_context = diff_text1(big_patch.diffs)[0...patch_margin] || ''
        unless post_context.empty?
          patch.length1 += post_context.length
          patch.length2 += post_context.length
          if !patch.diffs.empty? && patch.diffs.last[0] == :equal
            patch.diffs.last[1] += post_context
          else
            patch.diffs.push([:equal, post_context])
          end
        end

        # Patches holding nothing but context are dropped.
        unless empty
          x += 1
          patches[x, 0] = [patch]
        end
      end
    end
    x += 1
  end
end
1626
+ end