diff_match_patch 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +23 -0
- data/README.md +11 -0
- data/Rakefile +8 -0
- data/lib/diff_match_patch.rb +1626 -0
- data/lib/patch_obj.rb +52 -0
- data/test/diff_match_patch-test.rb +1208 -0
- metadata +50 -0
data/LICENSE
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Copyright (c) 2011, Jorge Kalmbach <kalmbach.at.gmail.com>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any
|
4
|
+
person obtaining a copy of this software and associated
|
5
|
+
documentation files (the "Software"), to deal in the
|
6
|
+
Software without restriction, including without limitation
|
7
|
+
the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the
|
9
|
+
Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice
|
13
|
+
shall be included in all copies or substantial portions of
|
14
|
+
the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
17
|
+
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
18
|
+
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
19
|
+
PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
|
20
|
+
OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
21
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
22
|
+
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
23
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# DiffMatchPatch
|
2
|
+
|
3
|
+
A ruby implementation of the google diff-match-patch library.
|
4
|
+
http://code.google.com/p/google-diff-match-patch/
|
5
|
+
|
6
|
+
The Diff Match and Patch libraries offer robust algorithms to perform the operations required for synchronizing plain text.
|
7
|
+
|
8
|
+
This work was inspired by the diff_match_patch-ruby module.
|
9
|
+
(https://github.com/reima/diff_match_patch-ruby)
|
10
|
+
|
11
|
+
Copyright (c) 2011, Jorge Kalmbach <kalmbach.at.gmail.com>
|
data/lib/diff_match_patch.rb
ADDED
@@ -0,0 +1,1626 @@
|
|
1
|
+
require 'patch_obj'
|
2
|
+
|
3
|
+
# Class containing the diff, match and patch methods.
|
4
|
+
# Also contains the behaviour settings.
|
5
|
+
class DiffMatchPatch
|
6
|
+
attr_accessor :diff_timeout
|
7
|
+
attr_accessor :diff_editCost
|
8
|
+
attr_accessor :match_threshold
|
9
|
+
attr_accessor :match_distance
|
10
|
+
attr_accessor :patch_deleteThreshold
|
11
|
+
attr_accessor :patch_margin
|
12
|
+
attr_reader :match_maxBits
|
13
|
+
|
14
|
+
def initialize
  # Inits a diff_match_patch object with default settings.
  # Every setting is stored in an instance variable; change a value after
  # construction to override its default.

  # Number of seconds to map a diff before giving up (0 for infinity).
  @diff_timeout = 1

  # Cost of an empty edit operation in terms of edit characters.
  @diff_editCost = 4

  # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
  @match_threshold = 0.5

  # How far to search for a match (0 = exact location, 1000+ = broad match).
  # A match this many characters away from the expected location will add
  # 1.0 to the score (0.0 is a perfect match).
  @match_distance = 1000

  # When deleting a large block of text (over ~64 characters), how close does
  # the contents have to match the expected contents. (0.0 = perfection,
  # 1.0 = very loose). Note that match_threshold controls how closely the
  # end points of a delete need to match.
  @patch_deleteThreshold = 0.5

  # Chunk size for context length.
  @patch_margin = 4

  # The number of bits in an int.
  # Ruby integers (like Python's, where this note originates) have no
  # maximum, so 0 would disable patch splitting. However, to avoid long
  # patches in certain pathological cases, 32 is used. Multiple short patches
  # (using native ints) are much faster than long ones.
  @match_maxBits = 32
end
|
42
|
+
|
43
|
+
|
44
|
+
# Find the differences between two texts. Simplifies the problem by
|
45
|
+
# stripping any common prefix or suffix off the texts before diffing.
|
46
|
+
def diff_main(text1, text2, checklines=true, deadline=nil)
  # text1      - Old string to be diffed.
  # text2      - New string to be diffed.
  # checklines - Passed through to diff_compute; nil is treated as true.
  # deadline   - Time by which the diff should be finished. When nil and
  #              diff_timeout is positive, it defaults to now + diff_timeout.
  #
  # Returns an Array of [operation, text] pairs.
  # Raises ArgumentError when either text is nil.

  # Set a deadline by which time the diff must be complete.
  if deadline.nil? && diff_timeout > 0
    deadline = Time.now + diff_timeout
  end

  # Check for null inputs.
  raise ArgumentError, 'Null inputs. (diff_main)' if text1.nil? || text2.nil?

  # Check for equality (speedup).
  if text1 == text2
    return text1.empty? ? [] : [[:equal, text1]]
  end

  checklines = true if checklines.nil?

  common_prefix = nil
  common_suffix = nil

  # Trim off common prefix (speedup).
  common_length = diff_commonPrefix(text1, text2)
  if common_length > 0
    common_prefix = text1[0...common_length]
    text1 = text1[common_length..-1]
    text2 = text2[common_length..-1]
  end

  # Trim off common suffix (speedup).
  common_length = diff_commonSuffix(text1, text2)
  if common_length > 0
    common_suffix = text1[-common_length..-1]
    text1 = text1[0...-common_length]
    text2 = text2[0...-common_length]
  end

  # Compute the diff on the middle block.
  diffs = diff_compute(text1, text2, checklines, deadline)

  # Restore the prefix and suffix.
  diffs.unshift([:equal, common_prefix]) unless common_prefix.nil?
  diffs.push([:equal, common_suffix]) unless common_suffix.nil?
  diff_cleanupMerge(diffs)

  diffs
end
|
91
|
+
|
92
|
+
# Find the differences between two texts. Assumes that the texts do not
|
93
|
+
# have any common prefix or suffix.
|
94
|
+
def diff_compute(text1, text2, checklines, deadline)
  # text1      - Old string to be diffed.
  # text2      - New string to be diffed.
  # checklines - When truthy and both texts are longer than 100 characters,
  #              the diff is delegated to diff_lineMode.
  # deadline   - Time limit forwarded to the helpers (may be nil).
  #
  # Returns an Array of [operation, text] pairs.

  # Just add some text (speedup).
  return [[:insert, text2]] if text1.empty?

  # Just delete some text (speedup).
  return [[:delete, text1]] if text2.empty?

  # Pick the longer and the shorter text with the same strict comparison
  # that decides the operation below, instead of relying on sort order.
  text1_longer = text1.length > text2.length
  if text1_longer
    longtext = text1
    shorttext = text2
  else
    longtext = text2
    shorttext = text1
  end

  i = longtext.index(shorttext)
  unless i.nil?
    # Shorter text is inside the longer text (speedup).
    # The surrounding text was deleted if text1 is the longer one,
    # inserted otherwise.
    op = text1_longer ? :delete : :insert
    return [[op, longtext[0...i]],
            [:equal, shorttext],
            [op, longtext[(i + shorttext.length)..-1]]]
  end

  if shorttext.length == 1
    # Single character string.
    # After the previous speedup, the character can't be an equality.
    return [[:delete, text1], [:insert, text2]]
  end

  # Check to see if the problem can be split in two.
  hm = diff_halfMatch(text1, text2)
  if hm
    # A half-match was found, sort out the return data.
    text1_a, text1_b, text2_a, text2_b, mid_common = hm
    # Send both pairs off for separate processing.
    diffs_a = diff_main(text1_a, text2_a, checklines, deadline)
    diffs_b = diff_main(text1_b, text2_b, checklines, deadline)
    # Merge the results.
    return diffs_a + [[:equal, mid_common]] + diffs_b
  end

  if checklines && text1.length > 100 && text2.length > 100
    return diff_lineMode(text1, text2, deadline)
  end

  diff_bisect(text1, text2, deadline)
end
|
143
|
+
|
144
|
+
# Do a quick line-level diff on both strings, then rediff the parts for
|
145
|
+
# greater accuracy.
|
146
|
+
# This speedup can produce non-minimal diffs.
|
147
|
+
def diff_lineMode(text1, text2, deadline)
  # text1    - Old string to be diffed.
  # text2    - New string to be diffed.
  # deadline - Time limit forwarded to diff_main (may be nil).
  #
  # Returns an Array of [operation, text] pairs.

  # Scan the text on a line-by-line basis first.
  text1, text2, line_array = diff_linesToChars(text1, text2)

  diffs = diff_main(text1, text2, false, deadline)

  # Convert the diff back to original text.
  diff_charsToLines(diffs, line_array)
  # Eliminate freak matches (e.g. blank lines)
  diff_cleanupSemantic(diffs)

  # Rediff any replacement blocks, this time character-by-character.
  # Add a dummy entry at the end so the last run of edits is flushed too.
  diffs.push([:equal, ''])
  pointer = 0
  count_delete = 0
  count_insert = 0
  text_delete = ''
  text_insert = ''

  while pointer < diffs.length
    case diffs[pointer][0]
    when :insert
      count_insert += 1
      text_insert += diffs[pointer][1]
    when :delete
      count_delete += 1
      text_delete += diffs[pointer][1]
    when :equal
      # Upon reaching an equality, check for prior redundancies.
      if count_delete >= 1 && count_insert >= 1
        # Replace the run of edits preceding this equality with a
        # character-level diff of the accumulated texts.
        sub_diffs = diff_main(text_delete, text_insert, false, deadline)
        run_length = count_delete + count_insert
        run_start = pointer - run_length
        diffs[run_start, run_length] = sub_diffs
        # Point at the equality again; it moved with the splice.
        pointer = run_start + sub_diffs.length
      end
      count_insert = 0
      count_delete = 0
      text_delete = ''
      text_insert = ''
    end
    pointer += 1
  end

  diffs.pop # Remove the dummy entry at the end.
  diffs
end
|
197
|
+
|
198
|
+
# Find the 'middle snake' of a diff, split the problem in two
|
199
|
+
# and return the recursively constructed diff.
|
200
|
+
# See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
|
201
|
+
def diff_bisect(text1, text2, deadline)
  # text1    - Old string to be diffed.
  # text2    - New string to be diffed.
  # deadline - Time at which to give up, or nil for no limit.
  #
  # Returns an Array of [operation, text] pairs. When no overlap between the
  # forward and reverse paths is found (or the deadline passes), the result
  # is a plain delete of text1 followed by an insert of text2.

  # Cache the text lengths to prevent multiple calls.
  text1_length = text1.length
  text2_length = text2.length
  max_d = (text1_length + text2_length + 1) / 2
  v_offset = max_d
  v_length = 2 * max_d
  v1 = Array.new(v_length, -1)
  v2 = Array.new(v_length, -1)
  v1[v_offset + 1] = 0
  v2[v_offset + 1] = 0
  delta = text1_length - text2_length

  # If the total number of characters is odd, then the front path will
  # collide with the reverse path.
  front = (delta % 2 != 0)
  # Offsets for start and end of k loop.
  # Prevents mapping of space beyond the grid.
  k1start = 0
  k1end = 0
  k2start = 0
  k2end = 0

  max_d.times do |d|
    # Bail out if deadline is reached.
    break if deadline && Time.now >= deadline

    # Walk the front path one step.
    (-d + k1start).step(d - k1end, 2) do |k1|
      k1_offset = v_offset + k1
      if k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])
        x1 = v1[k1_offset + 1]
      else
        x1 = v1[k1_offset - 1] + 1
      end

      y1 = x1 - k1
      # Follow the run of matching characters along this diagonal.
      while x1 < text1_length && y1 < text2_length && text1[x1] == text2[y1]
        x1 += 1
        y1 += 1
      end

      v1[k1_offset] = x1
      if x1 > text1_length
        # Ran off the right of the graph.
        k1end += 2
      elsif y1 > text2_length
        # Ran off the bottom of the graph.
        k1start += 2
      elsif front
        k2_offset = v_offset + delta - k1
        if k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1
          # Mirror x2 onto top-left coordinate system.
          x2 = text1_length - v2[k2_offset]
          if x1 >= x2
            # Overlap detected.
            return diff_bisectSplit(text1, text2, x1, y1, deadline)
          end
        end
      end
    end

    # Walk the reverse path one step.
    (-d + k2start).step(d - k2end, 2) do |k2|
      k2_offset = v_offset + k2
      if k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])
        x2 = v2[k2_offset + 1]
      else
        x2 = v2[k2_offset - 1] + 1
      end

      y2 = x2 - k2
      # Same as above, but comparing from the ends of both texts.
      while x2 < text1_length && y2 < text2_length && text1[-x2 - 1] == text2[-y2 - 1]
        x2 += 1
        y2 += 1
      end

      v2[k2_offset] = x2
      if x2 > text1_length
        # Ran off the left of the graph.
        k2end += 2
      elsif y2 > text2_length
        # Ran off the top of the graph.
        k2start += 2
      elsif !front
        k1_offset = v_offset + delta - k2
        if k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1
          x1 = v1[k1_offset]
          y1 = v_offset + x1 - k1_offset
          # Mirror x2 onto top-left coordinate system.
          x2 = text1_length - x2
          if x1 >= x2
            # Overlap detected.
            return diff_bisectSplit(text1, text2, x1, y1, deadline)
          end
        end
      end
    end
  end

  # Diff took too long and hit the deadline or
  # number of diffs equals number of characters, no commonality at all.
  [[:delete, text1], [:insert, text2]]
end
|
304
|
+
|
305
|
+
# Given the location of the 'middle snake', split the diff in two parts
|
306
|
+
# and recurse.
|
307
|
+
def diff_bisectSplit(text1, text2, x, y, deadline)
  # text1    - Old string to be diffed.
  # text2    - New string to be diffed.
  # x        - Index in text1 at which to split.
  # y        - Index in text2 at which to split.
  # deadline - Time limit forwarded to diff_main (may be nil).
  #
  # Returns the diff of the two head parts followed by the diff of the two
  # tail parts, as one Array of [operation, text] pairs.

  # Compute both diffs serially, heads first; line mode is disabled.
  head_diffs = diff_main(text1[0...x], text2[0...y], false, deadline)
  tail_diffs = diff_main(text1[x..-1], text2[y..-1], false, deadline)

  head_diffs + tail_diffs
end
|
319
|
+
|
320
|
+
# Split two texts into an array of strings. Reduce the texts to a string
|
321
|
+
# of hashes where each Unicode character represents one line.
|
322
|
+
def diff_linesToChars(text1, text2)
  # text1 - First string.
  # text2 - Second string.
  #
  # Returns a three-element Array: the encoded text1, the encoded text2 and
  # the Array of unique lines. Each character of an encoded string is the
  # index (as a UTF-8 code point) of one line in that Array. Index 0 is
  # reserved by the initial empty string.
  #
  # NOTE(review): Integer#chr raises RangeError for code points that are not
  # valid in UTF-8, so an extremely large number of distinct lines may fail
  # here -- confirm before relying on very large inputs.
  line_array = [''] # e.g. line_array[4] == "Hello\n"
  line_hash = {}    # e.g. line_hash["Hello\n"] == 4

  encoded = [text1, text2].map do |text|
    chars = ''
    text.each_line do |line|
      index = line_hash[line]
      if index.nil?
        # First time this line is seen: register it under the next index.
        index = line_array.length
        line_hash[line] = index
        line_array.push(line)
      end
      chars += index.chr(Encoding::UTF_8)
    end
    chars
  end

  encoded.push(line_array)
end
|
342
|
+
|
343
|
+
# Rehydrate the text in a diff from a string of line hashes to real lines of text.
|
344
|
+
def diff_charsToLines(diffs, line_array)
  # diffs      - Array of [operation, text] pairs whose text is made of
  #              line-index characters; modified in place.
  # line_array - Array of unique lines, indexed by character code point.
  #
  # Returns diffs.
  diffs.each do |diff|
    # Replace every character by the line stored at its code point.
    diff[1] = diff[1].each_char.map { |c| line_array[c.ord] }.join
  end
end
|
349
|
+
|
350
|
+
# Determine the common prefix of two strings.
|
351
|
+
def diff_commonPrefix(text1, text2)
  # text1 - First string.
  # text2 - Second string.
  #
  # Returns the number of characters common to the start of each string.

  # Quick check for common null cases.
  return 0 if text1.empty? || text2.empty? || text1[0] != text2[0]

  # Binary search.
  # Performance analysis: http://neil.fraser.name/news/2007/10/09/
  pointer_min = 0
  pointer_max = [text1.length, text2.length].min
  pointer_mid = pointer_max
  # Everything before pointer_start is already known to match.
  pointer_start = 0

  while pointer_min < pointer_mid
    if text1[pointer_start...pointer_mid] == text2[pointer_start...pointer_mid]
      pointer_min = pointer_mid
      pointer_start = pointer_min
    else
      pointer_max = pointer_mid
    end
    pointer_mid = (pointer_max - pointer_min) / 2 + pointer_min
  end

  pointer_mid
end
|
374
|
+
|
375
|
+
# Determine the common suffix of two strings.
|
376
|
+
def diff_commonSuffix(text1, text2)
  # text1 - First string.
  # text2 - Second string.
  #
  # Returns the number of characters common to the end of each string.

  # Quick check for common null cases.
  return 0 if text1.empty? || text2.empty? || text1[-1] != text2[-1]

  # Binary search.
  # Performance analysis: http://neil.fraser.name/news/2007/10/09/
  pointer_min = 0
  pointer_max = [text1.length, text2.length].min
  pointer_mid = pointer_max
  # The last pointer_end characters are already known to match.
  pointer_end = 0

  while pointer_min < pointer_mid
    if text1[-pointer_mid..(-pointer_end - 1)] == text2[-pointer_mid..(-pointer_end - 1)]
      pointer_min = pointer_mid
      pointer_end = pointer_min
    else
      pointer_max = pointer_mid
    end
    pointer_mid = (pointer_max - pointer_min) / 2 + pointer_min
  end

  pointer_mid
end
|
399
|
+
|
400
|
+
# Determine if the suffix of one string is the prefix of another.
|
401
|
+
def diff_commonOverlap(text1, text2)
  # text1 - First string.
  # text2 - Second string.
  #
  # Returns the number of characters common to the end of text1 and the
  # start of text2.

  # Cache the text lengths to prevent multiple calls.
  text1_length = text1.length
  text2_length = text2.length

  # Eliminate the null case.
  return 0 if text1_length.zero? || text2_length.zero?

  # Truncate the longer string.
  if text1_length > text2_length
    text1 = text1[-text2_length..-1]
  else
    text2 = text2[0...text1_length]
  end
  text_length = [text1_length, text2_length].min

  # Quick check for the whole case.
  return text_length if text1 == text2

  # Start by looking for a single character match
  # and increase length until no match is found.
  # Performance analysis: http://neil.fraser.name/news/2010/11/04/
  best = 0
  length = 1
  loop do
    pattern = text1[(text_length - length)..-1]
    found = text2.index(pattern)

    return best if found.nil?

    length += found
    # Compare exactly `length` characters on both sides; the end-exclusive
    # range keeps the prefix of text2 the same size as the suffix of text1.
    if found == 0 || text1[(text_length - length)..-1] == text2[0...length]
      best = length
      length += 1
    end
  end
end
|
438
|
+
|
439
|
+
# Does a substring of shorttext exist within longtext such that the
|
440
|
+
# substring is at least half the length of longtext?
|
441
|
+
def diff_halfMatchI(longtext, shorttext, i)
  # longtext  - Longer string.
  # shorttext - Shorter string.
  # i         - Start index in longtext of the quarter-length seed.
  #
  # Returns a five-element Array [longtext prefix, longtext suffix,
  # shorttext prefix, shorttext suffix, common middle], or nil when the best
  # common substring is shorter than half of longtext.

  # Start with a 1/4 length substring at position i as a seed.
  seed = longtext[i, longtext.length / 4]
  best_common = ''
  best_longtext_a = nil
  best_longtext_b = nil
  best_shorttext_a = nil
  best_shorttext_b = nil

  # Visit every occurrence of the seed in shorttext and grow it both ways.
  j = shorttext.index(seed)
  while j
    prefix_length = diff_commonPrefix(longtext[i..-1], shorttext[j..-1])
    suffix_length = diff_commonSuffix(longtext[0...i], shorttext[0...j])
    if best_common.length < suffix_length + prefix_length
      best_common = shorttext[(j - suffix_length)...(j + prefix_length)]
      best_longtext_a = longtext[0...(i - suffix_length)]
      best_longtext_b = longtext[(i + prefix_length)..-1]
      best_shorttext_a = shorttext[0...(j - suffix_length)]
      best_shorttext_b = shorttext[(j + prefix_length)..-1]
    end
    j = shorttext.index(seed, j + 1)
  end

  return nil if best_common.length * 2 < longtext.length

  [best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common]
end
|
461
|
+
|
462
|
+
# Do the two texts share a substring which is at least half the length of the
|
463
|
+
# longer text?
|
464
|
+
# This speedup can produce non-minimal diffs.
|
465
|
+
def diff_halfMatch(text1, text2)
  # text1 - First string.
  # text2 - Second string.
  #
  # Returns a five-element Array [text1 prefix, text1 suffix, text2 prefix,
  # text2 suffix, common middle], or nil if there was no match.

  # Don't risk returning a non-optimal diff if we have unlimited time
  return nil if diff_timeout <= 0

  # Decide once which text is the longer one, so the selection here and the
  # result mapping at the end can never disagree for equal-length texts.
  text1_longer = text1.length > text2.length
  if text1_longer
    longtext = text1
    shorttext = text2
  else
    longtext = text2
    shorttext = text1
  end

  if longtext.length < 4 || shorttext.length * 2 < longtext.length
    return nil # Pointless.
  end

  # First check if the second quarter is the seed for a half-match.
  hm1 = diff_halfMatchI(longtext, shorttext, (longtext.length + 3) / 4)
  # Check again based on the third quarter.
  hm2 = diff_halfMatchI(longtext, shorttext, (longtext.length + 1) / 2)

  return nil if hm1.nil? && hm2.nil?

  hm = if hm2.nil?
         hm1
       elsif hm1.nil?
         hm2
       else
         # Both matched. Select the longest.
         hm1[4].length > hm2[4].length ? hm1 : hm2
       end

  # A half-match was found, sort out the return data: hm is ordered
  # long-a, long-b, short-a, short-b, common.
  long_a, long_b, short_a, short_b, mid_common = hm
  if text1_longer
    [long_a, long_b, short_a, short_b, mid_common]
  else
    [short_a, short_b, long_a, long_b, mid_common]
  end
end
|
497
|
+
|
498
|
+
# Reduce the number of edits by eliminating semantically trivial equalities.
|
499
|
+
def diff_cleanupSemantic(diffs)
  # diffs - Array of [operation, text] pairs; modified in place.
  #
  # An equality is eliminated (turned into a delete plus an insert) when it
  # is no longer than the edits on both of its sides.
  changes = false
  equalities = []      # Stack of indices where equalities are found.
  last_equality = nil  # Text of the equality at diffs[equalities.last].
  pointer = 0          # Index of current position.
  # Number of characters that changed prior to the equality.
  length_insertions1 = 0
  length_deletions1 = 0
  # Number of characters that changed after the equality.
  length_insertions2 = 0
  length_deletions2 = 0

  while pointer < diffs.length
    if diffs[pointer][0] == :equal # Equality found.
      equalities.push(pointer)
      length_insertions1 = length_insertions2
      length_deletions1 = length_deletions2
      length_insertions2 = 0
      length_deletions2 = 0
      last_equality = diffs[pointer][1]
    else # An insertion or deletion.
      if diffs[pointer][0] == :insert
        length_insertions2 += diffs[pointer][1].length
      else
        length_deletions2 += diffs[pointer][1].length
      end

      if last_equality &&
         last_equality.length <= [length_insertions1, length_deletions1].max &&
         last_equality.length <= [length_insertions2, length_deletions2].max
        # Duplicate record.
        diffs[equalities.last, 0] = [[:delete, last_equality]]

        # Change second copy to insert.
        diffs[equalities.last + 1][0] = :insert

        # Throw away the equality we just deleted.
        equalities.pop
        # Throw away the previous equality (it needs to be reevaluated).
        equalities.pop
        # Resume from the previous remaining equality, or from the start.
        pointer = equalities.last || -1

        # Reset the counters.
        length_insertions1 = 0
        length_deletions1 = 0
        length_insertions2 = 0
        length_deletions2 = 0
        last_equality = nil

        changes = true
      end
    end
    pointer += 1
  end

  # Normalize the diff.
  diff_cleanupMerge(diffs) if changes
  diff_cleanupSemanticLossless(diffs)

  # Find any overlaps between deletions and insertions.
  # e.g: <del>abcxxx</del><ins>xxxdef</ins>
  #   -> <del>abc</del>xxx<ins>def</ins>
  # e.g: <del>xxxabc</del><ins>defxxx</ins>
  #   -> <ins>def</ins>xxx<del>abc</del>
  # Only extract an overlap if it is as big as the edit ahead or behind it.
  pointer = 1
  while pointer < diffs.length
    if diffs[pointer - 1][0] == :delete && diffs[pointer][0] == :insert
      deletion = diffs[pointer - 1][1]
      insertion = diffs[pointer][1]
      overlap_length1 = diff_commonOverlap(deletion, insertion)
      overlap_length2 = diff_commonOverlap(insertion, deletion)
      if overlap_length1 >= overlap_length2
        if overlap_length1 >= deletion.length / 2.0 ||
           overlap_length1 >= insertion.length / 2.0
          # Overlap found. Insert an equality and trim the surrounding edits.
          # Slices use explicit end offsets: a negative-zero range
          # (0...-0) would wrongly yield an empty string.
          diffs[pointer, 0] = [[:equal, insertion[0...overlap_length1]]]
          diffs[pointer - 1][0] = :delete
          diffs[pointer - 1][1] = deletion[0...(deletion.length - overlap_length1)]
          diffs[pointer + 1][0] = :insert
          diffs[pointer + 1][1] = insertion[overlap_length1..-1]
          pointer += 1
        end
      else
        if overlap_length2 >= deletion.length / 2.0 ||
           overlap_length2 >= insertion.length / 2.0
          # Reverse overlap found.
          # Insert an equality and swap and trim the surrounding edits.
          diffs[pointer, 0] = [[:equal, deletion[0...overlap_length2]]]
          diffs[pointer - 1][0] = :insert
          diffs[pointer - 1][1] = insertion[0...(insertion.length - overlap_length2)]
          diffs[pointer + 1][0] = :delete
          diffs[pointer + 1][1] = deletion[overlap_length2..-1]
          pointer += 1
        end
      end
      pointer += 1
    end
    pointer += 1
  end
end
|
600
|
+
|
601
|
+
# Given two strings, compute a score representing whether the
|
602
|
+
# internal boundary falls on logical boundaries.
|
603
|
+
# Scores range from 5 (best) to 0 (worst).
|
604
|
+
def diff_cleanupSemanticScore(one, two)
  # one - First string (text before the boundary).
  # two - Second string (text after the boundary).
  #
  # Returns an Integer score: 5 when either string is empty, otherwise
  # 0 to 4 depending on the characters that meet at the boundary.

  # Edges are the best.
  return 5 if one.empty? || two.empty?

  # Define some regex patterns for matching boundaries.
  non_word_character = /[^a-zA-Z0-9]/
  whitespace = /\s/
  linebreak = /[\r\n]/
  # \A and \z anchor to the whole string. Ruby's ^ and $ match at every
  # line, which would also reward blank lines far away from the boundary.
  line_end = /\n\r?\n\z/
  line_start = /\A\r?\n\r?\n/

  # Each port of this function behaves slightly differently due to
  # subtle differences in each language's definition of things like
  # 'whitespace'. Since this function's purpose is largely cosmetic,
  # the choice has been made to use each language's native features
  # rather than force total conformity.
  last_char = one[-1]
  first_char = two[0]

  # Each level is only reachable when the previous one matched.
  # One point for non-alphanumeric.
  return 0 unless last_char =~ non_word_character || first_char =~ non_word_character
  # Two points for whitespace.
  return 1 unless last_char =~ whitespace || first_char =~ whitespace
  # Three points for line breaks.
  return 2 unless last_char =~ linebreak || first_char =~ linebreak
  # Four points for blank lines.
  return 3 unless one =~ line_end || two =~ line_start

  4
end
|
642
|
+
|
643
|
+
# Look for single edits surrounded on both sides by equalities
|
644
|
+
# which can be shifted sideways to align the edit to a word boundary.
|
645
|
+
# e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
|
646
|
+
def diff_cleanupSemanticLossless(diffs)
  # diffs - Array of [operation, text] pairs; modified in place.
  pointer = 1
  # Intentionally ignore the first and last element (don't need checking).
  while pointer < diffs.length - 1
    if diffs[pointer - 1][0] == :equal && diffs[pointer + 1][0] == :equal
      # This is a single edit surrounded by equalities.
      equality1 = diffs[pointer - 1][1]
      edit = diffs[pointer][1]
      equality2 = diffs[pointer + 1][1]

      # First, shift the edit as far left as possible.
      common_offset = diff_commonSuffix(equality1, edit)
      if common_offset != 0
        common_string = edit[-common_offset..-1]
        equality1 = equality1[0...-common_offset]
        edit = common_string + edit[0...-common_offset]
        equality2 = common_string + equality2
      end

      # Second, step character by character right, looking for the best fit.
      best_equality1 = equality1
      best_edit = edit
      best_equality2 = equality2
      best_score = diff_cleanupSemanticScore(equality1, edit) +
                   diff_cleanupSemanticScore(edit, equality2)
      # Both strings must be non-empty: comparing nil with nil would
      # otherwise pass and the concatenation below would raise.
      while !edit.empty? && !equality2.empty? && edit[0] == equality2[0]
        equality1 += edit[0]
        edit = edit[1..-1] + equality2[0]
        equality2 = equality2[1..-1]
        score = diff_cleanupSemanticScore(equality1, edit) +
                diff_cleanupSemanticScore(edit, equality2)
        # The >= encourages trailing rather than leading whitespace on edits.
        if score >= best_score
          best_score = score
          best_equality1 = equality1
          best_edit = edit
          best_equality2 = equality2
        end
      end

      if diffs[pointer - 1][1] != best_equality1
        # We have an improvement, save it back to the diff.
        if best_equality1.empty?
          # The leading equality vanished; drop it and follow the shift.
          diffs[pointer - 1, 1] = []
          pointer -= 1
        else
          diffs[pointer - 1][1] = best_equality1
        end

        diffs[pointer][1] = best_edit

        if best_equality2.empty?
          diffs[pointer + 1, 1] = []
          pointer -= 1
        else
          diffs[pointer + 1][1] = best_equality2
        end
      end
    end

    pointer += 1
  end
end
|
709
|
+
|
710
|
+
# Reduce the number of edits by eliminating operationally trivial equalities.
|
711
|
+
def diff_cleanupEfficiency(diffs)
  # diffs - Array of [operation, text] pairs; modified in place.
  #
  # Equalities shorter than diff_editCost that sit between edits are turned
  # into a delete plus an insert.
  changes = false
  equalities = []    # Stack of indices where equalities are found.
  last_equality = '' # Text of the equality at diffs[equalities.last].
  pointer = 0        # Index of current position.
  pre_ins = false    # Is there an insertion operation before the last equality.
  pre_del = false    # Is there a deletion operation before the last equality.
  post_ins = false   # Is there an insertion operation after the last equality.
  post_del = false   # Is there a deletion operation after the last equality.

  while pointer < diffs.length
    if diffs[pointer][0] == :equal # Equality found.
      if diffs[pointer][1].length < diff_editCost && (post_ins || post_del)
        # Candidate found.
        equalities.push(pointer)
        pre_ins = post_ins
        pre_del = post_del
        last_equality = diffs[pointer][1]
      else
        # Not a candidate, and can never become one.
        equalities.clear
        last_equality = ''
      end
      post_ins = false
      post_del = false
    else # An insertion or deletion.
      if diffs[pointer][0] == :delete
        post_del = true
      else
        post_ins = true
      end

      # Five types to be split:
      # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
      # <ins>A</ins>X<ins>C</ins><del>D</del>
      # <ins>A</ins><del>B</del>X<ins>C</ins>
      # <del>A</del>X<ins>C</ins><del>D</del>
      # <ins>A</ins><del>B</del>X<del>C</del>

      if !last_equality.empty? &&
         ((pre_ins && pre_del && post_ins && post_del) ||
          ((last_equality.length < diff_editCost / 2) &&
           [pre_ins, pre_del, post_ins, post_del].count(true) == 3))
        # Duplicate record.
        diffs[equalities.last, 0] = [[:delete, last_equality]]
        # Change second copy to insert.
        diffs[equalities.last + 1][0] = :insert
        equalities.pop # Throw away the equality we just deleted
        last_equality = ''
        if pre_ins && pre_del
          # No changes made which could affect previous entry, keep going.
          post_ins = true
          post_del = true
          equalities.clear
        else
          equalities.pop # Throw away the previous equality (no-op if none).
          # Rescan from the previous remaining equality, or from the very
          # start when none is left, so the edits just created are counted.
          pointer = equalities.last || -1
          post_ins = false
          post_del = false
        end
        changes = true
      end
    end
    pointer += 1
  end

  diff_cleanupMerge(diffs) if changes
end
|
783
|
+
|
784
|
+
# Reorder and merge like edit sections, and merge adjacent equalities.
# Any edit section can move as long as it doesn't cross an equality.
# The diffs array (and the [op, text] pairs inside it) is modified in place.
def diff_cleanupMerge(diffs)
  # Sentinel equality so the last run of edits is flushed by the loop below.
  diffs.push([:equal, ''])
  pointer = 0
  count_delete = 0
  count_insert = 0
  text_delete = ''
  text_insert = ''

  while pointer < diffs.length
    op = diffs[pointer][0]
    if op == :insert
      count_insert += 1
      text_insert += diffs[pointer][1]
      pointer += 1
    elsif op == :delete
      count_delete += 1
      text_delete += diffs[pointer][1]
      pointer += 1
    elsif op == :equal
      # An equality ends the current run of edits; tidy that run up.
      edits = count_delete + count_insert
      if edits > 1
        run_start = pointer - edits
        if count_delete != 0 && count_insert != 0
          # Move any prefix shared by the insertion and deletion into the
          # equality before the run (creating one at the front if needed).
          prefix_length = diff_commonPrefix(text_insert, text_delete)
          if prefix_length != 0
            shared = text_insert[0...prefix_length]
            if run_start > 0 && diffs[run_start - 1][0] == :equal
              diffs[run_start - 1][1] += shared
            else
              diffs.unshift([:equal, shared])
              # Everything shifted one slot to the right.
              pointer += 1
              run_start += 1
            end
            text_insert = text_insert[prefix_length..-1]
            text_delete = text_delete[prefix_length..-1]
          end
          # Move any shared suffix into the equality that follows the run.
          suffix_length = diff_commonSuffix(text_insert, text_delete)
          if suffix_length != 0
            diffs[pointer][1] = text_insert[-suffix_length..-1] + diffs[pointer][1]
            text_insert = text_insert[0...-suffix_length]
            text_delete = text_delete[0...-suffix_length]
          end
        end

        # Replace the whole run with at most one delete and one insert.
        merged = []
        merged.push([:delete, text_delete]) unless count_delete.zero?
        merged.push([:insert, text_insert]) unless count_insert.zero?
        diffs[run_start, edits] = merged
        # Continue with the element after the equality that ended the run.
        pointer = run_start + merged.length + 1
      elsif pointer != 0 && diffs[pointer - 1][0] == :equal
        # Two equalities in a row: fold this one into its predecessor.
        diffs[pointer - 1][1] += diffs[pointer][1]
        diffs.delete_at(pointer)
      else
        pointer += 1
      end
      count_insert = 0
      count_delete = 0
      text_delete = ''
      text_insert = ''
    end
  end

  # Drop the sentinel if it is still empty.
  diffs.pop if diffs.last[1].empty?

  # Second pass: look for single edits surrounded on both sides by equalities
  # which can be shifted sideways to eliminate an equality.
  # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
  changes = false
  pointer = 1

  # The first and last elements never need checking.
  while pointer < diffs.length - 1
    if diffs[pointer - 1][0] == :equal && diffs[pointer + 1][0] == :equal
      before = diffs[pointer - 1][1]
      edit = diffs[pointer][1]
      after = diffs[pointer + 1][1]
      before_length = before.length
      after_length = after.length
      if edit[-before_length..-1] == before
        # The edit ends with the previous equality: shift the edit left.
        diffs[pointer][1] = before + edit[0...-before_length]
        diffs[pointer + 1][1] = before + after
        diffs.delete_at(pointer - 1)
        changes = true
      elsif edit[0...after_length] == after
        # The edit starts with the next equality: shift the edit right.
        diffs[pointer - 1][1] += after
        diffs[pointer][1] = edit[after_length..-1] + after
        diffs.delete_at(pointer + 1)
        changes = true
      end
    end
    pointer += 1
  end

  # If shifts were made, the diff needs reordering and another shift sweep.
  diff_cleanupMerge(diffs) if changes
end
|
895
|
+
|
896
|
+
# loc is a location in text1; compute and return the equivalent location
# in text2. e.g. 'The cat' vs 'The big cat', 1->1, 5->8
#
# diffs is an array of [op, text] pairs where op is :equal, :insert or
# :delete. When loc lies at or beyond the end of text1, no diff overshoots
# it and the position is extrapolated from the end of text2.
def diff_xIndex(diffs, loc)
  chars1 = 0
  chars2 = 0
  last_chars1 = 0
  last_chars2 = 0

  # Find the first diff whose end lies beyond loc in text1, remembering the
  # running totals as they stood just before that diff.
  overshoot = diffs.find do |op, text|
    chars1 += text.length unless op == :insert
    chars2 += text.length unless op == :delete
    next true if chars1 > loc

    last_chars1 = chars1
    last_chars2 = chars2
    false
  end

  if overshoot && overshoot[0] == :delete
    # The location was deleted.
    last_chars2
  else
    # Add the remaining character length. This also covers the case where
    # no diff overshoots loc (overshoot is nil).
    last_chars2 + (loc - last_chars1)
  end
end
|
927
|
+
|
928
|
+
# Convert a diff array into a pretty HTML report.
#
# Each [op, text] pair becomes one element: <ins> for insertions, <del> for
# deletions and <span> for equalities. The text is HTML-escaped and every
# newline is rendered as a pilcrow followed by <br>.
# Returns the concatenated HTML string.
def diff_prettyHtml(diffs)
  diffs.map do |op, data|
    # Escape '&' first so the entities produced below are not re-escaped.
    text = data.gsub('&', '&amp;')
               .gsub('<', '&lt;')
               .gsub('>', '&gt;')
               .gsub("\n", '&para;<br>')
    case op
    when :insert
      "<ins style=\"background:#e6ffe6;\">#{text}</ins>"
    when :delete
      "<del style=\"background:#ffe6e6;\">#{text}</del>"
    when :equal
      "<span>#{text}</span>"
    end
  end.join
end
|
942
|
+
|
943
|
+
# Rebuild the source text from a diff: the concatenation of every equality
# and deletion, skipping insertions.
def diff_text1(diffs)
  kept = diffs.reject { |op, _data| op == :insert }
  kept.map { |_op, data| data }.join
end
|
953
|
+
|
954
|
+
# Rebuild the destination text from a diff: the concatenation of every
# equality and insertion, skipping deletions.
def diff_text2(diffs)
  kept = diffs.reject { |op, _data| op == :delete }
  kept.map { |_op, data| data }.join
end
|
964
|
+
|
965
|
+
# Compute the Levenshtein distance of a diff: the number of inserted,
# deleted or substituted characters.
#
# Within a run of edits between two equalities, an insertion paired with a
# deletion counts as one substitution, so the run costs the larger of the
# inserted and deleted character counts.
def diff_levenshtein(diffs)
  distance = 0
  inserted = 0
  deleted = 0

  diffs.each do |op, data|
    if op == :insert
      inserted += data.length
    elsif op == :delete
      deleted += data.length
    elsif op == :equal
      # An equality closes the current run of edits.
      distance += inserted > deleted ? inserted : deleted
      inserted = 0
      deleted = 0
    end
  end

  # Account for a trailing run that was not closed by an equality.
  distance + (inserted > deleted ? inserted : deleted)
end
|
988
|
+
|
989
|
+
# Crush the diff into an encoded string which describes the operations
# required to transform text1 into text2.
# E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
# Operations are tab-separated. Inserted text is escaped using %xx notation
# (one %XX per byte); spaces are left unescaped.
#
# The percent-encoding is done inline rather than through URI.encode, which
# was removed in Ruby 3.0.
def diff_toDelta(diffs)
  # Every character outside this set is percent-encoded.
  unsafe = /[^0-9A-Za-z_.;!~*'(),\/?:@&=+$\#-]/

  diffs.map do |op, data|
    case op
    when :insert
      escaped = data.gsub(unsafe) do |chars|
        chars.bytes.map { |byte| format('%%%02X', byte) }.join
      end
      '+' + escaped
    when :delete
      '-' + data.length.to_s
    when :equal
      '=' + data.length.to_s
    end
  end.join("\t").gsub('%20', ' ')
end
|
1005
|
+
|
1006
|
+
# Given the original text1, and an encoded string which describes the
# operations required to transform text1 into text2, compute the full diff.
#
# Returns an array of [op, text] pairs.
# Raises ArgumentError when a token has an unknown operation, when a count
# is not a number or is negative, or when the delta does not consume exactly
# text1.length characters. String#encode raises if delta contains non-ASCII
# characters.
def diff_fromDelta(text1, delta)
  # Deltas should be composed of a subset of ascii chars, Unicode not
  # required. The converted string is discarded; the call is made only
  # because it raises on non-ASCII input.
  delta.encode('ascii')
  diffs = []
  pointer = 0 # Cursor in text1

  delta.split("\t").each do |token|
    # Each token begins with a one character parameter which specifies the
    # operation of this token (delete, insert, equality).
    param = token[1..-1]
    case token[0]
    when '+'
      # Percent-decode byte by byte (URI.decode was removed in Ruby 3.0).
      # '+' is deliberately left alone: it is a literal character here.
      encoded = param.force_encoding(Encoding::UTF_8)
      decoded = encoded.gsub(/%[0-9A-Fa-f]{2}/) do |escape|
        [escape[1, 2]].pack('H2').force_encoding(Encoding::UTF_8)
      end
      diffs.push([:insert, decoded])
    when '-', '='
      begin
        n = Integer(param)
      rescue ArgumentError
        raise ArgumentError, "Invalid number in diff_fromDelta: #{param.inspect}"
      end
      if n < 0
        raise ArgumentError, "Negative number in diff_fromDelta: #{param.inspect}"
      end

      text = text1[pointer...(pointer + n)]
      pointer += n
      diffs.push([token[0] == '=' ? :equal : :delete, text])
    else
      raise ArgumentError, "Invalid diff operation in diff_fromDelta: #{token.inspect}"
    end
  end

  if pointer != text1.length
    raise ArgumentError, "Delta length (#{pointer}) does not equal " \
                         "source text length #{text1.length}"
  end
  diffs
end
|
1047
|
+
|
1048
|
+
# Locate the best instance of 'pattern' in 'text' near 'loc'.
#
# Returns the index of the match, or -1 when there is nothing to match.
# Raises ArgumentError when text or pattern is nil.
def match_main(text, pattern, loc)
  # Check for null inputs.
  raise ArgumentError.new("Null input. (match_main)") if text.nil? || pattern.nil?

  # Clamp loc into 0..text.length.
  loc = [[loc, text.length].min, 0].max

  # Shortcut (potentially not guaranteed by the algorithm).
  return 0 if text == pattern
  # Nothing to match.
  return -1 if text.empty?
  # Perfect match at the perfect spot! (Includes case of null pattern)
  return loc if text[loc, pattern.length] == pattern

  # Do a fuzzy compare.
  match_bitap(text, pattern, loc)
end
|
1070
|
+
|
1071
|
+
# Locate the best instance of 'pattern' in 'text' near 'loc' using the
# Bitap algorithm.
#
# Returns the index of the best match found, or -1 if none scores within
# match_threshold.
# Raises ArgumentError when the pattern is longer than match_maxBits.
def match_bitap(text, pattern, loc)
  # raise, not throw: throw is for catch/throw control flow.
  raise ArgumentError, 'Pattern too long' if pattern.length > match_maxBits

  # Initialise the alphabet.
  s = match_alphabet(pattern)

  # Compute and return the score for a match with e errors and x location.
  score_for = lambda do |e, x|
    accuracy = e.to_f / pattern.length
    proximity = (loc - x).abs
    if match_distance == 0
      # Dodge divide by zero error.
      proximity == 0 ? accuracy : 1.0
    else
      accuracy + (proximity.to_f / match_distance)
    end
  end

  # Highest score beyond which we give up.
  score_threshold = match_threshold
  # Is there a nearby exact match? (speedup)
  best_loc = text.index(pattern, loc)
  if best_loc
    score_threshold = [score_for.call(0, best_loc), score_threshold].min
    # What about in the other direction? (speedup)
    best_loc = text.rindex(pattern, loc + pattern.length)
    if best_loc
      score_threshold = [score_for.call(0, best_loc), score_threshold].min
    end
  end

  # Initialise the bit arrays.
  match_mask = 1 << (pattern.length - 1)
  best_loc = -1

  bin_max = pattern.length + text.length
  # Bit array of the previous error level; assigned at the end of each pass.
  last_rd = nil
  pattern.length.times do |d|
    # Scan for the best match; each iteration allows for one more error.
    # Run a binary search to determine how far from 'loc' we can stray at this
    # error level.
    bin_min = 0
    bin_mid = bin_max
    while bin_min < bin_mid
      if score_for.call(d, loc + bin_mid) <= score_threshold
        bin_min = bin_mid
      else
        bin_max = bin_mid
      end
      bin_mid = (bin_max - bin_min) / 2 + bin_min
    end

    # Use the result from this iteration as the maximum for the next.
    bin_max = bin_mid
    start = [1, loc - bin_mid + 1].max
    finish = [loc + bin_mid, text.length].min + pattern.length

    rd = Array.new(finish + 2, 0)
    rd[finish + 1] = (1 << d) - 1
    finish.downto(start) do |j|
      # Characters not in the pattern (or past the end of text) match nothing.
      char_match = s[text[j - 1]] || 0
      if d == 0 # First pass: exact match.
        rd[j] = ((rd[j + 1] << 1) | 1) & char_match
      else # Subsequent passes: fuzzy match.
        rd[j] = ((rd[j + 1] << 1) | 1) & char_match |
                (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
      end
      next unless (rd[j] & match_mask).nonzero?

      score = score_for.call(d, j - 1)
      # This match will almost certainly be better than any existing match.
      # But check anyway.
      next unless score <= score_threshold

      score_threshold = score
      best_loc = j - 1
      # Already passed loc, downhill from here on in.
      break unless best_loc > loc

      # When passing loc, don't exceed our current distance from loc.
      # NOTE(review): downto fixed its bounds before the loop began, so this
      # assignment does not shorten the current scan.
      start = [1, 2 * loc - best_loc].max
    end

    # No hope for a (better) match at greater error levels.
    break if score_for.call(d + 1, loc) > score_threshold

    last_rd = rd
  end

  best_loc
end
|
1170
|
+
|
1171
|
+
# Initialise the alphabet for the Bitap algorithm.
#
# Returns a hash mapping each character of pattern to a bitmask in which
# bit (pattern.length - i - 1) is set for every index i where the character
# occurs.
def match_alphabet(pattern)
  top_bit = pattern.length - 1
  pattern.chars.each_with_index.each_with_object({}) do |(char, index), masks|
    masks[char] = (masks[char] || 0) | (1 << (top_bit - index))
  end
end
|
1180
|
+
|
1181
|
+
# Parse a textual representation of patches and return a list of patch
# objects.
#
# Each patch starts with a header line of the form "@@ -a,b +c,d @@" (the
# ",b" / ",d" parts are optional), followed by body lines whose first
# character is '-' (deletion), '+' (insertion) or ' ' (equality) and whose
# remainder is percent-encoded text.
# Raises ArgumentError on a malformed header or an unknown line prefix.
def patch_fromText(textline)
  return [] if textline.empty?

  patches = []
  text = textline.split("\n")
  text_pointer = 0
  patch_header = /^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/
  while text_pointer < text.length
    m = text[text_pointer].match(patch_header)
    if m.nil?
      raise ArgumentError, "Invalid patch string: #{text[text_pointer]}"
    end

    patch = PatchObj.new
    patches.push(patch)

    # Header starts are 1-based except when the length is written as 0.
    patch.start1 = m[1].to_i
    if m[2].empty?
      patch.start1 -= 1
      patch.length1 = 1
    elsif m[2] == '0'
      patch.length1 = 0
    else
      patch.start1 -= 1
      patch.length1 = m[2].to_i
    end

    patch.start2 = m[3].to_i
    if m[4].empty?
      patch.start2 -= 1
      patch.length2 = 1
    elsif m[4] == '0'
      patch.length2 = 0
    else
      patch.start2 -= 1
      patch.length2 = m[4].to_i
    end
    text_pointer += 1

    while text_pointer < text.length
      if text[text_pointer].empty?
        # Blank line? Whatever.
        text_pointer += 1
        next
      end

      sign = text[text_pointer][0]
      # Percent-decode byte by byte (URI.decode was removed in Ruby 3.0).
      # '+' is deliberately left alone: it is a literal character here.
      encoded = text[text_pointer][1..-1].force_encoding(Encoding::UTF_8)
      line = encoded.gsub(/%[0-9A-Fa-f]{2}/) do |escape|
        [escape[1, 2]].pack('H2').force_encoding(Encoding::UTF_8)
      end

      case sign
      when '-'
        # Deletion.
        patch.diffs.push([:delete, line])
      when '+'
        # Insertion.
        patch.diffs.push([:insert, line])
      when ' '
        # Minor equality
        patch.diffs.push([:equal, line])
      when '@'
        # Start of next patch.
        break
      else
        # Unknown line prefix.
        raise ArgumentError, "Invalid patch mode \"#{sign}\" in: #{line}"
      end
      text_pointer += 1
    end
  end

  patches
end
|
1253
|
+
|
1254
|
+
# Take a list of patches and return a textual representation: the string
# form of every patch, concatenated in order.
def patch_toText(patches)
  patches.map(&:to_s).join
end
|
1258
|
+
|
1259
|
+
# Increase the context until it is unique,
# but don't let the pattern expand beyond match_maxBits.
#
# Surrounding text taken from 'text' is added to the patch as leading and
# trailing equalities, and the patch's start points and lengths are adjusted
# to cover them. Does nothing when text is empty.
def patch_addContext(patch, text)
  return if text.empty?

  pattern = text[patch.start2, patch.length1]
  padding = 0

  # Look for the first and last matches of pattern in text. If two different
  # matches are found, increase the pattern length.
  while text.index(pattern) != text.rindex(pattern) &&
        pattern.length < match_maxBits - 2 * patch_margin
    padding += patch_margin
    from = [0, patch.start2 - padding].max
    upto = patch.start2 + patch.length1 + padding
    pattern = text[from...upto]
  end

  # Add one chunk for good luck.
  padding += patch_margin

  # Add the prefix.
  prefix_start = [0, patch.start2 - padding].max
  prefix = text[prefix_start...patch.start2]
  patch.diffs.unshift([:equal, prefix]) unless prefix.to_s.empty?

  # Add the suffix.
  suffix = text[patch.start2 + patch.length1, padding]
  patch.diffs.push([:equal, suffix]) unless suffix.to_s.empty?

  # Roll back the start points.
  patch.start1 -= prefix.length
  patch.start2 -= prefix.length

  # Extend the lengths.
  context_length = prefix.length + suffix.length
  patch.length1 += context_length
  patch.length2 += context_length
end
|
1293
|
+
|
1294
|
+
# Compute a list of patches to turn text1 into text2.
# Use diffs if provided, otherwise compute it ourselves.
# There are four ways to call this function, depending on what data is
# available to the caller:
# Method 1:
# a = text1, b = text2
# Method 2:
# a = diffs
# Method 3 (optimal):
# a = text1, b = diffs
# Method 4 (deprecated, use method 3):
# a = text1, b = text2, c = diffs
#
# Returns an array of patch objects (empty when there are no diffs).
# Raises ArgumentError for any other argument combination.
def patch_make(*args)
  first, second, third = args
  text1 = nil
  diffs = nil

  if args.length == 1 && first.is_a?(Array)
    # Method 2: compute text1 from diffs.
    diffs = first
    text1 = diff_text1(diffs)
  elsif args.length == 2 && first.is_a?(String) && second.is_a?(String)
    # Method 1: compute diffs from text1 and text2.
    text1 = first
    diffs = diff_main(text1, second, true)
    if diffs.length > 2
      diff_cleanupSemantic(diffs)
      diff_cleanupEfficiency(diffs)
    end
  elsif args.length == 2 && first.is_a?(String) && second.is_a?(Array)
    # Method 3: text1 and diffs given.
    text1 = first
    diffs = second
  elsif args.length == 3 && first.is_a?(String) && second.is_a?(String) &&
        third.is_a?(Array)
    # Method 4: text1, text2, diffs. text2 is not used.
    text1 = first
    diffs = third
  else
    raise ArgumentError.new('Unknown call format to patch_make.')
  end

  return [] if diffs.empty? # Get rid of the null case.

  patches = []
  patch = PatchObj.new
  char_count1 = 0 # Number of characters into the text1 string.
  char_count2 = 0 # Number of characters into the text2 string.
  prepatch_text = text1 # Recreate the patches to determine context info.
  postpatch_text = text1

  diffs.each_with_index do |diff, x|
    diff_type, diff_text = diff
    size = diff_text.length

    if patch.diffs.empty? && diff_type != :equal
      # A new patch starts here.
      patch.start1 = char_count1
      patch.start2 = char_count2
    end

    if diff_type == :insert
      patch.diffs.push(diff)
      patch.length2 += size
      postpatch_text = postpatch_text[0...char_count2] + diff_text +
                       postpatch_text[char_count2..-1]
    elsif diff_type == :delete
      patch.length1 += size
      patch.diffs.push(diff)
      postpatch_text = postpatch_text[0...char_count2] +
                       postpatch_text[(char_count2 + size)..-1]
    elsif diff_type == :equal
      if size <= 2 * patch_margin && !patch.diffs.empty? && diffs.length != x + 1
        # Small equality inside a patch.
        patch.diffs.push(diff)
        patch.length1 += size
        patch.length2 += size
      elsif size >= 2 * patch_margin && !patch.diffs.empty?
        # Time for a new patch.
        patch_addContext(patch, prepatch_text)
        patches.push(patch)
        patch = PatchObj.new
        # Unlike Unidiff, our patch lists have a rolling context.
        # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
        # Update prepatch text & pos to reflect the application of the
        # just completed patch.
        prepatch_text = postpatch_text
        char_count1 = char_count2
      end
    end

    # Update the current character count.
    char_count1 += size unless diff_type == :insert
    char_count2 += size unless diff_type == :delete
  end

  # Pick up the leftover patch if not empty.
  unless patch.diffs.empty?
    patch_addContext(patch, prepatch_text)
    patches.push(patch)
  end

  patches
end
|
1404
|
+
|
1405
|
+
# Merge a set of patches onto the text. Return a patched text, as well
# as a list of true/false values indicating which patches were applied.
#
# The patches passed in are deep-copied first, so the caller's objects are
# not modified.
def patch_apply(patches, text)
  return [text, []] if patches.empty?

  # Deep copy the patches so that no changes are made to originals.
  patches = Marshal.load(Marshal.dump(patches))

  null_padding = patch_addPadding(patches)
  text = null_padding + text + null_padding
  patch_splitMax(patches)

  # delta keeps track of the offset between the expected and actual location
  # of the previous patch. If there are patches expected at positions 10 and
  # 20, but the first patch was found at 12, delta is 2 and the second patch
  # has an effective expected position of 22.
  delta = 0
  results = []
  patches.each_with_index do |patch, x|
    expected_loc = patch.start2 + delta
    text1 = diff_text1(patch.diffs)
    end_loc = -1
    if text1.length > match_maxBits
      # patch_splitMax will only provide an oversized pattern in the case of
      # a monster delete.
      start_loc = match_main(text, text1[0, match_maxBits], expected_loc)
      if start_loc != -1
        end_loc = match_main(text, text1[(text1.length - match_maxBits)..-1],
                             expected_loc + text1.length - match_maxBits)
        if end_loc == -1 || start_loc >= end_loc
          # Can't find valid trailing context. Drop this patch.
          start_loc = -1
        end
      end
    else
      start_loc = match_main(text, text1, expected_loc)
    end

    if start_loc == -1
      # No match found. :(
      results[x] = false
      # Subtract the delta for this failed patch from subsequent patches.
      delta -= patch.length2 - patch.length1
    else
      # Found a match. :)
      results[x] = true
      delta = start_loc - expected_loc
      text2 = if end_loc == -1
                text[start_loc, text1.length]
              else
                # end_loc + match_maxBits is an end index, not a length, so
                # slice with a range rather than text[start, length].
                text[start_loc...(end_loc + match_maxBits)]
              end

      if text1 == text2
        # Perfect match, just shove the replacement text in.
        text = text[0, start_loc] + diff_text2(patch.diffs) +
               text[(start_loc + text1.length)..-1]
      else
        # Imperfect match.
        # Run a diff to get a framework of equivalent indices.
        diffs = diff_main(text1, text2, false)
        if text1.length > match_maxBits &&
           diff_levenshtein(diffs).to_f / text1.length > patch_deleteThreshold
          # The end points match, but the content is unacceptably bad.
          results[x] = false
        else
          diff_cleanupSemanticLossless(diffs)
          index1 = 0
          patch.diffs.each do |op, data|
            # Map the position in text1 onto the text actually found.
            index2 = diff_xIndex(diffs, index1) if op != :equal
            if op == :insert # Insertion
              text = text[0, start_loc + index2] + data +
                     text[(start_loc + index2)..-1]
            elsif op == :delete # Deletion
              text = text[0, start_loc + index2] +
                     text[(start_loc + diff_xIndex(diffs, index1 + data.length))..-1]
            end
            index1 += data.length if op != :delete
          end
        end
      end
    end
  end

  # Strip the padding off.
  text = text[null_padding.length...-null_padding.length]
  [text, results]
end
|
1490
|
+
|
1491
|
+
# Add some padding on text start and end so that edges can match
# something. Intended to be called only from within patch_apply.
#
# Every patch is shifted forward by the padding length; the first patch gets
# padding at the start of its first equality and the last patch at the end
# of its last equality. Returns the padding string, made of the characters
# with code points 1..patch_margin.
def patch_addPadding(patches)
  padding_length = patch_margin
  null_padding = (1..padding_length).map { |code| code.chr(Encoding::UTF_8) }.join

  # Bump all the patches forward.
  patches.each do |each_patch|
    each_patch.start1 += padding_length
    each_patch.start2 += padding_length
  end

  # Add some padding on start of first diff.
  head = patches.first
  head_diffs = head.diffs
  if head_diffs.empty? || head_diffs.first[0] != :equal
    # Add nullPadding equality.
    head_diffs.unshift([:equal, null_padding])
    head.start1 -= padding_length # Should be 0.
    head.start2 -= padding_length # Should be 0.
    head.length1 += padding_length
    head.length2 += padding_length
  elsif padding_length > head_diffs.first[1].length
    # Grow first equality.
    leading = head_diffs.first[1]
    extra_length = padding_length - leading.length
    head_diffs.first[1] = null_padding[leading.length..-1] + leading
    head.start1 -= extra_length
    head.start2 -= extra_length
    head.length1 += extra_length
    head.length2 += extra_length
  end

  # Add some padding on end of last diff.
  tail = patches.last
  tail_diffs = tail.diffs
  if tail_diffs.empty? || tail_diffs.last[0] != :equal
    # Add nullPadding equality.
    tail_diffs.push([:equal, null_padding])
    tail.length1 += padding_length
    tail.length2 += padding_length
  elsif padding_length > tail_diffs.last[1].length
    # Grow last equality.
    extra_length = padding_length - tail_diffs.last[1].length
    tail_diffs.last[1] += null_padding[0, extra_length]
    tail.length1 += extra_length
    tail.length2 += extra_length
  end

  null_padding
end
|
1541
|
+
|
1542
|
+
# Look through the patches and break up any which are longer than the
# maximum limit of the match algorithm.
#
# The patches array is modified in place: each oversized patch is removed
# and replaced by a sequence of smaller patches, each carrying up to
# patch_margin characters of context from its neighbours.
def patch_splitMax(patches)
  patch_size = match_maxBits

  x = 0
  while x < patches.length
    if patches[x].length1 > patch_size
      big_patch = patches[x]
      # Remove the big old patch
      patches[x, 1] = []
      x -= 1
      start1 = big_patch.start1
      start2 = big_patch.start2
      pre_context = ''
      while !big_patch.diffs.empty?
        # Create one of several smaller patches.
        patch = PatchObj.new
        empty = true
        patch.start1 = start1 - pre_context.length
        patch.start2 = start2 - pre_context.length
        unless pre_context.empty?
          patch.length1 = patch.length2 = pre_context.length
          patch.diffs.push([:equal, pre_context])
        end

        while !big_patch.diffs.empty? && patch.length1 < patch_size - patch_margin
          diff = big_patch.diffs.first
          if diff[0] == :insert
            # Insertions are harmless.
            patch.length2 += diff[1].length
            start2 += diff[1].length
            patch.diffs.push(big_patch.diffs.shift)
            empty = false
          elsif diff[0] == :delete && patch.diffs.length == 1 &&
                patch.diffs.first[0] == :equal && diff[1].length > 2 * patch_size
            # This is a large deletion. Let it pass in one chunk.
            patch.length1 += diff[1].length
            start1 += diff[1].length
            empty = false
            patch.diffs.push(big_patch.diffs.shift)
          else
            # Deletion or equality. Only take as much as we can stomach.
            diff_text = diff[1][0, patch_size - patch.length1 - patch_margin]
            patch.length1 += diff_text.length
            start1 += diff_text.length
            if diff[0] == :equal
              patch.length2 += diff_text.length
              start2 += diff_text.length
            else
              empty = false
            end
            patch.diffs.push([diff[0], diff_text])
            if diff_text == big_patch.diffs.first[1]
              big_patch.diffs.shift
            else
              big_patch.diffs.first[1] = big_patch.diffs.first[1][diff_text.length..-1]
            end
          end
        end

        # Compute the head context for the next patch: the last patch_margin
        # characters of this patch's result text, or all of it when shorter.
        # (A negative-start slice returns nil on a short string, which would
        # silently drop the context.)
        patched_text = diff_text2(patch.diffs)
        pre_context = if patched_text.length > patch_margin
                        patched_text[-patch_margin..-1]
                      else
                        patched_text
                      end

        # Append the end context for this patch.
        post_context = diff_text1(big_patch.diffs)[0...patch_margin] || ''
        unless post_context.empty?
          patch.length1 += post_context.length
          patch.length2 += post_context.length
          if !patch.diffs.empty? && patch.diffs.last[0] == :equal
            patch.diffs.last[1] += post_context
          else
            patch.diffs.push([:equal, post_context])
          end
        end

        # Only keep patches that contain at least one actual edit.
        if !empty
          x += 1
          patches[x, 0] = [patch]
        end
      end
    end
    x += 1
  end
end
|
1626
|
+
end
|