hashdiff 0.3.7 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rubocop.yml +32 -0
- data/.travis.yml +3 -5
- data/Gemfile +3 -1
- data/README.md +42 -31
- data/Rakefile +9 -4
- data/changelog.md +31 -3
- data/hashdiff.gemspec +26 -12
- data/lib/hashdiff.rb +4 -0
- data/lib/hashdiff/compare_hashes.rb +69 -0
- data/lib/hashdiff/diff.rb +61 -116
- data/lib/hashdiff/lcs.rb +27 -30
- data/lib/hashdiff/lcs_compare_arrays.rb +32 -0
- data/lib/hashdiff/linear_compare_array.rb +10 -6
- data/lib/hashdiff/patch.rb +5 -5
- data/lib/hashdiff/util.rb +44 -35
- data/lib/hashdiff/version.rb +4 -2
- data/spec/hashdiff/best_diff_spec.rb +44 -43
- data/spec/hashdiff/diff_array_spec.rb +19 -19
- data/spec/hashdiff/diff_spec.rb +180 -159
- data/spec/hashdiff/lcs_spec.rb +27 -26
- data/spec/hashdiff/linear_compare_array_spec.rb +20 -18
- data/spec/hashdiff/patch_spec.rb +123 -121
- data/spec/hashdiff/readme_spec.rb +15 -0
- data/spec/hashdiff/util_spec.rb +81 -43
- data/spec/spec_helper.rb +2 -0
- metadata +59 -23
data/lib/hashdiff/diff.rb
CHANGED
@@ -1,13 +1,15 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
module Hashdiff
|
3
4
|
# Computes the best diff of two objects by trying different similarity values and returning the smallest change set.
|
4
5
|
#
|
5
|
-
#
|
6
|
+
# Hashdiff.best_diff is useful in case of comparing two objects which include similar hashes in arrays.
|
6
7
|
#
|
7
8
|
# @param [Array, Hash] obj1
|
8
9
|
# @param [Array, Hash] obj2
|
9
10
|
# @param [Hash] options the options to use when comparing
|
10
11
|
# * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. Set to false to allow comparing Integer, Float, BigDecimal to each other
|
12
|
+
# * :indifferent (Boolean) [false] whether to treat hash keys indifferently. Set to true to ignore differences between symbol and string keys (i.e. {a: 1} ~= {'a' => 1})
|
11
13
|
# * :delimiter (String) ['.'] the delimiter used when returning nested key references
|
12
14
|
# * :numeric_tolerance (Numeric) [0] should be a positive numeric value. The amount by which two numeric values must differ to be reported as different. By default, numeric values are compared exactly; with the :numeric_tolerance option, the difference between numeric values must be greater than the given value.
|
13
15
|
# * :strip (Boolean) [false] whether or not to call #strip on strings before comparing
|
@@ -22,27 +24,27 @@ module HashDiff
|
|
22
24
|
# @example
|
23
25
|
# a = {'x' => [{'a' => 1, 'c' => 3, 'e' => 5}, {'y' => 3}]}
|
24
26
|
# b = {'x' => [{'a' => 1, 'b' => 2, 'e' => 5}] }
|
25
|
-
# diff =
|
27
|
+
# diff = Hashdiff.best_diff(a, b)
|
26
28
|
# diff.should == [['-', 'x[0].c', 3], ['+', 'x[0].b', 2], ['-', 'x[1].y', 3], ['-', 'x[1]', {}]]
|
27
29
|
#
|
28
30
|
# @since 0.0.1
|
29
31
|
def self.best_diff(obj1, obj2, options = {}, &block)
|
30
32
|
options[:comparison] = block if block_given?
|
31
33
|
|
32
|
-
opts = { :
|
33
|
-
|
34
|
-
|
34
|
+
opts = { similarity: 0.3 }.merge!(options)
|
35
|
+
diffs1 = diff(obj1, obj2, opts)
|
36
|
+
count1 = count_diff diffs1
|
35
37
|
|
36
|
-
opts = { :
|
37
|
-
|
38
|
-
|
38
|
+
opts = { similarity: 0.5 }.merge!(options)
|
39
|
+
diffs2 = diff(obj1, obj2, opts)
|
40
|
+
count2 = count_diff diffs2
|
39
41
|
|
40
|
-
opts = { :
|
41
|
-
|
42
|
-
|
42
|
+
opts = { similarity: 0.8 }.merge!(options)
|
43
|
+
diffs3 = diff(obj1, obj2, opts)
|
44
|
+
count3 = count_diff diffs3
|
43
45
|
|
44
|
-
count, diffs =
|
45
|
-
|
46
|
+
count, diffs = count1 < count2 ? [count1, diffs1] : [count2, diffs2]
|
47
|
+
count < count3 ? diffs : diffs3
|
46
48
|
end
|
47
49
|
|
48
50
|
# Compute the diff of two hashes or arrays
|
@@ -51,6 +53,7 @@ module HashDiff
|
|
51
53
|
# @param [Array, Hash] obj2
|
52
54
|
# @param [Hash] options the options to use when comparing
|
53
55
|
# * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. Set to false to allow comparing Integer, Float, BigDecimal to each other
|
56
|
+
# * :indifferent (Boolean) [false] whether to treat hash keys indifferently. Set to true to ignore differences between symbol and string keys (i.e. {a: 1} ~= {'a' => 1})
|
54
57
|
# * :similarity (Numeric) [0.8] should be between (0, 1]. Meaningful if there are similar hashes in arrays. See {best_diff}.
|
55
58
|
# * :delimiter (String) ['.'] the delimiter used when returning nested key references
|
56
59
|
# * :numeric_tolerance (Numeric) [0] should be a positive numeric value. The amount by which two numeric values must differ to be reported as different. By default, numeric values are compared exactly; with the :numeric_tolerance option, the difference between numeric values must be greater than the given value.
|
@@ -68,20 +71,21 @@ module HashDiff
|
|
68
71
|
# a = {"a" => 1, "b" => {"b1" => 1, "b2" =>2}}
|
69
72
|
# b = {"a" => 1, "b" => {}}
|
70
73
|
#
|
71
|
-
# diff =
|
74
|
+
# diff = Hashdiff.diff(a, b)
|
72
75
|
# diff.should == [['-', 'b.b1', 1], ['-', 'b.b2', 2]]
|
73
76
|
#
|
74
77
|
# @since 0.0.1
|
75
78
|
def self.diff(obj1, obj2, options = {}, &block)
|
76
79
|
opts = {
|
77
|
-
:
|
78
|
-
:
|
79
|
-
:
|
80
|
-
:
|
81
|
-
:
|
82
|
-
:
|
83
|
-
:
|
84
|
-
:
|
80
|
+
prefix: '',
|
81
|
+
similarity: 0.8,
|
82
|
+
delimiter: '.',
|
83
|
+
strict: true,
|
84
|
+
indifferent: false,
|
85
|
+
strip: false,
|
86
|
+
numeric_tolerance: 0,
|
87
|
+
array_path: false,
|
88
|
+
use_lcs: true
|
85
89
|
}.merge!(options)
|
86
90
|
|
87
91
|
opts[:prefix] = [] if opts[:array_path] && opts[:prefix] == ''
|
@@ -92,120 +96,61 @@ module HashDiff
|
|
92
96
|
result = custom_compare(opts[:comparison], opts[:prefix], obj1, obj2)
|
93
97
|
return result if result
|
94
98
|
|
95
|
-
if obj1.nil?
|
96
|
-
return []
|
97
|
-
end
|
99
|
+
return [] if obj1.nil? && obj2.nil?
|
98
100
|
|
99
|
-
if obj1.nil?
|
100
|
-
return [['~', opts[:prefix], nil, obj2]]
|
101
|
-
end
|
101
|
+
return [['~', opts[:prefix], obj1, obj2]] if obj1.nil? || obj2.nil?
|
102
102
|
|
103
|
-
|
104
|
-
return [['~', opts[:prefix], obj1, nil]]
|
105
|
-
end
|
103
|
+
return [['~', opts[:prefix], obj1, obj2]] unless comparable?(obj1, obj2, opts[:strict])
|
106
104
|
|
107
|
-
|
108
|
-
return [['~', opts[:prefix], obj1, obj2]]
|
109
|
-
end
|
105
|
+
return LcsCompareArrays.call(obj1, obj2, opts) if obj1.is_a?(Array) && opts[:use_lcs]
|
110
106
|
|
111
|
-
|
112
|
-
if obj1.is_a?(Array) && opts[:use_lcs]
|
113
|
-
changeset = diff_array_lcs(obj1, obj2, opts) do |lcs|
|
114
|
-
# use a's index for similarity
|
115
|
-
lcs.each do |pair|
|
116
|
-
prefix = prefix_append_array_index(opts[:prefix], pair[0], opts)
|
117
|
-
result.concat(diff(obj1[pair[0]], obj2[pair[1]], opts.merge(:prefix => prefix)))
|
118
|
-
end
|
119
|
-
end
|
107
|
+
return LinearCompareArray.call(obj1, obj2, opts) if obj1.is_a?(Array) && !opts[:use_lcs]
|
120
108
|
|
121
|
-
|
122
|
-
change_key = prefix_append_array_index(opts[:prefix], change[1], opts)
|
123
|
-
if change[0] == '-'
|
124
|
-
result << ['-', change_key, change[2]]
|
125
|
-
elsif change[0] == '+'
|
126
|
-
result << ['+', change_key, change[2]]
|
127
|
-
end
|
128
|
-
end
|
129
|
-
elsif obj1.is_a?(Array) && !opts[:use_lcs]
|
130
|
-
result.concat(LinearCompareArray.call(obj1, obj2, opts))
|
131
|
-
elsif obj1.is_a?(Hash)
|
132
|
-
|
133
|
-
deleted_keys = obj1.keys - obj2.keys
|
134
|
-
common_keys = obj1.keys & obj2.keys
|
135
|
-
added_keys = obj2.keys - obj1.keys
|
136
|
-
|
137
|
-
# add deleted properties
|
138
|
-
deleted_keys.sort_by{|k,v| k.to_s }.each do |k|
|
139
|
-
change_key = prefix_append_key(opts[:prefix], k, opts)
|
140
|
-
custom_result = custom_compare(opts[:comparison], change_key, obj1[k], nil)
|
141
|
-
|
142
|
-
if custom_result
|
143
|
-
result.concat(custom_result)
|
144
|
-
else
|
145
|
-
result << ['-', change_key, obj1[k]]
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
# recursive comparison for common keys
|
150
|
-
common_keys.sort_by{|k,v| k.to_s }.each do |k|
|
151
|
-
prefix = prefix_append_key(opts[:prefix], k, opts)
|
152
|
-
result.concat(diff(obj1[k], obj2[k], opts.merge(:prefix => prefix)))
|
153
|
-
end
|
109
|
+
return CompareHashes.call(obj1, obj2, opts) if obj1.is_a?(Hash)
|
154
110
|
|
155
|
-
|
156
|
-
added_keys.sort_by{|k,v| k.to_s }.each do |k|
|
157
|
-
change_key = prefix_append_key(opts[:prefix], k, opts)
|
158
|
-
unless obj1.key?(k)
|
159
|
-
custom_result = custom_compare(opts[:comparison], change_key, nil, obj2[k])
|
160
|
-
|
161
|
-
if custom_result
|
162
|
-
result.concat(custom_result)
|
163
|
-
else
|
164
|
-
result << ['+', change_key, obj2[k]]
|
165
|
-
end
|
166
|
-
end
|
167
|
-
end
|
168
|
-
else
|
169
|
-
return [] if compare_values(obj1, obj2, opts)
|
170
|
-
return [['~', opts[:prefix], obj1, obj2]]
|
171
|
-
end
|
111
|
+
return [] if compare_values(obj1, obj2, opts)
|
172
112
|
|
173
|
-
|
113
|
+
[['~', opts[:prefix], obj1, obj2]]
|
174
114
|
end
|
175
115
|
|
176
116
|
# @private
|
177
117
|
#
|
178
118
|
# diff array using LCS algorithm
|
179
|
-
def self.diff_array_lcs(
|
180
|
-
|
181
|
-
:prefix => '',
|
182
|
-
:similarity => 0.8,
|
183
|
-
:delimiter => '.'
|
184
|
-
}.merge!(options)
|
119
|
+
def self.diff_array_lcs(arraya, arrayb, options = {})
|
120
|
+
return [] if arraya.empty? && arrayb.empty?
|
185
121
|
|
186
122
|
change_set = []
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
change_set << ['+', index, b[index]]
|
123
|
+
|
124
|
+
if arraya.empty?
|
125
|
+
arrayb.each_index do |index|
|
126
|
+
change_set << ['+', index, arrayb[index]]
|
192
127
|
end
|
128
|
+
|
193
129
|
return change_set
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
130
|
+
end
|
131
|
+
|
132
|
+
if arrayb.empty?
|
133
|
+
arraya.each_index do |index|
|
134
|
+
i = arraya.size - index - 1
|
135
|
+
change_set << ['-', i, arraya[i]]
|
198
136
|
end
|
137
|
+
|
199
138
|
return change_set
|
200
139
|
end
|
201
140
|
|
202
|
-
|
141
|
+
opts = {
|
142
|
+
prefix: '',
|
143
|
+
similarity: 0.8,
|
144
|
+
delimiter: '.'
|
145
|
+
}.merge!(options)
|
146
|
+
|
147
|
+
links = lcs(arraya, arrayb, opts)
|
203
148
|
|
204
149
|
# yield common
|
205
150
|
yield links if block_given?
|
206
151
|
|
207
152
|
# padding the end
|
208
|
-
links << [
|
153
|
+
links << [arraya.size, arrayb.size]
|
209
154
|
|
210
155
|
last_x = -1
|
211
156
|
last_y = -1
|
@@ -213,13 +158,13 @@ module HashDiff
|
|
213
158
|
x, y = pair
|
214
159
|
|
215
160
|
# remove from a, beginning from the end
|
216
|
-
(x > last_x + 1)
|
217
|
-
change_set << ['-', last_y + i + 1,
|
161
|
+
(x > last_x + 1) && (x - last_x - 2).downto(0).each do |i|
|
162
|
+
change_set << ['-', last_y + i + 1, arraya[i + last_x + 1]]
|
218
163
|
end
|
219
164
|
|
220
165
|
# add from b, beginning from the head
|
221
|
-
(y > last_y + 1)
|
222
|
-
change_set << ['+', last_y + i + 1,
|
166
|
+
(y > last_y + 1) && 0.upto(y - last_y - 2).each do |i|
|
167
|
+
change_set << ['+', last_y + i + 1, arrayb[i + last_y + 1]]
|
223
168
|
end
|
224
169
|
|
225
170
|
# update flags
|
data/lib/hashdiff/lcs.rb
CHANGED
@@ -1,46 +1,44 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Hashdiff
|
2
4
|
# @private
|
3
5
|
#
|
4
6
|
# calculate array difference using LCS algorithm
|
5
7
|
# http://en.wikipedia.org/wiki/Longest_common_subsequence_problem
|
6
|
-
def self.lcs(
|
7
|
-
|
8
|
+
def self.lcs(arraya, arrayb, options = {})
|
9
|
+
return [] if arraya.empty? || arrayb.empty?
|
8
10
|
|
9
|
-
opts
|
11
|
+
opts = { similarity: 0.8 }.merge!(options)
|
10
12
|
|
11
|
-
|
13
|
+
opts[:prefix] = prefix_append_array_index(opts[:prefix], '*', opts)
|
12
14
|
|
13
15
|
a_start = b_start = 0
|
14
|
-
a_finish =
|
15
|
-
b_finish =
|
16
|
+
a_finish = arraya.size - 1
|
17
|
+
b_finish = arrayb.size - 1
|
16
18
|
vector = []
|
17
19
|
|
18
20
|
lcs = []
|
19
21
|
(b_start..b_finish).each do |bi|
|
20
|
-
lcs[bi] = []
|
22
|
+
lcs[bi] = []
|
21
23
|
(a_start..a_finish).each do |ai|
|
22
|
-
if similar?(
|
23
|
-
topleft = (ai > 0
|
24
|
+
if similar?(arraya[ai], arrayb[bi], opts)
|
25
|
+
topleft = (ai > 0) && (bi > 0) ? lcs[bi - 1][ai - 1][1] : 0
|
24
26
|
lcs[bi][ai] = [:topleft, topleft + 1]
|
25
|
-
elsif
|
26
|
-
|
27
|
-
|
28
|
-
count = (top > left) ? top : left
|
27
|
+
elsif (top = bi > 0 ? lcs[bi - 1][ai][1] : 0)
|
28
|
+
left = ai > 0 ? lcs[bi][ai - 1][1] : 0
|
29
|
+
count = top > left ? top : left
|
29
30
|
|
30
|
-
direction =
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
direction = :both
|
42
|
-
end
|
43
|
-
end
|
31
|
+
direction = if top > left
|
32
|
+
:top
|
33
|
+
elsif top < left
|
34
|
+
:left
|
35
|
+
elsif bi.zero?
|
36
|
+
:top
|
37
|
+
elsif ai.zero?
|
38
|
+
:left
|
39
|
+
else
|
40
|
+
:both
|
41
|
+
end
|
44
42
|
|
45
43
|
lcs[bi][ai] = [direction, count]
|
46
44
|
end
|
@@ -49,7 +47,7 @@ module HashDiff
|
|
49
47
|
|
50
48
|
x = a_finish
|
51
49
|
y = b_finish
|
52
|
-
while x >= 0
|
50
|
+
while (x >= 0) && (y >= 0) && (lcs[y][x][1] > 0)
|
53
51
|
if lcs[y][x][0] == :both
|
54
52
|
x -= 1
|
55
53
|
elsif lcs[y][x][0] == :topleft
|
@@ -65,5 +63,4 @@ module HashDiff
|
|
65
63
|
|
66
64
|
vector
|
67
65
|
end
|
68
|
-
|
69
66
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Hashdiff
|
4
|
+
# @private
|
5
|
+
# Used to compare arrays using the lcs algorithm
|
6
|
+
class LcsCompareArrays
|
7
|
+
class << self
|
8
|
+
def call(obj1, obj2, opts = {})
|
9
|
+
result = []
|
10
|
+
|
11
|
+
changeset = Hashdiff.diff_array_lcs(obj1, obj2, opts) do |lcs|
|
12
|
+
# use a's index for similarity
|
13
|
+
lcs.each do |pair|
|
14
|
+
prefix = Hashdiff.prefix_append_array_index(opts[:prefix], pair[0], opts)
|
15
|
+
|
16
|
+
result.concat(Hashdiff.diff(obj1[pair[0]], obj2[pair[1]], opts.merge(prefix: prefix)))
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
changeset.each do |change|
|
21
|
+
next if change[0] != '-' && change[0] != '+'
|
22
|
+
|
23
|
+
change_key = Hashdiff.prefix_append_array_index(opts[:prefix], change[1], opts)
|
24
|
+
|
25
|
+
result << [change[0], change_key, change[2]]
|
26
|
+
end
|
27
|
+
|
28
|
+
result
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -1,11 +1,13 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Hashdiff
|
2
4
|
# @private
|
3
5
|
#
|
4
6
|
# Used to compare arrays in linear time, which produces longer diffs
|
5
7
|
# than using the lcs algorithm but is considerably faster
|
6
8
|
class LinearCompareArray
|
7
9
|
def self.call(old_array, new_array, options = {})
|
8
|
-
instance =
|
10
|
+
instance = new(old_array, new_array, options)
|
9
11
|
instance.call
|
10
12
|
end
|
11
13
|
|
@@ -78,8 +80,8 @@ module HashDiff
|
|
78
80
|
end
|
79
81
|
|
80
82
|
def item_difference(old_item, new_item, item_index)
|
81
|
-
prefix =
|
82
|
-
|
83
|
+
prefix = Hashdiff.prefix_append_array_index(options[:prefix], item_index, options)
|
84
|
+
Hashdiff.diff(old_item, new_item, options.merge(prefix: prefix))
|
83
85
|
end
|
84
86
|
|
85
87
|
# look ahead in the new array to see if the current item appears later
|
@@ -120,6 +122,7 @@ module HashDiff
|
|
120
122
|
|
121
123
|
def append_addititions_before_match(match_index)
|
122
124
|
return unless match_index
|
125
|
+
|
123
126
|
(new_index...match_index).each { |i| append_addition(new_array[i], i) }
|
124
127
|
self.expected_additions = expected_additions - (match_index - new_index)
|
125
128
|
self.new_index = match_index
|
@@ -127,18 +130,19 @@ module HashDiff
|
|
127
130
|
|
128
131
|
def append_deletions_before_match(match_index)
|
129
132
|
return unless match_index
|
133
|
+
|
130
134
|
(old_index...match_index).each { |i| append_deletion(old_array[i], i) }
|
131
135
|
self.expected_additions = expected_additions + (match_index - new_index)
|
132
136
|
self.old_index = match_index
|
133
137
|
end
|
134
138
|
|
135
139
|
def append_addition(item, index)
|
136
|
-
key =
|
140
|
+
key = Hashdiff.prefix_append_array_index(options[:prefix], index, options)
|
137
141
|
additions << ['+', key, item]
|
138
142
|
end
|
139
143
|
|
140
144
|
def append_deletion(item, index)
|
141
|
-
key =
|
145
|
+
key = Hashdiff.prefix_append_array_index(options[:prefix], index, options)
|
142
146
|
deletions << ['-', key, item]
|
143
147
|
end
|
144
148
|
|
data/lib/hashdiff/patch.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
#
|
2
4
|
# This module provides methods to diff two hashes, and to patch and unpatch a hash
|
3
5
|
#
|
4
|
-
module
|
5
|
-
|
6
|
+
module Hashdiff
|
6
7
|
# Apply patch to object
|
7
8
|
#
|
8
9
|
# @param [Hash, Array] obj the object to be patched, can be an Array or a Hash
|
@@ -22,7 +23,7 @@ module HashDiff
|
|
22
23
|
|
23
24
|
last_part = parts.last
|
24
25
|
|
25
|
-
parent_node = node(obj, parts[0, parts.size-1])
|
26
|
+
parent_node = node(obj, parts[0, parts.size - 1])
|
26
27
|
|
27
28
|
if change[0] == '+'
|
28
29
|
if parent_node.is_a?(Array)
|
@@ -63,7 +64,7 @@ module HashDiff
|
|
63
64
|
|
64
65
|
last_part = parts.last
|
65
66
|
|
66
|
-
parent_node = node(obj, parts[0, parts.size-1])
|
67
|
+
parent_node = node(obj, parts[0, parts.size - 1])
|
67
68
|
|
68
69
|
if change[0] == '+'
|
69
70
|
if parent_node.is_a?(Array)
|
@@ -84,5 +85,4 @@ module HashDiff
|
|
84
85
|
|
85
86
|
obj
|
86
87
|
end
|
87
|
-
|
88
88
|
end
|