hashdiff 0.3.7 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rubocop.yml +32 -0
- data/.travis.yml +3 -5
- data/Gemfile +3 -1
- data/README.md +42 -31
- data/Rakefile +9 -4
- data/changelog.md +31 -3
- data/hashdiff.gemspec +26 -12
- data/lib/hashdiff.rb +4 -0
- data/lib/hashdiff/compare_hashes.rb +69 -0
- data/lib/hashdiff/diff.rb +61 -116
- data/lib/hashdiff/lcs.rb +27 -30
- data/lib/hashdiff/lcs_compare_arrays.rb +32 -0
- data/lib/hashdiff/linear_compare_array.rb +10 -6
- data/lib/hashdiff/patch.rb +5 -5
- data/lib/hashdiff/util.rb +44 -35
- data/lib/hashdiff/version.rb +4 -2
- data/spec/hashdiff/best_diff_spec.rb +44 -43
- data/spec/hashdiff/diff_array_spec.rb +19 -19
- data/spec/hashdiff/diff_spec.rb +180 -159
- data/spec/hashdiff/lcs_spec.rb +27 -26
- data/spec/hashdiff/linear_compare_array_spec.rb +20 -18
- data/spec/hashdiff/patch_spec.rb +123 -121
- data/spec/hashdiff/readme_spec.rb +15 -0
- data/spec/hashdiff/util_spec.rb +81 -43
- data/spec/spec_helper.rb +2 -0
- metadata +59 -23
data/lib/hashdiff/diff.rb
CHANGED
@@ -1,13 +1,15 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
module Hashdiff
|
3
4
|
# Best diff two objects, which tries to generate the smallest change set using different similarity values.
|
4
5
|
#
|
5
|
-
#
|
6
|
+
# Hashdiff.best_diff is useful in case of comparing two objects which include similar hashes in arrays.
|
6
7
|
#
|
7
8
|
# @param [Array, Hash] obj1
|
8
9
|
# @param [Array, Hash] obj2
|
9
10
|
# @param [Hash] options the options to use when comparing
|
10
11
|
# * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. Set to false to allow comparing Integer, Float, BigDecimal to each other
|
12
|
+
# * :indifferent (Boolean) [false] whether to treat hash keys indifferently. Set to true to ignore differences between symbol and string keys (i.e. {a: 1} ~= {'a' => 1})
|
11
13
|
# * :delimiter (String) ['.'] the delimiter used when returning nested key references
|
12
14
|
# * :numeric_tolerance (Numeric) [0] should be a positive numeric value. The amount that the difference between two numeric values must exceed. By default, numeric values are compared exactly; with the :numeric_tolerance option, the difference between numeric values must be greater than the given value.
|
13
15
|
# * :strip (Boolean) [false] whether or not to call #strip on strings before comparing
|
@@ -22,27 +24,27 @@ module HashDiff
|
|
22
24
|
# @example
|
23
25
|
# a = {'x' => [{'a' => 1, 'c' => 3, 'e' => 5}, {'y' => 3}]}
|
24
26
|
# b = {'x' => [{'a' => 1, 'b' => 2, 'e' => 5}] }
|
25
|
-
# diff =
|
27
|
+
# diff = Hashdiff.best_diff(a, b)
|
26
28
|
# diff.should == [['-', 'x[0].c', 3], ['+', 'x[0].b', 2], ['-', 'x[1].y', 3], ['-', 'x[1]', {}]]
|
27
29
|
#
|
28
30
|
# @since 0.0.1
|
29
31
|
def self.best_diff(obj1, obj2, options = {}, &block)
|
30
32
|
options[:comparison] = block if block_given?
|
31
33
|
|
32
|
-
opts = { :
|
33
|
-
|
34
|
-
|
34
|
+
opts = { similarity: 0.3 }.merge!(options)
|
35
|
+
diffs1 = diff(obj1, obj2, opts)
|
36
|
+
count1 = count_diff diffs1
|
35
37
|
|
36
|
-
opts = { :
|
37
|
-
|
38
|
-
|
38
|
+
opts = { similarity: 0.5 }.merge!(options)
|
39
|
+
diffs2 = diff(obj1, obj2, opts)
|
40
|
+
count2 = count_diff diffs2
|
39
41
|
|
40
|
-
opts = { :
|
41
|
-
|
42
|
-
|
42
|
+
opts = { similarity: 0.8 }.merge!(options)
|
43
|
+
diffs3 = diff(obj1, obj2, opts)
|
44
|
+
count3 = count_diff diffs3
|
43
45
|
|
44
|
-
count, diffs =
|
45
|
-
|
46
|
+
count, diffs = count1 < count2 ? [count1, diffs1] : [count2, diffs2]
|
47
|
+
count < count3 ? diffs : diffs3
|
46
48
|
end
|
47
49
|
|
48
50
|
# Compute the diff of two hashes or arrays
|
@@ -51,6 +53,7 @@ module HashDiff
|
|
51
53
|
# @param [Array, Hash] obj2
|
52
54
|
# @param [Hash] options the options to use when comparing
|
53
55
|
# * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. Set to false to allow comparing Integer, Float, BigDecimal to each other
|
56
|
+
# * :indifferent (Boolean) [false] whether to treat hash keys indifferently. Set to true to ignore differences between symbol and string keys (i.e. {a: 1} ~= {'a' => 1})
|
54
57
|
# * :similarity (Numeric) [0.8] should be between (0, 1]. Meaningful if there are similar hashes in arrays. See {best_diff}.
|
55
58
|
# * :delimiter (String) ['.'] the delimiter used when returning nested key references
|
56
59
|
# * :numeric_tolerance (Numeric) [0] should be a positive numeric value. The amount that the difference between two numeric values must exceed. By default, numeric values are compared exactly; with the :numeric_tolerance option, the difference between numeric values must be greater than the given value.
|
@@ -68,20 +71,21 @@ module HashDiff
|
|
68
71
|
# a = {"a" => 1, "b" => {"b1" => 1, "b2" =>2}}
|
69
72
|
# b = {"a" => 1, "b" => {}}
|
70
73
|
#
|
71
|
-
# diff =
|
74
|
+
# diff = Hashdiff.diff(a, b)
|
72
75
|
# diff.should == [['-', 'b.b1', 1], ['-', 'b.b2', 2]]
|
73
76
|
#
|
74
77
|
# @since 0.0.1
|
75
78
|
def self.diff(obj1, obj2, options = {}, &block)
|
76
79
|
opts = {
|
77
|
-
:
|
78
|
-
:
|
79
|
-
:
|
80
|
-
:
|
81
|
-
:
|
82
|
-
:
|
83
|
-
:
|
84
|
-
:
|
80
|
+
prefix: '',
|
81
|
+
similarity: 0.8,
|
82
|
+
delimiter: '.',
|
83
|
+
strict: true,
|
84
|
+
indifferent: false,
|
85
|
+
strip: false,
|
86
|
+
numeric_tolerance: 0,
|
87
|
+
array_path: false,
|
88
|
+
use_lcs: true
|
85
89
|
}.merge!(options)
|
86
90
|
|
87
91
|
opts[:prefix] = [] if opts[:array_path] && opts[:prefix] == ''
|
@@ -92,120 +96,61 @@ module HashDiff
|
|
92
96
|
result = custom_compare(opts[:comparison], opts[:prefix], obj1, obj2)
|
93
97
|
return result if result
|
94
98
|
|
95
|
-
if obj1.nil?
|
96
|
-
return []
|
97
|
-
end
|
99
|
+
return [] if obj1.nil? && obj2.nil?
|
98
100
|
|
99
|
-
if obj1.nil?
|
100
|
-
return [['~', opts[:prefix], nil, obj2]]
|
101
|
-
end
|
101
|
+
return [['~', opts[:prefix], obj1, obj2]] if obj1.nil? || obj2.nil?
|
102
102
|
|
103
|
-
|
104
|
-
return [['~', opts[:prefix], obj1, nil]]
|
105
|
-
end
|
103
|
+
return [['~', opts[:prefix], obj1, obj2]] unless comparable?(obj1, obj2, opts[:strict])
|
106
104
|
|
107
|
-
|
108
|
-
return [['~', opts[:prefix], obj1, obj2]]
|
109
|
-
end
|
105
|
+
return LcsCompareArrays.call(obj1, obj2, opts) if obj1.is_a?(Array) && opts[:use_lcs]
|
110
106
|
|
111
|
-
|
112
|
-
if obj1.is_a?(Array) && opts[:use_lcs]
|
113
|
-
changeset = diff_array_lcs(obj1, obj2, opts) do |lcs|
|
114
|
-
# use a's index for similarity
|
115
|
-
lcs.each do |pair|
|
116
|
-
prefix = prefix_append_array_index(opts[:prefix], pair[0], opts)
|
117
|
-
result.concat(diff(obj1[pair[0]], obj2[pair[1]], opts.merge(:prefix => prefix)))
|
118
|
-
end
|
119
|
-
end
|
107
|
+
return LinearCompareArray.call(obj1, obj2, opts) if obj1.is_a?(Array) && !opts[:use_lcs]
|
120
108
|
|
121
|
-
|
122
|
-
change_key = prefix_append_array_index(opts[:prefix], change[1], opts)
|
123
|
-
if change[0] == '-'
|
124
|
-
result << ['-', change_key, change[2]]
|
125
|
-
elsif change[0] == '+'
|
126
|
-
result << ['+', change_key, change[2]]
|
127
|
-
end
|
128
|
-
end
|
129
|
-
elsif obj1.is_a?(Array) && !opts[:use_lcs]
|
130
|
-
result.concat(LinearCompareArray.call(obj1, obj2, opts))
|
131
|
-
elsif obj1.is_a?(Hash)
|
132
|
-
|
133
|
-
deleted_keys = obj1.keys - obj2.keys
|
134
|
-
common_keys = obj1.keys & obj2.keys
|
135
|
-
added_keys = obj2.keys - obj1.keys
|
136
|
-
|
137
|
-
# add deleted properties
|
138
|
-
deleted_keys.sort_by{|k,v| k.to_s }.each do |k|
|
139
|
-
change_key = prefix_append_key(opts[:prefix], k, opts)
|
140
|
-
custom_result = custom_compare(opts[:comparison], change_key, obj1[k], nil)
|
141
|
-
|
142
|
-
if custom_result
|
143
|
-
result.concat(custom_result)
|
144
|
-
else
|
145
|
-
result << ['-', change_key, obj1[k]]
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
# recursive comparison for common keys
|
150
|
-
common_keys.sort_by{|k,v| k.to_s }.each do |k|
|
151
|
-
prefix = prefix_append_key(opts[:prefix], k, opts)
|
152
|
-
result.concat(diff(obj1[k], obj2[k], opts.merge(:prefix => prefix)))
|
153
|
-
end
|
109
|
+
return CompareHashes.call(obj1, obj2, opts) if obj1.is_a?(Hash)
|
154
110
|
|
155
|
-
|
156
|
-
added_keys.sort_by{|k,v| k.to_s }.each do |k|
|
157
|
-
change_key = prefix_append_key(opts[:prefix], k, opts)
|
158
|
-
unless obj1.key?(k)
|
159
|
-
custom_result = custom_compare(opts[:comparison], change_key, nil, obj2[k])
|
160
|
-
|
161
|
-
if custom_result
|
162
|
-
result.concat(custom_result)
|
163
|
-
else
|
164
|
-
result << ['+', change_key, obj2[k]]
|
165
|
-
end
|
166
|
-
end
|
167
|
-
end
|
168
|
-
else
|
169
|
-
return [] if compare_values(obj1, obj2, opts)
|
170
|
-
return [['~', opts[:prefix], obj1, obj2]]
|
171
|
-
end
|
111
|
+
return [] if compare_values(obj1, obj2, opts)
|
172
112
|
|
173
|
-
|
113
|
+
[['~', opts[:prefix], obj1, obj2]]
|
174
114
|
end
|
175
115
|
|
176
116
|
# @private
|
177
117
|
#
|
178
118
|
# diff array using LCS algorithm
|
179
|
-
def self.diff_array_lcs(
|
180
|
-
|
181
|
-
:prefix => '',
|
182
|
-
:similarity => 0.8,
|
183
|
-
:delimiter => '.'
|
184
|
-
}.merge!(options)
|
119
|
+
def self.diff_array_lcs(arraya, arrayb, options = {})
|
120
|
+
return [] if arraya.empty? && arrayb.empty?
|
185
121
|
|
186
122
|
change_set = []
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
change_set << ['+', index, b[index]]
|
123
|
+
|
124
|
+
if arraya.empty?
|
125
|
+
arrayb.each_index do |index|
|
126
|
+
change_set << ['+', index, arrayb[index]]
|
192
127
|
end
|
128
|
+
|
193
129
|
return change_set
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
130
|
+
end
|
131
|
+
|
132
|
+
if arrayb.empty?
|
133
|
+
arraya.each_index do |index|
|
134
|
+
i = arraya.size - index - 1
|
135
|
+
change_set << ['-', i, arraya[i]]
|
198
136
|
end
|
137
|
+
|
199
138
|
return change_set
|
200
139
|
end
|
201
140
|
|
202
|
-
|
141
|
+
opts = {
|
142
|
+
prefix: '',
|
143
|
+
similarity: 0.8,
|
144
|
+
delimiter: '.'
|
145
|
+
}.merge!(options)
|
146
|
+
|
147
|
+
links = lcs(arraya, arrayb, opts)
|
203
148
|
|
204
149
|
# yield common
|
205
150
|
yield links if block_given?
|
206
151
|
|
207
152
|
# padding the end
|
208
|
-
links << [
|
153
|
+
links << [arraya.size, arrayb.size]
|
209
154
|
|
210
155
|
last_x = -1
|
211
156
|
last_y = -1
|
@@ -213,13 +158,13 @@ module HashDiff
|
|
213
158
|
x, y = pair
|
214
159
|
|
215
160
|
# remove from a, beginning from the end
|
216
|
-
(x > last_x + 1)
|
217
|
-
change_set << ['-', last_y + i + 1,
|
161
|
+
(x > last_x + 1) && (x - last_x - 2).downto(0).each do |i|
|
162
|
+
change_set << ['-', last_y + i + 1, arraya[i + last_x + 1]]
|
218
163
|
end
|
219
164
|
|
220
165
|
# add from b, beginning from the head
|
221
|
-
(y > last_y + 1)
|
222
|
-
change_set << ['+', last_y + i + 1,
|
166
|
+
(y > last_y + 1) && 0.upto(y - last_y - 2).each do |i|
|
167
|
+
change_set << ['+', last_y + i + 1, arrayb[i + last_y + 1]]
|
223
168
|
end
|
224
169
|
|
225
170
|
# update flags
|
data/lib/hashdiff/lcs.rb
CHANGED
@@ -1,46 +1,44 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Hashdiff
|
2
4
|
# @private
|
3
5
|
#
|
4
6
|
# calculate array difference using LCS algorithm
|
5
7
|
# http://en.wikipedia.org/wiki/Longest_common_subsequence_problem
|
6
|
-
def self.lcs(
|
7
|
-
|
8
|
+
def self.lcs(arraya, arrayb, options = {})
|
9
|
+
return [] if arraya.empty? || arrayb.empty?
|
8
10
|
|
9
|
-
opts
|
11
|
+
opts = { similarity: 0.8 }.merge!(options)
|
10
12
|
|
11
|
-
|
13
|
+
opts[:prefix] = prefix_append_array_index(opts[:prefix], '*', opts)
|
12
14
|
|
13
15
|
a_start = b_start = 0
|
14
|
-
a_finish =
|
15
|
-
b_finish =
|
16
|
+
a_finish = arraya.size - 1
|
17
|
+
b_finish = arrayb.size - 1
|
16
18
|
vector = []
|
17
19
|
|
18
20
|
lcs = []
|
19
21
|
(b_start..b_finish).each do |bi|
|
20
|
-
lcs[bi] = []
|
22
|
+
lcs[bi] = []
|
21
23
|
(a_start..a_finish).each do |ai|
|
22
|
-
if similar?(
|
23
|
-
topleft = (ai > 0
|
24
|
+
if similar?(arraya[ai], arrayb[bi], opts)
|
25
|
+
topleft = (ai > 0) && (bi > 0) ? lcs[bi - 1][ai - 1][1] : 0
|
24
26
|
lcs[bi][ai] = [:topleft, topleft + 1]
|
25
|
-
elsif
|
26
|
-
|
27
|
-
|
28
|
-
count = (top > left) ? top : left
|
27
|
+
elsif (top = bi > 0 ? lcs[bi - 1][ai][1] : 0)
|
28
|
+
left = ai > 0 ? lcs[bi][ai - 1][1] : 0
|
29
|
+
count = top > left ? top : left
|
29
30
|
|
30
|
-
direction =
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
direction = :both
|
42
|
-
end
|
43
|
-
end
|
31
|
+
direction = if top > left
|
32
|
+
:top
|
33
|
+
elsif top < left
|
34
|
+
:left
|
35
|
+
elsif bi.zero?
|
36
|
+
:top
|
37
|
+
elsif ai.zero?
|
38
|
+
:left
|
39
|
+
else
|
40
|
+
:both
|
41
|
+
end
|
44
42
|
|
45
43
|
lcs[bi][ai] = [direction, count]
|
46
44
|
end
|
@@ -49,7 +47,7 @@ module HashDiff
|
|
49
47
|
|
50
48
|
x = a_finish
|
51
49
|
y = b_finish
|
52
|
-
while x >= 0
|
50
|
+
while (x >= 0) && (y >= 0) && (lcs[y][x][1] > 0)
|
53
51
|
if lcs[y][x][0] == :both
|
54
52
|
x -= 1
|
55
53
|
elsif lcs[y][x][0] == :topleft
|
@@ -65,5 +63,4 @@ module HashDiff
|
|
65
63
|
|
66
64
|
vector
|
67
65
|
end
|
68
|
-
|
69
66
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Hashdiff
|
4
|
+
# @private
|
5
|
+
# Used to compare arrays using the lcs algorithm
|
6
|
+
class LcsCompareArrays
|
7
|
+
class << self
|
8
|
+
def call(obj1, obj2, opts = {})
|
9
|
+
result = []
|
10
|
+
|
11
|
+
changeset = Hashdiff.diff_array_lcs(obj1, obj2, opts) do |lcs|
|
12
|
+
# use a's index for similarity
|
13
|
+
lcs.each do |pair|
|
14
|
+
prefix = Hashdiff.prefix_append_array_index(opts[:prefix], pair[0], opts)
|
15
|
+
|
16
|
+
result.concat(Hashdiff.diff(obj1[pair[0]], obj2[pair[1]], opts.merge(prefix: prefix)))
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
changeset.each do |change|
|
21
|
+
next if change[0] != '-' && change[0] != '+'
|
22
|
+
|
23
|
+
change_key = Hashdiff.prefix_append_array_index(opts[:prefix], change[1], opts)
|
24
|
+
|
25
|
+
result << [change[0], change_key, change[2]]
|
26
|
+
end
|
27
|
+
|
28
|
+
result
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -1,11 +1,13 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Hashdiff
|
2
4
|
# @private
|
3
5
|
#
|
4
6
|
# Used to compare arrays in a linear complexity, which produces longer diffs
|
5
7
|
# than using the lcs algorithm but is considerably faster
|
6
8
|
class LinearCompareArray
|
7
9
|
def self.call(old_array, new_array, options = {})
|
8
|
-
instance =
|
10
|
+
instance = new(old_array, new_array, options)
|
9
11
|
instance.call
|
10
12
|
end
|
11
13
|
|
@@ -78,8 +80,8 @@ module HashDiff
|
|
78
80
|
end
|
79
81
|
|
80
82
|
def item_difference(old_item, new_item, item_index)
|
81
|
-
prefix =
|
82
|
-
|
83
|
+
prefix = Hashdiff.prefix_append_array_index(options[:prefix], item_index, options)
|
84
|
+
Hashdiff.diff(old_item, new_item, options.merge(prefix: prefix))
|
83
85
|
end
|
84
86
|
|
85
87
|
# look ahead in the new array to see if the current item appears later
|
@@ -120,6 +122,7 @@ module HashDiff
|
|
120
122
|
|
121
123
|
def append_addititions_before_match(match_index)
|
122
124
|
return unless match_index
|
125
|
+
|
123
126
|
(new_index...match_index).each { |i| append_addition(new_array[i], i) }
|
124
127
|
self.expected_additions = expected_additions - (match_index - new_index)
|
125
128
|
self.new_index = match_index
|
@@ -127,18 +130,19 @@ module HashDiff
|
|
127
130
|
|
128
131
|
def append_deletions_before_match(match_index)
|
129
132
|
return unless match_index
|
133
|
+
|
130
134
|
(old_index...match_index).each { |i| append_deletion(old_array[i], i) }
|
131
135
|
self.expected_additions = expected_additions + (match_index - new_index)
|
132
136
|
self.old_index = match_index
|
133
137
|
end
|
134
138
|
|
135
139
|
def append_addition(item, index)
|
136
|
-
key =
|
140
|
+
key = Hashdiff.prefix_append_array_index(options[:prefix], index, options)
|
137
141
|
additions << ['+', key, item]
|
138
142
|
end
|
139
143
|
|
140
144
|
def append_deletion(item, index)
|
141
|
-
key =
|
145
|
+
key = Hashdiff.prefix_append_array_index(options[:prefix], index, options)
|
142
146
|
deletions << ['-', key, item]
|
143
147
|
end
|
144
148
|
|
data/lib/hashdiff/patch.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
#
|
2
4
|
# This module provides methods to diff two hashes, and to patch and unpatch a hash
|
3
5
|
#
|
4
|
-
module
|
5
|
-
|
6
|
+
module Hashdiff
|
6
7
|
# Apply patch to object
|
7
8
|
#
|
8
9
|
# @param [Hash, Array] obj the object to be patched, can be an Array or a Hash
|
@@ -22,7 +23,7 @@ module HashDiff
|
|
22
23
|
|
23
24
|
last_part = parts.last
|
24
25
|
|
25
|
-
parent_node = node(obj, parts[0, parts.size-1])
|
26
|
+
parent_node = node(obj, parts[0, parts.size - 1])
|
26
27
|
|
27
28
|
if change[0] == '+'
|
28
29
|
if parent_node.is_a?(Array)
|
@@ -63,7 +64,7 @@ module HashDiff
|
|
63
64
|
|
64
65
|
last_part = parts.last
|
65
66
|
|
66
|
-
parent_node = node(obj, parts[0, parts.size-1])
|
67
|
+
parent_node = node(obj, parts[0, parts.size - 1])
|
67
68
|
|
68
69
|
if change[0] == '+'
|
69
70
|
if parent_node.is_a?(Array)
|
@@ -84,5 +85,4 @@ module HashDiff
|
|
84
85
|
|
85
86
|
obj
|
86
87
|
end
|
87
|
-
|
88
88
|
end
|