json-diff 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 29a90b679f5bf30e17ce0b2cabd2963a9a68ea03
4
+ data.tar.gz: 06c46f1cb99d0564522c30507df1f65fbcca003b
5
+ SHA512:
6
+ metadata.gz: 613a0223292d0d84d7bf32a46b5f58468336251351fb1d243032172162c24a53c22f56474c1873898c492930d2283227302345febbfccdbdd9cad4c152474174
7
+ data.tar.gz: c1457cdf32d04f6f368b49b7dde261751ac1c9ef7350578e766ae63a598afa37d10d7e8edb4eb30a2590df9c0cec3bfb3f5dcdfb64b54dcd30f1211161525ee7
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
# Fetch gems over TLS; a plain-HTTP source can be tampered with in transit.
source "https://rubygems.org"
gemspec

group :test do
  gem 'rake'
  # The Rakefile loads rspec/core/rake_task, so rspec must be in the bundle.
  gem 'rspec'
end
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2015 Captain Train
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
data/Makefile ADDED
@@ -0,0 +1,4 @@
1
+ install:
2
+ gem build json-diff.gemspec && sudo gem install ./json-diff-*.gem
3
+
4
+ .PHONY: install
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # `json-diff`
2
+
3
+ *Take two Ruby objects that can be serialized to JSON. Output an array of operations (additions, deletions, moves) that would convert the first one to the second one.*
4
+
5
+ ```bash
6
+ gem install json-diff # Or `gem 'json-diff'` in your Gemfile.
7
+ ```
8
+
9
+ ```ruby
10
+ require 'json-diff'
11
+ JsonDiff.diff(1, 2)
12
+ #> [{:op => :replace, :path => "/", :value => 2}]
13
+ ```
14
+
15
+ Outputs [RFC6902][]. Look at [hana][] for a JSON patch algorithm that can use this output.
16
+
17
+ [RFC6902]: http://www.rfc-editor.org/rfc/rfc6902.txt
18
+ [hana]: https://github.com/tenderlove/hana
19
+
20
+ # Heart
21
+
22
+ - Recursive similarity computation between any two Ruby values.
23
+ - For arrays, match elements above a certain level of similarity pairwise, and treat them as a move.
24
+ - Matching happens highest-similarity first.
25
+ - Move operations are generated by detecting rings in the list of moved elements (e.g., A → B → C → A).
26
+
27
+ Pros:
28
+
29
+ - For lists which are not necessarily ordered, this approach yields far better results than LCS.
30
+ - Move operations require no custom code to match elements.
31
+
32
+ Cons:
33
+
34
+ - This approach's quality is heavily reliant on how good the similarity algorithm is. Empirically, it yields sensible output. It can be improved by a user-defined procedure.
35
+ - There is a computational overhead to the default similarity computation that scales with the total number of entities in the structure.
36
+
37
+ # Plans & Bugs
38
+
39
+ Roughly ordered by priority.
40
+
41
+ - Support adding a custom procedure which computes similarities.
42
+ - Support LCS as an option. (The default will remain what yields the best results, regardless of the time it takes.)
43
+ - Support specifying a depth for similarity computation.
44
+
45
+ ---
46
+
47
+ See the LICENSE file for licensing information.
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+
3
+ require 'bundler'
4
+ Bundler::GemHelper.install_tasks
5
+
6
+ require 'rspec/core/rake_task'
7
+ RSpec::Core::RakeTask.new(:spec)
8
+
9
+ task default: :spec
data/json-diff.gemspec ADDED
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.push File.expand_path('../lib', __FILE__)
2
+ require 'json-diff/version'
3
+
4
# Gem packaging metadata for json-diff.
Gem::Specification.new do |s|
  s.name        = 'json-diff'
  s.license     = 'MIT'
  s.version     = JsonDiff::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Captain Train']
  s.email       = ['ttyl@captaintrain.com']
  s.homepage    = 'https://github.com/captaintrain/json-diff'
  s.summary     = %q{Compute the difference between two JSON-serializable Ruby objects.}
  s.description = %q{Take two Ruby objects that can be serialized to JSON. Output an array of operations (additions, deletions, moves) that would convert the first one to the second one.}
  # NUL-separated output keeps file names containing newlines or quotes intact.
  s.files       = `git ls-files -z`.split("\x0")
end
data/lib/json-diff.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'json-diff/diff'
2
+ require 'json-diff/index-map'
3
+ require 'json-diff/operation'
4
+ require 'json-diff/version'
@@ -0,0 +1,303 @@
1
+ module JsonDiff
2
+
3
# Compute the list of operations that turns `before` into `after`.
#
# before, after - Values to compare (Hash, Array, or anything comparable
#                 with `!=`).
# opts          - Hash of options:
#                 :path      - JSON pointer prefix of the emitted paths
#                              (defaults to '/').
#                 :additions - when false, no :add operation is emitted
#                              (defaults to true).
#                 :moves     - when false, no :move operation is emitted
#                              (defaults to true).
#
# Returns an Array of operation hashes as built by the add, remove, replace
# and move helpers.
def self.diff(before, after, opts = {})
  path = opts[:path] || '/'
  include_addition = opts[:additions].nil? ? true : opts[:additions]
  include_moves = opts[:moves].nil? ? true : opts[:moves]

  changes = []

  if before.is_a?(Hash)
    if !after.is_a?(Hash)
      # replace takes (path, value): only the new value is recorded.
      changes << replace(path, after)
    else
      # Keys that disappeared.
      (before.keys - after.keys).each do |key|
        changes << remove(extend_json_pointer(path, key), before[key])
      end

      # Keys that appeared.
      if include_addition
        (after.keys - before.keys).each do |key|
          changes << add(extend_json_pointer(path, key), after[key])
        end
      end

      # Keys present on both sides are compared recursively.
      (before.keys & after.keys).each do |key|
        inner_path = extend_json_pointer(path, key)
        changes += diff(before[key], after[key], opts.merge(path: inner_path))
      end
    end
  elsif before.is_a?(Array)
    if !after.is_a?(Array)
      changes << replace(path, after)
    elsif before.empty?
      if include_addition
        after.each_with_index do |item, index|
          changes << add(extend_json_pointer(path, index), item)
        end
      end
    elsif after.empty?
      before.each do |item|
        # Delete elements from the start: each removal shifts the rest down.
        changes << remove(extend_json_pointer(path, 0), item)
      end
    else
      pairing = array_pairing(before, after)
      # FIXME: detect replacements.

      # All detected moves that do not reach the similarity limit are deleted
      # and re-added.
      pairing[:pairs].select! do |pair|
        similar_enough = pair[2] >= 0.5
        unless similar_enough
          pairing[:removed] << pair[0]
          pairing[:added] << pair[1]
        end
        similar_enough
      end

      # Rewrites pairing in place (sorted removals/additions, move pairs).
      array_changes(pairing)

      pairing[:removed].each do |before_index|
        changes << remove(extend_json_pointer(path, before_index), before[before_index])
      end

      pairing[:pairs].each do |before_index, after_index, orig_before, orig_after|
        inner_before_path = extend_json_pointer(path, before_index)
        inner_after_path = extend_json_pointer(path, after_index)

        if before_index != after_index && include_moves
          changes << move(inner_before_path, inner_after_path)
        end
        # Nested changes are expressed at the position the element now has.
        changes += diff(before[orig_before], after[orig_after], opts.merge(path: inner_after_path))
      end

      if include_addition
        pairing[:added].each do |after_index|
          changes << add(extend_json_pointer(path, after_index), after[after_index])
        end
      end
    end
  elsif before != after
    changes << replace(path, after)
  end

  changes
end
99
+
100
# Pair elements of `before` with elements of `after`, preferring the most
# similar couples (as scored by `similarity`).
#
# Returns:
# {pairs: [[before index, after index, similarity]],
#  removed: [before index],
#  added: [after index]}
def self.array_pairing(before, after)
  # With nothing on one side there is nothing to pair; the rows below would
  # be empty and the top-level sort could not read their best couple.
  if before.empty? || after.empty?
    return {
      pairs: [],
      removed: (0...before.size).to_a,
      added: (0...after.size).to_a,
    }
  end

  # similarities[before index][after index] is the score of that couple.
  similarities = before.map do |before_item|
    after.map { |after_item| similarity(before_item, after_item) }
  end
  score = lambda { |couple| similarities[couple[0]][couple[1]] }

  # One row per before index, holding every [before index, after index]
  # couple, most similar first. Sorting is O(n^2 log(n)) overall.
  indices = before.each_index.map do |before_index|
    couples = after.each_index.map { |after_index| [before_index, after_index] }
    couples.sort! { |a, b| score.call(b) <=> score.call(a) }
  end
  # Rows whose best couple is the most similar come first.
  indices.sort! { |a, b| score.call(b[0]) <=> score.call(a[0]) }

  # Map from indices to boolean (true if paired).
  before_paired = {}
  after_paired = {}

  pair_count = [before.size, after.size].min

  pairs = pair_count.times.map do
    # First row not yet paired, then its best couple whose after index is free.
    row = indices.find { |couples| !before_paired[couples[0][0]] }
    before_index, after_index = row.find { |couple| !after_paired[couple[1]] }
    before_paired[before_index] = true
    after_paired[after_index] = true

    [before_index, after_index, similarities[before_index][after_index]]
  end

  # Whatever is left over on the longer side is an addition or a removal.
  if before.size < after.size
    added = (0...after.size).to_a - after_paired.keys
    removed = []
  else
    removed = (0...before.size).to_a - before_paired.keys
    added = []
  end

  {
    pairs: pairs,
    removed: removed,
    added: added,
  }
end
170
+
171
# Compute an arbitrary score of how probable it is that `after` is an
# edited version of `before`.
#
# Returns a Float: 0.0 when the values have different classes, 1.0 for equal
# scalars, and an average of the nested scores for hashes and arrays.
def self.similarity(before, after)
  return 0.0 if before.class != after.class

  # FIXME: call custom similarity procedure.

  if before.is_a?(Hash)
    return (after.empty? ? 1.0 : 0.0) if before.empty?

    # Average similarity between keys' value.
    # We don't consider key renames.
    scores = before.map do |key, before_item|
      # A key missing from `after` is a mismatch, even when the old value
      # is nil (a missing key would otherwise read back as nil and match).
      after.key?(key) ? similarity(before_item, after[key]) : 0.0
    end

    scores.reduce(:+) / scores.size
  elsif before.is_a?(Array)
    # NOTE(review): an empty `before` scores 1.0 against any array, unlike
    # the Hash branch above - confirm this asymmetry is intended.
    return 1.0 if before.empty?

    # The most likely match between an element in the old and the new list is
    # presumably the right one, so we take the average of the maximum
    # similarity between each elements of the list.
    best_scores = before.map do |before_item|
      after.map { |after_item| similarity(before_item, after_item) }.max || 0.0
    end

    best_scores.reduce(:+) / best_scores.size
  elsif before == after
    1.0
  else
    0.0
  end
end
213
+
214
# Turn a pairing into index-accurate removals, moves and additions.
# The pairing hash is modified in place and returned.
#
# Input:
# {pairs: [[before index, after index, similarity]],
#  removed: [before index],
#  added: [after index]}
#
# Output:
# {removed: [before index],
#  pairs: [[before index, after index,
#           original before index, original after index]],
#  added: [after index]}
#
# In the output, removed is sorted highest index first, added lowest index
# first, and pairs holds one entry per paired element, in the order the
# moves must be performed. Entries whose before and after index are equal
# describe an element that is already in place: no move is needed, but the
# element may still have nested changes.
def self.array_changes(pairing)
  # We perform removals starting from the highest index.
  # That way, they don't offset their own.
  pairing[:removed].sort!.reverse!
  pairing[:added].sort!

  # First, map indices from before to after removals.
  removal_map = IndexMaps.new
  pairing[:removed].each { |rm| removal_map.removal(rm) }
  # And map indices from after to before additions
  # (removals, since it is reversed). Like real removals, they must be
  # undone highest index first: each pivot is expressed in the original
  # `after` coordinates, so it may only be compared with indices that no
  # lower pivot has shifted yet.
  addition_map = IndexMaps.new
  pairing[:added].reverse_each { |ad| addition_map.removal(ad) }

  moves = {}
  orig_before = {}
  orig_after = {}
  pairing[:pairs].each do |before, after|
    mapped_before = removal_map.map(before)
    mapped_after = addition_map.map(after)
    orig_before[mapped_before] = before
    orig_after[mapped_after] = after
    moves[mapped_before] = mapped_after
  end

  # Now, detect rings within the pairs.
  # If whatever was at position i was sent to position j, whatever was at
  # position j cannot have stayed at j. Both mapped index sets cover the same
  # positions, so following the moves always leads back to the start.
  rings = []
  until moves.empty?
    # i goes to j. j goes to (…). k goes to i.
    ring = []
    origin, target = moves.shift
    first_origin = origin
    while target != first_origin
      ring << origin
      origin = target
      target = moves[target]
      moves.delete(origin)
    end
    ring << origin
    rings << ring
  end
  # rings is of the form [[i,j,k], …]

  # Finally, we can register the moves.
  # The idea is, if the whole ring moves instantaneously,
  # no element outside of the ring changed position.
  pairs = []
  rings.each do |ring|
    orig_ring = ring.map { |i| [orig_before[i], orig_after[i]] }
    len = ring.size
    len.times do |i|
      ni = (i + 1) % len # next i
      # Always record the pair, even when the element is already in place,
      # so that its nested changes are not lost.
      pairs << [ring[i], ring[ni], orig_ring[i][0], orig_ring[ni][1]]

      # ring[j] already holds the current position of the j-th element, so
      # only the shift caused by this very move is applied to it.
      step_map = IndexMaps.new
      step_map.removal(ring[i])
      step_map.addition(ring[ni])
      ((i + 1)...len).each { |j| ring[j] = step_map.map(ring[j]) }
    end
  end

  pairing[:pairs] = pairs

  pairing
end
302
+
303
+ end
@@ -0,0 +1,51 @@
1
module JsonDiff

  # Ordered sequence of index shifts. Each recorded addition or removal is
  # applied, in recording order, to the indices passed to #map.
  class IndexMaps
    def initialize
      @maps = []
    end

    # Record that an element was inserted at `index`.
    def addition(index)
      @maps << AdditionIndexMap.new(index)
    end

    # Record that the element at `index` was removed.
    def removal(index)
      @maps << RemovalIndexMap.new(index)
    end

    # Return `index` after every recorded shift has been applied to it.
    def map(index)
      @maps.reduce(index) { |current, index_map| index_map.map(current) }
    end
  end

  # A single shift around a pivot index. The base behaviour is the one of an
  # insertion: indices at or after the pivot move up by one.
  class IndexMap
    def initialize(pivot)
      @pivot = pivot
    end

    def map(index)
      index >= @pivot ? index + 1 : index
    end
  end

  # Shift caused by an insertion at the pivot (inherits the base behaviour).
  class AdditionIndexMap < IndexMap
  end

  # Shift caused by a removal at the pivot: indices at or after the pivot
  # move down by one.
  class RemovalIndexMap < IndexMap
    def map(index)
      index >= @pivot ? index - 1 : index
    end
  end

end
@@ -0,0 +1,47 @@
1
module JsonDiff

  # Convert a list of keys (strings, symbols or numbers) to an RFC6901 JSON
  # pointer. http://tools.ietf.org/html/rfc6901
  #
  # Every key is converted to a string and escaped ('~' becomes '~0', then
  # '/' becomes '~1'), so that symbol keys are escaped like string keys.
  def self.json_pointer(path)
    escaped_path = path.map do |key|
      key.to_s.gsub('~', '~0').gsub('/', '~1')
    end.join('/')

    "/#{escaped_path}"
  end

  # Add a key to a JSON pointer. The pointer '/' is treated as the root.
  def self.extend_json_pointer(pointer, key)
    if pointer == '/'
      json_pointer([key])
    else
      pointer + json_pointer([key])
    end
  end

  # Operation adding `value` at `path`.
  def self.add(path, value)
    {op: :add, path: path, value: value}
  end

  # Operation removing what is at `path`. The removed value is included
  # unless it is nil.
  def self.remove(path, value)
    if value.nil?
      {op: :remove, path: path}
    else
      {op: :remove, path: path, value: value}
    end
  end

  # Operation replacing what is at `path` with `value`.
  def self.replace(path, value)
    {op: :replace, path: path, value: value}
  end

  # Operation moving what is at `source` to `target`.
  def self.move(source, target)
    {op: :move, from: source, path: target}
  end

end
@@ -0,0 +1,3 @@
1
module JsonDiff
  # Version of the gem, read by json-diff.gemspec. Frozen so that it cannot
  # be modified in place at runtime.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,57 @@
1
+ require 'spec_helper'
2
+
3
+ describe JsonDiff do
4
+ it "should be able to diff two empty arrays" do
5
+ diff = JsonDiff.diff([], [])
6
+ expect(diff).to eql([])
7
+ end
8
+
9
+ it "should be able to diff an empty array with a filled one" do
10
+ diff = JsonDiff.diff([], [1, 2, 3])
11
+ expect(diff).to eql([
12
+ {op: :add, path: "/0", value: 1},
13
+ {op: :add, path: "/1", value: 2},
14
+ {op: :add, path: "/2", value: 3},
15
+ ])
16
+ end
17
+
18
+ it "should be able to diff a filled array with an empty one" do
19
+ diff = JsonDiff.diff([1, 2, 3], [])
20
+ expect(diff).to eql([
21
+ {op: :remove, path: "/0", value: 1},
22
+ {op: :remove, path: "/0", value: 2},
23
+ {op: :remove, path: "/0", value: 3},
24
+ ])
25
+ end
26
+
27
+ it "should be able to diff a 1-array with a filled one" do
28
+ diff = JsonDiff.diff([0], [1, 2, 3])
29
+ expect(diff).to eql([
30
+ {op: :remove, path: "/0", value: 0},
31
+ {op: :add, path: "/0", value: 1},
32
+ {op: :add, path: "/1", value: 2},
33
+ {op: :add, path: "/2", value: 3},
34
+ ])
35
+ end
36
+
37
+ it "should be able to diff a filled array with a 1-array" do
38
+ diff = JsonDiff.diff([1, 2, 3], [0])
39
+ expect(diff).to eql([
40
+ {op: :remove, path: "/2", value: 3},
41
+ {op: :remove, path: "/1", value: 2},
42
+ {op: :remove, path: "/0", value: 1},
43
+ {op: :add, path: "/0", value: 0},
44
+ ])
45
+ end
46
+
47
+ it "should be able to diff two integer arrays" do
48
+ diff = JsonDiff.diff([1, 2, 3, 4, 5], [6, 4, 3, 2])
49
+ expect(diff).to eql([
50
+ {op: :remove, path: "/4", value: 5},
51
+ {op: :remove, path: "/0", value: 1},
52
+ {op: :move, from: "/0", path: "/2"},
53
+ {op: :move, from: "/1", path: "/0"},
54
+ {op: :add, path: "/0", value: 6},
55
+ ])
56
+ end
57
+ end
@@ -0,0 +1,4 @@
1
+ $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
2
+
3
+ require 'rubygems'
4
+ require 'json-diff'
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: json-diff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Captain Train
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-06-11 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Take two Ruby objects that can be serialized to JSON. Output an array
14
+ of operations (additions, deletions, moves) that would convert the first one to
15
+ the second one.
16
+ email:
17
+ - ttyl@captaintrain.com
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .rspec
23
+ - Gemfile
24
+ - LICENSE
25
+ - Makefile
26
+ - README.md
27
+ - Rakefile
28
+ - json-diff.gemspec
29
+ - lib/json-diff.rb
30
+ - lib/json-diff/diff.rb
31
+ - lib/json-diff/index-map.rb
32
+ - lib/json-diff/operation.rb
33
+ - lib/json-diff/version.rb
34
+ - spec/json-diff/diff_spec.rb
35
+ - spec/spec_helper.rb
36
+ homepage: http://github.com/captaintrain/json-diff
37
+ licenses:
38
+ - MIT
39
+ metadata: {}
40
+ post_install_message:
41
+ rdoc_options: []
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - '>='
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 2.0.14
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: Compute the difference between two JSON-serializable Ruby objects.
60
+ test_files: []