json-diff 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 29a90b679f5bf30e17ce0b2cabd2963a9a68ea03
4
+ data.tar.gz: 06c46f1cb99d0564522c30507df1f65fbcca003b
5
+ SHA512:
6
+ metadata.gz: 613a0223292d0d84d7bf32a46b5f58468336251351fb1d243032172162c24a53c22f56474c1873898c492930d2283227302345febbfccdbdd9cad4c152474174
7
+ data.tar.gz: c1457cdf32d04f6f368b49b7dde261751ac1c9ef7350578e766ae63a598afa37d10d7e8edb4eb30a2590df9c0cec3bfb3f5dcdfb64b54dcd30f1211161525ee7
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "http://rubygems.org"
2
+ gemspec
3
+
4
+ group :test do
5
+ gem 'rake'
6
+ end
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2015 Captain Train
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
data/Makefile ADDED
@@ -0,0 +1,4 @@
1
+ install:
2
+ gem build json-diff.gemspec && sudo gem install ./json-diff-*.gem
3
+
4
+ .PHONY: install
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # `json-diff`
2
+
3
+ *Take two Ruby objects that can be serialized to JSON. Output an array of operations (additions, deletions, moves) that would convert the first one to the second one.*
4
+
5
+ ```bash
6
+ gem install json-diff # Or `gem 'json-diff'` in your Gemfile.
7
+ ```
8
+
9
+ ```ruby
10
+ require 'json-diff'
11
+ JsonDiff.diff(1, 2)
12
+ #> [{:op => :replace, :path => "/", :value => 2}]
13
+ ```
14
+
15
+ The output follows the JSON Patch format ([RFC6902][]). See [hana][] for a JSON Patch implementation that can apply this output.
16
+
17
+ [RFC6902]: http://www.rfc-editor.org/rfc/rfc6902.txt
18
+ [hana]: https://github.com/tenderlove/hana
19
+
20
+ # Heart
21
+
22
+ - Recursive similarity computation between any two Ruby values.
23
+ - For arrays, match elements above a certain level of similarity pairwise, and treat them as a move.
24
+ - Matching happens highest-similarity first.
25
+ - Move operations are generated by detecting rings in the list of moved elements (e.g., A → B → C → A).
26
+
27
+ Pros:
28
+
29
+ - For lists which are not necessarily ordered, this approach yields far better results than LCS.
30
+ - Move operations require no custom code to match elements.
31
+
32
+ Cons:
33
+
34
+ - This approach's quality relies heavily on how good the similarity algorithm is. Empirically, it yields sensible output. It could be improved further by a user-defined similarity procedure (not yet supported; see Plans & Bugs).
35
+ - There is a computational overhead to the default similarity computation that scales with the total number of entities in the structure.
36
+
37
+ # Plans & Bugs
38
+
39
+ Roughly ordered by priority.
40
+
41
+ - Support adding a custom procedure which computes similarities.
42
+ - Support LCS as an option. (The default will remain what yields the best results, regardless of the time it takes.)
43
+ - Support specifying a depth for similarity computation.
44
+
45
+ ---
46
+
47
+ See the LICENSE file for licensing information.
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+
3
+ require 'bundler'
4
+ Bundler::GemHelper.install_tasks
5
+
6
+ require 'rspec/core/rake_task'
7
+ RSpec::Core::RakeTask.new(:spec)
8
+
9
+ task default: :spec
data/json-diff.gemspec ADDED
@@ -0,0 +1,15 @@
1
+ $LOAD_PATH.push File.expand_path('../lib', __FILE__)
2
+ require 'json-diff/version'
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = 'json-diff'
6
+ s.license = 'MIT'
7
+ s.version = JsonDiff::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ['Captain Train']
10
+ s.email = ['ttyl@captaintrain.com']
11
+ s.homepage = 'http://github.com/captaintrain/json-diff'
12
+ s.summary = %q{Compute the difference between two JSON-serializable Ruby objects.}
13
+ s.description = %q{Take two Ruby objects that can be serialized to JSON. Output an array of operations (additions, deletions, moves) that would convert the first one to the second one.}
14
+ s.files = `git ls-files`.split("\n")
15
+ end
data/lib/json-diff.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'json-diff/diff'
2
+ require 'json-diff/index-map'
3
+ require 'json-diff/operation'
4
+ require 'json-diff/version'
@@ -0,0 +1,303 @@
1
+ module JsonDiff
2
+
3
+ def self.diff(before, after, opts = {})
4
+ path = opts[:path] || '/'
5
+ include_addition = (opts[:additions] == nil) ? true : opts[:additions]
6
+ include_moves = (opts[:moves] == nil) ? true : opts[:moves]
7
+
8
+ changes = []
9
+
10
+ if before.is_a?(Hash)
11
+ if !after.is_a?(Hash)
12
+ changes << replace(path, before, after)
13
+ else
14
+ lost = before.keys - after.keys
15
+ lost.each do |key|
16
+ inner_path = extend_json_pointer(path, key)
17
+ changes << remove(inner_path, before[key])
18
+ end
19
+
20
+ if include_addition
21
+ gained = after.keys - before.keys
22
+ gained.each do |key|
23
+ inner_path = extend_json_pointer(path, key)
24
+ changes << add(inner_path, after[key])
25
+ end
26
+ end
27
+
28
+ kept = before.keys & after.keys
29
+ kept.each do |key|
30
+ inner_path = extend_json_pointer(path, key)
31
+ changes += diff(before[key], after[key], opts.merge(path: inner_path))
32
+ end
33
+ end
34
+ elsif before.is_a?(Array)
35
+ if !after.is_a?(Array)
36
+ changes << replace(path, before, after)
37
+ elsif before.size == 0
38
+ if include_addition
39
+ after.each_with_index do |item, index|
40
+ inner_path = extend_json_pointer(path, index)
41
+ changes << add(inner_path, item)
42
+ end
43
+ end
44
+ elsif after.size == 0
45
+ before.each do |item|
46
+ # Delete elements from the start.
47
+ inner_path = extend_json_pointer(path, 0)
48
+ changes << remove(inner_path, item)
49
+ end
50
+ else
51
+ pairing = array_pairing(before, after)
52
+ # FIXME: detect replacements.
53
+
54
+ # All detected moves that do not reach the similarity limit are deleted
55
+ # and re-added.
56
+ pairing[:pairs].select! do |pair|
57
+ sim = pair[2]
58
+ kept = (sim >= 0.5)
59
+ if !kept
60
+ pairing[:removed] << pair[0]
61
+ pairing[:added] << pair[1]
62
+ end
63
+ kept
64
+ end
65
+
66
+ array_changes(pairing)
67
+
68
+ pairing[:removed].each do |before_index|
69
+ inner_path = extend_json_pointer(path, before_index)
70
+ changes << remove(inner_path, before[before_index])
71
+ end
72
+
73
+ pairing[:pairs].each do |pair|
74
+ before_index, after_index, orig_before, orig_after = pair
75
+ inner_before_path = extend_json_pointer(path, before_index)
76
+ inner_after_path = extend_json_pointer(path, after_index)
77
+
78
+ if before_index != after_index && include_moves
79
+ changes << move(inner_before_path, inner_after_path)
80
+ end
81
+ changes += diff(before[orig_before], after[orig_after], opts.merge(path: inner_after_path))
82
+ end
83
+
84
+ if include_addition
85
+ pairing[:added].each do |after_index|
86
+ inner_path = extend_json_pointer(path, after_index)
87
+ changes << add(inner_path, after[after_index])
88
+ end
89
+ end
90
+ end
91
+ else
92
+ if before != after
93
+ changes << replace(path, before, after)
94
+ end
95
+ end
96
+
97
+ changes
98
+ end
99
+
100
+ # {pairs: [[before index, after index, similarity]],
101
+ # removed: [before index],
102
+ # added: [after index]}
103
+ def self.array_pairing(before, after)
104
+ # Array containing the array of similarities from before to after.
105
+ similarities = before.map do |before_item|
106
+ after.map do |after_item|
107
+ similarity(before_item, after_item)
108
+ end
109
+ end
110
+
111
+ # Array containing the array of couples of indices, sorted by similarity.
112
+ indices = before.map.with_index do |before_item, before_index|
113
+ after.map.with_index do |after_item, after_index|
114
+ [before_index, after_index]
115
+ end
116
+ end
117
+
118
+ # Sort them in O(n^2 log(n)).
119
+ indices.map! do |couples|
120
+ couples.sort! do |a, b|
121
+ a_before_index = a[0]
122
+ b_before_index = b[0]
123
+ a_after_index = a[1]
124
+ b_after_index = b[1]
125
+
126
+ similarities[b_before_index][b_after_index] <=> similarities[a_before_index][a_after_index]
127
+ end
128
+ end
129
+ # Sort the toplevel.
130
+ indices.sort! do |a, b|
131
+ a_top_before_index = a[0][0]
132
+ a_top_after_index = a[0][1]
133
+ b_top_before_index = b[0][0]
134
+ b_top_after_index = b[0][1]
135
+
136
+ similarities[b_top_before_index][b_top_after_index] <=> similarities[a_top_before_index][a_top_after_index]
137
+ end
138
+
139
+ # Map from indices to boolean (true if paired).
140
+ before_paired = {}
141
+ after_paired = {}
142
+
143
+ num_pairs = [before.size, after.size].min
144
+
145
+ pairs = (0...num_pairs).map do |_|
146
+ unpaired_before_index = indices.index { |a| !before_paired[a[0][0]] }
147
+ unpaired_after_index = indices[unpaired_before_index].index { |a| !after_paired[a[1]] }
148
+ unpaired_couple = indices[unpaired_before_index][unpaired_after_index]
149
+ before_paired[unpaired_couple[0]] = true
150
+ after_paired[unpaired_couple[1]] = true
151
+
152
+ [unpaired_couple[0], unpaired_couple[1],
153
+ similarities[unpaired_couple[0]][unpaired_couple[1]]]
154
+ end
155
+
156
+ if before.size < after.size
157
+ added = after.map.with_index { |_, i| i} - after_paired.keys
158
+ removed = []
159
+ else
160
+ removed = before.map.with_index { |_, i| i } - before_paired.keys
161
+ added = []
162
+ end
163
+
164
+ {
165
+ pairs: pairs,
166
+ removed: removed,
167
+ added: added,
168
+ }
169
+ end
170
+
171
+ # Compute an arbitrary notion of how probable it is that `before` and `after` are two versions of the same value, as a score between 0.0 and 1.0.
172
+ def self.similarity(before, after)
173
+ return 0.0 if before.class != after.class
174
+
175
+ # FIXME: call custom similarity procedure.
176
+
177
+ if before.is_a?(Hash)
178
+ if before.size == 0
179
+ if after.size == 0
180
+ return 1.0
181
+ else
182
+ return 0.0
183
+ end
184
+ end
185
+
186
+ # Average similarity between keys' value.
187
+ # We don't consider key renames.
188
+ similarities = []
189
+ before.each do |before_key, before_item|
190
+ similarities << similarity(before_item, after[before_key])
191
+ end
192
+
193
+ similarities.reduce(:+) / similarities.size
194
+ elsif before.is_a?(Array)
195
+ return 1.0 if before.size == 0
196
+
197
+ # The most likely match between an element in the old and the new list is
198
+ # presumably the right one, so we take the average of the maximum
199
+ # similarity found for each element of the old list.
200
+ similarities = before.map do |before_item|
201
+ after.map do |after_item|
202
+ similarity(before_item, after_item)
203
+ end.max || 0.0
204
+ end
205
+
206
+ similarities.reduce(:+) / similarities.size
207
+ elsif before == after
208
+ 1.0
209
+ else
210
+ 0.0
211
+ end
212
+ end
213
+
214
+ # Input:
215
+ # {pairs: [[before index, after index, similarity]],
216
+ # removed: [before index],
217
+ # added: [after index]}
218
+ #
219
+ # Output:
220
+ # {removed: [before index],
221
+ # pairs: [[before index, after index,
222
+ # original before index, original after index]],
223
+ # added: [after index]}
224
+ def self.array_changes(pairing)
225
+ # We perform removals starting from the highest index.
226
+ # That way, no removal shifts the index of a removal that comes after it.
227
+ pairing[:removed].sort!.reverse!
228
+ pairing[:added].sort!
229
+
230
+ # First, map indices from before to after removals.
231
+ removal_map = IndexMaps.new
232
+ pairing[:removed].each { |rm| removal_map.removal(rm) }
233
+ # And map indices from after to before additions
234
+ # (removals, since it is reversed).
235
+ addition_map = IndexMaps.new
236
+ pairing[:added].each { |ad| addition_map.removal(ad) }
237
+
238
+ moves = {}
239
+ orig_before = {}
240
+ orig_after = {}
241
+ pairing[:pairs].each do |before, after|
242
+ mapped_before = removal_map.map(before)
243
+ mapped_after = addition_map.map(after)
244
+ orig_before[mapped_before] = before
245
+ orig_after[mapped_after] = after
246
+ moves[mapped_before] = mapped_after
247
+ end
248
+
249
+ # Now, detect rings within the pairs.
250
+ # The proof is, if whatever was at position i was sent to position j,
251
+ # whatever was at position j cannot have stayed at j.
252
+ # By induction, there is a ring.
253
+ # Oh, and a piece of the proof is that the arrays have the same length.
254
+ # Trivially. Right. Hey, this is not an interview!
255
+ rings = []
256
+ while moves.size > 0
257
+ # i goes to j. j goes to (…). k goes to i.
258
+ ring = []
259
+ pair = moves.shift
260
+ origin, target = pair
261
+ first_origin = origin
262
+ while target != first_origin
263
+ ring << origin
264
+ origin = target
265
+ target = moves[target]
266
+ moves.delete(origin)
267
+ end
268
+ ring << origin
269
+ rings << ring
270
+ end
271
+ # rings is of the form [[i,j,k], …]
272
+
273
+ # Finally, we can register the moves.
274
+ # The idea is, if the whole ring moves instantaneously,
275
+ # no element outside of the ring changed position.
276
+ pairs = []
277
+ rings.each do |ring|
278
+ orig_ring = ring.map { |i| [orig_before[i], orig_after[i]] }
279
+ ring_map = IndexMaps.new
280
+ len = ring.size
281
+ i = 0
282
+ while i < len
283
+ ni = (i + 1) % len # next i
284
+ if ring[i] != ring[ni]
285
+ pairs << [ring[i], ring[ni], orig_ring[i][0], orig_ring[ni][1]]
286
+ end
287
+ ring_map.removal(ring[i])
288
+ ring_map.addition(ring[ni])
289
+ j = i + 1
290
+ while j < len
291
+ ring[j] = ring_map.map(ring[j])
292
+ j += 1
293
+ end
294
+ i += 1
295
+ end
296
+ end
297
+
298
+ pairing[:pairs] = pairs
299
+
300
+ pairing
301
+ end
302
+
303
+ end
@@ -0,0 +1,51 @@
1
+ module JsonDiff
2
+
3
+ class IndexMaps
4
+ def initialize
5
+ @maps = []
6
+ end
7
+
8
+ def addition(index)
9
+ @maps << AdditionIndexMap.new(index)
10
+ end
11
+
12
+ def removal(index)
13
+ @maps << RemovalIndexMap.new(index)
14
+ end
15
+
16
+ def map(index)
17
+ @maps.each do |map|
18
+ index = map.map(index)
19
+ end
20
+ index
21
+ end
22
+ end
23
+
24
+ class IndexMap
25
+ def initialize(pivot)
26
+ @pivot = pivot
27
+ end
28
+
29
+ def map(index)
30
+ if index >= @pivot
31
+ index + 1
32
+ else
33
+ index
34
+ end
35
+ end
36
+ end
37
+
38
+ class AdditionIndexMap < IndexMap
39
+ end
40
+
41
+ class RemovalIndexMap < IndexMap
42
+ def map(index)
43
+ if index >= @pivot
44
+ index - 1
45
+ else
46
+ index
47
+ end
48
+ end
49
+ end
50
+
51
+ end
@@ -0,0 +1,47 @@
1
+ module JsonDiff
2
+
3
+ # Convert a list of strings or numbers to an RFC6901 JSON pointer.
4
+ # http://tools.ietf.org/html/rfc6901
5
+ def self.json_pointer(path)
6
+ escaped_path = path.map do |key|
7
+ if key.is_a?(String)
8
+ key.gsub('~', '~0')
9
+ .gsub('/', '~1')
10
+ else
11
+ key.to_s
12
+ end
13
+ end.join('/')
14
+
15
+ "/#{escaped_path}"
16
+ end
17
+
18
+ # Add a key to a JSON pointer.
19
+ def self.extend_json_pointer(pointer, key)
20
+ if pointer == '/'
21
+ json_pointer([key])
22
+ else
23
+ pointer + json_pointer([key])
24
+ end
25
+ end
26
+
27
+ def self.add(path, value)
28
+ {op: :add, path: path, value: value}
29
+ end
30
+
31
+ def self.remove(path, value)
32
+ if value != nil
33
+ {op: :remove, path: path, value: value}
34
+ else
35
+ {op: :remove, path: path}
36
+ end
37
+ end
38
+
39
+ def self.replace(path, value)
40
+ {op: :replace, path: path, value: value}
41
+ end
42
+
43
+ def self.move(source, target)
44
+ {op: :move, from: source, path: target}
45
+ end
46
+
47
+ end
@@ -0,0 +1,3 @@
1
+ module JsonDiff
2
+ VERSION = '0.1.0'
3
+ end
@@ -0,0 +1,57 @@
1
+ require 'spec_helper'
2
+
3
+ describe JsonDiff do
4
+ it "should be able to diff two empty arrays" do
5
+ diff = JsonDiff.diff([], [])
6
+ expect(diff).to eql([])
7
+ end
8
+
9
+ it "should be able to diff an empty array with a filled one" do
10
+ diff = JsonDiff.diff([], [1, 2, 3])
11
+ expect(diff).to eql([
12
+ {op: :add, path: "/0", value: 1},
13
+ {op: :add, path: "/1", value: 2},
14
+ {op: :add, path: "/2", value: 3},
15
+ ])
16
+ end
17
+
18
+ it "should be able to diff a filled array with an empty one" do
19
+ diff = JsonDiff.diff([1, 2, 3], [])
20
+ expect(diff).to eql([
21
+ {op: :remove, path: "/0", value: 1},
22
+ {op: :remove, path: "/0", value: 2},
23
+ {op: :remove, path: "/0", value: 3},
24
+ ])
25
+ end
26
+
27
+ it "should be able to diff a 1-array with a filled one" do
28
+ diff = JsonDiff.diff([0], [1, 2, 3])
29
+ expect(diff).to eql([
30
+ {op: :remove, path: "/0", value: 0},
31
+ {op: :add, path: "/0", value: 1},
32
+ {op: :add, path: "/1", value: 2},
33
+ {op: :add, path: "/2", value: 3},
34
+ ])
35
+ end
36
+
37
+ it "should be able to diff a filled array with a 1-array" do
38
+ diff = JsonDiff.diff([1, 2, 3], [0])
39
+ expect(diff).to eql([
40
+ {op: :remove, path: "/2", value: 3},
41
+ {op: :remove, path: "/1", value: 2},
42
+ {op: :remove, path: "/0", value: 1},
43
+ {op: :add, path: "/0", value: 0},
44
+ ])
45
+ end
46
+
47
+ it "should be able to diff two integer arrays" do
48
+ diff = JsonDiff.diff([1, 2, 3, 4, 5], [6, 4, 3, 2])
49
+ expect(diff).to eql([
50
+ {op: :remove, path: "/4", value: 5},
51
+ {op: :remove, path: "/0", value: 1},
52
+ {op: :move, from: "/0", path: "/2"},
53
+ {op: :move, from: "/1", path: "/0"},
54
+ {op: :add, path: "/0", value: 6},
55
+ ])
56
+ end
57
+ end
@@ -0,0 +1,4 @@
1
+ $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
2
+
3
+ require 'rubygems'
4
+ require 'json-diff'
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: json-diff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Captain Train
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-06-11 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Take two Ruby objects that can be serialized to JSON. Output an array
14
+ of operations (additions, deletions, moves) that would convert the first one to
15
+ the second one.
16
+ email:
17
+ - ttyl@captaintrain.com
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .rspec
23
+ - Gemfile
24
+ - LICENSE
25
+ - Makefile
26
+ - README.md
27
+ - Rakefile
28
+ - json-diff.gemspec
29
+ - lib/json-diff.rb
30
+ - lib/json-diff/diff.rb
31
+ - lib/json-diff/index-map.rb
32
+ - lib/json-diff/operation.rb
33
+ - lib/json-diff/version.rb
34
+ - spec/json-diff/diff_spec.rb
35
+ - spec/spec_helper.rb
36
+ homepage: http://github.com/captaintrain/json-diff
37
+ licenses:
38
+ - MIT
39
+ metadata: {}
40
+ post_install_message:
41
+ rdoc_options: []
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - '>='
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 2.0.14
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: Compute the difference between two JSON-serializable Ruby objects.
60
+ test_files: []