patience_diff 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
+ === 1.0.1 / 2012-06-19
2
+
3
+ * Renamed gem from 'ruby_patience_diff' to just 'patience_diff'
4
+ * Fixed basically everything
5
+ * Added command-line options
6
+ * A little bit of documentation
7
+
8
+ === 1.0.0 / 2012-06-16
9
+
10
+ * First release
@@ -0,0 +1,10 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ bin/patience_diff
6
+ lib/patience_diff.rb
7
+ lib/patience_diff/card.rb
8
+ lib/patience_diff/sequence_matcher.rb
9
+ lib/patience_diff/unified_differ.rb
10
+ lib/patience_diff/usage_error.rb
@@ -0,0 +1,61 @@
1
+ # ruby_patience_diff
2
+
3
+ * http://github.com/watt/ruby_patience_diff
4
+
5
+ ## DESCRIPTION:
6
+
7
+ A Ruby implementation of the Patience diff algorithm.
8
+
9
+ Patience Diff creates more readable diffs than other algorithms in some cases, particularly when much of the content has changed between the documents being compared. There's a great explanation and example [here][example].
10
+
11
+ Patience diff was originally written by Bram Cohen and is used in the [Bazaar][bazaar] version control system. This version is loosely based on the Python implementation in Bazaar.
12
+
13
+ [example]: http://alfedenzo.livejournal.com/170301.html
14
+ [bazaar]: http://bazaar.canonical.com/
15
+
16
+ ## INSTALL:
17
+
18
+ $ gem install patience_diff
19
+
20
+ ## USAGE:
21
+
22
+ ### Command line:
23
+
24
+ $ patience_diff [options] file-a file-b
25
+
26
+ Run with `--help` to see available options.
27
+
28
+ ### Programmatically:
29
+
30
+ left = File.read("/path/to/old").split($RS)
31
+ left_timestamp = File.mtime("/path/to/old")
32
+ right = File.read("/path/to/new").split($RS)
33
+ right_timestamp = File.mtime("/path/to/new")
34
+
35
+ differ = PatienceDiff::UnifiedDiffer.new(:context => 10)
36
+ puts differ.diff(left, right, "/path/to/old", "/path/to/new", left_timestamp, right_timestamp)
37
+
38
+ ## LICENSE:
39
+
40
+ (The MIT License)
41
+
42
+ Copyright (c) 2012 Andrew Watt
43
+
44
+ Permission is hereby granted, free of charge, to any person obtaining
45
+ a copy of this software and associated documentation files (the
46
+ 'Software'), to deal in the Software without restriction, including
47
+ without limitation the rights to use, copy, modify, merge, publish,
48
+ distribute, sublicense, and/or sell copies of the Software, and to
49
+ permit persons to whom the Software is furnished to do so, subject to
50
+ the following conditions:
51
+
52
+ The above copyright notice and this permission notice shall be
53
+ included in all copies or substantial portions of the Software.
54
+
55
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
56
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
57
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
58
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
59
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
60
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
61
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+
4
# Gem packaging is driven by Hoe; the rubyforge plugin is enabled for it.
Hoe.plugin(:rubyforge)

# Gem specification: author, runtime dependency and RubyForge project name.
Hoe.spec('patience_diff') do
  developer('Andrew Watt', 'andrew@wattornot.com')
  dependency('trollop', '~> 1.16')
  self.rubyforge_name = 'patiencediff'
end
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+ require 'English'
3
+ require 'trollop'
4
+
5
+ lib_path = File.expand_path(File.join(File.dirname(__FILE__),"..","lib"))
6
+ $LOAD_PATH.unshift(lib_path)
7
+ require 'patience_diff'
8
+
9
# Command-line entry point: parse options, read both files as arrays of
# lines, and print a unified diff of them to stdout.
#
# Usage problems print the option help to stderr; any other error prints a
# one-line message to stderr (or is re-raised with a backtrace when --debug
# is given). Both failure paths exit with status 2, the status diff(1) uses
# to report trouble.
opts = nil
begin
  opts = Trollop::options do
    banner <<-EOF
Usage: #{File.basename($0)} [options] left-file right-file
Options:
    EOF
    version "patience_diff #{PatienceDiff::VERSION}"
    opt :debug, "Debugging mode"
    opt :context, "Lines of context", :default => 3
    opt :full_context, "Don't collapse common sections; output entire files"
    opt :ignore_whitespace,
      "Ignore trailing whitespace, and treat leading whitespace as either present or not. This switch is for compatibility with diff's -b option.",
      :short => '-b'
  end

  # Exactly two positional arguments (the files to compare) are required.
  raise PatienceDiff::UsageError unless ARGV.length == 2

  left_file, right_file = *ARGV

  # Read as binary so files in arbitrary encodings can still be split on $RS.
  left = File.read(left_file, :external_encoding => Encoding::BINARY).split($RS)
  left_timestamp = File.mtime(left_file)
  right = File.read(right_file, :external_encoding => Encoding::BINARY).split($RS)
  right_timestamp = File.mtime(right_file)

  # --full-context is implemented by disabling opcode grouping in the differ.
  opts[:no_grouping] = opts[:full_context]
  formatter = PatienceDiff::UnifiedDiffer.new(opts)
  puts formatter.diff(left, right, left_file, right_file, left_timestamp, right_timestamp)

rescue PatienceDiff::UsageError
  # Re-use the parser Trollop built for the options block to print the help.
  Trollop.module_eval do
    @last_parser.educate($stderr)
  end
  exit 2
rescue StandardError => e
  # opts is still nil if the failure happened before option parsing finished,
  # so guard the lookup instead of raising a NoMethodError from the handler.
  raise if opts && opts[:debug]
  $stderr.puts "Error: #{e}"
  exit 2
end
@@ -0,0 +1,8 @@
1
+ require 'patience_diff/card'
2
+ require 'patience_diff/sequence_matcher'
3
+ require 'patience_diff/unified_differ'
4
+ require 'patience_diff/usage_error'
5
+
6
# Top-level namespace of the patience_diff gem.
module PatienceDiff
  # Release number of the gem.
  VERSION = '1.0.1'
end
@@ -0,0 +1,9 @@
1
module PatienceDiff
  # A single card used by the patience sort in SequenceMatcher.
  #
  # index    - position the card was created with
  # value    - value the card was created with
  # previous - link to another card (nil until assigned)
  class Card
    attr_accessor :index
    attr_accessor :value
    attr_accessor :previous

    # Build a card from its index and value; +previous+ is left unset.
    def initialize(index, value)
      @index, @value = index, value
    end
  end
end
@@ -0,0 +1,253 @@
1
+ require 'patience_diff/card'
2
+
3
+ module PatienceDiff
4
+ class SequenceMatcher
5
+ attr_accessor :context
6
+
7
+ def initialize(opts = {})
8
+ @context = opts[:context] || 3
9
+ end
10
+
11
+ # Generate a diff of a and b using #diff_opcodes, and split the opcode into groups
12
+ # whenever an :equal range is encountered that is longer than @context * 2.
13
+ # Returns an array of arrays of 5-tuples as described for #diff_opcodes.
14
+ def grouped_opcodes(a, b)
15
+ groups = []
16
+ last_group = []
17
+ diff_opcodes(a, b).each do |opcode|
18
+ if opcode[0] == :equal
19
+ if @context.zero?
20
+ groups << last_group
21
+ last_group = []
22
+ next
23
+ end
24
+
25
+ code, a_start, a_end, b_start, b_end = *opcode
26
+
27
+ if (a_start.zero? and b_start.zero?) or (a_end == a.length-1 and b_end == b.length-1)
28
+ threshold = @context
29
+ else
30
+ threshold = @context * 2
31
+ end
32
+
33
+ if (b_end - b_start + 1) > threshold
34
+ unless last_group.empty?
35
+ last_group << [
36
+ code,
37
+ a_start,
38
+ a_start + @context - 1,
39
+ b_start,
40
+ b_start + @context - 1
41
+ ]
42
+ groups << last_group
43
+ last_group = []
44
+ end
45
+ opcode = [
46
+ code,
47
+ a_end - @context + 1,
48
+ a_end,
49
+ b_end - @context + 1,
50
+ b_end
51
+ ]
52
+ end
53
+ end
54
+ last_group << opcode
55
+ end
56
+ groups << last_group unless last_group.one? and last_group.first[0] == :equal
57
+ groups
58
+ end
59
+
60
+ # Generate a diff of a and b, and return an array of opcodes describing that diff.
61
+ # Each opcode represents a range in a and b that is either equal, only in a,
62
+ # or only in b. Opcodes are 5-tuples, in the format:
63
+ # 0: code
64
+ # A symbol indicating the diff operation. Can be :equal, :delete, or :insert.
65
+ # 1: a_start
66
+ # Index in a where the range begins
67
+ # 2: a_end
68
+ # Index in a where the range ends.
69
+ # 3: b_start
70
+ # Index in b where the range begins
71
+ # 4: b_end
72
+ # Index in b where the range ends.
73
+ #
74
+ # For :equal, (a_end - a_start) == (b_end - b_start).
75
+ # For :delete, a_start == a_end.
76
+ # For :insert, b_start == b_end.
77
+ def diff_opcodes(a, b)
78
+ sequences = collapse_matches(match(a, b))
79
+ sequences << [a.length, b.length, 0]
80
+
81
+ a_pos = b_pos = 0
82
+ opcodes = []
83
+ sequences.each do |(i, j, len)|
84
+ if a_pos < i
85
+ opcodes << [:delete, a_pos, i-1, b_pos, b_pos]
86
+ end
87
+ if b_pos < j
88
+ opcodes << [:insert, a_pos, a_pos, b_pos, j-1]
89
+ end
90
+ if len > 0
91
+ opcodes << [:equal, i, i+len-1, j, j+len-1]
92
+ end
93
+ a_pos = i+len
94
+ b_pos = j+len
95
+ end
96
+ opcodes
97
+ end
98
+
99
+ private
100
+ def match(a, b)
101
+ matches = []
102
+ recursively_match(a, b, 0, 0, a.length, b.length) do |match|
103
+ matches << match
104
+ end
105
+ matches
106
+ end
107
+
108
+ def recursively_match(a, b, a_lo, b_lo, a_hi, b_hi)
109
+ return if a_lo == a_hi or b_lo == b_hi
110
+
111
+ last_a_pos = a_lo - 1
112
+ last_b_pos = b_lo - 1
113
+
114
+ longest_unique_subsequence(a[a_lo...a_hi], b[b_lo...b_hi]).each do |(a_pos, b_pos)|
115
+ # recurse betwen unique lines
116
+ a_pos += a_lo
117
+ b_pos += b_lo
118
+ if (last_a_pos+1 != a_pos) or (last_b_pos+1 != b_pos)
119
+ recursively_match(a, b, last_a_pos+1, last_b_pos+1, a_pos, b_pos) { |match| yield match }
120
+ end
121
+ last_a_pos = a_pos
122
+ last_b_pos = b_pos
123
+ yield [a_pos, b_pos]
124
+ end
125
+
126
+ if last_a_pos >= a_lo or last_b_pos >= b_lo
127
+ # there was at least one match
128
+ # recurse between last match and end
129
+ recursively_match(a, b, last_a_pos+1, last_b_pos+1, a_hi, b_hi) { |match| yield match }
130
+ elsif a[a_lo] == b[b_lo]
131
+ # no unique lines
132
+ # diff forward from beginning
133
+ while a_lo < a_hi and b_lo < b_hi and a[a_lo] == b[b_lo]
134
+ yield [a_lo, b_lo]
135
+ a_lo += 1
136
+ b_lo += 1
137
+ end
138
+ recursively_match(a, b, a_lo, b_lo, a_hi, b_hi) { |match| yield match }
139
+ elsif a[a_hi-1] == b[b_hi-1]
140
+ # no unique lines
141
+ # diff back from end
142
+ a_mid = a_hi - 1
143
+ b_mid = b_hi - 1
144
+ while a_mid > a_lo and b_mid > b_lo and a[a_mid-1] == b[b_mid-1]
145
+ a_mid -= 1
146
+ b_mid -= 1
147
+ end
148
+ recursively_match(a, b, a_lo, b_lo, a_mid, b_mid) { |match| yield match }
149
+ 0...(a_hi-a_mid).each do |i|
150
+ yield [a_mid+i, b_mid+i]
151
+ end
152
+ end
153
+ end
154
+
155
+ def collapse_matches(matches)
156
+ return [] if matches.empty?
157
+ sequences = []
158
+ start_a, start_b = *(matches.first)
159
+ len = 1
160
+ matches[1..-1].each do |(i_a, i_b)|
161
+ if i_a == start_a + len and i_b == start_b + len
162
+ len += 1
163
+ else
164
+ sequences << [start_a, start_b, len]
165
+ start_a = i_a
166
+ start_b = i_b
167
+ len = 1
168
+ end
169
+ end
170
+ sequences << [start_a, start_b, len]
171
+ sequences
172
+ end
173
+
174
+ def longest_unique_subsequence(a, b)
175
+ deck = Array.new(b.length)
176
+ unique_a = {}
177
+ unique_b = {}
178
+
179
+ a.each_with_index do |val, index|
180
+ if unique_a.has_key? val
181
+ unique_a[val] = nil
182
+ else
183
+ unique_a[val] = index
184
+ end
185
+ end
186
+
187
+ b.each_with_index do |val, index|
188
+ a_index = unique_a[val]
189
+ next unless a_index
190
+ dupe_index = unique_b[val]
191
+ if dupe_index
192
+ deck[dupe_index] = nil
193
+ unique_a.delete(val)
194
+ else
195
+ unique_b[val] = index
196
+ deck[index] = a_index
197
+ end
198
+ end
199
+
200
+ card = patience_sort(deck).last
201
+ result = []
202
+ while card
203
+ result.unshift [card.value, card.index]
204
+ card = card.previous
205
+ end
206
+ result
207
+ end
208
+
209
+ def patience_sort(deck)
210
+ piles = []
211
+ pile = 0
212
+ deck.each_with_index do |card_value, index|
213
+ next if card_value.nil?
214
+ card = Card.new(index, card_value)
215
+
216
+ if piles.any? and piles.last.value < card_value
217
+ pile = piles.size
218
+ elsif piles.any? and piles[pile].value < card_value and
219
+ (pile == piles.size-1 or piles[pile+1].value > card_value)
220
+ pile += 1
221
+ else
222
+ pile = bisect(piles, card_value)
223
+ end
224
+
225
+ card.previous = piles[pile-1] if pile > 0
226
+
227
+ if pile < piles.size
228
+ #puts "putting card #{card.value} on pile #{pile}"
229
+ piles[pile] = card
230
+ else
231
+ #puts "putting card #{card.value} on new pile"
232
+ piles << card
233
+ end
234
+ end
235
+
236
+ piles
237
+ end
238
+
239
+ def bisect(piles, target)
240
+ low = 0
241
+ high = piles.size - 1
242
+ while (low <= high)
243
+ mid = (low + high)/2
244
+ if piles[mid].value < target
245
+ low = mid + 1
246
+ else
247
+ high = mid - 1
248
+ end
249
+ end
250
+ low
251
+ end
252
+ end
253
+ end
@@ -0,0 +1,66 @@
1
+ require 'English'
2
+
3
module PatienceDiff
  # Formats the differences between two arrays of lines as a unified diff.
  class UnifiedDiffer
    # strftime pattern for the file header timestamps (%M is minutes).
    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%N %z"

    attr_reader :matcher
    attr_accessor :no_grouping, :line_ending, :ignore_whitespace

    # opts - options hash:
    #   :no_grouping       - emit one hunk covering both inputs entirely
    #   :line_ending       - separator used to join output lines (default $RS)
    #   :ignore_whitespace - compare lines with whitespace normalized
    # The remaining options are passed on to SequenceMatcher.new.
    # The caller's hash is left untouched.
    def initialize(opts = {})
      opts = opts.dup
      @no_grouping = opts.delete(:no_grouping)
      @line_ending = opts.delete(:line_ending) || $RS
      @ignore_whitespace = opts.delete(:ignore_whitespace)
      @matcher = SequenceMatcher.new(opts)
    end

    # Returns the unified diff of left and right (arrays of lines) as a
    # single string joined with #line_ending.
    #
    # left_name/right_name default to "Original"/"Current". A missing
    # timestamp falls back to the other one, or to the current time when
    # neither is given.
    def diff(left, right, left_name=nil, right_name=nil, left_timestamp=nil, right_timestamp=nil)
      left_name ||= "Original"
      right_name ||= "Current"
      left_timestamp ||= right_timestamp || Time.now
      right_timestamp ||= left_timestamp

      if @ignore_whitespace
        # Matching runs on normalized copies; output still uses the originals.
        a = left.map { |line| normalize_whitespace(line) }
        b = right.map { |line| normalize_whitespace(line) }
      else
        a = left
        b = right
      end

      if @no_grouping
        groups = [@matcher.diff_opcodes(a, b)]
      else
        groups = @matcher.grouped_opcodes(a, b)
      end

      hunks = groups.map { |group| unified_diff_group(left, right, group) }.flatten.compact
      [
        "--- %s\t%s" % [left_name, left_timestamp.strftime(TIMESTAMP_FORMAT)],
        "+++ %s\t%s" % [right_name, right_timestamp.strftime(TIMESTAMP_FORMAT)],
        *hunks
      ].join(@line_ending)
    end

    private

    # Strips trailing whitespace and collapses leading whitespace to a single
    # space so that lines differing only in those respects compare equal.
    def normalize_whitespace(line)
      line.rstrip.gsub(/^\s+/, ' ')
    end

    # Formats one group of opcodes as a hunk: an "@@ -start,length
    # +start,length @@" header followed by the prefixed lines.
    # Returns nil for an empty group.
    def unified_diff_group(a, b, opcodes)
      return nil if opcodes.empty?

      a_count = 0
      b_count = 0
      body = []
      opcodes.each do |(code, a_lo, a_hi, b_lo, b_hi)|
        case code
        when :equal
          a_count += a_hi - a_lo + 1
          b_count += b_hi - b_lo + 1
          b[b_lo..b_hi].each { |line| body << ' ' + line }
        when :delete
          # The b range of a :delete is only a position marker, not content.
          a_count += a_hi - a_lo + 1
          a[a_lo..a_hi].each { |line| body << '-' + line }
        when :insert
          # The a range of an :insert is only a position marker, not content.
          b_count += b_hi - b_lo + 1
          b[b_lo..b_hi].each { |line| body << '+' + line }
        end
      end

      # Line numbers are 1-based; an empty side reports the line preceding
      # the hunk instead, as the unified format prescribes.
      a_start = opcodes.first[1] + (a_count.zero? ? 0 : 1)
      b_start = opcodes.first[3] + (b_count.zero? ? 0 : 1)

      ["@@ -%d,%d +%d,%d @@" % [a_start, a_count, b_start, b_count]] + body
    end
  end
end
@@ -0,0 +1,3 @@
1
module PatienceDiff
  # Raised by the command-line tool when it is not given exactly two files.
  class UsageError < StandardError
  end
end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: patience_diff
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Watt
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: trollop
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.16'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.16'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rubyforge
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 2.0.4
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 2.0.4
46
+ - !ruby/object:Gem::Dependency
47
+ name: rdoc
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '3.10'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '3.10'
62
+ - !ruby/object:Gem::Dependency
63
+ name: hoe
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '3.0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '3.0'
78
+ description: ! 'A Ruby implementation of the Patience diff algorithm.
79
+
80
+
81
+ Patience Diff creates more readable diffs than other algorithms in some cases, particularly
82
+ when much of the content has changed between the documents being compared. There''s
83
+ a great explanation and example [here][example].
84
+
85
+
86
+ Patience diff was originally written by Bram Cohen and is used in the [Bazaar][bazaar]
87
+ version control system. This version is loosely based off the Python implementation
88
+ in Bazaar.
89
+
90
+
91
+ [example]: http://alfedenzo.livejournal.com/170301.html
92
+
93
+ [bazaar]: http://bazaar.canonical.com/'
94
+ email:
95
+ - andrew@wattornot.com
96
+ executables:
97
+ - patience_diff
98
+ extensions: []
99
+ extra_rdoc_files:
100
+ - History.txt
101
+ - Manifest.txt
102
+ files:
103
+ - History.txt
104
+ - Manifest.txt
105
+ - README.md
106
+ - Rakefile
107
+ - bin/patience_diff
108
+ - lib/patience_diff.rb
109
+ - lib/patience_diff/card.rb
110
+ - lib/patience_diff/sequence_matcher.rb
111
+ - lib/patience_diff/unified_differ.rb
112
+ - lib/patience_diff/usage_error.rb
113
+ homepage: http://github.com/watt/ruby_patience_diff
114
+ licenses: []
115
+ post_install_message:
116
+ rdoc_options:
117
+ - --main
118
+ - README.md
119
+ require_paths:
120
+ - lib
121
+ required_ruby_version: !ruby/object:Gem::Requirement
122
+ none: false
123
+ requirements:
124
+ - - ! '>='
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ requirements: []
134
+ rubyforge_project: patiencediff
135
+ rubygems_version: 1.8.24
136
+ signing_key:
137
+ specification_version: 3
138
+ summary: A Ruby implementation of the Patience diff algorithm
139
+ test_files: []