patience_diff 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ === 1.0.1 / 2012-06-19
2
+
3
+ * Renamed gem from 'ruby_patience_diff' to just 'patience_diff'
4
+ * Fixed basically everything
5
+ * Added command-line options
6
+ * A little bit of documentation
7
+
8
+ === 1.0.0 / 2012-06-16
9
+
10
+ * First release
@@ -0,0 +1,10 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ bin/patience_diff
6
+ lib/patience_diff.rb
7
+ lib/patience_diff/card.rb
8
+ lib/patience_diff/sequence_matcher.rb
9
+ lib/patience_diff/unified_differ.rb
10
+ lib/patience_diff/usage_error.rb
@@ -0,0 +1,61 @@
1
+ # ruby_patience_diff
2
+
3
+ * http://github.com/watt/ruby_patience_diff
4
+
5
+ ## DESCRIPTION:
6
+
7
+ A Ruby implementation of the Patience diff algorithm.
8
+
9
+ Patience Diff creates more readable diffs than other algorithms in some cases, particularly when much of the content has changed between the documents being compared. There's a great explanation and example [here][example].
10
+
11
+ Patience diff was originally written by Bram Cohen and is used in the [Bazaar][bazaar] version control system. This version is loosely based off the Python implementation in Bazaar.
12
+
13
+ [example]: http://alfedenzo.livejournal.com/170301.html
14
+ [bazaar]: http://bazaar.canonical.com/
15
+
16
+ ## INSTALL:
17
+
18
+ $ gem install patience_diff
19
+
20
+ ## USAGE:
21
+
22
+ ### Command line:
23
+
24
+ $ patience_diff [options] file-a file-b
25
+
26
+ Run with `--help` to see available options.
27
+
28
+ ### Programmatically:
29
+
30
+ left = File.read("/path/to/old").split($RS)
31
+ left_timestamp = File.mtime("/path/to/old")
32
+ right = File.read("/path/to/new").split($RS)
33
+ right_timestamp = File.mtime("/path/to/new")
34
+
35
+ differ = PatienceDiff::UnifiedDiffer.new(:context => 10)
36
+ puts differ.diff(left, right, "/path/to/old", "/path/to/new", left_timestamp, right_timestamp)
37
+
38
+ ## LICENSE:
39
+
40
+ (The MIT License)
41
+
42
+ Copyright (c) 2012 Andrew Watt
43
+
44
+ Permission is hereby granted, free of charge, to any person obtaining
45
+ a copy of this software and associated documentation files (the
46
+ 'Software'), to deal in the Software without restriction, including
47
+ without limitation the rights to use, copy, modify, merge, publish,
48
+ distribute, sublicense, and/or sell copies of the Software, and to
49
+ permit persons to whom the Software is furnished to do so, subject to
50
+ the following conditions:
51
+
52
+ The above copyright notice and this permission notice shall be
53
+ included in all copies or substantial portions of the Software.
54
+
55
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
56
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
57
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
58
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
59
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
60
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
61
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,10 @@
1
# Build configuration for the patience_diff gem, driven by Hoe.
require 'rubygems'
require 'hoe'

# Load Hoe's rubyforge plugin before the spec is declared.
Hoe.plugin(:rubyforge)

Hoe.spec('patience_diff') do
  developer('Andrew Watt', 'andrew@wattornot.com')

  # Dependency declared for trollop; the command-line script requires it.
  dependency('trollop', '~> 1.16')

  # The RubyForge project name differs from the gem name (no underscore).
  self.rubyforge_name = 'patiencediff'
end
@@ -0,0 +1,47 @@
1
#!/usr/bin/env ruby
# Command-line front end for PatienceDiff.
#
# Reads two files, diffs them line by line and prints a unified diff to
# standard output. Usage and runtime errors are reported on standard error
# and terminate the process with exit status 2.
require 'English'
require 'trollop'

lib_path = File.expand_path(File.join(File.dirname(__FILE__), "..", "lib"))
$LOAD_PATH.unshift(lib_path)
require 'patience_diff'

# Declared before the begin block so the rescue clauses can safely test it
# even if option parsing itself is what failed.
opts = nil

begin
  opts = Trollop::options do
    banner <<-EOF
Usage: #{File.basename($0)} [options] left-file right-file
Options:
    EOF
    version "patience_diff #{PatienceDiff::VERSION}"
    opt :debug, "Debugging mode"
    opt :context, "Lines of context", :default => 3
    opt :full_context, "Don't collapse common sections; output entire files"
    opt :ignore_whitespace,
      "Ignore trailing whitespace, and treat leading whitespace as either present or not. This switch is for compatibility with diff's -b option.",
      :short => '-b'
  end

  raise PatienceDiff::UsageError unless ARGV.length == 2

  left_file, right_file = *ARGV

  # Read as binary so files in any encoding can be split into lines.
  left = File.read(left_file, :external_encoding => Encoding::BINARY).split($RS)
  left_timestamp = File.mtime(left_file)
  right = File.read(right_file, :external_encoding => Encoding::BINARY).split($RS)
  right_timestamp = File.mtime(right_file)

  # --full-context maps onto the differ's :no_grouping option.
  opts[:no_grouping] = opts[:full_context]
  formatter = PatienceDiff::UnifiedDiffer.new(opts)
  puts formatter.diff(left, right, left_file, right_file, left_timestamp, right_timestamp)

rescue PatienceDiff::UsageError
  # Print the help text built by the most recent Trollop parser.
  Trollop.module_eval do
    @last_parser.educate($stderr)
  end
  exit 2
rescue StandardError => e
  # In debugging mode let the exception propagate with its full backtrace.
  raise if opts && opts[:debug]

  $stderr.puts "Error: #{e}"
  exit 2
end
@@ -0,0 +1,8 @@
1
+ require 'patience_diff/card'
2
+ require 'patience_diff/sequence_matcher'
3
+ require 'patience_diff/unified_differ'
4
+ require 'patience_diff/usage_error'
5
+
6
# Top-level namespace for the patience_diff gem.
module PatienceDiff
  # Version of the gem. Frozen so the shared constant cannot be mutated
  # in place by callers.
  VERSION = "1.0.1".freeze
end
@@ -0,0 +1,9 @@
1
module PatienceDiff
  # A single card used while patience sorting: it pairs a value with the
  # index it was dealt from, and can link back to a previous card.
  class Card
    attr_accessor :previous
    attr_accessor :index
    attr_accessor :value

    # index - position associated with the card.
    # value - value carried by the card.
    #
    # The previous link is left unset (nil) until assigned by the caller.
    def initialize(index, value)
      @index, @value = index, value
    end
  end
end
@@ -0,0 +1,253 @@
1
+ require 'patience_diff/card'
2
+
3
module PatienceDiff
  # Compares two sequences (typically arrays of lines) using the Patience
  # diff algorithm and describes the result as a list of opcodes.
  class SequenceMatcher
    # Number of unchanged elements kept around each change by #grouped_opcodes.
    attr_accessor :context

    # opts - Hash of options:
    #        :context - amount of context to keep around changes (default 3).
    def initialize(opts = {})
      @context = opts[:context] || 3
    end

    # Generate a diff of a and b using #diff_opcodes, and split the opcodes into
    # groups whenever an :equal range is encountered that is longer than
    # @context * 2 (or longer than @context at the very start or end of the
    # input). Long :equal ranges are trimmed to @context elements on each side.
    #
    # Returns an array of arrays of 5-tuples as described for #diff_opcodes.
    def grouped_opcodes(a, b)
      groups = []
      last_group = []
      diff_opcodes(a, b).each do |opcode|
        if opcode[0] == :equal
          # With no context every :equal range simply ends the current group.
          if @context.zero?
            groups << last_group
            last_group = []
            next
          end

          code, a_start, a_end, b_start, b_end = *opcode

          # A leading or trailing :equal range only needs context on one side.
          if (a_start.zero? && b_start.zero?) || (a_end == a.length - 1 && b_end == b.length - 1)
            threshold = @context
          else
            threshold = @context * 2
          end

          if (b_end - b_start + 1) > threshold
            unless last_group.empty?
              # Close the current group with the leading context lines.
              last_group << [
                code,
                a_start,
                a_start + @context - 1,
                b_start,
                b_start + @context - 1
              ]
              groups << last_group
              last_group = []
            end
            # Start the next group with the trailing context lines.
            opcode = [
              code,
              a_end - @context + 1,
              a_end,
              b_end - @context + 1,
              b_end
            ]
          end
        end
        last_group << opcode
      end
      # A final group holding nothing but context describes no change; drop it.
      groups << last_group unless last_group.one? && last_group.first[0] == :equal
      groups
    end

    # Generate a diff of a and b, and return an array of opcodes describing that diff.
    # Each opcode represents a range in a and b that is either equal, only in a,
    # or only in b. Opcodes are 5-tuples, in the format:
    #   0: code
    #      A symbol indicating the diff operation. Can be :equal, :delete, or :insert.
    #   1: a_start
    #      Index in a where the range begins
    #   2: a_end
    #      Index in a where the range ends (inclusive).
    #   3: b_start
    #      Index in b where the range begins
    #   4: b_end
    #      Index in b where the range ends (inclusive).
    #
    # For :equal, (a_end - a_start) == (b_end - b_start).
    # For :delete, b_start == b_end; the b range is only a position marker and
    # covers no elements of b.
    # For :insert, a_start == a_end; the a range is only a position marker and
    # covers no elements of a.
    def diff_opcodes(a, b)
      sequences = collapse_matches(match(a, b))
      # Sentinel so that anything after the last match is emitted as well.
      sequences << [a.length, b.length, 0]

      a_pos = b_pos = 0
      opcodes = []
      sequences.each do |(i, j, len)|
        if a_pos < i
          opcodes << [:delete, a_pos, i - 1, b_pos, b_pos]
        end
        if b_pos < j
          opcodes << [:insert, a_pos, a_pos, b_pos, j - 1]
        end
        if len > 0
          opcodes << [:equal, i, i + len - 1, j, j + len - 1]
        end
        a_pos = i + len
        b_pos = j + len
      end
      opcodes
    end

    private

    # Returns an array of [a_index, b_index] pairs of matching elements.
    def match(a, b)
      matches = []
      recursively_match(a, b, 0, 0, a.length, b.length) do |match|
        matches << match
      end
      matches
    end

    # Yields [a_index, b_index] pairs of matching elements found within
    # a[a_lo...a_hi] and b[b_lo...b_hi].
    def recursively_match(a, b, a_lo, b_lo, a_hi, b_hi)
      return if a_lo == a_hi || b_lo == b_hi

      last_a_pos = a_lo - 1
      last_b_pos = b_lo - 1

      longest_unique_subsequence(a[a_lo...a_hi], b[b_lo...b_hi]).each do |(a_pos, b_pos)|
        # Positions are relative to the slices; convert to absolute indices.
        a_pos += a_lo
        b_pos += b_lo
        # recurse between unique lines
        if (last_a_pos + 1 != a_pos) || (last_b_pos + 1 != b_pos)
          recursively_match(a, b, last_a_pos + 1, last_b_pos + 1, a_pos, b_pos) { |match| yield match }
        end
        last_a_pos = a_pos
        last_b_pos = b_pos
        yield [a_pos, b_pos]
      end

      if last_a_pos >= a_lo || last_b_pos >= b_lo
        # there was at least one match
        # recurse between last match and end
        recursively_match(a, b, last_a_pos + 1, last_b_pos + 1, a_hi, b_hi) { |match| yield match }
      elsif a[a_lo] == b[b_lo]
        # no unique lines
        # diff forward from beginning
        while a_lo < a_hi && b_lo < b_hi && a[a_lo] == b[b_lo]
          yield [a_lo, b_lo]
          a_lo += 1
          b_lo += 1
        end
        recursively_match(a, b, a_lo, b_lo, a_hi, b_hi) { |match| yield match }
      elsif a[a_hi - 1] == b[b_hi - 1]
        # no unique lines
        # diff back from end
        a_mid = a_hi - 1
        b_mid = b_hi - 1
        while a_mid > a_lo && b_mid > b_lo && a[a_mid - 1] == b[b_mid - 1]
          a_mid -= 1
          b_mid -= 1
        end
        recursively_match(a, b, a_lo, b_lo, a_mid, b_mid) { |match| yield match }
        # The range needs its own parentheses: without them `.each` binds to
        # the integer upper bound instead of the range and raises NoMethodError.
        (0...(a_hi - a_mid)).each do |i|
          yield [a_mid + i, b_mid + i]
        end
      end
    end

    # Merges runs of consecutive [a_index, b_index] matches into
    # [a_start, b_start, length] triples.
    def collapse_matches(matches)
      return [] if matches.empty?
      sequences = []
      start_a, start_b = *(matches.first)
      len = 1
      matches[1..-1].each do |(i_a, i_b)|
        if i_a == start_a + len && i_b == start_b + len
          len += 1
        else
          sequences << [start_a, start_b, len]
          start_a = i_a
          start_b = i_b
          len = 1
        end
      end
      sequences << [start_a, start_b, len]
      sequences
    end

    # Finds elements that occur exactly once in a and exactly once in b, and
    # returns [a_index, b_index] pairs, in increasing order, for a longest run
    # of them that appears in the same relative order in both sequences.
    def longest_unique_subsequence(a, b)
      # deck[b_index] holds the a index of the matching unique element, or nil.
      deck = Array.new(b.length)
      unique_a = {}
      unique_b = {}

      a.each_with_index do |val, index|
        if unique_a.key?(val)
          # Seen more than once in a: keep the key but mark it unusable.
          unique_a[val] = nil
        else
          unique_a[val] = index
        end
      end

      b.each_with_index do |val, index|
        a_index = unique_a[val]
        next unless a_index
        dupe_index = unique_b[val]
        if dupe_index
          # Seen more than once in b: retract the earlier candidate.
          deck[dupe_index] = nil
          unique_a.delete(val)
        else
          unique_b[val] = index
          deck[index] = a_index
        end
      end

      # Walk the back-pointers from the top of the last pile.
      card = patience_sort(deck).last
      result = []
      while card
        result.unshift [card.value, card.index]
        card = card.previous
      end
      result
    end

    # Patience sorts the non-nil entries of deck into piles. Each card placed
    # on a pile other than the first records the current top of the pile to
    # its left as its previous card. Returns the array of pile tops.
    def patience_sort(deck)
      piles = []
      pile = 0
      deck.each_with_index do |card_value, index|
        next if card_value.nil?
        card = Card.new(index, card_value)

        if piles.any? && piles.last.value < card_value
          # Larger than every pile top: start a new pile.
          pile = piles.size
        elsif piles.any? && piles[pile].value < card_value &&
              (pile == piles.size - 1 || piles[pile + 1].value > card_value)
          # Shortcut: the card belongs directly after the pile used last time.
          pile += 1
        else
          pile = bisect(piles, card_value)
        end

        card.previous = piles[pile - 1] if pile > 0

        if pile < piles.size
          piles[pile] = card
        else
          piles << card
        end
      end

      piles
    end

    # Binary search over the pile tops; returns the index of the first pile
    # whose top value is not less than target (piles.size if there is none).
    def bisect(piles, target)
      low = 0
      high = piles.size - 1
      while low <= high
        mid = (low + high) / 2
        if piles[mid].value < target
          low = mid + 1
        else
          high = mid - 1
        end
      end
      low
    end
  end
end
@@ -0,0 +1,66 @@
1
+ require 'English'
2
+
3
module PatienceDiff
  # Renders the differences between two arrays of lines in unified diff format.
  class UnifiedDiffer
    # strftime pattern for the file header timestamps (%M is minutes).
    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%N %z"

    attr_reader :matcher
    attr_accessor :no_grouping, :line_ending, :ignore_whitespace

    # opts - Hash of options:
    #        :no_grouping       - when truthy, emit one hunk holding every line
    #                             instead of splitting into context groups.
    #        :line_ending       - string used to join output lines (default $RS).
    #        :ignore_whitespace - when truthy, compare lines with trailing
    #                             whitespace removed and leading whitespace
    #                             collapsed to a single space.
    #        Remaining options are passed on to SequenceMatcher.new.
    #
    # The caller's hash is not modified.
    def initialize(opts = {})
      opts = opts.dup
      @no_grouping = opts.delete(:no_grouping)
      @line_ending = opts.delete(:line_ending) || $RS
      @ignore_whitespace = opts.delete(:ignore_whitespace)
      @matcher = SequenceMatcher.new(opts)
    end

    # Diff two arrays of lines and return the unified diff as a single String.
    #
    # left, right     - arrays of lines (without line terminators).
    # left_name,
    # right_name      - names shown in the header; default to "Original" and
    #                   "Current".
    # left_timestamp,
    # right_timestamp - Time objects shown in the header; a missing one falls
    #                   back to the other, and to Time.now if both are missing.
    def diff(left, right, left_name=nil, right_name=nil, left_timestamp=nil, right_timestamp=nil)
      left_name ||= "Original"
      right_name ||= "Current"
      left_timestamp ||= right_timestamp || Time.now
      right_timestamp ||= left_timestamp

      if @ignore_whitespace
        # Match on normalized copies; the output still shows the original lines.
        a = left.map { |line| normalize_whitespace(line) }
        b = right.map { |line| normalize_whitespace(line) }
      else
        a = left
        b = right
      end

      if @no_grouping
        groups = [@matcher.diff_opcodes(a, b)]
      else
        groups = @matcher.grouped_opcodes(a, b)
      end

      [
        "--- %s\t%s" % [left_name, left_timestamp.strftime(TIMESTAMP_FORMAT)],
        "+++ %s\t%s" % [right_name, right_timestamp.strftime(TIMESTAMP_FORMAT)],
        groups.map { |group| unified_diff_group(left, right, group) }.flatten.compact
      ].join(@line_ending)
    end

    private

    # Strips trailing whitespace and collapses leading whitespace to one space.
    def normalize_whitespace(line)
      line.rstrip.gsub(/^\s+/, ' ')
    end

    # Formats one group of opcodes as a hunk: a "@@ -s,n +s,n @@" header
    # followed by the prefixed lines. Returns nil for an empty group.
    def unified_diff_group(a, b, opcodes)
      return nil if opcodes.empty?

      # Count the lines each side really contributes. The b range of a :delete
      # and the a range of an :insert are position markers only, so they must
      # not be counted.
      a_count = 0
      b_count = 0

      body = opcodes.map do |(code, a_start, a_end, b_start, b_end)|
        case code
        when :equal
          a_count += a_end - a_start + 1
          b_count += b_end - b_start + 1
          b[b_start..b_end].map { |line| ' ' + line }
        when :delete
          a_count += a_end - a_start + 1
          a[a_start..a_end].map { |line| '-' + line }
        when :insert
          b_count += b_end - b_start + 1
          b[b_start..b_end].map { |line| '+' + line }
        end
      end

      # Hunk starts are 1-based; an empty range is reported at the line that
      # precedes it, which is the 0-based index itself.
      a_first = opcodes.first[1]
      b_first = opcodes.first[3]
      a_line = a_count.zero? ? a_first : a_first + 1
      b_line = b_count.zero? ? b_first : b_first + 1

      ["@@ -%d,%d +%d,%d @@" % [a_line, a_count, b_line, b_count], body]
    end
  end
end
@@ -0,0 +1,3 @@
1
module PatienceDiff
  # Error class for incorrect usage; a plain StandardError subclass with no
  # behavior of its own.
  class UsageError < StandardError
  end
end
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: patience_diff
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Watt
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: trollop
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.16'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.16'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rubyforge
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 2.0.4
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 2.0.4
46
+ - !ruby/object:Gem::Dependency
47
+ name: rdoc
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '3.10'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '3.10'
62
+ - !ruby/object:Gem::Dependency
63
+ name: hoe
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '3.0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '3.0'
78
+ description: ! 'A Ruby implementation of the Patience diff algorithm.
79
+
80
+
81
+ Patience Diff creates more readable diffs than other algorithms in some cases, particularly
82
+ when much of the content has changed between the documents being compared. There''s
83
+ a great explanation and example [here][example].
84
+
85
+
86
+ Patience diff was originally written by Bram Cohen and is used in the [Bazaar][bazaar]
87
+ version control system. This version is loosely based off the Python implementation
88
+ in Bazaar.
89
+
90
+
91
+ [example]: http://alfedenzo.livejournal.com/170301.html
92
+
93
+ [bazaar]: http://bazaar.canonical.com/'
94
+ email:
95
+ - andrew@wattornot.com
96
+ executables:
97
+ - patience_diff
98
+ extensions: []
99
+ extra_rdoc_files:
100
+ - History.txt
101
+ - Manifest.txt
102
+ files:
103
+ - History.txt
104
+ - Manifest.txt
105
+ - README.md
106
+ - Rakefile
107
+ - bin/patience_diff
108
+ - lib/patience_diff.rb
109
+ - lib/patience_diff/card.rb
110
+ - lib/patience_diff/sequence_matcher.rb
111
+ - lib/patience_diff/unified_differ.rb
112
+ - lib/patience_diff/usage_error.rb
113
+ homepage: http://github.com/watt/ruby_patience_diff
114
+ licenses: []
115
+ post_install_message:
116
+ rdoc_options:
117
+ - --main
118
+ - README.md
119
+ require_paths:
120
+ - lib
121
+ required_ruby_version: !ruby/object:Gem::Requirement
122
+ none: false
123
+ requirements:
124
+ - - ! '>='
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ requirements: []
134
+ rubyforge_project: patiencediff
135
+ rubygems_version: 1.8.24
136
+ signing_key:
137
+ specification_version: 3
138
+ summary: A Ruby implementation of the Patience diff algorithm
139
+ test_files: []