difflcs 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.txt ADDED
@@ -0,0 +1,43 @@
1
+ = Diff Longest Common Substring -- The diff sniffing out every move
2
+
3
+ Diff LCS is a library by the LogiLogi Foundation, extracted from
4
+ http://www.logilogi.org (http://foundation.logilogi.org).
5
+
6
+ == Usage
7
+
8
+ == Download
9
+
10
+ The latest version of Diff LCS can be found at:
11
+
12
+ * http://rubyforge.org/frs/?group_id=7565
13
+
14
+ Documentation can be found at:
15
+
16
+ * http://difflcs.rubyforge.org
17
+
18
+ == Installation
19
+
20
+ You can install Diff LCS with the following command:
21
+
22
+ % [sudo] gem install difflcs
23
+
24
+ Or from its distribution directory with:
25
+
26
+ % [sudo] ruby install.rb
27
+
28
+ == License
29
+
30
+ Diff LCS is released under the GNU Affero GPL licence.
31
+
32
+ * http://www.fsf.org/licensing/licenses/agpl-3.0.html
33
+
34
+ == Support
35
+
36
+ The Diff LCS homepage is http://difflcs.rubyforge.org.
37
+
38
+ For the latest news on Diff LCS:
39
+
40
+ * http://foundation.logilogi.org/tags/DiffLCS
41
+
42
+ Feel free to submit commits or feature requests. If you send a patch,
43
+ remember to update the corresponding unit tests.
data/Rakefile ADDED
@@ -0,0 +1,93 @@
1
require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/rdoctask'
require 'rake/packagetask'
require 'rake/gempackagetask'
require 'rake/contrib/sshpublisher'
require File.join(File.dirname(__FILE__), 'lib', 'diff_l_c_s', 'version')

# Optional build suffix: PKG_BUILD=7 turns version "x.y.z" into "x.y.z.7".
PKG_BUILD = ENV['PKG_BUILD'] ? '.' + ENV['PKG_BUILD'] : ''
PKG_NAME = 'difflcs'
PKG_VERSION = DiffLCS::VERSION::STRING + PKG_BUILD
# Base name (without extension) of every file produced by the package task.
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

RELEASE_NAME = "REL #{PKG_VERSION}"

RUBY_FORGE_PROJECT = "difflcs"
RUBY_FORGE_USER = "wybo"

desc "Default Task"
task :default => [ :test ]

# Run the unit tests
Rake::TestTask.new do |t|
  t.libs << "test"
  t.pattern = 'test/*_test.rb'
  t.verbose = true
  t.warning = false
end

# Generate the RDoc documentation
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = "Diff Longest Common Substring -- The diff sniffing out every move"
  rdoc.options << '--line-numbers' << '--inline-source' << '-A cattr_accessor=object'
  rdoc.options << '--charset' << 'utf-8'
  rdoc.rdoc_files.include('README.txt', 'CHANGELOG.txt')
  rdoc.rdoc_files.include('lib/diff_l_c_s.rb')
  rdoc.rdoc_files.include('lib/diff_l_c_s/*.rb')
end

# Create compressed packages
spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PKG_NAME
  s.summary = "Diffing that sniffs out moved text."
  s.description = %q{A diff algorithm using longest common substrings that can also find text that has moved.}
  s.version = PKG_VERSION

  s.author = "Wybo Wiersma"
  s.email = "wybo@logilogi.org"
  s.rubyforge_project = RUBY_FORGE_PROJECT
  s.homepage = "http://difflcs.rubyforge.org"

  s.add_dependency('positionrange', '>= 0.6.0' + PKG_BUILD)

  s.has_rdoc = true
  s.requirements << 'none'
  s.require_path = 'lib'

  # Ship the top-level files plus everything under lib/ and test/, leaving
  # out Subversion bookkeeping directories.
  without_svn = lambda do |pattern|
    Dir.glob(pattern).reject { |item| item.include?(".svn") }
  end
  s.files = [ "Rakefile", "install.rb", "README.txt", "CHANGELOG.txt", "LICENSE.txt" ] +
      without_svn.call("lib/**/*") +
      without_svn.call("test/**/*")
end

# The spec is handed over through the constructor; only the archive formats
# need to be switched on here.
Rake::GemPackageTask.new(spec) do |p|
  p.need_tar = true
  p.need_zip = true
end

desc "Publish the docs, gem, and release files"
task :deploy => [:release, :pdoc] do
  puts 'Published gem'
end

desc "Publish the API documentation"
task :pdoc => [:rdoc] do
  sh "rsync -azv --no-perms --no-times doc/*" +
      " rubyforge.org:/var/www/gforge-projects/#{RUBY_FORGE_PROJECT}"
end

desc "Publish the release files to RubyForge."
task :release => [ :package ] do
  require 'rubyforge'
  require 'rake/contrib/rubyforgepublisher'

  # One archive per format built by the package task.
  packages = %w( gem tgz zip ).collect { |ext| "pkg/#{PKG_FILE_NAME}.#{ext}" }

  rubyforge = RubyForge.new.configure
  rubyforge.login
  rubyforge.add_release(RUBY_FORGE_PROJECT, PKG_NAME, RELEASE_NAME, *packages)
end
data/install.rb ADDED
@@ -0,0 +1,30 @@
1
require 'rbconfig'
require 'find'
require 'fileutils'

# this was adapted from rdoc's install.rb by way of Log4r

# Work out the directory to install into: the configured site-library
# directory when there is one, otherwise a best guess derived from the
# interpreter's libdir and the load path.
config = RbConfig::CONFIG

$sitedir = config["sitelibdir"]
unless $sitedir
  version = config["MAJOR"] + "." + config["MINOR"]
  $libdir = File.join(config["libdir"], "ruby", version)
  $sitedir = $:.find { |x| x =~ /site_ruby/ }
  if !$sitedir
    $sitedir = File.join($libdir, "site_ruby")
  elsif !$sitedir.include?(version)
    # Plain substring test: matching a String against a String with !~
    # raises a TypeError.
    $sitedir = File.join($sitedir, version)
  end
end

# the actual gruntwork
Dir.chdir("lib")

# Find yields a directory before the entries inside it, so every target
# directory exists by the time its files are installed.
Find.find("diff_l_c_s", "diff_l_c_s.rb") do |f|
  target = File.join($sitedir, *f.split(/\//))
  if File.directory?(f)
    FileUtils.mkdir_p(target)
  elsif f[-3..-1] == ".rb"
    FileUtils.install(f, target, :mode => 0644, :verbose => true)
  end
end
data/lib/diff_l_c_s.rb ADDED
@@ -0,0 +1,148 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
# Put this directory on the load path (so 'diff_l_c_s/...' can be required)
# unless it is already listed, either verbatim or as an expanded path.
lib_dir = File.dirname(__FILE__)
unless $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))
  $:.unshift(lib_dir)
end

# Load PositionRange unless somebody else already did: a checkout sitting
# next to this project is tried first, the installed gem is the fallback.
unless defined?(PositionRange)
  $:.unshift(lib_dir + "/../../positionrange/lib")
  begin
    require 'position_range'
  rescue LoadError
    require 'rubygems'
    gem 'positionrange'
    require 'position_range'
  end
end
27
+
28
# Instance-side API: mix this module into an object that responds to
# +split+ to diff it against another such object.
module DiffLCS
  # Character-wise diff of self against other.
  #
  # Both sides are split into arrays of single characters and passed, along
  # with the options, to DiffLCS.diff, whose result is returned.
  #
  def diff(other, options = {})
    own_chars = split('')
    other_chars = other.split('')
    DiffLCS.diff(own_chars, other_chars, options)
  end

  # Word-wise diff of self against other; hands self, other and the options
  # to DiffLCS.word_diff and returns its result.
  #
  # A word is a run of non-space characters or a run of spaces, bounded by
  # the other kind or by the beginning or the end of the string.
  #
  def word_diff(other, options = {})
    DiffLCS.word_diff(self, other, options)
  end
end
44
+
45
+ require 'diff_l_c_s/counter'
46
+ require 'diff_l_c_s/word_split_array'
47
+
48
module DiffLCS
  class << self
    # Diffs two sequences of elements (characters, words, ...) and returns
    # a Hash containing:
    #
    # <tt>:matched_old</tt> = the position-ranges in old_arr for the places
    # where the new matches the old.
    # <tt>:matched_new</tt> = the corresponding position-ranges in new_arr,
    # in the order PositionRange::List#sort puts them.
    #
    # The two lists are aligned: entry i of :matched_old is the old location
    # of the run found at entry i of :matched_new.
    #
    # Valid options are:
    # * <tt>:minimum_lcs_size</tt> = only common runs strictly longer than
    #   this are reported (default 0)
    #
    def diff(old_arr, new_arr, options = {})
      minimum_lcs_size = options[:minimum_lcs_size] || 0
      lcs = DiffLCS.longest_common_sub_strings(old_arr, new_arr,
          :minimum_lcs_size => minimum_lcs_size)
      unsorted_old = lcs[:matched_old]
      unsorted_new = lcs[:matched_new]

      matched_new = unsorted_new.sort
      matched_old = PositionRange::List.new
      # Move every old range to the slot its partner got in the sorted list.
      unsorted_old.size.times do |i|
        matched_old[matched_new.index(unsorted_new[i])] = unsorted_old[i]
      end

      {:matched_old => matched_old,
       :matched_new => matched_new}
    end

    # Diffs two strings word by word and returns the same Hash as #diff,
    # with the ranges translated through WordSplitArray#translate_to_pos.
    #
    # Words are non-spaces or groups of spaces delimited by either
    # spaces or the beginning or the end of the string.
    #
    def word_diff(old_string, new_string, options = {})
      old_w_s_arr = DiffLCS::WordSplitArray.new(old_string)
      new_w_s_arr = DiffLCS::WordSplitArray.new(new_string)
      word_level = DiffLCS.diff(old_w_s_arr, new_w_s_arr, options)
      {:matched_old => old_w_s_arr.translate_to_pos(word_level[:matched_old]),
       :matched_new => new_w_s_arr.translate_to_pos(word_level[:matched_new])}
    end

    # Finds the Longest Common Substrings (not Subsequences) of the two
    # Arrays and returns a Hash with two PositionRange::Lists of equal
    # length, both empty if nothing was found:
    #
    # <tt>:matched_old</tt> = where each common run sits in old_arr
    # <tt>:matched_new</tt> = where the same run sits in new_arr
    #
    # Valid options are:
    # * <tt>:minimum_lcs_size</tt> = only runs strictly longer than this are
    #   kept (default 0)
    #
    # Runs are appended in the order they are selected, largest remaining
    # run first.
    #
    def longest_common_sub_strings(old_arr, new_arr, options = {})
      minimum_lcs_size = options[:minimum_lcs_size] || 0

      # One Counter per diagonal run of equal elements. A match at
      # (old_i, new_i) extends the run that ended at (old_i - 1, new_i - 1),
      # so only the previous row of the match table has to be remembered.
      counter_array = []
      previous_row = {}
      old_arr.each_with_index do |old_el, old_i|
        current_row = {}
        new_arr.each_with_index do |new_el, new_i|
          next unless old_el == new_el
          running = previous_row[new_i - 1]
          if running
            running.step_up
            current_row[new_i] = running
          else
            counter = Counter.new(old_i, new_i)
            current_row[new_i] = counter
            counter_array.push(counter)
          end
        end
        previous_row = current_row
      end

      in_old_p_r_list = PositionRange::List.new
      in_new_p_r_list = PositionRange::List.new

      counter_array = counter_array.select { |co| co.step_size > minimum_lcs_size }

      # Repeatedly take the largest remaining run and cut its ranges out of
      # every other run, dropping runs that become too small.
      while (counter = counter_array.sort!.pop)
        i = 0
        while i < counter_array.size
          candidate = counter_array[i]
          # NOTE(review): PositionRange#=== is assumed to test for overlap
          # and PositionRange#- to return nil when nothing remains; confirm
          # against the positionrange gem.
          if candidate.in_old === counter.in_old
            candidate.in_old = candidate.in_old - counter.in_old
          end
          if candidate.in_new === counter.in_new
            candidate.in_new = candidate.in_new - counter.in_new
          end
          if candidate.size <= minimum_lcs_size
            # No index increment: the next element slides into slot i.
            counter_array.delete_at(i)
          else
            i += 1
          end
        end
        in_old_p_r_list.push(counter.in_old)
        in_new_p_r_list.push(counter.in_new)
      end

      {:matched_old => in_old_p_r_list,
       :matched_new => in_new_p_r_list}
    end
  end
end
@@ -0,0 +1,120 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
module DiffLCS
  # Tracks one run of consecutive equal elements shared by the old and the
  # new sequence: where the run starts in each of them and how long it is.
  #
  # A Counter is first grown with #step_up and later shrunk through
  # #in_old= and #in_new=. Counters are ordered by their current #size.
  #
  class Counter
    include Comparable

    # Creates a new counter of size 1 starting at position old_i in the
    # old sequence and new_i in the new sequence.
    #
    def initialize(old_i, new_i)
      @in_old_begin = old_i
      @in_new_begin = new_i
      @size = 1
      # The ranges are built lazily, on first access.
      @in_old = nil
      @in_new = nil
      @empty = false
    end

    # Increases the size by one and returns self.
    #
    def step_up
      @size += 1
      self
    end

    # Returns the PositionRange for the range in the old sequence.
    #
    # NOTE: The range is built on the first call and then kept, so
    # step_up's made after that are no longer reflected in it.
    #
    def in_old
      @in_old ||= PositionRange.new(@in_old_begin, @in_old_begin + @size)
    end

    # Returns the PositionRange for the range in the new sequence.
    #
    # NOTE: The range is built on the first call and then kept, so
    # step_up's made after that are no longer reflected in it.
    #
    def in_new
      @in_new ||= PositionRange.new(@in_new_begin, @in_new_begin + @size)
    end

    # Sets the in_old PositionRange, and trims in_new by the same amount.
    #
    # If new_in_old is nil, the counter is marked empty (size 0) and its
    # ranges are left as they were.
    #
    # NOTE: new_in_old is assumed to be the old range cut short at one of
    # its two ends, not a moved or enlarged range.
    #
    def in_old=(new_in_old)
      if new_in_old
        @in_new = adjust(in_new, in_old, new_in_old)
        @in_old = new_in_old
      else
        @empty = true
      end
    end

    # Sets the in_new PositionRange, and trims in_old by the same amount.
    #
    # If new_in_new is nil, the counter is marked empty (size 0) and its
    # ranges are left as they were.
    #
    # NOTE: new_in_new is assumed to be the old range cut short at one of
    # its two ends, not a moved or enlarged range.
    #
    def in_new=(new_in_new)
      if new_in_new
        @in_old = adjust(in_old, in_new, new_in_new)
        @in_new = new_in_new
      else
        @empty = true
      end
    end

    # Faster than size, but only tells the size to which was stepped;
    # later trimming through in_old= / in_new= is not reflected.
    #
    def step_size
      @size
    end

    # Returns the current size of this Counter: 0 once it has been marked
    # empty, the size of the in_old range otherwise.
    #
    def size
      @empty ? 0 : in_old.size
    end

    # Compares its own size with the size of the other Counter.
    #
    # Returns nil for anything that is not a Counter, so that a Counter is
    # never considered equal to an unrelated object that merely responds to
    # +size+ (a String or an Array, for example).
    #
    def <=>(other)
      return nil unless other.is_a?(Counter)
      size <=> other.size
    end

    protected

    # Helper for in_new= and in_old=
    #
    # Returns a copy of to_set shrunk the way other_old was shrunk to give
    # other_new: cut at the end when the end moved inwards, otherwise cut
    # at the beginning by the distance the beginning moved.
    #
    def adjust(to_set, other_old, other_new)
      if other_new.end < other_old.end
        to_set.new_dup(to_set.begin, to_set.end - (other_old.end - other_new.end))
      else
        to_set.new_dup(to_set.begin + (other_new.begin - other_old.begin), to_set.end)
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
+ require 'diff_l_c_s'
15
+
16
# Mix DiffLCS into String, so that every string responds to #diff and
# #word_diff.
String.send(:include, DiffLCS)