difflcs 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.txt ADDED
@@ -0,0 +1,43 @@
1
+ = Diff Longest Common Substring -- The diff sniffing out every move
2
+
3
+ Diff LCS is a library by the LogiLogi Foundation, extracted from
4
+ http://www.logilogi.org (http://foundation.logilogi.org).
5
+
6
+ == Usage
7
+
8
+ == Download
9
+
10
+ The latest version of Diff LCS can be found at:
11
+
12
+ * http://rubyforge.org/frs/?group_id=7565
13
+
14
+ Documentation can be found at:
15
+
16
+ * http://difflcs.rubyonrails.org
17
+
18
+ == Installation
19
+
20
+ You can install Diff LCS with the following command:
21
+
22
+ % [sudo] gem install difflcs
23
+
24
+ Or from its distribution directory with:
25
+
26
+ % [sudo] ruby install.rb
27
+
28
+ == License
29
+
30
+ Diff LCS is released under the GNU Affero GPL licence.
31
+
32
+ * http://www.fsf.org/licensing/licenses/agpl-3.0.html
33
+
34
+ == Support
35
+
36
+ The Diff LCS homepage is http://difflcs.rubyforge.org.
37
+
38
+ For the latest news on Diff LCS:
39
+
40
+ * http://foundation.logilogi.org/tags/DiffLCS
41
+
42
+ Feel free to submit commits or feature requests. If you send a patch,
43
+ remember to update the corresponding unit tests.
data/Rakefile ADDED
@@ -0,0 +1,93 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/testtask'
4
+ require 'rake/rdoctask'
5
+ require 'rake/packagetask'
6
+ require 'rake/gempackagetask'
7
+ require 'rake/contrib/sshpublisher'
8
+ require File.join(File.dirname(__FILE__), 'lib', 'diff_l_c_s', 'version')
9
+
10
# Optional build suffix (e.g. PKG_BUILD=123 yields version "x.y.z.123").
PKG_BUILD = ENV['PKG_BUILD'] ? '.' + ENV['PKG_BUILD'] : ''
PKG_NAME = 'difflcs'
PKG_VERSION = DiffLCS::VERSION::STRING + PKG_BUILD
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

RELEASE_NAME = "REL #{PKG_VERSION}"

RUBY_FORGE_PROJECT = "difflcs"
RUBY_FORGE_USER = "wybo"

desc "Default Task"
task :default => [ :test ]

# Run the unit tests
Rake::TestTask.new do |t|
  t.libs << "test"
  t.pattern = 'test/*_test.rb'
  t.verbose = true
  t.warning = false
end

# Generate the RDoc documentation
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = "Diff Longest Common Substring -- The diff sniffing out every move"
  rdoc.options << '--line-numbers' << '--inline-source' << '-A cattr_accessor=object'
  rdoc.options << '--charset' << 'utf-8'
  rdoc.rdoc_files.include('README.txt', 'CHANGELOG.txt')
  rdoc.rdoc_files.include('lib/diff_l_c_s.rb')
  rdoc.rdoc_files.include('lib/diff_l_c_s/*.rb')
end

# Create compressed packages
spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PKG_NAME
  s.summary = "Diffing that sniffs out moved text."
  s.description = %q{A diff algorithm using longest common substrings that can also find text that has moved.}
  s.version = PKG_VERSION

  s.author = "Wybo Wiersma"
  s.email = "wybo@logilogi.org"
  s.rubyforge_project = "difflcs"
  s.homepage = "http://difflcs.rubyforge.org"

  s.add_dependency('positionrange', '>= 0.6.0' + PKG_BUILD)

  s.has_rdoc = true
  s.requirements << 'none'
  s.require_path = 'lib'

  s.files = [ "Rakefile", "install.rb", "README.txt", "CHANGELOG.txt", "LICENSE.txt" ]
  # Ship everything under lib/ and test/, minus Subversion metadata.
  %w( lib test ).each do |dir|
    s.files = s.files + Dir.glob("#{dir}/**/*").reject { |item| item.include?('.svn') }
  end
end

Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = true
  p.need_zip = true
end

desc "Publish the docs, gem, and release files"
task :deploy => [:release, :pdoc] do
  puts 'Published gem'
end

desc "Publish the API documentation"
task :pdoc => [:rdoc] do
  sh "rsync -azv --no-perms --no-times doc/*" +
     " rubyforge.org:/var/www/gforge-projects/difflcs"
end

desc "Publish the release files to RubyForge."
task :release => [ :package ] do
  # Loaded lazily so the other tasks work without the rubyforge gem.
  require 'rubyforge'
  require 'rake/contrib/rubyforgepublisher'

  packages = %w( gem tgz zip ).map { |ext| "pkg/#{PKG_FILE_NAME}.#{ext}" }

  rubyforge = RubyForge.new.configure
  rubyforge.login
  rubyforge.add_release(PKG_NAME, PKG_NAME, RELEASE_NAME, *packages)
end
data/install.rb ADDED
@@ -0,0 +1,30 @@
1
require 'rbconfig'
require 'find'
require 'fileutils'

# Installs the library files from lib/ into Ruby's site library directory.
#
# this was adapted from rdoc's install.rb by way of Log4r

# Prefer the configured site library directory; otherwise derive one from
# the load path or the configured libdir.
$sitedir = RbConfig::CONFIG["sitelibdir"]
unless $sitedir
  version = RbConfig::CONFIG["MAJOR"] + "." + RbConfig::CONFIG["MINOR"]
  $libdir = File.join(RbConfig::CONFIG["libdir"], "ruby", version)
  $sitedir = $:.find { |x| x =~ /site_ruby/ }
  if !$sitedir
    $sitedir = File.join($libdir, "site_ruby")
  elsif $sitedir !~ Regexp.new(Regexp.quote(version))
    # String#=~ / !~ needs a Regexp operand; a bare quoted String raises
    # TypeError, so wrap the escaped version in a Regexp.
    $sitedir = File.join($sitedir, version)
  end
end

# the actual gruntwork
# Resolve lib/ relative to this script so it works from any directory.
Dir.chdir(File.join(File.dirname(__FILE__), "lib"))

Find.find("diff_l_c_s", "diff_l_c_s.rb") { |f|
  target = File.join($sitedir, *f.split(/\//))
  if f[-3..-1] == ".rb"
    FileUtils.install(f, target, :mode => 0644, :verbose => true)
  else
    FileUtils.mkdir_p(target)
  end
}
data/lib/diff_l_c_s.rb ADDED
@@ -0,0 +1,148 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
+ $:.unshift(File.dirname(__FILE__)) unless
15
+ $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
16
+
17
+ unless defined?(PositionRange)
18
+ begin
19
+ $:.unshift(File.dirname(__FILE__) + "/../../positionrange/lib")
20
+ require 'position_range'
21
+ rescue LoadError
22
+ require 'rubygems'
23
+ gem 'positionrange'
24
+ require 'position_range'
25
+ end
26
+ end
27
+
28
module DiffLCS
  # Character-level diff of the receiver against +other+.
  #
  # Both strings are split into single-character arrays and handed to the
  # module-level DiffLCS.diff, together with +options+ (see DiffLCS.diff
  # for the supported options and the returned hash).
  #
  def diff(other, options = {})
    own_chars = split('')
    other_chars = other.split('')
    DiffLCS.diff(own_chars, other_chars, options)
  end

  # Word-level diff of the receiver against +other+; delegates to the
  # module-level DiffLCS.word_diff with the same +options+.
  #
  # Words are non-spaces or groups of spaces delimited by either
  # spaces or the beginning or the end of the string.
  #
  def word_diff(other, options = {})
    DiffLCS.word_diff(self, other, options)
  end
end
44
+
45
+ require 'diff_l_c_s/counter'
46
+ require 'diff_l_c_s/word_split_array'
47
+
48
module DiffLCS
  class << self
    # Diffs two arrays of elements and returns a hash containing:
    #
    # <tt>:matched_old</tt> = the position-ranges in the old array for
    # the places where the new matches the old.
    # <tt>:matched_new</tt> = the corresponding position-ranges in the
    # new array, sorted.
    #
    # The two lists correspond index by index: <tt>:matched_old[i]</tt>
    # is the range in the old array that matches <tt>:matched_new[i]</tt>.
    #
    # Valid options are:
    # * <tt>:minimum_lcs_size</tt> = only matches strictly longer than
    #   this many elements are reported (defaults to 0)
    #
    def diff(old_arr, new_arr, options = {})
      minimum_lcs_size = options[:minimum_lcs_size] || 0
      lcs = DiffLCS.longest_common_sub_strings(old_arr, new_arr,
          :minimum_lcs_size => minimum_lcs_size)
      unsorted_old = lcs[:matched_old]
      unsorted_new = lcs[:matched_new]

      matched_new = unsorted_new.sort
      matched_old = PositionRange::List.new
      # Put every old range at the slot its partner took after sorting.
      unsorted_old.size.times do |i|
        matched_old[matched_new.index(unsorted_new[i])] = unsorted_old[i]
      end

      return {:matched_old => matched_old,
              :matched_new => matched_new}
    end

    # Diffs two strings word by word and returns the same kind of hash as
    # DiffLCS.diff. Both strings are wrapped in a WordSplitArray, diffed,
    # and the resulting ranges are passed through translate_to_pos
    # (presumably mapping word indexes back to character positions --
    # confirm against WordSplitArray).
    #
    # Words are non-spaces or groups of spaces delimited by either
    # spaces or the beginning or the end of the string.
    #
    def word_diff(old_string, new_string, options = {})
      old_w_s_arr = DiffLCS::WordSplitArray.new(old_string)
      new_w_s_arr = DiffLCS::WordSplitArray.new(new_string)
      diff = DiffLCS.diff(old_w_s_arr, new_w_s_arr, options)
      return {:matched_old => old_w_s_arr.translate_to_pos(diff[:matched_old]),
              :matched_new => new_w_s_arr.translate_to_pos(diff[:matched_new])}
    end

    # Finds the Longest Common Substrings (not Subsequences) of the two
    # Arrays. Returns a hash with two PositionRange::Lists,
    # <tt>:matched_old</tt> and <tt>:matched_new</tt>, holding the ranges
    # of each match in the old and the new array respectively. Both lists
    # are empty if nothing was found.
    #
    # Valid options are:
    # * <tt>:minimum_lcs_size</tt> = only matches strictly longer than
    #   this many elements are kept (defaults to 0)
    #
    # Matches are added largest first; ranges already claimed by a larger
    # match are subtracted from the remaining candidates.
    #
    def longest_common_sub_strings(old_arr, new_arr, options = {})
      minimum_lcs_size = options[:minimum_lcs_size] || 0

      counter_array = []
      # Only the previous row of the match table is ever consulted, so keep
      # just that row instead of one hash per old element.
      previous_row = {}
      old_arr.each_with_index do |old_el, old_i|
        current_row = {}
        new_arr.each_with_index do |new_el, new_i|
          next unless old_el == new_el
          # A counter on the upper-left diagonal means this match extends
          # an existing run; otherwise a new run starts here.
          running = previous_row[new_i - 1]
          if running
            current_row[new_i] = running
            running.step_up
          else
            counter = DiffLCS::Counter.new(old_i, new_i)
            current_row[new_i] = counter
            counter_array.push(counter)
          end
        end
        previous_row = current_row
      end

      in_old_p_r_list = PositionRange::List.new
      in_new_p_r_list = PositionRange::List.new

      counter_array = counter_array.select {|co| co.step_size > minimum_lcs_size}

      # Repeatedly take the largest remaining run, then trim or drop every
      # other run that overlaps it in either array.
      while counter = counter_array.sort!.pop
        i = 0
        while i < counter_array.size
          candidate = counter_array[i]
          if candidate.in_old === counter.in_old
            candidate.in_old = candidate.in_old - counter.in_old
          end
          if candidate.in_new === counter.in_new
            candidate.in_new = candidate.in_new - counter.in_new
          end
          if candidate.size <= minimum_lcs_size
            counter_array.delete_at(i)
          else
            i += 1
          end
        end
        in_old_p_r_list.push(counter.in_old)
        in_new_p_r_list.push(counter.in_new)
      end
      return {:matched_old => in_old_p_r_list,
              :matched_new => in_new_p_r_list}
    end
  end
end
@@ -0,0 +1,120 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
module DiffLCS
  # Tracks one run of matching elements between the old and the new
  # array: where the run starts in each, and how long it is. Counters are
  # ordered by their current size (see <=>).
  #
  # Nested in the DiffLCS module so this file can be loaded on its own.
  #
  class Counter
    include Comparable

    # Creates a new counter of size 1 starting at old_i in the old array
    # and at new_i in the new array.
    #
    def initialize(old_i, new_i)
      @in_old_begin = old_i
      @in_new_begin = new_i
      @size = 1
    end

    # Increases the stepped size by one and returns the counter itself.
    #
    def step_up
      @size += 1
      self
    end

    # Returns the PositionRange for the range in the old string, built
    # from the start position and the stepped size on first use.
    #
    # NOTE: No step_up's possible anymore after this function has been
    # called (the range is cached and not rebuilt).
    #
    def in_old
      @in_old ||= PositionRange.new(@in_old_begin, @in_old_begin + @size)
    end

    # Returns the PositionRange for the range in the new string, built
    # from the start position and the stepped size on first use.
    #
    # NOTE: No step_up's possible anymore after this function has been
    # called (the range is cached and not rebuilt).
    #
    def in_new
      @in_new ||= PositionRange.new(@in_new_begin, @in_new_begin + @size)
    end

    # Sets the in_old PositionRange, and updates the in_new too
    #
    # If new_in_old is nil, the counter is set empty
    #
    # NOTE: Assumed to be smaller than before, and not moved.
    #
    def in_old=(new_in_old)
      if new_in_old
        # in_new must be adjusted against the old in_old, so do it first.
        @in_new = adjust(in_new, in_old, new_in_old)
        @in_old = new_in_old
      else
        @empty = true
      end
    end

    # Sets the in_new PositionRange, and updates the in_old too
    #
    # If new_in_new is nil, the counter is set empty
    #
    # NOTE: Assumed to be smaller than before, and not moved.
    #
    def in_new=(new_in_new)
      if new_in_new
        # in_old must be adjusted against the old in_new, so do it first.
        @in_old = adjust(in_old, in_new, new_in_new)
        @in_new = new_in_new
      else
        @empty = true
      end
    end

    # Faster than size, but only tells the size to which was
    # stepped; later shrinking through in_old= / in_new= is not reflected.
    #
    def step_size
      @size
    end

    # Returns the size of this Counter: 0 once it has been set empty,
    # otherwise the size of its range in the old string.
    #
    def size
      @empty ? 0 : in_old.size
    end

    # Compares its own size with the size of the other.
    #
    # Returns nil when other has no size, as Comparable expects for
    # operands that cannot be compared.
    #
    def <=>(other)
      return nil unless other.respond_to?(:size)
      size <=> other.size
    end

    protected

    # Helper for in_new = and in_old =
    #
    # Applies to to_set the same shrink that turned other_old into
    # other_new: when the end moved inwards the end of to_set is pulled in
    # by the same amount, otherwise the begin is pushed forward by the
    # amount the begin moved.
    #
    def adjust(to_set, other_old, other_new)
      if other_new.end < other_old.end
        to_set.new_dup(to_set.begin, to_set.end - (other_old.end - other_new.end))
      else
        to_set.new_dup(to_set.begin + (other_new.begin - other_old.begin), to_set.end)
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
+ require 'diff_l_c_s'
15
+
16
# Mixes DiffLCS into String, so every String gains the #diff and
# #word_diff instance methods defined in the DiffLCS module.
String.send(:include, DiffLCS)