difflcs 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.txt +3 -0
- data/LICENSE.txt +662 -0
- data/README.txt +43 -0
- data/Rakefile +93 -0
- data/install.rb +30 -0
- data/lib/diff_l_c_s.rb +148 -0
- data/lib/diff_l_c_s/counter.rb +120 -0
- data/lib/diff_l_c_s/string.rb +18 -0
- data/lib/diff_l_c_s/version.rb +9 -0
- data/lib/diff_l_c_s/word_split_array.rb +58 -0
- data/lib/difflcs.rb +1 -0
- data/test/counter_test.rb +93 -0
- data/test/diff_l_c_s_test.rb +124 -0
- data/test/test_helper.rb +3 -0
- data/test/word_split_array_test.rb +55 -0
- metadata +77 -0
data/README.txt
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
= Diff Longest Common Substring -- The diff sniffing out every move
|
2
|
+
|
3
|
+
Diff LCS is a library by the LogiLogi Foundation, extracted from
|
4
|
+
http://www.logilogi.org (http://foundation.logilogi.org).
|
5
|
+
|
6
|
+
== Usage
|
7
|
+
|
8
|
+
== Download
|
9
|
+
|
10
|
+
The latest version of Diff LCS can be found at:
|
11
|
+
|
12
|
+
* http://rubyforge.org/frs/?group_id=7565
|
13
|
+
|
14
|
+
Documentation can be found at:
|
15
|
+
|
16
|
+
* http://difflcs.rubyforge.org
|
17
|
+
|
18
|
+
== Installation
|
19
|
+
|
20
|
+
You can install Diff LCS with the following command:
|
21
|
+
|
22
|
+
% [sudo] gem install difflcs
|
23
|
+
|
24
|
+
Or from its distribution directory with:
|
25
|
+
|
26
|
+
% [sudo] ruby install.rb
|
27
|
+
|
28
|
+
== License
|
29
|
+
|
30
|
+
Diff LCS is released under the GNU Affero GPL licence.
|
31
|
+
|
32
|
+
* http://www.fsf.org/licensing/licenses/agpl-3.0.html
|
33
|
+
|
34
|
+
== Support
|
35
|
+
|
36
|
+
The Diff LCS homepage is http://difflcs.rubyforge.org.
|
37
|
+
|
38
|
+
For the latest news on Diff LCS:
|
39
|
+
|
40
|
+
* http://foundation.logilogi.org/tags/DiffLCS
|
41
|
+
|
42
|
+
Feel free to submit commits or feature requests. If you send a patch,
|
43
|
+
remember to update the corresponding unit tests.
|
data/Rakefile
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# Rake tasks for the difflcs gem: running the tests, generating the RDoc
# documentation, building the gem/tgz/zip packages and publishing them
# (and the docs) to RubyForge.

require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/rdoctask'
require 'rake/packagetask'
require 'rake/gempackagetask'
require 'rake/contrib/sshpublisher'
require File.join(File.dirname(__FILE__), 'lib', 'diff_l_c_s', 'version')

# Optional build suffix, taken from the PKG_BUILD environment variable and
# appended to the version (and to the positionrange dependency) as ".<build>".
PKG_BUILD = ENV['PKG_BUILD'] ? '.' + ENV['PKG_BUILD'] : ''
PKG_NAME = 'difflcs'
PKG_VERSION = DiffLCS::VERSION::STRING + PKG_BUILD
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

RELEASE_NAME = "REL #{PKG_VERSION}"

RUBY_FORGE_PROJECT = "difflcs"
RUBY_FORGE_USER = "wybo"

desc "Default Task"
task :default => [ :test ]

# Run the unit tests
Rake::TestTask.new { |t|
  t.libs << "test"
  t.pattern = 'test/*_test.rb'
  t.verbose = true
  t.warning = false
}

# Generate the RDoc documentation
Rake::RDocTask.new { |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = "Diff Longest Common Substring -- The diff sniffing out every move"
  rdoc.options << '--line-numbers' << '--inline-source' << '-A cattr_accessor=object'
  rdoc.options << '--charset' << 'utf-8'
  rdoc.rdoc_files.include('README.txt', 'CHANGELOG.txt')
  rdoc.rdoc_files.include('lib/diff_l_c_s.rb')
  rdoc.rdoc_files.include('lib/diff_l_c_s/*.rb')
}

# Create compressed packages
spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PKG_NAME
  s.summary = "Diffing that sniffs out moved text."
  s.description = %q{A diff algorithm using longest common substrings that can also find text that has moved.}
  s.version = PKG_VERSION

  s.author = "Wybo Wiersma"
  s.email = "wybo@logilogi.org"
  s.rubyforge_project = RUBY_FORGE_PROJECT
  s.homepage = "http://difflcs.rubyforge.org"

  s.add_dependency('positionrange', '>= 0.6.0' + PKG_BUILD)

  s.has_rdoc = true
  s.requirements << 'none'
  s.require_path = 'lib'

  # Ship the top-level files plus everything under lib/ and test/, leaving
  # out Subversion metadata.
  s.files = [ "Rakefile", "install.rb", "README.txt", "CHANGELOG.txt", "LICENSE.txt" ]
  s.files = s.files + Dir.glob( "lib/**/*" ).reject { |item| item.include?( ".svn" ) }
  s.files = s.files + Dir.glob( "test/**/*" ).reject { |item| item.include?( ".svn" ) }
end

Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = true
  p.need_zip = true
end

desc "Publish the docs, gem, and release files"
task :deploy => [:release, :pdoc] do
  puts 'Published gem'
end

desc "Publish the API documentation"
task :pdoc => [:rdoc] do
  sh "rsync -azv --no-perms --no-times doc/*" +
      " rubyforge.org:/var/www/gforge-projects/difflcs"
end

desc "Publish the release files to RubyForge."
task :release => [ :package ] do
  # Loaded here so the other tasks work without the rubyforge gem installed.
  require 'rubyforge'
  require 'rake/contrib/rubyforgepublisher'

  packages = %w( gem tgz zip ).collect{ |ext| "pkg/#{PKG_FILE_NAME}.#{ext}" }

  rubyforge = RubyForge.new.configure
  rubyforge.login
  rubyforge.add_release(RUBY_FORGE_PROJECT, PKG_NAME, RELEASE_NAME, *packages)
end
|
data/install.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Installs the DiffLCS library files (lib/diff_l_c_s.rb and everything under
# lib/diff_l_c_s/) into Ruby's site library directory.
#
# This was adapted from rdoc's install.rb by way of Log4r.

require 'rbconfig'
require 'find'
require 'fileutils'

config = RbConfig::CONFIG

# Prefer the configured site library directory; otherwise derive one from
# the load path or from the library directory.
sitedir = config["sitelibdir"]
unless sitedir
  version = config["MAJOR"] + "." + config["MINOR"]
  libdir = File.join(config["libdir"], "ruby", version)
  sitedir = $:.find { |x| x =~ /site_ruby/ }
  if !sitedir
    sitedir = File.join(libdir, "site_ruby")
  elsif !sitedir.include?(version)
    # A plain substring test: matching a String against a String with
    # =~ / !~ raises a TypeError.
    sitedir = File.join(sitedir, version)
  end
end

# The actual gruntwork. Paths are resolved relative to this script, so it
# also works when started from another working directory.
Dir.chdir(File.join(File.dirname(__FILE__), "lib"))

Find.find("diff_l_c_s", "diff_l_c_s.rb") do |f|
  target = File.join(sitedir, *f.split(/\//))
  if File.directory?(f)
    FileUtils.mkdir_p(target)
  elsif f[-3..-1] == ".rb"
    FileUtils.install(f, target, :mode => 0644, :verbose => true)
  end
  # Anything else (not a directory, not a .rb file) is not installed.
end
|
data/lib/diff_l_c_s.rb
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the DiffLCS library. DiffLCS is Free Software.
|
6
|
+
# You can run/distribute/modify DiffLCS under the terms of the GNU Affero
|
7
|
+
# General Public License version 3. The Affero GPL states that running a
|
8
|
+
# modified version or a derivative work also requires you to make the
|
9
|
+
# sourcecode of that work available to everyone that can interact with it.
|
10
|
+
# We chose the Affero GPL to ensure that DiffLCS remains open and libre
|
11
|
+
# (LICENSE.txt contains the full text of the legally binding license).
|
12
|
+
#++#
|
13
|
+
|
14
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
15
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
16
|
+
|
17
|
+
unless defined?(PositionRange)
|
18
|
+
begin
|
19
|
+
$:.unshift(File.dirname(__FILE__) + "/../../positionrange/lib")
|
20
|
+
require 'position_range'
|
21
|
+
rescue LoadError
|
22
|
+
require 'rubygems'
|
23
|
+
gem 'positionrange'
|
24
|
+
require 'position_range'
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
module DiffLCS
  # Character-level diff of the receiver against +other+.
  #
  # Both the receiver and +other+ are split into arrays of single
  # characters, which are then handed to DiffLCS.diff together with
  # +options+. See DiffLCS.diff for the accepted options and the result.
  #
  def diff(other, options = {})
    own_chars = split('')
    other_chars = other.split('')
    DiffLCS.diff(own_chars, other_chars, options)
  end

  # Word-level diff of the receiver against +other+, see DiffLCS.word_diff.
  #
  # Words are non-spaces or groups of spaces delimited by either
  # spaces or the beginning or the end of the string.
  #
  def word_diff(other, options = {})
    DiffLCS.word_diff(self, other, options)
  end
end
|
44
|
+
|
45
|
+
require 'diff_l_c_s/counter'
|
46
|
+
require 'diff_l_c_s/word_split_array'
|
47
|
+
|
48
|
+
module DiffLCS
  class << self
    # Diffs old_arr against new_arr and returns a hash containing:
    #
    # <tt>:matched_old</tt> = the position-ranges in the old array for the
    #   places where the new matches the old.
    # <tt>:matched_new</tt> = the matching position-ranges in the new array,
    #   sorted.
    #
    # The two lists run in parallel: the range at index i of
    # <tt>:matched_old</tt> belongs to the range at index i of
    # <tt>:matched_new</tt>.
    #
    # Valid options are:
    # * <tt>:minimum_lcs_size</tt> = the minimum size of LCS-es to allow
    #   (see longest_common_sub_strings)
    #
    def diff(old_arr, new_arr, options = {})
      minimum_lcs_size = options[:minimum_lcs_size] || 0
      lcs = DiffLCS.longest_common_sub_strings(old_arr, new_arr,
          :minimum_lcs_size => minimum_lcs_size)
      unsorted_old = lcs[:matched_old]
      unsorted_new = lcs[:matched_new]

      matched_new = unsorted_new.sort
      matched_old = PositionRange::List.new
      # Put every old range at the slot its partner got in the sorted new list.
      unsorted_old.size.times do |i|
        matched_old[matched_new.index(unsorted_new[i])] = unsorted_old[i]
      end

      {:matched_old => matched_old,
       :matched_new => matched_new}
    end

    # Diffs the two strings word by word and returns the same kind of hash
    # as diff, with the ranges translated by WordSplitArray#translate_to_pos.
    #
    # Words are non-spaces or groups of spaces delimited by either
    # spaces or the beginning or the end of the string.
    #
    def word_diff(old_string, new_string, options = {})
      old_w_s_arr = DiffLCS::WordSplitArray.new(old_string)
      new_w_s_arr = DiffLCS::WordSplitArray.new(new_string)
      diff = DiffLCS.diff(old_w_s_arr, new_w_s_arr, options)
      {:matched_old => old_w_s_arr.translate_to_pos(diff[:matched_old]),
       :matched_new => new_w_s_arr.translate_to_pos(diff[:matched_new])}
    end

    # Finds the Longest Common Substrings (not Subsequences) of the Arrays
    # and returns a hash containing:
    #
    # <tt>:matched_old</tt> = a PositionRange::List pointing at them in old_arr
    # <tt>:matched_new</tt> = a PositionRange::List pointing at them in new_arr
    #
    # Both lists are empty if nothing was found.
    #
    # Valid options are:
    # * <tt>:minimum_lcs_size</tt> = the minimum size of LCS-es to allow;
    #   only substrings larger than this value are kept (defaults to 0)
    #
    # The returned Lists are ordered by LCS-size, largest first.
    #
    def longest_common_sub_strings(old_arr, new_arr, options = {})
      minimum_lcs_size = options[:minimum_lcs_size] || 0

      # Walk the old x new match table row by row. A match extends the run
      # ending at the diagonal neighbour (old_i - 1, new_i - 1) when there is
      # one, and starts a new Counter otherwise. Only the previous row is
      # ever read, so that is all that is kept.
      counter_array = []
      previous_row = {}
      old_arr.each_with_index do |old_el, old_i|
        current_row = {}
        new_arr.each_with_index do |new_el, new_i|
          next unless old_el == new_el
          counter = previous_row[new_i - 1]
          if counter
            counter.step_up
          else
            counter = Counter.new(old_i, new_i)
            counter_array.push(counter)
          end
          current_row[new_i] = counter
        end
        previous_row = current_row
      end

      in_old_p_r_list = PositionRange::List.new
      in_new_p_r_list = PositionRange::List.new

      counter_array = counter_array.select { |co| co.step_size > minimum_lcs_size }

      # Take the largest remaining run each round, and cut its ranges out of
      # all the other runs before accepting it.
      while (counter = counter_array.sort!.pop)
        i = 0
        while i < counter_array.size
          # NOTE(review): PositionRange#=== is presumably an overlap test and
          # PositionRange#- the remainder after removing the overlap (nil
          # when nothing is left) -- confirm against the positionrange gem.
          if counter_array[i].in_old === counter.in_old
            counter_array[i].in_old = counter_array[i].in_old - counter.in_old
          end
          if counter_array[i].in_new === counter.in_new
            counter_array[i].in_new = counter_array[i].in_new - counter.in_new
          end
          if counter_array[i].size <= minimum_lcs_size
            # Shrunk too far: drop it (and do not advance i, the next
            # element has moved into this slot).
            counter_array.delete_at(i)
          else
            i += 1
          end
        end
        in_old_p_r_list.push(counter.in_old)
        in_new_p_r_list.push(counter.in_new)
      end

      {:matched_old => in_old_p_r_list,
       :matched_new => in_new_p_r_list}
    end
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the DiffLCS library. DiffLCS is Free Software.
|
6
|
+
# You can run/distribute/modify DiffLCS under the terms of the GNU Affero
|
7
|
+
# General Public License version 3. The Affero GPL states that running a
|
8
|
+
# modified version or a derivative work also requires you to make the
|
9
|
+
# sourcecode of that work available to everyone that can interact with it.
|
10
|
+
# We chose the Affero GPL to ensure that DiffLCS remains open and libre
|
11
|
+
# (LICENSE.txt contains the full text of the legally binding license).
|
12
|
+
#++#
|
13
|
+
|
14
|
+
# Tracks one run of consecutive matches between the old and the new array:
# where it starts in each of them, and how long it is. Counters compare by
# size.
class DiffLCS::Counter
  include Comparable

  # Creates a new counter of size 1, starting at old_i in the old array
  # and at new_i in the new array.
  #
  def initialize(old_i, new_i)
    @in_old_begin = old_i
    @in_new_begin = new_i
    @size = 1
  end

  # Increases the stepped size by one and returns the counter itself.
  #
  def step_up
    @size += 1
    self
  end

  # Returns the PositionRange for the range in the old string.
  #
  # The range is built on the first call and remembered.
  #
  # NOTE: No step_up's possible anymore after this function has been
  # called.
  #
  def in_old
    @in_old ||= PositionRange.new(@in_old_begin, @in_old_begin + @size)
  end

  # Returns the PositionRange for the range in the new string.
  #
  # The range is built on the first call and remembered.
  #
  # NOTE: No step_up's possible anymore after this function has been
  # called.
  #
  def in_new
    @in_new ||= PositionRange.new(@in_new_begin, @in_new_begin + @size)
  end

  # Sets the in_old PositionRange, and updates the in_new too
  #
  # If new_in_old is nil, the counter is set empty
  #
  # NOTE: Assumed to be smaller than before, and not moved.
  #
  def in_old=(new_in_old)
    unless new_in_old
      @empty = true
      return
    end
    current_new = in_new
    current_old = in_old
    @in_new = adjust(current_new, current_old, new_in_old)
    @in_old = new_in_old
  end

  # Sets the in_new PositionRange, and updates the in_old too
  #
  # If new_in_new is nil, the counter is set empty
  #
  # NOTE: Assumed to be smaller than before, and not moved.
  #
  def in_new=(new_in_new)
    unless new_in_new
      @empty = true
      return
    end
    current_old = in_old
    current_new = in_new
    @in_old = adjust(current_old, current_new, new_in_new)
    @in_new = new_in_new
  end

  # Faster than size, but only tells the size to which was
  # stepped (later shrinking through in_old= / in_new= is not reflected).
  #
  def step_size
    @size
  end

  # Returns the size of this Counter: 0 when it was set empty, the size
  # of its in_old range otherwise.
  #
  def size
    @empty ? 0 : in_old.size
  end

  # Compares its own size with the size of the other
  #
  def <=>(other)
    size <=> other.size
  end

  protected

  # Helper for in_new = and in_old =
  #
  # Applies to to_set the same shrinking that turned other_old into
  # other_new: when the end moved inwards, to_set loses as much at its
  # end; otherwise its begin is shifted by the distance the begin moved.
  #
  def adjust(to_set, other_old, other_new)
    if other_new.end < other_old.end
      cut_off = other_old.end - other_new.end
      to_set.new_dup(to_set.begin, to_set.end - cut_off)
    else
      shift = other_new.begin - other_old.begin
      to_set.new_dup(to_set.begin + shift, to_set.end)
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the DiffLCS library. DiffLCS is Free Software.
|
6
|
+
# You can run/distribute/modify DiffLCS under the terms of the GNU Affero
|
7
|
+
# General Public License version 3. The Affero GPL states that running a
|
8
|
+
# modified version or a derivative work also requires you to make the
|
9
|
+
# sourcecode of that work available to everyone that can interact with it.
|
10
|
+
# We chose the Affero GPL to ensure that DiffLCS remains open and libre
|
11
|
+
# (LICENSE.txt contains the full text of the legally binding license).
|
12
|
+
#++#
|
13
|
+
|
14
|
+
require 'diff_l_c_s'
|
15
|
+
|
16
|
+
# Mixes DiffLCS into String, so that every String responds to the
# diff and word_diff instance methods of that module.
String.send(:include, DiffLCS)
|