difflcs 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.txt +3 -0
- data/LICENSE.txt +662 -0
- data/README.txt +43 -0
- data/Rakefile +93 -0
- data/install.rb +30 -0
- data/lib/diff_l_c_s.rb +148 -0
- data/lib/diff_l_c_s/counter.rb +120 -0
- data/lib/diff_l_c_s/string.rb +18 -0
- data/lib/diff_l_c_s/version.rb +9 -0
- data/lib/diff_l_c_s/word_split_array.rb +58 -0
- data/lib/difflcs.rb +1 -0
- data/test/counter_test.rb +93 -0
- data/test/diff_l_c_s_test.rb +124 -0
- data/test/test_helper.rb +3 -0
- data/test/word_split_array_test.rb +55 -0
- metadata +77 -0
data/README.txt
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
= Diff Longest Common Sub String -- The diff sniffing out every move
|
2
|
+
|
3
|
+
Diff LCS is a library by the LogiLogi Foundation, extracted from
|
4
|
+
http://www.logilogi.org (http://foundation.logilogi.org).
|
5
|
+
|
6
|
+
== Usage
|
7
|
+
|
8
|
+
== Download
|
9
|
+
|
10
|
+
The latest version of Diff LCS can be found at:
|
11
|
+
|
12
|
+
* http://rubyforge.org/frs/?group_id=7565
|
13
|
+
|
14
|
+
Documentation can be found at:
|
15
|
+
|
16
|
+
* http://difflcs.rubyforge.org
|
17
|
+
|
18
|
+
== Installation
|
19
|
+
|
20
|
+
You can install Diff LCS with the following command:
|
21
|
+
|
22
|
+
% [sudo] gem install difflcs
|
23
|
+
|
24
|
+
Or from its distribution directory with:
|
25
|
+
|
26
|
+
% [sudo] ruby install.rb
|
27
|
+
|
28
|
+
== License
|
29
|
+
|
30
|
+
Diff LCS is released under the GNU Affero GPL licence.
|
31
|
+
|
32
|
+
* http://www.fsf.org/licensing/licenses/agpl-3.0.html
|
33
|
+
|
34
|
+
== Support
|
35
|
+
|
36
|
+
The Diff LCS homepage is http://difflcs.rubyforge.org.
|
37
|
+
|
38
|
+
For the latest news on Diff LCS:
|
39
|
+
|
40
|
+
* http://foundation.logilogi.org/tags/DiffLCS
|
41
|
+
|
42
|
+
Feel free to submit commits or feature requests. If you send a patch,
|
43
|
+
remember to update the corresponding unit tests.
|
data/Rakefile
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# Build script for the difflcs gem: runs the unit tests, generates the RDoc
# documentation, builds the gem/tgz/zip packages and publishes them to
# RubyForge.

require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/rdoctask'
require 'rake/packagetask'
require 'rake/gempackagetask'
require 'rake/contrib/sshpublisher'
require File.join(File.dirname(__FILE__), 'lib', 'diff_l_c_s', 'version')

# Optional build suffix taken from the environment (e.g. PKG_BUILD=1 gives
# version "x.y.z.1"); empty when PKG_BUILD is not set.
PKG_BUILD     = ENV['PKG_BUILD'] ? '.' + ENV['PKG_BUILD'] : ''
PKG_NAME      = 'difflcs'
PKG_VERSION   = DiffLCS::VERSION::STRING + PKG_BUILD
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

RELEASE_NAME  = "REL #{PKG_VERSION}"

RUBY_FORGE_PROJECT = "difflcs"
RUBY_FORGE_USER    = "wybo"

desc "Default Task"
task :default => [ :test ]

# Run the unit tests
Rake::TestTask.new { |t|
  t.libs << "test"
  t.pattern = 'test/*_test.rb'
  t.verbose = true
  t.warning = false
}

# Generate the RDoc documentation
Rake::RDocTask.new { |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title    = "Diff Longest Common Substring -- The diff sniffing out every move"
  rdoc.options << '--line-numbers' << '--inline-source' << '-A cattr_accessor=object'
  rdoc.options << '--charset' << 'utf-8'
  rdoc.rdoc_files.include('README.txt', 'CHANGELOG.txt')
  rdoc.rdoc_files.include('lib/diff_l_c_s.rb')
  rdoc.rdoc_files.include('lib/diff_l_c_s/*.rb')
}

# Create compressed packages
spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PKG_NAME
  s.summary = "Diffing that sniffs out moved text."
  # Spelling of "algorithm" corrected; this text is shown to users of the gem.
  s.description = %q{A diff algorithm using longest common substrings that can also find text that has moved.}
  s.version = PKG_VERSION

  s.author = "Wybo Wiersma"
  s.email = "wybo@logilogi.org"
  s.rubyforge_project = RUBY_FORGE_PROJECT
  s.homepage = "http://difflcs.rubyforge.org"

  s.add_dependency('positionrange', '>= 0.6.0' + PKG_BUILD)

  s.has_rdoc = true
  s.requirements << 'none'
  s.require_path = 'lib'

  # Ship the top-level files plus everything under lib/ and test/, leaving
  # out Subversion bookkeeping.
  s.files = [ "Rakefile", "install.rb", "README.txt", "CHANGELOG.txt", "LICENSE.txt" ]
  s.files = s.files + Dir.glob("lib/**/*").reject { |item| item.include?(".svn") }
  s.files = s.files + Dir.glob("test/**/*").reject { |item| item.include?(".svn") }
end

Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = true
  p.need_zip = true
end

desc "Publish the docs, gem, and release files"
task :deploy => [:release, :pdoc] do
  puts 'Published gem'
end

desc "Publish the API documentation"
task :pdoc => [:rdoc] do
  sh "rsync -azv --no-perms --no-times doc/*" +
     " rubyforge.org:/var/www/gforge-projects/#{RUBY_FORGE_PROJECT}"
end

desc "Publish the release files to RubyForge."
task :release => [ :package ] do
  require 'rubyforge'
  require 'rake/contrib/rubyforgepublisher'

  # One file per package format produced by the :package task.
  packages = %w( gem tgz zip ).collect { |ext| "pkg/#{PKG_FILE_NAME}.#{ext}" }

  rubyforge = RubyForge.new.configure
  rubyforge.login
  rubyforge.add_release(PKG_NAME, PKG_NAME, RELEASE_NAME, *packages)
end
|
data/install.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Installs the DiffLCS library files into Ruby's site library directory.
#
# this was adapted from rdoc's install.rb by way of Log4r
#
# The ftools library and the top-level Config constant are gone from current
# Ruby versions, so FileUtils and RbConfig::CONFIG are used instead.

require 'rbconfig'
require 'find'
require 'fileutils'

config = RbConfig::CONFIG

# Work out where to install: prefer the configured site library directory,
# otherwise derive one from the load path or the library directory.
$sitedir = config["sitelibdir"]
unless $sitedir
  version = config["MAJOR"] + "." + config["MINOR"]
  $libdir = File.join(config["libdir"], "ruby", version)
  $sitedir = $:.find { |x| x =~ /site_ruby/ }
  if !$sitedir
    $sitedir = File.join($libdir, "site_ruby")
  elsif !$sitedir.include?(version)
    # Plain substring test: matching a String against a String with !~
    # raises a TypeError.
    $sitedir = File.join($sitedir, version)
  end
end

# the actual gruntwork
#
# Resolve lib/ relative to this script so the installer also works when it is
# not started from the distribution directory.
Dir.chdir(File.join(File.dirname(__FILE__), "lib"))

Find.find("diff_l_c_s", "diff_l_c_s.rb") do |f|
  target = File.join($sitedir, *f.split(/\//))
  if File.directory?(f)
    FileUtils.mkdir_p(target)
  elsif File.extname(f) == ".rb"
    # Make sure the destination directory exists before copying into it.
    FileUtils.mkdir_p(File.dirname(target))
    FileUtils.install(f, target, :mode => 0644, :verbose => true)
  end
  # Anything else (non-Ruby files) is skipped rather than being turned into
  # a directory.
end
|
data/lib/diff_l_c_s.rb
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the DiffLCS library. DiffLCS is Free Software.
|
6
|
+
# You can run/distribute/modify DiffLCS under the terms of the GNU Affero
|
7
|
+
# General Public License version 3. The Affero GPL states that running a
|
8
|
+
# modified version or a derivative work also requires you to make the
|
9
|
+
# sourcecode of that work available to everyone that can interact with it.
|
10
|
+
# We chose the Affero GPL to ensure that DiffLCS remains open and libre
|
11
|
+
# (LICENSE.txt contains the full text of the legally binding license).
|
12
|
+
#++#
|
13
|
+
|
14
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
15
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
16
|
+
|
17
|
+
unless defined?(PositionRange)
|
18
|
+
begin
|
19
|
+
$:.unshift(File.dirname(__FILE__) + "/../../positionrange/lib")
|
20
|
+
require 'position_range'
|
21
|
+
rescue LoadError
|
22
|
+
require 'rubygems'
|
23
|
+
gem 'positionrange'
|
24
|
+
require 'position_range'
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
module DiffLCS
  # Character-level diff of self against other.
  #
  # Both receiver and argument are split into single characters and handed,
  # together with the options, to DiffLCS.diff (see there for the options
  # and the returned Hash).
  #
  def diff(other, options = {})
    own_chars = split('')
    other_chars = other.split('')
    DiffLCS.diff(own_chars, other_chars, options)
  end

  # Word-level diff of self against other, see DiffLCS.word_diff
  #
  # Words are non-spaces or groups of spaces delimited by either
  # spaces or the beginning or the end of the string.
  #
  def word_diff(other, options = {})
    DiffLCS.word_diff(self, other, options)
  end
end
|
44
|
+
|
45
|
+
require 'diff_l_c_s/counter'
|
46
|
+
require 'diff_l_c_s/word_split_array'
|
47
|
+
|
48
|
+
module DiffLCS
  class << self
    # Diffs two Arrays of elements (characters, words, ...) by extracting
    # their longest common substrings, and returns a Hash containing:
    #
    # <tt>:matched_old</tt> = the position-ranges in the old Array for
    # the places where the new matches the old.
    # <tt>:matched_new</tt> = the matching position-ranges in the new
    # Array, in the order produced by sorting the list.
    #
    # The two lists are parallel: the range at index i of
    # <tt>:matched_old</tt> belongs to the range at index i of
    # <tt>:matched_new</tt>.
    #
    # Valid options are:
    # * <tt>:minimum_lcs_size</tt> = only LCS-es larger than this size are
    #   reported (defaults to 0)
    #
    def diff(old_arr, new_arr, options = {})
      minimum_lcs_size = options[:minimum_lcs_size] || 0
      found = DiffLCS.longest_common_sub_strings(old_arr, new_arr,
          :minimum_lcs_size => minimum_lcs_size)
      unsorted_old = found[:matched_old]
      unsorted_new = found[:matched_new]

      matched_new = unsorted_new.sort
      matched_old = PositionRange::List.new
      # Put every old range at the slot its partner got in the sorted new list.
      unsorted_old.size.times do |i|
        matched_old[matched_new.index(unsorted_new[i])] = unsorted_old[i]
      end

      {:matched_old => matched_old,
       :matched_new => matched_new}
    end

    # Diffs two Strings word by word, see DiffLCS.diff for the options and
    # the keys of the returned Hash.
    #
    # Words are non-spaces or groups of spaces delimited by either
    # spaces or the beginning or the end of the string.
    #
    # The ranges found on the word arrays are passed through
    # WordSplitArray#translate_to_pos before being returned (presumably
    # converting word indexes to character positions -- confirm against
    # WordSplitArray).
    #
    def word_diff(old_string, new_string, options = {})
      old_words = DiffLCS::WordSplitArray.new(old_string)
      new_words = DiffLCS::WordSplitArray.new(new_string)
      word_ranges = DiffLCS.diff(old_words, new_words, options)
      {:matched_old => old_words.translate_to_pos(word_ranges[:matched_old]),
       :matched_new => new_words.translate_to_pos(word_ranges[:matched_new])}
    end

    # Finds the Longest Common Substrings (not Subsequences) of the two
    # Arrays and returns a Hash with two parallel PositionRange::Lists:
    #
    # <tt>:matched_old</tt> = where each LCS sits in old_arr
    # <tt>:matched_new</tt> = where the same LCS sits in new_arr
    #
    # Both lists are empty if nothing was found.
    #
    # Valid options are:
    # * <tt>:minimum_lcs_size</tt> = only LCS-es larger than this size are
    #   reported (defaults to 0)
    #
    # The lists are in extraction order: the largest remaining LCS is taken
    # first each time.
    #
    def longest_common_sub_strings(old_arr, new_arr, options = {})
      minimum_lcs_size = options[:minimum_lcs_size] || 0

      # A match at (old_i, new_i) extends the run ending at
      # (old_i - 1, new_i - 1), so only the previous row of counters has to
      # be kept around instead of the whole table.
      previous_row = {}
      counter_array = []
      old_arr.each_with_index do |old_el, old_i|
        current_row = {}
        new_arr.each_with_index do |new_el, new_i|
          next unless old_el == new_el
          running = new_i > 0 ? previous_row[new_i - 1] : nil
          if running
            running.step_up
            current_row[new_i] = running
          else
            counter = Counter.new(old_i, new_i)
            current_row[new_i] = counter
            counter_array.push(counter)
          end
        end
        previous_row = current_row
      end

      in_old_p_r_list = PositionRange::List.new
      in_new_p_r_list = PositionRange::List.new

      counter_array = counter_array.select { |co| co.step_size > minimum_lcs_size }

      # Repeatedly take the largest counter and cut what it covers out of
      # the remaining ones (presumably PositionRange#=== tests for overlap
      # and PositionRange#- returns what is left -- confirm against the
      # positionrange library). Counters that became too small are dropped.
      while (longest = counter_array.sort!.pop)
        counter_array.reject! do |candidate|
          if candidate.in_old === longest.in_old
            candidate.in_old = candidate.in_old - longest.in_old
          end
          if candidate.in_new === longest.in_new
            candidate.in_new = candidate.in_new - longest.in_new
          end
          candidate.size <= minimum_lcs_size
        end
        in_old_p_r_list.push(longest.in_old)
        in_new_p_r_list.push(longest.in_new)
      end

      {:matched_old => in_old_p_r_list,
       :matched_new => in_new_p_r_list}
    end
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the DiffLCS library. DiffLCS is Free Software.
|
6
|
+
# You can run/distribute/modify DiffLCS under the terms of the GNU Affero
|
7
|
+
# General Public License version 3. The Affero GPL states that running a
|
8
|
+
# modified version or a derivative work also requires you to make the
|
9
|
+
# sourcecode of that work available to everyone that can interact with it.
|
10
|
+
# We chose the Affero GPL to ensure that DiffLCS remains open and libre
|
11
|
+
# (LICENSE.txt contains the full text of the legally binding license).
|
12
|
+
#++#
|
13
|
+
|
14
|
+
# Tracks one run of matching elements between the old and the new Array:
# where it starts in each of them and how long it is. Counters compare by
# size (see <=>).
#
class DiffLCS::Counter
  include Comparable

  # Creates a new counter of size one that starts at old_i in the old
  # Array and at new_i in the new Array
  #
  def initialize(old_i, new_i)
    @size = 1
    @in_old_begin = old_i
    @in_new_begin = new_i
  end

  # Makes the counted run one element longer, returns the counter itself
  #
  def step_up
    @size += 1
    self
  end

  # Returns the PositionRange for the range in the old string.
  #
  # The range is built on first use and then kept.
  #
  # NOTE: No step_up's possible anymore after this function has been
  # called.
  #
  def in_old
    @in_old ||= PositionRange.new(@in_old_begin, @in_old_begin + @size)
  end

  # Returns the PositionRange for the range in the new string.
  #
  # The range is built on first use and then kept.
  #
  # NOTE: No step_up's possible anymore after this function has been
  # called.
  #
  def in_new
    @in_new ||= PositionRange.new(@in_new_begin, @in_new_begin + @size)
  end

  # Sets the in_old PositionRange, and updates the in_new too
  #
  # If new_in_old is nil, the counter is set empty
  #
  # NOTE: Assumed to be smaller than before, and not moved.
  #
  def in_old=(new_in_old)
    unless new_in_old
      @empty = true
      return
    end
    @in_new = adjust(in_new, in_old, new_in_old)
    @in_old = new_in_old
  end

  # Sets the in_new PositionRange, and updates the in_old too
  #
  # If new_in_new is nil, the counter is set empty
  #
  # NOTE: Assumed to be smaller than before, and not moved.
  #
  def in_new=(new_in_new)
    unless new_in_new
      @empty = true
      return
    end
    @in_old = adjust(in_old, in_new, new_in_new)
    @in_new = new_in_new
  end

  # Faster than size, but only tells the size to which was
  # stepped.
  #
  def step_size
    @size
  end

  # Returns the size of this Counter: 0 once it has been set empty,
  # otherwise the size of its in_old range
  #
  def size
    return 0 if @empty
    in_old.size
  end

  # Compares its own size with the size of the other
  #
  def <=>(other)
    size <=> other.size
  end

  protected

  # Helper for in_new = and in_old =
  #
  # Returns a copy of to_set (made with new_dup) that is shrunk the same
  # way other_new is shrunk compared to other_old: cut at the end when the
  # end moved inwards, otherwise cut at the beginning.
  #
  def adjust(to_set, other_old, other_new)
    if other_new.end < other_old.end
      cut_from_end = other_old.end - other_new.end
      to_set.new_dup(to_set.begin, to_set.end - cut_from_end)
    else
      cut_from_begin = other_new.begin - other_old.begin
      to_set.new_dup(to_set.begin + cut_from_begin, to_set.end)
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the DiffLCS library. DiffLCS is Free Software.
|
6
|
+
# You can run/distribute/modify DiffLCS under the terms of the GNU Affero
|
7
|
+
# General Public License version 3. The Affero GPL states that running a
|
8
|
+
# modified version or a derivative work also requires you to make the
|
9
|
+
# sourcecode of that work available to everyone that can interact with it.
|
10
|
+
# We chose the Affero GPL to ensure that DiffLCS remains open and libre
|
11
|
+
# (LICENSE.txt contains the full text of the legally binding license).
|
12
|
+
#++#
|
13
|
+
|
14
|
+
require 'diff_l_c_s'
|
15
|
+
|
16
|
+
# Mixes DiffLCS into String, so every String gets the diff and word_diff
# instance methods defined by that module.
String.send(:include, DiffLCS)
|