difflcs 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.txt +3 -0
- data/LICENSE.txt +662 -0
- data/README.txt +43 -0
- data/Rakefile +93 -0
- data/install.rb +30 -0
- data/lib/diff_l_c_s.rb +148 -0
- data/lib/diff_l_c_s/counter.rb +120 -0
- data/lib/diff_l_c_s/string.rb +18 -0
- data/lib/diff_l_c_s/version.rb +9 -0
- data/lib/diff_l_c_s/word_split_array.rb +58 -0
- data/lib/difflcs.rb +1 -0
- data/test/counter_test.rb +93 -0
- data/test/diff_l_c_s_test.rb +124 -0
- data/test/test_helper.rb +3 -0
- data/test/word_split_array_test.rb +55 -0
- metadata +77 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the DiffLCS library. DiffLCS is Free Software.
|
6
|
+
# You can run/distribute/modify DiffLCS under the terms of the GNU Affero
|
7
|
+
# General Public License version 3. The Affero GPL states that running a
|
8
|
+
# modified version or a derivative work also requires you to make the
|
9
|
+
# sourcecode of that work available to everyone that can interact with it.
|
10
|
+
# We chose the Affero GPL to ensure that DiffLCS remains open and libre
|
11
|
+
# (LICENSE.txt contains the full text of the legally binding license).
|
12
|
+
#++#
|
13
|
+
|
14
|
+
# An Array holding the pieces a text is split into for word-wise diffing:
# html-like tags, runs of non-word characters (whitespace, punctuation),
# SEPARATOR characters, and whatever text lies between those.
#
class DiffLCS::WordSplitArray < Array

  ### Constants

  # Marker character that is always split off as an element of its own.
  #
  # NOTE: "\031" is an octal escape, so this is byte 0x19 (decimal 25, the
  # ASCII "End of Medium" control character) and not the ASCII Unit
  # Separator (0x1F, which would be written "\037"). The value is kept as
  # it is, since changing it would alter what existing callers split on.
  SEPARATOR = "\031"

  # Matches either an html-like tag such as <b> or </b>, a run of characters
  # that are neither word-characters, '<', '/', '>' nor the SEPARATOR, or a
  # single SEPARATOR. Built once here instead of on every #initialize call.
  TOKEN_PATTERN = /<\/?\w+>|[^\w<\/>#{SEPARATOR}]+|#{SEPARATOR}/

  ### Constructors

  # Splits text into elements and stores them in this array.
  #
  # Every TOKEN_PATTERN match becomes one element, and any text between two
  # matches (or before the first / after the last match) becomes an element
  # as well, so that joining the array gives back the original text.
  #
  # text:: the String to split
  #
  def initialize(text)
    super()
    old_end = 0
    text.scan(TOKEN_PATTERN) do |literal|
      match = Regexp.last_match
      # text skipped over since the previous match (typically a word)
      push(text[old_end...match.begin(0)]) if match.begin(0) > old_end
      push(literal)
      old_end = match.end(0)
    end
    # trailing text after the last match
    push(text[old_end...text.size]) if old_end < text.size
  end

  ### Methods

  # Translates word-positions to character-positions.
  #
  # positions:: a collection of ranges whose begin and end are indexes into
  #             this array. Each range must respond to begin, end and
  #             new_dup(begin, end); new_dup is presumably a copy with new
  #             bounds -- NOTE(review): confirm against PositionRange.
  #
  # Returns a PositionRange::List built from the translated ranges, in the
  # order in which they were given.
  #
  def translate_to_pos(positions)
    # offsets[i] is the summed size of the first i elements, i.e. the
    # character position at which element i starts
    offsets = [0]
    each { |word| offsets.push(offsets.last + word.size) }
    PositionRange::List.new(
      positions.collect { |position|
        position.new_dup(offsets[position.begin], offsets[position.end])
      })
  end
end
|
data/lib/difflcs.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'diff_l_c_s'
|
@@ -0,0 +1,93 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the DiffLCS library. DiffLCS is Free Software.
|
6
|
+
# You can run/distribute/modify DiffLCS under the terms of the GNU Affero
|
7
|
+
# General Public License version 3. The Affero GPL states that running a
|
8
|
+
# modified version or a derivative work also requires you to make the
|
9
|
+
# sourcecode of that work available to everyone that can interact with it.
|
10
|
+
# We chose the Affero GPL to ensure that DiffLCS remains open and libre
|
11
|
+
# (LICENSE.txt contains the full text of the legally binding license).
|
12
|
+
#++#
|
13
|
+
|
14
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
15
|
+
|
16
|
+
# Unit tests for DiffLCS::Counter.
class CounterTest < Test::Unit::TestCase

  ### Initialization

  # A counter built from (1, 2) is expected to report a step size of 1.
  def test_initialize
    counter = DiffLCS::Counter.new(1, 2)
    assert_equal 1, counter.step_size
  end

  ### Methods

  # Checks in_old / in_new for a fresh counter and after two step_ups.
  def test_step_up_in_old_and_in_new
    fresh = DiffLCS::Counter.new(5, 80)
    assert_equal PositionRange.new(5, 6), fresh.in_old
    assert_equal PositionRange.new(80, 81), fresh.in_new

    stepped = DiffLCS::Counter.new(5, 80)
    2.times { stepped.step_up }

    assert_equal PositionRange.new(5, 8), stepped.in_old
    assert_equal PositionRange.new(80, 83), stepped.in_new
  end

  # Assigning in_old or in_new is expected to change size and the other
  # range along with it.
  def test_size_in_old_and_in_new_assignment
    # assignment to in_old
    by_old = DiffLCS::Counter.new(5, 80)
    5.times { by_old.step_up }

    assert_equal 6, by_old.size
    by_old.in_old = PositionRange.new(5, 9)
    assert_equal 4, by_old.size
    assert_equal PositionRange.new(80, 84), by_old.in_new

    # assignment to in_new
    by_new = DiffLCS::Counter.new(5, 80)
    3.times { by_new.step_up }

    assert_equal 4, by_new.size
    by_new.in_new = PositionRange.new(80, 82)
    assert_equal 2, by_new.size
    assert_equal PositionRange.new(5, 7), by_new.in_old

    # assignment to in_new, followed by assignment to in_old
    by_both = DiffLCS::Counter.new(5, 80)
    4.times { by_both.step_up }

    assert_equal 5, by_both.size

    by_both.in_new = PositionRange.new(80, 82)
    assert_equal 2, by_both.size
    assert_equal PositionRange.new(5, 7), by_both.in_old

    by_both.in_old = PositionRange.new(6, 7)
    assert_equal 1, by_both.size
    assert_equal PositionRange.new(81, 82), by_both.in_new
  end

  # step_size is expected to go from 1 to 2 after a single step_up.
  def test_step_size
    counter = DiffLCS::Counter.new(1, 5)
    assert_equal 1, counter.step_size
    counter.step_up
    assert_equal 2, counter.step_size
  end

  # A counter with more step_ups is expected to compare as the greater one.
  def test_comparison
    first = DiffLCS::Counter.new(5, 80)
    second = DiffLCS::Counter.new(15, 90)

    first.step_up

    assert first > second

    # Original note: "cause of no step_ups after in_old => size =>
    # comparing". NOTE(review): presumably the comparison goes by size
    # here -- confirm against DiffLCS::Counter.
    second = DiffLCS::Counter.new(15, 90)
    2.times { second.step_up }

    assert first < second
  end
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the DiffLCS library. DiffLCS is Free Software.
|
6
|
+
# You can run/distribute/modify DiffLCS under the terms of the GNU Affero
|
7
|
+
# General Public License version 3. The Affero GPL states that running a
|
8
|
+
# modified version or a derivative work also requires you to make the
|
9
|
+
# sourcecode of that work available to everyone that can interact with it.
|
10
|
+
# We chose the Affero GPL to ensure that DiffLCS remains open and libre
|
11
|
+
# (LICENSE.txt contains the full text of the legally binding license).
|
12
|
+
#++#
|
13
|
+
|
14
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
15
|
+
|
16
|
+
require 'diff_l_c_s/string'
|
17
|
+
|
18
|
+
# Unit tests for the DiffLCS class methods and the String#diff extension.
class DiffLCSTest < Test::Unit::TestCase
  ### Class methods

  # Character-wise diff: the chunks shared by old and new must be matched.
  def test_diff
    shared = ['This is the first small diff test. Isn\'t it nice ? ']
    old_chunks = shared.dup
    new_chunks = [shared[0], 'Yes it is! Look! It works.']
    do_diff_test(shared, old_chunks, new_chunks)

    # shared chunks that appear in a different order in old and new
    shared = [
      'We are now really into testing ',
      ' of hand-written - or is it typed ? - texts ',
      ' diffing (finding differences between two rows) ']
    old_chunks = [shared[0], 'the sound process of', shared[2],
                  'beauties of wizzardly', shared[1], '~']
    new_chunks = [shared[0], 'all of it. We are happy with our world',
                  shared[1], '&', shared[2], '=']
    do_diff_test(shared, old_chunks, new_chunks)
  end

  # Word-wise diff: the chunks shared by old and new must be matched.
  def test_word_diff
    shared = ['This is the first small diff test. Isn\'t it nice ? ']
    old_chunks = shared.dup
    new_chunks = [shared[0], 'Yes it is! Look! It works.']
    do_word_diff_test(shared, old_chunks, new_chunks)

    # shared chunks that appear in a different order in old and new
    shared = [
      'We are now really into testing ',
      ' of hand-written - or is it typed ? - texts ',
      ' diffing (finding differences between two rows) ']
    old_chunks = [shared[0], 'the sound process of', shared[2],
                  'beauties of wizzardly', shared[1]]
    new_chunks = [shared[0], 'all of it. We are happy with our world',
                  shared[1], 'Und', shared[2]]
    do_word_diff_test(shared, old_chunks, new_chunks)
  end

  # Checks the ranges reported by DiffLCS.longest_common_sub_strings.
  def test_longest_common_sub_strings
    old_chars = 'abcde'.split('')
    new_chars = 'acdbe'.split('')

    matches = DiffLCS.longest_common_sub_strings(old_chars, new_chars)

    assert_equal PositionRange.new(2, 4), matches[:matched_old].first
    assert_equal PositionRange.new(1, 3), matches[:matched_new].first

    old_chars = 'aaaaablabbbbbbccccc'.split('')
    new_chars = 'aaaaakbbbbbbk'.split('')

    expected = {:matched_old => PositionRange::List.from_s('8,14:0,5'),
                :matched_new => PositionRange::List.from_s('6,12:0,5')}
    assert_equal(expected,
                 DiffLCS.longest_common_sub_strings(old_chars, new_chars))
  end

  # Checks the diff method made available on String by diff_l_c_s/string.
  def test_string
    expected = {:matched_old => PositionRange::List.from_s('0,2:5,9:2,4'),
                :matched_new => PositionRange::List.from_s('0,2:3,7:7,9')}
    assert_equal(expected, '123456789'.diff('120678934'))
  end

  ### Test helpers

  # Joins the chunks into an old and a new text, and asserts that
  # DiffLCS.diff over their characters matches exactly the shared chunks.
  def do_diff_test(similar_chunk_arr, old_chunk_arr, new_chunk_arr)
    old_text = old_chunk_arr.join
    new_text = new_chunk_arr.join
    old_chars = old_text.split('')
    new_chars = new_text.split('')
    expected = expected_matches(similar_chunk_arr, new_chunk_arr,
                                old_text, new_text)
    assert_equal(expected,
                 DiffLCS.diff(old_chars, new_chars, :minimum_lcs_size => 15))
  end

  # Joins the chunks into an old and a new text, and asserts that
  # DiffLCS.word_diff over those texts matches exactly the shared chunks.
  def do_word_diff_test(similar_chunk_arr, old_chunk_arr, new_chunk_arr)
    old_text = old_chunk_arr.join
    new_text = new_chunk_arr.join
    expected = expected_matches(similar_chunk_arr, new_chunk_arr,
                                old_text, new_text)
    assert_equal(expected,
                 DiffLCS.word_diff(old_text, new_text, :minimum_lcs_size => 3))
  end

  # Returns a PositionRange::List with, for each substring, the range from
  # its first occurrence in string up to that position plus its size.
  # Raises a StandardError when a substring does not occur in string.
  def get_position_range_list_for_ranges_of_in(substrings, string)
    substrings.inject(PositionRange::List.new) do |ranges, substring|
      start = string.index(substring)
      raise StandardError, ' Substring not found' unless start
      ranges.push(PositionRange.new(start, start + substring.size))
      ranges
    end
  end

  private

  # Builds the result hash the diff is expected to return: the shared
  # chunks located in the old text (in the order of similar_chunk_arr) and
  # in the new text (in the order in which they occur in new_chunk_arr).
  def expected_matches(similar_chunk_arr, new_chunk_arr, old_text, new_text)
    # the elements of new_chunk_arr that are also in similar_chunk_arr
    shared_in_new_order = new_chunk_arr - (new_chunk_arr - similar_chunk_arr)
    {:matched_old =>
       get_position_range_list_for_ranges_of_in(similar_chunk_arr, old_text),
     :matched_new =>
       get_position_range_list_for_ranges_of_in(shared_in_new_order, new_text)}
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the DiffLCS library. DiffLCS is Free Software.
|
6
|
+
# You can run/distribute/modify DiffLCS under the terms of the GNU Affero
|
7
|
+
# General Public License version 3. The Affero GPL states that running a
|
8
|
+
# modified version or a derivative work also requires you to make the
|
9
|
+
# sourcecode of that work available to everyone that can interact with it.
|
10
|
+
# We chose the Affero GPL to ensure that DiffLCS remains open and libre
|
11
|
+
# (LICENSE.txt contains the full text of the legally binding license).
|
12
|
+
#++#
|
13
|
+
|
14
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
15
|
+
|
16
|
+
# Unit tests for DiffLCS::WordSplitArray.
class WordSplitArrayTest < Test::Unit::TestCase
  ### Initialization

  # Checks how texts are split up into words, whitespace / punctuation runs,
  # html-like tags and separators.
  def test_initialize
    assert_equal ['boo',' ','ba',' ','bol'], DiffLCS::WordSplitArray.new('boo ba bol')
    # a run of several spaces stays together as one element
    assert_equal ['boo','  ','ba',' ','bol'], DiffLCS::WordSplitArray.new('boo  ba bol')
    assert_equal ['boo','. ','ba',' ','bol'], DiffLCS::WordSplitArray.new('boo. ba bol')
    # leading and / or trailing whitespace
    assert_equal [' ','boo','. ','ba',' ','bol'], DiffLCS::WordSplitArray.new(' boo. ba bol')
    assert_equal ['boo','. ','ba',' ','bol',' '], DiffLCS::WordSplitArray.new('boo. ba bol ')
    assert_equal [' ','boo','. ','ba',' ','bol',' '], DiffLCS::WordSplitArray.new(' boo. ba bol ')
    # html-like tags are kept whole
    assert_equal [' ','boo','. ','<ba>',' ','</bol>',' '], DiffLCS::WordSplitArray.new(' boo. <ba> </bol> ')
    assert_equal [' ','boo','. ','<ba>','moma','</bol>',' '], DiffLCS::WordSplitArray.new(' boo. <ba>moma</bol> ')
    # the separator is an element of its own, apart from surrounding spaces
    assert_equal ['boo',' ',DiffLCS::WordSplitArray::SEPARATOR,' ','ba',' ','bol'],
        DiffLCS::WordSplitArray.new('boo ' + DiffLCS::WordSplitArray::SEPARATOR + ' ba bol')
  end

  ### Methods

  # Checks the translation of element-index ranges into character ranges.
  def test_translate_to_pos
    # normal (three spaces before 'bol', so that its element spans 6 to 9
    # and 'bol' itself 9 to 12, as the expected list requires)
    assert_equal PositionRange::List.from_s('0,3:3,4:4,6:6,9:9,12'),
        DiffLCS::WordSplitArray.new('boo ba   bol').translate_to_pos(
            PositionRange::List.from_s('0,1:1,2:2,3:3,4:4,5'))

    # scrambled
    assert_equal PositionRange::List.from_s('3,4:0,3'),
        DiffLCS::WordSplitArray.new('boo ').translate_to_pos(
            PositionRange::List.from_s('1,2:0,1'))

    # ends with space
    assert_equal PositionRange::List.from_s('0,3:3,4:4,7:7,8'),
        DiffLCS::WordSplitArray.new('boo baa ').translate_to_pos(
            PositionRange::List.from_s('0,1:1,2:2,3:3,4'))

    # starts with space
    assert_equal PositionRange::List.from_s('0,1:1,4:4,5:5,8'),
        DiffLCS::WordSplitArray.new(' boo baa').translate_to_pos(
            PositionRange::List.from_s('0,1:1,2:2,3:3,4'))
  end
end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: difflcs
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.6.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Wybo Wiersma
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-12-30 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: positionrange
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.6.0
|
24
|
+
version:
|
25
|
+
description: A diff algorithm using longest common substrings that can also find text that has moved.
|
26
|
+
email: wybo@logilogi.org
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files: []
|
32
|
+
|
33
|
+
files:
|
34
|
+
- Rakefile
|
35
|
+
- install.rb
|
36
|
+
- README.txt
|
37
|
+
- CHANGELOG.txt
|
38
|
+
- LICENSE.txt
|
39
|
+
- lib/diff_l_c_s
|
40
|
+
- lib/diff_l_c_s/counter.rb
|
41
|
+
- lib/diff_l_c_s/word_split_array.rb
|
42
|
+
- lib/diff_l_c_s/string.rb
|
43
|
+
- lib/diff_l_c_s/version.rb
|
44
|
+
- lib/difflcs.rb
|
45
|
+
- lib/diff_l_c_s.rb
|
46
|
+
- test/counter_test.rb
|
47
|
+
- test/diff_l_c_s_test.rb
|
48
|
+
- test/test_helper.rb
|
49
|
+
- test/word_split_array_test.rb
|
50
|
+
has_rdoc: true
|
51
|
+
homepage: http://difflcs.rubyforge.org
|
52
|
+
post_install_message:
|
53
|
+
rdoc_options: []
|
54
|
+
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: "0"
|
62
|
+
version:
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: "0"
|
68
|
+
version:
|
69
|
+
requirements:
|
70
|
+
- none
|
71
|
+
rubyforge_project: difflcs
|
72
|
+
rubygems_version: 1.3.1
|
73
|
+
signing_key:
|
74
|
+
specification_version: 2
|
75
|
+
summary: Diffing that sniffs out moved text.
|
76
|
+
test_files: []
|
77
|
+
|