difflcs 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,9 @@
1
module DiffLCS
  # Version number of the DiffLCS library, split into its three components.
  module VERSION #:nodoc:
    MAJOR = 0
    MINOR = 6
    TINY = 0

    # The components joined with dots ("0.6.0"). Frozen so that the shared
    # constant cannot be modified in place by a caller.
    STRING = [MAJOR, MINOR, TINY].join('.').freeze
  end
end
@@ -0,0 +1,58 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
# Opened through a nested module so this file also loads when DiffLCS has
# not been defined yet.
module DiffLCS
  # An Array holding the tokens of a text, in order, such that joining the
  # tokens gives back the text. Tokens are simple html-tags, runs of
  # non-word characters, single SEPARATOR characters, and whatever text lies
  # between those (the words).
  class WordSplitArray < Array

    ### Constants

    # Used as a separator.
    #
    # NOTE: "\031" is an octal escape, so this is the byte 0x19 (decimal 25,
    # "End of Medium") and not the ASCII Unit Separator (0x1F, "\037") that
    # this constant was originally described as. The value is left unchanged
    # so existing callers keep working.
    SEPARATOR = "\031".freeze

    # Matches, in order of preference: an opening or closing html-tag
    # without attributes, a run of characters that are neither word
    # characters nor '<', '/', '>' nor the SEPARATOR, or one SEPARATOR.
    # Built once here instead of on every call to new.
    SPLIT_PATTERN = /<\/?\w+>|[^\w<\/>#{SEPARATOR}]+|#{SEPARATOR}/

    ### Constructors

    # Splits the words, and treats whitespace correctly.
    #
    # text:: the String to split. Every character of it ends up in exactly
    #        one token.
    #
    def initialize(text)
      super()
      consumed = 0
      text.scan(SPLIT_PATTERN) do |literal|
        match = Regexp.last_match
        # anything the pattern skipped over since the previous match is a word
        push(text[consumed...match.begin(0)]) if match.begin(0) > consumed
        push(literal)
        consumed = match.end(0)
      end
      # trailing word after the last match (or the whole text if none matched)
      push(text[consumed...text.size]) if consumed < text.size
    end

    ### Methods

    # Translates word-positions to character-positions.
    #
    # positions:: a collection of ranges whose begin and end are indexes
    #             into this array (token boundaries).
    #
    # Returns a PositionRange::List built from the result of calling new_dup
    # on each range with its begin and end replaced by character offsets.
    # (new_dup comes from the positionrange library and is not defined here;
    # presumably it copies the range with new boundaries.)
    #
    def translate_to_pos(positions)
      # offsets[i] is the character offset at which token i starts, and
      # offsets[size] is the summed size of all tokens
      offsets = [0]
      each { |token| offsets.push(offsets.last + token.size) }
      PositionRange::List.new(
        positions.collect { |position|
          position.new_dup(offsets[position.begin], offsets[position.end])
        })
    end
  end
end
data/lib/difflcs.rb ADDED
@@ -0,0 +1 @@
1
+ require 'diff_l_c_s'
@@ -0,0 +1,93 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
+ require File.dirname(__FILE__) + '/test_helper.rb'
15
+
16
class CounterTest < Test::Unit::TestCase

  ### Initialization

  # A newly made counter reports a step_size of 1.
  def test_initialize
    counter = DiffLCS::Counter.new(1, 2)
    assert_equal 1, counter.step_size
  end

  ### Methods

  # in_old and in_new start one position wide at the given offsets, and
  # each step_up widens both by one.
  def test_step_up_in_old_and_in_new
    fresh = DiffLCS::Counter.new(5, 80)
    assert_equal PositionRange.new(5, 6), fresh.in_old
    assert_equal PositionRange.new(80, 81), fresh.in_new

    stepped = DiffLCS::Counter.new(5, 80)
    2.times { stepped.step_up }

    assert_equal PositionRange.new(5, 8), stepped.in_old
    assert_equal PositionRange.new(80, 83), stepped.in_new
  end

  # Assigning in_old or in_new changes size and adjusts the other range
  # to match.
  def test_size_in_old_and_in_new_assignment
    # shrinking through in_old
    via_old = DiffLCS::Counter.new(5, 80)
    5.times { via_old.step_up }

    assert_equal 6, via_old.size
    via_old.in_old = PositionRange.new(5, 9)
    assert_equal 4, via_old.size
    assert_equal PositionRange.new(80, 84), via_old.in_new

    # shrinking through in_new
    via_new = DiffLCS::Counter.new(5, 80)
    3.times { via_new.step_up }

    assert_equal 4, via_new.size
    via_new.in_new = PositionRange.new(80, 82)
    assert_equal 2, via_new.size
    assert_equal PositionRange.new(5, 7), via_new.in_old

    # first through in_new, then through in_old with a moved begin
    via_both = DiffLCS::Counter.new(5, 80)
    4.times { via_both.step_up }

    assert_equal 5, via_both.size

    via_both.in_new = PositionRange.new(80, 82)
    assert_equal 2, via_both.size
    assert_equal PositionRange.new(5, 7), via_both.in_old

    via_both.in_old = PositionRange.new(6, 7)
    assert_equal 1, via_both.size
    assert_equal PositionRange.new(81, 82), via_both.in_new
  end

  # step_size starts at 1 and is 2 after one step_up.
  def test_step_size
    counter = DiffLCS::Counter.new(1, 5)
    assert_equal 1, counter.step_size
    counter.step_up
    assert_equal 2, counter.step_size
  end

  # A counter that was stepped up more often compares as greater, even
  # when it starts at lower positions.
  def test_comparison
    once_stepped = DiffLCS::Counter.new(5, 80)
    rival = DiffLCS::Counter.new(15, 90)

    once_stepped.step_up

    assert once_stepped > rival

    # a new rival that is stepped up twice now outranks once_stepped
    rival = DiffLCS::Counter.new(15, 90)
    2.times { rival.step_up }

    assert once_stepped < rival
  end
end
@@ -0,0 +1,124 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
+ require File.dirname(__FILE__) + '/test_helper.rb'
15
+
16
+ require 'diff_l_c_s/string'
17
+
18
class DiffLCSTest < Test::Unit::TestCase
  ### Class methods

  # DiffLCS.diff on character arrays: first with text appended, then with
  # shared chunks in a different order in old and new.
  def test_diff
    shared = ['This is the first small diff test. Isn\'t it nice ? ']
    do_diff_test(shared, shared.dup,
        [shared.first, 'Yes it is! Look! It works.'])

    shared = [
        'We are now really into testing ',
        ' of hand-written - or is it typed ? - texts ',
        ' diffing (finding differences between two rows) ']
    testing, typed, diffing = shared
    before = [testing, 'the sound process of', diffing,
        'beauties of wizzardly', typed, '~']
    after = [testing, 'all of it. We are happy with our world',
        typed, '&', diffing, '=']
    do_diff_test(shared, before, after)
  end

  # The same two scenarios through DiffLCS.word_diff on whole strings.
  def test_word_diff
    shared = ['This is the first small diff test. Isn\'t it nice ? ']
    do_word_diff_test(shared, shared.dup,
        [shared.first, 'Yes it is! Look! It works.'])

    shared = [
        'We are now really into testing ',
        ' of hand-written - or is it typed ? - texts ',
        ' diffing (finding differences between two rows) ']
    testing, typed, diffing = shared
    before = [testing, 'the sound process of', diffing,
        'beauties of wizzardly', typed]
    after = [testing, 'all of it. We are happy with our world',
        typed, 'Und', diffing]
    do_word_diff_test(shared, before, after)
  end

  # DiffLCS.longest_common_sub_strings on short character arrays.
  def test_longest_common_sub_strings
    result = DiffLCS.longest_common_sub_strings(
        'abcde'.split(''), 'acdbe'.split(''))

    assert_equal PositionRange.new(2,4), result[:matched_old].first
    assert_equal PositionRange.new(1,3), result[:matched_new].first

    old_chars = 'aaaaablabbbbbbccccc'.split('')
    new_chars = 'aaaaakbbbbbbk'.split('')
    expected = {
        :matched_old => PositionRange::List.from_s('8,14:0,5'),
        :matched_new => PositionRange::List.from_s('6,12:0,5')}

    assert_equal(expected,
        DiffLCS.longest_common_sub_strings(old_chars, new_chars))
  end

  # String#diff, made available by requiring 'diff_l_c_s/string'.
  def test_string
    expected = {
        :matched_old => PositionRange::List.from_s('0,2:5,9:2,4'),
        :matched_new => PositionRange::List.from_s('0,2:3,7:7,9')}

    assert_equal(expected, '123456789'.diff('120678934'))
  end

  ### Test helpers

  # Joins the chunk arrays into an old and a new text, and asserts that
  # DiffLCS.diff on their characters (minimum_lcs_size 15) matches exactly
  # the similar chunks.
  def do_diff_test(similar_chunk_arr, old_chunk_arr, new_chunk_arr)
    old_text = old_chunk_arr.join
    new_text = new_chunk_arr.join
    expected = expected_matches(
        similar_chunk_arr, new_chunk_arr, old_text, new_text)
    assert_equal(expected,
        DiffLCS.diff(old_text.split(''), new_text.split(''),
            :minimum_lcs_size => 15))
  end

  # As do_diff_test, but through DiffLCS.word_diff on the joined strings
  # (minimum_lcs_size 3).
  def do_word_diff_test(similar_chunk_arr, old_chunk_arr, new_chunk_arr)
    old_text = old_chunk_arr.join
    new_text = new_chunk_arr.join
    expected = expected_matches(
        similar_chunk_arr, new_chunk_arr, old_text, new_text)
    assert_equal(expected,
        DiffLCS.word_diff(old_text, new_text, :minimum_lcs_size => 3))
  end

  # Builds the hash a diff is expected to return: the similar chunks located
  # in the old text (in the order given), and in the new text (in the order
  # in which they occur in new_chunk_arr).
  def expected_matches(similar_chunk_arr, new_chunk_arr, old_text, new_text)
    matched_old = get_position_range_list_for_ranges_of_in(
        similar_chunk_arr, old_text)
    # the similar chunks, but kept in the order they have in the new text
    similar_in_new_order =
        new_chunk_arr - (new_chunk_arr - similar_chunk_arr)
    matched_new = get_position_range_list_for_ranges_of_in(
        similar_in_new_order, new_text)
    {:matched_old => matched_old, :matched_new => matched_new}
  end

  # Returns a PositionRange::List with, for each substring, the range of its
  # first occurrence in string. Raises StandardError if one is not found.
  def get_position_range_list_for_ranges_of_in(substrings, string)
    ranges = PositionRange::List.new
    substrings.each do |piece|
      start = string.index(piece)
      raise StandardError, ' Substring not found' unless start
      ranges.push(PositionRange.new(start, start + piece.size))
    end
    ranges
  end
end
@@ -0,0 +1,3 @@
1
+ require 'stringio'
2
+ require 'test/unit'
3
+ require File.dirname(__FILE__) + '/../lib/diff_l_c_s'
@@ -0,0 +1,55 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
+ require File.dirname(__FILE__) + '/test_helper.rb'
15
+
16
class WordSplitArrayTest < Test::Unit::TestCase
  ### Initialization

  # Splitting into words, whitespace/punctuation runs, html-tags and
  # separators.
  def test_initialize
    assert_equal ['boo',' ','ba',' ','bol'], DiffLCS::WordSplitArray.new('boo ba bol')
    # a run of several spaces stays together as one token
    # (this assertion used to repeat the previous one verbatim; the
    # whitespace of the original input was presumably lost -- TODO confirm)
    assert_equal ['boo','  ','ba',' ','bol'], DiffLCS::WordSplitArray.new('boo  ba bol')
    assert_equal ['boo','. ','ba',' ','bol'], DiffLCS::WordSplitArray.new('boo. ba bol')
    assert_equal [' ','boo','. ','ba',' ','bol'], DiffLCS::WordSplitArray.new(' boo. ba bol')
    assert_equal ['boo','. ','ba',' ','bol',' '], DiffLCS::WordSplitArray.new('boo. ba bol ')
    assert_equal [' ','boo','. ','ba',' ','bol',' '], DiffLCS::WordSplitArray.new(' boo. ba bol ')
    assert_equal [' ','boo','. ','<ba>',' ','</bol>',' '], DiffLCS::WordSplitArray.new(' boo. <ba> </bol> ')
    assert_equal [' ','boo','. ','<ba>','moma','</bol>',' '], DiffLCS::WordSplitArray.new(' boo. <ba>moma</bol> ')
    assert_equal ['boo',' ',DiffLCS::WordSplitArray::SEPARATOR,' ','ba',' ','bol'],
        DiffLCS::WordSplitArray.new('boo ' + DiffLCS::WordSplitArray::SEPARATOR + ' ba bol')
  end

  ### Methods

  # Word-index ranges are translated to character-offset ranges.
  def test_translate_to_pos
    # normal
    # three spaces between 'ba' and 'bol': the expected ranges put that
    # token at 6..9 and the end of the text at 12
    assert_equal PositionRange::List.from_s('0,3:3,4:4,6:6,9:9,12'),
        DiffLCS::WordSplitArray.new('boo ba   bol').translate_to_pos(
            PositionRange::List.from_s('0,1:1,2:2,3:3,4:4,5'))

    # scrambled
    assert_equal PositionRange::List.from_s('3,4:0,3'),
        DiffLCS::WordSplitArray.new('boo ').translate_to_pos(
            PositionRange::List.from_s('1,2:0,1'))

    # ends with space
    assert_equal PositionRange::List.from_s('0,3:3,4:4,7:7,8'),
        DiffLCS::WordSplitArray.new('boo baa ').translate_to_pos(
            PositionRange::List.from_s('0,1:1,2:2,3:3,4'))

    # starts with space
    assert_equal PositionRange::List.from_s('0,1:1,4:4,5:5,8'),
        DiffLCS::WordSplitArray.new(' boo baa').translate_to_pos(
            PositionRange::List.from_s('0,1:1,2:2,3:3,4'))
  end
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: difflcs
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.6.0
5
+ platform: ruby
6
+ authors:
7
+ - Wybo Wiersma
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-12-30 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: positionrange
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.6.0
24
+ version:
25
+ description: A diff algorithm using longest common substrings that can also find text that has moved.
26
+ email: wybo@logilogi.org
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - Rakefile
35
+ - install.rb
36
+ - README.txt
37
+ - CHANGELOG.txt
38
+ - LICENSE.txt
39
+ - lib/diff_l_c_s
40
+ - lib/diff_l_c_s/counter.rb
41
+ - lib/diff_l_c_s/word_split_array.rb
42
+ - lib/diff_l_c_s/string.rb
43
+ - lib/diff_l_c_s/version.rb
44
+ - lib/difflcs.rb
45
+ - lib/diff_l_c_s.rb
46
+ - test/counter_test.rb
47
+ - test/diff_l_c_s_test.rb
48
+ - test/test_helper.rb
49
+ - test/word_split_array_test.rb
50
+ has_rdoc: true
51
+ homepage: http://difflcs.rubyforge.org
52
+ post_install_message:
53
+ rdoc_options: []
54
+
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: "0"
62
+ version:
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ version:
69
+ requirements:
70
+ - none
71
+ rubyforge_project: difflcs
72
+ rubygems_version: 1.3.1
73
+ signing_key:
74
+ specification_version: 2
75
+ summary: Diffing that sniffs out moved text.
76
+ test_files: []
77
+