difflcs 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
# Namespace of the DiffLCS library.
module DiffLCS
  # The version of the library, available both as separate numeric
  # components and as one dotted string.
  module VERSION #:nodoc:
    MAJOR = 0
    MINOR = 6
    TINY = 0

    # The components above joined with dots ("0.6.0"). Frozen, so that the
    # shared constant cannot be modified in place by accident.
    STRING = [MAJOR, MINOR, TINY].join('.').freeze
  end
end
@@ -0,0 +1,58 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
module DiffLCS
  # An Array holding the consecutive pieces of a text: words, runs of
  # whitespace / punctuation, simple html-tags and SEPARATOR characters.
  # Every character of the text ends up in exactly one element, in order,
  # so joining the elements gives back the original text.
  class WordSplitArray < Array

    ### Constants

    # Used as a separator.
    #
    # NOTE: "\031" is an octal escape, so this is the character with code 25
    # (0x19, End of Medium) and not the ASCII Unit Separator (0x1F, which
    # would be written "\037"). The value is deliberately left as it is, so
    # that callers and texts already using this constant keep working.
    SEPARATOR = "\031".freeze

    # Matches the pieces that delimit words: a simple html-tag (a name
    # without attributes), a run of characters that are neither
    # word-characters, tag-characters ('<', '/', '>') nor SEPARATORs, or a
    # single SEPARATOR. Built once here instead of on every call to new.
    SPLIT_PATTERN = /<\/?\w+>|[^\w<\/>#{SEPARATOR}]+|#{SEPARATOR}/

    ### Constructors

    # Splits the words, and treats whitespace correctly.
    #
    # text:: the String to split up.
    #
    def initialize(text)
      super()
      consumed = 0 # index just past the last piece that was stored
      text.scan(SPLIT_PATTERN) do |delimiter|
        match = Regexp.last_match
        # whatever lies between two delimiters is stored as one piece
        push(text[consumed...match.begin(0)]) if match.begin(0) > consumed
        push(delimiter)
        consumed = match.end(0)
      end
      # the remainder of the text after the last delimiter, if any
      push(text[consumed...text.size]) if consumed < text.size
    end

    ### Methods

    # Translates word-positions to character-positions.
    #
    # positions:: a collection of ranges whose begin and end are indices
    #             into this array; each range must respond to begin, end
    #             and new_dup(new_begin, new_end).
    #
    # Returns a PositionRange::List holding, in the same order, the result
    # of new_dup for each range, called with the character-positions at
    # which the elements at begin and end start.
    #
    def translate_to_pos(positions)
      # offsets[i] is the character-position at which element i starts; the
      # extra last entry is the size of the whole text.
      offsets = [0]
      each { |piece| offsets.push(offsets.last + piece.size) }
      PositionRange::List.new(
        positions.collect { |position|
          position.new_dup(offsets[position.begin], offsets[position.end])
        })
    end
  end
end
data/lib/difflcs.rb ADDED
@@ -0,0 +1 @@
1
+ require 'diff_l_c_s'
@@ -0,0 +1,93 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
+ require File.dirname(__FILE__) + '/test_helper.rb'
15
+
16
# Unit tests for DiffLCS::Counter.
class CounterTest < Test::Unit::TestCase

  ### Initialization

  # A freshly created counter reports a step size of 1.
  def test_initialize
    counter = DiffLCS::Counter.new(1, 2)
    assert_equal(1, counter.step_size)
  end

  ### Methods

  # in_old / in_new start out one position wide and grow by one position
  # with every step_up.
  def test_step_up_in_old_and_in_new
    fresh = DiffLCS::Counter.new(5, 80)
    assert_equal(PositionRange.new(5, 6), fresh.in_old)
    assert_equal(PositionRange.new(80, 81), fresh.in_new)

    stepped = DiffLCS::Counter.new(5, 80)
    2.times { stepped.step_up }

    assert_equal(PositionRange.new(5, 8), stepped.in_old)
    assert_equal(PositionRange.new(80, 83), stepped.in_new)
  end

  # Assigning one of in_old / in_new changes the size, and the other range
  # follows along.
  def test_size_in_old_and_in_new_assignment
    # assigning in_old
    via_old = DiffLCS::Counter.new(5, 80)
    5.times { via_old.step_up }

    assert_equal(6, via_old.size)
    via_old.in_old = PositionRange.new(5, 9)
    assert_equal(4, via_old.size)
    assert_equal(PositionRange.new(80, 84), via_old.in_new)

    # assigning in_new
    via_new = DiffLCS::Counter.new(5, 80)
    3.times { via_new.step_up }

    assert_equal(4, via_new.size)
    via_new.in_new = PositionRange.new(80, 82)
    assert_equal(2, via_new.size)
    assert_equal(PositionRange.new(5, 7), via_new.in_old)

    # assigning in_new first, and in_old after that
    via_both = DiffLCS::Counter.new(5, 80)
    4.times { via_both.step_up }

    assert_equal(5, via_both.size)

    via_both.in_new = PositionRange.new(80, 82)
    assert_equal(2, via_both.size)
    assert_equal(PositionRange.new(5, 7), via_both.in_old)

    via_both.in_old = PositionRange.new(6, 7)
    assert_equal(1, via_both.size)
    assert_equal(PositionRange.new(81, 82), via_both.in_new)
  end

  # step_size goes up by one with a step_up.
  def test_step_size
    counter = DiffLCS::Counter.new(1, 5)
    assert_equal(1, counter.step_size)
    counter.step_up
    assert_equal(2, counter.step_size)
  end

  # The counter that was stepped up more often is the larger one, even
  # when the other one starts at later positions.
  def test_comparison
    stepped_once = DiffLCS::Counter.new(5, 80)
    other = DiffLCS::Counter.new(15, 90)

    stepped_once.step_up

    assert(stepped_once > other)

    # cause of no step_ups after in_old => size => comparing
    other = DiffLCS::Counter.new(15, 90)
    2.times { other.step_up }

    assert(stepped_once < other)
  end
end
@@ -0,0 +1,124 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
+ require File.dirname(__FILE__) + '/test_helper.rb'
15
+
16
+ require 'diff_l_c_s/string'
17
+
18
# Unit tests for the diffing methods of DiffLCS, and for String#diff.
class DiffLCSTest < Test::Unit::TestCase
  ### Class methods

  # Diffs on arrays of characters; the chunks in 'similar' are the parts
  # that are expected to be matched.
  def test_diff
    similar = ['This is the first small diff test. Isn\'t it nice ? ']
    do_diff_test(similar, similar.dup,
        [similar[0], 'Yes it is! Look! It works.'])

    similar = [
        'We are now really into testing ',
        ' of hand-written - or is it typed ? - texts ',
        ' diffing (finding differences between two rows) ']
    old_chunks = [similar[0], 'the sound process of', similar[2],
        'beauties of wizzardly', similar[1], '~']
    new_chunks = [similar[0], 'all of it. We are happy with our world',
        similar[1], '&', similar[2], '=']
    do_diff_test(similar, old_chunks, new_chunks)
  end

  # The same kind of texts as in test_diff, but diffed with word_diff.
  def test_word_diff
    similar = ['This is the first small diff test. Isn\'t it nice ? ']
    do_word_diff_test(similar, similar.dup,
        [similar[0], 'Yes it is! Look! It works.'])

    similar = [
        'We are now really into testing ',
        ' of hand-written - or is it typed ? - texts ',
        ' diffing (finding differences between two rows) ']
    old_chunks = [similar[0], 'the sound process of', similar[2],
        'beauties of wizzardly', similar[1]]
    new_chunks = [similar[0], 'all of it. We are happy with our world',
        similar[1], 'Und', similar[2]]
    do_word_diff_test(similar, old_chunks, new_chunks)
  end

  def test_longest_common_sub_strings
    result = DiffLCS.longest_common_sub_strings(
        'abcde'.split(''), 'acdbe'.split(''))

    assert_equal(PositionRange.new(2, 4), result[:matched_old].first)
    assert_equal(PositionRange.new(1, 3), result[:matched_new].first)

    expected = {
        :matched_old => PositionRange::List.from_s('8,14:0,5'),
        :matched_new => PositionRange::List.from_s('6,12:0,5')}
    assert_equal(expected,
        DiffLCS.longest_common_sub_strings(
            'aaaaablabbbbbbccccc'.split(''), 'aaaaakbbbbbbk'.split('')))
  end

  # The diff method that is made available on String.
  def test_string
    expected = {
        :matched_old => PositionRange::List.from_s('0,2:5,9:2,4'),
        :matched_new => PositionRange::List.from_s('0,2:3,7:7,9')}
    assert_equal(expected, '123456789'.diff('120678934'))
  end

  ### Test helpers

  # Joins the chunks into an old and a new text, and asserts that
  # DiffLCS.diff on their characters (with :minimum_lcs_size => 15) matches
  # the similar chunks at the positions where they occur in each text.
  def do_diff_test(similar_chunk_arr, old_chunk_arr, new_chunk_arr)
    old_text = old_chunk_arr.join
    new_text = new_chunk_arr.join
    # the similar chunks, in the order in which they occur in the new text
    similar_in_new = new_chunk_arr - (new_chunk_arr - similar_chunk_arr)
    expected = {
        :matched_old => self.get_position_range_list_for_ranges_of_in(
            similar_chunk_arr, old_text),
        :matched_new => self.get_position_range_list_for_ranges_of_in(
            similar_in_new, new_text)}
    assert_equal(expected,
        DiffLCS.diff(old_text.split(''), new_text.split(''),
            :minimum_lcs_size => 15))
  end

  # As do_diff_test, but passes the joined texts themselves to
  # DiffLCS.word_diff (with :minimum_lcs_size => 3).
  def do_word_diff_test(similar_chunk_arr, old_chunk_arr, new_chunk_arr)
    old_text = old_chunk_arr.join
    new_text = new_chunk_arr.join
    # the similar chunks, in the order in which they occur in the new text
    similar_in_new = new_chunk_arr - (new_chunk_arr - similar_chunk_arr)
    expected = {
        :matched_old => self.get_position_range_list_for_ranges_of_in(
            similar_chunk_arr, old_text),
        :matched_new => self.get_position_range_list_for_ranges_of_in(
            similar_in_new, new_text)}
    assert_equal(expected,
        DiffLCS.word_diff(old_text, new_text, :minimum_lcs_size => 3))
  end

  # Returns a PositionRange::List with, for every substring, the range
  # covered by its first occurrence in string. Raises a StandardError if a
  # substring does not occur in string.
  def get_position_range_list_for_ranges_of_in(substrings, string)
    ranges = PositionRange::List.new
    substrings.each do |substring|
      start = string.index(substring)
      raise StandardError, ' Substring not found' if start.nil?
      ranges.push(PositionRange.new(start, start + substring.size))
    end
    ranges
  end
end
@@ -0,0 +1,3 @@
1
+ require 'stringio'
2
+ require 'test/unit'
3
+ require File.dirname(__FILE__) + '/../lib/diff_l_c_s'
@@ -0,0 +1,55 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the DiffLCS library. DiffLCS is Free Software.
6
+ # You can run/distribute/modify DiffLCS under the terms of the GNU Affero
7
+ # General Public License version 3. The Affero GPL states that running a
8
+ # modified version or a derivative work also requires you to make the
9
+ # sourcecode of that work available to everyone that can interact with it.
10
+ # We chose the Affero GPL to ensure that DiffLCS remains open and libre
11
+ # (LICENSE.txt contains the full text of the legally binding license).
12
+ #++#
13
+
14
+ require File.dirname(__FILE__) + '/test_helper.rb'
15
+
16
# Unit tests for DiffLCS::WordSplitArray.
class WordSplitArrayTest < Test::Unit::TestCase
  ### Initialization

  # Each case pairs the expected elements with the text to be split.
  def test_initialize
    cases = [
        [['boo',' ','ba',' ','bol'], 'boo ba bol'],
        [['boo',' ','ba',' ','bol'], 'boo ba bol'],
        [['boo','. ','ba',' ','bol'], 'boo. ba bol'],
        [[' ','boo','. ','ba',' ','bol'], ' boo. ba bol'],
        [['boo','. ','ba',' ','bol',' '], 'boo. ba bol '],
        [[' ','boo','. ','ba',' ','bol',' '], ' boo. ba bol '],
        [[' ','boo','. ','<ba>',' ','</bol>',' '], ' boo. <ba> </bol> '],
        [[' ','boo','. ','<ba>','moma','</bol>',' '], ' boo. <ba>moma</bol> '],
        [['boo',' ',DiffLCS::WordSplitArray::SEPARATOR,' ','ba',' ','bol'],
            'boo ' + DiffLCS::WordSplitArray::SEPARATOR + ' ba bol']]
    cases.each do |expected, text|
      assert_equal(expected, DiffLCS::WordSplitArray.new(text))
    end
  end

  ### Methods

  # Each case lists the expected character-positions, the text to split,
  # and the word-positions that are translated.
  def test_translate_to_pos
    cases = [
        # normal
        ['0,3:3,4:4,6:6,9:9,12', 'boo ba bol', '0,1:1,2:2,3:3,4:4,5'],
        # scrambled
        ['3,4:0,3', 'boo ', '1,2:0,1'],
        # ends with space
        ['0,3:3,4:4,7:7,8', 'boo baa ', '0,1:1,2:2,3:3,4'],
        # starts with space
        ['0,1:1,4:4,5:5,8', ' boo baa', '0,1:1,2:2,3:3,4']]
    cases.each do |char_positions, text, word_positions|
      assert_equal(PositionRange::List.from_s(char_positions),
          DiffLCS::WordSplitArray.new(text).translate_to_pos(
              PositionRange::List.from_s(word_positions)))
    end
  end
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: difflcs
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.6.0
5
+ platform: ruby
6
+ authors:
7
+ - Wybo Wiersma
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-12-30 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: positionrange
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.6.0
24
+ version:
25
+ description: A diff algorithm using longest common substrings that can also find text that has moved.
26
+ email: wybo@logilogi.org
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - Rakefile
35
+ - install.rb
36
+ - README.txt
37
+ - CHANGELOG.txt
38
+ - LICENSE.txt
39
+ - lib/diff_l_c_s
40
+ - lib/diff_l_c_s/counter.rb
41
+ - lib/diff_l_c_s/word_split_array.rb
42
+ - lib/diff_l_c_s/string.rb
43
+ - lib/diff_l_c_s/version.rb
44
+ - lib/difflcs.rb
45
+ - lib/diff_l_c_s.rb
46
+ - test/counter_test.rb
47
+ - test/diff_l_c_s_test.rb
48
+ - test/test_helper.rb
49
+ - test/word_split_array_test.rb
50
+ has_rdoc: true
51
+ homepage: http://difflcs.rubyforge.org
52
+ post_install_message:
53
+ rdoc_options: []
54
+
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: "0"
62
+ version:
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ version:
69
+ requirements:
70
+ - none
71
+ rubyforge_project: difflcs
72
+ rubygems_version: 1.3.1
73
+ signing_key:
74
+ specification_version: 2
75
+ summary: Diffing that sniffs out moved text.
76
+ test_files: []
77
+