docdiff 0.6.5 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/Gemfile +7 -7
  3. data/Guardfile +4 -4
  4. data/Makefile +1 -1
  5. data/Rakefile +6 -6
  6. data/bin/docdiff +1 -1
  7. data/devutil/Rakefile +12 -5
  8. data/devutil/char_by_charclass.rb +43 -20
  9. data/devutil/charclass_by_char.rb +40 -19
  10. data/devutil/jis0208.rb +263 -231
  11. data/devutil/jis0208_test.rb +196 -0
  12. data/doc/news.md +8 -0
  13. data/docdiff.gemspec +12 -10
  14. data/lib/doc_diff.rb +59 -60
  15. data/lib/docdiff/charstring.rb +225 -241
  16. data/lib/docdiff/cli.rb +285 -250
  17. data/lib/docdiff/diff/contours.rb +1 -1
  18. data/lib/docdiff/diff/editscript.rb +1 -1
  19. data/lib/docdiff/diff/rcsdiff.rb +1 -1
  20. data/lib/docdiff/diff/shortestpath.rb +1 -1
  21. data/lib/docdiff/diff/speculative.rb +1 -1
  22. data/lib/docdiff/diff/subsequence.rb +1 -1
  23. data/lib/docdiff/diff/unidiff.rb +1 -1
  24. data/lib/docdiff/diff.rb +1 -1
  25. data/lib/docdiff/difference.rb +71 -70
  26. data/lib/docdiff/document.rb +129 -109
  27. data/lib/docdiff/encoding/en_ascii.rb +64 -58
  28. data/lib/docdiff/encoding/ja_eucjp.rb +250 -235
  29. data/lib/docdiff/encoding/ja_sjis.rb +240 -226
  30. data/lib/docdiff/encoding/ja_utf8.rb +6952 -6939
  31. data/lib/docdiff/version.rb +1 -1
  32. data/lib/docdiff/view.rb +522 -438
  33. data/lib/docdiff.rb +2 -2
  34. data/test/charstring_test.rb +475 -351
  35. data/test/cli_test.rb +103 -101
  36. data/test/diff_test.rb +15 -16
  37. data/test/difference_test.rb +40 -31
  38. data/test/docdiff_test.rb +162 -136
  39. data/test/document_test.rb +280 -175
  40. data/test/test_helper.rb +2 -1
  41. data/test/view_test.rb +636 -497
  42. metadata +8 -8
  43. data/devutil/testjis0208.rb +0 -38
@@ -1,94 +1,95 @@ data/lib/docdiff/difference.rb
1
1
  # Difference class for DocDiff
2
- # 2003-03-24 ..
2
+ # 2003-03-24 ..
3
3
  # Hisashi MORITA
4
4
 
5
- require 'docdiff/diff'
5
+ require "docdiff/diff"
6
6
 
7
7
  class DocDiff
8
- class Difference < Array
8
+ class Difference < Array
9
+ # @resolution = nil # char, word, phrase, sentence, line, paragraph..
10
+ # @codeset = ''
11
+ # @eol_char = "\n"
12
+ # @source = 'source'
13
+ # @target = 'target'
14
+ # attr_accessor :resolution, :codeset, :eol_char, :source, :target
9
15
 
10
- # @resolution = nil # char, word, phrase, sentence, line, paragraph..
11
- # @codeset = ''
12
- # @eol_char = "\n"
13
- # @source = 'source'
14
- # @target = 'target'
15
- # attr_accessor :resolution, :codeset, :eol_char, :source, :target
16
+ def initialize(array1 = nil, array2 = nil)
17
+ if array1.nil? && array2.nil?
18
+ return []
19
+ end
16
20
 
17
- def initialize(array1 = nil, array2 = nil)
18
- if (array1 == nil) && (array2 == nil)
19
- return []
21
+ diff = Diff.new(array1, array2)
22
+ @raw_list = []
23
+ diff.ses.each do |block| # Diff::EditScript does not have each_with_index
24
+ @raw_list << block
25
+ end
26
+ combine_del_add_to_change!
20
27
  end
21
- diff = Diff.new(array1, array2)
22
- @raw_list = []
23
- diff.ses.each{|block| # Diff::EditScript does not have each_with_index()
24
- @raw_list << block
25
- }
26
- combine_del_add_to_change!()
27
- end
28
28
 
29
- def combine_del_add_to_change!()
30
-
31
- @raw_list.each_with_index{|block, i|
32
- case block.first
33
- when :common_elt_elt
34
- if i == 0 # first block
35
- self << block
36
- else # in-between or the last block
37
- if @raw_list[i - 1].first == :del_elt # previous block was del
29
+ def combine_del_add_to_change!
30
+ @raw_list.each_with_index do |block, i|
31
+ case block.first
32
+ when :common_elt_elt
33
+ if i == 0
34
+ # first block
35
+ self << block
36
+ elsif @raw_list[i - 1].first == :del_elt
37
+ # previous block was del
38
38
  self << @raw_list[i - 1]
39
39
  self << block
40
- else # previous block was add
40
+ else
41
+ # previous block was add
41
42
  self << block
42
43
  end
43
- end
44
- when :del_elt
45
- if i == (@raw_list.size - 1) # last block
46
- self << block
47
- else # first block or in-between
48
- # do nothing, let the next block to decide what to do
49
- end
50
- when :add_elt
51
- if i == 0 # first block
52
- self << block
53
- else # in-between or the last block
54
- if @raw_list[i - 1].first == :del_elt # previous block was del
44
+ when :del_elt
45
+ if i == (@raw_list.size - 1)
46
+ # last block
47
+ self << block
48
+ else
49
+ # do nothing, let the next block to decide what to do
50
+ end
51
+ when :add_elt
52
+ if i == 0
53
+ # first block
54
+ self << block
55
+ elsif @raw_list[i - 1].first == :del_elt
56
+ # previous block was del
55
57
  deleted = @raw_list[i - 1][1]
56
- added = @raw_list[i][2]
58
+ added = @raw_list[i][2]
57
59
  self << [:change_elt, deleted, added]
58
- else # previous block was common
60
+ else
61
+ # previous block was common
59
62
  self << block
60
63
  end
64
+ else
65
+ raise "the first element of the block #{i} is invalid: (#{block.first})\n"
61
66
  end
62
- else
63
- raise "the first element of the block #{i} is invalid: (#{block.first})\n"
64
67
  end
65
- }
66
- end
67
- attr_accessor :raw_list
68
+ end
69
+ attr_accessor :raw_list
68
70
 
69
- def former_only()
70
- elms = self.dup.delete_if{|e| e[0] == :add_elt}
71
- elms.collect!{|e|
72
- if e[0] == :change_elt
73
- [e[0], e[1], nil]
74
- else
75
- e
71
+ def former_only
72
+ elms = dup.delete_if { |e| e[0] == :add_elt }
73
+ elms.collect! do |e|
74
+ if e[0] == :change_elt
75
+ [e[0], e[1], nil]
76
+ else
77
+ e
78
+ end
76
79
  end
77
- }
78
- return elms
79
- end
80
+ elms
81
+ end
80
82
 
81
- def latter_only()
82
- elms = self.dup.delete_if{|e| e[0] == :del_elt}
83
- elms.collect!{|e|
84
- if e[0] == :change_elt
85
- [e[0], nil, e[2]]
86
- else
87
- e
83
+ def latter_only
84
+ elms = dup.delete_if { |e| e[0] == :del_elt }
85
+ elms.collect! do |e|
86
+ if e[0] == :change_elt
87
+ [e[0], nil, e[2]]
88
+ else
89
+ e
90
+ end
88
91
  end
89
- }
90
- return elms
92
+ elms
93
+ end
91
94
  end
92
-
93
- end # class Difference
94
- end # class DocDiff
95
+ end
@@ -1,129 +1,149 @@ data/lib/docdiff/document.rb
1
1
  # Document class, a part of DocDiff
2
2
  # 2004-01-14.. Hisashi MORITA
3
3
 
4
- require 'docdiff/charstring'
4
+ require "docdiff/charstring"
5
5
 
6
- class EncodingDetectionFailure < Exception
6
+ class EncodingDetectionFailure < StandardError
7
7
  end
8
- class EOLDetectionFailure < Exception
8
+
9
+ class EOLDetectionFailure < StandardError
9
10
  end
10
11
 
11
12
  class DocDiff
12
- class Document
13
-
14
- def initialize(str, enc = nil, e = nil)
15
- @body = str
16
- @body.extend CharString
17
- if enc
18
- @body.encoding = enc
19
- elsif !@body.encoding
20
- guessed_encoding = CharString.guess_encoding(str)
21
- if guessed_encoding == "UNKNOWN"
22
- raise EncodingDetectionFailure, "encoding not specified, and auto detection failed."
13
+ class Document
14
+ def initialize(str, enc = nil, e = nil)
15
+ @body = str
16
+ @body.extend(CharString)
17
+ if enc
18
+ @body.encoding = enc
19
+ elsif !@body.encoding
20
+ guessed_encoding = CharString.guess_encoding(str)
21
+ if guessed_encoding == "UNKNOWN"
22
+ raise EncodingDetectionFailure, "encoding not specified, and auto detection failed."
23
23
  # @body.encoding = 'ASCII' # default to ASCII <= BAD!
24
- else
25
- @body.encoding = guessed_encoding
24
+ else
25
+ @body.encoding = guessed_encoding
26
+ end
26
27
  end
27
- end
28
- if e
29
- @body.eol = e
30
- else
31
- guessed_eol = CharString.guess_eol(str)
32
- if guessed_eol == "UNKNOWN"
33
- raise EOLDetectionFailure, "eol not specified, and auto detection failed."
34
- # @body.eol = 'LF' # default to LF
28
+ if e
29
+ @body.eol = e
35
30
  else
36
- @body.eol = guessed_eol
31
+ guessed_eol = CharString.guess_eol(str)
32
+ if guessed_eol == "UNKNOWN"
33
+ raise EOLDetectionFailure, "eol not specified, and auto detection failed."
34
+ # @body.eol = 'LF' # default to LF
35
+ else
36
+ @body.eol = guessed_eol
37
+ end
37
38
  end
38
39
  end
39
- end
40
- def encoding()
41
- @body.encoding
42
- end
43
- def encoding=(cs)
44
- @body.encoding = cs
45
- end
46
- def eol()
47
- @body.eol
48
- end
49
- def eol=(eolstr)
50
- @body.eol = eolstr
51
- end
52
40
 
53
- def split_to_line()
54
- @body.split_to_line
55
- end
56
- def split_to_word()
57
- @body.split_to_word
58
- end
59
- def split_to_char()
60
- @body.split_to_char
61
- end
62
- def split_to_byte()
63
- @body.split_to_byte
64
- end
41
+ def encoding
42
+ @body.encoding
43
+ end
65
44
 
66
- def count_line()
67
- @body.count_line
68
- end
69
- def count_blank_line()
70
- @body.count_blank_line
71
- end
72
- def count_empty_line()
73
- @body.count_empty_line
74
- end
75
- def count_graph_line()
76
- @body.count_graph_line
77
- end
45
+ def encoding=(cs)
46
+ @body.encoding = cs
47
+ end
78
48
 
79
- def count_word()
80
- @body.count_word
81
- end
82
- def count_latin_word()
83
- @body.count_latin_word
84
- end
85
- def count_ja_word()
86
- @body.count_ja_word
87
- end
88
- def count_valid_word()
89
- @body.count_valid_word
90
- end
91
- def count_latin_valid_word()
92
- @body.count_latin_valid_word
93
- end
94
- def count_ja_valid_word()
95
- @body.count_ja_valid_word
96
- end
49
+ def eol
50
+ @body.eol
51
+ end
97
52
 
98
- def count_char()
99
- @body.count_char
100
- end
101
- def count_blank_char()
102
- @body.count_blank_char
103
- end
104
- def count_graph_char()
105
- @body.count_graph_char
106
- end
107
- def count_latin_blank_char()
108
- @body.count_latin_blank_char
109
- end
110
- def count_latin_graph_char()
111
- @body.count_latin_graph_char
112
- end
113
- def count_ja_blank_char()
114
- @body.count_ja_blank_char
115
- end
116
- def count_ja_graph_char()
117
- @body.count_ja_graph_char
118
- end
53
+ def eol=(eolstr)
54
+ @body.eol = eolstr
55
+ end
119
56
 
120
- def count_byte()
121
- @body.count_byte
122
- end
57
+ def split_to_line
58
+ @body.split_to_line
59
+ end
123
60
 
124
- def eol_char()
125
- @body.eol_char
126
- end
61
+ def split_to_word
62
+ @body.split_to_word
63
+ end
64
+
65
+ def split_to_char
66
+ @body.split_to_char
67
+ end
127
68
 
128
- end # class Document
129
- end # class DocDiff
69
+ def split_to_byte
70
+ @body.split_to_byte
71
+ end
72
+
73
+ def count_line
74
+ @body.count_line
75
+ end
76
+
77
+ def count_blank_line
78
+ @body.count_blank_line
79
+ end
80
+
81
+ def count_empty_line
82
+ @body.count_empty_line
83
+ end
84
+
85
+ def count_graph_line
86
+ @body.count_graph_line
87
+ end
88
+
89
+ def count_word
90
+ @body.count_word
91
+ end
92
+
93
+ def count_latin_word
94
+ @body.count_latin_word
95
+ end
96
+
97
+ def count_ja_word
98
+ @body.count_ja_word
99
+ end
100
+
101
+ def count_valid_word
102
+ @body.count_valid_word
103
+ end
104
+
105
+ def count_latin_valid_word
106
+ @body.count_latin_valid_word
107
+ end
108
+
109
+ def count_ja_valid_word
110
+ @body.count_ja_valid_word
111
+ end
112
+
113
+ def count_char
114
+ @body.count_char
115
+ end
116
+
117
+ def count_blank_char
118
+ @body.count_blank_char
119
+ end
120
+
121
+ def count_graph_char
122
+ @body.count_graph_char
123
+ end
124
+
125
+ def count_latin_blank_char
126
+ @body.count_latin_blank_char
127
+ end
128
+
129
+ def count_latin_graph_char
130
+ @body.count_latin_graph_char
131
+ end
132
+
133
+ def count_ja_blank_char
134
+ @body.count_ja_blank_char
135
+ end
136
+
137
+ def count_ja_graph_char
138
+ @body.count_ja_graph_char
139
+ end
140
+
141
+ def count_byte
142
+ @body.count_byte
143
+ end
144
+
145
+ def eol_char
146
+ @body.eol_char
147
+ end
148
+ end
149
+ end
@@ -1,72 +1,78 @@ data/lib/docdiff/encoding/en_ascii.rb
1
1
  # English ASCII encoding module for CharString
2
2
  # 2003- Hisashi MORITA
3
3
 
4
- # frozen_string_literal: false
4
+ # frozen_string_literal: true
5
5
 
6
6
  class DocDiff
7
- module CharString
8
- module ASCII
7
+ module CharString
8
+ module ASCII
9
+ ENCODING = "US-ASCII"
9
10
 
10
- Encoding = "US-ASCII"
11
+ CNTRL =
12
+ "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" \
13
+ "\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13" \
14
+ "\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d" \
15
+ "\x1e\x1f\x7f"
16
+ SPACE =
17
+ "\x09\x0a\x0b\x0c\x0d\x20"
18
+ BLANK =
19
+ "\x09\x20"
20
+ DIGIT =
21
+ "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
22
+ UPPER =
23
+ "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
24
+ "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
25
+ "\x55\x56\x57\x58\x59\x5a"
26
+ LOWER =
27
+ "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
28
+ "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
29
+ "\x75\x76\x77\x78\x79\x7a"
30
+ ALPHA = UPPER + LOWER
31
+ ALNUM = DIGIT + ALPHA
32
+ PUNCT =
33
+ Regexp.quote(
34
+ "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
35
+ "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
36
+ "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
37
+ "\x7d\x7e",
38
+ )
39
+ GRAPH = DIGIT + UPPER + LOWER + PUNCT
40
+ PRINT = "\x20" + GRAPH
41
+ XDIGIT =
42
+ DIGIT +
43
+ "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
44
+ "\x65\x66"
11
45
 
12
- CNTRL = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" \
13
- "\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13" \
14
- "\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d" \
15
- "\x1e\x1f\x7f"
16
- SPACE = "\x09\x0a\x0b\x0c\x0d\x20"
17
- BLANK = "\x09\x20"
18
- DIGIT = "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39"
19
- UPPER = "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a" \
20
- "\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54" \
21
- "\x55\x56\x57\x58\x59\x5a"
22
- LOWER = "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a" \
23
- "\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74" \
24
- "\x75\x76\x77\x78\x79\x7a"
25
- ALPHA = UPPER + LOWER
26
- ALNUM = DIGIT + ALPHA
27
- PUNCT = "\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a" \
28
- "\x2b\x2c\x2d\x2e\x2f\x3a\x3b\x3c\x3d\x3e" \
29
- "\x3f\x40\x5b\x5c\x5d\x5e\x5f\x60\x7b\x7c" \
30
- "\x7d\x7e"
31
- GRAPH = DIGIT + UPPER + LOWER + PUNCT
32
- PRINT = "\x20" + GRAPH
33
- XDIGIT = DIGIT +
34
- "\x41\x42\x43\x44\x45\x46\x61\x62\x63\x64" \
35
- "\x65\x66"
46
+ JA_BLANK = "" # kludge...
47
+ JA_GRAPH = "" # kludge...
36
48
 
37
- JA_BLANK = "" # kludge...
38
- JA_GRAPH = "" # kludge...
49
+ WORD_REGEXP_SRC = [
50
+ "(?:[#{GRAPH}]+[#{BLANK}]?)",
51
+ "|(?:[#{SPACE}]+)",
52
+ "|(?:.+?)",
53
+ ].join
39
54
 
40
- PUNCT.replace(Regexp.quote(PUNCT)) # kludge to avoid warning "character class has `[' without escape"
41
- PRINT.replace(Regexp.quote(PRINT)) # kludge to avoid warning "character class has `[' without escape"
42
- GRAPH.replace(Regexp.quote(GRAPH)) # kludge to avoid warning "character class has `[' without escape"
55
+ # override default method, as ASCII has no Japanese in it
56
+ def count_ja_graph_char
57
+ 0
58
+ end
43
59
 
44
- WORD_REGEXP_SRC = ["(?:[#{GRAPH}]+[#{BLANK}]?)",
45
- "|(?:[#{SPACE}]+)",
46
- "|(?:.+?)"].join
60
+ # override default method, as ASCII has no Japanese in it
61
+ def count_ja_blank_char
62
+ 0
63
+ end
47
64
 
48
- # override default method, as ASCII has no Japanese in it
49
- def count_ja_graph_char()
50
- 0
51
- end
52
-
53
- # override default method, as ASCII has no Japanese in it
54
- def count_ja_blank_char()
55
- 0
56
- end
65
+ # override default method, as ASCII has no Japanese in it
66
+ def count_ja_word
67
+ 0
68
+ end
57
69
 
58
- # override default method, as ASCII has no Japanese in it
59
- def count_ja_word()
60
- 0
61
- end
70
+ # override default method, as ASCII has no Japanese in it
71
+ def count_ja_valid_word
72
+ 0
73
+ end
62
74
 
63
- # override default method, as ASCII has no Japanese in it
64
- def count_ja_valid_word()
65
- 0
75
+ CharString.register_encoding(self)
66
76
  end
67
-
68
- CharString.register_encoding(self)
69
-
70
- end # module ASCII
71
- end # module CharString
72
- end # class DocDiff
77
+ end
78
+ end