docdiff 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/.gitignore +6 -0
  2. data/.travis.yml +7 -0
  3. data/Gemfile +17 -0
  4. data/Guardfile +8 -0
  5. data/Makefile +108 -0
  6. data/Rakefile +17 -0
  7. data/bin/docdiff +179 -0
  8. data/devutil/JIS0208.TXT +6952 -0
  9. data/devutil/char_by_charclass.rb +23 -0
  10. data/devutil/charclass_by_char.rb +21 -0
  11. data/devutil/jis0208.rb +343 -0
  12. data/devutil/testjis0208.rb +38 -0
  13. data/docdiff.conf.example +22 -0
  14. data/docdiff.gemspec +23 -0
  15. data/docdiffwebui.cgi +176 -0
  16. data/docdiffwebui.html +123 -0
  17. data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
  18. data/img/docdiff-screenshot-format-html-firefox.png +0 -0
  19. data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
  20. data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
  21. data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
  22. data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
  23. data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
  24. data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
  25. data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
  26. data/index.html +181 -0
  27. data/langfilter.rb +14 -0
  28. data/lib/doc_diff.rb +170 -0
  29. data/lib/docdiff.rb +7 -0
  30. data/lib/docdiff/charstring.rb +579 -0
  31. data/lib/docdiff/diff.rb +217 -0
  32. data/lib/docdiff/diff/contours.rb +382 -0
  33. data/lib/docdiff/diff/editscript.rb +148 -0
  34. data/lib/docdiff/diff/rcsdiff.rb +107 -0
  35. data/lib/docdiff/diff/shortestpath.rb +93 -0
  36. data/lib/docdiff/diff/speculative.rb +40 -0
  37. data/lib/docdiff/diff/subsequence.rb +39 -0
  38. data/lib/docdiff/diff/unidiff.rb +124 -0
  39. data/lib/docdiff/difference.rb +92 -0
  40. data/lib/docdiff/document.rb +127 -0
  41. data/lib/docdiff/encoding/en_ascii.rb +97 -0
  42. data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
  43. data/lib/docdiff/encoding/ja_sjis.rb +260 -0
  44. data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
  45. data/lib/docdiff/version.rb +3 -0
  46. data/lib/docdiff/view.rb +476 -0
  47. data/lib/viewdiff.rb +375 -0
  48. data/readme.html +713 -0
  49. data/sample/01.en.ascii.cr +1 -0
  50. data/sample/01.en.ascii.crlf +2 -0
  51. data/sample/01.en.ascii.lf +2 -0
  52. data/sample/01.ja.eucjp.lf +2 -0
  53. data/sample/01.ja.sjis.cr +1 -0
  54. data/sample/01.ja.sjis.crlf +2 -0
  55. data/sample/01.ja.utf8.crlf +2 -0
  56. data/sample/02.en.ascii.cr +1 -0
  57. data/sample/02.en.ascii.crlf +2 -0
  58. data/sample/02.en.ascii.lf +2 -0
  59. data/sample/02.ja.eucjp.lf +2 -0
  60. data/sample/02.ja.sjis.cr +1 -0
  61. data/sample/02.ja.sjis.crlf +2 -0
  62. data/sample/02.ja.utf8.crlf +2 -0
  63. data/sample/humpty_dumpty01.ascii.lf +4 -0
  64. data/sample/humpty_dumpty02.ascii.lf +4 -0
  65. data/test/charstring_test.rb +1008 -0
  66. data/test/diff_test.rb +36 -0
  67. data/test/difference_test.rb +64 -0
  68. data/test/docdiff_test.rb +193 -0
  69. data/test/document_test.rb +626 -0
  70. data/test/test_helper.rb +7 -0
  71. data/test/view_test.rb +570 -0
  72. data/test/viewdiff_test.rb +908 -0
  73. metadata +129 -0
@@ -0,0 +1,148 @@
1
+ require 'docdiff/diff/rcsdiff'
2
+ require 'docdiff/diff/unidiff'
3
+
4
class Diff
  # An edit script: an ordered list of entries describing how sequence A is
  # turned into sequence B.
  #
  # Every entry is a triple [mark, del, add].  The mark tells which slots
  # are in use and whether a slot holds the elements themselves ("elt") or
  # only their count ("num"):
  #
  #   :del_elt / :del_num        del slot used, add slot nil
  #   :add_elt / :add_num        add slot used, del slot nil
  #   :common_<a>_<b>            del slot holds side A, add slot holds side B
  #
  # Entries are grouped in chunks; a change group always stores its deletions
  # before its additions.
  class EditScript
    def initialize
      @chunk_common = nil
      @chunk_add = []
      @chunk_del = []
      @list = []
      @list << @chunk_del
      @list << @chunk_add

      @cs = Subsequence.new
      @count_a = 0
      @count_b = 0
      @additions = 0
      @deletions = 0
    end

    # count_a / count_b: number of elements of A / B covered so far.
    # additions / deletions: number of added / deleted elements so far.
    attr_reader :count_a, :additions
    attr_reader :count_b, :deletions

    # Returns the Subsequence that collects every common run recorded
    # through #common.
    def commonsubsequence
      return @cs
    end

    # Records a deletion from A.  seq_or_len is either the Array of deleted
    # elements or just their count.  Consecutive deletions of the same kind
    # are merged into one entry.
    def del(seq_or_len)
      open_change_chunks unless @chunk_del
      if Array === seq_or_len
        len = seq_or_len.length
        mark = :del_elt
      else
        len = seq_or_len
        mark = :del_num
      end
      if !@chunk_del.empty? && @chunk_del.last[0] == mark
        @chunk_del.last[1] += seq_or_len
      else
        @chunk_del << [mark, seq_or_len, nil]
      end
      @count_a += len
      @deletions += len
    end

    # Records an addition to B.  seq_or_len is either the Array of added
    # elements or just their count.  Consecutive additions of the same kind
    # are merged into one entry.
    def add(seq_or_len)
      open_change_chunks unless @chunk_add
      if Array === seq_or_len
        len = seq_or_len.length
        mark = :add_elt
      else
        len = seq_or_len
        mark = :add_num
      end
      if !@chunk_add.empty? && @chunk_add.last[0] == mark
        @chunk_add.last[2] += seq_or_len
      else
        @chunk_add << [mark, nil, seq_or_len]
      end
      @count_b += len
      @additions += len
    end

    # Records a run common to A and B.  Each argument is either the Array of
    # elements on that side or just the run length; both must describe the
    # same length.
    #
    # Raises ArgumentError when the two lengths differ.
    def common(seq_or_len_a, seq_or_len_b=seq_or_len_a)
      unless @chunk_common
        # Close the current change group: drop the two chunk slots and put
        # back only the ones that actually received entries.
        @list.pop
        @list.pop
        @list << @chunk_del unless @chunk_del.empty?
        @list << @chunk_add unless @chunk_add.empty?
        @chunk_add = nil
        @chunk_del = nil
        @chunk_common = []
        @list << @chunk_common
      end

      len_a = Array === seq_or_len_a ? seq_or_len_a.length : seq_or_len_a
      len_b = Array === seq_or_len_b ? seq_or_len_b.length : seq_or_len_b
      raise ArgumentError.new("length not equal: #{len_a} != #{len_b}") if len_a != len_b
      len = len_a

      mark = ((Array === seq_or_len_a) ?
              (Array === seq_or_len_b ? :common_elt_elt : :common_elt_num) :
              (Array === seq_or_len_b ? :common_num_elt : :common_num_num))

      if !@chunk_common.empty? && @chunk_common.last[0] == mark
        @chunk_common.last[1] += seq_or_len_a
        @chunk_common.last[2] += seq_or_len_b
      else
        @chunk_common << [mark, seq_or_len_a, seq_or_len_b]
      end

      @cs.add @count_a, @count_b, len
      @count_a += len
      @count_b += len
    end

    # Yields every [mark, del, add] entry in script order.
    def each
      @list.each {|chunk|
        chunk.each {|mark_del_add|
          yield mark_del_add
        }
      }
    end

    # Applies the script to src (an Array holding sequence A) and returns the
    # resulting Array.  Elements of src that lie beyond the part covered by
    # the script are copied unchanged.
    #
    # Raises ArgumentError when the script contains an :add_num entry, since
    # the added elements themselves are then unknown.
    def apply(src)
      l = 0          # 0-based position in src consumed so far
      dst = []
      each {|mark, del, add|
        case mark
        when :add_elt
          dst.concat add
        when :add_num
          raise ArgumentError.new("additionnal lines are not known.")
        when :common_elt_elt
          dst.concat add
          l += del.length
        when :common_elt_num
          # del is the Array of common elements, add is only their count.
          dst.concat del
          l += del.length
        when :common_num_elt
          # del is only the count, add is the Array of common elements.
          dst.concat add
          l += del
        when :common_num_num
          dst.concat src[l, del]
          l += del
        when :del_elt
          l += del.length
        when :del_num
          l += del
        end
      }
      # src[l..-1] is nil when l lies beyond the end of src.
      dst.concat(src[l..-1] || [])
      return dst
    end

    private

    # Opens a new change group (an empty deletion chunk followed by an empty
    # addition chunk) after a common run.
    def open_change_chunks
      @chunk_add = []
      @chunk_del = []
      @chunk_common = nil
      @list << @chunk_del
      @list << @chunk_add
    end
  end
end
@@ -0,0 +1,107 @@
1
class Diff
  # Returns the difference between a and b in RCS diff format.  Both
  # arguments must respond to each_line.
  def Diff.rcsdiff(a, b)
    al = []
    a.each_line {|l| al << l}
    bl = []
    b.each_line {|l| bl << l}
    return Diff.new(al, bl).ses.rcsdiff
  end

  class EditScript
    # Builds an EditScript from RCS diff text.
    #
    # Line numbers in the input are 1-based and refer to the original
    # sequence: "d<n> <len>" deletes len lines starting at line n and
    # "a<n> <len>" adds the following len lines after line n.
    def EditScript.parse_rcsdiff(input)
      ses = EditScript.new
      l = 1          # next line of the original not yet covered by the script
      scan_rcsdiff(input) {|mark, beg, len, lines|
        if mark == :del
          ses.common beg - l if l < beg
          ses.del len
          l = beg + len
        else
          # Lines up to and including line beg are unchanged and have to be
          # recorded before the addition; otherwise the new lines would be
          # placed right after the previous command instead of after beg.
          if l <= beg
            ses.common beg - l + 1
            l = beg + 1
          end
          ses.add lines
        end
      }
      return ses
    end

    # Tokenizes RCS diff text.  Yields (:del, beg, len, nil) for every delete
    # command and (:add, beg, len, lines) for every add command, where lines
    # is the Array of added lines.
    #
    # Raises InvalidRCSDiffFormat on an unrecognized command line or when the
    # input ends before all announced added lines were read.
    def EditScript.scan_rcsdiff(input)
      state = :command
      beg = len = nil
      adds = nil
      input.each_line("\n") {|line|
        case state
        when :command
          case line
          when /\Aa(\d+)\s+(\d+)/
            beg = $1.to_i
            len = $2.to_i
            adds = []
            if len == 0
              # Nothing follows a zero-length add; waiting for lines here
              # would swallow the rest of the input.
              yield :add, beg, len, adds
              adds = nil
            else
              state = :add
            end
          when /\Ad(\d+)\s+(\d+)/
            beg = $1.to_i
            len = $2.to_i
            yield :del, beg, len, nil
            state = :command
          else
            raise InvalidRCSDiffFormat.new(line)
          end
        when :add
          adds << line
          if adds.length == len
            yield :add, beg, len, adds
            adds = nil
            state = :command
          end
        else
          raise StandardError.new("unknown state")
        end
      }
      if state == :add
        raise InvalidRCSDiffFormat.new(
          "unexpected end of input: expected #{len} added lines, got #{adds.length}")
      end
    end

    # Appends the script in RCS diff format to out and returns out.
    #
    # Raises ArgumentError when added lines are not known (:add_num), when an
    # added element is not a single line, or when further lines follow an
    # added line that lacks its terminating newline.
    def rcsdiff(out='')
      state = :lines
      l = 1          # 1-based line number in the original sequence
      each {|mark, del, add|
        case mark
        when :add_elt
          out << "a#{l - 1} #{add.length}\n"
          add.each {|line|
            case state
            when :lines
              case line
              when /\A.*\n\z/
              when /\A.*\z/
                # A line without newline may only be the very last one.
                state = :after_last_line
              else
                raise ArgumentError.new("additional element is not line")
              end
            when :after_last_line
              raise ArgumentError.new("additional elements after last incomplete line")
            end
            out << line
          }
        when :add_num
          raise ArgumentError.new("additionnal lines are not known.")
        when :common_elt_elt
          l += del.length
        when :common_elt_num
          # add holds the count here, del holds the elements.
          l += add
        when :common_num_elt
          l += del
        when :common_num_num
          l += del
        when :del_elt
          del = del.length
          out << "d#{l} #{del}\n"
          l += del
        when :del_num
          out << "d#{l} #{del}\n"
          l += del
        end
      }
      return out
    end

    # Raised when RCS diff input cannot be parsed.
    class InvalidRCSDiffFormat < StandardError
    end
  end
end
@@ -0,0 +1,93 @@
1
+ =begin
2
+ Diff::ShortestPath uses the algorithm described in the following paper.
3
+
4
+ [Wu1990] Sun Wu, Udi Manber, Gene Myers and Webb Miller,
5
+ An O(NP) Sequence Comparison Algorithm,
6
+ Information Processing Letters 35, 1990, 317-323
7
+ =end
8
+
9
class Diff
  # Longest-common-subsequence search over two indexable sequences.
  class ShortestPath
    # The shorter input is always kept in @a; @exchanged records whether the
    # arguments had to be swapped to achieve that.
    def initialize(a, b)
      @exchanged = a.length > b.length
      @a, @b = @exchanged ? [b, a] : [a, b]
      @m = @a.length
      @n = @b.length
    end

    # Feeds every common run as (index_in_a, index_in_b, length), in
    # ascending order, to lcs.add and returns lcs.  Indices refer to the
    # sequences in the order they were given to #initialize.
    def lcs(lcs=Subsequence.new)
      delta = @n - @m
      fp = Array.new(@n+1+@m+1+1, -1)
      base = -(@m+1)
      path = Array.new(fp.length)

      # Advances diagonal k as far as possible, starting from the better of
      # its two neighbours, and records the run it walked (if any) as a
      # linked node [x_end, y_end, run_length, previous_node].
      extend_diagonal = lambda {|k|
        slot = base + k
        via_lower = fp[slot-1] + 1
        via_upper = fp[slot+1]
        if via_lower < via_upper
          start = via_upper
          previous = path[slot+1]
        else
          start = via_lower
          previous = path[slot-1]
        end
        reached = fp[slot] = snake(k, start)
        path[slot] = start < reached ? [reached - k, reached, reached - start, previous] : previous
      }

      round = -1
      finished = false
      until finished
        round += 1
        (-round).upto(delta-1) {|k| extend_diagonal.call(k)}
        (delta+round).downto(delta+1) {|k| extend_diagonal.call(k)}
        extend_diagonal.call(delta)
        finished = (fp[base+delta] == @n)
      end

      # Walk the linked nodes back from the end; this yields the runs last
      # to first.
      runs = []
      node = path[base+delta]
      while node
        x_end, y_end, run, node = node
        runs << [x_end - run, y_end - run, run]
      end
      # Undo the swap done in #initialize.
      runs.each {|entry| entry[0], entry[1] = entry[1], entry[0]} if @exchanged
      runs.reverse_each {|entry| lcs.add(*entry)}
      return lcs
    end

    # Follows diagonal k from row y while the elements of @a and @b match and
    # returns the row where the match stops.
    def snake(k, y)
      y += 1 while (y - k) < @m && y < @n && @a[y - k] == @b[y]
      return y
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'docdiff/diff/shortestpath'
2
+ require 'docdiff/diff/contours'
3
+ require 'thread'
4
+
5
class Diff
  # Runs the ShortestPath and Contours LCS implementations concurrently and
  # uses whichever finishes first.
  class Speculative
    def initialize(a, b)
      @a = a
      @b = b
    end

    # Returns the LCS produced by the first strategy that completes
    # successfully; the remaining worker is then stopped.
    #
    # Raises the first error reported by a worker if no strategy succeeds.
    #
    # Workers report through a Queue instead of writing a shared local and
    # killing each other inside Thread.exclusive, which current Ruby
    # versions no longer provide.
    def lcs
      outcomes = Queue.new

      # ShortestPath is started first: it is the faster one when the two
      # sequences are very similar.
      workers = [ShortestPath, Contours].collect {|strategy|
        Thread.new {
          begin
            outcomes << [:ok, strategy.new(@a, @b).lcs]
          rescue Exception => e
            # Forwarded to the caller below rather than swallowed; without
            # this the caller could wait forever on the queue.
            outcomes << [:error, e]
          end
        }
      }

      first_error = nil
      begin
        workers.length.times {
          status, value = outcomes.pop
          return value if status == :ok
          first_error ||= value
        }
      ensure
        # Stop whatever is still running; killing a finished thread is a
        # no-op.
        workers.each {|t| t.kill}
        workers.each {|t| t.join}
      end
      raise first_error
    end
  end
end
@@ -0,0 +1,39 @@
1
class Diff
  # An ordered list of common runs, each stored as
  # [index_in_a, index_in_b, length].
  class Subsequence
    def initialize
      @list = []
    end

    # Appends the run of len elements starting at i in A and j in B.  A run
    # that directly continues the previous one on both sides is merged into
    # it.
    #
    # Raises ArgumentError if len is not positive or if the run starts before
    # the end of the previous run on either side.
    def add(i, j, len=1)
      raise ArgumentError.new("non-positive length: #{len}") if len <= 0

      tail = @list.last
      if tail.nil?
        @list << [i, j, len]
        return
      end

      end_i = tail[0] + tail[2]
      end_j = tail[1] + tail[2]

      if end_i == i && end_j == j
        tail[2] += len
        return
      end

      if end_i > i || end_j > j
        raise ArgumentError.new("additional common sequence overlapped.")
      end

      @list << [i, j, len]
    end

    # Passes every [i, j, len] run to the given block, in insertion order.
    def each(&block)
      @list.each(&block)
    end

    # Total number of elements covered by all runs.
    def length
      return @list.inject(0) {|total, run| total + run[2]}
    end
  end
end
@@ -0,0 +1,124 @@
1
class Diff
  # Returns the difference between a and b in unified diff format.  Both
  # arguments must respond to each_line; algorithm is passed through to
  # Diff#ses.
  def Diff.unidiff(a, b, algorithm=nil)
    al = []
    a.each_line {|l| al << l}
    bl = []
    b.each_line {|l| bl << l}
    return Diff.new(al, bl).ses(algorithm).unidiff
  end

  class EditScript
    # Builds a "@@ -l1,ll1 +l2,ll2 @@" hunk header line.  l1/l2 are the
    # 1-based start lines and ll1/ll2 the line counts on the old and new
    # side.  A count of 1 is omitted; a count of 0 forces the start line
    # to 0.
    def unidiff_hunk_header(l1, ll1, l2, ll2)
      l1 = 0 if ll1 == 0
      l2 = 0 if ll2 == 0
      result = "@@ -#{l1}"
      result << ",#{ll1}" if ll1 != 1
      result << " +#{l2}"
      result << ",#{ll2}" if ll2 != 1
      result << " @@\n"
    end

    # Appends the script in unified diff format to out and returns out.
    # context_lines is the number of unchanged lines shown around a change.
    #
    # Raises ArgumentError when the script contains any entry whose lines
    # are known only by count (every mark other than :add_elt, :del_elt and
    # :common_elt_elt).
    #
    # NOTE(review): a hunk line lacking a trailing newline gets a newline
    # followed by the two characters backslash and "n" appended; presumably
    # a short form of the usual "no newline at end of file" marker -- confirm
    # against consumers of this output.
    def unidiff(out='', context_lines=3)
      l1 = l2 = 1                # next line number on the old / new side
      hunk = []                  # lines of the hunk being collected
      hunk_l1 = hunk_l2 = 1      # start line of that hunk on each side
      hunk_tail = 0              # number of trailing context lines in hunk
      each {|mark, del, add|
        case mark
        when :add_elt
          add.each {|line| hunk << '+' + line}
          hunk[-1] += "\n\\n" if /\n\z/ !~ hunk[-1]
          l2 += add.length
          hunk_tail = 0
        when :add_num
          raise ArgumentError.new("additionnal lines are not known.")
        when :common_elt_elt
          if hunk_tail + add.length <= context_lines * 2
            # Short enough to serve as trailing context of this change and
            # leading context of the next one: keep collecting.
            add.each {|line| hunk << ' ' + line}
            hunk[-1] += "\n\\n" if /\n\z/ !~ hunk[-1]
            l1 += add.length
            l2 += add.length
            hunk_tail += add.length
          else
            i = 0
            if hunk_tail != hunk.length
              # The hunk contains a change: complete its trailing context
              # and flush it.
              while hunk_tail < context_lines
                hunk << ' ' + add[i]
                l1 += 1
                l2 += 1
                hunk_tail += 1
                i += 1
              end
              hunk[-1] += "\n\\n" if /\n\z/ !~ hunk[-1]

              # The new-side length must be measured from the new-side
              # start line (hunk_l2); the two start lines differ as soon as
              # an earlier change added or removed lines.
              out << unidiff_hunk_header(hunk_l1, l1 - hunk_l1, hunk_l2, l2 - hunk_l2)
              h = hunk.length - (hunk_tail - context_lines)
              (0...h).each {|j| out << hunk[j]}
              hunk[0, h] = []
            end

            l1 += add.length - i
            l2 += add.length - i

            # Start the next hunk with the last context_lines common lines.
            hunk_l1 = l1 - context_lines
            hunk_l2 = l2 - context_lines
            hunk = add[-context_lines..-1].collect {|line| ' ' + line}
            hunk[-1] += "\n\\n" if /\n\z/ !~ hunk[-1]
            hunk_tail = context_lines
          end
        when :common_elt_num
          raise ArgumentError.new("deleted lines are not known.")
        when :common_num_elt
          raise ArgumentError.new("additional lines are not known.")
        when :common_num_num
          raise ArgumentError.new("deleted and additional lines are not known.")
        when :del_elt
          if hunk_tail == hunk.length && context_lines < hunk_tail
            # The hunk so far is pure context and longer than needed: trim
            # it from the front.
            i = hunk_tail - context_lines
            hunk[0, i] = []
            hunk_l1 += i
            hunk_l2 += i
          end
          del.each {|line| hunk << '-' + line}
          hunk[-1] += "\n\\n" if /\n\z/ !~ hunk[-1]
          l1 += del.length
          hunk_tail = 0
        when :del_num
          raise ArgumentError.new("deleted lines are not known.")
        end
      }
      if hunk_tail != hunk.length
        if context_lines < hunk_tail
          # Drop surplus trailing context.
          i = hunk_tail - context_lines
          hunk[-i..-1] = []
          l1 -= i
          l2 -= i
        end
        out << unidiff_hunk_header(hunk_l1, l1 - hunk_l1, hunk_l2, l2 - hunk_l2)
        hunk.each {|line| out << line}
      end
      return out
    end
  end
end