docdiff 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. data/.gitignore +6 -0
  2. data/.travis.yml +7 -0
  3. data/Gemfile +17 -0
  4. data/Guardfile +8 -0
  5. data/Makefile +108 -0
  6. data/Rakefile +17 -0
  7. data/bin/docdiff +179 -0
  8. data/devutil/JIS0208.TXT +6952 -0
  9. data/devutil/char_by_charclass.rb +23 -0
  10. data/devutil/charclass_by_char.rb +21 -0
  11. data/devutil/jis0208.rb +343 -0
  12. data/devutil/testjis0208.rb +38 -0
  13. data/docdiff.conf.example +22 -0
  14. data/docdiff.gemspec +23 -0
  15. data/docdiffwebui.cgi +176 -0
  16. data/docdiffwebui.html +123 -0
  17. data/img/docdiff-screenshot-format-html-digest-firefox.png +0 -0
  18. data/img/docdiff-screenshot-format-html-firefox.png +0 -0
  19. data/img/docdiff-screenshot-format-tty-cmdexe-en.png +0 -0
  20. data/img/docdiff-screenshot-format-tty-cmdexe-ja.png +0 -0
  21. data/img/docdiff-screenshot-format-tty-rxvtunicode-en.png +0 -0
  22. data/img/docdiff-screenshot-format-tty-rxvtunicode-ja.png +0 -0
  23. data/img/docdiff-screenshot-format-tty-xterm-en.png +0 -0
  24. data/img/docdiff-screenshot-format-tty-xterm-ja.png +0 -0
  25. data/img/docdiff-screenshot-resolution-linewordchar-xterm.png +0 -0
  26. data/index.html +181 -0
  27. data/langfilter.rb +14 -0
  28. data/lib/doc_diff.rb +170 -0
  29. data/lib/docdiff.rb +7 -0
  30. data/lib/docdiff/charstring.rb +579 -0
  31. data/lib/docdiff/diff.rb +217 -0
  32. data/lib/docdiff/diff/contours.rb +382 -0
  33. data/lib/docdiff/diff/editscript.rb +148 -0
  34. data/lib/docdiff/diff/rcsdiff.rb +107 -0
  35. data/lib/docdiff/diff/shortestpath.rb +93 -0
  36. data/lib/docdiff/diff/speculative.rb +40 -0
  37. data/lib/docdiff/diff/subsequence.rb +39 -0
  38. data/lib/docdiff/diff/unidiff.rb +124 -0
  39. data/lib/docdiff/difference.rb +92 -0
  40. data/lib/docdiff/document.rb +127 -0
  41. data/lib/docdiff/encoding/en_ascii.rb +97 -0
  42. data/lib/docdiff/encoding/ja_eucjp.rb +269 -0
  43. data/lib/docdiff/encoding/ja_sjis.rb +260 -0
  44. data/lib/docdiff/encoding/ja_utf8.rb +6974 -0
  45. data/lib/docdiff/version.rb +3 -0
  46. data/lib/docdiff/view.rb +476 -0
  47. data/lib/viewdiff.rb +375 -0
  48. data/readme.html +713 -0
  49. data/sample/01.en.ascii.cr +1 -0
  50. data/sample/01.en.ascii.crlf +2 -0
  51. data/sample/01.en.ascii.lf +2 -0
  52. data/sample/01.ja.eucjp.lf +2 -0
  53. data/sample/01.ja.sjis.cr +1 -0
  54. data/sample/01.ja.sjis.crlf +2 -0
  55. data/sample/01.ja.utf8.crlf +2 -0
  56. data/sample/02.en.ascii.cr +1 -0
  57. data/sample/02.en.ascii.crlf +2 -0
  58. data/sample/02.en.ascii.lf +2 -0
  59. data/sample/02.ja.eucjp.lf +2 -0
  60. data/sample/02.ja.sjis.cr +1 -0
  61. data/sample/02.ja.sjis.crlf +2 -0
  62. data/sample/02.ja.utf8.crlf +2 -0
  63. data/sample/humpty_dumpty01.ascii.lf +4 -0
  64. data/sample/humpty_dumpty02.ascii.lf +4 -0
  65. data/test/charstring_test.rb +1008 -0
  66. data/test/diff_test.rb +36 -0
  67. data/test/difference_test.rb +64 -0
  68. data/test/docdiff_test.rb +193 -0
  69. data/test/document_test.rb +626 -0
  70. data/test/test_helper.rb +7 -0
  71. data/test/view_test.rb +570 -0
  72. data/test/viewdiff_test.rb +908 -0
  73. metadata +129 -0
@@ -0,0 +1,217 @@
1
+ =begin
2
+ = Diff
3
+ --- Diff.new(seq_a, seq_b)
4
+ --- Diff#ses([algorithm=:speculative])
5
+ --- Diff#lcs([algorithm=:speculative])
6
+
7
+ Available algorithms are as follows.
8
+ * :shortestpath
9
+ * :contours
10
+ * :speculative
11
+
12
+ = Diff::EditScript
13
+ --- Diff::EditScript.new
14
+ --- Diff::EditScript#del(seq_or_len_a)
15
+ --- Diff::EditScript#add(seq_or_len_b)
16
+ --- Diff::EditScript#common(seq_or_len_a[, seq_or_len_b])
17
+ --- Diff::EditScript#commonsubsequence
18
+ --- Diff::EditScript#count_a
19
+ --- Diff::EditScript#count_b
20
+ --- Diff::EditScript#additions
21
+ --- Diff::EditScript#deletions
22
+ --- Diff::EditScript#each {|mark, a, b| ...}
23
+ --- Diff::EditScript#apply(arr)
24
+ --- Diff::EditScript.parse_rcsdiff(input)
25
+ --- Diff::EditScript#rcsdiff([out=''])
26
+
27
+ = Diff::Subsequence
28
+ --- Diff::Subsequence.new
29
+ --- Diff::Subsequence.add(i, j[, len=1])
30
+ --- Diff::Subsequence#length
31
+ --- Diff::Subsequence#each {|i, j, len| ...}
32
+ =end
33
+
34
+ require 'docdiff/diff/editscript'
35
+ require 'docdiff/diff/subsequence'
36
+ require 'docdiff/diff/shortestpath'
37
+ require 'docdiff/diff/contours'
38
+ require 'docdiff/diff/speculative'
39
+
40
+ =begin
41
+ Data class reduces the input for diff and converts the alphabet to Integers.
42
+
43
+ It reduces input by removing common prefix, suffix and
44
+ unique elements.
45
+
46
+ So, reduced input has following properties:
47
+ * First element is different.
48
+ * Last element is different.
49
+ * Any element in A also exists in B.
50
+ * Any element in B also exists in A.
51
+
52
+ =end
53
# Computes a longest common subsequence (LCS) and a shortest edit script
# (SES) between two sequences.
#
# The constructor reduces the problem before any algorithm runs: it strips
# the common prefix and suffix, drops elements that have no counterpart in
# the other sequence, and maps the remaining elements to small Integers via
# Alphabet.  The selected algorithm class only ever sees the reduced Integer
# arrays; #lcs maps its answer back to indexes of the original sequences.
class Diff
  # a, b:: the two sequences to compare.  They must respond to +each+,
  #        +length+ and +[]+ (Integer index, and [start, length] in #ses).
  #        Elements are compared with +eql?+ and are used as Hash keys.
  def initialize(a, b)
    @original_a = a
    @original_b = b

    # Occurrence counts of every element still inside the current window
    # of each sequence.  A count of 0 in the other sequence means the
    # element cannot be part of any common subsequence.
    count_a = Hash.new(0)
    a.each {|e| count_a[e] += 1}

    count_b = Hash.new(0)
    b.each {|e| count_b[e] += 1}

    # Half-open windows [beg, end) that are still unresolved.
    beg_a = 0
    end_a = a.length

    beg_b = 0
    end_b = b.length

    # Index pairs [i, j] matched while trimming.  @suffix_lcs is collected
    # from the end backwards, so it is stored in descending order.
    @prefix_lcs = []
    @suffix_lcs = []

    # Each trimming step can enable another one (e.g. dropping a unique
    # element may expose a new common prefix), so repeat until a full pass
    # changes nothing.
    flag = true
    while flag
      flag = false

      # Common prefix.
      while beg_a < end_a && beg_b < end_b && a[beg_a].eql?(b[beg_b])
        @prefix_lcs << [beg_a, beg_b]
        count_a[a[beg_a]] -= 1
        count_b[b[beg_b]] -= 1
        beg_a += 1
        beg_b += 1
        flag = true
      end

      # Common suffix.
      while beg_a < end_a && beg_b < end_b && a[end_a - 1].eql?(b[end_b - 1])
        @suffix_lcs << [end_a - 1, end_b - 1]
        count_a[a[end_a - 1]] -= 1
        count_b[b[end_b - 1]] -= 1
        end_a -= 1
        end_b -= 1
        flag = true
      end

      # Leading elements of A that no longer occur in B.
      while beg_a < end_a && count_b[a[beg_a]] == 0
        count_a[a[beg_a]] -= 1
        beg_a += 1
        flag = true
      end

      # Leading elements of B that no longer occur in A.
      while beg_b < end_b && count_a[b[beg_b]] == 0
        count_b[b[beg_b]] -= 1
        beg_b += 1
        flag = true
      end

      # Trailing elements of A that no longer occur in B.
      while beg_a < end_a && count_b[a[end_a - 1]] == 0
        count_a[a[end_a - 1]] -= 1
        end_a -= 1
        flag = true
      end

      # Trailing elements of B that no longer occur in A.
      while beg_b < end_b && count_a[b[end_b - 1]] == 0
        count_b[b[end_b - 1]] -= 1
        end_b -= 1
        flag = true
      end
    end

    @alphabet = Alphabet.new

    # Reduced sequence for A: only elements that still occur in B, encoded
    # as Integers.  @revert_index_a[k] is the original index of @a[k].
    @a = []
    @revert_index_a = []
    (beg_a...end_a).each {|i|
      if count_b[a[i]] != 0
        @a << @alphabet.add(a[i])
        @revert_index_a << i
      end
    }

    # Same for B.
    @b = []
    @revert_index_b = []
    (beg_b...end_b).each {|i|
      if count_a[b[i]] != 0
        @b << @alphabet.add(b[i])
        @revert_index_b << i
      end
    }
  end

  # Maps an algorithm name to the class implementing it.
  #
  # algorithm:: one of :shortestpath, :contours, :speculative.
  # Returns the matching class.
  # Raises ArgumentError for any other value.
  def Diff.algorithm(algorithm)
    case algorithm
    when :shortestpath
      return ShortestPath
    when :contours
      return Contours
    when :speculative
      return Speculative
    else
      raise ArgumentError.new("unknown diff algorithm: #{algorithm}")
    end
  end

  # Longest common subsequence of the two original sequences.
  #
  # algorithm:: algorithm name accepted by Diff.algorithm.
  # Returns a Subsequence whose entries are index pairs into the original
  # sequences: trimmed prefix first, then the algorithm's result mapped back
  # through the revert indexes, then the trimmed suffix.
  def lcs(algorithm=:speculative) # longest common subsequence
    klass = Diff.algorithm(algorithm)
    reduced_lcs = klass.new(@a, @b).lcs

    result = Subsequence.new
    @prefix_lcs.each {|i, j| result.add i, j}
    reduced_lcs.each {|i, j, l|
      # A run of length l in the reduced sequences need not be contiguous
      # in the originals, so every pair is translated individually.
      l.times {|k|
        result.add @revert_index_a[i+k], @revert_index_b[j+k]
      }
    }
    # Stored back-to-front, so replay in reverse to keep ascending order.
    @suffix_lcs.reverse_each {|i, j| result.add i, j}

    return result
  end

  # Shortest edit script turning the first sequence into the second.
  #
  # algorithm:: algorithm name accepted by Diff.algorithm; nil selects
  #             :speculative.
  # Returns an EditScript built from del/add/common operations.
  def ses(algorithm=nil) # shortest edit script
    algorithm ||= :speculative
    common = lcs(algorithm)
    script = EditScript.new
    # i0/j0: first index of each original sequence not yet emitted.
    i0 = j0 = 0
    common.each {|i, j, l|
      # Everything between the previous common run and this one is a
      # deletion (from A) and/or an addition (from B).
      script.del @original_a[i0, i - i0] if i0 < i
      script.add @original_b[j0, j - j0] if j0 < j
      script.common @original_a[i, l], @original_b[j, l]

      i0 = i + l
      j0 = j + l
    }

    # Tail after the last common run.
    i = @original_a.length
    j = @original_b.length
    script.del @original_a[i0, i - i0] if i0 < i
    script.add @original_b[j0, j - j0] if j0 < j

    return script
  end

  # Assigns consecutive Integers (0, 1, 2, ...) to distinct values in the
  # order they are first added.
  class Alphabet
    def initialize
      @hash = {}
    end

    # Returns the Integer assigned to +v+, registering +v+ with the next
    # free Integer if it has not been seen before.
    def add(v)
      if @hash.include? v
        return @hash[v]
      else
        return @hash[v] = @hash.size
      end
    end

    # Raised by #index when the value was never added.
    class NoSymbol < StandardError
    end

    # Returns the Integer assigned to +v+.
    # Raises NoSymbol (message: v.to_s) if +v+ was never added.
    def index(v)
      # The key must be passed to fetch; without it fetch itself raises
      # ArgumentError and the NoSymbol block is never reached.
      return @hash.fetch(v) {raise NoSymbol.new(v.to_s)}
    end

    # Number of distinct values registered so far.
    def size
      return @hash.size
    end
  end
end
@@ -0,0 +1,382 @@
1
+ =begin
2
+ == Contours
3
+ Contours is based on the algorithm which is presented by Claus Rick.
4
+
5
+ I made two optimizations (for long LCS):
6
+
7
+ * When a midpoint of LCS is found, adjacent matches on the same diagonal are checked.
8
+ They are also part of LCS. If LCS is long, they may exist and may even be long.
9
+
10
+ * Search method for next contour uses divide and conquer.
11
+
12
+ * Search region is rectangle: (This is forward contour case.)
13
+
14
+ (min{i|(i,j) in dominants}, min{j|(i,j) in dominants}) to (end_a, end_b).
15
+
16
+ * In search region (i0,j0) to (i1,j1), For each dominant match (i,j) in
17
+ Ck and the region, (i+1,j+1) is checked first. If LCS is long, it is
18
+ frequently a match.
19
+ If it is match, it's a match in Ck+1 and it divides search region:
20
+
21
+ (i0,j) to (i+1,end_b)
22
+ (i,j0) to (end_a,j+1)
23
+
24
+ * For each divided region, dominants are searched line by line:
25
+ topmost row or leftmost column. Longer one is selected.
26
+
27
+ If no dominant match is found in the line,
28
+ search region is reduced with only the line.
29
+
30
+ If a dominant match is found in the line,
31
+ search region is reduced with the line and
32
+ the rectangle farther than the match.
33
+
34
+ == References
35
+ [Claus2000] Claus Rick,
36
+ Simple and Fast Linear Space Computation of Longest Common Subsequences,
37
+ Information Processing Letters, Vol. 75/6, 275 - 281,
38
+ Elsevier (2000)
39
+
40
+ [Claus1995] Claus Rick,
41
+ A New Flexible Algorithm for the Longest Common Subsequence Problem,
42
+ Proceedings of the 6th Symposium on Combinatorial Pattern Matching (CPM'95),
43
+ Lecture Notes in Computer Science, Vol. 937, 340 - 351,
44
+ Springer Verlag (1995)
45
+ Also in Nordic Journal of Computing (NJC), Vol. 2, No. 4, Winter 1995, 444 - 461.
46
+ http://web.informatik.uni-bonn.de/IV/Mitarbeiter/rick/lcs.dvi.Z
47
+ =end
48
+
49
+ class Diff
50
+ class Contours
51
+ def initialize(a, b)
52
+ @a = a
53
+ @b = b
54
+ @closest_a = Closest.new(@a)
55
+ @closest_b = Closest.new(@b)
56
+ end
57
+
58
+ def lcs(lcs=Subsequence.new, beg_a=0, beg_b=0, end_a=@a.length, end_b=@b.length, len=nil)
59
+ #p [:lcs, beg_a, beg_b, end_a, end_b]
60
+ found, len, mid_a, mid_b = midpoint(beg_a, beg_b, end_a, end_b, len)
61
+
62
+ return lcs unless found
63
+
64
+ len1 = len2 = len / 2
65
+ if len & 1 == 0
66
+ len2 -= 1
67
+ end
68
+
69
+ l = 1
70
+
71
+ while beg_a < mid_a && beg_b < mid_b && @a[mid_a-1] == @b[mid_b-1]
72
+ len1 -= 1
73
+ mid_a -= 1
74
+ mid_b -= 1
75
+ l += 1
76
+ end
77
+
78
+ while mid_a+l < end_a && mid_b+l < end_b && @a[mid_a+l] == @b[mid_b+l]
79
+ len2 -= 1
80
+ l += 1
81
+ end
82
+
83
+ lcs(lcs, beg_a, beg_b, mid_a, mid_b, len1)
84
+ lcs.add(mid_a, mid_b, l)
85
+ lcs(lcs, mid_a + l, mid_b + l, end_a, end_b, len2)
86
+
87
+ return lcs
88
+ end
89
+
90
+ def midpoint(beg_a, beg_b, end_a, end_b, len)
91
+ return false, 0, nil, nil if len == 0
92
+
93
+ fc = newForwardContour(beg_a, beg_b, end_a, end_b)
94
+ return false, 0, nil, nil if fc.empty?
95
+
96
+ bc = newBackwardContour(beg_a, beg_b, end_a, end_b)
97
+
98
+ midpoints = nil
99
+
100
+ l = 1
101
+
102
+ while true
103
+ crossed = contourCrossed(fc, bc)
104
+ if crossed
105
+ midpoints = fc
106
+ break
107
+ end
108
+ l += 1
109
+ fc = nextForwardContour(fc, end_a, end_b)
110
+ crossed = contourCrossed(fc, bc)
111
+ if crossed
112
+ midpoints = bc
113
+ break
114
+ end
115
+ l += 1
116
+ bc = nextBackwardContour(bc, beg_a, beg_b)
117
+ end
118
+
119
+ # select a dominant match which is closest to diagonal.
120
+ m = midpoints[0]
121
+ (1...midpoints.length).each {|m1| m = m1 if m[0] < m1[0] && m[1] < m1[1] }
122
+
123
+ return [true, l, *m]
124
+ end
125
+
126
+ def newForwardContour(beg_a, beg_b, end_a, end_b)
127
+ return nextForwardContour([[beg_a-1,beg_b-1]], end_a, end_b)
128
+ end
129
+
130
+ def nextForwardContour(fc0, end_a, end_b)
131
+ next_dominants = []
132
+ topright_dominant = 0
133
+ bottomleft_dominant = fc0.length - 1
134
+
135
+ fc0.each_index {|k|
136
+ i, j = fc0[k]
137
+ if i+1 < end_a && j+1 < end_b && @a[i+1] == @b[j+1]
138
+ if topright_dominant <= k - 1
139
+ nextForwardContour1(fc0, topright_dominant, k - 1, i+1, end_b, next_dominants)
140
+ end
141
+ next_dominants << [i+1, j+1]
142
+ end_b = j+1
143
+ topright_dominant = k + 1
144
+ end
145
+ }
146
+
147
+ if topright_dominant <= bottomleft_dominant
148
+ nextForwardContour1(fc0, topright_dominant, bottomleft_dominant, end_a, end_b, next_dominants)
149
+ end
150
+ return next_dominants
151
+ end
152
+
153
+ def nextForwardContour1(fc0, topright_dominant, bottomleft_dominant, end_a, end_b, next_dominants_topright)
154
+ beg_a = fc0[topright_dominant][0] + 1
155
+ beg_b = fc0[bottomleft_dominant][1] + 1
156
+
157
+ next_dominants_bottomleft = []
158
+
159
+ while beg_a < end_a && beg_b < end_b
160
+ if end_a - beg_a < end_b - beg_b
161
+ # search top row: [beg_a, beg_b] to [beg_a, end_b-1] inclusive
162
+ if topright_dominant + 1 < fc0.length && fc0[topright_dominant + 1][0] < beg_a
163
+ topright_dominant += 1
164
+ end
165
+ search_start_b = fc0[topright_dominant][1]
166
+ # search top row: [beg_a, search_start_b+1] to [beg_a, end_b-1] inclusive
167
+ j = @closest_b.next(@a[beg_a], search_start_b)
168
+ if j < end_b
169
+ # new dominant found.
170
+ # it means that the rectangle [beg_a, j] to [end_a-1, end_b-1] is not required to search any more.
171
+ next_dominants_topright << [beg_a, j]
172
+ end_b = j
173
+ end
174
+ beg_a += 1
175
+ else
176
+ # search left column: [beg_a, beg_b] to [end_a-1, beg_b]
177
+ if 0 <= bottomleft_dominant - 1 && fc0[bottomleft_dominant - 1][1] < beg_b
178
+ bottomleft_dominant -= 1
179
+ end
180
+ search_start_a = fc0[bottomleft_dominant][0]
181
+ # search left column: [search_start_a, beg_b] to [end_a-1, beg_b]
182
+ i = @closest_a.next(@b[beg_b], search_start_a)
183
+ if i < end_a
184
+ # new dominant found.
185
+ # if means that the rectangle [i, beg_b] to [end_a-1, end_b-1] is not required to search any more.
186
+ next_dominants_bottomleft << [i, beg_b]
187
+ end_a = i
188
+ end
189
+ beg_b += 1
190
+ end
191
+ end
192
+
193
+ next_dominants_bottomleft.reverse!
194
+ next_dominants_topright.concat next_dominants_bottomleft
195
+ end
196
+
197
+ def newBackwardContour(beg_a, beg_b, end_a, end_b)
198
+ return nextBackwardContour([[end_a,end_b]], beg_a, beg_b)
199
+ end
200
+
201
+ def nextBackwardContour(bc0, beg_a, beg_b)
202
+ next_dominants = []
203
+ topright_dominant = 0
204
+ bottomleft_dominant = bc0.length - 1
205
+
206
+ bc0.each_index {|k|
207
+ i, j = bc0[k]
208
+ if beg_a <= i-1 && beg_b <= j-1 && @a[i-1] == @b[j-1]
209
+ if topright_dominant <= k - 1
210
+ nextBackwardContour1(bc0, topright_dominant, k - 1, beg_a, j, next_dominants)
211
+ end
212
+ next_dominants << [i-1, j-1]
213
+ beg_a = i
214
+ topright_dominant = k + 1
215
+ end
216
+ }
217
+
218
+ if topright_dominant <= bottomleft_dominant
219
+ nextBackwardContour1(bc0, topright_dominant, bottomleft_dominant, beg_a, beg_b, next_dominants)
220
+ end
221
+ return next_dominants
222
+ end
223
+
224
+ def nextBackwardContour1(bc0, topright_dominant, bottomleft_dominant, beg_a, beg_b, next_dominants_topright)
225
+ end_a = bc0[bottomleft_dominant][0]
226
+ end_b = bc0[topright_dominant][1]
227
+
228
+ next_dominants_bottomleft = []
229
+
230
+ while beg_a < end_a && beg_b < end_b
231
+ if end_a - beg_a < end_b - beg_b
232
+ # search bottom row: [end_a-1, end_b-1] from [end_a-1, beg_b]
233
+ if 0 <= bottomleft_dominant - 1 && end_a - 1 < bc0[bottomleft_dominant - 1][0]
234
+ bottomleft_dominant -= 1
235
+ end
236
+ search_end_b = bc0[bottomleft_dominant][1]
237
+ # search bottom row: [end_a-1, search_end_b-1] from [end_a-1, beg_b]
238
+ j = @closest_b.prev(@a[end_a-1], search_end_b)
239
+ if beg_b <= j
240
+ # new dominant found.
241
+ # it means that the rectangle [beg_a, beg_b] to [end_a-1, j] is not required to search any more.
242
+ next_dominants_bottomleft << [end_a-1, j]
243
+ beg_b = j + 1
244
+ end
245
+ end_a -= 1
246
+ else
247
+ # search right column: [end_a-1, end_b-1] to [beg_a, end_b-1]
248
+ if topright_dominant + 1 < bc0.length && end_b - 1 < bc0[topright_dominant + 1][1]
249
+ topright_dominant += 1
250
+ end
251
+ search_end_a = bc0[topright_dominant][0]
252
+ # search right column: [search_end_a-1, end_b-1] to [beg_a, end_b-1]
253
+ i = @closest_a.prev(@b[end_b-1], search_end_a)
254
+ if beg_a <= i
255
+ # new dominant found.
256
+ # if means that the rectangle [beg_a, beg_b] to [i, end_b-1] is not required to search any more.
257
+ next_dominants_topright << [i, end_b-1]
258
+ beg_a = i + 1
259
+ end
260
+ end_b -= 1
261
+ end
262
+ end
263
+
264
+ next_dominants_bottomleft.reverse!
265
+ next_dominants_topright.concat next_dominants_bottomleft
266
+ end
267
+
268
+ def contourCrossed(fc, bc)
269
+ #p [:contourCrossed1Beg, fc, bc]
270
+ new_fc, new_bc = contourCrossed1(fc, bc)
271
+ #p [:contourCrossed1End, new_fc, new_bc]
272
+ if new_fc.empty? && new_bc.empty?
273
+ return true
274
+ end
275
+
276
+ fc.replace new_fc
277
+ bc.replace new_bc
278
+
279
+ return false
280
+ end
281
+
282
+ def contourCrossed1(fc, bc)
283
+ new_fc = []
284
+ new_bc = []
285
+ fc_k = 0
286
+ bc_k = 0
287
+ bc_j = bc[0][1]
288
+ fc_j = fc[0][1]
289
+ fc_j = bc_j if fc_j < bc_j
290
+ fc_j += 1
291
+ while fc_k < fc.length || bc_k < bc.length
292
+ if bc_k < bc.length && (!(fc_k < fc.length) || bc[bc_k][0] <= fc[fc_k][0])
293
+ if fc_j < bc[bc_k][1]
294
+ new_bc << bc[bc_k]
295
+ end
296
+ bc_k += 1
297
+ bc_j = bc_k < bc.length ? bc[bc_k][1] : 0
298
+ end
299
+
300
+ if fc_k < fc.length && (!(bc_k < bc.length) || fc[fc_k][0] < bc[bc_k][0])
301
+ if fc[fc_k][1] < bc_j
302
+ new_fc << fc[fc_k]
303
+ end
304
+ fc_j = fc[fc_k][1]
305
+ fc_k += 1
306
+ end
307
+ end
308
+ return new_fc, new_bc
309
+ end
310
+
311
+ class Closest
312
+ def initialize(arr)
313
+ @n = arr.length + 1
314
+
315
+ @table = Array.new
316
+ arr.each_index {|i|
317
+ s = arr[i]
318
+ @table[s] = [-1] unless @table[s]
319
+ @table[s] << i
320
+ }
321
+ @table.each_index {|s|
322
+ @table[s] = [-1] unless @table[s]
323
+ @table[s] << @n
324
+ }
325
+ end
326
+
327
+ def next(s, i)
328
+ t = @table[s]
329
+
330
+ if t.length < 10
331
+ t.each {|j| return j if i < j}
332
+ return @n
333
+ end
334
+
335
+ lower = -1
336
+ upper = t.length
337
+ while lower + 1 != upper
338
+ mid = (lower + upper) / 2
339
+ if t[mid] <= i
340
+ lower = mid
341
+ else
342
+ upper = mid
343
+ end
344
+ end
345
+ b = lower + 1
346
+
347
+ if b < t.length
348
+ return t[b]
349
+ else
350
+ return @n
351
+ end
352
+
353
+ end
354
+
355
+ def prev(s, i)
356
+ t = @table[s]
357
+
358
+ if t.length < 10
359
+ t.reverse_each {|j| return j if j < i}
360
+ return -1
361
+ end
362
+
363
+ lower = -1
364
+ upper = t.length
365
+ while lower + 1 != upper
366
+ mid = (lower + upper) / 2
367
+ if t[mid] < i
368
+ lower = mid
369
+ else
370
+ upper = mid
371
+ end
372
+ end
373
+ if 0 < upper
374
+ return t[upper - 1]
375
+ else
376
+ return -1
377
+ end
378
+ end
379
+ end
380
+ end
381
+ end
382
+